+++ /dev/null
-*.cmo
-*.cmx
-*.cmi
-
-*.o
-*.a
-
+++ /dev/null
-*.cmo
-*.cmx
-*.cmi
-
-*.o
-*.a
-
+++ /dev/null
-Copyright 1999 by Gerd Stolpmann
-
-The package "netstring" is copyright by Gerd Stolpmann.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of the "netstring" software (the "Software"), to deal in the
-Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or
-sell copies of the Software, and to permit persons to whom the
-Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included
-in all copies or substantial portions of the Software.
-
-The Software is provided ``as is'', without warranty of any kind, express
-or implied, including but not limited to the warranties of
-merchantability, fitness for a particular purpose and noninfringement.
-In no event shall Gerd Stolpmann be liable for any claim, damages or
-other liability, whether in an action of contract, tort or otherwise,
-arising from, out of or in connection with the Software or the use or
-other dealings in the software.
+++ /dev/null
-version = "0.9.3"
-requires = "str"
-description = "String processing for the Internet"
-
-archive(byte) =
- "netstring.cma netmappings_iso.cmo netmappings_other.cmo"
-archive(byte,toploop) =
- "netstring.cma netmappings_iso.cmo netmappings_other.cmo
- netstring_top.cmo"
-archive(byte,mt) =
- "netstring.cma netmappings_iso.cmo netmappings_other.cmo
- netstring_mt.cmo"
-archive(byte,mt,toploop) =
- "netstring.cma netmappings_iso.cmo netmappings_other.cmo
- netstring_mt.cmo netstring_top.cmo"
-archive(native) =
- "netstring.cmxa netmappings_iso.cmx netmappings_other.cmx"
-archive(native,mt) =
- "netstring.cmxa netmappings_iso.cmx netmappings_other.cmx
- netstring_mt.cmx"
-
-archive(byte,netstring_only_iso) =
- "netstring.cma netmappings_iso.cmo"
-archive(byte,toploop,netstring_only_iso) =
- "netstring.cma netmappings_iso.cmo
- netstring_top.cmo"
-archive(byte,mt,netstring_only_iso) =
- "netstring.cma netmappings_iso.cmo
- netstring_mt.cmo"
-archive(byte,mt,toploop,netstring_only_iso) =
- "netstring.cma netmappings_iso.cmo
- netstring_mt.cmo netstring_top.cmo"
-archive(native,netstring_only_iso) =
- "netstring.cmxa netmappings_iso.cmx"
-archive(native,mt,netstring_only_iso) =
- "netstring.cmxa netmappings_iso.cmx
- netstring_mt.cmx"
-
-archive(byte,netstring_minimum) =
- "netstring.cma"
-archive(byte,toploop,netstring_minimum) =
- "netstring.cma
- netstring_top.cmo"
-archive(byte,mt,netstring_minimum) =
- "netstring.cma
- netstring_mt.cmo"
-archive(byte,mt,toploop,netstring_minimum) =
- "netstring.cma
- netstring_mt.cmo netstring_top.cmo"
-archive(native,netstring_minimum) =
- "netstring.cmxa"
-archive(native,mt,netstring_minimum) =
- "netstring.cmxa
- netstring_mt.cmx"
+++ /dev/null
-# make all: make bytecode archive
-# make opt: make native archive
-# make install: install bytecode archive, and if present, native archive
-# make uninstall: uninstall package
-# make clean: remove intermediate files
-# make distclean: remove any superfluous files
-# make release: cleanup, create archive, tag CVS module
-# (for developers)
-
-#----------------------------------------------------------------------
-# specific rules for this package:
-
-OBJECTS = netstring_str.cmo \
- netencoding.cmo netbuffer.cmo netstream.cmo \
- mimestring.cmo cgi.cmo base64.cmo \
- nethtml_scanner.cmo nethtml.cmo \
- neturl.cmo \
- netmappings.cmo netconversion.cmo
-XOBJECTS = $(OBJECTS:.cmo=.cmx)
-ARCHIVE = netstring.cma
-XARCHIVE = netstring.cmxa
-
-NAME = netstring
-REQUIRES = str
-
-ISO_MAPPINGS = mappings/iso*.unimap
-OTHER_MAPPINGS = mappings/cp*.unimap \
- mappings/adobe*.unimap \
- mappings/jis*.unimap \
- mappings/koi*.unimap \
- mappings/mac*.unimap \
- mappings/windows*.unimap
-
-all: $(ARCHIVE) \
- netstring_top.cmo netstring_mt.cmo \
- netmappings_iso.cmo netmappings_other.cmo
-
-opt: $(XARCHIVE) \
- netstring_mt.cmx \
- netmappings_iso.cmx netmappings_other.cmx
-
-
-$(ARCHIVE): $(OBJECTS)
- $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS)
-
-$(XARCHIVE): $(XOBJECTS)
- $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS)
-
-netmappings_iso.ml:
- $(MAKE) -C tools
- test ! -d mappings || tools/unimap_to_ocaml/unimap_to_ocaml \
- -o netmappings_iso.ml $(ISO_MAPPINGS)
-
-netmappings_other.ml:
- $(MAKE) -C tools
- test ! -d mappings || tools/unimap_to_ocaml/unimap_to_ocaml \
- -o netmappings_other.ml $(OTHER_MAPPINGS)
-
-#----------------------------------------------------------------------
-# general rules:
-
-OPTIONS =
-OCAMLC = ocamlc $(DEBUG) $(OPTIONS) $(ROPTIONS)
-OCAMLOPT = ocamlopt $(OPTIONS) $(ROPTIONS)
-OCAMLLEX = ocamllex
-OCAMLDEP = ocamldep $(OPTIONS)
-OCAMLFIND = ocamlfind
-
-DEBUG =
-# Invoke with: make DEBUG=-g
-
-depend: *.ml *.mli
- $(OCAMLDEP) *.ml *.mli >depend
-
-depend.pkg: Makefile
- $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg
-
-.PHONY: install
-install: all
- { test ! -f $(XARCHIVE) || extra="*.cmxa *.a netstring_mt.cmx netmappings_iso.cmx netmappings_other.cmx netstring_mt.o netmappings_iso.o netmappings_other.o"; }; \
- $(OCAMLFIND) install $(NAME) *.mli *.cmi *.cma netstring_top.cmo netstring_mt.cmo netmappings_iso.cmo netmappings_other.cmo META $$extra
-
-.PHONY: install-cgi
-install-cgi:
- $(OCAMLFIND) install cgi compat-cgi/META
-
-
-.PHONY: install-base64
-install-base64:
- $(OCAMLFIND) install base64 compat-base64/META
-
-
-.PHONY: uninstall
-uninstall:
- $(OCAMLFIND) remove $(NAME)
-
-.PHONY: uninstall-cgi
-uninstall-cgi:
- $(OCAMLFIND) remove cgi
-
-.PHONY: uninstall-base64
-uninstall-base64:
- $(OCAMLFIND) remove base64
-
-.PHONY: clean
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
- test ! -d mappings || rm -f netmappings_iso.ml netmappings_other.ml
-
-.PHONY: distclean
-distclean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
- rm -f *~ depend depend.pkg compat-cgi/*~ compat-base64/*~
- $(MAKE) -C tests distclean
- $(MAKE) -C doc distclean
- $(MAKE) -C tools distclean
-
-RELEASE: META
- awk '/version/ { print substr($$3,2,length($$3)-2) }' META >RELEASE
-
-.PHONY: dist
-dist: RELEASE
- r=`head -1 RELEASE`; cd ..; gtar czf $(NAME)-$$r.tar.gz --exclude='*/CVS*' --exclude="*/depend.pkg" --exclude="*/depend" --exclude="*/doc/common.xml" --exclude="*/doc/config.xml" --exclude="*/doc/readme.dtd" --exclude="*/Mail" --exclude="*/mappings" $(NAME)
-
-.PHONY: tag-release
-tag-release: RELEASE
- r=`head -1 RELEASE | sed -e s/\\\./-/g`; cd ..; cvs tag -F $(NAME)-$$r $(NAME)
-
-.PHONY: release
-release: distclean
- test -f netmappings_iso.ml
- test -f netmappings_other.ml
- $(MAKE) tag-release
- $(MAKE) dist
-
-.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll
-
-.ml.cmx:
- $(OCAMLOPT) -c -thread $<
-
-.ml.cmo:
- $(OCAMLC) -c -thread $<
-
-.mli.cmi:
- $(OCAMLC) -c $<
-
-.mll.ml:
- $(OCAMLLEX) $<
-
-include depend
-include depend.pkg
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-let encode s = Netencoding.Base64.encode s;;
-let url_encode s = Netencoding.Base64.url_encode s;;
-let decode s = Netencoding.Base64.decode s;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/06/25 22:34:43 gerd
- * Added labels to arguments.
- *
- * Revision 1.1 2000/03/02 01:15:20 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(**********************************************************************)
-(* Base64 compatibility module *)
-(**********************************************************************)
-
-(* PLEASE DO NOT USE THIS MODULE IN NEW SOFTWARE!
- * The module Netencoding.Base64 is the preferred API. This module is
- * only for compatibility with older software.
- *)
-
-(* This interface is compatible with all previously released Base64
- * modules (0.1 and 0.2).
- *)
-
-val encode : string -> string
-
-val url_encode : string -> string
-
-val decode : string -> string
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/03/02 01:15:20 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-exception Resources_exceeded
-
-type argument_processing = Memory | File | Automatic;;
-
-type argument =
- { mutable arg_name : string;
- mutable arg_processing : argument_processing;
- mutable arg_buf_value : Buffer.t;
- mutable arg_mem_value : string option;
- (* Here, the value is stored if it must be kept in memory *)
- mutable arg_disk_value : string Weak.t;
- (* This component is used iff arg_mem_value = None. The
- * weak array has a length of 1, and the single element stores
- * the value (if any).
- *)
- mutable arg_file : string option;
- (* The filename of the temporary file storing the value *)
- mutable arg_fd : out_channel option;
- (* The file descriptor of the temp file (if open) *)
- mutable arg_mimetype : string;
- mutable arg_filename : string option;
- mutable arg_header : (string * string) list;
- (* For the last three components, see the description of the
- * corresponding functions in the mli file.
- *)
- }
-;;
-
-type workaround =
- Work_around_MSIE_Content_type_bug
- | Work_around_backslash_bug
-;;
-
-type config =
- { maximum_content_length : int;
- how_to_process_arguments : argument -> argument_processing;
- tmp_directory : string;
- tmp_prefix : string;
- workarounds : workaround list;
- }
-;;
-
-
-let print_argument arg =
- Format.printf
- "<CGIARG name=%s filename=%s mimetype=%s store=%s>"
- arg.arg_name
- (match arg.arg_filename with None -> "*" | Some n -> n)
- arg.arg_mimetype
- (match arg.arg_file with None -> "Memory" | Some n -> n)
-;;
-
-
-let encode = Netencoding.Url.encode ;;
-let decode = Netencoding.Url.decode ;;
-
-
-
-let url_split_re =
- Str.regexp "[&=]";;
-
-
-let mk_url_encoded_parameters nv_pairs =
- String.concat "&"
- (List.map
- (fun (name,value) ->
- let name_encoded = Netencoding.Url.encode name in
- let value_encoded = Netencoding.Url.encode value in
- name_encoded ^ "=" ^ value_encoded
- )
- nv_pairs
- )
-;;
-
-
-let dest_url_encoded_parameters parstr =
-
- let rec parse_after_amp tl =
- match tl with
- Str.Text name :: Str.Delim "=" :: Str.Text value :: tl' ->
- (Netencoding.Url.decode name,
- Netencoding.Url.decode value) :: parse_next tl'
- | Str.Text name :: Str.Delim "=" :: Str.Delim "&" :: tl' ->
- (Netencoding.Url.decode name, "") :: parse_after_amp tl'
- | Str.Text name :: Str.Delim "=" :: [] ->
- [Netencoding.Url.decode name, ""]
- | _ ->
- failwith "Cgi.dest_url_encoded_parameters"
- and parse_next tl =
- match tl with
- [] -> []
- | Str.Delim "&" :: tl' ->
- parse_after_amp tl'
- | _ ->
- failwith "Cgi.dest_url_encoded_parameters"
- in
- let toklist = Str.full_split url_split_re parstr in
- match toklist with
- [] -> []
- | _ -> parse_after_amp toklist
-;;
-
-
-let mk_form_encoded_parameters ntv_triples =
- failwith "Cgi.mk_form_encoded_parameters: not implemented";;
-
-
-let dest_parameter_header header options =
- let get_name s =
- (* s is: form-data; ... name="fieldname" ...
- * Extract "fieldname"
- *)
- try
- let tok, params = Mimestring.scan_value_with_parameters s options in
- List.assoc "name" params
- with
- Not_found ->
- failwith "Cgi.dest_form_encoded_parameters"
- | Failure "Mimestring.scan_value_with_parameters" ->
- failwith "Cgi.dest_form_encoded_parameters"
- in
-
- let get_filename s =
- (* s is: form-data; ... filename="fieldname" ...
- * Extract "fieldname"
- *)
- try
- let tok, params = Mimestring.scan_value_with_parameters s options in
- Some(List.assoc "filename" params)
- with
- Not_found ->
- None
- | Failure "Mimestring.scan_value_with_parameters" ->
- failwith "Cgi.dest_form_encoded_parameters"
- in
-
- let mime_type =
- try List.assoc "content-type" header
- with Not_found -> "text/plain" in (* the default *)
-
- let content_disposition =
- try List.assoc "content-disposition" header
- with
- Not_found ->
- failwith "Cgi.dest_form_encoded_parameters: no content-disposition"
- in
-
- let name = get_name content_disposition in
- let filename = get_filename content_disposition in
-
- name, mime_type, filename
-;;
-
-
-let dest_form_encoded_parameters parstr ~boundary config =
- let options =
- if List.mem Work_around_backslash_bug config.workarounds then
- [ Mimestring.No_backslash_escaping ]
- else
- []
- in
- let parts =
- Mimestring.scan_multipart_body_and_decode
- parstr 0 (String.length parstr) boundary in
- List.map
- (fun (params, value) ->
-
- let name, mime_type, filename = dest_parameter_header params options in
- { arg_name = name;
- arg_processing = Memory;
- arg_buf_value = Buffer.create 1;
- arg_mem_value = Some value;
- arg_disk_value = Weak.create 1;
- arg_file = None;
- arg_fd = None;
- arg_mimetype = mime_type;
- arg_filename = filename;
- arg_header = params;
- }
-
- )
- parts
-;;
-
-
-let make_temporary_file config =
- (* Returns (filename, out_channel). *)
- let rec try_creation n =
- try
- let fn =
- Filename.concat
- config.tmp_directory
- (config.tmp_prefix ^ "-" ^ (string_of_int n))
- in
- let fd =
- open_out_gen
- [ Open_wronly; Open_creat; Open_excl; Open_binary ]
- 0o666
- fn
- in
- fn, fd
- with
- Sys_error m ->
- (* This does not look very intelligent, but it is the only chance
- * to limit the number of trials.
- *)
- if n > 1000 then
- failwith ("Cgi: Cannot create temporary file: " ^ m);
- try_creation (n+1)
- in
- try_creation 0
-;;
-
-
-let dest_form_encoded_parameters_from_netstream s ~boundary config =
- let parts = ref [] in
- let options =
- if List.mem Work_around_backslash_bug config.workarounds then
- [ Mimestring.No_backslash_escaping ]
- else
- []
- in
-
- let create header =
- (* CALLBACK for scan_multipart_body_from_netstream *)
- let name, mime_type, filename = dest_parameter_header header options in
- let p0 =
- { arg_name = name;
- arg_processing = Memory;
- arg_buf_value = Buffer.create 80;
- arg_mem_value = None;
- arg_disk_value = Weak.create 1;
- arg_file = None;
- arg_fd = None;
- arg_mimetype = mime_type;
- arg_filename = filename;
- arg_header = header;
- }
- in
- let pr = config.how_to_process_arguments p0 in
- let p = { p0 with arg_processing = pr } in
- if pr = File then begin
- let fn, fd = make_temporary_file config in
- p.arg_file <- Some fn;
- p.arg_fd <- Some fd;
- p.arg_mem_value <- None;
- end;
- p
- in
-
- let add p s k n =
- (* CALLBACK for scan_multipart_body_from_netstream *)
- if (p.arg_processing = Automatic) &&
- (Buffer.length (p.arg_buf_value) >= Netstream.block_size s) then begin
- (* This is a LARGE argument *)
- p.arg_processing <- File;
- let fn, fd = make_temporary_file config in
- p.arg_file <- Some fn;
- p.arg_fd <- Some fd;
- p.arg_mem_value <- None;
- output_string fd (Buffer.contents p.arg_buf_value);
- p.arg_buf_value <- Buffer.create 1;
- end;
-
- match p.arg_processing with
- (Memory|Automatic) ->
- Buffer.add_substring
- p.arg_buf_value
- (Netbuffer.unsafe_buffer (Netstream.window s))
- k
- n
- | File ->
- let fd = match p.arg_fd with Some fd -> fd | None -> assert false in
- output
- fd
- (Netbuffer.unsafe_buffer (Netstream.window s))
- k
- n;
- in
-
- let stop p =
- (* CALLBACK for scan_multipart_body_from_netstream *)
- begin match p.arg_processing with
- (Memory|Automatic) ->
- p.arg_mem_value <- Some (Buffer.contents p.arg_buf_value);
- p.arg_buf_value <- Buffer.create 1;
- | File ->
- let fd = match p.arg_fd with Some fd -> fd | None -> assert false in
- close_out fd;
- p.arg_mem_value <- None
- end;
- parts := p :: !parts
- in
-
- Mimestring.scan_multipart_body_from_netstream
- s
- boundary
- create
- add
- stop;
-
- List.rev !parts
-;;
-
-
-let getenv name =
- try Sys.getenv name with Not_found -> "";;
-
-(* getenv:
- * We use this getenv instead of Sys.getenv. The CGI specification does not
- * say anything about what should happen if a certain environment variable
- * is not set.
- * Some servers initialize the environment variable to the empty string if
- * it is not applicable, some servers do not set the variable at all. Because
- * of this, unset variables are always reported as empty variables.
- *
- * This is especially a problem with QUERY_STRING.
- *)
-
-let mk_simple_arg ~name v =
- { arg_name = name;
- arg_processing = Memory;
- arg_buf_value = Buffer.create 1;
- arg_mem_value = Some v;
- arg_disk_value = Weak.create 0;
- arg_file = None;
- arg_fd = None;
- arg_mimetype = "text/plain";
- arg_filename = None;
- arg_header = [];
- }
-;;
-
-let mk_memory_arg ~name ?(mime = "text/plain") ?filename ?(header = []) v =
- { arg_name = name;
- arg_processing = Memory;
- arg_buf_value = Buffer.create 1;
- arg_mem_value = Some v;
- arg_disk_value = Weak.create 0;
- arg_file = None;
- arg_fd = None;
- arg_mimetype = mime;
- arg_filename = filename;
- arg_header = header;
- }
-;;
-
-let mk_file_arg
- ~name ?(mime = "text/plain") ?filename ?(header = []) v_filename =
- let v_abs_filename =
- if Filename.is_relative v_filename then
- Filename.concat (Sys.getcwd()) v_filename
- else
- v_filename
- in
- { arg_name = name;
- arg_processing = File;
- arg_buf_value = Buffer.create 1;
- arg_mem_value = None;
- arg_disk_value = Weak.create 0;
- arg_file = Some v_abs_filename;
- arg_fd = None;
- arg_mimetype = mime;
- arg_filename = filename;
- arg_header = header;
- }
-;;
-
-
-let get_content_type config =
- (* Get the environment variable CONTENT_TYPE; if necessary apply
- * workarounds for browser bugs.
- *)
- let content_type = getenv "CONTENT_TYPE" in
- let user_agent = getenv "HTTP_USER_AGENT" in
- let eff_content_type =
- if Str.string_match (Str.regexp ".*MSIE") user_agent 0 &&
- List.mem Work_around_MSIE_Content_type_bug config.workarounds
- then begin
- (* Microsoft Internet Explorer: When used with SSL connections,
- * this browser sometimes produces CONTENT_TYPEs like
- * "multipart/form-data; boundary=..., multipart/form-data; boundary=..."
- * Workaround: Throw away everything after ", ".
- *)
- if Str.string_match (Str.regexp "\\([^,]*boundary[^,]*\\), .*boundary")
- content_type 0
- then
- Str.matched_group 1 content_type
- else
- content_type
- end
- else
- content_type
- in
- eff_content_type
-;;
-
-
-let really_parse_args config =
- let make_simple_arg (n,v) = mk_simple_arg n v in
-
- match getenv "REQUEST_METHOD" with
- ("GET"|"HEAD") ->
- List.map
- make_simple_arg
- (dest_url_encoded_parameters(getenv "QUERY_STRING"))
-
- | "POST" ->
- let n =
- try
- int_of_string (getenv "CONTENT_LENGTH")
- with
- _ -> failwith "Cgi.parse_arguments"
- in
- if n > config.maximum_content_length then
- raise Resources_exceeded;
- begin
- let mime_type, params =
- Mimestring.scan_mime_type(get_content_type config) [] in
- match mime_type with
- "application/x-www-form-urlencoded" ->
- let buf = String.create n in
- really_input stdin buf 0 n;
- List.map
- make_simple_arg
- (dest_url_encoded_parameters buf)
- | "multipart/form-data" ->
- let boundary =
- try
- List.assoc "boundary" params
- with
- Not_found ->
- failwith "Cgi.parse_arguments"
- in
- (* -------------------------------------------------- DEBUG
- let f = open_out "/tmp/cgiout" in
- output_string f buf;
- close_out f;
- * --------------------------------------------------
- *)
- dest_form_encoded_parameters_from_netstream
- (Netstream.create_from_channel stdin (Some n) 4096)
- boundary
- config
- | _ ->
- failwith ("Cgi.parse_arguments: unknown content-type " ^ mime_type)
- end
- | _ ->
- failwith "Cgi.parse_arguments: unknown method"
-
-let parsed = ref None;; (* protected by lock/unlock *)
-
-let lock = ref (fun () -> ());;
-let unlock = ref (fun () -> ());;
-
-let init_mt new_lock new_unlock =
- lock := new_lock;
- unlock := new_unlock
-;;
-
-let protect f =
- !lock();
- try
- let r = f() in
- !unlock();
- r
- with
- x ->
- !unlock();
- raise x
-;;
-
-let parse_arguments config =
- protect
- (fun () ->
- match !parsed with
- Some _ -> ()
- | None ->
- parsed := Some (List.map
- (fun arg -> arg.arg_name, arg)
- (really_parse_args config))
- )
-;;
-
-let arguments () =
- protect
- (fun () ->
- match !parsed with
- Some plist -> plist
- | None ->
- failwith "Cgi.arguments"
- )
-;;
-
-let set_arguments arglist =
- protect
- (fun () ->
- parsed := Some (List.map
- (fun arg -> arg.arg_name, arg)
- arglist)
- )
-;;
-
-let default_config =
- { maximum_content_length = max_int;
- how_to_process_arguments = (fun _ -> Memory);
- tmp_directory = "/var/tmp";
- tmp_prefix = "cgi-";
- workarounds = [ Work_around_MSIE_Content_type_bug;
- Work_around_backslash_bug;
- ]
- }
-;;
-
-let arg_value arg =
- match arg.arg_mem_value with
- None ->
- begin
- match Weak.get arg.arg_disk_value 0 with
- None ->
- begin
- match arg.arg_file with
- None ->
- failwith "Cgi.arg_value: no value present"
- | Some filename ->
- let fd = open_in_bin filename in
- try
- let len = in_channel_length fd in
- let s = String.create len in
- really_input fd s 0 len;
- Weak.set arg.arg_disk_value 0 (Some s);
- close_in fd;
- s
- with
- any -> close_in fd; raise any
- end
- | Some v -> v
- end
- | Some s ->
- s
-;;
-
-let arg_name arg = arg.arg_name;;
-let arg_file arg = arg.arg_file;;
-let arg_mimetype arg = arg.arg_mimetype;;
-let arg_filename arg = arg.arg_filename;;
-let arg_header arg = arg.arg_header;;
-
-let cleanup () =
- protect
- (fun () ->
- match !parsed with
- None -> ()
- | Some plist ->
- List.iter
- (fun (name, arg) ->
- match arg.arg_file with
- None -> ()
- | Some filename ->
- (* We do not complain if the file does not exist anymore. *)
- if Sys.file_exists filename then
- Sys.remove filename;
- arg.arg_file <- None
- )
- plist
- )
-;;
-
-let argument name = List.assoc name (arguments());;
-let argument_value name = arg_value (argument name);;
-
-module Operators = struct
- let ( !% ) = argument
- let ( !$ ) = argument_value
-end;;
-
-
-let parse_args() =
- parse_arguments default_config;
- List.map
- (fun (name, arg) -> name, arg_value arg)
- (arguments())
-;;
-
-let parse_args_with_mimetypes() =
- parse_arguments default_config;
- List.map
- (fun (name, arg) -> name, arg_mimetype arg, arg_value arg)
- (arguments())
-;;
-
-let header s =
- let t =
- match s with
- "" -> "text/html"
- | _ -> s
- in
- print_string ("Content-type: " ^ t ^ "\n\n");
- flush stdout
-;;
-
-
-let this_url() =
- "http://" ^ (getenv "SERVER_NAME") ^ (getenv "SCRIPT_NAME")
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.8 2000/06/25 22:34:43 gerd
- * Added labels to arguments.
- *
- * Revision 1.7 2000/06/25 21:40:36 gerd
- * Added printer.
- *
- * Revision 1.6 2000/06/25 21:15:48 gerd
- * Checked thread-safety.
- *
- * Revision 1.5 2000/05/16 22:29:36 gerd
- * Added support for two common file upload bugs.
- *
- * Revision 1.4 2000/04/15 16:47:27 gerd
- * Last minor changes before releasing 0.6.
- *
- * Revision 1.3 2000/04/15 13:09:01 gerd
- * Implemented uploads to temporary files.
- *
- * Revision 1.2 2000/03/02 01:15:30 gerd
- * Updated.
- *
- * Revision 1.1 2000/02/25 15:21:12 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(* FOR SIMPLE CGI PROGRAMS:
- *
- * If you do not need all the features of the API below, the following may
- * be enough:
- *
- *   - At the beginning of the main program, call 'parse_arguments' with
- *     either 'default_config' or a customized configuration as argument.
- * - Use 'argument_value(name)' to get the string value of the CGI parameter
- * 'name'. If you like, you can also open the Cgi.Operators module and
- * write '!$ name' instead. Here, !$ is a prefix operator equivalent to
- * argument_value.
- *
- * If you do not change the default configuration, you do not need to
- * worry about temporary files - there are not any.
- *
- * Most of the other functions defined below deal with file uploads, and
- * are only useful for that.
- *)
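-
-(* For instance, a minimal program along these lines (an illustrative sketch
- * only; the form field name "user" is a made-up example):
- *
- *   let () =
- *     Cgi.parse_arguments Cgi.default_config;
- *     Cgi.header "";                    (* prints "Content-type: text/html" *)
- *     print_string ("<html><body>Hello, " ^
- *                   Cgi.argument_value "user" ^
- *                   "</body></html>")
- *)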
-
-
-(**********************************************************************)
-(* CGI functions *)
-(**********************************************************************)
-
-(* First, the general interface to the CGI argument parser. *)
-
-exception Resources_exceeded
-
-type argument
-
-type argument_processing =
- Memory (* Keep the value of the argument in memory *)
- | File (* Store the value of the argument into a temporary file *)
- | Automatic (* Store only large arguments into files. An argument
- * value is large if it is longer than about one block (4K).
- * This is not an exact definition.
- *)
-
-type workaround =
- Work_around_MSIE_Content_type_bug
-      (* There is a bug in MSIE that I observed together with SSL connections.
-       * The CONTENT_TYPE passed to the server sometimes has the wrong
-       * format. This option enables a workaround if the user agent string
- * contains the word "MSIE".
- *)
- | Work_around_backslash_bug
- (* There is a bug in many browsers: The backslash character is not
- * handled as an escaping character in MIME headers. Because DOS-
- * based systems use the backslash regularly in filenames, this bug
- * matters.
- * This option changes the interpretation of backslashes such that
- * these are handled as normal characters. I do not know any browser
- * that is not affected by this bug, so there is no check on
- * the user agent string.
- *)
-
-
-type config =
- { maximum_content_length : int;
-      (* The maximum CONTENT_LENGTH. Bigger requests trigger a
- * Resources_exceeded exception. This feature can be used
- * to detect primitive denial-of-service attacks.
- *)
- how_to_process_arguments : argument -> argument_processing;
- (* After the beginning of an argument has been decoded, the
- * type of processing is decided by invoking this function on
- * the argument. Note that the passed argument is incomplete -
- * it does not have a value. You can assume that name, filename,
- * MIME type and the whole header are already known.
- * - THIS CONFIGURATION PARAMETER ONLY AFFECTS ARGUMENTS
- * "POST"ED FROM THE CLIENT IN FORM-ENCODED REPRESENTATION.
- * All other transport methods can only handle the Memory
- * processing type.
- *)
- tmp_directory : string;
- (* The temporary directory to use for the temporary files. *)
- tmp_prefix : string;
- (* A prefix for temporary files. It is recommended that the prefix
- * contains a part that is random or that depends on rapidly changing
- * environment properties. For example, the process ID is a good
- * candidate, or the current system time. It is not required that
- * the prefix is unique; there is a fail-safe algorithm that
- * computes a unique file name from the prefix, even if several
- * CGI programs run concurrently.
- *)
- workarounds : workaround list;
- (* Specifies which workarounds should be enabled. *)
- }
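-
-(* For example, a configuration that writes file uploads to temporary files
- * while keeping ordinary fields in memory might look like this (an
- * illustrative sketch, not part of the library):
- *
- *   let upload_config =
- *     { Cgi.default_config with
- *         Cgi.how_to_process_arguments =
- *           (fun arg ->
- *              match Cgi.arg_filename arg with
- *                  Some _ -> Cgi.File
- *                | None   -> Cgi.Memory) }
- *)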
-
-val parse_arguments : config -> unit
-val arguments : unit -> (string * argument) list
- (* - let () = parse_arguments config:
- * Decodes the CGI arguments. 'config' specifies limits and processing
- * hints; you can simply pass default_config (see below).
- *
- *   - let arglist = arguments():
- * The function returns a list with (name, arg) pairs. The name is
- * passed back as string while the value is returned as opaque type
- * 'argument'. Below accessor functions are defined. These functions
- * require that parse_arguments was invoked before.
- *
- * Note 1: You can invoke 'parse_arguments' several times, but only
- * the first time the arguments are read in. If you call the function
- * again, it does nothing (even if the config changes). This is also
- * true if 'parse_arguments' has been invoked after 'set_arguments'.
- *
- * Note 2: It is not guaranteed that stdin has been read until EOF.
- * Only CONTENT_LENGTH bytes are read from stdin (following the CGI spec).
- *
- * Note 3: If arguments are processed in File or Automatic mode, the
- * caller of 'parse_arguments' is responsible for deleting the files
- *   after use. You may consider applying the at_exit function of the
- * core library for this purpose. See also 'cleanup' below.
- *)
-
-val set_arguments : argument list -> unit
- (* Alternatively, you can set the arguments to use. This overrides any
- * previously parsed set of arguments, and also any following parsing.
- * - Intended for debugging, and to make it possible to replace the
- * CGI parser by a different one while retaining this API.
- *)
-
-val default_config : config
- (* maximum_content_length = maxint
- * how_to_process_arguments = "always use Memory"
- * tmp_directory = "/var/tmp"
- * tmp_prefix = "cgi"
- * workarounds = [ Work_around_MSIE_Content_type_bug;
- * Work_around_backslash_bug;
- * ]
- *
- * Note 1: On some Unixes, a special file system is used for /tmp that
- * stores the files into the virtual memory (main memory or swap area).
- * Because of this, /var/tmp is preferred as default.
- *
- * Note 2: Filename.temp_file is not used because it depends on
- * environment variables which are usually not set in a CGI environment.
- *)
-
-val arg_name : argument -> string
-val arg_value : argument -> string
-val arg_file : argument -> string option
-val arg_mimetype : argument -> string
-val arg_filename : argument -> string option
-val arg_header : argument -> (string * string) list
- (* The accessor functions that return several aspects of arguments.
- * arg_name: returns the name of the argument
- * arg_value: returns the value of the argument. If the value is stored
- * in a temporary file, the contents of this file are returned, i.e.
- * the file is loaded. This may have some consequences:
- * (1) The function may fail because of I/O errors.
- * (2) The function may be very slow, especially if the file is
- * non-local.
- * (3) If the value is bigger than Sys.max_string_length, the function
- * raises the exception Resources_exceeded. On 32 bit architectures,
- * strings are limited to 16 MB.
- * Note that loaded values are put into weak arrays. This makes it
- * possible that subsequent calls of 'arg_value' on the same argument
- * can avoid loading the value again, and that unused values will
- * nevertheless be collected by the GC.
- * arg_file: returns 'Some filename' if the value resides in a temporary
- * file, and 'filename' is the absolute path of this file. If the
- * value is only available in memory, None is returned.
- * arg_mimetype: returns the MIME type of the argument. Note that the
- * default MIME type is "text/plain", and that the default is returned
- * if the MIME type is not available.
- * arg_filename: returns 'Some filename' if the argument is associated
- * with a certain filename (e.g. from a file upload); otherwise None
- * arg_header: returns pairs (name,value) containing the complete header
- * of the argument. If the transmission protocol does not specify
- * a header, the empty list is passed back.
- *)
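-
-  (* For instance, to find out where the value of an (illustrative, made-up)
-   * upload field "photo" ended up:
-   *
-   *   let a = Cgi.argument "photo" in
-   *   match Cgi.arg_file a with
-   *       Some tmpfile -> Printf.printf "value stored in %s\n" tmpfile
-   *     | None -> Printf.printf "value kept in memory (%d bytes)\n"
-   *                 (String.length (Cgi.arg_value a))
-   *)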
-
-val mk_simple_arg : name:string -> string -> argument
- (* mk_simple_arg name value:
- * Creates a simple argument with only name, and a value passed by string.
- * The MIME type is "text/plain".
- *)
-
-val mk_memory_arg
- : name:string -> ?mime:string -> ?filename:string ->
- ?header:((string * string) list) -> string -> argument
- (* mk_memory_arg name mimetype filename header value:
- * Creates an argument whose value is kept in memory.
- *
- * Note: The signature of this function changed in release 0.8.
- *)
-
-val mk_file_arg
- : name:string -> ?mime:string -> ?filename:string ->
- ?header:((string * string) list) -> string -> argument
- (* mk_file_arg name mimetype filename header value_filename:
- * Creates an argument whose value is stored in the file
- * 'value_filename'. If this file name is not absolute, it is interpreted
- * relative to the directory returned by Sys.getcwd() - this might not
- * be what you want with respect to mount points and symlinks (and it
- * depends on the operating system as getcwd is only POSIX.1). The
- * file name is turned into an absolute name immediately, and the
- * function arg_file returns the rewritten name.
- *
- * Note: The signature of this function changed in release 0.8.
- *)
-
-
-val cleanup : unit -> unit
- (* Removes all temporary files that occur in the current set of arguments
- * (as returned by 'arguments()').
- *)
-
-
-(* Convenience functions: *)
-
-val argument : string -> argument
- (* let argument name = List.assoc name (arguments()) -- i.e. returns
- * the argument with the passed name. Of course, this function expects
- * that 'parse_arguments' was called before.
- *)
-
-val argument_value : string -> string
- (* let argument_value name = arg_value(argument name) -- i.e. returns
- * the value of the argument.
- * See also Operators.( !$ ) below.
- *)
-
-(* For toploop printers: *)
-
-val print_argument : argument -> unit
-
-
-(* Now, the compatibility functions. *)
-
-val parse_args : unit -> (string * string) list
- (* Decodes the arguments of the CGI and returns them as an association list
- * Works whatever the method is (GET or POST)
- *)
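-  (* For example (a sketch; "name" is a made-up parameter name):
-   *   let v = List.assoc "name" (Cgi.parse_args())
-   *)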
-
-val parse_args_with_mimetypes : unit -> (string * string * string) list
- (* Like parse_args, but returns also the MIME type.
- * The triples contain (name, mime_type, value).
- * If an encoding was chosen that does not transfer the MIME type,
- * "text/plain" is returned.
- *
- * THIS FUNCTION SHOULD BE CONSIDERED AS DEPRECATED.
- * It was included in netstring-0.4, but most applications need more than
- * just the MIME type. parse_arguments should be used instead.
- *)
-
-val header : string -> unit
- (* Prints the content-type header.
- * The argument is the MIME type (the default value is "text/html" if the
- * argument is the empty string)
- *)
-
-val this_url : unit -> string
- (* Returns the address of the CGI *)
-
-(**********************************************************************)
-(* The Operators module *)
-(**********************************************************************)
-
-(* If you open the Operators module, you can write
- * !% "name" instead of argument "name", and
- * !$ "name" instead of argument_value "name"
- *)
-
-module Operators : sig
- val ( !% ) : string -> argument
- (* same as 'argument' above *)
- val ( !$ ) : string -> string
- (* same as 'argument_value' above *)
-end
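-
-(* For example (a sketch; "name" is a made-up parameter name):
- *
- *   open Cgi.Operators
- *   let v = !$ "name"          (* same as Cgi.argument_value "name" *)
- *)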
-
-(**********************************************************************)
-(* Low-level functions *)
-(**********************************************************************)
-
-(* Encoding/Decoding within URLs:
- *
- * The following two functions perform the '%'-substitution for
- * characters that may otherwise be interpreted as metacharacters.
- *
- * See also the Netencoding module. This interface contains these functions
- * to keep the compatibility with the old Cgi module.
- *)
-
-val decode : string -> string
-val encode : string -> string
-
-(* URL-encoded parameters:
- *
- * The following two functions create and analyze URL-encoded parameters.
- * Format: name1=val1&name2=val2&...
- *)
-
-val mk_url_encoded_parameters : (string * string) list -> string
- (* The argument is a list of (name,value) pairs. The result is the
- * single URL-encoded parameter string.
- *)
-
-val dest_url_encoded_parameters : string -> (string * string) list
- (* The argument is the URL-encoded parameter string. The result is
- * the corresponding list of (name,value) pairs.
- * Note: Whitespace within the parameter string is ignored.
- * If there is a format error, the function fails.
- *)
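-
-  (* A round-trip sketch (the pair values here are made up):
-   *
-   *   let s = Cgi.mk_url_encoded_parameters ["name", "Joe"; "age", "30"]
-   *   (* s = "name=Joe&age=30" *)
-   *   let l = Cgi.dest_url_encoded_parameters s
-   *   (* l = ["name", "Joe"; "age", "30"] *)
-   *)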
-
-(* Form-encoded parameters:
- *
- * According to: RFCs 2388, 2183, 2045, 2046
- *
- * General note: This is a simple API to encode/decode form-encoded parameters.
- * Especially, it is not possible to pass the header of the parts through
- * this API.
- *)
-
-val mk_form_encoded_parameters : (string * string * string) list ->
- (string * string)
- (* The argument is a list of (name,mimetype,value) triples.
- * The result is (parstr, boundary), where 'parstr' is the
- * single form-encoded parameter string, and 'boundary' is the
- * boundary to separate the message parts.
- *
- * THIS FUNCTION IS CURRENTLY NOT IMPLEMENTED!
- *)
-
-val dest_form_encoded_parameters : string -> boundary:string -> config ->
- argument list
- (* The first argument is the form-encoded parameter string.
- * The second argument is the boundary (extracted from the mime type).
- * Third argument: Only the workarounds component is used.
- * The result is
- * the corresponding list of arguments (all in memory).
- * If there is a format error, the function fails.
- * Note: embedded multipart/mixed types are returned as they are,
- * and are not recursively decoded.
- * Note: The content-transfer-encodings "7bit", "8bit", "binary",
- * "base64", and "quoted-printable" are supported.
- * Note: Parameter names which include spaces or non-alphanumeric
- * characters may be problematic (the rules of RFC 2047 are NOT applied).
- * Note: The returned MIME type is not normalized.
- *)
-
-val dest_form_encoded_parameters_from_netstream
- : Netstream.t -> boundary:string -> config -> argument list
- (* let arglist = dest_form_encoded_parameters_from_netstream s b c:
- * Reads the form-encoded parameters from netstream s. The boundary
- * is passed in b, and the configuration in c.
- * A list of arguments is returned.
- *
- * See also dest_form_encoded_parameters.
- *
- * Restriction: In contrast to dest_form_encoded_parameters, this
- * function is not able to handle the content-transfer-encodings
- * "base64" and "quoted-printable". (This is not really a restriction
- * because no browser uses these encodings in conjunction with HTTP.
- * This is different if mail transport is chosen. - The reason for
- * this restriction is that there are currently no stream functions
- * for decoding.)
- *)
-
-(* Private functions: *)
-
-val init_mt : (unit -> unit) -> (unit -> unit) -> unit
-
-
-(**********************************************************************)
-(* Compatibility with CGI library by J.-C. Filliatre *)
-(**********************************************************************)
-
-(* The following functions are compatible with J.-C. Filliatre's CGI
- * library:
- *
- * parse_args, header, this_url, decode, encode.
- *
- * Note that the new implementation of parse_args can be safely invoked
- * several times.
- *
- * Since release 0.8, Netstring's CGI implementation is again thread-safe.
- *)
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.8 2000/06/25 22:34:43 gerd
- * Added labels to arguments.
- *
- * Revision 1.7 2000/06/25 21:40:36 gerd
- * Added printer.
- *
- * Revision 1.6 2000/06/25 21:15:48 gerd
- * Checked thread-safety.
- *
- * Revision 1.5 2000/05/16 22:28:13 gerd
- * New "workarounds" config component.
- *
- * Revision 1.4 2000/04/15 16:47:27 gerd
- * Last minor changes before releasing 0.6.
- *
- * Revision 1.3 2000/04/15 13:09:01 gerd
- * Implemented uploads to temporary files.
- *
- * Revision 1.2 2000/03/02 01:15:30 gerd
- * Updated.
- *
- * Revision 1.1 2000/02/25 15:21:12 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-version = "0.5"
-requires = "netstring"
-description = "Compatibility with base64"
+++ /dev/null
-version = "0.5"
-requires = "netstring"
-description = "Compatibility with cgi"
+++ /dev/null
-base64.cmo: netencoding.cmi base64.cmi
-base64.cmx: netencoding.cmx base64.cmi
-cgi.cmo: mimestring.cmi netbuffer.cmi netencoding.cmi netstream.cmi cgi.cmi
-cgi.cmx: mimestring.cmx netbuffer.cmx netencoding.cmx netstream.cmx cgi.cmi
-mimestring.cmo: netbuffer.cmi netencoding.cmi netstream.cmi netstring_str.cmi \
- mimestring.cmi
-mimestring.cmx: netbuffer.cmx netencoding.cmx netstream.cmx netstring_str.cmx \
- mimestring.cmi
-netbuffer.cmo: netbuffer.cmi
-netbuffer.cmx: netbuffer.cmi
-netconversion.cmo: netmappings.cmi netconversion.cmi
-netconversion.cmx: netmappings.cmx netconversion.cmi
-netencoding.cmo: netstring_str.cmi netencoding.cmi
-netencoding.cmx: netstring_str.cmx netencoding.cmi
-nethtml.cmo: nethtml.cmi
-nethtml.cmx: nethtml.cmi
-netmappings.cmo: netmappings.cmi
-netmappings.cmx: netmappings.cmi
-netmappings_iso.cmo: netmappings.cmi
-netmappings_iso.cmx: netmappings.cmx
-netmappings_other.cmo: netmappings.cmi
-netmappings_other.cmx: netmappings.cmx
-netstream.cmo: netbuffer.cmi netstream.cmi
-netstream.cmx: netbuffer.cmx netstream.cmi
-netstring_mt.cmo: cgi.cmi netmappings.cmi netstring_str.cmi netstring_mt.cmi
-netstring_mt.cmx: cgi.cmx netmappings.cmx netstring_str.cmx netstring_mt.cmi
-netstring_str.cmo: netstring_str.cmi
-netstring_str.cmx: netstring_str.cmi
-netstring_top.cmo: netstring_top.cmi
-netstring_top.cmx: netstring_top.cmi
-neturl.cmo: netencoding.cmi neturl.cmi
-neturl.cmx: netencoding.cmx neturl.cmi
-cgi.cmi: netstream.cmi
-mimestring.cmi: netstream.cmi
-netmappings.cmi: netconversion.cmi
-netstream.cmi: netbuffer.cmi
+++ /dev/null
-******************************************************************************
-ABOUT-FINDLIB - Package manager for O'Caml
-******************************************************************************
-
-
-==============================================================================
-Abstract
-==============================================================================
-
-The findlib library provides a scheme to manage reusable software components
-(packages), and includes tools that support this scheme. Packages are
-collections of OCaml modules for which metainformation can be stored. The
-packages are kept in the filesystem hierarchy, but with strict directory
-structure. The library contains functions to look up the directory that stores
-a package, to query metainformation about a package, and to retrieve dependency
-information about multiple packages. There is also a tool that allows the user
-to enter queries on the command-line. In order to simplify compilation and
-linkage, there are new frontends of the various OCaml compilers that can
-directly deal with packages.
-
-Metainformation is stored together with the packages. This includes a version
-string, the archives the package consists of, and additional linker options.
-Packages can also be dependent on other packages. There is a query which finds
-out all predecessors of a list of packages and sorts them topologically. The
-new compiler frontends do this implicitly.
-
-Metainformation can be conditional, i.e. depend on a set of predicates. This is
-mainly used to react to certain properties of the environment, such as
-whether the bytecode or the native compiler is invoked, whether the application
-is multi-threaded, and a few more. If the new compiler frontends are used, most
-predicates are found out automatically.
-
-There is special support for scripts. A new directive, "#require", loads
-packages into scripts. Of course, this works only with newly created toploops
-which include the findlib library.
-
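-For example, inside such a toploop a single directive makes a package's modules
-available (a sketch; "netstring" stands for any installed package):
-
-#require "netstring";;
-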
-==============================================================================
-Where to get findlib
-==============================================================================
-
-The manual of findlib is available online [1]. You can download findlib here
-[2].
-
-
---------------------------
-
-[1] see http://www.ocaml-programming.de/packages/documentation/findlib/
-
-[2] see http://www.ocaml-programming.de/packages/findlib-0.3.1.tar.gz
-
-
-
+++ /dev/null
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE readme SYSTEM "readme.dtd" [
-
-<!ENTITY % common SYSTEM "common.xml">
-%common;
-
-<!ENTITY f "<em>findlib</em>">
-<!ENTITY F "<em>Findlib</em>">
-
-]>
-
-<readme title="ABOUT-FINDLIB - Package manager for O'Caml">
- <sect1>
- <title>Abstract</title>
-<p>
-The &f; library provides a scheme to manage reusable software
-components (packages), and includes tools that support this
-scheme. Packages are collections of OCaml modules for which
-metainformation can be stored. The packages are kept in the filesystem
-hierarchy, but with strict directory structure. The library contains
-functions to look up the directory that stores a package, to query
-metainformation about a package, and to retrieve dependency
-information about multiple packages. There is also a tool that allows
-the user to enter queries on the command-line. In order to simplify
-compilation and linkage, there are new frontends of the various OCaml
-compilers that can directly deal with packages.
-</p>
-
-<p>
-Metainformation is stored together with the packages. This includes a
-version string, the archives the package consists of, and additional
-linker options. Packages can also be dependent on other
-packages. There is a query which finds out all predecessors of a list
-of packages and sorts them topologically. The new compiler frontends
-do this implicitly.
-</p>
-
-<p>
-Metainformation can be conditional, i.e. depend on a set of
-predicates. This is mainly used to react to certain properties of the
-environment, such as whether the bytecode or the native compiler is
-invoked, whether the application is multi-threaded, and a few
-more. If the new compiler frontends are used, most predicates are
-found out automatically.
-</p>
-
-<p>
-There is special support for scripts. A new directive, "#require",
-loads packages into scripts. Of course, this works only with newly
-created toploops which include the &f; library.
-</p>
-
- </sect1>
-
- <sect1><title>Where to get findlib</title>
- <p>
-The manual of &f; is available <a href="&url.findlib-project;">online</a>.
-You can download &f; <a href="&url.findlib-download;">here</a>.
-</p>
- </sect1>
-</readme>
+++ /dev/null
-******************************************************************************
-INSTALL - Netstring, string processing functions for the net
-******************************************************************************
-
-
-==============================================================================
-The "Netstring" package
-==============================================================================
-
-------------------------------------------------------------------------------
-Prerequisites
-------------------------------------------------------------------------------
-
-Netstring does not need any other packages besides the O'Caml core. Netstring
-needs at least O'Caml 3.00. The installation procedure defined in the Makefile
-requires findlib [1] to work [2].
-
-------------------------------------------------------------------------------
-Configuration
-------------------------------------------------------------------------------
-
-It is not necessary to configure "Netstring".
-
-------------------------------------------------------------------------------
-Compilation
-------------------------------------------------------------------------------
-
-The Makefile defines the following goals:
-
-- make all
- compiles with the bytecode compiler and creates netstring.cma,
- netstring_mt.cmo, netstring_top.cmo, netmappings_iso.cmo, and
- netmappings_other.cmo
-
-- make opt
- compiles with the native compiler and creates netstring.cmxa,
- netstring_mt.cmx, netmappings_iso.cmx, and netmappings_other.cmx
-
-The archive netstring.cmx?a contains the functionality, and the two
-single-module files netmappings_iso.cm[ox] and netmappings_other.cm[ox] add
-configurations to the character set conversion module. These configurations are
-optional:
-
-- Netmappings_iso: Conversion tables for the character sets ISO-8859-2, -3,
-  -4, -5, -6, -7, -8, -9, -10, -13, -14, and -15.
-
-- Netmappings_other: Conversion tables for the character sets WINDOWS-1250,
- -1251, -1252, -1253, -1254, -1255, -1256, -1257, -1258; code pages 037, 424,
- 437, 500, 737, 775, 850, 852, 855, 856, 857, 860, 861, 862, 863, 864, 865,
- 866, 869, 874, 875, 1006, 1026; JIS-0201; KOI8R; Macintosh Roman encoding;
- Adobe Standard Encoding, Symbol Encoding, and Zapf Dingbats Encodings.
-
-Even without these configuration files, the conversion module is able to handle
-the encodings ISO-8859-1, US-ASCII, UTF-16, UTF-8, and the Java variant of
-UTF-8.
-
-The module Netstring_mt must be linked into multi-threaded applications;
-otherwise some mutexes remain uninitialized.
-
-The module Netstring_top loads several printers for abstract values (for
-toploops).
-
-------------------------------------------------------------------------------
-Installation
-------------------------------------------------------------------------------
-
-The Makefile defines the following goals:
-
-- make install
- installs the bytecode archive, the interface definitions, and if present,
- the native archive in the default location of findlib
-
-- make install-cgi
- Installs a pseudo package "cgi" which is compatible with the old cgi
- package. This has the effect that software searching the "cgi" package will
- find the netstring package instead. This is recommended.
-
-- make install-base64
- Installs a pseudo package "base64" which is compatible with the old base64
- package. This has the effect that software searching the "base64" package
- will find the netstring package instead. This is recommended.
-
-- make uninstall
- removes the package
-
-- make uninstall-cgi
- removes the "cgi" compatibility package
-
-- make uninstall-base64
- removes the "base64" compatibility package
-
-------------------------------------------------------------------------------
-Linking netstring with findlib
-------------------------------------------------------------------------------
-
-The command
-
-ocamlfind ocamlc ... -package netstring ... -linkpkg ...
-
-links as much code as possible from netstring into your application: all
-conversion tables; when -thread is specified, the initialization code for
-multi-threaded programs; when a toploop is created, the code setting the value
-printers.
-
-The following predicates reduce the amount of linked code:
-
-- netstring_only_iso: Only the conversion tables for the ISO-8859 series of
- character sets are linked.
-
-- netstring_minimum: No additional conversion tables are linked; only
- ISO-8859-1 and the UTF encodings work.
-
-For example, the command may look like
-
-ocamlfind ocamlc ...
- -package netstring -predicates netstring_only_iso ... -linkpkg ...
-
-to link only the ISO-8859 conversion tables.
-
-
---------------------------
-
-[1] see http://www.ocaml-programming.de/packages/documentation/findlib/
-
-[2] Findlib is a package manager, see the file ABOUT-FINDLIB.
-
-
-
+++ /dev/null
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE readme SYSTEM "readme.dtd" [
-
-<!ENTITY % common SYSTEM "common.xml">
-%common;
-
-<!ENTITY m "<em>Netstring</em>">
-
-]>
-
-<readme title="INSTALL - Netstring, string processing functions for the net">
- <sect1><title>The "Netstring" package</title>
- <sect2><title>Prerequisites</title>
- <p>
-&m; does not need any other packages besides the O'Caml core. &m; needs
-at least O'Caml 3.00. The installation procedure defined in the Makefile
-requires <a href="&url.findlib-project;">findlib</a> to
-work<footnote><em>Findlib</em> is a package manager, see the file
-ABOUT-FINDLIB.</footnote>.
-</p>
- </sect2>
-
- <sect2><title>Configuration</title>
- <p>
-It is not necessary to configure "Netstring".
-</p>
- </sect2>
-
- <sect2><title>Compilation</title>
- <p>
-The Makefile defines the following goals:
-</p>
- <ul>
- <li>
- <p>make all</p>
- <p>compiles with the bytecode compiler and creates netstring.cma,
-netstring_mt.cmo, netstring_top.cmo, netmappings_iso.cmo, and
-netmappings_other.cmo</p>
- </li>
- <li>
- <p>make opt</p>
- <p>compiles with the native compiler and creates netstring.cmxa,
-netstring_mt.cmx, netmappings_iso.cmx, and netmappings_other.cmx</p>
- </li>
- </ul>
-
- <p>The archive netstring.cmx?a contains the functionality, and the two
-single-module files netmappings_iso.cm[ox] and netmappings_other.cm[ox] add
-configurations to the character set conversion module. These configurations are
-optional:</p>
-
- <ul>
- <li><p>Netmappings_iso: Conversion tables for the character sets
-ISO-8859-2, -3, -4, -5, -6, -7, -8, -9, -10, -13, -14, and -15.</p>
- </li>
- <li><p>Netmappings_other: Conversion tables for the character sets
-WINDOWS-1250, -1251, -1252, -1253, -1254, -1255, -1256, -1257, -1258;
-code pages 037, 424, 437, 500, 737, 775, 850, 852, 855, 856, 857, 860, 861,
-862, 863, 864, 865, 866, 869, 874, 875, 1006, 1026; JIS-0201; KOI8R; Macintosh
-Roman encoding; Adobe Standard Encoding, Symbol Encoding, and Zapf Dingbats
-Encodings.</p>
- </li>
- </ul>
-
-<p>Even without these configuration files, the conversion module is able to
-handle the encodings ISO-8859-1, US-ASCII, UTF-16, UTF-8, and the Java variant
-of UTF-8.</p>
-
-<p>The module Netstring_mt must be linked into multi-threaded applications;
-otherwise some mutexes remain uninitialized.</p>
-
-<p>The module Netstring_top loads several printers for abstract values (for
-toploops).</p>
-
- </sect2>
-
- <sect2><title>Installation</title>
- <p>
-The Makefile defines the following goals:</p>
- <ul>
- <li>
- <p>make install</p>
- <p>installs the bytecode archive, the interface definitions, and if
-present, the native archive in the default location of <em>findlib</em>
-</p>
- </li>
-
- <li>
- <p>make install-cgi</p>
- <p>Installs a pseudo package "cgi" which is compatible with the old
-cgi package. This has the effect that software searching the "cgi" package will
-find the netstring package instead. This is recommended.</p>
- </li>
-
- <li>
- <p>make install-base64</p> <p>Installs a pseudo package "base64"
-which is compatible with the old base64 package. This has the effect that
-software searching the "base64" package will find the netstring package
-instead. This is recommended.</p>
- </li>
-
- <li>
- <p>make uninstall</p>
- <p>removes the package</p>
- </li>
-
- <li>
- <p>make uninstall-cgi</p>
- <p>removes the "cgi" compatibility package</p>
- </li>
-
- <li>
- <p>make uninstall-base64</p>
- <p>removes the "base64" compatibility package</p>
- </li>
- </ul>
- </sect2>
-
-
- <sect2>
- <title>Linking netstring with findlib</title>
- <p>The command
-<code>
-ocamlfind ocamlc ... -package netstring ... -linkpkg ...
-</code>
-links as much code as possible from netstring into your application: all
-conversion tables; when -thread is specified, the initialization code for
-multi-threaded programs; when a toploop is created, the code setting the value
-printers.</p>
-
-<p>The following predicates reduce the amount of linked code:</p>
-
- <ul>
- <li><p>netstring_only_iso: Only the conversion tables for the ISO-8859
-series of character sets are linked.</p>
- </li>
- <li><p>netstring_minimum: No additional conversion tables are linked;
-only ISO-8859-1 and the UTF encodings work.</p>
- </li>
- </ul>
-
-<p>For example, the command may look like
-
-<code>
-ocamlfind ocamlc ...
- -package netstring -predicates netstring_only_iso ... -linkpkg ...
-</code>
-
-to link only the ISO-8859 conversion tables.</p>
- </sect2>
-
- </sect1>
-</readme>
\ No newline at end of file
+++ /dev/null
-.PHONY: all
-all: README INSTALL ABOUT-FINDLIB
-
-README: README.xml common.xml config.xml
- readme -text README.xml >README
-
-INSTALL: INSTALL.xml common.xml config.xml
- readme -text INSTALL.xml >INSTALL
-
-ABOUT-FINDLIB: ABOUT-FINDLIB.xml common.xml config.xml
- readme -text ABOUT-FINDLIB.xml >ABOUT-FINDLIB
-
-.PHONY: clean
-clean:
-
-.PHONY: CLEAN
-CLEAN: clean
-
-.PHONY: distclean
-distclean: clean
- rm -f *~
-
+++ /dev/null
-******************************************************************************
-README - Netstring, string processing functions for the net
-******************************************************************************
-
-
-==============================================================================
-Abstract
-==============================================================================
-
-Netstring is a collection of string processing functions that are useful in
-conjunction with Internet messages and protocols. In particular, it contains
-functions for the following purposes:
-
-- Parsing MIME messages
-
-- Several encoding/decoding functions (Base 64, Quoted Printable, Q,
- URL-encoding)
-
-- A new implementation of the CGI interface that allows users to upload files
-
-- A simple HTML parser
-
-- URL parsing, printing and processing
-
-- Conversion between character sets
-
-==============================================================================
-Download
-==============================================================================
-
-You can download Netstring as a gzip'ed tarball [1].
-
-==============================================================================
-Documentation
-==============================================================================
-
-Sorry, there is no manual. The mli files describe each function in detail.
-Furthermore, the following additional information may be useful.
-
-------------------------------------------------------------------------------
-New CGI implementation
-------------------------------------------------------------------------------
-
-For a long time, the CGI implementation by Jean-Christophe Filliatre has been
-the only freely available module that implemented the CGI interface (it is also
-based on code by Daniel de Rauglaudre). It worked well, but it did not support
-file uploads because this requires a parser for MIME messages.
-
-The main goal of Netstring is to make such uploads possible, and because of
-this it contains an almost complete parser for MIME messages.
-
-The new CGI implementation provides the same functions as the old one, plus
-some extensions. If you call Cgi.parse_args(), you get the CGI parameters as
-before, but as already explained, this also works if the parameters are
-encapsulated as a MIME message. In the HTML code, you can select the MIME
-format by using
-
-<form action="..." method="post" enctype="multipart/form-data">
-...
-</form>
-
-
-- this "enctype" attribute forces the browser to send the form parameters as
-multipart MIME message (Note: You can neither send the parameters of a
-conventional hyperlink as MIME message nor the form parameters if the "method"
-is "get"). In many browsers only this particular encoding enables the file
-upload elements, you cannot perform file uploads with other encodings.
-
-As MIME messages can transport MIME types, filenames, and other additional
-properties, it is also possible to get these using the enhanced interface.
-After calling
-
-Cgi.parse_arguments config
-
-you can get all available information about a certain parameter by invoking
-
-let param = Cgi.argument "name"
-
-- where "param" has the type "argument". There are several accessor functions
-to extract the various aspects of arguments (name, filename, value by string,
-value by temporary file, MIME type, MIME header) from "argument" values.
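-
-For illustration, here is a minimal sketch of this interface. Only the calls
-mentioned above are taken from the package; the configuration value
-"Cgi.default_config" and the accessor "Cgi.arg_value" are assumptions made for
-this example; please check cgi.mli for the actual names.
-
-let () =
-  (* Parse the CGI parameters; this also works for multipart MIME requests: *)
-  Cgi.parse_arguments Cgi.default_config;
-  (* Look up one parameter; "param" has type "argument": *)
-  let param = Cgi.argument "datafile" in
-  (* Read its value as a string (assumed accessor name): *)
-  print_string (Cgi.arg_value param)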
-
-------------------------------------------------------------------------------
-Base64, and other encodings
-------------------------------------------------------------------------------
-
-Netstring is also the successor of the Base64 package. It provides a
-Base64-compatible interface and an enhanced API. The latter is contained in the
-Netencoding module which also offers implementations of the "quoted printable",
-"Q", and "URL" encodings. Please see netencoding.mli for details.
-
-------------------------------------------------------------------------------
-The MIME scanner functions
-------------------------------------------------------------------------------
-
-In the Mimestring module you can find several functions scanning parts of MIME
-messages. These functions already cover most aspects of MIME messages: Scanning
-of headers, analysis of structured header entries, and scanning of multipart
-bodies. Of course, a full-featured MIME scanner would require some more
-functions, especially concrete parsers for frequent structures (mail addresses
-or date strings).
-
-Please see the file mimestring.mli for details.
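-
-As a small example, scanning a Content-type value might look as follows
-(a sketch; scan_mime_type is one of these scanner functions; see
-mimestring.mli for the exact signatures):
-
-let mime_type, params =
-  Mimestring.scan_mime_type "text/html; charset=ISO-8859-1" []
-  (* mime_type = "text/html", params = [ "charset", "ISO-8859-1" ] *)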
-
-------------------------------------------------------------------------------
-The HTML parser
-------------------------------------------------------------------------------
-
-The HTML parser should be able to read every HTML file, whether it is correct
-or not. The parser tries to recover from parsing errors as much as possible.
-
-The parser returns the HTML term as a conventional recursive value (i.e. no
-object-oriented design).
-
-The parser depends a bit on knowledge about the HTML version, mainly because it
-needs to know which tags are always empty. You may need to adjust this
-configuration before the parser works well enough for your purpose.
-
-Please see the Nethtml module for details.
-
-------------------------------------------------------------------------------
-The abstract data type URL
-------------------------------------------------------------------------------
-
-The module Neturl contains support for URL parsing and processing. The
-implementation strictly follows the standards RFC 1738 and RFC 1808. URLs can
-be parsed, and several accessor functions allow the user to get components of
-parsed URLs, or to change components. Modifying URLs is safe; it is impossible
-to create a URL that does not have a valid string representation.
-
-Both absolute and relative URLs are supported. It is possible to apply a
-relative URL to a base URL in order to get the corresponding absolute URL.
-
-------------------------------------------------------------------------------
-Conversion between character sets and encodings
-------------------------------------------------------------------------------
-
-The module Netconversion converts strings from one character set to another.
-It is Unicode-based, and there are conversion tables for more than 50
-encodings.
-
-==============================================================================
-Author, Copying
-==============================================================================
-
-Netstring has been written by Gerd Stolpmann [2]. You may copy it as you like;
-you may even use it for commercial purposes as long as the license conditions
-are respected (see the file LICENSE that comes with the distribution). It
-allows almost everything.
-
-==============================================================================
-History
-==============================================================================
-
-- Changed in 0.9.3: Fixed a bug in the "install" rule of the Makefile.
-
-- Changed in 0.9.2: New format for the conversion tables which are now much
- smaller.
-
-- Changed in 0.9.1: Updated the Makefile such that (native-code) compilation
- of netmappings.ml becomes possible.
-
-- Changed in 0.9: Extended Mimestring module: It can now process RFC-2047
- messages.
- New Netconversion module which converts strings between character encodings.
-
-- Changed in 0.8.1: Added the component url_accepts_8bits to
- Neturl.url_syntax. This helps processing URLs which intentionally contain
- bytes >= 0x80.
- Fixed a bug: Every URL containing a 'j' was malformed!
-
-- Changed in 0.8: Added the module Neturl which provides the abstract data
- types of URLs.
- The whole package is now thread-safe.
- Added printers for the various opaque data types.
- Added labels to function arguments where appropriate. The following
- functions changed their signatures significantly: Cgi.mk_memory_arg,
- Cgi.mk_file_arg.
-
-- Changed in 0.7: Added workarounds for frequent browser bugs. Some functions
-  now take an additional argument specifying which workarounds are enabled.
-
-- Changed in 0.6.1: Updated URLs in documentation.
-
-- Changed in 0.6: The file upload has been re-implemented to support large
- files; the file is now read block by block and the blocks can be collected
- either in memory or in a temporary file.
- Furthermore, the CGI API has been revised. There is now an opaque data type
- "argument" that hides all implementation details and that is extensible (if
- necessary, it is possible to add features without breaking the interface
- again).
- The CGI argument parser can be configured; currently it is possible to limit
- the size of uploaded data, to control by which method arguments are
- processed, and to set up where temporary files are created.
- The other parts of the package that have nothing to do with CGI remain
- unchanged.
-
-- Changed in 0.5.1: A mistake in the documentation has been corrected.
-
-- Initial version 0.5: The Netstring package wants to be the successor of the
- Base64-0.2 and the Cgi-0.3 packages. The sum of both numbers is 0.5, and
- because of this, the first version number is 0.5.
-
-
---------------------------
-
-[1] see http://www.ocaml-programming.de/packages/netstring-0.9.2.tar.gz
-
-[2] see mailto:gerd@gerd-stolpmann.de
-
-
-
+++ /dev/null
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE readme SYSTEM "readme.dtd" [
-
-<!ENTITY % common SYSTEM "common.xml">
-%common;
-
-<!-- Special HTML config: -->
-<!ENTITY % readme:html:up '<a href="../..">up</a>'>
-
-<!ENTITY % config SYSTEM "config.xml">
-%config;
-
-]>
-
-<readme title="README - Netstring, string processing functions for the net">
- <sect1>
- <title>Abstract</title>
- <p>
-<em>Netstring</em> is a collection of string processing functions that are
-useful in conjunction with Internet messages and protocols. In particular,
-it contains functions for the following purposes:</p>
-
-<ul>
- <li><p>Parsing MIME messages</p>
- </li>
- <li><p>Several encoding/decoding functions (Base 64, Quoted Printable, Q, URL-encoding)</p>
- </li>
- <li><p>A new implementation of the CGI interface that allows users to upload files</p>
- </li>
- <li><p>A simple HTML parser</p>
- </li>
- <li><p>URL parsing, printing and processing</p>
- </li>
- <li><p>Conversion between character sets</p>
- </li>
- </ul>
-
- </sect1>
-
- <sect1>
- <title>Download</title>
- <p>
-You can download <em>Netstring</em> as a gzip'ed <a
- href="&url.netstring-download;">tarball</a>.
-</p>
-
- </sect1>
-
- <sect1>
- <title>Documentation</title>
- <p>
-Sorry, there is no manual. The mli files describe each function in
-detail. Furthermore, the following additional information may be useful.</p>
-
- <sect2>
- <title>New CGI implementation</title>
-
-    <p>For a long time, the CGI implementation by Jean-Christophe Filliatre
-has been the only freely available module that implemented the CGI interface
-(it is also based on code by Daniel de Rauglaudre). It worked well, but it did
-not support file uploads because this requires a parser for MIME messages.</p>
-    <p>The main goal of Netstring is to make such uploads possible, and because
-of this it contains an almost complete parser for MIME messages.</p>
-    <p>The new CGI implementation provides the same functions as the old
-one, plus some extensions. If you call Cgi.parse_args(), you get the CGI
-parameters as before, but as already explained, this also works if the
-parameters are encapsulated as a MIME message. In the HTML code, you can select
-the MIME format by using
-<code><![CDATA[
-<form action="..." method="post" enctype="multipart/form-data">
-...
-</form>
-]]>
-</code>
-- this "enctype" attribute forces the browser to send the form parameters
-as multipart MIME message (Note: You can neither send the parameters of a
-conventional hyperlink as MIME message nor the form parameters if the
-"method" is "get"). In many browsers only this particular encoding enables
-the file upload elements, you cannot perform file uploads with other encodings.
-</p>
-
-    <p>As MIME messages can transport MIME types, filenames, and other
-additional properties, it is also possible to get these using the enhanced
-interface. After calling
-<code><![CDATA[
-Cgi.parse_arguments config
-]]></code>
-you can get all available information about a certain parameter by invoking
-<code><![CDATA[
-let param = Cgi.argument "name"
-]]></code>
-- where "param" has the type "argument". There are several accessor functions
-to extract the various aspects of arguments (name, filename, value by string,
-value by temporary file, MIME type, MIME header) from "argument" values.
-</p>
-
- </sect2>
-
-
- <sect2>
- <title>Base64, and other encodings</title>
-
- <p>Netstring is also the successor of the Base64 package. It provides a
-Base64-compatible interface and an enhanced API. The latter is contained in
-the Netencoding module which also offers implementations of the "quoted
-printable", "Q", and "URL" encodings. Please see netencoding.mli for
-details.</p>
-
- </sect2>
-
-
- <sect2>
- <title>The MIME scanner functions</title>
-
- <p>In the Mimestring module you can find several functions scanning parts
-of MIME messages. These functions already cover most aspects of MIME messages:
-Scanning of headers, analysis of structured header entries, and scanning of
-multipart bodies. Of course, a full-featured MIME scanner would require some
-more functions, especially concrete parsers for frequent structures
-(mail addresses or date strings).
-</p>
- <p>Please see the file mimestring.mli for details.</p>
- </sect2>
-
-
- <sect2>
- <title>The HTML parser</title>
-
-    <p>The HTML parser should be able to read every HTML file, whether it is
-correct or not. The parser tries to recover from parsing errors as much as
-possible.
-</p>
-    <p>The parser returns the HTML term as a conventional recursive value
-(i.e. no object-oriented design).</p>
-    <p>The parser depends a bit on knowledge about the HTML version, mainly
-because it needs to know which tags are always empty. You may need to adjust
-this configuration before the parser works well enough for your purpose.
-</p>
- <p>Please see the Nethtml module for details.</p>
- </sect2>
-
- <sect2>
- <title>The abstract data type URL</title>
- <p>The module Neturl contains support for URL parsing and processing.
-The implementation strictly follows the standards RFC 1738 and RFC 1808.
-URLs can be parsed, and several accessor functions allow the user to
-get components of parsed URLs, or to change components. Modifying URLs
-is safe; it is impossible to create a URL that does not have a valid
-string representation.</p>
-
- <p>Both absolute and relative URLs are supported. It is possible to
-apply a relative URL to a base URL in order to get the corresponding
-absolute URL.</p>
- </sect2>
-
- <sect2>
- <title>Conversion between character sets and encodings</title>
-    <p>The module Netconversion converts strings from one character set
-to another. It is Unicode-based, and there are conversion tables for more than
-50 encodings.</p>
- </sect2>
-
- </sect1>
-
- <sect1>
- <title>Author, Copying</title>
- <p>
-<em>Netstring</em> has been written by &person.gps;. You may copy it as you like;
-you may even use it for commercial purposes as long as the license conditions
-are respected (see the file LICENSE that comes with the distribution). It allows
-almost everything.
-</p>
- </sect1>
-
- <sect1>
- <title>History</title>
-
- <ul>
- <li><p><em>Changed in 0.9.3:</em> Fixed a bug in the "install" rule of
-the Makefile.</p>
- </li>
- <li><p><em>Changed in 0.9.2:</em> New format for the conversion tables
-which are now much smaller.</p>
- </li>
- <li><p><em>Changed in 0.9.1:</em> Updated the Makefile such that
-(native-code) compilation of netmappings.ml becomes possible.
-</p>
- </li>
- <li><p><em>Changed in 0.9:</em> Extended Mimestring module: It can
-now process RFC-2047 messages.</p>
- <p>New Netconversion module which converts strings between character
-encodings.</p>
- </li>
- <li><p><em>Changed in 0.8.1:</em> Added the component
-url_accepts_8bits to Neturl.url_syntax. This helps processing URLs which
-intentionally contain bytes >= 0x80.</p>
- <p>Fixed a bug: Every URL containing a 'j' was malformed!</p>
- </li>
- <li><p><em>Changed in 0.8:</em> Added the module Neturl which
-provides the abstract data types of URLs.</p>
- <p>The whole package is now thread-safe.</p>
- <p>Added printers for the various opaque data types.</p>
- <p>Added labels to function arguments where appropriate. The
-following functions changed their signatures significantly:
-Cgi.mk_memory_arg, Cgi.mk_file_arg.</p>
- </li>
- <li><p><em>Changed in 0.7:</em> Added workarounds for frequent
-browser bugs. Some functions now take an additional argument
-specifying which workarounds are enabled.</p>
- </li>
- <li><p><em>Changed in 0.6.1:</em> Updated URLs in documentation.</p>
- </li>
-
- <li><p><em>Changed in 0.6:</em> The file upload has been re-implemented
-to support large files; the file is now read block by block and the blocks can
-be collected either in memory or in a temporary file.<br/>
-Furthermore, the CGI API has been revised. There is now an opaque data type
-"argument" that hides all implementation details and that is extensible (if
-necessary, it is possible to add features without breaking the interface
-again).<br/>
-The CGI argument parser can be configured; currently it is possible to
-limit the size of uploaded data, to control by which method arguments are
-processed, and to set up where temporary files are created.<br/>
-The other parts of the package that have nothing to do with CGI remain
-unchanged.
-</p>
- </li>
-
- <li><p><em>Changed in 0.5.1:</em> A mistake in the documentation has
-been corrected.</p>
- </li>
-
- <li><p><em>Initial version 0.5:</em>
-The Netstring package wants to be the successor of the Base64-0.2 and
-the Cgi-0.3 packages. The sum of both numbers is 0.5, and because of this,
-the first version number is 0.5.
-</p>
- </li>
- </ul>
- </sect1>
-
-</readme>
-
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-module Str = Netstring_str;;
-
-let cr_or_lf_re = Str.regexp "[\013\n]";;
-
-let trim_right_spaces_re =
- Str.regexp "[ \t]+$";;
-
-let trim_left_spaces_re =
- Str.regexp "^[ \t]+";;
-
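-(* header_re matches one header field: the field name (group 1), the colon,
- * and the value including any folded continuation lines (group 2).
- *)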
-let header_re =
- Str.regexp "\\([^ \t\r\n:]+\\):\\([ \t]*.*\n\\([ \t].*\n\\)*\\)";;
-
-let empty_line_re =
- Str.regexp "\013?\n";;
-
-let end_of_header_re =
- Str.regexp "\n\013?\n";;
-
-
-let scan_header ?(unfold=true) parstr ~start_pos:i0 ~end_pos:i1 =
- let rec parse_header i l =
- match Str.string_partial_match header_re parstr i with
- Some r ->
- let i' = Str.match_end r in
- if i' > i1 then
- failwith "Mimestring.scan_header";
- let name = String.lowercase(Str.matched_group r 1 parstr) in
- let value_with_crlf =
- Str.matched_group r 2 parstr in
- let value =
- if unfold then begin
- let value_with_rspaces =
- Str.global_replace cr_or_lf_re "" value_with_crlf in
- let value_with_lspaces =
- Str.global_replace trim_right_spaces_re "" value_with_rspaces in
- Str.global_replace trim_left_spaces_re "" value_with_lspaces
- end
- else value_with_crlf
- in
- parse_header i' ( (name,value) :: l)
- | None ->
- (* The header must end with an empty line *)
- begin match Str.string_partial_match empty_line_re parstr i with
- Some r' ->
- List.rev l, Str.match_end r'
- | None ->
- failwith "Mimestring.scan_header"
- end
- in
- parse_header i0 []
-;;
-
-type s_token =
- Atom of string
- | EncodedWord of (string * string * string)
- | QString of string
- | Control of char
- | Special of char
- | DomainLiteral of string
- | Comment
- | End
-;;
-
-type s_option =
- No_backslash_escaping
- | Return_comments
- | Recognize_encoded_words
-;;
-
-type s_extended_token =
- { token : s_token;
- token_pos : int;
- token_line : int;
- token_linepos : int; (* Position of the beginning of the line *)
- token_len : int;
- mutable token_sep : bool; (* separates adjacent encoded words *)
- }
-;;
-
-let get_token et = et.token;;
-let get_pos et = et.token_pos;;
-let get_line et = et.token_line;;
-let get_column et = et.token_pos - et.token_linepos;;
-let get_length et = et.token_len;;
-let separates_adjacent_encoded_words et = et.token_sep;;
-
-let get_decoded_word et =
- match et.token with
- Atom s -> s
- | QString s -> s
- | Control c -> String.make 1 c
- | Special c -> String.make 1 c
- | DomainLiteral s -> s
- | Comment -> ""
- | EncodedWord (_, encoding, content) ->
- ( match encoding with
- ("Q"|"q") ->
- Netencoding.Q.decode content
- | ("B"|"b") ->
- Netencoding.Base64.decode
- ~url_variant:false
- ~accept_spaces:false
- content
- | _ -> failwith "get_decoded_word"
- )
- | End ->
- failwith "get_decoded_word"
-;;
-
-let get_charset et =
- match et.token with
- EncodedWord (charset, _, _) -> charset
- | End -> failwith "get_charset"
- | _ -> "US-ASCII"
-;;
-
-type scanner_spec =
- { (* What the user specifies: *)
- scanner_specials : char list;
- scanner_options : s_option list;
- (* Derived from that: *)
- mutable opt_no_backslash_escaping : bool;
- mutable opt_return_comments : bool;
- mutable opt_recognize_encoded_words : bool;
-
- mutable is_special : bool array;
- mutable space_is_special : bool;
- }
-;;
-
-type scanner_target =
- { scanned_string : string;
- mutable scanner_pos : int;
- mutable scanner_line : int;
- mutable scanner_linepos : int;
- (* Position of the beginning of the line *)
- mutable scanned_tokens : s_extended_token Queue.t;
- (* A queue of already scanned tokens in order to look ahead *)
- mutable last_token : s_token;
- (* The last returned token. It is only important whether it is
- * EncodedWord or not.
- *)
- }
-;;
-
-type mime_scanner = scanner_spec * scanner_target
-;;
-
-let get_pos_of_scanner (spec, target) =
- if spec.opt_recognize_encoded_words then
- failwith "get_pos_of_scanner"
- else
- target.scanner_pos
-;;
-
-let get_line_of_scanner (spec, target) =
- if spec.opt_recognize_encoded_words then
- failwith "get_line_of_scanner"
- else
- target.scanner_line
-;;
-
-let get_column_of_scanner (spec, target) =
- if spec.opt_recognize_encoded_words then
- failwith "get_column_of_scanner"
- else
- target.scanner_pos - target.scanner_linepos
-;;
-
-let create_mime_scanner ~specials ~scan_options =
- let is_spcl = Array.create 256 false in
- List.iter
- (fun c -> is_spcl.( Char.code c ) <- true)
- specials;
- let spec =
- { scanner_specials = specials;
- scanner_options = scan_options;
- opt_no_backslash_escaping =
- List.mem No_backslash_escaping scan_options;
- opt_return_comments =
- List.mem Return_comments scan_options;
- opt_recognize_encoded_words =
- List.mem Recognize_encoded_words scan_options;
- is_special = is_spcl;
- space_is_special = is_spcl.(32);
- }
- in
- (* Grab the remaining arguments: *)
- fun ?(pos=0) ?(line=1) ?(column=0) s ->
- let target =
- { scanned_string = s;
- scanner_pos = pos;
- scanner_line = line;
- scanner_linepos = pos - column;
- scanned_tokens = Queue.create();
- last_token = Comment; (* Must not be initialized with EncodedWord *)
- }
- in
- spec, target
-;;
-
-
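-(* encoded_word_re matches RFC 2047 encoded words of the form
- * "=?charset?encoding?encoded-text?=".
- *)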
-let encoded_word_re =
- Str.regexp "=\\?\\([^?]+\\)\\?\\([^?]+\\)\\?\\([^?]+\\)\\?=";;
-
-let scan_next_token ((spec,target) as scn) =
- let mk_pair t len =
- { token = t;
- token_pos = target.scanner_pos;
- token_line = target.scanner_line;
- token_linepos = target.scanner_linepos;
- token_len = len;
- token_sep = false;
- },
- t
- in
-
- (* Note: mk_pair creates a new token pair, and it assumes that
- * target.scanner_pos (and also scanner_line and scanner_linepos)
- * still contain the position of the beginning of the token.
- *)
-
- let s = target.scanned_string in
- let l = String.length s in
- let rec scan i =
- if i < l then begin
- let c = s.[i] in
- if spec.is_special.( Char.code c ) then begin
- let pair = mk_pair (Special c) 1 in
- target.scanner_pos <- target.scanner_pos + 1;
- (match c with
- '\n' ->
- target.scanner_line <- target.scanner_line + 1;
- target.scanner_linepos <- target.scanner_pos;
- | _ -> ()
- );
- pair
- end
- else
- match c with
- '"' ->
- (* Quoted string: *)
- scan_qstring (i+1) (i+1) 0
- | '(' ->
- (* Comment: *)
- let i', line, linepos =
- scan_comment (i+1) 0 target.scanner_line target.scanner_linepos
- in
- let advance() =
- target.scanner_pos <- i';
- target.scanner_line <- line;
- target.scanner_linepos <- linepos
- in
- if spec.opt_return_comments then begin
- let pair = mk_pair Comment (i' - i) in
- advance();
- pair
- end
- else
- if spec.space_is_special then begin
- let pair = mk_pair (Special ' ') (i' - i) in
- advance();
- pair
- end
- else begin
- advance();
- scan i'
- end
- | (' '|'\t'|'\r') ->
- (* Ignore whitespace by default: *)
- target.scanner_pos <- target.scanner_pos + 1;
- scan (i+1)
- | '\n' ->
- (* Ignore whitespace by default: *)
- target.scanner_pos <- target.scanner_pos + 1;
- target.scanner_line <- target.scanner_line + 1;
- target.scanner_linepos <- target.scanner_pos;
- scan (i+1)
- | ('\000'..'\031'|'\127'..'\255') ->
- let pair = mk_pair (Control c) 1 in
- target.scanner_pos <- target.scanner_pos + 1;
- pair
- | '[' ->
- (* Domain literal: *)
- scan_dliteral (i+1) (i+1) 0
- | _ ->
- scan_atom i i
- end
- else
- mk_pair End 0
-
- and scan_atom i0 i =
- let return_atom() =
- let astring = String.sub s i0 (i-i0) in
- let r =
- if spec.opt_recognize_encoded_words then
- Str.string_match ~groups:4 encoded_word_re astring 0
- else
- None
- in
- match r with
- None ->
-	      (* An atom never contains a linefeed character, so we can ignore
- * scanner_line here.
- *)
- let pair = mk_pair (Atom astring) (i-i0) in
- target.scanner_pos <- i;
- pair
- | Some mr ->
- (* Found an encoded word. *)
- let charset = Str.matched_group mr 1 astring in
- let encoding = Str.matched_group mr 2 astring in
- let content = Str.matched_group mr 3 astring in
- let t = EncodedWord(String.uppercase charset,
- String.uppercase encoding,
- content) in
- let pair = mk_pair t (i-i0) in
- target.scanner_pos <- i;
- pair
- in
-
- if i < l then
- let c = s.[i] in
- match c with
- ('\000'..'\031'|'\127'..'\255'|'"'|'('|'['|' '|'\t'|'\r'|'\n') ->
- return_atom()
- | _ ->
- if spec.is_special.( Char.code c ) then
- return_atom()
- else
- scan_atom i0 (i+1)
- else
- return_atom()
-
- and scan_qstring i0 i n =
- if i < l then
- let c = s.[i] in
- match c with
- '"' ->
- (* Regular end of the quoted string: *)
- let content, line, linepos = copy_qstring i0 (i-1) n in
- let pair = mk_pair (QString content) (i-i0+2) in
- target.scanner_pos <- i+1;
- target.scanner_line <- line;
- target.scanner_linepos <- linepos;
- pair
- | '\\' when not spec.opt_no_backslash_escaping ->
- scan_qstring i0 (i+2) (n+1)
- | _ ->
- scan_qstring i0 (i+1) (n+1)
- else
- (* Missing right double quote *)
- let content, line, linepos = copy_qstring i0 (l-1) n in
- let pair = mk_pair (QString content) (l-i0+1) in
- target.scanner_pos <- l;
- target.scanner_line <- line;
- target.scanner_linepos <- linepos;
- pair
-
- and copy_qstring i0 i1 n =
- (* Used for quoted strings and for domain literals *)
- let r = String.create n in
- let k = ref 0 in
- let line = ref target.scanner_line in
- let linepos = ref target.scanner_linepos in
- for i = i0 to i1 do
- let c = s.[i] in
- match c with
- '\\' when i < i1 && not spec.opt_no_backslash_escaping -> ()
- | '\n' ->
- line := !line + 1;
- linepos := i+1;
- r.[ !k ] <- c;
- incr k
- | _ ->
- r.[ !k ] <- c;
- incr k
- done;
- assert (!k = n);
- r, !line, !linepos
-
- and scan_dliteral i0 i n =
- if i < l then
- let c = s.[i] in
- match c with
- ']' ->
- (* Regular end of the domain literal: *)
- let content, line, linepos = copy_qstring i0 (i-1) n in
- let pair = mk_pair (DomainLiteral content) (i-i0+2) in
- target.scanner_pos <- i+1;
- target.scanner_line <- line;
- target.scanner_linepos <- linepos;
- pair
- | '\\' when not spec.opt_no_backslash_escaping ->
- scan_dliteral i0 (i+2) (n+1)
- | _ ->
- (* Note: '[' is not allowed by RFC 822; we treat it here as
- * a regular character (questionable)
- *)
- scan_dliteral i0 (i+1) (n+1)
- else
- (* Missing right bracket *)
- let content, line, linepos = copy_qstring i0 (l-1) n in
- let pair = mk_pair (DomainLiteral content) (l-i0+1) in
- target.scanner_pos <- l;
- target.scanner_line <- line;
- target.scanner_linepos <- linepos;
- pair
-
-
- and scan_comment i level line linepos =
- if i < l then
- let c = s.[i] in
- match c with
- ')' ->
- (i+1), line, linepos
- | '(' ->
- (* nested comment *)
- let i', line', linepos' =
- scan_comment (i+1) (level+1) line linepos
- in
- scan_comment i' level line' linepos'
- | '\\' when not spec.opt_no_backslash_escaping ->
- if (i+1) < l && s.[i+1] = '\n' then
- scan_comment (i+2) level (line+1) (i+2)
- else
- scan_comment (i+2) level line linepos
- | '\n' ->
- scan_comment (i+1) level (line+1) (i+1)
- | _ ->
- scan_comment (i+1) level line linepos
- else
- (* Missing closing ')' *)
- i, line, linepos
- in
-
- scan target.scanner_pos
-;;
-
-
-let scan_token ((spec,target) as scn) =
- (* This function handles token queueing in order to recognize white space
- * that separates adjacent encoded words.
- *)
-
- let rec collect_whitespace () =
- (* Scans whitespace tokens and returns them as:
- * (ws_list, other_tok) if there is some other_tok following the
- * list (other_tok = End is possible)
- *)
- let (et, t) as pair = scan_next_token scn in
- ( match t with
- (Special ' '|Special '\t'|Special '\n'|Special '\r') ->
- let ws_list, tok = collect_whitespace() in
- pair :: ws_list, tok
- | _ ->
- [], pair
- )
- in
-
- try
- (* Is there an already scanned token in the queue? *)
- let et = Queue.take target.scanned_tokens in
- let t = et.token in
- target.last_token <- t;
- et, et.token
- with
- Queue.Empty ->
- (* If not: inspect the last token. If that token is an EncodedWord,
- * the next tokens are scanned in advance to determine if there
- * are spaces separating two EncodedWords. These tokens are put
- * into the queue such that it is avoided that they are scanned
- * twice. (The sole purpose of the queue.)
- *)
- match target.last_token with
- EncodedWord(_,_,_) as ew ->
- let ws_list, tok = collect_whitespace() in
- (* If tok is an EncodedWord, too, the tokens in ws_list must
- * be flagged as separating two adjacent encoded words.
- *)
- ( match tok with
- _, EncodedWord(_,_,_) ->
- List.iter
- (fun (et,t) ->
- et.token_sep <- true)
- ws_list
- | _ ->
- ()
- );
- (* Anyway, queue the read tokens but the first up *)
- ( match ws_list with
- [] ->
- (* Nothing to queue *)
- let et, t = tok in
- target.last_token <- t;
- tok
- | (et,t) as pair :: ws_list' ->
- List.iter
- (fun (et',_) ->
- Queue.add et' target.scanned_tokens)
- ws_list';
- ( match tok with
- | _, End ->
- ()
- | (et',_) ->
- Queue.add et' target.scanned_tokens
- );
- (* Return the first scanned token *)
- target.last_token <- t;
- pair
- )
- | _ ->
- (* Regular case: Scan one token; do not queue it up *)
- let (et, t) as pair = scan_next_token scn in
- target.last_token <- t;
- pair
-;;
-
-
-let scan_token_list scn =
- let rec collect() =
- match scan_token scn with
- _, End ->
- []
- | pair ->
- pair :: collect()
- in
- collect()
-;;
-
-
-let scan_structured_value s specials options =
- let rec collect scn =
- match scan_token scn with
- _, End ->
- []
- | _, t ->
- t :: collect scn
- in
- let scn = create_mime_scanner specials options s in
- collect scn
-;;
-
-
-let specials_rfc822 =
- [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '.' ];;
-
-
-let specials_rfc2045 =
- [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '/' ];;
-
-
-let scan_encoded_text_value s =
- let specials = [ ' '; '\t'; '\r'; '\n'; '('; '['; '"' ] in
- let options = [ Recognize_encoded_words ] in
- let scn = create_mime_scanner specials options s in
-
- let rec collect () =
- match scan_token scn with
- _, End ->
- []
- | et, _ when separates_adjacent_encoded_words et ->
- collect()
- | et, (Special _|Atom _|EncodedWord(_,_,_)) ->
- et :: collect ()
- | _, _ ->
- assert false
- in
- collect()
-;;
-
-
-let scan_value_with_parameters s options =
- let rec parse_params tl =
- match tl with
- Atom n :: Special '=' :: Atom v :: tl' ->
- (n,v) :: parse_rest tl'
- | Atom n :: Special '=' :: QString v :: tl' ->
- (n,v) :: parse_rest tl'
- | _ ->
- failwith "Mimestring.scan_value_with_parameters"
- and parse_rest tl =
- match tl with
- [] -> []
- | Special ';' :: tl' ->
- parse_params tl'
- | _ ->
- failwith "Mimestring.scan_value_with_parameters"
- in
-
- (* Note: Even if not used here, the comma is a very common separator
- * and should be recognized as being special. You will get a
- * failure if there is a comma in the scanned string.
- *)
- let tl = scan_structured_value s [ ';'; '='; ',' ] options in
- match tl with
- [ Atom n ] -> n, []
- | [ QString n ] -> n, []
- | Atom n :: Special ';' :: tl' ->
- n, parse_params tl'
- | QString n :: Special ';' :: tl' ->
- n, parse_params tl'
- | _ ->
- failwith "Mimestring.scan_value_with_parameters"
-;;
-
-
-let scan_mime_type s options =
- let n, params = scan_value_with_parameters s options in
- (String.lowercase n),
- (List.map (fun (n,v) -> (String.lowercase n, v)) params)
-;;
-
-
-let lf_re = Str.regexp "[\n]";;
-
-let scan_multipart_body s ~start_pos:i0 ~end_pos:i1 ~boundary =
- let l_s = String.length s in
-  if i0 < 0 || i1 < 0 || i0 > l_s || i1 > l_s then
- invalid_arg "Mimestring.scan_multipart_body";
-
- (* First compile the regexps scanning for 'boundary': *)
- let boundary1_re =
- Str.regexp ("\n--" ^ Str.quote boundary) in
- let boundary2_re =
- Str.regexp ("--" ^ Str.quote boundary) in
-
- let rec parse i =
-    (* i: Beginning of the current part (position directly after the
-     * boundary line)
-     *)
- (* Search for next boundary at position i *)
- let i' =
- try min (fst (Str.search_forward boundary1_re s i) + 1) i1
- with
- Not_found -> i1
- in
- (* i': Either the position of the first '-' of the boundary line,
- * or i1 if no boundary has been found
- *)
- if i' >= i1 then
- [] (* Ignore everything after the last boundary *)
- else
- let i'' =
- try min (fst (Str.search_forward lf_re s i') + 1) i1
- with
- Not_found -> i1
- in
- (* i'': The position after the boundary line *)
-(*
- print_int i; print_newline();
- print_int i'; print_newline();
- print_int i''; print_newline();
- flush stdout;
-*)
- let header, k = scan_header s i i' in
- (* header: the header of the part
- * k: beginning of the body
- *)
-
- let value =
- (* We know that i'-1 is a linefeed character. i'-2 should be a CR
- * character. Both characters are not part of the value.
- *)
- if i' >= 2 then
- match s.[i'-2] with
- '\013' -> String.sub s k (i'-2-k)
- | _ -> String.sub s k (i'-1-k)
- else
- String.sub s k (i'-1-k)
- in
-
- let pair =
- (header, value) in
-
- if i'' >= i1
- then
- [ pair ]
- else
- pair :: parse i''
- in
-
- (* Find the first boundary. This is a special case, because it may be
- * right at the beginning of the string (no preceding CRLF)
- *)
-
- let i_bnd =
- if Str.string_partial_match boundary2_re s i0 <> None then
- i0
- else
- try min (fst (Str.search_forward boundary1_re s i0)) i1
- with
- Not_found -> i1
- in
-
- if i_bnd >= i1 then
- []
- else
- let i_bnd' =
- try min (fst (Str.search_forward lf_re s (i_bnd + 1)) + 1) i1
- with
- Not_found -> i1
- in
- if i_bnd' >= i1 then
- []
- else
- parse i_bnd'
-;;
-
-
-let scan_multipart_body_and_decode s ~start_pos:i0 ~end_pos:i1 ~boundary =
- let parts = scan_multipart_body s i0 i1 boundary in
- List.map
- (fun (params, value) ->
- let encoding =
- try List.assoc "content-transfer-encoding" params
- with Not_found -> "7bit"
- in
-
- (* NOTE: In the case of "base64" and "quoted-printable", the allocation
- * of the string "value" could be avoided.
- *)
-
- let value' =
- match encoding with
- ("7bit"|"8bit"|"binary") -> value
- | "base64" ->
- Netencoding.Base64.decode_substring
- value 0 (String.length value) false true
- | "quoted-printable" ->
- Netencoding.QuotedPrintable.decode_substring
- value 0 (String.length value)
- | _ ->
- failwith "Mimestring.scan_multipart_body_and_decode: Unknown content-transfer-encoding"
- in
- (params, value')
- )
- parts
-;;
-
-
-let scan_multipart_body_from_netstream s ~boundary ~create ~add ~stop =
-
- (* The block size of s must be at least the length of the boundary + 3.
- * Otherwise it is not guaranteed that the boundary is always recognized.
- *)
- if Netstream.block_size s < String.length boundary + 3 then
- invalid_arg "Mimestring.scan_multipart_body_from_netstream";
-
- (* First compile the regexps scanning for 'boundary': *)
- let boundary1_re =
- Str.regexp ("\n--" ^ Str.quote boundary) in
- let boundary2_re =
- Str.regexp ("--" ^ Str.quote boundary) in
-
- (* Subtask 1: Search the end of the MIME header: CR LF CR LF
- * (or LF LF). Enlarge the window until the complete header
- * is covered by the window.
- *)
- let rec search_end_of_header k =
- (* Search the end of the header beginning at position k of the
- * current window.
- * Return the position of the first character of the body.
- *)
- try
- (* Search for LF CR? LF: *)
- let i, r = Str.search_forward
- end_of_header_re
- (Netbuffer.unsafe_buffer (Netstream.window s))
- k
- in
- (* If match_end <= window_length, the search was successful.
- * Otherwise, we searched in the uninitialized region of the
- * buffer.
- *)
- if Str.match_end r <= Netstream.window_length s then
- Str.match_end r
- else
- raise Not_found
- with
- Not_found ->
- (* If the end of the stream is reached, the end of the header
- * is missing: Error.
- * Otherwise, we try to read another block, and continue.
- *)
- if Netstream.at_eos s then
- failwith "Mimestring.scan_multipart_body_from_netstream: Unexpected end of stream";
- let w0 = Netstream.window_length s in
- Netstream.want_another_block s;
- search_end_of_header (max (w0 - 2) 0)
- in
-
- (* Subtask 2: Search the first boundary line. *)
- let rec search_first_boundary() =
- (* Search boundary per regexp; return the position of the character
- * immediately following the boundary (on the same line), or
- * raise Not_found.
- *)
- try
- (* Search boundary per regexp: *)
- let i, r = Str.search_forward
- boundary1_re
- (Netbuffer.unsafe_buffer (Netstream.window s))
- 0
- in
- (* If match_end <= window_length, the search was successful.
- * Otherwise, we searched in the uninitialized region of the
- * buffer.
- *)
- if Str.match_end r <= Netstream.window_length s then begin
- Str.match_end r
- end
- else raise Not_found
- with
- Not_found ->
- if Netstream.at_eos s then raise Not_found;
- (* The regexp did not match: Move the window by one block.
- *)
- let n =
- min
- (Netstream.window_length s)
- (Netstream.block_size s)
- in
- Netstream.move s n;
- search_first_boundary()
- in
-
- (* Subtask 3: Search the next boundary line. Invoke 'add' for every
- * read chunk
- *)
- let rec search_next_boundary p =
- (* Returns the position directly after the boundary on the same line *)
- try
- (* Search boundary per regexp: *)
- let i,r = Str.search_forward
- boundary1_re
- (Netbuffer.unsafe_buffer (Netstream.window s))
- 0
- in
- (* If match_end <= window_length, the search was successful.
- * Otherwise, we searched in the uninitialized region of the
- * buffer.
- *)
- if Str.match_end r <= Netstream.window_length s then begin
- (* Add the last chunk of the part. *)
- let n =
- (* i is a LF. i - 1 should be CR. Ignore these characters. *)
- if i >= 1 then
- match (Netbuffer.unsafe_buffer (Netstream.window s)).[ i - 1 ] with
- '\013' -> i - 1
- | _ -> i
- else
- i
- in
- (* Printf.printf "add n=%d\n" n; *)
- add p s 0 n;
- Str.match_end r
- end
- else raise Not_found
- with
- Not_found ->
- if Netstream.at_eos s then
- failwith "Mimestring.scan_multipart_body_from_netstream: next MIME boundary not found";
- (* The regexp did not match: Add the first block of the window;
- * and move the window.
- *)
- let n =
- min
- (Netstream.window_length s)
- (Netstream.block_size s)
- in
- (* Printf.printf "add n=%d\n" n; *)
- add p s 0 n;
- Netstream.move s n;
- search_next_boundary p
- in
-
- (* Subtask 4: Search the end of the boundary line *)
- let rec search_end_of_line k =
- (* Search LF beginning at position k. Discard any contents until that. *)
- try
- let i,r = Str.search_forward
- lf_re
- (Netbuffer.unsafe_buffer (Netstream.window s))
- k
- in
- (* If match_end <= window_length, the search was successful.
- * Otherwise, we searched in the uninitialized region of the
- * buffer.
- *)
- if Str.match_end r <= Netstream.window_length s then begin
- Str.match_end r
- end
- else raise Not_found
- with
- Not_found ->
- if Netstream.at_eos s then
- failwith "Mimestring.scan_multipart_body_from_netstream: MIME boundary without line end";
- (* The regexp did not match: move the window.
- *)
- let n = Netstream.window_length s in
- Netstream.move s n;
- search_end_of_line 0
- in
-
- (* Subtask 5: Check whether "--" follows the boundary on the same line *)
- let check_whether_last_boundary k =
- (* k: The position directly after the boundary. *)
- Netstream.want s (k+2);
- let str = Netbuffer.unsafe_buffer (Netstream.window s) in
- (Netstream.window_length s >= k+2) && str.[k] = '-' && str.[k+1] = '-'
- in
-
- (* Subtask 6: Check whether the buffer begins with a boundary. *)
- let check_beginning_is_boundary () =
- let m = String.length boundary + 2 in
- Netstream.want s m;
- let str = Netbuffer.unsafe_buffer (Netstream.window s) in
- (Netstream.window_length s >= m) &&
- (Str.string_partial_match boundary2_re str 0 <> None)
- in
-
- let rec parse_part () =
- (* The first byte of the current window of s contains the character
- * directly following the boundary line that starts this part.
- *)
- (* Search the end of the MIME header: *)
- let k_eoh = search_end_of_header 0 in
- (* Printf.printf "k_eoh=%d\n" k_eoh; *)
- (* Get the MIME header: *)
- let str = Netbuffer.unsafe_buffer (Netstream.window s) in
- let header, k_eoh' = scan_header str 0 k_eoh in
- assert (k_eoh = k_eoh');
- (* Move the window over the header: *)
- Netstream.move s k_eoh;
- (* Create the part: *)
- let p = create header in
- let continue =
- begin try
- (* Search the next boundary; add the chunks while searching: *)
- let k_eob = search_next_boundary p in
- (* Printf.printf "k_eob=%d\n" k_eob; *)
- (* Is this the last boundary? *)
- if check_whether_last_boundary k_eob then begin
- (* Skip the rest: *)
- while not (Netstream.at_eos s) do
- Netstream.move s (Netstream.window_length s)
- done;
- Netstream.move s (Netstream.window_length s);
- false
- end
- else begin
- (* Move to the beginning of the next line: *)
- let k_eol = search_end_of_line k_eob in
- Netstream.move s k_eol;
- true
- end
- with
- any ->
- (try stop p with _ -> ());
- raise any
- end in
- stop p;
- if continue then
- (* Continue with next part: *)
- parse_part()
- in
-
- (* Check whether s directly begins with a boundary: *)
- if check_beginning_is_boundary() then begin
- (* Move to the beginning of the next line: *)
- let k_eol = search_end_of_line 0 in
- Netstream.move s k_eol;
- (* Begin with first part: *)
- parse_part()
- end
- else begin
- (* Search the first boundary: *)
- try
- let k_eob = search_first_boundary() in
- (* Printf.printf "k_eob=%d\n" k_eob; *)
- (* Move to the beginning of the next line: *)
- let k_eol = search_end_of_line k_eob in
- (* Printf.printf "k_eol=%d\n" k_eol; *)
- Netstream.move s k_eol;
- (* Begin with first part: *)
- parse_part()
- with
- Not_found ->
- (* No boundary at all: The body is empty. *)
- ()
- end;
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.8 2000/08/13 00:04:36 gerd
- * Encoded_word -> EncodedWord
- * Bugfixes.
- *
- * Revision 1.7 2000/08/07 00:25:14 gerd
- * Implemented the new functions for structured field lexing.
- *
- * Revision 1.6 2000/06/25 22:34:43 gerd
- * Added labels to arguments.
- *
- * Revision 1.5 2000/06/25 21:15:48 gerd
- * Checked thread-safety.
- *
- * Revision 1.4 2000/05/16 22:30:14 gerd
- * Added support for some types of malformed MIME messages.
- *
- * Revision 1.3 2000/04/15 13:09:01 gerd
- * Implemented uploads to temporary files.
- *
- * Revision 1.2 2000/03/02 01:15:30 gerd
- * Updated.
- *
- * Revision 1.1 2000/02/25 15:21:12 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(**********************************************************************)
-(* Collection of auxiliary functions to parse MIME headers *)
-(**********************************************************************)
-
-
-val scan_header :
- ?unfold:bool ->
- string -> start_pos:int -> end_pos:int ->
- ((string * string) list * int)
- (* let params, i2 = scan_header s i0 i1:
- *
- * DESCRIPTION
- *
- * Scans the MIME header that begins at position i0 in the string s
-   * and that must end somewhere before position i1. Typically, i1 is the
-   * position of the character following the end of the body of the
-   * MIME message.
- * Returns the parameters of the header as (name,value) pairs (in
- * params), and in i2 the position of the character following
- * directly after the header (i.e. after the blank line separating
- * the header from the body).
- * The following normalizations have already been applied:
- * - The names are all in lowercase
- * - Newline characters (CR and LF) have been removed (unless
- * ?unfold:false has been passed)
- * - Whitespace at the beginning and at the end of values has been
- * removed (unless ?unfold:false is specified)
- * The rules of RFC 2047 have NOT been applied.
-   * The function fails if the header severely violates the header
-   * format. (Some minor deviations are tolerated, e.g. it is sufficient
- * to separate lines by only LF instead of CRLF.)
- *
- * OPTIONS:
- *
- * unfold: If true (the default), folded lines are concatenated and
- * returned as one line. This means that CR and LF characters are
- * deleted and that whitespace at the beginning and the end of the
- * string is removed.
- * You may set ?unfold:false to locate individual characters in the
- * parameter value exactly.
- *
- * ABOUT MIME MESSAGE FORMAT:
- *
- * This is the modern name for messages in "E-Mail format". Messages
- * consist of a header and a body; the first empty line separates both
- * parts. The header contains lines "param-name: param-value" where
- * the param-name must begin on column 0 of the line, and the ":"
- * separates the name and the value. So the format is roughly:
- *
- * param1-name: param1-value
- * ...
- * paramN-name: paramN-value
- *
- * body
- *
- * This function wants in i0 the position of the first character of
- * param1-name in the string, and in i1 the position of the character
- * following the body. It returns as i2 the position where the body
- * begins. Furthermore, in 'params' all parameters are returned that
- * exist in the header.
- *
- * DETAILS
- *
- * Note that parameter values are restricted; you cannot represent
- * arbitrary strings. The following problems can arise:
- * - Values cannot begin with whitespace characters, because there
- * may be an arbitrary number of whitespaces between the ':' and the
- * value.
- * - Values (and names of parameters, too) must only be formed of
- * 7 bit ASCII characters. (If this is not enough, the MIME standard
- * knows the extension RFC 2047 that allows that header values may
- * be composed of arbitrary characters of arbitrary character sets.)
- * - Header values may be broken into several lines, the continuation
- * lines must begin with whitespace characters. This means that values
- * must not contain line breaks as semantical part of the value.
- * And it may mean that ONE whitespace character is not distinguishable
- * from SEVERAL whitespace characters.
- * - Header lines must not be longer than 76 characters. Values that
-   *   would result in longer lines must be broken into several lines.
- * This means that you cannot represent strings that contain too few
- * whitespace characters.
- * - Some gateways pad the lines with spaces at the end of the lines.
- *
- * This implementation of a MIME scanner tolerates a number of
- * deviations from the standard: long lines are not rejected; 8 bit
- * values are accepted; lines may be ended only with LF instead of
- * CRLF.
- * Furthermore, header values are transformed:
- * - leading and trailing spaces are always removed
- * - CRs and LFs are deleted; it is guaranteed that there is at least
- * one space or tab where CR/LFs are deleted.
- * Last but not least, the names of the header values are converted
- * to lowercase; MIME specifies that they are case-independent.
- *
- * COMPATIBILITY WITH THE STANDARD
- *
- * This function can parse all MIME headers that conform to RFC 822.
- * But there may be still problems, as RFC 822 allows some crazy
- * representations that are actually not used in practice.
-   * In particular, RFC 822 allows the use of backslashes to "indicate"
- * that a CRLF sequence is semantically meant as line break. As this
- * function normally deletes CRLFs, it is not possible to recognize such
- * indicators in the result of the function.
- *)
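-
-(* EXAMPLE (a small illustrative sketch):
- *
- * let s = "Content-type: text/plain\r\nContent-length: 3\r\n\r\nBody" in
- * let params, body_pos =
- *   scan_header s ~start_pos:0 ~end_pos:(String.length s)
- *
- * yields params = [ "content-type", "text/plain";
- *                   "content-length", "3" ]
- * and body_pos = the position where "Body" begins in s.
- *)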
-
-(**********************************************************************)
-
-(* The following types and functions make it possible to build scanners for
- * structured MIME values in a highly configurable way.
- *
- * WHAT ARE STRUCTURED VALUES?
- *
- * RFC 822 (together with some other RFCs) defines lexical rules
- * how formal MIME header values should be divided up into tokens. Formal
- * MIME headers are those headers that are formed according to some
- * grammar, e.g. mail addresses or MIME types.
- * Some of the characters separate phrases of the value; these are
- * the "special" characters. For example, '@' is normally a special
- * character for mail addresses, because it separates the user name
- * from the domain name. RFC 822 defines a fixed set of special
- * characters, but other RFCs use different sets. Because of this,
- * the following functions let you configure the set of special characters.
- * Every sequence of characters may be embraced by double quotes,
- * which means that the sequence is meant as literal data item;
- * special characters are not recognized inside a quoted string. You may
- * use the backslash to insert any character (including double quotes)
- * verbatim into the quoted string (e.g. "He said: \"Give it to me!\"").
- * The sequence of a backslash character and another character is called
- * a quoted pair.
- * Structured values may contain comments. The beginning of a comment
- * is indicated by '(', and the end by ')'. Comments may be nested.
- * Comments may contain quoted pairs. A
- * comment counts as if a space character were written instead of it.
- * Control characters are the ASCII characters 0 to 31, and 127.
- * RFC 822 demands that MIME headers are 7 bit ASCII strings. Because
- * of this, this function also counts the characters 128 to 255 as
- * control characters.
- * Domain literals are strings embraced by '[' and ']'; such literals
- * may contain quoted pairs. Today, domain literals are used to specify
- * IP addresses.
- * Every character sequence not falling in one of the above categories
- * is an atom (a sequence of non-special and non-control characters).
- * When recognized, atoms may be encoded in a character set different than
- * US-ASCII; such atoms are called encoded words (see RFC 2047).
- *
- * EXTENDED INTERFACE:
- *
- * In order to scan a string containing a MIME value, you must first
- * create a mime_scanner using the function create_mime_scanner.
- * The scanner contains the reference to the scanned string, and a
- * specification how the string is to be scanned. The specification
- * consists of the lists 'specials' and 'scan_options'.
- *
- * The character list 'specials' specifies the set of special characters.
- * These characters are returned as Special c token; the following additional
- * rules apply:
- *
- * - Spaces:
- * If ' ' in specials: A space character is returned as Special ' '.
- * Note that there may also be an effect on how comments are returned
- * (see below).
- * If ' ' not in specials: Spaces are ignored.
- *
- * - Tabs, CRs, LFs:
- * If '\t' in specials: A tab character is returned as Special '\t'.
- * If '\t' not in specials: Tabs are ignored.
- *
- * If '\r' in specials: A CR character is returned as Special '\r'.
- * If '\r' not in specials: CRs are ignored.
- *
- * If '\n' in specials: A LF character is returned as Special '\n'.
- * If '\n' not in specials: LFs are ignored.
- *
- * - Comments:
- * If '(' in specials: Comments are not recognized. The character '('
- * is returned as Special '('.
- * If '(' not in specials: Comments are recognized. How comments are
- * returned, depends on the following:
- * If Return_comments in scan_options: Outer comments are returned as
- * Comment (note that inner comments count but
- * are not returned as tokens)
- * If otherwise ' ' in specials: Outer comments are returned as
- * Special ' '
- * Otherwise: Comments are recognized but ignored.
- *
- * - Quoted strings:
- * If '"' in specials: Quoted strings are not recognized, and double quotes
- * are returned as Special '"'.
- * If '"' not in specials: Quoted strings are returned as QString tokens.
- *
- * - Domain literals:
- * If '[' in specials: Domain literals are not recognized, and left brackets
- * are returned as Special '['.
- * If '[' not in specials: Domain literals are returned as DomainLiteral
- * tokens.
- *
- * Note that the rule for domain literals is completely new in netstring-0.9.
- * It may cause incompatibilities with previous versions if '[' is not
- * special.
- *
- * The general rule for special characters: Every special character c is
- * returned as Special c, and any additional scanning functionality
- * for this character is turned off.
- *
- * If recognized, quoted strings are returned as QString s, where
- * s is the string without the embracing quotes, and with already
- * decoded quoted pairs.
- *
- * Control characters c are returned as Control c.
- *
- * If recognized, comments may either be returned as spaces (in the case
- * you are not interested in the contents of comments), or as Comment tokens.
- * The contents of comments are not further scanned; you must start a
- * subscanner to analyze comments as structured values.
- *
- * If recognized, domain literals are returned as DomainLiteral s, where
- * s is the literal without brackets, and with decoded quoted pairs.
- *
- * Atoms are returned as Atom s where s is a longest sequence of
- * atomic characters (all characters which are neither special nor control
- * characters nor delimiters for substructures). If the option
- * Recognize_encoded_words is on, atoms which look like encoded words
- * are returned as EncodedWord tokens. (Important note: neither '?' nor
- * '=' needs to be special in order to enable this functionality.)
- *
- * After the mime_scanner has been created, you can scan the tokens by
- * invoking scan_token which returns one token at a time, or by invoking
- * scan_token_list which returns all following tokens.
- *
- * There are two token types: s_token is the base type and is intended to
- * be used for pattern matching. s_extended_token is a wrapper that
- * additionally contains information where the token occurs.
- *
- * SIMPLE INTERFACE
- *
- * Instead of creating a mime_scanner and calling the scan functions,
- * you may also invoke scan_structured_value. This function returns the
- * list of tokens directly; however, it is restricted to s_token.
- *
- * EXAMPLES
- *
- * scan_structured_value "user@domain.com" [ '@'; '.' ] []
- * = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
- *
- * scan_structured_value "user @ domain . com" [ '@'; '.' ] []
- * = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
- *
- * scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ] []
- * = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
- *
- * scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ]
- * [ Return_comments ]
- * = [ Atom "user"; Comment; Special '@'; Atom "domain"; Special '.';
- * Atom "com" ]
- *
- * scan_structured_value "user (Do you know him?) @ domain . com"
- * [ '@'; '.'; ' ' ] []
- * = [ Atom "user"; Special ' '; Special ' '; Special ' '; Special '@';
- * Special ' '; Atom "domain";
- * Special ' '; Special '.'; Special ' '; Atom "com" ]
- *
- * scan_structured_value "user (Do you know him?) @ domain . com"
- * [ '@'; '.'; ' ' ] [ Return_comments ]
- * = [ Atom "user"; Special ' '; Comment; Special ' '; Special '@';
- * Special ' '; Atom "domain";
- * Special ' '; Special '.'; Special ' '; Atom "com" ]
- *
- * scan_structured_value "user @ domain . com" [ '@'; '.'; ' ' ] []
- * = [ Atom "user"; Special ' '; Special '@'; Special ' '; Atom "domain";
- * Special ' '; Special '.'; Special ' '; Atom "com" ]
- *
- * scan_structured_value "user(Do you know him?)@domain.com" ['@'; '.'; '(']
- * []
- * = [ Atom "user"; Special '('; Atom "Do"; Atom "you"; Atom "know";
- * Atom "him?)"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
- *
- * scan_structured_value "\"My.name\"@domain.com" [ '@'; '.' ] []
- * = [ QString "My.name"; Special '@'; Atom "domain"; Special '.';
- * Atom "com" ]
- *
- * scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?="
- * [ ] [ ]
- * = [ Atom "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" ]
- *
- * scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?="
- * [ ] [ Recognize_encoded_words ]
- * = [ EncodedWord("ISO-8859-1", "Q", "Keld_J=F8rn_Simonsen") ]
- *
- *)
-
-
-
-type s_token =
- Atom of string
- | EncodedWord of (string * string * string)
- | QString of string
- | Control of char
- | Special of char
- | DomainLiteral of string
- | Comment
- | End
-
-(* - Words are: Atom, EncodedWord, QString.
- * - Atom s: The character sequence forming the atom is contained in s
- * - EncodedWord(charset, encoding, encoded_string) means:
- * * charset is the (uppercase) character set
- * * encoding is either "Q" or "B"
- * * encoded_string: contains the text of the word; the text is represented
- * as octet string following the conventions for character set charset and
- * then encoded either as "Q" or "B" string.
- * - QString s: Here, s are the characters inside the double quotes after
- * decoding any quoted pairs (backslash + character pairs)
- * - Control c: The control character c
- * - Special c: The special character c
- * - DomainLiteral s: s contains the characters inside the brackets after
- * decoding any quoted pairs
- * - Comment: if the option Return_comments is specified, this token
- * represents the whole comment.
- * - End: Is returned after the last token
- *)
-
-
-type s_option =
- No_backslash_escaping
- (* Do not handle backslashes in quoted string and comments as escape
- * characters; backslashes are handled as normal characters.
- * For example: "C:\dir\file" will be returned as
- * QString "C:\dir\file", and not as QString "C:dirfile".
- * - This is a common error in many MIME implementations.
- *)
- | Return_comments
- (* Comments are returned as token Comment (unless '(' is included
- * in the list of special characters, in which case comments are
- * not recognized at all).
- * You may get the exact location of the comment by applying
- * get_pos and get_length to the extended token.
- *)
- | Recognize_encoded_words
- (* Enables that encoded words are recognized and returned as
- * EncodedWord(charset,encoding,content) instead of Atom.
- *)
-
-type s_extended_token
- (* An opaque type containing s_token plus:
- * - where the token occurs
- * - RFC-2047 access functions
- *)
-
-val get_token : s_extended_token -> s_token
- (* Return the s_token within the s_extended_token *)
-
-val get_decoded_word : s_extended_token -> string
-val get_charset : s_extended_token -> string
- (* Return the decoded word (the contents of the word after decoding the
- * "Q" or "B" representation), and the character set of the decoded word
- * (uppercase).
- * These functions work not only for EncodedWord tokens:
- * - Atom: Returns the atom without decoding it
- * - QString: Returns the characters inside the double quotes, and
- * decodes any quoted pairs (backslash + character)
- * - Control: Returns the one-character string
- * - Special: Returns the one-character string
- * - DomainLiteral: Returns the characters inside the brackets, and
- * decodes any quoted pairs
- * - Comment: Returns ""
- * The character set is "US-ASCII" for these tokens.
- *)
-
-val get_pos : s_extended_token -> int
- (* Return the byte position where the token starts in the string
- * (the first byte has position 0)
- *)
-
-val get_line : s_extended_token -> int
- (* Return the line number where the token starts (numbering begins
- * usually with 1)
- *)
-
-val get_column : s_extended_token -> int
- (* Return the column of the line where the token starts (first column
- * is number 0)
- *)
-
-val get_length : s_extended_token -> int
- (* Return the length of the token in bytes *)
-
-val separates_adjacent_encoded_words : s_extended_token -> bool
- (* True iff the current token is white space (Special ' ', Special '\t',
- * Special '\r' or Special '\n') and the last non-white space token
- * was EncodedWord and the next non-white space token will be
- * EncodedWord.
- * Such spaces do not count and must be ignored by any application.
- *)
-
-
-type mime_scanner
-
-val create_mime_scanner :
- specials:char list ->
- scan_options:s_option list ->
- ?pos:int ->
- ?line:int ->
- ?column:int ->
- string ->
- mime_scanner
- (* Creates a new mime_scanner scanning the passed string.
- * specials: The list of characters recognized as special characters.
- * scan_options: The list of global options modifying the behaviour
- * of the scanner
- * pos: The position of the byte where the scanner starts in the
- * passed string. Defaults to 0.
- * line: The line number of this byte. Defaults to 1.
- * column: The column number of this byte. Defaults to 0.
- *
- * The optional parameters pos, line, column are intentionally after
- * scan_options and before the string argument, so you can specify
- * scanners by partially applying arguments to create_mime_scanner
- * which are not yet connected with a particular string:
- * let my_scanner_spec = create_mime_scanner my_specials my_options in
- * ...
- * let my_scanner = my_scanner_spec my_string in
- * ...
- *)
-
-val get_pos_of_scanner : mime_scanner -> int
-val get_line_of_scanner : mime_scanner -> int
-val get_column_of_scanner : mime_scanner -> int
- (* Return the current position, line, and column of a mime_scanner.
- * The primary purpose of these functions is to simplify switching
- * from one mime_scanner to another within a string:
- *
- * let scanner1 = create_mime_scanner ... s in
- * ... now scanning some tokens from s using scanner1 ...
- * let scanner2 = create_mime_scanner ...
- * ?pos:(get_pos_of_scanner scanner1)
- * ?line:(get_line_of_scanner scanner1)
- * ?column:(get_column_of_scanner scanner1)
- * s in
- * ... scanning more tokens from s using scanner2 ...
- *
- * RESTRICTION: These functions are not available if the option
- * Recognize_encoded_words is on. The reason is that this option
- * enables look-ahead scanning; please use the location of the last
- * scanned token instead.
- * It is currently not clear whether a better implementation is needed
- * (it would cost a bit more time).
- *
- * Note: To improve the performance of switching, it is recommended to
- * create scanner specs in advance (see the example my_scanner_spec
- * above).
- *)
-
-val scan_token : mime_scanner -> (s_extended_token * s_token)
- (* Returns the next token, or End if there is no more token. *)
-
-val scan_token_list : mime_scanner -> (s_extended_token * s_token) list
- (* Returns all following tokens as a list (excluding End) *)
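-
- (* A possible scanning loop (illustrative sketch only, not part of the
-  * original documentation; it uses only functions declared above and
-  * follows the partial-application example given for create_mime_scanner):
-  *
-  *   let scanner_spec = create_mime_scanner specials_rfc822 [] in
-  *   let scanner = scanner_spec "user@domain.com" in
-  *   let rec loop () =
-  *     let (ext, tok) = scan_token scanner in
-  *     if tok <> End then begin
-  *       Printf.printf "token at byte %d, %d bytes long\n"
-  *         (get_pos ext) (get_length ext);
-  *       loop ()
-  *     end
-  *   in
-  *   loop ()
-  *)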
-
-val scan_structured_value : string -> char list -> s_option list -> s_token list
- (* This function is included for backwards compatibility, and for all
- * cases not requiring extended tokens.
- *
- * It scans the passed string according to the list of special characters
- * and the list of options, and returns the list of all tokens.
- *)
-
-val specials_rfc822 : char list
-val specials_rfc2045 : char list
- (* The sets of special characters defined by the RFCs 822 and 2045.
- *
- * CHANGE in netstring-0.9: '[' and ']' are no longer special because
- * there is now support for domain literals.
- * '?' and '=' are not special in the rfc2045 version because there is
- * already support for encoded words.
- *)
-
-
-(**********************************************************************)
-
-(* Widely used scanners: *)
-
-
-val scan_encoded_text_value : string -> s_extended_token list
- (* Scans a "text" value. The returned token list contains only
- * Special, Atom and EncodedWord tokens.
- * Spaces, TABs, CRs, LFs are returned unless
- * they occur between adjacent encoded words in which case
- * they are ignored.
- *)
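-
- (* Illustrative sketch (not from the original documentation): turning the
-  * text of a header field into a plain string, using only functions
-  * declared above. The exact token sequence is an assumption.
-  *
-  *   let toks = scan_encoded_text_value "=?ISO-8859-1?Q?Hej?= du" in
-  *   let text =
-  *     String.concat ""
-  *       (List.map get_decoded_word
-  *          (List.filter
-  *             (fun t -> not (separates_adjacent_encoded_words t))
-  *             toks))
-  *   (* text should be "Hej du" *)
-  *)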
-
-
-val scan_value_with_parameters : string -> s_option list ->
- (string * (string * string) list)
- (* let name, params = scan_value_with_parameters s options:
- * Scans phrases like
- * name ; p1=v1 ; p2=v2 ; ...
- * The scan is done with the set of special characters [';', '='].
- *)
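-
- (* For example (illustrative; the exact result is an assumption based on
-  * the description above):
-  *
-  *   scan_value_with_parameters "form-data; name=\"file1\"" []
-  *   should yield ("form-data", ["name", "file1"]).
-  *)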
-
-val scan_mime_type : string -> s_option list ->
- (string * (string * string) list)
- (* let name, params = scan_mime_type s options:
- * Scans MIME types like
- * text/plain; charset=iso-8859-1
- * The name of the type and the names of the parameters are converted
- * to lower case.
- *)
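-
- (* For example (illustrative):
-  *
-  *   scan_mime_type "text/plain; charset=iso-8859-1" []
-  *   should yield ("text/plain", ["charset", "iso-8859-1"]).
-  *)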
-
-
-(**********************************************************************)
-
-(* Scanners for MIME bodies *)
-
-val scan_multipart_body : string -> start_pos:int -> end_pos:int ->
- boundary:string ->
- ((string * string) list * string) list
- (* let [params1, value1; params2, value2; ...]
- * = scan_multipart_body s i0 i1 b
- *
- * Scans the string s that is the body of a multipart message.
- * The multipart message begins at position i0 in s, and i1 is the position
- * of the character following the message. In b the boundary string
- * must be passed (this is the "boundary" parameter of the multipart
- * MIME type, e.g. multipart/mixed;boundary="some string" ).
- * The return value is the list of the parts, where each part
- * is returned as pair (params, value). The left component params
- * is the list of name/value pairs of the header of the part. The
- * right component is the RAW content of the part, i.e. if the part
- * is encoded ("content-transfer-encoding"), the content is returned
- * in the encoded representation. The caller must decode the content
- * himself.
- * The material before the first boundary and after the last
- * boundary is not returned.
- *
- * MULTIPART MESSAGES
- *
- * The MIME standard defines a way to group several message parts into
- * a larger message (for e-mails this technique is known as "attaching"
- * files to messages); these are the so-called multipart messages.
- * Such messages are recognized by the major type string "multipart",
- * e.g. multipart/mixed or multipart/form-data. Multipart types MUST
- * have a boundary parameter because boundaries are essential for the
- * representation.
- * Multipart messages have a format like
- *
- * ...Header...
- * Content-type: multipart/xyz; boundary="abc"
- * ...Header...
- *
- * Body begins here ("prologue")
- * --abc
- * ...Header part 1...
- *
- * ...Body part 1...
- * --abc
- * ...Header part 2...
- *
- *
- * ...Body part 2
- * --abc
- * ...
- * --abc--
- * Epilogue
- *
- * The parts are separated by boundary lines which begin with "--" and
- * the string passed as boundary parameter. (Note that arbitrary text may
- * follow on a boundary line after "--abc".) The boundary is
- * chosen such that it does not occur as prefix of any line of the
- * inner parts of the message.
- * The parts are again MIME messages, with header and body. Note
- * that it is explicitly allowed that the parts are themselves multipart
- * messages.
- * The texts before the first boundary and after the last boundary
- * are ignored.
- * Note that multipart messages as a whole MUST NOT be encoded.
- * Only the PARTS of the messages may be encoded (if they are not
- * multipart messages themselves).
- *
- * Please read RFC 2046 if you want to know the gory details of this
- * brain-dead format.
- *)
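-
- (* Illustrative sketch (not part of the original documentation; the exact
-  * shape of the result, in particular the header representation, is an
-  * assumption). The call follows the "scan_multipart_body s i0 i1 b"
-  * reading given above:
-  *
-  *   let body =
-  *     "--abc\r\ncontent-type: text/plain\r\n\r\nHello\r\n--abc--\r\n" in
-  *   let parts = scan_multipart_body body 0 (String.length body) "abc" in
-  *   (* parts is expected to contain one element whose value component is
-  *    * the raw, still encoded text of the part ("Hello" here). *)
-  *)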
-
-val scan_multipart_body_and_decode : string -> start_pos:int -> end_pos:int ->
- boundary:string ->
- ((string * string) list * string) list
- (* Same as scan_multipart_body, but decodes the bodies of the parts
- * if they are encoded using the methods "base64" or "quoted printable".
- * Fails if an unknown encoding is used.
- *)
-
-val scan_multipart_body_from_netstream
- : Netstream.t ->
- boundary:string ->
- create:((string * string) list -> 'a) ->
- add:('a -> Netstream.t -> int -> int -> unit) ->
- stop:('a -> unit) ->
- unit
- (* scan_multipart_body_from_netstream s b create add stop:
- *
- * Reads the MIME message from the netstream s block by block. The
- * parts are delimited by the boundary b.
- *
- * Once a new part is detected and begins, the function 'create' is
- * called with the MIME header as argument. The result p of this function
- * may be of any type.
- *
- * For every chunk of the part that is being read, the function 'add'
- * is invoked: add p s k n.
- * Here, p is the value returned by the 'create' invocation for the
- * current part. s is the netstream. The current window of s contains
- * the read chunk completely; the chunk begins at position k of the
- * window (relative to the beginning of the window) and has a length
- * of n bytes.
- *
- * When the part has been fully read, the function 'stop' is
- * called with p as argument.
- *
- * That means, for every part the following is executed:
- * - let p = create h
- * - add p s k1 n1
- * - add p s k2 n2
- * - ...
- * - add p s kN nN
- * - stop p
- *
- * IMPORTANT PRECONDITION:
- * - The block size of the netstream s must be at least
- * String.length b + 3
- *
- * EXCEPTIONS:
- * - Exceptions can happen because of ill-formed input, and within
- * the callbacks of the functions 'create', 'add', 'stop'.
- * - If the exception happens while part p is being read, and the
- * 'create' function has already been called (successfully), the
- * 'stop' function is also called (you have the chance to close files).
- *)
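-
- (* Illustrative sketch of the callback protocol (assumptions: 'stream' is
-  * an already created Netstream.t with a sufficiently large block size; the
-  * example merely counts the bytes of every part). The call follows the
-  * "scan_multipart_body_from_netstream s b create add stop" reading above:
-  *
-  *   scan_multipart_body_from_netstream
-  *     stream "abc"
-  *     (fun _header -> ref 0)                                   (* create *)
-  *     (fun counter _s _k n -> counter := !counter + n)        (* add    *)
-  *     (fun counter -> Printf.printf "part: %d bytes\n" !counter) (* stop *)
-  *)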
-
-
-(* THREAD-SAFETY:
- * The functions are thread-safe as long as the threads do not share
- * values.
- *)
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.8 2000/08/13 00:04:36 gerd
- * Encoded_word -> EncodedWord
- * Bugfixes.
- *
- * Revision 1.7 2000/08/07 00:25:00 gerd
- * Major update of the interface for structured field lexing.
- *
- * Revision 1.6 2000/06/25 22:34:43 gerd
- * Added labels to arguments.
- *
- * Revision 1.5 2000/06/25 21:15:48 gerd
- * Checked thread-safety.
- *
- * Revision 1.4 2000/05/16 22:29:12 gerd
- * New "option" arguments specifying the level of MIME
- * compatibility.
- *
- * Revision 1.3 2000/04/15 13:09:01 gerd
- * Implemented uploads to temporary files.
- *
- * Revision 1.2 2000/03/02 01:15:30 gerd
- * Updated.
- *
- * Revision 1.1 2000/02/25 15:21:12 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-type t =
- { mutable buffer : string;
- mutable length : int;
- }
-
-(* To help the garbage collector:
- * The 'buffer' has a minimum length of 31 bytes. This minimum can still
- * be stored in the minor heap.
- * When the 'buffer' grows, its length roughly doubles. This limits the
- * number of different bucket sizes, and simplifies reallocation of freed
- * memory.
- *)
-
-(* Optimal string length:
- * Every string takes: 1 word for the header, enough words for the
- * contents + 1 Null byte (for C compatibility).
- * If the buffer grows, it is best to use a new string length such
- * that the number of words is exactly twice as large as for the previous
- * string.
- * n: length of the previous string in bytes
- * w: storage size of the previous string in words
- * n': length of the new string in bytes
- * w' = 2*w: storage size of the new string in words
- *
- * w = (n+1) / word_length + 1
- * [it is assumed that (n+1) is always a multiple of word_length]
- *
- * n' = (2*w - 1) * word_length - 1
- *
- * n' = [2 * ( [n+1] / word_length + 1) - 1] * word_length - 1
- * = [2*(n+1)/word_length + 1] * word_length - 1
- * = (2*n + 2) + word_length - 1
- * = 2 * n + word_length + 1
- *
- * n'+1 is again a multiple of word_length:
- * n'+1 = 2*n + 2 + word_length
- * = 2*(n+1) + word_length
- * = a multiple of word_length because n+1 is a multiple of word_length
- *)
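-
-(* For example, assuming a 64-bit platform (word_length = 8) and the initial
- * length n = 31: the buffer lengths grow as 31, 71, 151, 311, ...
- * (n' = 2*n + 9), and n'+1 = 72, 152, 312, ... are all multiples of 8,
- * as required above.
- *)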
-
-let word_length = Sys.word_size / 8 (* in bytes *)
-
-let create n =
- { buffer = String.create (max n 31); length = 0; }
-
-let contents b =
- String.sub b.buffer 0 b.length
-
-let sub b ~pos:k ~len:n =
- if k+n > b.length then
- raise (Invalid_argument "Netbuffer.sub");
- String.sub b.buffer k n
-
-let unsafe_buffer b =
- b.buffer
-
-let length b =
- b.length
-
-let add_string b s =
- let l = String.length s in
- if l + b.length > String.length b.buffer then begin
- let l' = l + b.length in
- let rec new_size s =
- if s >= l' then s else new_size(2*s + word_length + 1)
- in
- let buffer' = String.create (new_size (String.length b.buffer)) in
- String.blit b.buffer 0 buffer' 0 b.length;
- b.buffer <- buffer'
- end;
- String.blit s 0 b.buffer b.length l;
- b.length <- b.length + l
-
-let add_sub_string b s ~pos:k ~len:l =
- if l + b.length > String.length b.buffer then begin
- let l' = l + b.length in
- let rec new_size s =
- if s >= l' then s else new_size(2*s + word_length + 1)
- in
- let buffer' = String.create (new_size (String.length b.buffer)) in
- String.blit b.buffer 0 buffer' 0 b.length;
- b.buffer <- buffer'
- end;
- String.blit s k b.buffer b.length l;
- b.length <- b.length + l
-
-let delete b ~pos:k ~len:l =
- (* deletes l bytes at position k in b *)
- let n = String.length b.buffer in
- if k+l <> n & k <> n then
- String.blit b.buffer (k+l) b.buffer k (n-l-k);
- b.length <- b.length - l;
- ()
-
-let try_shrinking b =
- (* If the buffer size decreases drastically, reallocate the buffer *)
- if b.length < (String.length b.buffer / 2) then begin
- let rec new_size s =
- if s >= b.length then s else new_size(2*s + word_length + 1)
- in
- let buffer' = String.create (new_size 31) in
- String.blit b.buffer 0 buffer' 0 b.length;
- b.buffer <- buffer'
- end
-
-let clear b =
- delete b 0 (b.length)
-
-let index_from b k c =
- if k > b.length then
- raise (Invalid_argument "Netbuffer.index_from");
- let p = String.index_from b.buffer k c in
- if p >= b.length then raise Not_found;
- p
-
-let print_buffer b =
- Format.printf
- "<NETBUFFER: %d/%d>"
- b.length
- (String.length b.buffer)
-;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/06/25 22:34:43 gerd
- * Added labels to arguments.
- *
- * Revision 1.2 2000/06/24 20:20:33 gerd
- * Added the toploop printer.
- *
- * Revision 1.1 2000/04/15 13:07:48 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-(* A Netbuffer.t is a buffer that can grow and shrink dynamically. *)
-
-type t
-
-val create : int -> t
- (* Creates a netbuffer which allocates initially this number of bytes.
- * The logical length is zero.
- *)
-
-val contents : t -> string
- (* Returns the contents of the buffer as a fresh string. *)
-
-val sub : t -> pos:int -> len:int -> string
- (* sub nb k n: returns the n characters starting at position k of
- * netbuffer nb as a fresh string
- *)
-
-val length : t -> int
- (* Returns the logical length of the buffer *)
-
-val add_string : t -> string -> unit
- (* add_string nb s: Adds a copy of the string s to the logical end of
- * the netbuffer nb. If necessary, the nb grows.
- *)
-
-val add_sub_string : t -> string -> pos:int -> len:int -> unit
- (* add_sub_string nb s k n: Adds the substring of s starting at position
- * k with length n to the logical end of the netbuffer nb. If necessary,
- * the nb grows.
- * This is semantically the same as
- * add_string nb (String.sub s k n), but the extra copy is avoided.
- *)
-
-val delete : t -> pos:int -> len:int -> unit
- (* delete nb k n: Deletes the n bytes at position k of netbuffer nb
- * in-place.
- * The netbuffer does not shrink!
- *)
-
-val clear : t -> unit
- (* Deletes all contents from the buffer. As with 'delete', the netbuffer
- * does not shrink.
- *)
-
-val try_shrinking : t -> unit
- (* try_shrinking nb: If the length of the buffer is less than half of
- * the allocated space, the netbuffer is reallocated in order to save
- * memory.
- *)
-
-val index_from : t -> int -> char -> int
- (* index_from nb k c: Searches the character c in the netbuffer beginning
- * at position k. If found, the position of the left-most occurrence is
- * returned. Otherwise, Not_found is raised.
- *)
-
-val unsafe_buffer : t -> string
- (* WARNING! This is a low-level function!
- * Returns the current string that internally holds the buffer.
- * The byte positions 0 to length - 1 actually store the contents of
- * the buffer. You can directly read and modify the buffer. Note that
- * there is no protection if you read or write positions beyond the
- * length of the buffer.
- *)
-
-val print_buffer : t -> unit
- (* For the toploop *)
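-
-(* Illustrative usage sketch (not part of the original interface
- * documentation):
- *
- *   let nb = Netbuffer.create 16 in
- *   Netbuffer.add_string nb "Hello, ";
- *   Netbuffer.add_string nb "world";
- *   assert (Netbuffer.length nb = 12);
- *   Netbuffer.delete nb ~pos:5 ~len:7;
- *   assert (Netbuffer.contents nb = "Hello")
- *)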
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/06/25 22:34:43 gerd
- * Added labels to arguments.
- *
- * Revision 1.2 2000/06/24 20:20:33 gerd
- * Added the toploop printer.
- *
- * Revision 1.1 2000/04/15 13:07:48 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *)
-
-exception Malformed_code
-
-
-type encoding =
- [ `Enc_utf8 (* UTF-8 *)
- | `Enc_java
- | `Enc_utf16 (* UTF-16 with unspecified endianness (restricted usage) *)
- | `Enc_utf16_le (* UTF-16 little endian *)
- | `Enc_utf16_be (* UTF-16 big endian *)
- | `Enc_usascii (* US-ASCII (only 7 bit) *)
- | `Enc_iso88591 (* ISO-8859-1 *)
- | `Enc_iso88592 (* ISO-8859-2 *)
- | `Enc_iso88593 (* ISO-8859-3 *)
- | `Enc_iso88594 (* ISO-8859-4 *)
- | `Enc_iso88595 (* ISO-8859-5 *)
- | `Enc_iso88596 (* ISO-8859-6 *)
- | `Enc_iso88597 (* ISO-8859-7 *)
- | `Enc_iso88598 (* ISO-8859-8 *)
- | `Enc_iso88599 (* ISO-8859-9 *)
- | `Enc_iso885910 (* ISO-8859-10 *)
- | `Enc_iso885913 (* ISO-8859-13 *)
- | `Enc_iso885914 (* ISO-8859-14 *)
- | `Enc_iso885915 (* ISO-8859-15 *)
- | `Enc_koi8r (* KOI8-R *)
- | `Enc_jis0201 (* JIS-0201 *)
- (* Microsoft: *)
- | `Enc_windows1250 (* WINDOWS-1250 *)
- | `Enc_windows1251 (* WINDOWS-1251 *)
- | `Enc_windows1252 (* WINDOWS-1252 *)
- | `Enc_windows1253 (* WINDOWS-1253 *)
- | `Enc_windows1254 (* WINDOWS-1254 *)
- | `Enc_windows1255 (* WINDOWS-1255 *)
- | `Enc_windows1256 (* WINDOWS-1256 *)
- | `Enc_windows1257 (* WINDOWS-1257 *)
- | `Enc_windows1258 (* WINDOWS-1258 *)
- (* IBM, ASCII-based: *)
- | `Enc_cp437
- | `Enc_cp737
- | `Enc_cp775
- | `Enc_cp850
- | `Enc_cp852
- | `Enc_cp855
- | `Enc_cp856
- | `Enc_cp857
- | `Enc_cp860
- | `Enc_cp861
- | `Enc_cp862
- | `Enc_cp863
- | `Enc_cp864
- | `Enc_cp865
- | `Enc_cp866
- | `Enc_cp869
- | `Enc_cp874
- | `Enc_cp1006
- (* IBM, EBCDIC-based: *)
- | `Enc_cp037
- | `Enc_cp424
- | `Enc_cp500
- | `Enc_cp875
- | `Enc_cp1026
- (* Adobe: *)
- | `Enc_adobe_standard_encoding
- | `Enc_adobe_symbol_encoding
- | `Enc_adobe_zapf_dingbats_encoding
- (* Apple: *)
- | `Enc_macroman
-
- ]
-;;
-
-
-let norm_enc_name e =
- (* Removes '-', '_', and '.' from e, and converts the result to uppercase *)
- let e' = String.create (String.length e) in
- let rec next i j =
- if i < String.length e then
- match e.[i] with
- ('-'|'_'|'.') -> next (i+1) j
- | c -> e'.[j] <- c; next (i+1) (j+1)
- else
- j
- in
- let l = next 0 0 in
- String.uppercase(String.sub e' 0 l)
-;;
-
-
-let encoding_of_string e =
- match norm_enc_name e with
- ("UTF16"|"UCS2"|"ISO10646UCS2") -> `Enc_utf16
- | "UTF16BE" -> `Enc_utf16_be
- | "UTF16LE" -> `Enc_utf16_le
- | "UTF8" -> `Enc_utf8
- | ("UTF8JAVA"|"JAVA") -> `Enc_java
- | ("USASCII"|"ASCII"|"ISO646US"|"IBM367"|"CP367") -> `Enc_usascii
- | ("ISO88591"|"LATIN1"|"IBM819"|"CP819") -> `Enc_iso88591
- | ("ISO88592"|"LATIN2") -> `Enc_iso88592
- | ("ISO88593"|"LATIN3") -> `Enc_iso88593
- | ("ISO88594"|"LATIN4") -> `Enc_iso88594
- | ("ISO88595"|"CYRILLIC") -> `Enc_iso88595
- | ("ISO88596"|"ARABIC"|"ECMA114"|"ASMO708") -> `Enc_iso88596
- | ("ISO88597"|"GREEK"|"GREEK8"|"ELOT928"|"ECMA118") -> `Enc_iso88597
- | ("ISO88598"|"HEBREW") -> `Enc_iso88598
- | ("ISO88599"|"LATIN5") -> `Enc_iso88599
- | ("ISO885910"|"LATIN6") -> `Enc_iso885910
- | "ISO885913" -> `Enc_iso885913
- | "ISO885914" -> `Enc_iso885914
- | "ISO885915" -> `Enc_iso885915
- | "KOI8R" -> `Enc_koi8r
- | ("JIS0201"|"JISX0201"|"X0201") -> `Enc_jis0201
-
- | "WINDOWS1250" -> `Enc_windows1250
- | "WINDOWS1251" -> `Enc_windows1251
- | "WINDOWS1252" -> `Enc_windows1252
- | "WINDOWS1253" -> `Enc_windows1253
- | "WINDOWS1254" -> `Enc_windows1254
- | "WINDOWS1255" -> `Enc_windows1255
- | "WINDOWS1256" -> `Enc_windows1256
- | "WINDOWS1257" -> `Enc_windows1257
- | "WINDOWS1258" -> `Enc_windows1258
-
- | ("CP437"|"IBM437") -> `Enc_cp437
- | ("CP737"|"IBM737") -> `Enc_cp737
- | ("CP775"|"IBM775") -> `Enc_cp775
- | ("CP850"|"IBM850") -> `Enc_cp850
- | ("CP852"|"IBM852") -> `Enc_cp852
- | ("CP855"|"IBM855") -> `Enc_cp855
- | ("CP856"|"IBM856") -> `Enc_cp856
- | ("CP857"|"IBM857") -> `Enc_cp857
- | ("CP860"|"IBM860") -> `Enc_cp860
- | ("CP861"|"IBM861") -> `Enc_cp861
- | ("CP862"|"IBM862") -> `Enc_cp862
- | ("CP863"|"IBM863") -> `Enc_cp863
- | ("CP864"|"IBM864") -> `Enc_cp864
- | ("CP865"|"IBM865") -> `Enc_cp865
- | ("CP866"|"IBM866") -> `Enc_cp866
- | ("CP869"|"IBM869") -> `Enc_cp869
- | ("CP874"|"IBM874") -> `Enc_cp874
- | ("CP1006"|"IBM1006") -> `Enc_cp1006
-
- | ("CP037"|"IBM037"|"EBCDICCPUS"|"EBCDICCPCA"|"EBCDICCPWT"|
- "EBCDICCPNL") -> `Enc_cp037
- | ("CP424"|"IBM424"|"EBCDICCPHE") -> `Enc_cp424
- | ("CP500"|"IBM500"|"EBCDICCPBE"|"EBCDICCPCH") -> `Enc_cp500
- | ("CP875"|"IBM875") -> `Enc_cp875
- | ("CP1026"|"IBM1026") -> `Enc_cp1026
-
- | "ADOBESTANDARDENCODING" -> `Enc_adobe_standard_encoding
- | "ADOBESYMBOLENCODING" -> `Enc_adobe_symbol_encoding
- | "ADOBEZAPFDINGBATSENCODING" -> `Enc_adobe_zapf_dingbats_encoding
-
- | "MACINTOSH" -> `Enc_macroman
-
- | _ ->
- failwith "Netconversion.encoding_of_string: unknown encoding"
-;;
-
-
-let string_of_encoding (e : encoding) =
- (* If there is a "preferred MIME name", this name is returned (see IANA). *)
- match e with
- `Enc_utf16 -> "UTF-16"
- | `Enc_utf16_be -> "UTF-16-BE"
- | `Enc_utf16_le -> "UTF-16-LE"
- | `Enc_utf8 -> "UTF-8"
- | `Enc_java -> "UTF-8-JAVA"
- | `Enc_usascii -> "US-ASCII"
- | `Enc_iso88591 -> "ISO-8859-1"
- | `Enc_iso88592 -> "ISO-8859-2"
- | `Enc_iso88593 -> "ISO-8859-3"
- | `Enc_iso88594 -> "ISO-8859-4"
- | `Enc_iso88595 -> "ISO-8859-5"
- | `Enc_iso88596 -> "ISO-8859-6"
- | `Enc_iso88597 -> "ISO-8859-7"
- | `Enc_iso88598 -> "ISO-8859-8"
- | `Enc_iso88599 -> "ISO-8859-9"
- | `Enc_iso885910 -> "ISO-8859-10"
- | `Enc_iso885913 -> "ISO-8859-13"
- | `Enc_iso885914 -> "ISO-8859-14"
- | `Enc_iso885915 -> "ISO-8859-15"
- | `Enc_koi8r -> "KOI8-R"
- | `Enc_jis0201 -> "JIS_X0201"
- | `Enc_windows1250 -> "WINDOWS-1250"
- | `Enc_windows1251 -> "WINDOWS-1251"
- | `Enc_windows1252 -> "WINDOWS-1252"
- | `Enc_windows1253 -> "WINDOWS-1253"
- | `Enc_windows1254 -> "WINDOWS-1254"
- | `Enc_windows1255 -> "WINDOWS-1255"
- | `Enc_windows1256 -> "WINDOWS-1256"
- | `Enc_windows1257 -> "WINDOWS-1257"
- | `Enc_windows1258 -> "WINDOWS-1258"
- | `Enc_cp437 -> "CP437"
- | `Enc_cp737 -> "CP737"
- | `Enc_cp775 -> "CP775"
- | `Enc_cp850 -> "CP850"
- | `Enc_cp852 -> "CP852"
- | `Enc_cp855 -> "CP855"
- | `Enc_cp856 -> "CP856"
- | `Enc_cp857 -> "CP857"
- | `Enc_cp860 -> "CP860"
- | `Enc_cp861 -> "CP861"
- | `Enc_cp862 -> "CP862"
- | `Enc_cp863 -> "CP863"
- | `Enc_cp864 -> "CP864"
- | `Enc_cp865 -> "CP865"
- | `Enc_cp866 -> "CP866"
- | `Enc_cp869 -> "CP869"
- | `Enc_cp874 -> "CP874"
- | `Enc_cp1006 -> "CP1006"
- | `Enc_cp037 -> "CP037"
- | `Enc_cp424 -> "CP424"
- | `Enc_cp500 -> "CP500"
- | `Enc_cp875 -> "CP875"
- | `Enc_cp1026 -> "CP1026"
- | `Enc_adobe_standard_encoding -> "ADOBE-STANDARD-ENCODING"
- | `Enc_adobe_symbol_encoding -> "ADOBE-SYMBOL-ENCODING"
- | `Enc_adobe_zapf_dingbats_encoding -> "ADOBE-ZAPF-DINGBATS-ENCODING"
- | `Enc_macroman -> "MACINTOSH"
-;;
-
-
-let read_iso88591 write s_in p_in l_in =
- let rec scan k_in k_out c_out =
- if k_in < l_in then begin
- let p = Char.code s_in.[p_in + k_in] in
- let n = write p k_out c_out in
- if n < 0 then
- k_in, k_out, `Enc_iso88591
- else
- scan (k_in + 1) (k_out + n) (c_out + 1)
- end
- else
- k_in, k_out, `Enc_iso88591
- in
- scan 0 0 0
-;;
-
-
-let read_usascii write s_in p_in l_in =
- let rec scan k_in k_out c_out =
- if k_in < l_in then begin
- let p = Char.code s_in.[p_in + k_in] in
- if p >= 0x80 then raise Malformed_code;
- let n = write p k_out c_out in
- if n < 0 then
- k_in, k_out, `Enc_usascii
- else
- scan (k_in + 1) (k_out + n) (c_out + 1)
- end
- else
- k_in, k_out, `Enc_usascii
- in
- scan 0 0 0
-;;
-
-
-let read_8bit m_to_unicode enc write s_in p_in l_in =
- let rec scan k_in k_out c_out =
- if k_in < l_in then begin
- let p_local = Char.code s_in.[p_in + k_in] in
- let p_uni = Array.unsafe_get m_to_unicode p_local in
- if p_uni < 0 then raise Malformed_code;
- let n = write p_uni k_out c_out in
- if n < 0 then
- k_in, k_out, enc
- else
- scan (k_in + 1) (k_out + n) (c_out + 1)
- end
- else
- k_in, k_out, enc
- in
- scan 0 0 0
-;;
-
-
-let read_utf8 is_java write s_in p_in l_in =
- let rec scan k_in k_out c_out =
- if k_in < l_in then begin
- let n_out, n_in =
- match s_in.[p_in + k_in] with
- '\000' ->
- if is_java then raise Malformed_code;
- write 0 k_out c_out, 1
- | ('\001'..'\127' as c) ->
- write (Char.code c) k_out c_out, 1
- | ('\128'..'\223' as c) ->
- if k_in + 1 >= l_in then
- -1, 0
- else begin
- let n1 = Char.code c in
- let n2 = Char.code (s_in.[p_in + k_in + 1]) in
- if is_java && (n1 = 0x80 && n2 = 0xc0) then
- write 0 k_out c_out, 2
- else begin
- if n2 < 128 or n2 > 191 then raise Malformed_code;
- let p = ((n1 land 0b11111) lsl 6) lor (n2 land 0b111111) in
- if p < 128 then raise Malformed_code;
- write p k_out c_out, 2
- end
- end
- | ('\224'..'\239' as c) ->
- if k_in + 2 >= l_in then
- -1, 0
- else begin
- let n1 = Char.code c in
- let n2 = Char.code (s_in.[p_in + k_in + 1]) in
- let n3 = Char.code (s_in.[p_in + k_in + 2]) in
- if n2 < 128 or n2 > 191 then raise Malformed_code;
- if n3 < 128 or n3 > 191 then raise Malformed_code;
- let p =
- ((n1 land 0b1111) lsl 12) lor
- ((n2 land 0b111111) lsl 6) lor
- (n3 land 0b111111)
- in
- if p < 0x800 then raise Malformed_code;
- if (p >= 0xd800 && p < 0xe000) then
- (* Surrogate pairs are not supported in UTF-8 *)
- raise Malformed_code;
- if (p >= 0xfffe && p <= 0xffff) then
- raise Malformed_code;
- write p k_out c_out, 3
- end
- | ('\240'..'\247' as c) ->
- if k_in + 3 >= l_in then
- -1, 0
- else begin
- let n1 = Char.code c in
- let n2 = Char.code (s_in.[p_in + k_in + 1]) in
- let n3 = Char.code (s_in.[p_in + k_in + 2]) in
- let n4 = Char.code (s_in.[p_in + k_in + 3]) in
- if n2 < 128 or n2 > 191 then raise Malformed_code;
- if n3 < 128 or n3 > 191 then raise Malformed_code;
- if n4 < 128 or n4 > 191 then raise Malformed_code;
- let p = ((n1 land 0b111) lsl 18) lor
- ((n2 land 0b111111) lsl 12) lor
- ((n3 land 0b111111) lsl 6) lor
- (n4 land 0b111111)
- in
- if p < 0x10000 then raise Malformed_code;
- if p >= 0x110000 then
- (* These code points are not supported. *)
- raise Malformed_code;
- write p k_out c_out, 4
- end
- | _ ->
- (* Bytes 0xf8..0xff cannot start a valid UTF-8 character *)
- raise Malformed_code;
- in
- (* n_out: number of written bytes; -1 means out buf is full
- * n_in: number of read bytes; 0 means end of in buf reached
- * n_in = 0 implies n_out = -1
- *)
- if n_out < 0 then
- k_in, k_out, `Enc_utf8
- else
- scan (k_in + n_in) (k_out + n_out) (c_out + 1)
- end
- else
- k_in, k_out, `Enc_utf8
- in
- scan 0 0 0
-;;
-
-
-let surrogate_offset = 0x10000 - (0xD800 lsl 10) - 0xDC00;;
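-
-(* Worked example of the surrogate arithmetic below: for the pair
- * high = 0xD801, low = 0xDC37 the computed code point is
- * (0xD801 lsl 10) + 0xDC37 + surrogate_offset
- *   = ((0xD801 - 0xD800) lsl 10) + (0xDC37 - 0xDC00) + 0x10000
- *   = 0x400 + 0x37 + 0x10000
- *   = 0x10437
- *)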
-
-let read_utf16_le k_in_0 write s_in p_in l_in =
- let rec scan k_in k_out c_out =
- if k_in + 1 < l_in then begin
- let p = (Char.code s_in.[p_in + k_in]) lor ((Char.code s_in.[p_in + k_in + 1]) lsl 8) in
-
- if p >= 0xd800 & p < 0xe000 then begin
- (* This is a surrogate pair. *)
- if k_in + 3 < l_in then begin
- if p <= 0xdbff then begin
- let q = (Char.code s_in.[p_in + k_in + 2 ]) lor
- ((Char.code s_in.[p_in + k_in + 3]) lsl 8) in
- if q < 0xdc00 or q > 0xdfff then raise Malformed_code;
- let eff_p = (p lsl 10) + q + surrogate_offset in
- let n = write eff_p k_out c_out in
- if n < 0 then
- k_in, k_out, `Enc_utf16_le
- else
- scan (k_in + 4) (k_out + n) (c_out + 1)
- end
- else
- (* Malformed pair: *)
- raise Malformed_code;
- end
- else
- (* Incomplete pair: *)
- k_in, k_out, `Enc_utf16_le
- end
-
- else
- if p = 0xfffe then
- (* Big endian byte order mark: It is illegal here *)
- raise Malformed_code
- else begin
- (* A regular code point *)
- let n = write p k_out c_out in
- if n < 0 then
- k_in, k_out, `Enc_utf16_le
- else
- scan (k_in + 2) (k_out + n) (c_out + 1)
- end
- end
- else
- (* Incomplete character: *)
- k_in, k_out, `Enc_utf16_le
- in
- scan k_in_0 0 0
-;;
-
-
-let read_utf16_be k_in_0 write s_in p_in l_in =
- let rec scan k_in k_out c_out =
- if k_in + 1 < l_in then begin
- let p = (Char.code s_in.[p_in + k_in + 1]) lor ((Char.code s_in.[p_in + k_in]) lsl 8) in
-
- if p >= 0xd800 & p < 0xe000 then begin
- (* This is a surrogate pair. *)
- if k_in + 3 < l_in then begin
- if p <= 0xdbff then begin
- let q = (Char.code s_in.[p_in + k_in + 3 ]) lor
- ((Char.code s_in.[p_in + k_in + 2]) lsl 8) in
- if q < 0xdc00 or q > 0xdfff then raise Malformed_code;
- let eff_p = (p lsl 10) + q + surrogate_offset in
- let n = write eff_p k_out c_out in
- if n < 0 then
- k_in, k_out, `Enc_utf16_be
- else
- scan (k_in + 4) (k_out + n) (c_out + 1)
- end
- else
- (* Malformed pair: *)
- raise Malformed_code;
- end
- else
- (* Incomplete pair: *)
- k_in, k_out, `Enc_utf16_be
- end
-
- else
- if p = 0xfffe then
- (* Little endian byte order mark: It is illegal here *)
- raise Malformed_code
- else begin
- (* A regular code point *)
- let n = write p k_out c_out in
- if n < 0 then
- k_in, k_out, `Enc_utf16_be
- else
- scan (k_in + 2) (k_out + n) (c_out + 1)
- end
-
- end
- else
- (* Incomplete character: *)
- k_in, k_out, `Enc_utf16_be
- in
- scan k_in_0 0 0
-;;
-
-
-let read_utf16 write s_in p_in l_in =
- (* Expect a BOM at the beginning of the text *)
- if l_in >= 2 then begin
- let c0 = s_in.[p_in + 0] in
- let c1 = s_in.[p_in + 1] in
- if c0 = '\254' & c1 = '\255' then begin
- (* 0xfeff as big endian *)
- read_utf16_be 2 write s_in p_in l_in
- end
- else
- if c0 = '\255' & c1 = '\254' then begin
- (* 0xfeff as little endian *)
- read_utf16_le 2 write s_in p_in l_in
- end
- else
- (* byte order mark missing *)
- raise Malformed_code
- end
- else
- 0, 0, `Enc_utf16
-;;
-
-
-let write_iso88591 s_out p_out l_out max_chars w p k_out c_out =
- if k_out < l_out && c_out < max_chars then begin
- if p > 255 then begin
- let subst = w p in
- let l_subst = String.length subst in
- if k_out + l_subst <= l_out then begin
- (* Enough space to store 'subst': *)
- String.blit subst 0 s_out (k_out+p_out) l_subst;
- l_subst
- end
- else
- (* Not enough space: Stop this round of recoding *)
- -1
- end
- else begin
- s_out.[p_out + k_out] <- Char.chr p;
- 1
- end
- end
- else
- -1 (* End-of-buffer indicator *)
-;;
-
-
-let write_usascii s_out p_out l_out max_chars w p k_out c_out =
- if k_out < l_out && c_out < max_chars then begin
- if p > 127 then begin
- let subst = w p in
- let l_subst = String.length subst in
- if k_out + l_subst <= l_out then begin
- (* Enough space to store 'subst': *)
- String.blit subst 0 s_out (k_out+p_out) l_subst;
- l_subst
- end
- else
- (* Not enough space: Stop this round of recoding *)
- -1
- end
- else begin
- s_out.[p_out + k_out] <- Char.chr p;
- 1
- end
- end
- else
- -1 (* End-of-buffer indicator *)
-;;
-
-
-let write_8bit from_unicode s_out p_out l_out max_chars w p k_out c_out =
- if k_out < l_out && c_out < max_chars then begin
- let p' =
- match Array.unsafe_get from_unicode (p land 255) with
- Netmappings.U_nil -> -1
- | Netmappings.U_single (p0,q0) ->
- if p0 = p then q0 else -1
- | Netmappings.U_list l ->
- (try List.assoc p l with Not_found -> -1)
- in
- if p' < 0 then begin
- let subst = w p in
- let l_subst = String.length subst in
- if k_out + l_subst <= l_out then begin
- (* Enough space to store 'subst': *)
- String.blit subst 0 s_out (k_out+p_out) l_subst;
- l_subst
- end
- else
- (* Not enough space: Stop this round of recoding *)
- -1
- end
- else begin
- s_out.[p_out + k_out] <- Char.chr p';
- 1
- end
- end
- else
- -1 (* End-of-buffer indicator *)
-;;
-
-
-let write_utf8 is_java s_out p_out l_out max_chars w p k_out c_out =
- if p <= 127 && (not is_java || p <> 0) then begin
- if k_out < l_out && c_out < max_chars then begin
- s_out.[p_out + k_out] <- Char.chr p;
- 1
- end
- else -1
- end
- else if p <= 0x7ff then begin
- if k_out + 1 < l_out && c_out < max_chars then begin
- s_out.[p_out + k_out] <- Char.chr (0xc0 lor (p lsr 6));
- s_out.[p_out + k_out + 1] <- Char.chr (0x80 lor (p land 0x3f));
- 2
- end
- else -1
- end
- else if p <= 0xffff then begin
- (* Refuse writing surrogate pairs, and fffe, ffff *)
- if (p >= 0xd800 & p < 0xe000) or (p >= 0xfffe) then
- failwith "Netconversion.write_utf8";
- if k_out + 2 < l_out && c_out < max_chars then begin
- s_out.[p_out + k_out] <- Char.chr (0xe0 lor (p lsr 12));
- s_out.[p_out + k_out + 1] <- Char.chr (0x80 lor ((p lsr 6) land 0x3f));
- s_out.[p_out + k_out + 2] <- Char.chr (0x80 lor (p land 0x3f));
- 3
- end
- else -1
- end
- else if p <= 0x10ffff then begin
- if k_out + 3 < l_out && c_out < max_chars then begin
- s_out.[p_out + k_out] <- Char.chr (0xf0 lor (p lsr 18));
- s_out.[p_out + k_out + 1] <- Char.chr (0x80 lor ((p lsr 12) land 0x3f));
- s_out.[p_out + k_out + 2] <- Char.chr (0x80 lor ((p lsr 6) land 0x3f));
- s_out.[p_out + k_out + 3] <- Char.chr (0x80 lor (p land 0x3f));
- 4
- end
- else -1
- end
- else
- (* Code points above 0x10ffff are not representable: *)
- failwith "Netconversion.write_utf8"
-;;
-
-
-let write_utf16_le s_out p_out l_out max_chars w p k_out c_out =
- if p >= 0xfffe then begin
- if p <= 0xffff or p > 0x10ffff then failwith "Netconversion.write_utf16_le";
- (* Must be written as surrogate pair *)
- if k_out + 3 < l_out && c_out < max_chars then begin
- let high = (p lsr 10) + 0xd800 in
- let low = (p land 0x3ff) + 0xdc00 in
- s_out.[p_out + k_out ] <- Char.chr (high land 0xff);
- s_out.[p_out + k_out + 1] <- Char.chr (high lsr 8);
- s_out.[p_out + k_out + 2] <- Char.chr (low land 0xff);
- s_out.[p_out + k_out + 3] <- Char.chr (low lsr 8);
- 4
- end
- else -1
- end
- else begin
- if k_out + 1 < l_out && c_out < max_chars then begin
- s_out.[p_out + k_out ] <- Char.chr (p land 0xff);
- s_out.[p_out + k_out + 1] <- Char.chr (p lsr 8);
- 2
- end
- else
- -1
- end
-;;
-
-
-let write_utf16_be s_out p_out l_out max_chars w p k_out c_out =
- if p >= 0xfffe then begin
- if p <= 0xffff or p > 0x10ffff then failwith "Netconversion.write_utf16_be";
- (* Must be written as surrogate pair *)
- if k_out + 3 < l_out && c_out < max_chars then begin
- let high = (p lsr 10) + 0xd800 in
- let low = (p land 0x3ff) + 0xdc00 in
- s_out.[p_out + k_out + 1] <- Char.chr (high land 0xff);
- s_out.[p_out + k_out ] <- Char.chr (high lsr 8);
- s_out.[p_out + k_out + 3] <- Char.chr (low land 0xff);
- s_out.[p_out + k_out + 2] <- Char.chr (low lsr 8);
- 4
- end
- else -1
- end
- else begin
- if k_out + 1 < l_out && c_out < max_chars then begin
- s_out.[p_out + k_out + 1] <- Char.chr (p land 0xff);
- s_out.[p_out + k_out ] <- Char.chr (p lsr 8);
- 2
- end
- else
- -1
- end
-;;
-
-
-let recode ~in_enc
- ~in_buf
- ~in_pos
- ~in_len
- ~out_enc
- ~out_buf
- ~out_pos
- ~out_len
- ~max_chars
- ~subst =
- if (in_pos < 0 || in_len < 0 || in_pos + in_len > String.length in_buf ||
- out_pos < 0 || out_len < 0 || out_pos + out_len > String.length out_buf)
- then
- invalid_arg "Netconversion.recode";
-
- let reader =
- match in_enc with
- `Enc_iso88591 -> read_iso88591
- | `Enc_usascii -> read_usascii
- | `Enc_utf8 -> read_utf8 false
- | `Enc_java -> read_utf8 true
- | `Enc_utf16 -> read_utf16
- | `Enc_utf16_le -> read_utf16_le 0
- | `Enc_utf16_be -> read_utf16_be 0
- | _ ->
- (try
- let to_unicode' = Hashtbl.find Netmappings.to_unicode in_enc in
- let to_unicode =
- Netmappings.lock();
- Lazy.force to_unicode' in
- Netmappings.unlock();
- read_8bit to_unicode in_enc
- with
- Not_found ->
- failwith("Support for the encoding `" ^
- string_of_encoding in_enc ^
- "' has not been compiled into Netstring")
- )
- in
- let writer =
- match out_enc with
- `Enc_iso88591 -> write_iso88591 out_buf out_pos out_len max_chars subst
- | `Enc_usascii -> write_usascii out_buf out_pos out_len max_chars subst
- | `Enc_utf8 -> write_utf8 false
- out_buf out_pos out_len max_chars subst
- | `Enc_java -> write_utf8 true out_buf out_pos out_len max_chars subst
- | `Enc_utf16 -> failwith "Netconversion.recode"
- | `Enc_utf16_le -> write_utf16_le out_buf out_pos out_len max_chars subst
- | `Enc_utf16_be -> write_utf16_be out_buf out_pos out_len max_chars subst
- | _ ->
- (try
- let from_unicode' = Hashtbl.find Netmappings.from_unicode out_enc
- in
- let from_unicode =
- Netmappings.lock();
- Lazy.force from_unicode' in
- Netmappings.unlock();
- write_8bit from_unicode out_buf out_pos out_len max_chars subst
- with
- Not_found ->
- failwith("Support for the encoding `" ^
- string_of_encoding out_enc ^
- "' has not been compiled into Netstring")
- )
- in
- reader writer in_buf in_pos in_len
-;;
-
-
-let makechar enc p =
- match enc with
- `Enc_iso88591 ->
- if p > 255 then raise Not_found;
- String.make 1 (Char.chr p)
- | `Enc_usascii ->
- if p > 127 then raise Not_found;
- String.make 1 (Char.chr p)
- | `Enc_utf8 ->
- let s = String.create 4 in
- let n = write_utf8 false s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
- String.sub s 0 n
- | `Enc_java ->
- let s = String.create 4 in
- let n = write_utf8 true s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
- String.sub s 0 n
- | `Enc_utf16_le ->
- let s = String.create 4 in
- let n = write_utf16_le s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
- String.sub s 0 n
- | `Enc_utf16_be ->
- let s = String.create 4 in
- let n = write_utf16_be s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
- String.sub s 0 n
- | `Enc_utf16 ->
- failwith "Netconversion.makechar"
- | _ ->
- let s = String.create 1 in
- let from_unicode' =
- try
- Hashtbl.find Netmappings.from_unicode enc
- with
- Not_found ->
- failwith("Support for the encoding `" ^
- string_of_encoding enc ^
- "' has not been compiled into Netstring")
- in
- let from_unicode =
- Netmappings.lock();
- Lazy.force from_unicode' in
- Netmappings.unlock();
- let n =
- write_8bit from_unicode s 0 1 1 (fun _ -> raise Not_found) p 0 0 in
- s
-;;
-
-
-let recode_string ~in_enc ~out_enc ?(subst = (fun _ -> raise Not_found)) s =
-
- let length = String.length s in
- let size = 1024 in
- let out_buf = String.create size in
-
- let rec recode_loop k s_done in_enc =
- (* 'k' bytes of 's' have already been processed, and the result is in
- * 's_done'.
- *)
- (* Recode to 'out_buf': *)
- let in_len = length - k in
- let in_done, out_done, in_enc' =
- recode ~in_enc:in_enc ~in_buf:s ~in_pos:k ~in_len:in_len
- ~out_enc:out_enc ~out_buf:out_buf ~out_pos:0 ~out_len:size
- ~max_chars:size ~subst:subst in
- (* Collect the results: *)
- let k' = k + in_done in
- let s_done' = String.sub out_buf 0 out_done :: s_done in
- (* Still something to do? *)
- if k' < length then
- recode_loop k' s_done' in_enc'
- else
- (* No: Concatenate s_done' to get the final result. *)
- String.concat "" (List.rev s_done')
- in
-
- recode_loop 0 [] in_enc
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/08/29 00:46:41 gerd
- * New type for the Unicode to 8 bit translation table.
- * The Netmappings tables are now Lazy.t.
- *
- * Revision 1.1 2000/08/13 00:02:57 gerd
- * Initial revision.
- *
- *
- * ======================================================================
- * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_encoding.ml):
- *
- * Revision 1.5 2000/07/27 00:41:14 gerd
- * new 8 bit codes
- *
- * Revision 1.4 2000/07/04 22:11:41 gerd
- * Implemented the enhancements and extensions of
- * rev. 1.4 of pxp_encoding.mli.
- *
- * Revision 1.3 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * Revision 1.2 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.1 2000/05/20 20:30:50 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *)
-
-exception Malformed_code
-
-(* Encodings:
- * - With the exception of UTF-8 and UTF-16, only single-byte character sets
- * are supported.
- * - I took the mappings from www.unicode.org, and the standard names of
- * the character sets from IANA. Obviously, many character sets are missing
- * that can be supported; especially ISO646 character sets, many EBCDIC
- * code pages.
- * - Because of the copyright statement from Unicode, I cannot put the
- * source tables that describe the mappings into the distribution. They
- * are publicly available from www.unicode.org.
- * - Because of this, it is difficult for you to extend the list of character
- * sets; you need the source tables I am not allowed to distribute.
- * These tables have a very simple format: Every line describes a pair
- * of code points; the left code (<= 0xff) is the code in the character
- * set, the right code (<= 0xffff) is the Unicode equivalent.
- * For an example, see
- * http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT
- * You can send me such files, and I will integrate them into the
- * distribution (if possible).
- * - I really do not know very much about the character sets used in
- * East Asia. If you need them, please write the necessary conversion
- * functions and send them to me.
- *
- * KNOWN PROBLEMS:
- * - The following charsets do not have a bijective mapping to Unicode:
- * adobe_standard_encoding, adobe_symbol_encoding,
- * adobe_zapf_dingbats_encoding, cp1002 (0xFEBE). The current implementation
- * simply removes one of the conflicting code point pairs - this might
- * not be what you want.
- *)
-
-type encoding =
- [ `Enc_utf8 (* UTF-8 *)
- | `Enc_java (* The variant of UTF-8 used by Java *)
- | `Enc_utf16 (* UTF-16 with unspecified endianness (restricted usage) *)
- | `Enc_utf16_le (* UTF-16 little endian *)
- | `Enc_utf16_be (* UTF-16 big endian *)
- | `Enc_usascii (* US-ASCII (only 7 bit) *)
- | `Enc_iso88591 (* ISO-8859-1 *)
- | `Enc_iso88592 (* ISO-8859-2 *)
- | `Enc_iso88593 (* ISO-8859-3 *)
- | `Enc_iso88594 (* ISO-8859-4 *)
- | `Enc_iso88595 (* ISO-8859-5 *)
- | `Enc_iso88596 (* ISO-8859-6 *)
- | `Enc_iso88597 (* ISO-8859-7 *)
- | `Enc_iso88598 (* ISO-8859-8 *)
- | `Enc_iso88599 (* ISO-8859-9 *)
- | `Enc_iso885910 (* ISO-8859-10 *)
- | `Enc_iso885913 (* ISO-8859-13 *)
- | `Enc_iso885914 (* ISO-8859-14 *)
- | `Enc_iso885915 (* ISO-8859-15 *)
- | `Enc_koi8r (* KOI8-R *)
- | `Enc_jis0201 (* JIS-0201 *)
- (* Microsoft: *)
- | `Enc_windows1250 (* WINDOWS-1250 *)
- | `Enc_windows1251 (* WINDOWS-1251 *)
- | `Enc_windows1252 (* WINDOWS-1252 *)
- | `Enc_windows1253 (* WINDOWS-1253 *)
- | `Enc_windows1254 (* WINDOWS-1254 *)
- | `Enc_windows1255 (* WINDOWS-1255 *)
- | `Enc_windows1256 (* WINDOWS-1256 *)
- | `Enc_windows1257 (* WINDOWS-1257 *)
- | `Enc_windows1258 (* WINDOWS-1258 *)
- (* IBM, ASCII-based: *)
- | `Enc_cp437
- | `Enc_cp737
- | `Enc_cp775
- | `Enc_cp850
- | `Enc_cp852
- | `Enc_cp855
- | `Enc_cp856
- | `Enc_cp857
- | `Enc_cp860
- | `Enc_cp861
- | `Enc_cp862
- | `Enc_cp863
- | `Enc_cp864
- | `Enc_cp865
- | `Enc_cp866
- | `Enc_cp869
- | `Enc_cp874
- | `Enc_cp1006
- (* IBM, EBCDIC-based: *)
- | `Enc_cp037
- | `Enc_cp424
- | `Enc_cp500
- | `Enc_cp875
- | `Enc_cp1026
- (* Adobe: *)
- | `Enc_adobe_standard_encoding
- | `Enc_adobe_symbol_encoding
- | `Enc_adobe_zapf_dingbats_encoding
- (* Apple: *)
- | `Enc_macroman
-
- ]
-
-
-val encoding_of_string : string -> encoding;;
- (* Returns the encoding value corresponding to the given name. Fails if
- * the name is unknown.
- * E.g. encoding_of_string "iso-8859-1" = `Enc_iso88591
- *)
-
-val string_of_encoding : encoding -> string;;
- (* Returns the name of the encoding. *)
-
-
-val makechar : encoding -> int -> string
- (* makechar enc i:
- * Creates the string representing the code point i in encoding enc.
- * Raises Not_found if the character is legal but cannot be represented
- * in enc.
- *
- * Possible encodings: everything but `Enc_utf16.
- *)
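-
- (* For example (illustrative): makechar `Enc_utf8 0x20AC should yield the
-  * three-byte string "\226\130\172", i.e. the UTF-8 form of U+20AC.
-  *)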
-
-val recode : in_enc:encoding ->
- in_buf:string ->
- in_pos:int ->
- in_len:int ->
- out_enc:encoding ->
- out_buf:string ->
- out_pos:int ->
- out_len:int ->
- max_chars:int ->
- subst:(int -> string) -> (int * int * encoding)
- (*
- * let (in_n, out_n, in_enc') =
- * recode in_enc in_buf in_pos in_len out_enc out_buf out_pos out_len
- * max_chars subst:
- * Converts the character sequence contained in the at most in_len bytes
- * of in_buf starting at position in_pos, and writes the result
- * into at most out_len bytes of out_buf starting at out_pos.
- * At most max_chars are written into out_buf.
- * The characters in in_buf are assumed to be encoded as in_enc, and the
- * characters in out_buf will be encoded as out_enc.
- * If there is a code point which cannot be represented in out_enc,
- * the function subst is called with the code point as argument, and the
- * resulting string (which must already be encoded as out_enc) is
- * inserted instead.
- * Note: It is possible that subst is called several times for the same
- * character.
- * Return value: out_n is the actual number of bytes written into out_buf.
- * in_n is the actual number of bytes that have been converted from
- * in_buf; in_n may be smaller than in_len because of incomplete
- * multi-byte characters, or because the output buffer has less space
- * for characters than the input buffer, or because of a change
- * of the encoding variant.
- * If there is at least one complete character in in_buf, and at least
- * space for one complete character in out_buf, and max_chars >= 1, it is
- * guaranteed that in_n > 0 or out_n > 0.
- * in_enc' is normally identical to in_enc. However, there are cases
- * in which the encoding can be refined when looking at the byte
- * sequence; for example whether a little endian or big endian variant
- * of the encoding is used. in_enc' is the variant of in_enc that was
- * used for the last character that has been converted.
- *
- * NOTES:
- *
- * Supported range of code points: 0 to 0xd7ff, 0xe000 to 0xfffd,
- * 0x10000 to 0x10ffff.
- *
- * Enc_utf8: Malformed UTF-8 byte sequences are always rejected. This
- * is also true for the sequence 0xc0 0x80 which is used by some software
- * (Java) as a representation of the code point 0.
- *
- * Enc_utf16: When reading from a string encoded as Enc_utf16, a byte
- * order mark is expected at the beginning. The detected variant
- * (Enc_utf16_le or Enc_utf16_be) is returned. The byte order mark is
- * not included into the output string. - It is not possible to
- * write as Enc_utf16.
- *
- * Enc_utf16_le, Enc_utf16_be: When reading from such a string, the
- * code point 0xfeff is returned as it is; it is a "zero-width
- * non-breaking space". The code point 0xfffe is rejected.
- *
- * Surrogate pairs: These are recognized (or written) only for a
- * UTF-16 encoding; and rejected for any other encoding.
- *
- * Rejected byte sequences cause the exception Malformed_code.
- *)
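-
- (* Illustrative call sketch (the concrete numbers are assumptions based on
-  * the description above; arguments are given in the order listed there):
-  *
-  *   let s = "Na\239ve" in              (* ISO-8859-1; 0xEF = U+00EF *)
-  *   let out = String.make 16 ' ' in
-  *   let (in_n, out_n, _) =
-  *     recode `Enc_iso88591 s 0 (String.length s)
-  *            `Enc_utf8 out 0 16 16 (fun _ -> "?") in
-  *   (* in_n should be 5, out_n should be 6;
-  *    * String.sub out 0 out_n is the UTF-8 encoded text. *)
-  *)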
-
-val recode_string : in_enc:encoding ->
- out_enc:encoding ->
- ?subst:(int -> string) ->
- string ->
- string
- (* Recodes a complete string from in_enc to out_enc, and returns it.
- * The function subst is invoked for code points of in_enc that cannot
- * be represented in out_enc, and the result of the function invocation
- * is substituted.
- * If subst is missing, Not_found is raised in this case.
- *)
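-
- (* For example (illustrative):
-  *
-  *   recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 "K\248benhavn"
-  *
-  * should return "K\195\184benhavn", i.e. the same text with the single
-  * ISO-8859-1 byte 0xF8 replaced by the two UTF-8 bytes 0xC3 0xB8.
-  *)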
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/08/13 00:02:57 gerd
- * Initial revision.
- *
- *
- * ======================================================================
- * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_encoding.mli):
- *
- * Revision 1.4 2000/07/04 22:05:58 gerd
- * Enhanced version of 'recode'. Labeled arguments.
- * New function 'recode_string'.
- *
- * Revision 1.3 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * Revision 1.2 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.1 2000/05/20 20:30:50 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-module Str = Netstring_str;;
-
-module Base64 = struct
- let b64_pattern plus slash =
- [| 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
- 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z';
- 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
- 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z';
- '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; plus; slash |];;
-
-
- let rfc_pattern = b64_pattern '+' '/';;
- let url_pattern = b64_pattern '-' '/';;
-
- let encode_with_options b64 equal s pos len linelen crlf =
- (* encode using "base64".
- * 'b64': The encoding table, created by b64_pattern.
- * 'equal': The character to use instead of the '=' of the original
- * encoding scheme. Pass '=' to get the original encoding scheme.
- * s, pos, len, linelen: See the interface description of encode_substring.
- *)
- assert (Array.length b64 = 64);
- if len < 0 or pos < 0 or pos > String.length s or linelen < 0 then
- invalid_arg "Netencoding.Base64.encode_with_options";
- if pos + len > String.length s then
- invalid_arg "Netencoding.Base64.encode_with_options";
-
- let linelen =
- (linelen/4) * 4 in
-
- let l_t = if len = 0 then 0 else ((len - 1) / 3 + 1) * 4 in
- (* l_t: length of the result without additional line endings *)
-
- let l_t' =
- if linelen < 4 then
- l_t
- else
- if l_t = 0 then 0 else
- let n_lines = ((l_t - 1) / linelen) + 1 in
- l_t + n_lines * (if crlf then 2 else 1)
- in
- (* l_t': length of the result with CRLF or LF characters *)
-
- let t = String.make l_t' equal in
- let j = ref 0 in
- let q = ref 0 in
- for k = 0 to len / 3 - 1 do
- let p = pos + 3*k in
- (* p >= pos >= 0: this is evident
- * p+2 < pos+len <= String.length s:
- * Because k <= len/3-1
- * 3*k <= 3*(len/3-1) = len - 3
- * pos+3*k+2 <= pos + len - 3 + 2 = pos + len - 1 < pos + len
- * So it is proved that the following unsafe string accesses always
- * work.
- *)
- let bits = (Char.code (String.unsafe_get s (p)) lsl 16) lor
- (Char.code (String.unsafe_get s (p+1)) lsl 8) lor
- (Char.code (String.unsafe_get s (p+2))) in
- (* Obviously, 'bits' is a 24 bit entity (i.e. bits < 2**24) *)
- assert(!j + 3 < l_t');
- String.unsafe_set t !j (Array.unsafe_get b64 ( bits lsr 18));
- String.unsafe_set t (!j+1) (Array.unsafe_get b64 ((bits lsr 12) land 63));
- String.unsafe_set t (!j+2) (Array.unsafe_get b64 ((bits lsr 6) land 63));
- String.unsafe_set t (!j+3) (Array.unsafe_get b64 ( bits land 63));
- j := !j + 4;
- if linelen > 3 then begin
- q := !q + 4;
- if !q + 4 > linelen then begin
- (* The next 4 characters won't fit on the current line. So insert
- * a line ending.
- *)
- if crlf then begin
- t.[ !j ] <- '\013';
- t.[ !j+1 ] <- '\010';
- j := !j + 2;
- end
- else begin
- t.[ !j ] <- '\010';
- incr j
- end;
- q := 0;
- end;
- end;
- done;
- (* padding if needed: *)
- let m = len mod 3 in
- begin
- match m with
- 0 -> ()
- | 1 ->
- let bits = Char.code (s.[pos + len - 1]) in
- t.[ !j ] <- b64.( bits lsr 2);
- t.[ !j + 1 ] <- b64.( (bits land 0x03) lsl 4);
- j := !j + 4;
- q := !q + 4;
- | 2 ->
- let bits = (Char.code (s.[pos + len - 2]) lsl 8) lor
- (Char.code (s.[pos + len - 1])) in
- t.[ !j ] <- b64.( bits lsr 10);
- t.[ !j + 1 ] <- b64.((bits lsr 4) land 0x3f);
- t.[ !j + 2 ] <- b64.((bits lsl 2) land 0x3f);
- j := !j + 4;
- q := !q + 4;
- | _ -> assert false
- end;
-
- (* If required, add another line end: *)
-
- if linelen > 3 & !q > 0 then begin
- if crlf then begin
- t.[ !j ] <- '\013';
- t.[ !j+1 ] <- '\010';
- j := !j + 2;
- end
- else begin
- t.[ !j ] <- '\010';
- incr j
- end;
- end;
-
- t ;;
-
-
-
- let encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- encode_with_options rfc_pattern '=' s pos l linelength crlf;;
-
-
- let encode_substring s ~pos ~len ~linelength ~crlf =
- encode_with_options rfc_pattern '=' s pos len linelength crlf;;
-
-
- let url_encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- encode_with_options url_pattern '.' s pos l linelength crlf;;
-
-
- let decode_substring t ~pos ~len ~url_variant:p_url ~accept_spaces:p_spaces =
- if len < 0 or pos < 0 or pos > String.length t then
- invalid_arg "Netencoding.Base64.decode_substring";
- if pos + len > String.length t then
- invalid_arg "Netencoding.Base64.decode_substring";
-
- (* Compute the number of effective characters l_t in 't';
- * pad_chars: number of '=' characters at the end of the string.
- *)
- let l_t, pad_chars =
- if p_spaces then begin
- (* Count all non-whitespace characters: *)
- let c = ref 0 in
- let p = ref 0 in
- for i = pos to pos + len - 1 do
- match String.unsafe_get t i with
- (' '|'\t'|'\r'|'\n') -> ()
- | ('='|'.') as ch ->
- if ch = '.' & not p_url then
- invalid_arg "Netencoding.Base64.decode_substring";
- incr c;
- incr p;
- if !p > 2 then
- invalid_arg "Netencoding.Base64.decode_substring";
- for j = i+1 to pos + len - 1 do
- match String.unsafe_get t j with
- (' '|'\t'|'\r'|'\n'|'.'|'=') -> ()
- | _ ->
- (* Only another '=' or spaces allowed *)
- invalid_arg "Netencoding.Base64.decode_substring";
- done
- | _ -> incr c
- done;
- if !c mod 4 <> 0 then
- invalid_arg "Netencoding.Base64.decode_substring";
- !c, !p
- end
- else
- len,
- ( if len mod 4 <> 0 then
- invalid_arg "Netencoding.Base64.decode_substring";
- if len > 0 then (
- if String.sub t (pos + len - 2) 2 = "==" or
- (p_url & String.sub t (pos + len - 2) 2 = "..") then 2
- else
- if String.sub t (pos + len - 1) 1 = "=" or
- (p_url & String.sub t (pos + len - 1) 1 = ".") then 1
- else
- 0
- )
- else 0
- )
- in
-
- let l_s = (l_t / 4) * 3 - pad_chars in (* sic! *)
- let s = String.create l_s in
-
- let decode_char c =
- match c with
- 'A' .. 'Z' -> Char.code(c) - 65 (* 65 = Char.code 'A' *)
- | 'a' .. 'z' -> Char.code(c) - 71 (* 71 = Char.code 'a' - 26 *)
- | '0' .. '9' -> Char.code(c) + 4 (* -4 = Char.code '0' - 52 *)
- | '+' -> 62
- | '-' -> if not p_url then
- invalid_arg "Netencoding.Base64.decode_substring";
- 62
- | '/' -> 63
- | _ -> invalid_arg "Netencoding.Base64.decode_substring";
- in
-
- (* Decode all but the last quartet: *)
-
- let cursor = ref pos in
- let rec next_char() =
- match t.[ !cursor ] with
- (' '|'\t'|'\r'|'\n') ->
- if p_spaces then (incr cursor; next_char())
- else invalid_arg "Netencoding.Base64.decode_substring"
- | c ->
- incr cursor; c
- in
-
- if p_spaces then begin
- for k = 0 to l_t / 4 - 2 do
- let q = 3*k in
- let c0 = next_char() in
- let c1 = next_char() in
- let c2 = next_char() in
- let c3 = next_char() in
- let n0 = decode_char c0 in
- let n1 = decode_char c1 in
- let n2 = decode_char c2 in
- let n3 = decode_char c3 in
- let x0 = (n0 lsl 2) lor (n1 lsr 4) in
- let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
- let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
- String.unsafe_set s q (Char.chr x0);
- String.unsafe_set s (q+1) (Char.chr x1);
- String.unsafe_set s (q+2) (Char.chr x2);
- done;
- end
- else begin
- (* Much faster: *)
- for k = 0 to l_t / 4 - 2 do
- let p = pos + 4*k in
- let q = 3*k in
- let c0 = String.unsafe_get t p in
- let c1 = String.unsafe_get t (p + 1) in
- let c2 = String.unsafe_get t (p + 2) in
- let c3 = String.unsafe_get t (p + 3) in
- let n0 = decode_char c0 in
- let n1 = decode_char c1 in
- let n2 = decode_char c2 in
- let n3 = decode_char c3 in
- let x0 = (n0 lsl 2) lor (n1 lsr 4) in
- let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
- let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
- String.unsafe_set s q (Char.chr x0);
- String.unsafe_set s (q+1) (Char.chr x1);
- String.unsafe_set s (q+2) (Char.chr x2);
- done;
- cursor := pos + l_t - 4;
- end;
-
- (* Decode the last quartet: *)
-
- if l_t > 0 then begin
- let q = 3*(l_t / 4 - 1) in
- let c0 = next_char() in
- let c1 = next_char() in
- let c2 = next_char() in
- let c3 = next_char() in
-
- if (c2 = '=' & c3 = '=') or (p_url & c2 = '.' & c3 = '.') then begin
- let n0 = decode_char c0 in
- let n1 = decode_char c1 in
- let x0 = (n0 lsl 2) lor (n1 lsr 4) in
- s.[ q ] <- Char.chr x0;
- end
- else
- if (c3 = '=') or (p_url & c3 = '.') then begin
- let n0 = decode_char c0 in
- let n1 = decode_char c1 in
- let n2 = decode_char c2 in
- let x0 = (n0 lsl 2) lor (n1 lsr 4) in
- let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
- s.[ q ] <- Char.chr x0;
- s.[ q+1 ] <- Char.chr x1;
- end
- else begin
- let n0 = decode_char c0 in
- let n1 = decode_char c1 in
- let n2 = decode_char c2 in
- let n3 = decode_char c3 in
- let x0 = (n0 lsl 2) lor (n1 lsr 4) in
- let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
- let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
- s.[ q ] <- Char.chr x0;
- s.[ q+1 ] <- Char.chr x1;
- s.[ q+2 ] <- Char.chr x2;
- end
-
- end;
-
- s ;;
-
-
-
- let decode ?(pos=0) ?len ?(url_variant=true) ?(accept_spaces=false) s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- decode_substring s pos l url_variant accept_spaces;;
-
- let decode_ignore_spaces s =
- decode_substring s 0 (String.length s) true true;;
-
-
-end
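-
-(* Usage sketch (not part of the original module; assumes this file is
- * compiled as Netencoding and linked in): a plain round trip with the
- * default options. The literal below is ordinary RFC 2045 Base64.
- *
- *   let e = Netencoding.Base64.encode "Hello, world!" in
- *   (* e = "SGVsbG8sIHdvcmxkIQ==" *)
- *   assert (Netencoding.Base64.decode e = "Hello, world!")
- *)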
-
-
-
-module QuotedPrintable = struct
-
- let encode_substring s ~pos ~len =
-
- if len < 0 or pos < 0 or pos > String.length s then
- invalid_arg "Netencoding.QuotedPrintable.encode_substring";
- if pos + len > String.length s then
- invalid_arg "Netencoding.QuotedPrintable.encode_substring";
-
- let rec count n i =
- if i < len then
- match String.unsafe_get s (pos+i) with
- ('\r'|'\n') ->
- count (n+1) (i+1)
- | ('\000'..'\031'|'\127'..'\255'|
- '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') ->
- count (n+3) (i+1)
- | ' ' ->
- (* Protect spaces only if they occur at the end of a line *)
- if i+1 < len then
- match s.[pos+i+1] with
- ('\r'|'\n') ->
- count (n+3) (i+1)
- | _ ->
- count (n+1) (i+1)
- else
- count (n+3) (i+1)
- | _ ->
- count (n+1) (i+1)
- else
- n
- in
-
- let l = count 0 0 in
- let t = String.create l in
-
- let hexdigit =
- [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
- '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in
-
- let k = ref 0 in
-
- let add_quoted c =
- t.[ !k ] <- '=';
- t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
- t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
- in
-
- for i = 0 to len - 1 do
- match String.unsafe_get s (pos+i) with
- ('\r'|'\n') as c ->
- String.unsafe_set t !k c;
- incr k
- | ('\000'..'\031'|'\127'..'\255'|
- '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') as c ->
- add_quoted c;
- k := !k + 3
- | ' ' ->
- (* Protect spaces only if they occur at the end of a line *)
- if i+1 < len then
- match s.[pos+i+1] with
- ('\r'|'\n') ->
- add_quoted ' ';
- k := !k + 3;
- | _ ->
- String.unsafe_set t !k ' ';
- incr k
- else begin
- add_quoted ' ';
- k := !k + 3;
- end
- | c ->
- String.unsafe_set t !k c;
- incr k
- done;
-
- t ;;
-
-
- let encode ?(pos=0) ?len s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- encode_substring s pos l;;
-
-
-
- let decode_substring s ~pos ~len =
-
- if len < 0 or pos < 0 or pos > String.length s then
- invalid_arg "Netencoding.QuotedPrintable.decode_substring";
- if pos + len > String.length s then
- invalid_arg "Netencoding.QuotedPrintable.decode_substring";
-
- let decode_hex c =
- match c with
- '0'..'9' -> Char.code c - 48
- | 'A'..'F' -> Char.code c - 55
- | 'a'..'f' -> Char.code c - 87
- | _ ->
- invalid_arg "Netencoding.QuotedPrintable.decode_substring";
- in
-
- let rec count n i =
- if i < len then
- match String.unsafe_get s (pos+i) with
- '=' ->
- if i+1 = len then
- (* A '=' at EOF is ignored *)
- count n (i+1)
- else
- if i+1 < len then
- match s.[pos+i+1] with
- '\r' ->
- (* Official soft break *)
- if i+2 < len & s.[pos+i+2] = '\n' then
- count n (i+3)
- else
- count n (i+2)
- | '\n' ->
- (* Unofficial soft break *)
- count n (i+2)
- | _ ->
- if i+2 >= len then
- invalid_arg
- "Netencoding.QuotedPrintable.decode_substring";
- let _ = decode_hex s.[pos+i+1] in
- let _ = decode_hex s.[pos+i+2] in
- count (n+1) (i+3)
- else
- invalid_arg "Netencoding.QuotedPrintable.decode_substring"
- | _ ->
- count (n+1) (i+1)
- else
- n
- in
-
- let l = count 0 0 in
- let t = String.create l in
- let k = ref pos in
- let e = pos + len in
- let i = ref 0 in
-
- while !i < l do
- match String.unsafe_get s !k with
- '=' ->
- if !k+1 = e then
- (* A '=' at EOF is ignored *)
- ()
- else
- if !k+1 < e then
- match s.[!k+1] with
- '\r' ->
- (* Official soft break *)
- if !k+2 < e & s.[!k+2] = '\n' then
- k := !k + 3
- else
- k := !k + 2
- | '\n' ->
- (* Unofficial soft break *)
- k := !k + 2
- | _ ->
- if !k+2 >= e then
- invalid_arg
- "Netencoding.QuotedPrintable.decode_substring";
- let x1 = decode_hex s.[!k+1] in
- let x2 = decode_hex s.[!k+2] in
- t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
- k := !k + 3;
- incr i
- else
- invalid_arg "Netencoding.QuotedPrintable.decode_substring"
- | c ->
- String.unsafe_set t !i c;
- incr k;
- incr i
- done;
-
- t ;;
-
-
- let decode ?(pos=0) ?len s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- decode_substring s pos l;;
-
-end
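-
-(* Usage sketch (not part of the original module): '=' must itself be
- * protected, while CR and LF are copied literally:
- *
- *   Netencoding.QuotedPrintable.encode "a=b\r\n"    (* = "a=3Db\r\n" *)
- *   Netencoding.QuotedPrintable.decode "a=3Db"      (* = "a=b" *)
- *)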
-
-
-module Q = struct
-
- let encode_substring s ~pos ~len =
-
- if len < 0 or pos < 0 or pos > String.length s then
- invalid_arg "Netencoding.Q.encode_substring";
- if pos + len > String.length s then
- invalid_arg "Netencoding.Q.encode_substring";
-
- let rec count n i =
- if i < len then
- match String.unsafe_get s (pos+i) with
- | ('A'..'Z'|'a'..'z'|'0'..'9') ->
- count (n+1) (i+1)
- | _ ->
- count (n+3) (i+1)
- else
- n
- in
-
- let l = count 0 0 in
- let t = String.create l in
-
- let hexdigit =
- [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
- '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in
-
- let k = ref 0 in
-
- let add_quoted c =
- t.[ !k ] <- '=';
- t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
- t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
- in
-
- for i = 0 to len - 1 do
- match String.unsafe_get s (pos+i) with
- | ('A'..'Z'|'a'..'z'|'0'..'9') as c ->
- String.unsafe_set t !k c;
- incr k
- | c ->
- add_quoted c;
- k := !k + 3
- done;
-
- t ;;
-
-
- let encode ?(pos=0) ?len s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- encode_substring s pos l;;
-
-
-
- let decode_substring s ~pos ~len =
-
- if len < 0 or pos < 0 or pos > String.length s then
- invalid_arg "Netencoding.Q.decode_substring";
- if pos + len > String.length s then
- invalid_arg "Netencoding.Q.decode_substring";
-
- let decode_hex c =
- match c with
- '0'..'9' -> Char.code c - 48
- | 'A'..'F' -> Char.code c - 55
- | 'a'..'f' -> Char.code c - 87
- | _ ->
- invalid_arg "Netencoding.Q.decode_substring";
- in
-
- let rec count n i =
- if i < len then
- match String.unsafe_get s (pos+i) with
- '=' ->
- if i+2 >= len then
- invalid_arg "Netencoding.Q.decode_substring";
- let _ = decode_hex s.[pos+i+1] in
- let _ = decode_hex s.[pos+i+2] in
- count (n+1) (i+3)
- | _ -> (* including '_' *)
- count (n+1) (i+1)
- else
- n
- in
-
- let l = count 0 0 in
- let t = String.create l in
- let k = ref pos in
- let e = pos + len in
- let i = ref 0 in
-
- while !i < l do
- match String.unsafe_get s !k with
- '=' ->
- if !k+2 >= e then
- invalid_arg "Netencoding.Q.decode_substring";
- let x1 = decode_hex s.[!k+1] in
- let x2 = decode_hex s.[!k+2] in
- t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
- k := !k + 3;
- incr i
- | '_' ->
- String.unsafe_set t !i ' ';
- incr k;
- incr i
- | c ->
- String.unsafe_set t !i c;
- incr k;
- incr i
- done;
-
- t ;;
-
-
- let decode ?(pos=0) ?len s =
- let l = match len with None -> String.length s - pos | Some x -> x in
- decode_substring s pos l ;;
-
-end
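-
-(* Usage sketch (not part of the original module): every non-alphanumeric
- * character becomes a hex token on encoding; on decoding, '_' additionally
- * stands for a space (RFC 2047):
- *
- *   Netencoding.Q.encode "a b"       (* = "a=20b" *)
- *   Netencoding.Q.decode "a_b=21"    (* = "a b!" *)
- *)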
-
-
-module Url = struct
- let hex_digits =
- [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
- '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |];;
-
- let to_hex2 k =
- (* Converts k to a 2-digit hex string *)
- let s = String.create 2 in
- s.[0] <- hex_digits.( (k lsr 4) land 15 );
- s.[1] <- hex_digits.( k land 15 );
- s ;;
-
-
- let of_hex1 c =
- match c with
- ('0'..'9') -> Char.code c - Char.code '0'
- | ('A'..'F') -> Char.code c - Char.code 'A' + 10
- | ('a'..'f') -> Char.code c - Char.code 'a' + 10
- | _ ->
- raise Not_found ;;
-
-
-
- let url_encoding_re =
- Str.regexp "[^A-Za-z0-9$_.!*'(),-]";;
-
- let url_decoding_re =
- Str.regexp "\\+\\|%..\\|%.\\|%";;
-
-
- let encode s =
- Str.global_substitute
- url_encoding_re
- (fun r _ ->
- match Str.matched_string r s with
- " " -> "+"
- | x ->
- let k = Char.code(x.[0]) in
- "%" ^ to_hex2 k
- )
- s ;;
-
-
- let decode s =
- let l = String.length s in
- Str.global_substitute
- url_decoding_re
- (fun r _ ->
- match Str.matched_string r s with
- | "+" -> " "
- | _ ->
- let i = Str.match_beginning r in
- (* Assertion: s.[i] = '%' *)
- if i+2 >= l then failwith "Cgi.decode";
- let c1 = s.[i+1] in
- let c2 = s.[i+2] in
- begin
- try
- let k1 = of_hex1 c1 in
- let k2 = of_hex1 c2 in
- String.make 1 (Char.chr((k1 lsl 4) lor k2))
- with
- Not_found ->
- failwith "Cgi.decode"
- end
- )
- s ;;
-
-end
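-
-(* Usage sketch (not part of the original module): spaces become '+', other
- * reserved characters become %XX escapes:
- *
- *   Netencoding.Url.encode "a b&c"     (* = "a+b%26c" *)
- *   Netencoding.Url.decode "a+b%26c"   (* = "a b&c" *)
- *)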
-
-
-module Html = struct
-
- let eref_re =
- Str.regexp
- "&\\(#\\([0-9]+\\);\\|\\([a-zA-Z]+\\);\\)" ;;
- let unsafe_re = Str.regexp "[<>&\"\000-\008\011-\012\014-\031\127-\255]" ;;
-
- let etable =
- [ "lt", "<";
- "gt", ">";
- "amp", "&";
- "quot", "\"";
- (* Note: &quot; is new in HTML-4.0, but it has been widely used
- * much earlier.
- *)
- "nbsp", "\160";
- "iexcl", "\161";
- "cent", "\162";
- "pound", "\163";
- "curren", "\164";
- "yen", "\165";
- "brvbar", "\166";
- "sect", "\167";
- "uml", "\168";
- "copy", "\169";
- "ordf", "\170";
- "laquo", "\171";
- "not", "\172";
- "shy", "\173";
- "reg", "\174";
- "macr", "\175";
- "deg", "\176";
- "plusmn", "\177";
- "sup2", "\178";
- "sup3", "\179";
- "acute", "\180";
- "micro", "\181";
- "para", "\182";
- "middot", "\183";
- "cedil", "\184";
- "sup1", "\185";
- "ordm", "\186";
- "raquo", "\187";
- "frac14", "\188";
- "frac12", "\189";
- "frac34", "\190";
- "iquest", "\191";
- "Agrave", "\192";
- "Aacute", "\193";
- "Acirc", "\194";
- "Atilde", "\195";
- "Auml", "\196";
- "Aring", "\197";
- "AElig", "\198";
- "Ccedil", "\199";
- "Egrave", "\200";
- "Eacute", "\201";
- "Ecirc", "\202";
- "Euml", "\203";
- "Igrave", "\204";
- "Iacute", "\205";
- "Icirc", "\206";
- "Iuml", "\207";
- "ETH", "\208";
- "Ntilde", "\209";
- "Ograve", "\210";
- "Oacute", "\211";
- "Ocirc", "\212";
- "Otilde", "\213";
- "Ouml", "\214";
- "times", "\215";
- "Oslash", "\216";
- "Ugrave", "\217";
- "Uacute", "\218";
- "Ucirc", "\219";
- "Uuml", "\220";
- "Yacute", "\221";
- "THORN", "\222";
- "szlig", "\223";
- "agrave", "\224";
- "aacute", "\225";
- "acirc", "\226";
- "atilde", "\227";
- "auml", "\228";
- "aring", "\229";
- "aelig", "\230";
- "ccedil", "\231";
- "egrave", "\232";
- "eacute", "\233";
- "ecirc", "\234";
- "euml", "\235";
- "igrave", "\236";
- "iacute", "\237";
- "icirc", "\238";
- "iuml", "\239";
- "eth", "\240";
- "ntilde", "\241";
- "ograve", "\242";
- "oacute", "\243";
- "ocirc", "\244";
- "otilde", "\245";
- "ouml", "\246";
- "divide", "\247";
- "oslash", "\248";
- "ugrave", "\249";
- "uacute", "\250";
- "ucirc", "\251";
- "uuml", "\252";
- "yacute", "\253";
- "thorn", "\254";
- "yuml", "\255";
- ] ;;
-
- let quick_etable =
- let ht = Hashtbl.create 50 in
- List.iter (fun (name,value) -> Hashtbl.add ht name value) etable;
- (* Entities to be decoded, but that must not be encoded: *)
- Hashtbl.add ht "apos" "'"; (* used in XML documents *)
- ht ;;
-
- let rev_etable =
- let a = Array.create 256 "" in
- List.iter (fun (name,value) ->
- a.(Char.code(value.[0])) <- "&" ^ name ^ ";") etable;
- for i = 0 to 8 do
- a.(i) <- "&#" ^ string_of_int i ^ ";"
- done;
- for i = 11 to 12 do
- a.(i) <- "&#" ^ string_of_int i ^ ";"
- done;
- for i = 14 to 31 do
- a.(i) <- "&#" ^ string_of_int i ^ ";"
- done;
- for i = 127 to 159 do
- a.(i) <- "&#" ^ string_of_int i ^ ";"
- done;
- a ;;
-
- let decode_to_latin1 s =
- Str.global_substitute
- eref_re
- (fun r _ ->
- let t = Str.matched_string r s in
- try
- let n = int_of_string(Str.matched_group r 2 s) in
- if n < 256 then
- String.make 1 (Char.chr n)
- else
- t
- with
- Not_found ->
- try
- let name = Str.matched_group r 3 s in
- try
- Hashtbl.find quick_etable name
- with
- Not_found ->
- t
- with
- Not_found -> assert false
- )
- s ;;
-
- let encode_from_latin1 s =
- Str.global_substitute
- unsafe_re
- (fun r _ ->
- let t = Str.matched_string r s in
- let i = Char.code (t.[0]) in
- rev_etable.(i)
- )
- s ;;
-end
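-
-(* Usage sketch (not part of the original module): markup characters,
- * most control characters and all codes >= 127 are replaced by entity
- * references:
- *
- *   Netencoding.Html.encode_from_latin1 "x < 1 & y"
- *   (* = "x &lt; 1 &amp; y" *)
- *)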
-
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.5 2000/06/25 22:34:43 gerd
- * Added labels to arguments.
- *
- * Revision 1.4 2000/06/25 21:15:48 gerd
- * Checked thread-safety.
- *
- * Revision 1.3 2000/03/03 17:03:16 gerd
- * Q encoding: CR and LF are quoted.
- *
- * Revision 1.2 2000/03/03 01:08:29 gerd
- * Added Netencoding.Html functions.
- *
- * Revision 1.1 2000/03/02 01:14:48 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(**********************************************************************)
-(* Several encodings important for the net *)
-(**********************************************************************)
-
-
-(**********************************************************************)
-(* Base 64 encoding *)
-(**********************************************************************)
-
-(* See RFC 2045 for a description of Base 64 encoding. *)
-
-(* THREAD-SAFETY:
- * All Base64 functions are reentrant and thus thread-safe.
- *)
-
-module Base64 : sig
-
- val encode : ?pos:int -> ?len:int -> ?linelength:int -> ?crlf:bool ->
- string -> string
- (* Compute the "base 64" encoding of the given string argument.
- * Note that the result is a string that only contains the characters
- * a-z, A-Z, 0-9, +, /, =, and optionally CR and LF characters.
- *
- * If pos and/or len are passed, only the substring starting at
- * pos (default: 0) with length len (default: rest of the string)
- * is encoded.
- *
- * The result is divided up into lines not longer than 'linelength'
- * (without counting the line separator); default: do not divide lines.
- * If 'linelength' is smaller than 4, no line division is performed.
- * If 'linelength' is not divisible by 4, the produced lines are a
- * bit shorter than 'linelength'.
- *
- * If 'crlf' (default: false) the lines are ended by CRLF; otherwise
- * they are only ended by LF.
- * (You need the crlf option to produce correct MIME messages.)
- *
- *)
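-
- (* Usage sketch (not part of the original interface): a MIME body is
-  * typically produced with lines of at most 76 characters ended by CRLF;
-  * 'data' stands for whatever raw string is to be encoded:
-  *
-  *   let body = Netencoding.Base64.encode ~linelength:76 ~crlf:true data
-  *)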
-
- val url_encode : ?pos:int -> ?len:int -> ?linelength:int -> ?crlf:bool ->
- string -> string
- (* Same as 'encode' but use slightly different characters that can be
- * part of URLs without additional encodings.
- * The encoded string consists only of the characters a-z, A-Z, 0-9,
- * -, /, .
- * 'url_encode' does NOT implement the Base 64 encoding as described
- * in the standard!
- *)
-
- val encode_substring : string -> pos:int -> len:int -> linelength:int ->
- crlf:bool -> string
- (* *** DEPRECATED FUNCTION *** Use 'encode' instead! ***
- *
- * encode_substring s pos len linelen crlf:
- * Encodes the substring at position 'pos' in 's' with length 'len'.
- * The result is divided up into lines not longer than 'linelen' (without
- * counting the line separator).
- * If 'linelen' is smaller than 4, no line division is performed.
- * If 'linelen' is not divisible by 4, the produced lines are a
- * bit shorter than 'linelen'.
- * If 'crlf' the lines are ended by CRLF; otherwise they are only
- * ended by LF.
- * (You need the crlf option to produce correct MIME messages.)
- *)
-
- val decode : ?pos:int -> ?len:int -> ?url_variant:bool ->
- ?accept_spaces:bool -> string -> string
- (* Decodes the given string argument.
- *
- * If pos and/or len are passed, only the substring starting at
- * pos (default: 0) with length len (default: rest of the string)
- * is decoded.
- *
- * If url_variant (default: true) is set, the function also
- * accepts the characters '-' and '.' as produced by 'url_encode'.
- *
- * If accept_spaces (default: false) is set, the function ignores
- * white space contained in the string to decode (otherwise the
- * function fails if it finds white space).
- *)
-
- val decode_ignore_spaces : string -> string
- (* *** DEPRECATED FUNCTION *** Use 'decode' instead! ***
- *
- * Decodes the string, too, but it is allowed that the string contains
- * whitespace characters.
- * This function is slower than 'decode'.
- *)
-
- val decode_substring : string -> pos:int -> len:int -> url_variant:bool ->
- accept_spaces:bool -> string
- (* *** DEPRECATED FUNCTION *** Use 'decode' instead! ***
- *
- * decode_substring s pos len url spaces:
- * Decodes the substring of 's' beginning at 'pos' with length 'len'.
- * If 'url', strings created by 'url_encode' are accepted, too.
- * If 'spaces', whitespace characters are allowed in the string.
- *)
-end
-
-(**********************************************************************)
-(* Quoted printable encoding *)
-(**********************************************************************)
-
-(* See RFC 2045.
- * This implementation assumes that the encoded string has a text MIME
- * type. Because of this, the characters CR and LF are never protected
- * by hex tokens; they are copied literally to the output string.
- *)
-
-(* THREAD-SAFETY:
- * All QuotedPrintable functions are reentrant and thus thread-safe.
- *)
-
-module QuotedPrintable :
- sig
- val encode : ?pos:int -> ?len:int -> string -> string
- (* Encodes the string and returns it.
- * Note line breaks:
- * No additional soft line breaks are added. The characters CR
- * and LF are not represented as =0D resp. =0A. (But other control
- * characters ARE encoded.)
- * Note unsafe characters:
- * As recommended by RFC 2045, the characters !\"#$@[]^`{|}~
- * are additionally represented as hex tokens. -- "
- *
- * If pos and/or len are passed, only the substring starting at
- * pos (default: 0) with length len (default: rest of the string)
- * is encoded.
- *)
-
- val encode_substring : string -> pos:int -> len:int -> string
- (* *** DEPRECATED FUNCTION *** Use 'encode' instead! ***
- * encode_substring s pos len:
- * Encodes the substring of 's' beginning at 'pos' with length 'len'.
- *)
-
- val decode : ?pos:int -> ?len:int -> string -> string
- (* Decodes the string and returns it.
- * Most format errors cause an Invalid_argument exception.
- * Note that soft line breaks can be properly decoded although
- * 'encode' will never produce them.
- *
- * If pos and/or len are passed, only the substring starting at
- * pos (default: 0) with length len (default: rest of the string)
- * is decoded.
- *)
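-
- (* Usage sketch (not part of the original interface): a soft line break
-  * ("=" at the end of a line) disappears on decoding:
-  *
-  *   Netencoding.QuotedPrintable.decode "foo=\r\nbar"   (* = "foobar" *)
-  *)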
-
- val decode_substring : string -> pos:int -> len:int -> string
- (* *** DEPRECATED FUNCTION *** Use 'decode' instead! ***
- * decode_substring s pos len:
- * Decodes the substring of 's' beginning at 'pos' with length 'len'.
- *)
-
- end
-
-(**********************************************************************)
-(* Q encoding *)
-(**********************************************************************)
-
-(* See RFC 2047.
- * The functions behave similarly to those of QuotedPrintable.
- *)
-
-(* THREAD-SAFETY:
- * All Q functions are reentrant and thus thread-safe.
- *)
-
-module Q :
- sig
- val encode : ?pos:int -> ?len:int -> string -> string
- (* Note:
- * All characters except alphanumeric characters are protected by
- * hex tokens.
- * In particular, spaces are represented as "=20", not as "_".
- *)
-
- val decode : ?pos:int -> ?len:int -> string -> string
-
- val encode_substring : string -> pos:int -> len:int -> string
- (* *** DEPRECATED FUNCTION *** Use 'encode' instead! *** *)
-
- val decode_substring : string -> pos:int -> len:int -> string
- (* *** DEPRECATED FUNCTION *** Use 'decode' instead! *** *)
- end
-
-(**********************************************************************)
-(* B encoding *)
-(**********************************************************************)
-
-(* The B encoding of RFC 2047 is the same as Base64. *)
-
-
-(**********************************************************************)
-(* URL-encoding *)
-(**********************************************************************)
-
-(* Encoding/Decoding within URLs:
- *
- * The following two functions perform the '%'-substitution for
- * characters that may otherwise be interpreted as metacharacters.
- *
- * According to: RFC 1738, RFC 1630
- *)
-
-(* THREAD-SAFETY:
- * The Url functions are thread-safe.
- *)
-
-module Url :
- sig
- val decode : string -> string
- val encode : string -> string
- end
-
-
-(**********************************************************************)
-(* HTMLization *)
-(**********************************************************************)
-
-(* Encodes characters that need protection by converting them to
- * entity references. E.g. "<" is converted to "&lt;".
- * As the entities may be named, there is a dependency on the character
- * set. Currently, there are only functions for the Latin 1 alphabet.
- *)
-
-(* THREAD-SAFETY:
- * The Html functions are thread-safe.
- *)
-
-module Html :
- sig
- val encode_from_latin1 : string -> string
- (* Encodes the characters 0-8, 11-12, 14-31, '<', '>', '"', '&',
- * 127-255. If the characters have a name, a named entity is
- * preferred over a numeric entity.
- *)
- val decode_to_latin1 : string -> string
- (* Decodes the string. Unknown named entities are left as they
- * are (i.e. decode_to_latin1 "&nonsense;" = "&nonsense;").
- * The same applies to numeric entities greater than 255.
- *)
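-
- (* Usage sketch (not part of the original interface): numeric references
-  * below 256 are resolved, unknown named entities are left untouched:
-  *
-  *   Netencoding.Html.decode_to_latin1 "&#169; &nonsense;"
-  *     = "\169 &nonsense;"
-  *)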
- end
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.4 2000/06/25 22:34:43 gerd
- * Added labels to arguments.
- *
- * Revision 1.3 2000/06/25 21:15:48 gerd
- * Checked thread-safety.
- *
- * Revision 1.2 2000/03/03 01:08:29 gerd
- * Added Netencoding.Html functions.
- *
- * Revision 1.1 2000/03/02 01:14:48 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Nethtml_scanner;;
-
-type document =
- Element of (string * (string*string) list * document list)
- | Data of string
-;;
-
-
-exception End_of_scan;;
-
-
-let no_end_tag = (* empty HTML elements *)
- ref
- [ "isindex";
- "base";
- "meta";
- "link";
- "hr";
- "input";
- "img";
- "param";
- "basefont";
- "br";
- "area";
- ]
-;;
-
-
-let special_tag = (* other lexical rules *)
- ref
- [ "script";
- "style";
- ]
-;;
-
-
-let rec parse_comment buf =
- let t = scan_comment buf in
- match t with
- Mcomment ->
- parse_comment buf
- | Eof ->
- raise End_of_scan
- | _ ->
- ()
-;;
-
-
-let rec parse_doctype buf =
- let t = scan_doctype buf in
- match t with
- Mdoctype ->
- parse_doctype buf
- | Eof ->
- raise End_of_scan
- | _ ->
- ()
-;;
-
-
-let parse_document buf =
- let current_name = ref "" in
- let current_atts = ref [] in
- let current_subs = ref [] in
- let stack = Stack.create() in
-
- let parse_atts() =
- let rec next_no_space() =
- match scan_element buf with
- Space _ -> next_no_space()
- | t -> t
- in
-
- let rec parse_atts_lookahead next =
- match next with
- Relement -> []
- | Name n ->
- begin match next_no_space() with
- Is ->
- begin match next_no_space() with
- Name v ->
- (String.lowercase n, v) ::
- parse_atts_lookahead (next_no_space())
- | Literal v ->
- (String.lowercase n,v) ::
- parse_atts_lookahead (next_no_space())
- | Eof ->
- raise End_of_scan
- | Relement ->
- (* Illegal *)
- []
- | _ ->
- (* Illegal *)
- parse_atts_lookahead (next_no_space())
- end
- | Eof ->
- raise End_of_scan
- | Relement ->
- (* <tag name> <==> <tag name="name"> *)
- [ String.lowercase n, String.lowercase n ]
- | next' ->
- (* assume <tag name ... > <==> <tag name="name" ...> *)
- ( String.lowercase n, String.lowercase n ) ::
- parse_atts_lookahead next'
- end
- | Eof ->
- raise End_of_scan
- | _ ->
- (* Illegal *)
- parse_atts_lookahead (next_no_space())
- in
- parse_atts_lookahead (next_no_space())
- in
-
- let rec parse_special name =
- (* Parse until </name> *)
- match scan_special buf with
- Lelementend n ->
- if n = name then
- ""
- else
- "</" ^ n ^ parse_special name
- | Eof ->
- raise End_of_scan
- | Cdata s ->
- s ^ parse_special name
- | _ ->
- (* Illegal *)
- parse_special name
- in
-
- let rec skip_element() =
- (* Skip until ">" *)
- match scan_element buf with
- Relement ->
- ()
- | Eof ->
- raise End_of_scan
- | _ ->
- skip_element()
- in
-
- let rec parse_next() =
- let t = scan_document buf in
- match t with
- Lcomment ->
- parse_comment buf;
- parse_next()
- | Ldoctype ->
- parse_doctype buf;
- parse_next()
- | Lelement name ->
- let name = String.lowercase name in
- if List.mem name !no_end_tag then begin
- let atts = parse_atts() in
- current_subs := (Element(name, atts, [])) :: !current_subs;
- parse_next()
- end
- else if List.mem name !special_tag then begin
- let atts = parse_atts() in
- let data = parse_special name in
- (* Read until ">" *)
- skip_element();
- current_subs := (Element(name, atts, [Data data])) :: !current_subs;
- parse_next()
- end
- else begin
- let atts = parse_atts() in
- Stack.push (!current_name, !current_atts, !current_subs) stack;
- current_name := name;
- current_atts := atts;
- current_subs := [];
- parse_next()
- end
- | Cdata data ->
- current_subs := (Data data) :: !current_subs;
- parse_next()
- | Lelementend name ->
- let name = String.lowercase name in
- (* Read until ">" *)
- skip_element();
- (* Search the element to close on the stack: *)
- let found = ref (name = !current_name) in
- Stack.iter
- (fun (old_name, _, _) ->
- if name = old_name then found := true)
- stack;
- (* If not found, the end tag is wrong. Simply ignore it. *)
- if not !found then
- parse_next()
- else begin
- (* Put the current element onto the stack: *)
- Stack.push (!current_name, !current_atts, !current_subs) stack;
- (* If found: Remove the elements from the stack, and append
- * them to the previous element as sub elements
- *)
- let rec remove() =
- let old_name, old_atts, old_subs = Stack.pop stack in
- (* or raise Stack.Empty *)
- if old_name = name then
- old_name, old_atts, old_subs
- else
- let older_name, older_atts, older_subs = remove() in
- older_name,
- older_atts,
- (Element (old_name, old_atts, List.rev old_subs) :: older_subs)
- in
- let old_name, old_atts, old_subs = remove() in
- (* Remove one more element: the element containing the element
- * currently being closed.
- *)
- let new_name, new_atts, new_subs = Stack.pop stack in
- current_name := new_name;
- current_atts := new_atts;
- current_subs := (Element (old_name, old_atts, List.rev old_subs))
- :: new_subs;
- (* Go on *)
- parse_next()
- end
- | Eof ->
- raise End_of_scan
- | _ ->
- parse_next()
- in
- try
- parse_next();
- List.rev !current_subs
- with
- End_of_scan ->
- (* Close all remaining elements: *)
- Stack.push (!current_name, !current_atts, !current_subs) stack;
- let rec remove() =
- let old_name, old_atts, old_subs = Stack.pop stack in
- (* or raise Stack.Empty *)
- try
- let older_name, older_atts, older_subs = remove() in
- older_name,
- older_atts,
- (Element (old_name, old_atts, List.rev old_subs) :: older_subs)
- with
- Stack.Empty ->
- old_name, old_atts, old_subs
- in
- let name, atts, subs = remove() in
- List.rev subs
-;;
-
-
-let parse_string s =
- let buf = Lexing.from_string s in
- parse_document buf
-;;
-
-
-let parse_file fd =
- let buf = Lexing.from_channel fd in
- parse_document buf
-;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/03/03 01:07:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-(* The type 'document' represents parsed HTML documents.
- * Element (name, args, subnodes): is an element node for an element of
- * type 'name' (i.e. written <name ...>...</name>) with arguments 'args'
- * and subnodes 'subnodes' (the material within the element). The arguments
- * are simply name/value pairs. Entity references (something like &xy;)
- * occurring in the values are NOT resolved.
- * Arguments without values (e.g. <select name="x" multiple>: here,
- * "multiple" is such an argument) are represented as (name,name), i.e. the
- * name is returned as value.
- * As argument names are case-insensitive, the names are all lowercase.
- * Data s: is a character data node. Again, entity references are contained
- * as such and not as what they mean.
- *)
-
-type document =
- Element of (string * (string*string) list * document list)
- | Data of string
-;;
-
-
-val no_end_tag : string list ref;;
- (* List of tags which are always empty. This variable is pre-configured,
- * but you may want to change it.
- * It is important to know which elements are always empty, because HTML
- * allows the end tag to be omitted for them. For example,
- * <a><b>x</a> is parsed as
- * Element("a",[],[ Element("b",[],[]); Data "x" ])
- * if we know that "b" is an empty element, but it is wrongly parsed as
- * Element("a",[],[ Element("b",[], [ Data "x"]) ])
- * if "b" is actually empty but we do not know it.
- * An example of such a tag is "br".
- *)
-
-val special_tag : string list ref;;
- (* List of tags with a special rule for recognizing the end.
- * This variable is pre-configured, but you may want to change it.
- * The special rule is that the metacharacters '<', '>' and so on lose
- * their meaning within the element, and that only the corresponding
- * end tag stops this kind of scanning. An example is the element
- * "javascript". Inner elements are not recognized, and the element
- * "script". Inner elements are not recognized, and the element
- * can only be ended by </script>. (Other elements are also ended
- *
- * Note that comments are not recognized within special elements;
- * comments are returned as character material.
- *)
-
-val parse_string : string -> document list
- (* Parses the HTML document from a string and returns it *)
-
-val parse_file : in_channel -> document list
- (* Parses the HTML document from a file and returns it *)
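-
-(* Usage sketch (not part of the original interface; assumes this interface
- * is compiled as the Nethtml module): attributes, character data and a
- * known-empty element ("br") in one small tree:
- *
- *   Nethtml.parse_string "<p align=\"center\">Hi<br>there</p>"
- *   = [ Element("p", [("align","center")],
- *               [ Data "Hi"; Element("br",[],[]); Data "there" ]) ]
- *)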
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/03/03 01:07:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-{
- type token =
- Lcomment
- | Rcomment
- | Mcomment
- | Ldoctype
- | Rdoctype
- | Mdoctype
- | Lelement of string
- | Lelementend of string
- | Relement
- | Cdata of string
- | Space of int
- | Name of string
- | Is
- | Literal of string
- | Other
- | Eof
-}
-
-(* Simplified rules: Only Latin-1 is recognized as character set *)
-
-let letter = ['A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255']
-let extender = '\183'
-let digit = ['0'-'9']
-let hexdigit = ['0'-'9' 'A'-'F' 'a'-'f']
-let namechar = letter | digit | '.' | ':' | '-' | '_' | extender
-let name = ( letter | '_' | ':' ) namechar*
-let nmtoken = namechar+
-let ws = [ ' ' '\t' '\r' '\n' ]
-let string_literal1 = '"' [^ '"' '>' '<' '\n']* '"'
-let string_literal2 = "'" [^ '\'' '>' '<' '\n']* "'"
-
-
-(* The following rules reflect HTML as it is used, not the SGML
- * rules.
- *)
-
-rule scan_document = parse
- | "<!--"
- { Lcomment }
- | "<!"
- { Ldoctype }
- | "<" name
- { let s = Lexing.lexeme lexbuf in
- Lelement (String.sub s 1 (String.length s - 1))
- }
- | "</" name
- { let s = Lexing.lexeme lexbuf in
- Lelementend (String.sub s 2 (String.length s - 2))
- }
- | "<" (* misplaced "<" *)
- { Cdata "<" }
- | eof
- { Eof }
- | [^ '<' ]+
- { Cdata (Lexing.lexeme lexbuf)}
-
-and scan_special = parse
- | "</" name
- { let s = Lexing.lexeme lexbuf in
- Lelementend (String.sub s 2 (String.length s - 2))
- }
- | "<"
- { Cdata "<" }
- | eof
- { Eof }
- | [^ '<' ]+
- { Cdata (Lexing.lexeme lexbuf)}
-
-
-and scan_comment = parse
- | "-->"
- { Rcomment }
- | "-"
- { Mcomment }
- | eof
- { Eof }
- | [^ '-']+
- { Mcomment }
-
-and scan_doctype = parse
- | ">" (* Occurrences in strings and in [ ] brackets are ignored *)
- { Rdoctype }
- | eof
- { Eof }
- | [^ '>' ] +
- { Mdoctype }
-
-and scan_element = parse
- | ">"
- { Relement }
- | ws+
- { Space (String.length (Lexing.lexeme lexbuf)) }
- | name
- { Name (Lexing.lexeme lexbuf) }
- | "="
- { Is }
- | string_literal1
- { let s = Lexing.lexeme lexbuf in
- Literal (String.sub s 1 (String.length s - 2))
- }
- | string_literal2
- { let s = Lexing.lexeme lexbuf in
- Literal (String.sub s 1 (String.length s - 2))
- }
- | eof
- { Eof }
- | _
- { Other }
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/03/03 01:07:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-type from_uni_list =
- U_nil
- | U_single of (int*int)
- | U_list of (int*int) list
-;;
-
-let to_unicode = Hashtbl.create 50;;
-
-let from_unicode = Hashtbl.create 50;;
-
-let f_lock = ref (fun () -> ());;
-let f_unlock = ref (fun () -> ());;
-
-let lock () = !f_lock();;
-let unlock () = !f_unlock();;
-
-let init_mt new_f_lock new_f_unlock =
- f_lock := new_f_lock;
- f_unlock := new_f_unlock
-;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/08/28 23:17:54 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *)
-
-type from_uni_list =
- U_nil
- | U_single of (int*int)
- | U_list of (int*int) list
-;;
- (* A representation of (int*int) list that is optimized for the common
- * cases of lists with 0 or 1 elements.
- *)
-
-
-val to_unicode : (Netconversion.encoding,
- int array Lazy.t) Hashtbl.t;;
-
-val from_unicode : (Netconversion.encoding,
- from_uni_list array Lazy.t) Hashtbl.t;;
- (* These hashtables are used internally by the parser to store
- * the conversion tables from 8 bit encodings to Unicode and vice versa.
- * It is normally not necessary to access these tables; the
- * Netconversion module does it already for you.
- *
- * Specification of the conversion tables:
- *
- * to_unicode: maps an 8 bit code to Unicode, i.e.
- * let m = Lazy.force (Hashtbl.find to_unicode `Enc_isoXXX) in
- * let unicode = m.(isocode)
- * - This may be (-1) to indicate that the code point is not defined.
- *
- * from_unicode: maps Unicode to an 8 bit code, i.e.
- * let m = Lazy.force (Hashtbl.find from_unicode `Enc_isoXXX) in
- * let l = m.(unicode land 255)
- * Now search in l the pair (unicode, isocode), and return isocode.
- *
- * Note: It is guaranteed that both arrays have always 256 elements.
- *)
-
-val lock : unit -> unit
- (* In multi-threaded applications: obtains a lock which is required to
- * Lazy.force the values found in to_unicode and from_unicode.
- * In single-threaded applications: a NO-OP
- *)
-
-val unlock : unit -> unit
- (* In multi-threaded applications: releases the lock which is required to
- * Lazy.force the values found in to_unicode and from_unicode.
- * In single-threaded applications: a NO-OP
- *)
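-
-(* Usage sketch (not part of the original interface): a direct table lookup,
- * which Netconversion normally performs for you. `Enc_iso88591 is assumed
- * to be one of the Netconversion.encoding constructors, and latin1_to_uni
- * is a hypothetical helper:
- *
- *   let latin1_to_uni code =
- *     Netmappings.lock();
- *     let m = Lazy.force (Hashtbl.find Netmappings.to_unicode `Enc_iso88591) in
- *     Netmappings.unlock();
- *     m.(code)   (* (-1) if the code point is undefined *)
- *)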
-
-
-val init_mt : (unit -> unit) -> (unit -> unit) -> unit
- (* Internally used; see netstring_mt.ml *)
-
-
-(* ---------------------------------------- *)
-
-(* The following comment was written when the conversion module belonged
- * to the PXP package (Polymorphic XML Parser).
- *)
-
-(* HOW TO ADD A NEW 8 BIT CODE:
- *
- * It is relatively simple to add a new 8 bit code to the system. This
- * means that the parser can read and write files with the new encoding;
- * this does not mean that the parser can represent the XML tree internally
- * by the new encoding.
- *
- * - Put a new unimap file into the "mappings" directory. The file format
- * is simple; please look at the already existing files.
- * The name of the file determines the internal name of the code:
- * If the file is called <name>.unimap, the code will be called
- * `Enc_<name>.
- *
- * - Extend the type "encoding" in pxp_types.mli and pxp_types.ml
- *
- * - Extend the two functions encoding_of_string and string_of_encoding
- * in pxp_types.ml
- *
- * - Recompile the parser
- *
- * Every encoding consumes at least 3kB of memory, but this may be much more
- * if the code points are dispersed over the Unicode code space.
- *
- * Perhaps the addition of new codes will become even simpler in future
- * versions of PXP; but it is currently more important to support
- * non-8-bit codes, too.
- * Every contribution of new codes to PXP is welcome!
- *)
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/08/29 00:47:24 gerd
- * New type for the conversion Unicode to 8bit.
- * Conversion tables are now lazy. Thus also mutexes are required.
- *
- * Revision 1.1 2000/08/13 00:02:57 gerd
- * Initial revision.
- *
- *
- * ======================================================================
- * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_mappings.mli):
- *
- * Revision 1.1 2000/07/27 00:40:02 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* WARNING! This is a generated file! *)
-let iso88591_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\000\208\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\000\221\001\000\222\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\000\240\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\000\253\001\000\254\001\000\255" 0 : int array);;
-let iso88591_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\133\000\000\000\000\000\000\006\001\000\000\006\001\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\144\160\001\000\172\001\000\172\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\00
1\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207\144\160\001\000\208\001\000\208\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220\144\160\001\000\221\001\000\221\144\160\001\000\222\001\000\222\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239\144\160\001\000\240\001\000\240\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253\144\160\001\000\254\001\000\254\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
- let iso885910_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001\004\001\001\018\001\001\"\001\001*\001\001(\001\0016\001\000\167\001\001;\001\001\016\001\001`\001\001f\001\001}\001\000\173\001\001j\001\001J\001\000\176\001\001\005\001\001\019\001\001#\001\001+\001\001)\001\0017\001\000\183\001\001<\001\001\017\001\001a\001\001g\001\001~\001 \021\001\001k\001\001K\001\001\000\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\001.\001\001\012\001\000\201\001\001\024\001\000\203\001\001\022\001\000\205\001\000\206\001\000\207\001\000\208\001\001E\001\001L\001\000\211\001\000\212\001\000\213\001\000\214\001\001h\001\000\216\001\001r\001\000\218\001\000\219\001\000\220\001\000\221\001\000\222\001\000\223\001\001\001\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\001/\001\001\013\001\000\233\001\001\025\001\000\235\001\001\023\001\000\237\001\000\238\001\000\239\001\000\240\001\001F\001\001M\001\000\243\001\000\244\001\000\245\001\000\246\001\001i\001\000\248\001\001s\001\000\250\001\000\251\001\000\252\001\000\253\001\000\254\001\0018" 0 : int array);;
-let iso885910_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\015\000\000\000\000\000\000\006\185\000\000\006\185\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\192@\145\160\160AA\160\160\001\001\001\001\000\224@\144\160BB\144\160CC\145\160\160DD\160\160\001\001\004\001\000\161@\145\160\160EE\160\160\001\001\005\001\000\177@\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\144\160NN\144\160OO\145\160\160PP\160\160\001\001\016\001\000\169@\145\160\160QQ\160\160\001\001\017\001\000\185@\145\160\160RR\160\160\001\001\018\001\000\162@\145\160\160SS\160\160\001\001\019\001\000\178@\144\160TT\145\160\160UU\160\160\001 \021\001\000\189@\145\160\160VV\160\160\001\001\022\001\000\204@\145\160\160WW\160\160\001\001\023\001\000\236@\145\160\160XX\160\160\001\001\024\001\000\202@\145\160\160YY\160\160\001\001\025\001\000\234@\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\163@\145\160\160cc\160\160\001\001#\001\000\179@\144\160dd\144\160ee\144\160ff\144\160gg\145\160\160hh\160\160\001\001(\001\000\165@\145\160\160ii\160\160\001\001)\001\000\181@\145\160\160jj\160\160\001\001*\001\000\164@\145\160\160kk\160\160\001\001+\001\000\180@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\199@\145\160\160oo\160\160\001\001/\001\000\231@\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\166@\145\160\160ww\160\160\001\0017\001\000\182@\145\160\160xx\160\160\001\0018\001\000\255@\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\168@\145\160\160||\160\160\001\001<\001\000\184@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\145\160\160\000E\000E\160\160\001\001E\001\000\209@\145\160\160\000F\000F\160\160\001\001F\001\000\241@\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\145\160\160\000J\000J\160\160\001\001J\001\000\175@\145\160\160\000K\000K\160\160\001\001K\001\000\191@\145\160\160\000L\000L\160\160\001\001L\001\000\210@\145\160\160\000M\000M\160\160\001\001M\001\000\242@\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\170@\145\160\160\000a\000a\160\160\001\001a\001\000\186@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001\001f\001\000\171@\145\160\160\000g\000g\160\160\001\001g\001\000\187@\145\160\160\000h\000h\160\160\001\001h\001\000\215@\145\160\160\000i\000i\160\160\001\001i\001\000\247@\145\160\160\000j\000j\160\160\001\001j\001\000\174@\145\160\160\000k\000k\160\160\001\001k\001\000\190@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\217@\145\160\160\000s\000s\160\160\001\001s\001\000\249@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\172@\145\160\160\000~\000~\160\160\001\001~\001\000\188@\144\16
0\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@@@@\144\160\001\000\167\001\000\167@@@@@\144\160\001\000\173\001\000\173@@\144\160\001\000\176\001\000\176@@@@@@\144\160\001\000\183\001\000\183@@@@@@@@@\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198@@\144\160\001\000\201\001\000\201@\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207\144\160\001\000\208\001\000\208@@\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214@\144\160\001\000\216\001\000\216@\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220\144\160\001\000\221\001\000\221\144\160\001\000\222\001\000\222\144\160\001\000\223\001\000\223@\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230@@\144\160\001\000\233\001\000\233@\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239\144\160\001\000\240\001\000\240@@\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246@\144\160\001\000\248\001\000\248@\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253\144\160\001\000\254\001\000\254@" 0 : Netmappings.from_uni_list array);;
- let iso885913_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001 \029\001\000\162\001\000\163\001\000\164\001 \030\001\000\166\001\000\167\001\000\216\001\000\169\001\001V\001\000\171\001\000\172\001\000\173\001\000\174\001\000\198\001\000\176\001\000\177\001\000\178\001\000\179\001 \028\001\000\181\001\000\182\001\000\183\001\000\248\001\000\185\001\001W\001\000\187\001\000\188\001\000\189\001\000\190\001\000\230\001\001\004\001\001.\001\001\000\001\001\006\001\000\196\001\000\197\001\001\024\001\001\018\001\001\012\001\000\201\001\001y\001\001\022\001\001\"\001\0016\001\001*\001\001;\001\001`\001\001C\001\001E\001\000\211\001\001L\001\000\213\001\000\214\001\000\215\001\001r\001\001A\001\001Z\001\001j\001\000\220\001\001{\001\001}\001\000\223\001\001\005\001\001/\001\001\001\001\001\007\001\000\228\001\000\229\001\001\025\001\001\019\001\001\013\001\000\233\001\001z\001\001\023\001\001#\001\0017\001\001+\001\001<\001\001a\001\001D\001\001F\001\000\243\001\001M\001\000\245\001\000\246\001\000\247\001\001s\001\001B\001\001[\001\001k\001\000\252\001\001|\001\001~\001 \025" 0 : int array);;
-let iso885913_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\031\000\000\000\000\000\000\006\206\000\000\006\206\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\194@\145\160\160AA\160\160\001\001\001\001\000\226@\144\160BB\144\160CC\145\160\160DD\160\160\001\001\004\001\000\192@\145\160\160EE\160\160\001\001\005\001\000\224@\145\160\160FF\160\160\001\001\006\001\000\195@\145\160\160GG\160\160\001\001\007\001\000\227@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\144\160NN\144\160OO\144\160PP\144\160QQ\145\160\160RR\160\160\001\001\018\001\000\199@\145\160\160SS\160\160\001\001\019\001\000\231@\144\160TT\144\160UU\145\160\160VV\160\160\001\001\022\001\000\203@\145\160\160WW\160\160\001\001\023\001\000\235@\145\160\160XX\160\160\001\001\024\001\000\198@\145\160\160YY\160\160\001\001\025\001\000\230\160\160\001 \025\001\000\255@\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\180@\145\160\160]]\160\160\001 \029\001\000\161@\145\160\160^^\160\160\001 \030\001\000\165@\144\160__\144\160``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\204@\145\160\160cc\160\160\001\001#\001\000\236@\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\145\160\160jj\160\160\001\001*\001\000\206@\145\160\160kk\160\160\001\001+\001\000\238@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\193@\145\160\160oo\160\160\001\001/\001\000\225@\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\205@\145\160\160ww\160\160\001\0017\001\000\237@\144\160xx\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\207@\145\160\160||\160\160\001\001<\001\000\239@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\217@\145\160\160\000B\000B\160\160\001\001B\001\000\249@\145\160\160\000C\000C\160\160\001\001C\001\000\209@\145\160\160\000D\000D\160\160\001\001D\001\000\241@\145\160\160\000E\000E\160\160\001\001E\001\000\210@\145\160\160\000F\000F\160\160\001\001F\001\000\242@\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\145\160\160\000L\000L\160\160\001\001L\001\000\212@\145\160\160\000M\000M\160\160\001\001M\001\000\244@\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\001\001V\001\000\170@\145\160\160\000W\000W\160\160\001\001W\001\000\186@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001\001Z\001\000\218@\145\160\160\000[\000[\160\160\001\001[\001\000\250@\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\208@\145\160\160\000a\000a\160\160\001\001a\001\000\240@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\145\160\160\000j\000j\160\160\001\001j\001\000\219@\145\160\160\000k\000k\160\160\001\001k\001\000\251@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\216@\145\160\160\000s\000s\160\160\001\001s\001\000\248@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\202@\145\160\160\000z\000z\160\160\001\001z\001\000\234@\145\160\160\000
{\000{\160\160\001\001{\001\000\221@\145\160\160\000|\000|\160\160\001\001|\001\000\253@\145\160\160\000}\000}\160\160\001\001}\001\000\222@\145\160\160\000~\000~\160\160\001\001~\001\000\254@\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167@\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\144\160\001\000\172\001\000\172\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179@\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183@\144\160\001\000\185\001\000\185@\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190@@@@@\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\175@@\144\160\001\000\201\001\000\201@@@@@@@@@\144\160\001\000\211\001\000\211@\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\168@@@\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223@@@@\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\191@@\144\160\001\000\233\001\000\233@@@@@@@@@\144\160\001\000\243\001\000\243@\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\184@@@\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
- let iso885914_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\030\002\001\030\003\001\000\163\001\001\n\001\001\011\001\030\n\001\000\167\001\030\128\001\000\169\001\030\130\001\030\011\001\030\242\001\000\173\001\000\174\001\001x\001\030\030\001\030\031\001\001 \001\001!\001\030@\001\030A\001\000\182\001\030V\001\030\129\001\030W\001\030\131\001\030`\001\030\243\001\030\132\001\030\133\001\030a\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\001t\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\030j\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\000\221\001\001v\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\001u\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\030k\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\000\253\001\001w\001\000\255" 0 : int array);;
-let iso885914_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\222\000\000\000\000\000\000\006w\000\000\006w\008\000\004\000\000\144\160@@\144\160AA\145\160\160BB\160\160\001\030\002\001\000\161@\145\160\160CC\160\160\001\030\003\001\000\162@\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\145\160\160JJ\160\160\001\001\n\001\000\164\160\160\001\030\n\001\000\166@\145\160\160KK\160\160\001\001\011\001\000\165\160\160\001\030\011\001\000\171@\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\145\160\160^^\160\160\001\030\030\001\000\176@\145\160\160__\160\160\001\030\031\001\000\177@\145\160\160``\160\160\001\001 \001\000\178@\145\160\160aa\160\160\001\001!\001\000\179@\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\030@\001\000\180@\145\160\160\000A\000A\160\160\001\030A\001\000\181@\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\001\030V\001\000\183@\145\160\160\000W\000W\160\160\001\030W\001\000\185@\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\030`\001\000\187@\145\160\160\000a\000a\160\160\001\030a\001\000\191@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\145\160\160\000j\000j\160\160\001\030j\001\000\215@\145\160\160\000k\000k\160\160\001\030k\001\000\247@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\145\160\160\000t\000t\160\160\001\001t\001\000\208@\145\160\160\000u\000u\160\160\001\001u\001\000\240@\145\160\160\000v\000v\160\160\001\001v\001\000\222@\145\160\160\000w\000w\160\160\001\001w\001\000\254@\145\160\160\000x\000x\160\160\001\001x\001\000\175@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\145\160\160\001\000\128\001\000\128\160\160\001\030\128\001\000\168@\145\160\160\001\000\129\001\000\129\160\160\001\030\129\001\000\184@\145\160\160\001\000\130\001\000\130\160\160\001\030\130\001\000\170@\145\160\160\001\000\131\001\000\131\160\160\001\030\131\001\000\186@\145\160\160\001\000\132\001\000\132\160\160\001\030\132\001\000\189@\145\160\160\001\000\133\001\000\133\160\160\001\030\133\001\000\190@\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\1
60\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@\144\160\001\000\163\001\000\163@@@\144\160\001\000\167\001\000\167@\144\160\001\000\169\001\000\169@@@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174@@@@@@@\144\160\001\000\182\001\000\182@@@@@@@@@\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214@\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220\144\160\001\000\221\001\000\221@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241\145\160\160\001\030\242\001\000\172\160\160\001\000\242\001\000\242@\145\160\160\001\030\243\001\000\188\160\160\001\000\243\001\000\243@\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246@\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253@\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
- let iso885915_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\000\161\001\000\162\001\000\163\001 \172\001\000\165\001\001`\001\000\167\001\001a\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\001}\001\000\181\001\000\182\001\000\183\001\001~\001\000\185\001\000\186\001\000\187\001\001R\001\001S\001\001x\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\000\208\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\000\221\001\000\222\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\000\240\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\000\253\001\000\254\001\000\255" 0 : int array);;
-let iso885915_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\157\000\000\000\000\000\000\006!\000\000\006!\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\188@\145\160\160\000S\000S\160\160\001\001S\001\000\189@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\166@\145\160\160\000a\000a\160\160\001\001a\001\000\168@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\190@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\180@\145\160\160\000~\000~\160\160\001\001~\001\000\184@\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163@\144\160\001\000\165\001\000\165@\144\160\001\000\167\001\000\167@\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\164\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179@\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183@\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187@@@\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207\144\160\001\000\208\001\000\208\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220\144\160\001\000\221\001\000\221\144\160\001\000\222\001\000\222\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239\144\160\001\000\240\001\000\240\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253\144\160\001\000\254\001\000\254\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
- let iso88592_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001\004\001\002\216\001\001A\001\000\164\001\001=\001\001Z\001\000\167\001\000\168\001\001`\001\001^\001\001d\001\001y\001\000\173\001\001}\001\001{\001\000\176\001\001\005\001\002\219\001\001B\001\000\180\001\001>\001\001[\001\002\199\001\000\184\001\001a\001\001_\001\001e\001\001z\001\002\221\001\001~\001\001|\001\001T\001\000\193\001\000\194\001\001\002\001\000\196\001\0019\001\001\006\001\000\199\001\001\012\001\000\201\001\001\024\001\000\203\001\001\026\001\000\205\001\000\206\001\001\014\001\001\016\001\001C\001\001G\001\000\211\001\000\212\001\001P\001\000\214\001\000\215\001\001X\001\001n\001\000\218\001\001p\001\000\220\001\000\221\001\001b\001\000\223\001\001U\001\000\225\001\000\226\001\001\003\001\000\228\001\001:\001\001\007\001\000\231\001\001\013\001\000\233\001\001\025\001\000\235\001\001\027\001\000\237\001\000\238\001\001\015\001\001\017\001\001D\001\001H\001\000\243\001\000\244\001\001Q\001\000\246\001\000\247\001\001Y\001\001o\001\000\250\001\001q\001\000\252\001\000\253\001\001c\001\002\217" 0 : int array);;
-let iso88592_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007'\000\000\000\000\000\000\006\217\000\000\006\217\008\000\004\000\000\144\160@@\144\160AA\145\160\160BB\160\160\001\001\002\001\000\195@\145\160\160CC\160\160\001\001\003\001\000\227@\145\160\160DD\160\160\001\001\004\001\000\161@\145\160\160EE\160\160\001\001\005\001\000\177@\145\160\160FF\160\160\001\001\006\001\000\198@\145\160\160GG\160\160\001\001\007\001\000\230@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\145\160\160NN\160\160\001\001\014\001\000\207@\145\160\160OO\160\160\001\001\015\001\000\239@\145\160\160PP\160\160\001\001\016\001\000\208@\145\160\160QQ\160\160\001\001\017\001\000\240@\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001\001\024\001\000\202@\145\160\160YY\160\160\001\001\025\001\000\234@\145\160\160ZZ\160\160\001\001\026\001\000\204@\145\160\160[[\160\160\001\001\027\001\000\236@\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001\0019\001\000\197@\145\160\160zz\160\160\001\001:\001\000\229@\144\160{{\144\160||\145\160\160}}\160\160\001\001=\001\000\165@\145\160\160~~\160\160\001\001>\001\000\181@\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\163@\145\160\160\000B\000B\160\160\001\001B\001\000\179@\145\160\160\000C\000C\160\160\001\001C\001\000\209@\145\160\160\000D\000D\160\160\001\001D\001\000\241@\144\160\000E\000E\144\160\000F\000F\145\160\160\000G\000G\160\160\001\001G\001\000\210@\145\160\160\000H\000H\160\160\001\001H\001\000\242@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001\001P\001\000\213@\145\160\160\000Q\000Q\160\160\001\001Q\001\000\245@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001\001T\001\000\192@\145\160\160\000U\000U\160\160\001\001U\001\000\224@\144\160\000V\000V\144\160\000W\000W\145\160\160\000X\000X\160\160\001\001X\001\000\216@\145\160\160\000Y\000Y\160\160\001\001Y\001\000\248@\145\160\160\000Z\000Z\160\160\001\001Z\001\000\166@\145\160\160\000[\000[\160\160\001\001[\001\000\182@\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\170@\145\160\160\000_\000_\160\160\001\001_\001\000\186@\145\160\160\000`\000`\160\160\001\001`\001\000\169@\145\160\160\000a\000a\160\160\001\001a\001\000\185@\145\160\160\000b\000b\160\160\001\001b\001\000\222@\145\160\160\000c\000c\160\160\001\001c\001\000\254@\145\160\160\000d\000d\160\160\001\001d\001\000\171@\145\160\160\000e\000e\160\160\001\001e\001\000\187@\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\145\160\160\000n\000n\160\160\001\001n\001\000\217@\145\160\160\000o\000o\160\160\001\001o\001\000\249@\145\160\160\000p\000p\160\160\001\001p\001\000\219@\145\160\160\000q\000q\160\160\001\001q\001\000\251@\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\172@\145\160\160\000z\000z\160\160\001\001z\001\000\188@\145\160\1
60\000{\000{\160\160\001\001{\001\000\175@\145\160\160\000|\000|\160\160\001\001|\001\000\191@\145\160\160\000}\000}\160\160\001\001}\001\000\174@\145\160\160\000~\000~\160\160\001\001~\001\000\190@\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@@\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168@@@@\144\160\001\000\173\001\000\173@@\144\160\001\000\176\001\000\176@@@\144\160\001\000\180\001\000\180@@@\144\160\001\000\184\001\000\184@@@@@@@@\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194@\144\160\001\000\196\001\000\196@@\145\160\160\001\002\199\001\000\183\160\160\001\000\199\001\000\199@@\144\160\001\000\201\001\000\201@\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206@@@@\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212@\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\002\216\001\000\162\144\160\001\002\217\001\000\255\144\160\001\000\218\001\000\218\144\160\001\002\219\001\000\178\144\160\001\000\220\001\000\220\145\160\160\001\002\221\001\000\189\160\160\001\000\221\001\000\221@@\144\160\001\000\223\001\000\223@\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226@\144\160\001\000\228\001\000\228@@\144\160\001\000\231\001\000\231@\144\160\001\000\233\001\000\233@\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238@@@@\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244@\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247@@\144\160\001\000\250\001\000\250@\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253@@" 0 : Netmappings.from_uni_list array);;
- let iso88593_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002>\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001&\001\002\216\001\000\163\001\000\164\000\255\001\001$\001\000\167\001\000\168\001\0010\001\001^\001\001\030\001\0014\001\000\173\000\255\001\001{\001\000\176\001\001'\001\000\178\001\000\179\001\000\180\001\000\181\001\001%\001\000\183\001\000\184\001\0011\001\001_\001\001\031\001\0015\001\000\189\000\255\001\001|\001\000\192\001\000\193\001\000\194\000\255\001\000\196\001\001\n\001\001\008\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\000\255\001\000\209\001\000\210\001\000\211\001\000\212\001\001 \001\000\214\001\000\215\001\001\028\001\000\217\001\000\218\001\000\219\001\000\220\001\001l\001\001\\\001\000\223\001\000\224\001\000\225\001\000\226\000\255\001\000\228\001\001\011\001\001\t\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\000\255\001\000\241\001\000\242\001\000\243\001\000\244\001\001!\001\000\246\001\000\247\001\001\029\001\000\249\001\000\250\001\000\251\001\000\252\001\001m\001\001]\001\002\217" 0 : int array);;
-let iso88593_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\165\000\000\000\000\000\000\006J\000\000\006J\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\145\160\160HH\160\160\001\001\008\001\000\198@\145\160\160II\160\160\001\001\t\001\000\230@\145\160\160JJ\160\160\001\001\n\001\000\197@\145\160\160KK\160\160\001\001\011\001\000\229@\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001\001\028\001\000\216@\145\160\160]]\160\160\001\001\029\001\000\248@\145\160\160^^\160\160\001\001\030\001\000\171@\145\160\160__\160\160\001\001\031\001\000\187@\145\160\160``\160\160\001\001 \001\000\213@\145\160\160aa\160\160\001\001!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001\001$\001\000\166@\145\160\160ee\160\160\001\001%\001\000\182@\145\160\160ff\160\160\001\001&\001\000\161@\145\160\160gg\160\160\001\001'\001\000\177@\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001\0010\001\000\169@\145\160\160qq\160\160\001\0011\001\000\185@\144\160rr\144\160ss\145\160\160tt\160\160\001\0014\001\000\172@\145\160\160uu\160\160\001\0015\001\000\188@\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\145\160\160\000\\\000\\\160\160\001\001\\\001\000\222@\145\160\160\000]\000]\160\160\001\001]\001\000\254@\145\160\160\000^\000^\160\160\001\001^\001\000\170@\145\160\160\000_\000_\160\160\001\001_\001\000\186@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001\001l\001\000\221@\145\160\160\000m\000m\160\160\001\001m\001\000\253@\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\145\160\160\000{\000{\160\160\001\001{\001\000\175@\145\160\160\000|\000|\160\160\001\001|\001\000\191@\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149
\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164@@\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168@@@@\144\160\001\000\173\001\000\173@@\144\160\001\000\176\001\000\176@\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181@\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184@@@@\144\160\001\000\189\001\000\189@@\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194@\144\160\001\000\196\001\000\196@@\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212@\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\002\216\001\000\162\145\160\160\001\000\217\001\000\217\160\160\001\002\217\001\000\255@\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226@\144\160\001\000\228\001\000\228@@\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244@\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247@\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
- let iso88594_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001\004\001\0018\001\001V\001\000\164\001\001(\001\001;\001\000\167\001\000\168\001\001`\001\001\018\001\001\"\001\001f\001\000\173\001\001}\001\000\175\001\000\176\001\001\005\001\002\219\001\001W\001\000\180\001\001)\001\001<\001\002\199\001\000\184\001\001a\001\001\019\001\001#\001\001g\001\001J\001\001~\001\001K\001\001\000\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\001.\001\001\012\001\000\201\001\001\024\001\000\203\001\001\022\001\000\205\001\000\206\001\001*\001\001\016\001\001E\001\001L\001\0016\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\001r\001\000\218\001\000\219\001\000\220\001\001h\001\001j\001\000\223\001\001\001\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\001/\001\001\013\001\000\233\001\001\025\001\000\235\001\001\023\001\000\237\001\000\238\001\001+\001\001\017\001\001F\001\001M\001\0017\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\001s\001\000\250\001\000\251\001\000\252\001\001i\001\001k\001\002\217" 0 : int array);;
-let iso88594_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\021\000\000\000\000\000\000\006\193\000\000\006\193\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\192@\145\160\160AA\160\160\001\001\001\001\000\224@\144\160BB\144\160CC\145\160\160DD\160\160\001\001\004\001\000\161@\145\160\160EE\160\160\001\001\005\001\000\177@\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\144\160NN\144\160OO\145\160\160PP\160\160\001\001\016\001\000\208@\145\160\160QQ\160\160\001\001\017\001\000\240@\145\160\160RR\160\160\001\001\018\001\000\170@\145\160\160SS\160\160\001\001\019\001\000\186@\144\160TT\144\160UU\145\160\160VV\160\160\001\001\022\001\000\204@\145\160\160WW\160\160\001\001\023\001\000\236@\145\160\160XX\160\160\001\001\024\001\000\202@\145\160\160YY\160\160\001\001\025\001\000\234@\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\171@\145\160\160cc\160\160\001\001#\001\000\187@\144\160dd\144\160ee\144\160ff\144\160gg\145\160\160hh\160\160\001\001(\001\000\165@\145\160\160ii\160\160\001\001)\001\000\181@\145\160\160jj\160\160\001\001*\001\000\207@\145\160\160kk\160\160\001\001+\001\000\239@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\199@\145\160\160oo\160\160\001\001/\001\000\231@\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\211@\145\160\160ww\160\160\001\0017\001\000\243@\145\160\160xx\160\160\001\0018\001\000\162@\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\166@\145\160\160||\160\160\001\001<\001\000\182@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\145\160\160\000E\000E\160\160\001\001E\001\000\209@\145\160\160\000F\000F\160\160\001\001F\001\000\241@\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\145\160\160\000J\000J\160\160\001\001J\001\000\189@\145\160\160\000K\000K\160\160\001\001K\001\000\191@\145\160\160\000L\000L\160\160\001\001L\001\000\210@\145\160\160\000M\000M\160\160\001\001M\001\000\242@\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\001\001V\001\000\163@\145\160\160\000W\000W\160\160\001\001W\001\000\179@\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\169@\145\160\160\000a\000a\160\160\001\001a\001\000\185@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001\001f\001\000\172@\145\160\160\000g\000g\160\160\001\001g\001\000\188@\145\160\160\000h\000h\160\160\001\001h\001\000\221@\145\160\160\000i\000i\160\160\001\001i\001\000\253@\145\160\160\000j\000j\160\160\001\001j\001\000\222@\145\160\160\000k\000k\160\160\001\001k\001\000\254@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\217@\145\160\160\000s\000s\160\160\001\001s\001\000\249@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\174@\145\160\160\000~\000~\160
\160\001\001~\001\000\190@\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@@\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168@@@@\144\160\001\000\173\001\000\173@\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176@@@\144\160\001\000\180\001\000\180@@@\144\160\001\000\184\001\000\184@@@@@@@@\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\002\199\001\000\183@\144\160\001\000\201\001\000\201@\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206@@@@@\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\002\217\001\000\255\144\160\001\000\218\001\000\218\145\160\160\001\002\219\001\000\178\160\160\001\000\219\001\000\219@\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223@\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230@@\144\160\001\000\233\001\000\233@\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238@@@@@\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248@\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
- let iso88595_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\004\001\001\004\002\001\004\003\001\004\004\001\004\005\001\004\006\001\004\007\001\004\008\001\004\t\001\004\n\001\004\011\001\004\012\001\000\173\001\004\014\001\004\015\001\004\016\001\004\017\001\004\018\001\004\019\001\004\020\001\004\021\001\004\022\001\004\023\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004 \001\004!\001\004\"\001\004#\001\004$\001\004%\001\004&\001\004'\001\004(\001\004)\001\004*\001\004+\001\004,\001\004-\001\004.\001\004/\001\0040\001\0041\001\0042\001\0043\001\0044\001\0045\001\0046\001\0047\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001\004@\001\004A\001\004B\001\004C\001\004D\001\004E\001\004F\001\004G\001\004H\001\004I\001\004J\001\004K\001\004L\001\004M\001\004N\001\004O\001!\022\001\004Q\001\004R\001\004S\001\004T\001\004U\001\004V\001\004W\001\004X\001\004Y\001\004Z\001\004[\001\004\\\001\000\167\001\004^\001\004_" 0 : int array);;
-let iso88595_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\154\000\000\000\000\000\000\007r\000\000\007r\008\000\004\000\000\144\160@@\145\160\160AA\160\160\001\004\001\001\000\161@\145\160\160BB\160\160\001\004\002\001\000\162@\145\160\160CC\160\160\001\004\003\001\000\163@\145\160\160DD\160\160\001\004\004\001\000\164@\145\160\160EE\160\160\001\004\005\001\000\165@\145\160\160FF\160\160\001\004\006\001\000\166@\145\160\160GG\160\160\001\004\007\001\000\167@\145\160\160HH\160\160\001\004\008\001\000\168@\145\160\160II\160\160\001\004\t\001\000\169@\145\160\160JJ\160\160\001\004\n\001\000\170@\145\160\160KK\160\160\001\004\011\001\000\171@\145\160\160LL\160\160\001\004\012\001\000\172@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\174@\145\160\160OO\160\160\001\004\015\001\000\175@\145\160\160PP\160\160\001\004\016\001\000\176@\145\160\160QQ\160\160\001\004\017\001\000\177@\145\160\160RR\160\160\001\004\018\001\000\178@\145\160\160SS\160\160\001\004\019\001\000\179@\145\160\160TT\160\160\001\004\020\001\000\180@\145\160\160UU\160\160\001\004\021\001\000\181@\145\160\160VV\160\160\001\004\022\001\000\182\160\160\001!\022\001\000\240@\145\160\160WW\160\160\001\004\023\001\000\183@\145\160\160XX\160\160\001\004\024\001\000\184@\145\160\160YY\160\160\001\004\025\001\000\185@\145\160\160ZZ\160\160\001\004\026\001\000\186@\145\160\160[[\160\160\001\004\027\001\000\187@\145\160\160\\\\\160\160\001\004\028\001\000\188@\145\160\160]]\160\160\001\004\029\001\000\189@\145\160\160^^\160\160\001\004\030\001\000\190@\145\160\160__\160\160\001\004\031\001\000\191@\145\160\160``\160\160\001\004 \001\000\192@\145\160\160aa\160\160\001\004!\001\000\193@\145\160\160bb\160\160\001\004\"\001\000\194@\145\160\160cc\160\160\001\004#\001\000\195@\145\160\160dd\160\160\001\004$\001\000\196@\145\160\160ee\160\160\001\004%\001\000\197@\145\160\160ff\160\160\001\004&\001\000\198@\145\160\160gg\160\160\001\004'\001\000\199@\145\160\160hh\160\160\001\004(\001\000\200@\145\160\160ii\160\160\001\004)\001\000\201@\145\160\160jj\160\160\001\004*\001\000\202@\145\160\160kk\160\160\001\004+\001\000\203@\145\160\160ll\160\160\001\004,\001\000\204@\145\160\160mm\160\160\001\004-\001\000\205@\145\160\160nn\160\160\001\004.\001\000\206@\145\160\160oo\160\160\001\004/\001\000\207@\145\160\160pp\160\160\001\0040\001\000\208@\145\160\160qq\160\160\001\0041\001\000\209@\145\160\160rr\160\160\001\0042\001\000\210@\145\160\160ss\160\160\001\0043\001\000\211@\145\160\160tt\160\160\001\0044\001\000\212@\145\160\160uu\160\160\001\0045\001\000\213@\145\160\160vv\160\160\001\0046\001\000\214@\145\160\160ww\160\160\001\0047\001\000\215@\145\160\160xx\160\160\001\0048\001\000\216@\145\160\160yy\160\160\001\0049\001\000\217@\145\160\160zz\160\160\001\004:\001\000\218@\145\160\160{{\160\160\001\004;\001\000\219@\145\160\160||\160\160\001\004<\001\000\220@\145\160\160}}\160\160\001\004=\001\000\221@\145\160\160~~\160\160\001\004>\001\000\222@\145\160\160\127\127\160\160\001\004?\001\000\223@\145\160\160\000@\000@\160\160\001\004@\001\000\224@\145\160\160\000A\000A\160\160\001\004A\001\000\225@\145\160\160\000B\000B\160\160\001\004B\001\000\226@\145\160\160\000C\000C\160\160\001\004C\001\000\227@\145\160\160\000D\000D\160\160\001\004D\001\000\228@\145\160\160\000E\000E\160\160\001\004E\001\000\229@\145\160\160\000F\000F\160\160\001\004F\001\000\230@\145\160\160\000G\000G\160\160\001\004G\001\000\231@\145\160\160\000H\000H\160\160\001\004H\001\000\232@\145\160\160\000I\000I\160\160\001\004I\001\000\233@\145\160\160\
000J\000J\160\160\001\004J\001\000\234@\145\160\160\000K\000K\160\160\001\004K\001\000\235@\145\160\160\000L\000L\160\160\001\004L\001\000\236@\145\160\160\000M\000M\160\160\001\004M\001\000\237@\145\160\160\000N\000N\160\160\001\004N\001\000\238@\145\160\160\000O\000O\160\160\001\004O\001\000\239@\144\160\000P\000P\145\160\160\000Q\000Q\160\160\001\004Q\001\000\241@\145\160\160\000R\000R\160\160\001\004R\001\000\242@\145\160\160\000S\000S\160\160\001\004S\001\000\243@\145\160\160\000T\000T\160\160\001\004T\001\000\244@\145\160\160\000U\000U\160\160\001\004U\001\000\245@\145\160\160\000V\000V\160\160\001\004V\001\000\246@\145\160\160\000W\000W\160\160\001\004W\001\000\247@\145\160\160\000X\000X\160\160\001\004X\001\000\248@\145\160\160\000Y\000Y\160\160\001\004Y\001\000\249@\145\160\160\000Z\000Z\160\160\001\004Z\001\000\250@\145\160\160\000[\000[\160\160\001\004[\001\000\251@\145\160\160\000\\\000\\\160\160\001\004\\\001\000\252@\144\160\000]\000]\145\160\160\000^\000^\160\160\001\004^\001\000\254@\145\160\160\000_\000_\160\160\001\004_\001\000\255@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@@@@\144\160\001\000\167\001\000\253@@@@@\144\160\001\000\173\001\000\173@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let iso88596_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\024\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\000\255\000\255\000\255\001\000\164\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\006\012\001\000\173\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\006\027\000\255\000\255\000\255\001\006\031\000\255\001\006!\001\006\"\001\006#\001\006$\001\006%\001\006&\001\006'\001\006(\001\006)\001\006*\001\006+\001\006,\001\006-\001\006.\001\006/\001\0060\001\0061\001\0062\001\0063\001\0064\001\0065\001\0066\001\0067\001\0068\001\0069\001\006:\000\255\000\255\000\255\000\255\000\255\001\006@\001\006A\001\006B\001\006C\001\006D\001\006E\001\006F\001\006G\001\006H\001\006I\001\006J\001\006K\001\006L\001\006M\001\006N\001\006O\001\006P\001\006Q\001\006R\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255" 0 : int array);;
-let iso88596_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\218\000\000\000\000\000\000\005\224\000\000\005\224\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\006\012\001\000\172@\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\145\160\160[[\160\160\001\006\027\001\000\187@\144\160\\\\\144\160]]\144\160^^\145\160\160__\160\160\001\006\031\001\000\191@\144\160``\145\160\160aa\160\160\001\006!\001\000\193@\145\160\160bb\160\160\001\006\"\001\000\194@\145\160\160cc\160\160\001\006#\001\000\195@\145\160\160dd\160\160\001\006$\001\000\196@\145\160\160ee\160\160\001\006%\001\000\197@\145\160\160ff\160\160\001\006&\001\000\198@\145\160\160gg\160\160\001\006'\001\000\199@\145\160\160hh\160\160\001\006(\001\000\200@\145\160\160ii\160\160\001\006)\001\000\201@\145\160\160jj\160\160\001\006*\001\000\202@\145\160\160kk\160\160\001\006+\001\000\203@\145\160\160ll\160\160\001\006,\001\000\204@\145\160\160mm\160\160\001\006-\001\000\205@\145\160\160nn\160\160\001\006.\001\000\206@\145\160\160oo\160\160\001\006/\001\000\207@\145\160\160pp\160\160\001\0060\001\000\208@\145\160\160qq\160\160\001\0061\001\000\209@\145\160\160rr\160\160\001\0062\001\000\210@\145\160\160ss\160\160\001\0063\001\000\211@\145\160\160tt\160\160\001\0064\001\000\212@\145\160\160uu\160\160\001\0065\001\000\213@\145\160\160vv\160\160\001\0066\001\000\214@\145\160\160ww\160\160\001\0067\001\000\215@\145\160\160xx\160\160\001\0068\001\000\216@\145\160\160yy\160\160\001\0069\001\000\217@\145\160\160zz\160\160\001\006:\001\000\218@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\006@\001\000\224@\145\160\160\000A\000A\160\160\001\006A\001\000\225@\145\160\160\000B\000B\160\160\001\006B\001\000\226@\145\160\160\000C\000C\160\160\001\006C\001\000\227@\145\160\160\000D\000D\160\160\001\006D\001\000\228@\145\160\160\000E\000E\160\160\001\006E\001\000\229@\145\160\160\000F\000F\160\160\001\006F\001\000\230@\145\160\160\000G\000G\160\160\001\006G\001\000\231@\145\160\160\000H\000H\160\160\001\006H\001\000\232@\145\160\160\000I\000I\160\160\001\006I\001\000\233@\145\160\160\000J\000J\160\160\001\006J\001\000\234@\145\160\160\000K\000K\160\160\001\006K\001\000\235@\145\160\160\000L\000L\160\160\001\006L\001\000\236@\145\160\160\000M\000M\160\160\001\006M\001\000\237@\145\160\160\000N\000N\160\160\001\006N\001\000\238@\145\160\160\000O\000O\160\160\001\006O\001\000\239@\145\160\160\000P\000P\160\160\001\006P\001\000\240@\145\160\160\000Q\000Q\160\160\001\006Q\001\000\241@\145\160\160\000R\000R\160\160\001\006R\001\000\242@\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\
000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@@@@@@@@\144\160\001\000\173\001\000\173@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let iso88597_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002?\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001 \024\001 \025\001\000\163\000\255\000\255\001\000\166\001\000\167\001\000\168\001\000\169\000\255\001\000\171\001\000\172\001\000\173\000\255\001 \021\001\000\176\001\000\177\001\000\178\001\000\179\001\003\132\001\003\133\001\003\134\001\000\183\001\003\136\001\003\137\001\003\138\001\000\187\001\003\140\001\000\189\001\003\142\001\003\143\001\003\144\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\000\255\001\003\163\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\170\001\003\171\001\003\172\001\003\173\001\003\174\001\003\175\001\003\176\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\194\001\003\195\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\001\003\201\001\003\202\001\003\203\001\003\204\001\003\205\001\003\206\000\255" 0 : int array);;
-let iso88597_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\223\000\000\000\000\000\000\006\147\000\000\006\147\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\145\160\160UU\160\160\001 \021\001\000\175@\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\161@\145\160\160YY\160\160\001 \025\001\000\162@\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\145\160\160\001\000\132\001\000\132\160\160\001\003\132\001\000\180@\145\160\160\001\000\133\001\000\133\160\160\001\003\133\001\000\181@\145\160\160\001\000\134\001\000\134\160\160\001\003\134\001\000\182@\144\160\001\000\135\001\000\135\145\160\160\001\000\136\001\000\136\160\160\001\003\136\001\000\184@\145\160\160\001\000\137\001\000\137\160\160\001\003\137\001\000\185@\145\160\160\001\000\138\001\000\138\160\160\001\003\138\001\000\186@\144\160\001\000\139\001\000\139\145\160\160\001\000\140\001\000\140\160\160\001\003\140\001\000\188@\144\160\001\000\141\001\000\141\145\160\160\001\000\142\001\000\142\160\160\001\003\142\001\000\190@\145\160\160\001\000\143\001\000\143\160\160\001\003\143\001\000\191@\145\160\160\001\000\144\001\000\144\160\160\001\003\144\001\000\192@\145\160\160\001\000\145\001\000\145\160\160\001\003\145\001\000\193@\145\160\160\001\000\146\001\000\146\160\160\001\003\146\001\000\194@\145\160\160\001\000\147\001\000\147\160\160\001\003\147\001\000\195@\145\160\160\001\000\148\001\000\148\160\160\001\003\148\001\000\196@\145\160\160\001\000\149\001\000\149\160\160\001\003\149\001\000\197@\145\160\160\001\000\150\001\000\150\160\160\001\003\150\001\000\198@\145\160\160\001\000\151\001\000\151\160\160\001\003\151\001\000\199@\145\160\160\001\000\152\001\000\152\160\160\001\003\152\001\000\200@\145\160\160\001\000\15
3\001\000\153\160\160\001\003\153\001\000\201@\145\160\160\001\000\154\001\000\154\160\160\001\003\154\001\000\202@\145\160\160\001\000\155\001\000\155\160\160\001\003\155\001\000\203@\145\160\160\001\000\156\001\000\156\160\160\001\003\156\001\000\204@\145\160\160\001\000\157\001\000\157\160\160\001\003\157\001\000\205@\145\160\160\001\000\158\001\000\158\160\160\001\003\158\001\000\206@\145\160\160\001\000\159\001\000\159\160\160\001\003\159\001\000\207@\145\160\160\001\000\160\001\000\160\160\160\001\003\160\001\000\208@\144\160\001\003\161\001\000\209@\145\160\160\001\000\163\001\000\163\160\160\001\003\163\001\000\211@\144\160\001\003\164\001\000\212\144\160\001\003\165\001\000\213\145\160\160\001\000\166\001\000\166\160\160\001\003\166\001\000\214@\145\160\160\001\000\167\001\000\167\160\160\001\003\167\001\000\215@\145\160\160\001\000\168\001\000\168\160\160\001\003\168\001\000\216@\145\160\160\001\000\169\001\000\169\160\160\001\003\169\001\000\217@\144\160\001\003\170\001\000\218\145\160\160\001\000\171\001\000\171\160\160\001\003\171\001\000\219@\145\160\160\001\000\172\001\000\172\160\160\001\003\172\001\000\220@\145\160\160\001\000\173\001\000\173\160\160\001\003\173\001\000\221@\144\160\001\003\174\001\000\222\144\160\001\003\175\001\000\223\145\160\160\001\000\176\001\000\176\160\160\001\003\176\001\000\224@\145\160\160\001\000\177\001\000\177\160\160\001\003\177\001\000\225@\145\160\160\001\000\178\001\000\178\160\160\001\003\178\001\000\226@\145\160\160\001\000\179\001\000\179\160\160\001\003\179\001\000\227@\144\160\001\003\180\001\000\228\144\160\001\003\181\001\000\229\144\160\001\003\182\001\000\230\145\160\160\001\000\183\001\000\183\160\160\001\003\183\001\000\231@\144\160\001\003\184\001\000\232\144\160\001\003\185\001\000\233\144\160\001\003\186\001\000\234\145\160\160\001\000\187\001\000\187\160\160\001\003\187\001\000\235@\144\160\001\003\188\001\000\236\145\160\160\001\000\189\001\000\189\160\160\001\003\189\001\000\237@\144\160\001\003\190\001\000\238\144\160\001\003\191\001\000\239\144\160\001\003\192\001\000\240\144\160\001\003\193\001\000\241\144\160\001\003\194\001\000\242\144\160\001\003\195\001\000\243\144\160\001\003\196\001\000\244\144\160\001\003\197\001\000\245\144\160\001\003\198\001\000\246\144\160\001\003\199\001\000\247\144\160\001\003\200\001\000\248\144\160\001\003\201\001\000\249\144\160\001\003\202\001\000\250\144\160\001\003\203\001\000\251\144\160\001\003\204\001\000\252\144\160\001\003\205\001\000\253\144\160\001\003\206\001\000\254@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let iso88598_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002!\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\000\255\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\215\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\247\001\000\187\001\000\188\001\000\189\001\000\190\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001 \023\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\005\234\000\255\000\255\001 \014\001 \015\000\255" 0 : int array);;
-let iso88598_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\149\000\000\000\000\000\000\005]\000\000\005]\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\145\160\160NN\160\160\001 \014\001\000\253@\145\160\160OO\160\160\001 \015\001\000\254@\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\223@\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\144\160\001\000\172\001\000\172\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\
001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185@\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190@@@@@@@@@@@@@@@@@\144\160\001\005\208\001\000\224\144\160\001\005\209\001\000\225\144\160\001\005\210\001\000\226\144\160\001\005\211\001\000\227\144\160\001\005\212\001\000\228\144\160\001\005\213\001\000\229\144\160\001\005\214\001\000\230\145\160\160\001\000\215\001\000\170\160\160\001\005\215\001\000\231@\144\160\001\005\216\001\000\232\144\160\001\005\217\001\000\233\144\160\001\005\218\001\000\234\144\160\001\005\219\001\000\235\144\160\001\005\220\001\000\236\144\160\001\005\221\001\000\237\144\160\001\005\222\001\000\238\144\160\001\005\223\001\000\239\144\160\001\005\224\001\000\240\144\160\001\005\225\001\000\241\144\160\001\005\226\001\000\242\144\160\001\005\227\001\000\243\144\160\001\005\228\001\000\244\144\160\001\005\229\001\000\245\144\160\001\005\230\001\000\246\144\160\001\005\231\001\000\247\144\160\001\005\232\001\000\248\144\160\001\005\233\001\000\249\144\160\001\005\234\001\000\250@@@@@@@@@@@@\144\160\001\000\247\001\000\186@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let iso88599_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\001\030\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\0010\001\001^\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\001\031\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\0011\001\001_\001\000\255" 0 : int array);;
-let iso88599_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\151\000\000\000\000\000\000\006\025\000\000\006\025\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\145\160\160^^\160\160\001\001\030\001\000\208@\145\160\160__\160\160\001\001\031\001\000\240@\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001\0010\001\000\221@\145\160\160qq\160\160\001\0011\001\000\253@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\222@\145\160\160\000_\000_\160\160\001\001_\001\000\254@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\
160\001\000\171\001\000\171\144\160\001\000\172\001\000\172\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
-Hashtbl.add Netmappings.to_unicode `Enc_iso88599 iso88599_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_iso88599 iso88599_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_iso88598 iso88598_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_iso88598 iso88598_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_iso88597 iso88597_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_iso88597 iso88597_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_iso88596 iso88596_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_iso88596 iso88596_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_iso88595 iso88595_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_iso88595 iso88595_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_iso88594 iso88594_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_iso88594 iso88594_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_iso88593 iso88593_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_iso88593 iso88593_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_iso88592 iso88592_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_iso88592 iso88592_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_iso885915 iso885915_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_iso885915 iso885915_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_iso885914 iso885914_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_iso885914 iso885914_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_iso885913 iso885913_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_iso885913 iso885913_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_iso885910 iso885910_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_iso885910 iso885910_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_iso88591 iso88591_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_iso88591 iso88591_from_unicode;
-();;
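Editorial note on the removed file above: each encoding is represented by two lazily unmarshalled tables (an `int array` for byte-to-Unicode and a `Netmappings.from_uni_list array` for the reverse direction), and the trailing statements register them in the `Netmappings.to_unicode` / `Netmappings.from_unicode` hash tables keyed by the `Enc_* variants. The sketch below is hypothetical and not part of the original sources; it only illustrates how a client could force and index one of these registered tables, assuming the conventions visible in the removed code (0..255 byte index, decode deferred until first use).

(* Hypothetical usage sketch, not from the original sources. *)
let lookup_to_unicode enc byte =
  (* The Marshal blob is decoded here, on first force, not at load time. *)
  let table = Lazy.force (Hashtbl.find Netmappings.to_unicode enc) in
  table.(byte)

let () =
  (* Illustrative only: print whatever code point ISO-8859-5 assigns to byte 0xD0. *)
  Printf.printf "U+%04X\n" (lookup_to_unicode `Enc_iso88595 0xD0)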
+++ /dev/null
-(* WARNING! This is a generated file! *)
-let cp037_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158Z`\001\000\160\001\000\226\001\000\228\001\000\224\001\000\225\001\000\227\001\000\229\001\000\231\001\000\241\001\000\162n|hk\000|f\001\000\233\001\000\234\001\000\235\001\000\232\001\000\237\001\000\238\001\000\239\001\000\236\001\000\223adji{\001\000\172mo\001\000\194\001\000\196\001\000\192\001\000\193\001\000\195\001\000\197\001\000\199\001\000\209\001\000\166le\000_~\127\001\000\248\001\000\201\001\000\202\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\000`zc\000@g}b\001\000\216\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\001\000\240\001\000\253\001\000\254\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\000\170\001\000\186\001\000\230\001\000\184\001\000\198\001\000\164\001\000\181\000~\000s\000t\000u\000v\000w\000x\000y\000z\001\000\161\001\000\191\001\000\208\001\000\221\001\000\222\001\000\174\000^\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\000[\000]\001\000\175\001\000\168\001\000\180\001\000\215\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\000\244\001\000\246\001\000\242\001\000\243\001\000\245\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\001\000\251\001\000\252\001\000\249\001\000\250\001\000\255\000\\\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\212\001\000\214\001\000\210\001\000\211\001\000\213pqrstuvwxy\001\000\179\001\000\219\001\000\220\001\000\217\001\000\218\001\000\159" 0 : int array);;
-let cp037_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\133\000\000\000\000\000\000\006\001\000\000\006\001\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\144\160Wf\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000Z\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\001\000\186\144\160\000\\\001\000\224\144\160\000]\001\000\187\144\160\000^\001\000\176\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\000O\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000A\144\160\001\000\161\001\000\170\144\160\001\000\162\000J\144\160\001\000\163\001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\000j\144\160\001\000\167\001\000\181\144\160\001\000\168\001\000\189\144\160\001\000\169\001\000\180\144\
160\001\000\170\001\000\154\144\160\001\000\171\001\000\138\144\160\001\000\172\000_\144\160\001\000\173\001\000\202\144\160\001\000\174\001\000\175\144\160\001\000\175\001\000\188\144\160\001\000\176\001\000\144\144\160\001\000\177\001\000\143\144\160\001\000\178\001\000\234\144\160\001\000\179\001\000\250\144\160\001\000\180\001\000\190\144\160\001\000\181\001\000\160\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\179\144\160\001\000\184\001\000\157\144\160\001\000\185\001\000\218\144\160\001\000\186\001\000\155\144\160\001\000\187\001\000\139\144\160\001\000\188\001\000\183\144\160\001\000\189\001\000\184\144\160\001\000\190\001\000\185\144\160\001\000\191\001\000\171\144\160\001\000\192\000d\144\160\001\000\193\000e\144\160\001\000\194\000b\144\160\001\000\195\000f\144\160\001\000\196\000c\144\160\001\000\197\000g\144\160\001\000\198\001\000\158\144\160\001\000\199\000h\144\160\001\000\200\000t\144\160\001\000\201\000q\144\160\001\000\202\000r\144\160\001\000\203\000s\144\160\001\000\204\000x\144\160\001\000\205\000u\144\160\001\000\206\000v\144\160\001\000\207\000w\144\160\001\000\208\001\000\172\144\160\001\000\209\000i\144\160\001\000\210\001\000\237\144\160\001\000\211\001\000\238\144\160\001\000\212\001\000\235\144\160\001\000\213\001\000\239\144\160\001\000\214\001\000\236\144\160\001\000\215\001\000\191\144\160\001\000\216\001\000\128\144\160\001\000\217\001\000\253\144\160\001\000\218\001\000\254\144\160\001\000\219\001\000\251\144\160\001\000\220\001\000\252\144\160\001\000\221\001\000\173\144\160\001\000\222\001\000\174\144\160\001\000\223\000Y\144\160\001\000\224\000D\144\160\001\000\225\000E\144\160\001\000\226\000B\144\160\001\000\227\000F\144\160\001\000\228\000C\144\160\001\000\229\000G\144\160\001\000\230\001\000\156\144\160\001\000\231\000H\144\160\001\000\232\000T\144\160\001\000\233\000Q\144\160\001\000\234\000R\144\160\001\000\235\000S\144\160\001\000\236\000X\144\160\001\000\237\000U\144\160\001\000\238\000V\144\160\001\000\239\000W\144\160\001\000\240\001\000\140\144\160\001\000\241\000I\144\160\001\000\242\001\000\205\144\160\001\000\243\001\000\206\144\160\001\000\244\001\000\203\144\160\001\000\245\001\000\207\144\160\001\000\246\001\000\204\144\160\001\000\247\001\000\225\144\160\001\000\248\000p\144\160\001\000\249\001\000\221\144\160\001\000\250\001\000\222\144\160\001\000\251\001\000\219\144\160\001\000\252\001\000\220\144\160\001\000\253\001\000\141\144\160\001\000\254\001\000\142\144\160\001\000\255\001\000\223" 0 : Netmappings.from_uni_list array);;
- let cp1006_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\228\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\006\240\001\006\241\001\006\242\001\006\243\001\006\244\001\006\245\001\006\246\001\006\247\001\006\248\001\006\249\001\006\012\001\006\027\001\000\173\001\006\031\002\000\000\254\129\002\000\000\254\141\002\000\000\254\142\000\255\002\000\000\254\143\002\000\000\254\145\002\000\000\251V\002\000\000\251X\002\000\000\254\147\002\000\000\254\149\002\000\000\254\151\002\000\000\251f\002\000\000\251h\002\000\000\254\153\002\000\000\254\155\002\000\000\254\157\002\000\000\254\159\002\000\000\251z\002\000\000\251|\002\000\000\254\161\002\000\000\254\163\002\000\000\254\165\002\000\000\254\167\002\000\000\254\169\002\000\000\251\132\002\000\000\254\171\002\000\000\254\173\002\000\000\251\140\002\000\000\254\175\002\000\000\251\138\002\000\000\254\177\002\000\000\254\179\002\000\000\254\181\002\000\000\254\183\002\000\000\254\185\002\000\000\254\187\002\000\000\254\189\002\000\000\254\191\002\000\000\254\193\002\000\000\254\197\002\000\000\254\201\002\000\000\254\202\002\000\000\254\203\002\000\000\254\204\002\000\000\254\205\002\000\000\254\206\002\000\000\254\207\002\000\000\254\208\002\000\000\254\209\002\000\000\254\211\002\000\000\254\213\002\000\000\254\215\002\000\000\254\217\002\000\000\254\219\002\000\000\251\146\002\000\000\251\148\002\000\000\254\221\002\000\000\254\223\002\000\000\254\224\002\000\000\254\225\002\000\000\254\227\002\000\000\251\158\002\000\000\254\229\002\000\000\254\231\002\000\000\254\133\002\000\000\254\237\002\000\000\251\166\002\000\000\251\168\002\000\000\251\169\002\000\000\251\170\002\000\000\254\128\002\000\000\254\137\002\000\000\254\138\002\000\000\254\139\002\000\000\254\241\002\000\000\254\242\002\000\000\254\243\002\000\000\251\176\002\000\000\251\174\002\000\000\254|\002\000\000\254}" 0 : int array);;
-let cp1006_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\143\000\000\000\000\000\000\006\146\000\000\006\146\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\006\012\001\000\171@\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\145\160\160[[\160\160\001\006\027\001\000\172@\144\160\\\\\144\160]]\144\160^^\145\160\160__\160\160\001\006\031\001\000\174@\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\002\000\000\251V\001\000\181@\144\160\000W\000W\145\160\160\000X\000X\160\160\002\000\000\251X\001\000\182@\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\002\000\000\251f\001\000\186@\144\160\000g\000g\145\160\160\000h\000h\160\160\002\000\000\251h\001\000\187@\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\145\160\160\000z\000z\160\160\002\000\000\251z\001\000\192@\144\160\000{\000{\145\160\160\000|\000|\160\160\002\000\000\251|\001\000\193\160\160\002\000\000\254|\001\000\254@\145\160\160\000}\000}\160\160\002\000\000\254}\001\000\255@\144\160\000~\000~\144\160\000\127\000\127\145\160\160\001\000\128\001\000\128\160\160\002\000\000\254\128\001\000\245@\145\160\160\001\000\129\001\000\129\160\160\002\000\000\254\129\001\000\175@\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\145\160\160\001\000\132\001\000\132\160\160\002\000\000\251\132\001\000\199@\145\160\160\001\000\133\001\000\133\160\160\002\000\000\254\133\001\000\239@\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\145\160\160\001\000\137\001\000\137\160\160\002\000\000\254\137\001\000\246@\145\160\160\001\000\138\001\000\138\160\160\002\000\000\251\138\001\000\204\160\160\002\000\000\254\138\001\000\247@\145\160\160\001\000\139\001\000\139\160\160\002\000\000\254\139\001\000\248@\145\160\160\001\000\140\001\000\140\160\160\002\000\000\251\140\001\000\202@\145\160\160\001\000\141\001\000\141\160\160\002\000\000\254\141\001\000\176@\145\160\160\001\000\142\001\000\142\160\160\002\000\000\254\142\001\000\177@\145\160\160\001\000\143\001\000\143\160\160\002\000\000\254\143\001\000\179@\144\160\001\000\144\001\000\144\145\160\160\001\000\145\001\000\145\160\160\002\000\000\254\145\001\000
\180@\145\160\160\001\000\146\001\000\146\160\160\002\000\000\251\146\001\000\229@\145\160\160\001\000\147\001\000\147\160\160\002\000\000\254\147\001\000\183@\145\160\160\001\000\148\001\000\148\160\160\002\000\000\251\148\001\000\230@\145\160\160\001\000\149\001\000\149\160\160\002\000\000\254\149\001\000\184@\144\160\001\000\150\001\000\150\145\160\160\001\000\151\001\000\151\160\160\002\000\000\254\151\001\000\185@\144\160\001\000\152\001\000\152\145\160\160\001\000\153\001\000\153\160\160\002\000\000\254\153\001\000\188@\144\160\001\000\154\001\000\154\145\160\160\001\000\155\001\000\155\160\160\002\000\000\254\155\001\000\189@\144\160\001\000\156\001\000\156\145\160\160\001\000\157\001\000\157\160\160\002\000\000\254\157\001\000\190@\145\160\160\001\000\158\001\000\158\160\160\002\000\000\251\158\001\000\236@\145\160\160\001\000\159\001\000\159\160\160\002\000\000\254\159\001\000\191@\144\160\001\000\160\001\000\160\144\160\002\000\000\254\161\001\000\194@\144\160\002\000\000\254\163\001\000\195@\144\160\002\000\000\254\165\001\000\196\144\160\002\000\000\251\166\001\000\241\144\160\002\000\000\254\167\001\000\197\144\160\002\000\000\251\168\001\000\242\145\160\160\002\000\000\254\169\001\000\198\160\160\002\000\000\251\169\001\000\243@\144\160\002\000\000\251\170\001\000\244\144\160\002\000\000\254\171\001\000\200@\145\160\160\001\000\173\001\000\173\160\160\002\000\000\254\173\001\000\201@\144\160\002\000\000\251\174\001\000\253\144\160\002\000\000\254\175\001\000\203\144\160\002\000\000\251\176\001\000\252\144\160\002\000\000\254\177\001\000\205@\144\160\002\000\000\254\179\001\000\206@\144\160\002\000\000\254\181\001\000\207@\144\160\002\000\000\254\183\001\000\208@\144\160\002\000\000\254\185\001\000\209@\144\160\002\000\000\254\187\001\000\210@\144\160\002\000\000\254\189\001\000\211@\144\160\002\000\000\254\191\001\000\212@\144\160\002\000\000\254\193\001\000\213@@@\144\160\002\000\000\254\197\001\000\214@@@\144\160\002\000\000\254\201\001\000\215\144\160\002\000\000\254\202\001\000\216\144\160\002\000\000\254\203\001\000\217\144\160\002\000\000\254\204\001\000\218\144\160\002\000\000\254\205\001\000\219\144\160\002\000\000\254\206\001\000\220\144\160\002\000\000\254\207\001\000\221\144\160\002\000\000\254\208\001\000\222\144\160\002\000\000\254\209\001\000\223@\144\160\002\000\000\254\211\001\000\224@\144\160\002\000\000\254\213\001\000\225@\144\160\002\000\000\254\215\001\000\226@\144\160\002\000\000\254\217\001\000\227@\144\160\002\000\000\254\219\001\000\228@\144\160\002\000\000\254\221\001\000\231@\144\160\002\000\000\254\223\001\000\232\144\160\002\000\000\254\224\001\000\233\144\160\002\000\000\254\225\001\000\234@\144\160\002\000\000\254\227\001\000\235@\144\160\002\000\000\254\229\001\000\237@\144\160\002\000\000\254\231\001\000\238@@@@@\144\160\002\000\000\254\237\001\000\240@@\144\160\001\006\240\001\000\161\145\160\160\001\006\241\001\000\162\160\160\002\000\000\254\241\001\000\249@\145\160\160\001\006\242\001\000\163\160\160\002\000\000\254\242\001\000\250@\145\160\160\001\006\243\001\000\164\160\160\002\000\000\254\243\001\000\251@\144\160\001\006\244\001\000\165\144\160\001\006\245\001\000\166\144\160\001\006\246\001\000\167\144\160\001\006\247\001\000\168\144\160\001\006\248\001\000\169\144\160\001\006\249\001\000\170@@@@@@" 0 : Netmappings.from_uni_list array);;
- let cp1026_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158Z`\001\000\160\001\000\226\001\000\228\001\000\224\001\000\225\001\000\227\001\000\229\000{\001\000\241\001\000\199n|hkaf\001\000\233\001\000\234\001\000\235\001\000\232\001\000\237\001\000\238\001\000\239\001\000\236\001\000\223\001\001\030\001\0010ji{\000^mo\001\000\194\001\000\196\001\000\192\001\000\193\001\000\195\001\000\197\000[\001\000\209\001\001_le\000_~\127\001\000\248\001\000\201\001\000\202\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\001\0011z\001\000\214\001\001^g}\001\000\220\001\000\216\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\000}\000`\001\000\166\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\000\170\001\000\186\001\000\230\001\000\184\001\000\198\001\000\164\001\000\181\001\000\246\000s\000t\000u\000v\000w\000x\000y\000z\001\000\161\001\000\191\000]d\000@\001\000\174\001\000\162\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\001\000\172\000|\001\000\175\001\000\168\001\000\180\001\000\215\001\000\231\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\000\244\000~\001\000\242\001\000\243\001\000\245\001\001\031\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\001\000\251\000\\\001\000\249\001\000\250\001\000\255\001\000\252\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\212c\001\000\210\001\000\211\001\000\213pqrstuvwxy\001\000\179\001\000\219b\001\000\217\001\000\218\001\000\159" 0 : int array);;
-let cp1026_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\151\000\000\000\000\000\000\006\025\000\000\006\025\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\144\160Wf\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\145\160\160^^\160\160\001\001\030\000Z@\145\160\160__\160\160\001\001\031\001\000\208@\144\160`\000@\144\160a\000O\144\160b\001\000\252\144\160c\001\000\236\144\160d\001\000\173\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\145\160\160\001\0010\000[\160\160p\001\000\240@\145\160\160\001\0011\000y\160\160q\001\000\241@\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\001\000\174\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\000h\144\160\000\\\001\000\220\144\160\000]\001\000\172\145\160\160\000^\000_\160\160\001\001^\000|@\145\160\160\001\001_\000j\160\160\000_\000m@\144\160\000`\001\000\141\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\000H\144\160\000|\001\000\187\144\160\000}\001\000\140\144\160\000~\001\000\204\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000A\144\160\001\000\161\001\000\170\144\160\001\000\162\001\000\176\144\160\001\000\163\
001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\001\000\142\144\160\001\000\167\001\000\181\144\160\001\000\168\001\000\189\144\160\001\000\169\001\000\180\144\160\001\000\170\001\000\154\144\160\001\000\171\001\000\138\144\160\001\000\172\001\000\186\144\160\001\000\173\001\000\202\144\160\001\000\174\001\000\175\144\160\001\000\175\001\000\188\144\160\001\000\176\001\000\144\144\160\001\000\177\001\000\143\144\160\001\000\178\001\000\234\144\160\001\000\179\001\000\250\144\160\001\000\180\001\000\190\144\160\001\000\181\001\000\160\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\179\144\160\001\000\184\001\000\157\144\160\001\000\185\001\000\218\144\160\001\000\186\001\000\155\144\160\001\000\187\001\000\139\144\160\001\000\188\001\000\183\144\160\001\000\189\001\000\184\144\160\001\000\190\001\000\185\144\160\001\000\191\001\000\171\144\160\001\000\192\000d\144\160\001\000\193\000e\144\160\001\000\194\000b\144\160\001\000\195\000f\144\160\001\000\196\000c\144\160\001\000\197\000g\144\160\001\000\198\001\000\158\144\160\001\000\199\000J\144\160\001\000\200\000t\144\160\001\000\201\000q\144\160\001\000\202\000r\144\160\001\000\203\000s\144\160\001\000\204\000x\144\160\001\000\205\000u\144\160\001\000\206\000v\144\160\001\000\207\000w@\144\160\001\000\209\000i\144\160\001\000\210\001\000\237\144\160\001\000\211\001\000\238\144\160\001\000\212\001\000\235\144\160\001\000\213\001\000\239\144\160\001\000\214\000{\144\160\001\000\215\001\000\191\144\160\001\000\216\001\000\128\144\160\001\000\217\001\000\253\144\160\001\000\218\001\000\254\144\160\001\000\219\001\000\251\144\160\001\000\220\000\127@@\144\160\001\000\223\000Y\144\160\001\000\224\000D\144\160\001\000\225\000E\144\160\001\000\226\000B\144\160\001\000\227\000F\144\160\001\000\228\000C\144\160\001\000\229\000G\144\160\001\000\230\001\000\156\144\160\001\000\231\001\000\192\144\160\001\000\232\000T\144\160\001\000\233\000Q\144\160\001\000\234\000R\144\160\001\000\235\000S\144\160\001\000\236\000X\144\160\001\000\237\000U\144\160\001\000\238\000V\144\160\001\000\239\000W@\144\160\001\000\241\000I\144\160\001\000\242\001\000\205\144\160\001\000\243\001\000\206\144\160\001\000\244\001\000\203\144\160\001\000\245\001\000\207\144\160\001\000\246\001\000\161\144\160\001\000\247\001\000\225\144\160\001\000\248\000p\144\160\001\000\249\001\000\221\144\160\001\000\250\001\000\222\144\160\001\000\251\001\000\219\144\160\001\000\252\001\000\224@@\144\160\001\000\255\001\000\223" 0 : Netmappings.from_uni_list array);;
- let cp424_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\031\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158Z`\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\000\162n|hk\000|f\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225adji{\001\000\172mo\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\000\166le\000_~\127\000\255\001\005\234\000\255\000\255\001\000\160\000\255\000\255\000\255\001 \023\000`zc\000@g}b\000\255\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\000\255\000\255\000\255\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\000\255\000\255\000\255\001\000\184\000\255\001\000\164\001\000\181\000~\000s\000t\000u\000v\000w\000x\000y\000z\000\255\000\255\000\255\000\255\000\255\001\000\174\000^\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\000[\000]\001\000\175\001\000\168\001\000\180\001\000\215\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\000\255\000\255\000\255\000\255\000\255\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\000\255\000\255\000\255\000\255\000\255\000\\\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\000\255\000\255\000\255\000\255\000\255pqrstuvwxy\001\000\179\000\255\000\255\000\255\000\255\001\000\159" 0 : int array);;
-let cp424_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\135\000\000\000\000\000\000\005K\000\000\005K\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\145\160\160Wf\160\160\001 \023\000x@\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000Z\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\001\000\186\144\160\000\\\001\000\224\144\160\000]\001\000\187\144\160\000^\001\000\176\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\000O\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000t@\144\160\001\000\162\000J\144\160\001\000\163\001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\000j\144\160\001\000\167\001\000\181\144\160\001\000\168\001\000\189\144\160\001\000\169\001\000\180@\144\160\001\0
00\171\001\000\138\144\160\001\000\172\000_\144\160\001\000\173\001\000\202\144\160\001\000\174\001\000\175\144\160\001\000\175\001\000\188\144\160\001\000\176\001\000\144\144\160\001\000\177\001\000\143\144\160\001\000\178\001\000\234\144\160\001\000\179\001\000\250\144\160\001\000\180\001\000\190\144\160\001\000\181\001\000\160\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\179\144\160\001\000\184\001\000\157\144\160\001\000\185\001\000\218@\144\160\001\000\187\001\000\139\144\160\001\000\188\001\000\183\144\160\001\000\189\001\000\184\144\160\001\000\190\001\000\185@@@@@@@@@@@@@@@@@\144\160\001\005\208\000A\144\160\001\005\209\000B\144\160\001\005\210\000C\144\160\001\005\211\000D\144\160\001\005\212\000E\144\160\001\005\213\000F\144\160\001\005\214\000G\145\160\160\001\005\215\000H\160\160\001\000\215\001\000\191@\144\160\001\005\216\000I\144\160\001\005\217\000Q\144\160\001\005\218\000R\144\160\001\005\219\000S\144\160\001\005\220\000T\144\160\001\005\221\000U\144\160\001\005\222\000V\144\160\001\005\223\000W\144\160\001\005\224\000X\144\160\001\005\225\000Y\144\160\001\005\226\000b\144\160\001\005\227\000c\144\160\001\005\228\000d\144\160\001\005\229\000e\144\160\001\005\230\000f\144\160\001\005\231\000g\144\160\001\005\232\000h\144\160\001\005\233\000i\144\160\001\005\234\000q@@@@@@@@@@@@\144\160\001\000\247\001\000\225@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let cp437_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001\000\236\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\242\001\000\251\001\000\249\001\000\255\001\000\214\001\000\220\001\000\162\001\000\163\001\000\165\001 \167\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001#\016\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
-let cp437_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0071\000\000\000\000\000\000\006\229\000\000\006\229\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\1
27\000\127\160\160\001 \127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\155\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@@\144\160\001\000\165\001\000\157\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@\144\160\001\000\186\001\000\167\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\144\160\001\003\192\001\000\227@@\144\160\001\003\195\001\000\229\145\160\160\001\000\196\001\000\142\160\160\001\003\196\001\000\231@\144\160\001\000\197\001\000\143\145\160\160\001\000\198\001\000\146\160\160\001\003\198\001\000\237@\144\160\001\000\199\001\000\128@\144\160\001\000\201\001\000\144@@@@@@@\144\160\001\000\209\001\000\165@@@@\144\160\001\000\214\001\000\153@@@@@\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137\144\160\001\000\236\001\000\141\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139@\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246@\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129@@\144\160\001\000\255\001\000\152" 0 : Netmappings.from_uni_list array);;
- let cp500_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158Z`\001\000\160\001\000\226\001\000\228\001\000\224\001\000\225\001\000\227\001\000\229\001\000\231\001\000\241\000[n|hkaf\001\000\233\001\000\234\001\000\235\001\000\232\001\000\237\001\000\238\001\000\239\001\000\236\001\000\223\000]dji{\000^mo\001\000\194\001\000\196\001\000\192\001\000\193\001\000\195\001\000\197\001\000\199\001\000\209\001\000\166le\000_~\127\001\000\248\001\000\201\001\000\202\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\000`zc\000@g}b\001\000\216\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\001\000\240\001\000\253\001\000\254\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\000\170\001\000\186\001\000\230\001\000\184\001\000\198\001\000\164\001\000\181\000~\000s\000t\000u\000v\000w\000x\000y\000z\001\000\161\001\000\191\001\000\208\001\000\221\001\000\222\001\000\174\001\000\162\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\001\000\172\000|\001\000\175\001\000\168\001\000\180\001\000\215\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\000\244\001\000\246\001\000\242\001\000\243\001\000\245\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\001\000\251\001\000\252\001\000\249\001\000\250\001\000\255\000\\\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\212\001\000\214\001\000\210\001\000\211\001\000\213pqrstuvwxy\001\000\179\001\000\219\001\000\220\001\000\217\001\000\218\001\000\159" 0 : int array);;
-let cp500_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\133\000\000\000\000\000\000\006\001\000\000\006\001\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\144\160Wf\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000O\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\000J\144\160\000\\\001\000\224\144\160\000]\000Z\144\160\000^\000_\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\001\000\187\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000A\144\160\001\000\161\001\000\170\144\160\001\000\162\001\000\176\144\160\001\000\163\001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\000j\144\160\001\000\167\001\000\181\144\160\001\000\168\001\000\189\144\160\001\000\169\001\000\180\144\160\001
\000\170\001\000\154\144\160\001\000\171\001\000\138\144\160\001\000\172\001\000\186\144\160\001\000\173\001\000\202\144\160\001\000\174\001\000\175\144\160\001\000\175\001\000\188\144\160\001\000\176\001\000\144\144\160\001\000\177\001\000\143\144\160\001\000\178\001\000\234\144\160\001\000\179\001\000\250\144\160\001\000\180\001\000\190\144\160\001\000\181\001\000\160\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\179\144\160\001\000\184\001\000\157\144\160\001\000\185\001\000\218\144\160\001\000\186\001\000\155\144\160\001\000\187\001\000\139\144\160\001\000\188\001\000\183\144\160\001\000\189\001\000\184\144\160\001\000\190\001\000\185\144\160\001\000\191\001\000\171\144\160\001\000\192\000d\144\160\001\000\193\000e\144\160\001\000\194\000b\144\160\001\000\195\000f\144\160\001\000\196\000c\144\160\001\000\197\000g\144\160\001\000\198\001\000\158\144\160\001\000\199\000h\144\160\001\000\200\000t\144\160\001\000\201\000q\144\160\001\000\202\000r\144\160\001\000\203\000s\144\160\001\000\204\000x\144\160\001\000\205\000u\144\160\001\000\206\000v\144\160\001\000\207\000w\144\160\001\000\208\001\000\172\144\160\001\000\209\000i\144\160\001\000\210\001\000\237\144\160\001\000\211\001\000\238\144\160\001\000\212\001\000\235\144\160\001\000\213\001\000\239\144\160\001\000\214\001\000\236\144\160\001\000\215\001\000\191\144\160\001\000\216\001\000\128\144\160\001\000\217\001\000\253\144\160\001\000\218\001\000\254\144\160\001\000\219\001\000\251\144\160\001\000\220\001\000\252\144\160\001\000\221\001\000\173\144\160\001\000\222\001\000\174\144\160\001\000\223\000Y\144\160\001\000\224\000D\144\160\001\000\225\000E\144\160\001\000\226\000B\144\160\001\000\227\000F\144\160\001\000\228\000C\144\160\001\000\229\000G\144\160\001\000\230\001\000\156\144\160\001\000\231\000H\144\160\001\000\232\000T\144\160\001\000\233\000Q\144\160\001\000\234\000R\144\160\001\000\235\000S\144\160\001\000\236\000X\144\160\001\000\237\000U\144\160\001\000\238\000V\144\160\001\000\239\000W\144\160\001\000\240\001\000\140\144\160\001\000\241\000I\144\160\001\000\242\001\000\205\144\160\001\000\243\001\000\206\144\160\001\000\244\001\000\203\144\160\001\000\245\001\000\207\144\160\001\000\246\001\000\204\144\160\001\000\247\001\000\225\144\160\001\000\248\000p\144\160\001\000\249\001\000\221\144\160\001\000\250\001\000\222\144\160\001\000\251\001\000\219\144\160\001\000\252\001\000\220\144\160\001\000\253\001\000\141\144\160\001\000\254\001\000\142\144\160\001\000\255\001\000\223" 0 : Netmappings.from_uni_list array);;
- let cp737_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\001\003\163\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\195\001\003\194\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\201\001\003\172\001\003\173\001\003\174\001\003\202\001\003\175\001\003\204\001\003\205\001\003\203\001\003\206\001\003\134\001\003\136\001\003\137\001\003\138\001\003\140\001\003\142\001\003\143\001\000\177\001\"e\001\"d\001\003\170\001\003\171\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
-let cp737_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007'\000\000\000\000\000\000\006\216\000\000\006\216\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\127\000\127\160\160\001 
\127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@\144\160\001\003\134\001\000\234@\145\160\160\001%\136\001\000\219\160\160\001\003\136\001\000\235@\144\160\001\003\137\001\000\236\144\160\001\003\138\001\000\237@\145\160\160\001%\140\001\000\221\160\160\001\003\140\001\000\238@@\144\160\001\003\142\001\000\239\144\160\001\003\143\001\000\240\144\160\001%\144\001\000\222\145\160\160\001\003\145\001\000\128\160\160\001%\145\001\000\176@\145\160\160\001\003\146\001\000\129\160\160\001%\146\001\000\177@\145\160\160\001\003\147\001\000\130\160\160\001%\147\001\000\178@\144\160\001\003\148\001\000\131\144\160\001\003\149\001\000\132\144\160\001\003\150\001\000\133\144\160\001\003\151\001\000\134\144\160\001\003\152\001\000\135\144\160\001\003\153\001\000\136\144\160\001\003\154\001\000\137\144\160\001\003\155\001\000\138\144\160\001\003\156\001\000\139\144\160\001\003\157\001\000\140\144\160\001\003\158\001\000\141\144\160\001\003\159\001\000\142\145\160\160\001\003\160\001\000\143\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\003\161\001\000\144@\144\160\001\003\163\001\000\145\144\160\001\003\164\001\000\146\144\160\001\003\165\001\000\147\144\160\001\003\166\001\000\148\144\160\001\003\167\001\000\149\144\160\001\003\168\001\000\150\144\160\001\003\169\001\000\151\144\160\001\003\170\001\000\244\144\160\001\003\171\001\000\245\144\160\001\003\172\001\000\225\144\160\001\003\173\001\000\226\144\160\001\003\174\001\000\227\144\160\001\003\175\001\000\229\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\152\160\160\001\000\177\001\000\241@\145\160\160\001\003\178\001\000\153\160\160\001\000\178\001\000\253@\144\160\001\003\179\001\000\154\144\160\001\003\180\001\000\155\144\160\001\003\181\001\000\156\144\160\001\003\182\001\000\157\145\160\160\001\003\183\001\000\158\160\160\001\000\183\001\000\250@\144\160\001\003\184\001\000\159\144\160\001\003\185\001\000\160\144\160\001\003\186\001\000\161\144\160\001\003\187\001\000\162\144\160\001\003\188\001\000\163\144\160\001\003\189\001\000\164\144\160\001\003\190\001\000\165\144\160\001\003\191\001\000\166\144\160\001\003\192\001\000\167\144\160\001\003\193\001\000\168\144\160\001\003\194\001\000\170\144\160\001\003\195\001\000\169\144\160\001\003\196\001\000\171\144\160\001\003\197\001\000\172\144\160\001\003\198\001\000\173\144\160\001\003\199\001\000\174\144\160\001\003\200\001\000\175\144\160\001\003\201\001\000\224\144\160\001\003\202\001\000\228\144\160\001\003\203\001\000\232\144\160\001\003\204\001\000\230\144\160\001\003\205\001\000\231\144\160\001\003\206\001\000\233@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\247\001\000\246@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let cp775_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\001\006\001\000\252\001\000\233\001\001\001\001\000\228\001\001#\001\000\229\001\001\007\001\001B\001\001\019\001\001V\001\001W\001\001+\001\001y\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\001M\001\000\246\001\001\"\001\000\162\001\001Z\001\001[\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001\000\215\001\000\164\001\001\000\001\001*\001\000\243\001\001{\001\001|\001\001z\001 \029\001\000\166\001\000\169\001\000\174\001\000\172\001\000\189\001\000\188\001\001A\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\001\004\001\001\012\001\001\024\001\001\022\001%c\001%Q\001%W\001%]\001\001.\001\001`\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\001r\001\001j\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\001}\001\001\005\001\001\013\001\001\025\001\001\023\001\001/\001\001a\001\001s\001\001k\001\001~\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\000\211\001\000\223\001\001L\001\001C\001\000\245\001\000\213\001\000\181\001\001D\001\0016\001\0017\001\001;\001\001<\001\001F\001\001\018\001\001E\001 \025\001\000\173\001\000\177\001 \028\001\000\190\001\000\182\001\000\167\001\000\247\001 \030\001\000\176\001\"\025\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
-let cp775_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007U\000\000\000\000\000\000\007\019\000\000\007\019\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\160\160\160\001%\000\001\000\196@\145\160\160AA\160\160\001\001\001\001\000\131@\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\145\160\160DD\160\160\001\001\004\001\000\181@\145\160\160EE\160\160\001\001\005\001\000\208@\145\160\160FF\160\160\001\001\006\001\000\128@\145\160\160GG\160\160\001\001\007\001\000\135@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\182\160\160\001%\012\001\000\218@\145\160\160MM\160\160\001\001\013\001\000\209@\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\145\160\160RR\160\160\001\001\018\001\000\237@\145\160\160SS\160\160\001\001\019\001\000\137@\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\145\160\160VV\160\160\001\001\022\001\000\184@\145\160\160WW\160\160\001\001\023\001\000\211@\145\160\160XX\160\160\001\001\024\001\000\183\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\001\025\001\000\210\160\160\001 \025\001\000\239\160\160\001\"\025\001\000\249@\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195\160\160\001 \028\001\000\242@\145\160\160]]\160\160\001 \029\001\000\166@\145\160\160^^\160\160\001 \030\001\000\247@\144\160__\144\160``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\149@\145\160\160cc\160\160\001\001#\001\000\133@\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\145\160\160jj\160\160\001\001*\001\000\161@\145\160\160kk\160\160\001\001+\001\000\140@\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\145\160\160nn\160\160\001\001.\001\000\189@\145\160\160oo\160\160\001\001/\001\000\212@\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\145\160\160vv\160\160\001\0016\001\000\232@\145\160\160ww\160\160\001\0017\001\000\233@\144\160xx\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\234@\145\160\160||\160\160\001%<\001\000\197\160\160\001\001<\001\000\235@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\173@\145\160\160\000B\000B\160\160\001\001B\001\000\136@\145\160\160\000C\000C\160\160\001\001C\001\000\227@\145\160\160\000D\000D\160\160\001\001D\001\000\231@\145\160\160\000E\000E\160\160\001\001E\001\000\238@\145\160\160\000F\000F\160\160\001\001F\001\000\236@\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\145\160\160\000L\000L\160\160\001\001L\001\000\226@\145\160\160\000M\000M\160\160\001\001M\001\000\147@\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\145\160\160\000V\000V\160\160\001\001V\001\000\138@\145\160\160\000W\000W\160\160\001\001W\001\000\139\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001\001Z\001\000\151\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001\001[\001\000\152@\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\190\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001\001a\001\000\213@\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\
160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001\001j\001\000\199@\145\160\160\000k\000k\160\160\001\001k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\198@\145\160\160\000s\000s\160\160\001\001s\001\000\214@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\141@\145\160\160\000z\000z\160\160\001\001z\001\000\165@\145\160\160\000{\000{\160\160\001\001{\001\000\163@\145\160\160\000|\000|\160\160\001\001|\001\000\164@\145\160\160\000}\000}\160\160\001\001}\001\000\207@\145\160\160\000~\000~\160\160\001\001~\001\000\216@\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@\144\160\001\000\162\001\000\150\144\160\001\000\163\001\000\156\144\160\001\000\164\001\000\159@\144\160\001\000\166\001\000\167\144\160\001\000\167\001\000\245@\144\160\001\000\169\001\000\168@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240\144\160\001\000\174\001\000\169@\144\160\001\000\176\001\000\248\144\160\001\000\177\001\000\241\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\252@\144\160\001\000\181\001\000\230\144\160\001\000\182\001\000\244\144\160\001\000\183\001\000\250@\144\160\001\000\185\001\000\251@\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\243@@@@@\144\160\001\000\196\001\000\142\144\160\001\000\197\001\000\143\144\160\001\000\198\001\000\146@@\144\160\001\000\201\001\000\144@@@@@@@@@\144\160\001\000\211\001\000\224@\144\160\001\000\213\001\000\229\144\160\001\000\214\001\000\153\144\160\001\000\215\001\000\158\144\160\001\000\216\001\000\157@@@\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225@@@@\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145@@\144\160\001\000\233\001\000\130@@@@@@@@@\144\160\001\000\243\001\000\162@\144\160\001\000\245\001\000\228\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155@@@\144\160\001\000\252\001\000\129@@@" 0 : Netmappings.from_uni_list array);;
- let cp850_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001\000\236\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\242\001\000\251\001\000\249\001\000\255\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001\000\215\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001\000\174\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\000\193\001\000\194\001\000\192\001\000\169\001%c\001%Q\001%W\001%]\001\000\162\001\000\165\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\000\227\001\000\195\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\000\240\001\000\208\001\000\202\001\000\203\001\000\200\001\0011\001\000\205\001\000\206\001\000\207\001%\024\001%\012\001%\136\001%\132\001\000\166\001\000\204\001%\128\001\000\211\001\000\223\001\000\212\001\000\210\001\000\245\001\000\213\001\000\181\001\000\254\001\000\222\001\000\218\001\000\219\001\000\217\001\000\253\001\000\221\001\000\175\001\000\180\001\000\173\001\000\177\001 \023\001\000\190\001\000\182\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
-let cp850_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\211\000\000\000\000\000\000\006i\000\000\006i\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\242@\145\160\160XX\160\160\001%\024\001\000\217@\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\145\160\160qq\160\160\001\0011\001\000\213@\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\189\144\160\001\000\163\001\000\156\144\160\001\000\164\001\000\207\144\160\001\000\165\001\000\190\144\160\001\000\166\001\000\221\144\160\001\000\167\001\000\245\144\160\001\000\168\001\000\249\144\160\001\000\169\001\000\184\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240\144\160\001\000\174\001\000\169\144\160\001\000\175\001\000\238\144\160\001\000\176\001\000\248\144\160\001\0
00\177\001\000\241\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\252\144\160\001\000\180\001\000\239\144\160\001\000\181\001\000\230\144\160\001\000\182\001\000\244\144\160\001\000\183\001\000\250\144\160\001\000\184\001\000\247\144\160\001\000\185\001\000\251\144\160\001\000\186\001\000\167\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\243\144\160\001\000\191\001\000\168\144\160\001\000\192\001\000\183\144\160\001\000\193\001\000\181\144\160\001\000\194\001\000\182\144\160\001\000\195\001\000\199\144\160\001\000\196\001\000\142\144\160\001\000\197\001\000\143\144\160\001\000\198\001\000\146\144\160\001\000\199\001\000\128\144\160\001\000\200\001\000\212\144\160\001\000\201\001\000\144\144\160\001\000\202\001\000\210\144\160\001\000\203\001\000\211\144\160\001\000\204\001\000\222\144\160\001\000\205\001\000\214\144\160\001\000\206\001\000\215\144\160\001\000\207\001\000\216\144\160\001\000\208\001\000\209\144\160\001\000\209\001\000\165\144\160\001\000\210\001\000\227\144\160\001\000\211\001\000\224\144\160\001\000\212\001\000\226\144\160\001\000\213\001\000\229\144\160\001\000\214\001\000\153\144\160\001\000\215\001\000\158\144\160\001\000\216\001\000\157\144\160\001\000\217\001\000\235\144\160\001\000\218\001\000\233\144\160\001\000\219\001\000\234\144\160\001\000\220\001\000\154\144\160\001\000\221\001\000\237\144\160\001\000\222\001\000\232\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131\144\160\001\000\227\001\000\198\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137\144\160\001\000\236\001\000\141\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139\144\160\001\000\240\001\000\208\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147\144\160\001\000\245\001\000\228\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129\144\160\001\000\253\001\000\236\144\160\001\000\254\001\000\231\144\160\001\000\255\001\000\152" 0 : Netmappings.from_uni_list array);;
- let cp852_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\001o\001\001\007\001\000\231\001\001B\001\000\235\001\001P\001\001Q\001\000\238\001\001y\001\000\196\001\001\006\001\000\201\001\0019\001\001:\001\000\244\001\000\246\001\001=\001\001>\001\001Z\001\001[\001\000\214\001\000\220\001\001d\001\001e\001\001A\001\000\215\001\001\013\001\000\225\001\000\237\001\000\243\001\000\250\001\001\004\001\001\005\001\001}\001\001~\001\001\024\001\001\025\001\000\172\001\001z\001\001\012\001\001_\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\000\193\001\000\194\001\001\026\001\001^\001%c\001%Q\001%W\001%]\001\001{\001\001|\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\001\002\001\001\003\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\001\017\001\001\016\001\001\014\001\000\203\001\001\015\001\001G\001\000\205\001\000\206\001\001\027\001%\024\001%\012\001%\136\001%\132\001\001b\001\001n\001%\128\001\000\211\001\000\223\001\000\212\001\001C\001\001D\001\001H\001\001`\001\001a\001\001T\001\000\218\001\001U\001\001p\001\000\253\001\000\221\001\001c\001\000\180\001\000\173\001\002\221\001\002\219\001\002\199\001\002\216\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\002\217\001\001q\001\001X\001\001Y\001%\160\001\000\160" 0 : int array);;
-let cp852_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007X\000\000\000\000\000\000\007\023\000\000\007\023\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179\160\160\001\001\002\001\000\198@\145\160\160CC\160\160\001\001\003\001\000\199@\145\160\160DD\160\160\001\001\004\001\000\164@\145\160\160EE\160\160\001\001\005\001\000\165@\145\160\160FF\160\160\001\001\006\001\000\143@\145\160\160GG\160\160\001\001\007\001\000\134@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\172\160\160\001%\012\001\000\218@\145\160\160MM\160\160\001\001\013\001\000\159@\145\160\160NN\160\160\001\001\014\001\000\210@\145\160\160OO\160\160\001\001\015\001\000\212@\145\160\160PP\160\160\001%\016\001\000\191\160\160\001\001\016\001\000\209@\145\160\160QQ\160\160\001\001\017\001\000\208@\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001\001\024\001\000\168\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\001\025\001\000\169@\145\160\160ZZ\160\160\001\001\026\001\000\183@\145\160\160[[\160\160\001\001\027\001\000\216@\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001\0019\001\000\145@\145\160\160zz\160\160\001\001:\001\000\146@\144\160{{\145\160\160||\160\160\001%<\001\000\197@\145\160\160}}\160\160\001\001=\001\000\149@\145\160\160~~\160\160\001\001>\001\000\150@\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\157@\145\160\160\000B\000B\160\160\001\001B\001\000\136@\145\160\160\000C\000C\160\160\001\001C\001\000\227@\145\160\160\000D\000D\160\160\001\001D\001\000\228@\144\160\000E\000E\144\160\000F\000F\145\160\160\000G\000G\160\160\001\001G\001\000\213@\145\160\160\000H\000H\160\160\001\001H\001\000\229@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001\001P\001\000\138\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001\001Q\001\000\139\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201\160\160\001\001T\001\000\232@\145\160\160\000U\000U\160\160\001\001U\001\000\234@\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001\001X\001\000\252@\145\160\160\000Y\000Y\160\160\001\001Y\001\000\253@\145\160\160\000Z\000Z\160\160\001\001Z\001\000\151\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001\001[\001\000\152@\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001\001^\001\000\184@\145\160\160\000_\000_\160\160\001\001_\001\000\173@\145\160\160\000`\000`\160\160\001%`\001\000\204\160\160\001\001`\001\000\230@\145\160\160\000a\000a\160\160\001\001a\001\000\231@\145\160\160\000b\000b\160\160\001\001b\001\000\221@\145\160\160\000c\000c\160\160\001%c\001\000\185\160\160\001\001c\001\000\238@\145\160\160\000d\000d\160\160\001\001d\001\000\155@\145\160\160\000e\000e\160\160\001\001e\001\000\156@\145\160\160\000f\000f\160\160\001%
f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\145\160\160\000n\000n\160\160\001\001n\001\000\222@\145\160\160\000o\000o\160\160\001\001o\001\000\133@\145\160\160\000p\000p\160\160\001\001p\001\000\235@\145\160\160\000q\000q\160\160\001\001q\001\000\251@\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\141@\145\160\160\000z\000z\160\160\001\001z\001\000\171@\145\160\160\000{\000{\160\160\001\001{\001\000\189@\145\160\160\000|\000|\160\160\001\001|\001\000\190@\145\160\160\000}\000}\160\160\001\001}\001\000\166@\145\160\160\000~\000~\160\160\001\001~\001\000\167@\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@@@\144\160\001\000\164\001\000\207@@\144\160\001\000\167\001\000\245\144\160\001\000\168\001\000\249@@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240@@\144\160\001\000\176\001\000\248@@@\144\160\001\000\180\001\000\239@@@\144\160\001\000\184\001\000\247@@\144\160\001\000\187\001\000\175@@@@@\144\160\001\000\193\001\000\181\144\160\001\000\194\001\000\182@\144\160\001\000\196\001\000\142@@\145\160\160\001\000\199\001\000\128\160\160\001\002\199\001\000\243@@\144\160\001\000\201\001\000\144@\144\160\001\000\203\001\000\211@\144\160\001\000\205\001\000\214\144\160\001\000\206\001\000\215@@@@\144\160\001\000\211\001\000\224\144\160\001\000\212\001\000\226@\144\160\001\000\214\001\000\153\144\160\001\000\215\001\000\158\144\160\001\002\216\001\000\244\144\160\001\002\217\001\000\250\144\160\001\000\218\001\000\233\144\160\001\002\219\001\000\242\144\160\001\000\220\001\000\154\145\160\160\001\000\221\001\000\237\160\160\001\002\221\001\000\241@@\144\160\001\000\223\001\000\225@\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132@@\144\160\001\000\231\001\000\135@\144\160\001\000\233\001\000\130@\144\160\001\000\235\001\000\137@\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140@@@@\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246@@\144\160\001\000\250\001\000\163@\144\160\001\000\252\001\000\129\144\160\001\000\253\001\000\236@@" 0 : Netmappings.from_uni_list array);;
- let cp855_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\004R\001\004\002\001\004S\001\004\003\001\004Q\001\004\001\001\004T\001\004\004\001\004U\001\004\005\001\004V\001\004\006\001\004W\001\004\007\001\004X\001\004\008\001\004Y\001\004\t\001\004Z\001\004\n\001\004[\001\004\011\001\004\\\001\004\012\001\004^\001\004\014\001\004_\001\004\015\001\004N\001\004.\001\004J\001\004*\001\0040\001\004\016\001\0041\001\004\017\001\004F\001\004&\001\0044\001\004\020\001\0045\001\004\021\001\004D\001\004$\001\0043\001\004\019\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\004E\001\004%\001\0048\001\004\024\001%c\001%Q\001%W\001%]\001\0049\001\004\025\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\004:\001\004\026\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\004;\001\004\027\001\004<\001\004\028\001\004=\001\004\029\001\004>\001\004\030\001\004?\001%\024\001%\012\001%\136\001%\132\001\004\031\001\004O\001%\128\001\004/\001\004@\001\004 \001\004A\001\004!\001\004B\001\004\"\001\004C\001\004#\001\0046\001\004\022\001\0042\001\004\018\001\004L\001\004,\001!\022\001\000\173\001\004K\001\004+\001\0047\001\004\023\001\004H\001\004(\001\004M\001\004-\001\004I\001\004)\001\004G\001\004'\001\000\167\001%\160\001\000\160" 0 : int array);;
-let cp855_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\195\000\000\000\000\000\000\007\164\000\000\007\164\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\145\160\160AA\160\160\001\004\001\001\000\133@\145\160\160BB\160\160\001\004\002\001\000\129\160\160\001%\002\001\000\179@\145\160\160CC\160\160\001\004\003\001\000\131@\145\160\160DD\160\160\001\004\004\001\000\135@\145\160\160EE\160\160\001\004\005\001\000\137@\145\160\160FF\160\160\001\004\006\001\000\139@\145\160\160GG\160\160\001\004\007\001\000\141@\145\160\160HH\160\160\001\004\008\001\000\143@\145\160\160II\160\160\001\004\t\001\000\145@\145\160\160JJ\160\160\001\004\n\001\000\147@\145\160\160KK\160\160\001\004\011\001\000\149@\145\160\160LL\160\160\001\004\012\001\000\151\160\160\001%\012\001\000\218@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\153@\145\160\160OO\160\160\001\004\015\001\000\155@\145\160\160PP\160\160\001\004\016\001\000\161\160\160\001%\016\001\000\191@\145\160\160QQ\160\160\001\004\017\001\000\163@\145\160\160RR\160\160\001\004\018\001\000\236@\145\160\160SS\160\160\001\004\019\001\000\173@\145\160\160TT\160\160\001\004\020\001\000\167\160\160\001%\020\001\000\192@\145\160\160UU\160\160\001\004\021\001\000\169@\145\160\160VV\160\160\001\004\022\001\000\234\160\160\001!\022\001\000\239@\145\160\160WW\160\160\001\004\023\001\000\244@\145\160\160XX\160\160\001\004\024\001\000\184\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\004\025\001\000\190@\145\160\160ZZ\160\160\001\004\026\001\000\199@\145\160\160[[\160\160\001\004\027\001\000\209@\145\160\160\\\\\160\160\001%\028\001\000\195\160\160\001\004\028\001\000\211@\145\160\160]]\160\160\001\004\029\001\000\213@\145\160\160^^\160\160\001\004\030\001\000\215@\145\160\160__\160\160\001\004\031\001\000\221@\145\160\160``\160\160\001\004 
\001\000\226@\145\160\160aa\160\160\001\004!\001\000\228@\145\160\160bb\160\160\001\004\"\001\000\230@\145\160\160cc\160\160\001\004#\001\000\232@\145\160\160dd\160\160\001\004$\001\000\171\160\160\001%$\001\000\180@\145\160\160ee\160\160\001\004%\001\000\182@\145\160\160ff\160\160\001\004&\001\000\165@\145\160\160gg\160\160\001\004'\001\000\252@\145\160\160hh\160\160\001\004(\001\000\246@\145\160\160ii\160\160\001\004)\001\000\250@\145\160\160jj\160\160\001\004*\001\000\159@\145\160\160kk\160\160\001\004+\001\000\242@\145\160\160ll\160\160\001%,\001\000\194\160\160\001\004,\001\000\238@\145\160\160mm\160\160\001\004-\001\000\248@\145\160\160nn\160\160\001\004.\001\000\157@\145\160\160oo\160\160\001\004/\001\000\224@\145\160\160pp\160\160\001\0040\001\000\160@\145\160\160qq\160\160\001\0041\001\000\162@\145\160\160rr\160\160\001\0042\001\000\235@\145\160\160ss\160\160\001\0043\001\000\172@\145\160\160tt\160\160\001\0044\001\000\166\160\160\001%4\001\000\193@\145\160\160uu\160\160\001\0045\001\000\168@\145\160\160vv\160\160\001\0046\001\000\233@\145\160\160ww\160\160\001\0047\001\000\243@\145\160\160xx\160\160\001\0048\001\000\183@\145\160\160yy\160\160\001\0049\001\000\189@\145\160\160zz\160\160\001\004:\001\000\198@\145\160\160{{\160\160\001\004;\001\000\208@\145\160\160||\160\160\001%<\001\000\197\160\160\001\004<\001\000\210@\145\160\160}}\160\160\001\004=\001\000\212@\145\160\160~~\160\160\001\004>\001\000\214@\145\160\160\127\127\160\160\001\004?\001\000\216@\145\160\160\000@\000@\160\160\001\004@\001\000\225@\145\160\160\000A\000A\160\160\001\004A\001\000\227@\145\160\160\000B\000B\160\160\001\004B\001\000\229@\145\160\160\000C\000C\160\160\001\004C\001\000\231@\145\160\160\000D\000D\160\160\001\004D\001\000\170@\145\160\160\000E\000E\160\160\001\004E\001\000\181@\145\160\160\000F\000F\160\160\001\004F\001\000\164@\145\160\160\000G\000G\160\160\001\004G\001\000\251@\145\160\160\000H\000H\160\160\001\004H\001\000\245@\145\160\160\000I\000I\160\160\001\004I\001\000\249@\145\160\160\000J\000J\160\160\001\004J\001\000\158@\145\160\160\000K\000K\160\160\001\004K\001\000\241@\145\160\160\000L\000L\160\160\001\004L\001\000\237@\145\160\160\000M\000M\160\160\001\004M\001\000\247@\145\160\160\000N\000N\160\160\001\004N\001\000\156@\145\160\160\000O\000O\160\160\001\004O\001\000\222@\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001\004Q\001\000\132\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001\004R\001\000\128@\145\160\160\000S\000S\160\160\001\004S\001\000\130@\145\160\160\000T\000T\160\160\001\004T\001\000\134\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001\004U\001\000\136@\145\160\160\000V\000V\160\160\001\004V\001\000\138@\145\160\160\000W\000W\160\160\001\004W\001\000\140\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001\004X\001\000\142@\145\160\160\000Y\000Y\160\160\001\004Y\001\000\144@\145\160\160\000Z\000Z\160\160\001\004Z\001\000\146\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001\004[\001\000\148@\145\160\160\000\\\000\\\160\160\001\004\\\001\000\150@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001\004^\001\000\152@\145\160\160\000_\000_\160\160\001\004_\001\000\154@\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\20
2@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@@@\144\160\001\000\164\001\000\207@@\144\160\001\000\167\001\000\253@@@\144\160\001\000\171\001\000\174@\144\160\001\000\173\001\000\240@@@@@@@@@@@@@\144\160\001\000\187\001\000\175@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let cp856_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\028\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\005\234\000\255\001\000\163\000\255\001\000\215\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\174\001\000\172\001\000\189\001\000\188\000\255\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\000\255\000\255\000\255\001\000\169\001%c\001%Q\001%W\001%]\001\000\162\001\000\165\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\000\255\000\255\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001%\024\001%\012\001%\136\001%\132\001\000\166\000\255\001%\128\000\255\000\255\000\255\000\255\000\255\000\255\001\000\181\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\175\001\000\180\001\000\173\001\000\177\001 \023\001\000\190\001\000\182\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
-let cp856_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\177\000\000\000\000\000\000\005\152\000\000\005\152\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\242@\145\160\160XX\160\160\001%\024\001\000\217@\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@\144\160\001\000\162\001\000\189\144\160\001\000\163\001\000\156\144\160\001\000\164\001\000\207\144\160\001\000\165\001\000\190\144\160\001\000\166\001\000\221\144\160\001\000\167\001\000\245\144\160\001\000\168\001\000\249\144\160\001\000\169\001\000\184@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240\144\160\001\000\174\001\000\169\144\160\001\000\175\001\000\238\144\160\001\000\176\001\000\248\144\160\001\000\177\001\000\241\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\252\144\160\001\000\180\001\000\239\144\160\001\
000\181\001\000\230\144\160\001\000\182\001\000\244\144\160\001\000\183\001\000\250\144\160\001\000\184\001\000\247\144\160\001\000\185\001\000\251@\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\243@@@@@@@@@@@@@@@@@\144\160\001\005\208\001\000\128\144\160\001\005\209\001\000\129\144\160\001\005\210\001\000\130\144\160\001\005\211\001\000\131\144\160\001\005\212\001\000\132\144\160\001\005\213\001\000\133\144\160\001\005\214\001\000\134\145\160\160\001\005\215\001\000\135\160\160\001\000\215\001\000\158@\144\160\001\005\216\001\000\136\144\160\001\005\217\001\000\137\144\160\001\005\218\001\000\138\144\160\001\005\219\001\000\139\144\160\001\005\220\001\000\140\144\160\001\005\221\001\000\141\144\160\001\005\222\001\000\142\144\160\001\005\223\001\000\143\144\160\001\005\224\001\000\144\144\160\001\005\225\001\000\145\144\160\001\005\226\001\000\146\144\160\001\005\227\001\000\147\144\160\001\005\228\001\000\148\144\160\001\005\229\001\000\149\144\160\001\005\230\001\000\150\144\160\001\005\231\001\000\151\144\160\001\005\232\001\000\152\144\160\001\005\233\001\000\153\144\160\001\005\234\001\000\154@@@@@@@@@@@@\144\160\001\000\247\001\000\246@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let cp857_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002B\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001\0011\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\242\001\000\251\001\000\249\001\0010\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001\001^\001\001_\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\001\030\001\001\031\001\000\191\001\000\174\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\000\193\001\000\194\001\000\192\001\000\169\001%c\001%Q\001%W\001%]\001\000\162\001\000\165\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\000\227\001\000\195\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\000\186\001\000\170\001\000\202\001\000\203\001\000\200\000\255\001\000\205\001\000\206\001\000\207\001%\024\001%\012\001%\136\001%\132\001\000\166\001\000\204\001%\128\001\000\211\001\000\223\001\000\212\001\000\210\001\000\245\001\000\213\001\000\181\000\255\001\000\215\001\000\218\001\000\219\001\000\217\001\000\236\001\000\255\001\000\175\001\000\180\001\000\173\001\000\177\000\255\001\000\190\001\000\182\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
-let cp857_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\199\000\000\000\000\000\000\006f\000\000\006f\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\001\030\001\000\166@\145\160\160__\160\160\001\001\031\001\000\167@\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001\0010\001\000\152@\145\160\160qq\160\160\001\0011\001\000\141@\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001\001^\001\000\158@\145\160\160\000_\000_\160\160\001\001_\001\000\159@\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\189\144\160\001\000\163\001\000\156\144\160\001\000\164\001\000\207\144\160\001\000\165\001\000\190\144\160\001\000\166\001\000\221\144\160\001\000\167\001\000\245\144\160\001\000\168\001\000\249\144\160\001\000\169\001\000\184\144\160\001\000\170\001\000\209\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240\144\
160\001\000\174\001\000\169\144\160\001\000\175\001\000\238\144\160\001\000\176\001\000\248\144\160\001\000\177\001\000\241\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\252\144\160\001\000\180\001\000\239\144\160\001\000\181\001\000\230\144\160\001\000\182\001\000\244\144\160\001\000\183\001\000\250\144\160\001\000\184\001\000\247\144\160\001\000\185\001\000\251\144\160\001\000\186\001\000\208\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\243\144\160\001\000\191\001\000\168\144\160\001\000\192\001\000\183\144\160\001\000\193\001\000\181\144\160\001\000\194\001\000\182\144\160\001\000\195\001\000\199\144\160\001\000\196\001\000\142\144\160\001\000\197\001\000\143\144\160\001\000\198\001\000\146\144\160\001\000\199\001\000\128\144\160\001\000\200\001\000\212\144\160\001\000\201\001\000\144\144\160\001\000\202\001\000\210\144\160\001\000\203\001\000\211\144\160\001\000\204\001\000\222\144\160\001\000\205\001\000\214\144\160\001\000\206\001\000\215\144\160\001\000\207\001\000\216@\144\160\001\000\209\001\000\165\144\160\001\000\210\001\000\227\144\160\001\000\211\001\000\224\144\160\001\000\212\001\000\226\144\160\001\000\213\001\000\229\144\160\001\000\214\001\000\153\144\160\001\000\215\001\000\232\144\160\001\000\216\001\000\157\144\160\001\000\217\001\000\235\144\160\001\000\218\001\000\233\144\160\001\000\219\001\000\234\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131\144\160\001\000\227\001\000\198\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139@\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147\144\160\001\000\245\001\000\228\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129@@\144\160\001\000\255\001\000\237" 0 : Netmappings.from_uni_list array);;
- let cp860_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\227\001\000\224\001\000\193\001\000\231\001\000\234\001\000\202\001\000\232\001\000\205\001\000\212\001\000\236\001\000\195\001\000\194\001\000\201\001\000\192\001\000\200\001\000\244\001\000\245\001\000\242\001\000\218\001\000\249\001\000\204\001\000\213\001\000\220\001\000\162\001\000\163\001\000\217\001 \167\001\000\211\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001\000\210\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
-let cp860_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007-\000\000\000\000\000\000\006\224\000\000\006\224\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\127\000\127\160\160\001 
\127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\155\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@@@\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@\144\160\001\000\186\001\000\167\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\145\160\160\001\000\192\001\000\145\160\160\001\003\192\001\000\227@\144\160\001\000\193\001\000\134\144\160\001\000\194\001\000\143\145\160\160\001\000\195\001\000\142\160\160\001\003\195\001\000\229@\144\160\001\003\196\001\000\231@\144\160\001\003\198\001\000\237\144\160\001\000\199\001\000\128\144\160\001\000\200\001\000\146\144\160\001\000\201\001\000\144\144\160\001\000\202\001\000\137@\144\160\001\000\204\001\000\152\144\160\001\000\205\001\000\139@@@\144\160\001\000\209\001\000\165\144\160\001\000\210\001\000\169\144\160\001\000\211\001\000\159\144\160\001\000\212\001\000\140\144\160\001\000\213\001\000\153@@@\144\160\001\000\217\001\000\157\144\160\001\000\218\001\000\150@\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131\144\160\001\000\227\001\000\132@@@\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136@\144\160\001\000\236\001\000\141\144\160\001\000\237\001\000\161@@@\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147\144\160\001\000\245\001\000\148@\144\160\001\000\247\001\000\246@\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163@\144\160\001\000\252\001\000\129@@@" 0 : Netmappings.from_uni_list array);;
- let cp861_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\208\001\000\240\001\000\222\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\254\001\000\251\001\000\221\001\000\253\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001 \167\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\193\001\000\205\001\000\211\001\000\218\001\000\191\001#\016\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
-let cp861_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0071\000\000\000\000\000\000\006\229\000\000\006\229\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\1
27\000\127\160\160\001 \127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173@\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@@@\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@@\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\144\160\001\003\192\001\000\227\144\160\001\000\193\001\000\164@\144\160\001\003\195\001\000\229\145\160\160\001\000\196\001\000\142\160\160\001\003\196\001\000\231@\144\160\001\000\197\001\000\143\145\160\160\001\000\198\001\000\146\160\160\001\003\198\001\000\237@\144\160\001\000\199\001\000\128@\144\160\001\000\201\001\000\144@@@\144\160\001\000\205\001\000\165@@\144\160\001\000\208\001\000\139@@\144\160\001\000\211\001\000\166@@\144\160\001\000\214\001\000\153@\144\160\001\000\216\001\000\157@\144\160\001\000\218\001\000\167@\144\160\001\000\220\001\000\154\144\160\001\000\221\001\000\151\144\160\001\000\222\001\000\141\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137@\144\160\001\000\237\001\000\161@@\144\160\001\000\240\001\000\140@@\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155@\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129\144\160\001\000\253\001\000\152\144\160\001\000\254\001\000\149@" 0 : Netmappings.from_uni_list array);;
- let cp862_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\005\234\001\000\162\001\000\163\001\000\165\001 \167\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001#\016\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
-let cp862_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0074\000\000\000\000\000\000\006\233\000\000\006\233\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\1
27\000\127\160\160\001 \127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\155\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@@\144\160\001\000\165\001\000\157\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@\144\160\001\000\186\001\000\167\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\144\160\001\003\192\001\000\227@@\144\160\001\003\195\001\000\229\144\160\001\003\196\001\000\231@\144\160\001\003\198\001\000\237@@@@@@@@@\144\160\001\005\208\001\000\128\145\160\160\001\005\209\001\000\129\160\160\001\000\209\001\000\165@\144\160\001\005\210\001\000\130\144\160\001\005\211\001\000\131\144\160\001\005\212\001\000\132\144\160\001\005\213\001\000\133\144\160\001\005\214\001\000\134\144\160\001\005\215\001\000\135\144\160\001\005\216\001\000\136\144\160\001\005\217\001\000\137\144\160\001\005\218\001\000\138\144\160\001\005\219\001\000\139\144\160\001\005\220\001\000\140\144\160\001\005\221\001\000\141\144\160\001\005\222\001\000\142\145\160\160\001\005\223\001\000\143\160\160\001\000\223\001\000\225@\144\160\001\005\224\001\000\144\145\160\160\001\005\225\001\000\145\160\160\001\000\225\001\000\160@\144\160\001\005\226\001\000\146\144\160\001\005\227\001\000\147\144\160\001\005\228\001\000\148\144\160\001\005\229\001\000\149\144\160\001\005\230\001\000\150\144\160\001\005\231\001\000\151\144\160\001\005\232\001\000\152\144\160\001\005\233\001\000\153\144\160\001\005\234\001\000\154@@\144\160\001\000\237\001\000\161@@@\144\160\001\000\241\001\000\164@\144\160\001\000\243\001\000\162@@@\144\160\001\000\247\001\000\246@@\144\160\001\000\250\001\000\163@@@@@" 0 : Netmappings.from_uni_list array);;
- let cp863_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\194\001\000\224\001\000\182\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001 \023\001\000\192\001\000\167\001\000\201\001\000\200\001\000\202\001\000\244\001\000\203\001\000\207\001\000\251\001\000\249\001\000\164\001\000\212\001\000\220\001\000\162\001\000\163\001\000\217\001\000\219\001\001\146\001\000\166\001\000\180\001\000\243\001\000\250\001\000\168\001\000\184\001\000\179\001\000\175\001\000\206\001#\016\001\000\172\001\000\189\001\000\188\001\000\190\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
-let cp863_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0077\000\000\000\000\000\000\006\237\000\000\006\237\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\141@\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\1
44\160\000~\000~\145\160\160\000\127\000\127\160\160\001 \127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@\144\160\001\000\162\001\000\155\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@\144\160\001\000\164\001\000\152@\145\160\160\001\000\166\001\000\160\160\160\001\003\166\001\000\232@\144\160\001\000\167\001\000\143\144\160\001\000\168\001\000\164\144\160\001\003\169\001\000\234@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@\144\160\001\000\175\001\000\167\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\166\145\160\160\001\000\180\001\000\161\160\160\001\003\180\001\000\235@\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@\144\160\001\000\182\001\000\134\144\160\001\000\183\001\000\250\144\160\001\000\184\001\000\165@@\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\173@\145\160\160\001\000\192\001\000\142\160\160\001\003\192\001\000\227@@\144\160\001\000\194\001\000\132\144\160\001\003\195\001\000\229\144\160\001\003\196\001\000\231@\144\160\001\003\198\001\000\237\144\160\001\000\199\001\000\128\144\160\001\000\200\001\000\145\144\160\001\000\201\001\000\144\144\160\001\000\202\001\000\146\144\160\001\000\203\001\000\148@@\144\160\001\000\206\001\000\168\144\160\001\000\207\001\000\149@@@@\144\160\001\000\212\001\000\153@@@@\144\160\001\000\217\001\000\157@\144\160\001\000\219\001\000\158\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133@\144\160\001\000\226\001\000\131@@@@\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137@@\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139@@@\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@@\144\160\001\000\247\001\000\246@\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129@@@" 0 : Netmappings.from_uni_list array);;
- let cp864_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\209\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcd\001\006jfghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\176\001\000\183\001\"\025\001\"\026\001%\146\001%\000\001%\002\001%<\001%$\001%,\001%\028\001%4\001%\016\001%\012\001%\020\001%\024\001\003\178\001\"\030\001\003\198\001\000\177\001\000\189\001\000\188\001\"H\001\000\171\001\000\187\002\000\000\254\247\002\000\000\254\248\000\255\000\255\002\000\000\254\251\002\000\000\254\252\000\255\001\000\160\001\000\173\002\000\000\254\130\001\000\163\001\000\164\002\000\000\254\132\000\255\000\255\002\000\000\254\142\002\000\000\254\143\002\000\000\254\149\002\000\000\254\153\001\006\012\002\000\000\254\157\002\000\000\254\161\002\000\000\254\165\001\006`\001\006a\001\006b\001\006c\001\006d\001\006e\001\006f\001\006g\001\006h\001\006i\002\000\000\254\209\001\006\027\002\000\000\254\177\002\000\000\254\181\002\000\000\254\185\001\006\031\001\000\162\002\000\000\254\128\002\000\000\254\129\002\000\000\254\131\002\000\000\254\133\002\000\000\254\202\002\000\000\254\139\002\000\000\254\141\002\000\000\254\145\002\000\000\254\147\002\000\000\254\151\002\000\000\254\155\002\000\000\254\159\002\000\000\254\163\002\000\000\254\167\002\000\000\254\169\002\000\000\254\171\002\000\000\254\173\002\000\000\254\175\002\000\000\254\179\002\000\000\254\183\002\000\000\254\187\002\000\000\254\191\002\000\000\254\193\002\000\000\254\197\002\000\000\254\203\002\000\000\254\207\001\000\166\001\000\172\001\000\247\001\000\215\002\000\000\254\201\001\006@\002\000\000\254\211\002\000\000\254\215\002\000\000\254\219\002\000\000\254\223\002\000\000\254\227\002\000\000\254\231\002\000\000\254\235\002\000\000\254\237\002\000\000\254\239\002\000\000\254\243\002\000\000\254\189\002\000\000\254\204\002\000\000\254\206\002\000\000\254\205\002\000\000\254\225\002\000\000\254}\001\006Q\002\000\000\254\229\002\000\000\254\233\002\000\000\254\236\002\000\000\254\240\002\000\000\254\242\002\000\000\254\208\002\000\000\254\213\002\000\000\254\245\002\000\000\254\246\002\000\000\254\221\002\000\000\254\217\002\000\000\254\241\001%\160\000\255" 0 : int array);;
-let cp864_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007i\000\000\000\000\000\000\006\136\000\000\006\136\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\133@\144\160AA\145\160\160BB\160\160\001%\002\001\000\134@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\141\160\160\001\006\012\001\000\172@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\140@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\142@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\143@\145\160\160YY\160\160\001\"\025\001\000\130@\145\160\160ZZ\160\160\001\"\026\001\000\131@\145\160\160[[\160\160\001\006\027\001\000\187@\145\160\160\\\\\160\160\001%\028\001\000\138@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\145@\145\160\160__\160\160\001\006\031\001\000\191@\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\136@@\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\137@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\139@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\135@\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\006@\001\000\224@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\150@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\145\160\160\000Q\000Q\160\160\001\006Q\001\000\241@\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\006`\001\000\176@\145\160\160\000a\000a\160\160\001\006a\001\000\177@\145\160\160\000b\000b\160\160\001\006b\001\000\178@\145\160\160\000c\000c\160\160\001\006c\001\000\179@\145\160\160\000d\000d\160\160\001\006d\001\000\180@\145\160\160\000e\000e\160\160\001\006e\001\000\181@\145\160\160\000f\000f\160\160\001\006f\001\000\182@\145\160\160\000g\000g\160\160\001\006g\001\000\183@\145\160\160\000h\000h\160\160\001\006h\001\000\184@\145\160\160\000i\000i\160\160\001\006i\001\000\185@\145\160\160\001\006je\160\160\000j\000j@\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\002\000\000\254}\001\000\240@\144\160\000~\000~\144\160\000\127\000\127\144\160\002\000\000\254\128\001\000\193\144\160\002\000\000\254\129\001\000\194\144\160\002\000\000\254\130\001\000\162\144\160\002\000\000\254\131\001\000\195\144\160\002\000\000\254\132\001\000\165\144\160\002\000\000\254\133\001\000\196@@@@@\144\160\002\000\000\254\139\001\000\198@\144\160\002\000\000\254\141\001\000\199\144\160\002\000\000\254\142\001\000\168\144\160\002\000\000\254\143\001\000\169@\144\160\002\000\000\254\145\001\000\200\144\160\001%\146\001\000\132\144\160\002\000\000\254\147\001\000\201@\144\160\00
2\000\000\254\149\001\000\170@\144\160\002\000\000\254\151\001\000\202@\144\160\002\000\000\254\153\001\000\171@\144\160\002\000\000\254\155\001\000\203@\144\160\002\000\000\254\157\001\000\173@\144\160\002\000\000\254\159\001\000\204\145\160\160\001\000\160\001\000\160\160\160\001%\160\001\000\254@\144\160\002\000\000\254\161\001\000\174\144\160\001\000\162\001\000\192\145\160\160\001\000\163\001\000\163\160\160\002\000\000\254\163\001\000\205@\144\160\001\000\164\001\000\164\144\160\002\000\000\254\165\001\000\175\144\160\001\000\166\001\000\219\144\160\002\000\000\254\167\001\000\206@\144\160\002\000\000\254\169\001\000\207@\145\160\160\001\000\171\001\000\151\160\160\002\000\000\254\171\001\000\208@\144\160\001\000\172\001\000\220\145\160\160\001\000\173\001\000\161\160\160\002\000\000\254\173\001\000\209@@\144\160\002\000\000\254\175\001\000\210\144\160\001\000\176\001\000\128\145\160\160\001\000\177\001\000\147\160\160\002\000\000\254\177\001\000\188@\144\160\001\003\178\001\000\144\144\160\002\000\000\254\179\001\000\211@\144\160\002\000\000\254\181\001\000\189@\145\160\160\001\000\183\001\000\129\160\160\002\000\000\254\183\001\000\212@@\144\160\002\000\000\254\185\001\000\190@\145\160\160\001\000\187\001\000\152\160\160\002\000\000\254\187\001\000\213@\144\160\001\000\188\001\000\149\145\160\160\001\000\189\001\000\148\160\160\002\000\000\254\189\001\000\235@@\144\160\002\000\000\254\191\001\000\214@\144\160\002\000\000\254\193\001\000\215@@@\144\160\002\000\000\254\197\001\000\216\144\160\001\003\198\001\000\146@@\144\160\002\000\000\254\201\001\000\223\144\160\002\000\000\254\202\001\000\197\144\160\002\000\000\254\203\001\000\217\144\160\002\000\000\254\204\001\000\236\144\160\002\000\000\254\205\001\000\238\144\160\002\000\000\254\206\001\000\237\144\160\002\000\000\254\207\001\000\218\144\160\002\000\000\254\208\001\000\247\144\160\002\000\000\254\209\001\000\186@\144\160\002\000\000\254\211\001\000\225@\144\160\002\000\000\254\213\001\000\248@\145\160\160\001\000\215\001\000\222\160\160\002\000\000\254\215\001\000\226@@\144\160\002\000\000\254\217\001\000\252@\144\160\002\000\000\254\219\001\000\227@\144\160\002\000\000\254\221\001\000\251@\144\160\002\000\000\254\223\001\000\228@\144\160\002\000\000\254\225\001\000\239@\144\160\002\000\000\254\227\001\000\229@\144\160\002\000\000\254\229\001\000\242@\144\160\002\000\000\254\231\001\000\230@\144\160\002\000\000\254\233\001\000\243@\144\160\002\000\000\254\235\001\000\231\144\160\002\000\000\254\236\001\000\244\144\160\002\000\000\254\237\001\000\232@\144\160\002\000\000\254\239\001\000\233\144\160\002\000\000\254\240\001\000\245\144\160\002\000\000\254\241\001\000\253\144\160\002\000\000\254\242\001\000\246\144\160\002\000\000\254\243\001\000\234@\144\160\002\000\000\254\245\001\000\249\144\160\002\000\000\254\246\001\000\250\145\160\160\002\000\000\254\247\001\000\153\160\160\001\000\247\001\000\221@\144\160\002\000\000\254\248\001\000\154@@\144\160\002\000\000\254\251\001\000\157\144\160\002\000\000\254\252\001\000\158@@@" 0 : Netmappings.from_uni_list array);;
- let cp865_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001\000\236\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\242\001\000\251\001\000\249\001\000\255\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001 \167\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001#\016\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\164\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
-let cp865_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0071\000\000\000\000\000\000\006\229\000\000\006\229\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\1
27\000\127\160\160\001 \127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173@\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@\144\160\001\000\164\001\000\175@\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@\144\160\001\000\186\001\000\167@\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\144\160\001\003\192\001\000\227@@\144\160\001\003\195\001\000\229\145\160\160\001\000\196\001\000\142\160\160\001\003\196\001\000\231@\144\160\001\000\197\001\000\143\145\160\160\001\000\198\001\000\146\160\160\001\003\198\001\000\237@\144\160\001\000\199\001\000\128@\144\160\001\000\201\001\000\144@@@@@@@\144\160\001\000\209\001\000\165@@@@\144\160\001\000\214\001\000\153@\144\160\001\000\216\001\000\157@@@\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137\144\160\001\000\236\001\000\141\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139@\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129@@\144\160\001\000\255\001\000\152" 0 : Netmappings.from_uni_list array);;
- let cp866_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\004\016\001\004\017\001\004\018\001\004\019\001\004\020\001\004\021\001\004\022\001\004\023\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004 \001\004!\001\004\"\001\004#\001\004$\001\004%\001\004&\001\004'\001\004(\001\004)\001\004*\001\004+\001\004,\001\004-\001\004.\001\004/\001\0040\001\0041\001\0042\001\0043\001\0044\001\0045\001\0046\001\0047\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\004@\001\004A\001\004B\001\004C\001\004D\001\004E\001\004F\001\004G\001\004H\001\004I\001\004J\001\004K\001\004L\001\004M\001\004N\001\004O\001\004\001\001\004Q\001\004\004\001\004T\001\004\007\001\004W\001\004\014\001\004^\001\000\176\001\"\025\001\000\183\001\"\026\001!\022\001\000\164\001%\160\001\000\160" 0 : int array);;
-let cp866_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\195\000\000\000\000\000\000\007\164\000\000\007\164\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\145\160\160AA\160\160\001\004\001\001\000\240@\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\145\160\160DD\160\160\001\004\004\001\000\242@\144\160EE\144\160FF\145\160\160GG\160\160\001\004\007\001\000\244@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\246@\144\160OO\145\160\160PP\160\160\001\004\016\001\000\128\160\160\001%\016\001\000\191@\145\160\160QQ\160\160\001\004\017\001\000\129@\145\160\160RR\160\160\001\004\018\001\000\130@\145\160\160SS\160\160\001\004\019\001\000\131@\145\160\160TT\160\160\001\004\020\001\000\132\160\160\001%\020\001\000\192@\145\160\160UU\160\160\001\004\021\001\000\133@\145\160\160VV\160\160\001\004\022\001\000\134\160\160\001!\022\001\000\252@\145\160\160WW\160\160\001\004\023\001\000\135@\145\160\160XX\160\160\001\004\024\001\000\136\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\004\025\001\000\137\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\004\026\001\000\138\160\160\001\"\026\001\000\251@\145\160\160[[\160\160\001\004\027\001\000\139@\145\160\160\\\\\160\160\001\004\028\001\000\140\160\160\001%\028\001\000\195@\145\160\160]]\160\160\001\004\029\001\000\141@\145\160\160^^\160\160\001\004\030\001\000\142@\145\160\160__\160\160\001\004\031\001\000\143@\145\160\160``\160\160\001\004 \001\000\144@\145\160\160aa\160\160\001\004!\001\000\145@\145\160\160bb\160\160\001\004\"\001\000\146@\145\160\160cc\160\160\001\004#\001\000\147@\145\160\160dd\160\160\001\004$\001\000\148\160\160\001%$\001\000\180@\145\160\160ee\160\160\001\004%\001\000\149@\145\160\160ff\160\160\001\004&\001\000\150@\145\160\160gg\160\160\001\004'\001\000\151@\145\160\160hh\160\160\001\004(\001\000\152@\145\160\160ii\160\160\001\004)\001\000\153@\145\160\160jj\160\160\001\004*\001\000\154@\145\160\160kk\160\160\001\004+\001\000\155@\145\160\160ll\160\160\001\004,\001\000\156\160\160\001%,\001\000\194@\145\160\160mm\160\160\001\004-\001\000\157@\145\160\160nn\160\160\001\004.\001\000\158@\145\160\160oo\160\160\001\004/\001\000\159@\145\160\160pp\160\160\001\0040\001\000\160@\145\160\160qq\160\160\001\0041\001\000\161@\145\160\160rr\160\160\001\0042\001\000\162@\145\160\160ss\160\160\001\0043\001\000\163@\145\160\160tt\160\160\001\0044\001\000\164\160\160\001%4\001\000\193@\145\160\160uu\160\160\001\0045\001\000\165@\145\160\160vv\160\160\001\0046\001\000\166@\145\160\160ww\160\160\001\0047\001\000\167@\145\160\160xx\160\160\001\0048\001\000\168@\145\160\160yy\160\160\001\0049\001\000\169@\145\160\160zz\160\160\001\004:\001\000\170@\145\160\160{{\160\160\001\004;\001\000\171@\145\160\160||\160\160\001\004<\001\000\172\160\160\001%<\001\000\197@\145\160\160}}\160\160\001\004=\001\000\173@\145\160\160~~\160\160\001\004>\001\000\174@\145\160\160\127\127\160\160\001\004?\001\000\175@\145\160\160\000@\000@\160\160\001\004@\001\000\224@\145\160\160\000A\000A\160\160\001\004A\001\000\225@\145\160\160\000B\000B\160\160\001\004B\001\000\226@\145\160\160\000C\000C\160\160\001\004C\001\000\227@\145\160\160\000D\000D\160\160\001\004D\001\000\228@\145\160\160\000E\000E\160\160\001\004E\001\000\229@\145\160\160\000F\000F\160\160\001\004F\001\000\230@\145\160\160\000G\000G\160\160\001\004G\001\000\231@\145\160\160\000H\000H\160\160\001\004H\001\000\232@\145\160\160\000I\000I\160\160\001\004I\001\00
0\233@\145\160\160\000J\000J\160\160\001\004J\001\000\234@\145\160\160\000K\000K\160\160\001\004K\001\000\235@\145\160\160\000L\000L\160\160\001\004L\001\000\236@\145\160\160\000M\000M\160\160\001\004M\001\000\237@\145\160\160\000N\000N\160\160\001\004N\001\000\238@\145\160\160\000O\000O\160\160\001\004O\001\000\239@\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186\160\160\001\004Q\001\000\241@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201\160\160\001\004T\001\000\243@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187\160\160\001\004W\001\000\245@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198\160\160\001\004^\001\000\247@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209@\145\160\160\000e\000e\160\160\001%e\001\000\210@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@@@\144\160\001\000\164\001\000\253@@@@@@@@@@@\144\160\001\000\176\001\000\248@@@@@@\144\160\001\000\183\001\000\250@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let cp869_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002<\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\000\255\000\255\000\255\000\255\000\255\000\255\001\003\134\000\255\001\000\183\001\000\172\001\000\166\001 \024\001 \025\001\003\136\001 \021\001\003\137\001\003\138\001\003\170\001\003\140\000\255\000\255\001\003\142\001\003\171\001\000\169\001\003\143\001\000\178\001\000\179\001\003\172\001\000\163\001\003\173\001\003\174\001\003\175\001\003\202\001\003\144\001\003\204\001\003\205\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\000\189\001\003\152\001\003\153\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\003\154\001\003\155\001\003\156\001\003\157\001%c\001%Q\001%W\001%]\001\003\158\001\003\159\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\003\160\001\003\161\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\003\163\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\177\001\003\178\001\003\179\001%\024\001%\012\001%\136\001%\132\001\003\180\001\003\181\001%\128\001\003\182\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\195\001\003\194\001\003\196\001\003\132\001\000\173\001\000\177\001\003\197\001\003\198\001\003\199\001\000\167\001\003\200\001\003\133\001\000\176\001\000\168\001\003\201\001\003\203\001\003\176\001\003\206\001%\160\001\000\160" 0 : int array);;
-let cp869_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\207\000\000\000\000\000\000\006\138\000\000\006\138\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\145\160\160UU\160\160\001 \021\001\000\142@\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\139\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001 \025\001\000\140@\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\145\160\160\001%\132\001\000\220\160\160\001\003\132\001\000\239@\144\160\001\003\133\001\000\247\144\160\001\003\134\001\000\134@\145\160\160\001\003\136\001\000\141\160\160\001%\136\001\000\219@\144\160\001\003\137\001\000\143\144\160\001\003\138\001\000\144@\144\160\001\003\140\001\000\146@\144\160\001\003\142\001\000\149\144\160\001\003\143\001\000\152\144\160\001\003\144\001\000\161\145\160\160\001\003\145\001\000\164\160\160\001%\145\001\000\176@\145\160\160\001\003\146\001\000\165\160\160\001%\146\001\000\177@\145\160\160\001\003\147\001\000\166\160\160\001%\147\001\000\178@\144\160\001\003\148\001\000\167\144\160\001\003\149\001\000\168\144\160\001\003\150\001\000\169\144\160\001\003\151\001\000\170\144\160\001\003\152\001\000\172\144\160\001\
003\153\001\000\173\144\160\001\003\154\001\000\181\144\160\001\003\155\001\000\182\144\160\001\003\156\001\000\183\144\160\001\003\157\001\000\184\144\160\001\003\158\001\000\189\144\160\001\003\159\001\000\190\145\160\160\001\003\160\001\000\198\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\003\161\001\000\199@\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\207@\144\160\001\003\164\001\000\208\144\160\001\003\165\001\000\209\145\160\160\001\000\166\001\000\138\160\160\001\003\166\001\000\210@\145\160\160\001\003\167\001\000\211\160\160\001\000\167\001\000\245@\145\160\160\001\003\168\001\000\212\160\160\001\000\168\001\000\249@\145\160\160\001\000\169\001\000\151\160\160\001\003\169\001\000\213@\144\160\001\003\170\001\000\145\145\160\160\001\003\171\001\000\150\160\160\001\000\171\001\000\174@\145\160\160\001\000\172\001\000\137\160\160\001\003\172\001\000\155@\145\160\160\001\003\173\001\000\157\160\160\001\000\173\001\000\240@\144\160\001\003\174\001\000\158\144\160\001\003\175\001\000\159\145\160\160\001\000\176\001\000\248\160\160\001\003\176\001\000\252@\145\160\160\001\003\177\001\000\214\160\160\001\000\177\001\000\241@\145\160\160\001\000\178\001\000\153\160\160\001\003\178\001\000\215@\145\160\160\001\000\179\001\000\154\160\160\001\003\179\001\000\216@\144\160\001\003\180\001\000\221\144\160\001\003\181\001\000\222\144\160\001\003\182\001\000\224\145\160\160\001\000\183\001\000\136\160\160\001\003\183\001\000\225@\144\160\001\003\184\001\000\226\144\160\001\003\185\001\000\227\144\160\001\003\186\001\000\228\145\160\160\001\000\187\001\000\175\160\160\001\003\187\001\000\229@\144\160\001\003\188\001\000\230\145\160\160\001\000\189\001\000\171\160\160\001\003\189\001\000\231@\144\160\001\003\190\001\000\232\144\160\001\003\191\001\000\233\144\160\001\003\192\001\000\234\144\160\001\003\193\001\000\235\144\160\001\003\194\001\000\237\144\160\001\003\195\001\000\236\144\160\001\003\196\001\000\238\144\160\001\003\197\001\000\242\144\160\001\003\198\001\000\243\144\160\001\003\199\001\000\244\144\160\001\003\200\001\000\246\144\160\001\003\201\001\000\250\144\160\001\003\202\001\000\160\144\160\001\003\203\001\000\251\144\160\001\003\204\001\000\162\144\160\001\003\205\001\000\163\144\160\001\003\206\001\000\253@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let cp874_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002&\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\000\255\000\255\000\255\001 &\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\160\001\014\001\001\014\002\001\014\003\001\014\004\001\014\005\001\014\006\001\014\007\001\014\008\001\014\t\001\014\n\001\014\011\001\014\012\001\014\013\001\014\014\001\014\015\001\014\016\001\014\017\001\014\018\001\014\019\001\014\020\001\014\021\001\014\022\001\014\023\001\014\024\001\014\025\001\014\026\001\014\027\001\014\028\001\014\029\001\014\030\001\014\031\001\014 \001\014!\001\014\"\001\014#\001\014$\001\014%\001\014&\001\014'\001\014(\001\014)\001\014*\001\014+\001\014,\001\014-\001\014.\001\014/\001\0140\001\0141\001\0142\001\0143\001\0144\001\0145\001\0146\001\0147\001\0148\001\0149\001\014:\000\255\000\255\000\255\000\255\001\014?\001\014@\001\014A\001\014B\001\014C\001\014D\001\014E\001\014F\001\014G\001\014H\001\014I\001\014J\001\014K\001\014L\001\014M\001\014N\001\014O\001\014P\001\014Q\001\014R\001\014S\001\014T\001\014U\001\014V\001\014W\001\014X\001\014Y\001\014Z\001\014[\000\255\000\255\000\255\000\255" 0 : int array);;
-let cp874_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\185\000\000\000\000\000\000\006\202\000\000\006\202\008\000\004\000\000\144\160@@\145\160\160AA\160\160\001\014\001\001\000\161@\145\160\160BB\160\160\001\014\002\001\000\162@\145\160\160CC\160\160\001\014\003\001\000\163@\145\160\160DD\160\160\001\014\004\001\000\164@\145\160\160EE\160\160\001\014\005\001\000\165@\145\160\160FF\160\160\001\014\006\001\000\166@\145\160\160GG\160\160\001\014\007\001\000\167@\145\160\160HH\160\160\001\014\008\001\000\168@\145\160\160II\160\160\001\014\t\001\000\169@\145\160\160JJ\160\160\001\014\n\001\000\170@\145\160\160KK\160\160\001\014\011\001\000\171@\145\160\160LL\160\160\001\014\012\001\000\172@\145\160\160MM\160\160\001\014\013\001\000\173@\145\160\160NN\160\160\001\014\014\001\000\174@\145\160\160OO\160\160\001\014\015\001\000\175@\145\160\160PP\160\160\001\014\016\001\000\176@\145\160\160QQ\160\160\001\014\017\001\000\177@\145\160\160RR\160\160\001\014\018\001\000\178@\145\160\160SS\160\160\001 \019\001\000\150\160\160\001\014\019\001\000\179@\145\160\160TT\160\160\001 \020\001\000\151\160\160\001\014\020\001\000\180@\145\160\160UU\160\160\001\014\021\001\000\181@\145\160\160VV\160\160\001\014\022\001\000\182@\145\160\160WW\160\160\001\014\023\001\000\183@\145\160\160XX\160\160\001 \024\001\000\145\160\160\001\014\024\001\000\184@\145\160\160YY\160\160\001 \025\001\000\146\160\160\001\014\025\001\000\185@\145\160\160ZZ\160\160\001\014\026\001\000\186@\145\160\160[[\160\160\001\014\027\001\000\187@\145\160\160\\\\\160\160\001 \028\001\000\147\160\160\001\014\028\001\000\188@\145\160\160]]\160\160\001 \029\001\000\148\160\160\001\014\029\001\000\189@\145\160\160^^\160\160\001\014\030\001\000\190@\145\160\160__\160\160\001\014\031\001\000\191@\145\160\160``\160\160\001\014 \001\000\192@\145\160\160aa\160\160\001\014!\001\000\193@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001\014\"\001\000\194@\145\160\160cc\160\160\001\014#\001\000\195@\145\160\160dd\160\160\001\014$\001\000\196@\145\160\160ee\160\160\001\014%\001\000\197@\145\160\160ff\160\160\001 
&\001\000\133\160\160\001\014&\001\000\198@\145\160\160gg\160\160\001\014'\001\000\199@\145\160\160hh\160\160\001\014(\001\000\200@\145\160\160ii\160\160\001\014)\001\000\201@\145\160\160jj\160\160\001\014*\001\000\202@\145\160\160kk\160\160\001\014+\001\000\203@\145\160\160ll\160\160\001\014,\001\000\204@\145\160\160mm\160\160\001\014-\001\000\205@\145\160\160nn\160\160\001\014.\001\000\206@\145\160\160oo\160\160\001\014/\001\000\207@\145\160\160pp\160\160\001\0140\001\000\208@\145\160\160qq\160\160\001\0141\001\000\209@\145\160\160rr\160\160\001\0142\001\000\210@\145\160\160ss\160\160\001\0143\001\000\211@\145\160\160tt\160\160\001\0144\001\000\212@\145\160\160uu\160\160\001\0145\001\000\213@\145\160\160vv\160\160\001\0146\001\000\214@\145\160\160ww\160\160\001\0147\001\000\215@\145\160\160xx\160\160\001\0148\001\000\216@\145\160\160yy\160\160\001\0149\001\000\217@\145\160\160zz\160\160\001\014:\001\000\218@\144\160{{\144\160||\144\160}}\144\160~~\145\160\160\127\127\160\160\001\014?\001\000\223@\145\160\160\000@\000@\160\160\001\014@\001\000\224@\145\160\160\000A\000A\160\160\001\014A\001\000\225@\145\160\160\000B\000B\160\160\001\014B\001\000\226@\145\160\160\000C\000C\160\160\001\014C\001\000\227@\145\160\160\000D\000D\160\160\001\014D\001\000\228@\145\160\160\000E\000E\160\160\001\014E\001\000\229@\145\160\160\000F\000F\160\160\001\014F\001\000\230@\145\160\160\000G\000G\160\160\001\014G\001\000\231@\145\160\160\000H\000H\160\160\001\014H\001\000\232@\145\160\160\000I\000I\160\160\001\014I\001\000\233@\145\160\160\000J\000J\160\160\001\014J\001\000\234@\145\160\160\000K\000K\160\160\001\014K\001\000\235@\145\160\160\000L\000L\160\160\001\014L\001\000\236@\145\160\160\000M\000M\160\160\001\014M\001\000\237@\145\160\160\000N\000N\160\160\001\014N\001\000\238@\145\160\160\000O\000O\160\160\001\014O\001\000\239@\145\160\160\000P\000P\160\160\001\014P\001\000\240@\145\160\160\000Q\000Q\160\160\001\014Q\001\000\241@\145\160\160\000R\000R\160\160\001\014R\001\000\242@\145\160\160\000S\000S\160\160\001\014S\001\000\243@\145\160\160\000T\000T\160\160\001\014T\001\000\244@\145\160\160\000U\000U\160\160\001\014U\001\000\245@\145\160\160\000V\000V\160\160\001\014V\001\000\246@\145\160\160\000W\000W\160\160\001\014W\001\000\247@\145\160\160\000X\000X\160\160\001\014X\001\000\248@\145\160\160\000Y\000Y\160\160\001\014Y\001\000\249@\145\160\160\000Z\000Z\160\160\001\014Z\001\000\250@\145\160\160\000[\000[\160\160\001\014[\001\000\251@\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@@@@@@@@@@@\144\160\001 \172\001\000\128@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let cp875_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002@\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158\000\255`\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\000[n|hkaf\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\001\003\163\000]dji{\000^mo\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\170\001\003\171\000|le\000_~\127\001\000\168\001\003\134\001\003\136\001\003\137\001\000\160\001\003\138\001\003\140\001\003\142\001\003\143\000`zc\000@g}b\001\003\133\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\000\180\000~\000s\000t\000u\000v\000w\000x\000y\000z\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\195\001\000\163\001\003\172\001\003\173\001\003\174\001\003\202\001\003\175\001\003\204\001\003\205\001\003\203\001\003\206\001\003\194\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\003\201\001\003\144\001\003\176\001 \024\001 \021\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\177\001\000\189\000\255\001\003\135\001 \025\001\000\166\000\\\000\255\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\167\000\255\000\255\001\000\171\001\000\172pqrstuvwxy\001\000\179\001\000\169\000\255\000\255\001\000\187\001\000\159" 0 : int array);;
-let cp875_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\220\000\000\000\000\000\000\006\142\000\000\006\142\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\145\160\160U}\160\160\001 \021\001\000\207@\144\160Vr\144\160Wf\145\160\160XX\160\160\001 \024\001\000\206@\145\160\160YY\160\160\001 \025\001\000\222@@\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000O\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\000J\144\160\000\\\001\000\224\144\160\000]\000Z\144\160\000^\000_\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\000j\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\145\160\160\001\000\133U\160\160\001\003\133\001\000\128@\145\160\160\001\000\134F\160\160\001\003\134\000q@\145\160\160\001\000\135W\160\160\001\003\135\001\000\221@\145\160\160\001\000\136h\160\160\001\003\136\000r@\145\160\160\001\000\137i\160\160\001\003\137\000s@\145\160\160\001\000\138j\160\160\001\003\138\000u@\144\160\001\000\139k\145\160\160\001\000\140l\160\160\001\003\140\000v@\144\160\001\000\141I\145\160\160\001\000\142J\160\160\001\003\142\000w@\145\160\160\001\000\143[\160\160\001\003\143\000x@\145\160\160\001\000\144p\160\160\001\003\144\001\000\204@\145\160\160\001\000\145q\160\160\001\003\145\000A@\145\160\160\001\000\146Z\160\160\001\003\146\000B@\145\160\160\001\000\147s\160\160\001\003\147\000C@\145\160\160\001\000\148t\160\160\001\003\148\000D@\145\160\160\001\000\149u\160\160\00
1\003\149\000E@\145\160\160\001\000\150v\160\160\001\003\150\000F@\145\160\160\001\000\151H\160\160\001\003\151\000G@\145\160\160\001\000\152x\160\160\001\003\152\000H@\145\160\160\001\000\153y\160\160\001\003\153\000I@\145\160\160\001\000\154z\160\160\001\003\154\000Q@\145\160\160\001\000\155{\160\160\001\003\155\000R@\145\160\160\001\000\156D\160\160\001\003\156\000S@\145\160\160\001\000\157T\160\160\001\003\157\000T@\145\160\160\001\000\158~\160\160\001\003\158\000U@\145\160\160\001\003\159\000V\160\160\001\000\159\001\000\255@\145\160\160\001\003\160\000W\160\160\001\000\160\000t@\144\160\001\003\161\000X@\145\160\160\001\003\163\000Y\160\160\001\000\163\001\000\176@\144\160\001\003\164\000b\144\160\001\003\165\000c\145\160\160\001\003\166\000d\160\160\001\000\166\001\000\223@\145\160\160\001\003\167\000e\160\160\001\000\167\001\000\235@\145\160\160\001\003\168\000f\160\160\001\000\168\000p@\145\160\160\001\003\169\000g\160\160\001\000\169\001\000\251@\144\160\001\003\170\000h\145\160\160\001\003\171\000i\160\160\001\000\171\001\000\238@\145\160\160\001\003\172\001\000\177\160\160\001\000\172\001\000\239@\145\160\160\001\003\173\001\000\178\160\160\001\000\173\001\000\202@\144\160\001\003\174\001\000\179\144\160\001\003\175\001\000\181\145\160\160\001\000\176\001\000\144\160\160\001\003\176\001\000\205@\145\160\160\001\003\177\001\000\138\160\160\001\000\177\001\000\218@\145\160\160\001\003\178\001\000\139\160\160\001\000\178\001\000\234@\145\160\160\001\003\179\001\000\140\160\160\001\000\179\001\000\250@\145\160\160\001\003\180\001\000\141\160\160\001\000\180\001\000\160@\144\160\001\003\181\001\000\142\144\160\001\003\182\001\000\143\144\160\001\003\183\001\000\154\144\160\001\003\184\001\000\155\144\160\001\003\185\001\000\156\144\160\001\003\186\001\000\157\145\160\160\001\003\187\001\000\158\160\160\001\000\187\001\000\254@\144\160\001\003\188\001\000\159\145\160\160\001\003\189\001\000\170\160\160\001\000\189\001\000\219@\144\160\001\003\190\001\000\171\144\160\001\003\191\001\000\172\144\160\001\003\192\001\000\173\144\160\001\003\193\001\000\174\144\160\001\003\194\001\000\186\144\160\001\003\195\001\000\175\144\160\001\003\196\001\000\187\144\160\001\003\197\001\000\188\144\160\001\003\198\001\000\189\144\160\001\003\199\001\000\190\144\160\001\003\200\001\000\191\144\160\001\003\201\001\000\203\144\160\001\003\202\001\000\180\144\160\001\003\203\001\000\184\144\160\001\003\204\001\000\182\144\160\001\003\205\001\000\183\144\160\001\003\206\001\000\185@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let adobe_standard_encoding_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\031\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`abcdef\001 \025hijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\001 \024\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\161\001\000\162\001\000\163\001 D\001\000\165\001\001\146\001\000\167\001\000\164g\001 \028\001\000\171\001 9\001 :\002\000\000\251\001\002\000\000\251\002\000\255\001 \019\001 \001 !\001\000\183\000\255\001\000\182\001 \"\001 \026\001 \030\001 \029\001\000\187\001 &\001 0\000\255\001\000\191\000\255\000`\001\000\180\001\002\198\001\002\220\001\000\175\001\002\216\001\002\217\001\000\168\000\255\001\002\218\001\000\184\000\255\001\002\221\001\002\219\001\002\199\001 \020\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\198\000\255\001\000\170\000\255\000\255\000\255\000\255\001\001A\001\000\216\001\001R\001\000\186\000\255\000\255\000\255\000\255\000\255\001\000\230\000\255\000\255\000\255\001\0011\000\255\000\255\001\001B\001\000\248\001\001S\001\000\223\000\255\000\255\000\255\000\255" 0 : int array);;
-let adobe_standard_encoding_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\004K\000\000\000\000\000\000\004&\000\000\004&\008\000\004\000\000@\144\160\002\000\000\251\001\001\000\174\144\160\002\000\000\251\002\001\000\175@@@@@@@@@@@@@@@@\144\160\001 \019\001\000\177\144\160\001 \020\001\000\208@@@\144\160\001 \024\000`\144\160\001 \025g\144\160\001 \026\001\000\184@\144\160\001 \028\001\000\170\144\160\001 \029\001\000\186\144\160\001 \030\001\000\185@\145\160\160``\160\160\001 \001\000\178@\145\160\160aa\160\160\001 !\001\000\179@\145\160\160bb\160\160\001 \"\001\000\183@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\188@\144\160g\001\000\169\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\189@\145\160\160qq\160\160\001\0011\001\000\245@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\172@\145\160\160zz\160\160\001 :\001\000\173@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\232@\145\160\160\000B\000B\160\160\001\001B\001\000\248@\144\160\000C\000C\145\160\160\000D\000D\160\160\001 D\001\000\164@\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\234@\145\160\160\000S\000S\160\160\001\001S\001\000\250@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\001\000\193\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~@@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\166@@@@@@@@@@@@@@\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\168\144\160\001\000\165\001\000\165@\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\200@\144\160\001\000\170\001\000\227\144\160\001\000\171\001\000\171@@@\144\160\001\000\175\001\000\197@@@@\144\160\001\000\180\001\000\194@\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\180\144\160\001\000\184\001\000\203@\144\160\001\000\186\001\000\235\144\160\001\000\187\001\000\187@@@\144\160\001\000\191\001\000\191@@@@@@\145\160\160\001\002\198\001\000\195\160\160\001\000\198\001\000\225@\144\160\001\002\199\001\000\207@@@@@@@@@@@@@@@@\145\160\160\001\002\216\001\000\198\160\160\001\000\216\001\000\233@\144\160\001\002\217\001\000\199\144\160\001\002\218\001\000\202\144\160\001\002\219\001\000\206\144\160\001\002\220\001\000\196\144\160\001\002\221\001\000\205@\144\160\001\000\223\001\000\251@@@@@@\144\160\001\000\230\001\000\241@@@@@@@@@@@@@@@@@\144\160\001\000\248\001\000\249@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let adobe_symbol_encoding_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\192\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`a\001\"\000c\001\"\003ef\001\"\011hi\001\"\023kl\001\"\018nopqrstuvwxyz{|}~\127\001\"E\001\003\145\001\003\146\001\003\167\001\003\148\001\003\149\001\003\166\001\003\147\001\003\151\001\003\153\001\003\209\001\003\154\001\003\155\001\003\156\001\003\157\001\003\159\001\003\160\001\003\152\001\003\161\001\003\163\001\003\164\001\003\165\001\003\194\001\003\169\001\003\158\001\003\168\001\003\150\000[\001\"4\000]\001\"\165\000_\002\000\000\248\229\001\003\177\001\003\178\001\003\199\001\003\180\001\003\181\001\003\198\001\003\179\001\003\183\001\003\185\001\003\213\001\003\186\001\003\187\001\003\188\001\003\189\001\003\191\001\003\192\001\003\184\001\003\193\001\003\195\001\003\196\001\003\197\001\003\214\001\003\201\001\003\190\001\003\200\001\003\182\000{\000|\000}\001\"<\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001 \172\001\003\210\001 2\001\"d\001 D\001\"\030\001\001\146\001&c\001&f\001&e\001&`\001!\148\001!\144\001!\145\001!\146\001!\147\001\000\176\001\000\177\001 3\001\"e\001\000\215\001\"\029\001\"\002\001 \"\001\000\247\001\"`\001\"a\001\"H\001 &\002\000\000\248\230\002\000\000\248\231\001!\181\001!5\001!\017\001!\028\001!\024\001\"\151\001\"\149\001\"\005\001\")\001\"*\001\"\131\001\"\135\001\"\132\001\"\130\001\"\134\001\"\008\001\"\t\001\" \001\"\007\002\000\000\246\218\002\000\000\246\217\002\000\000\246\219\001\"\015\001\"\026\001\"\197\001\000\172\001\"'\001\"(\001!\212\001!\208\001!\209\001!\210\001!\211\001%\202\001#)\002\000\000\248\232\002\000\000\248\233\002\000\000\248\234\001\"\017\002\000\000\248\235\002\000\000\248\236\002\000\000\248\237\002\000\000\248\238\002\000\000\248\239\002\000\000\248\240\002\000\000\248\241\002\000\000\248\242\002\000\000\248\243\002\000\000\248\244\000\255\001#*\001\"+\001# \002\000\000\248\245\001#!\002\000\000\248\246\002\000\000\248\247\002\000\000\248\248\002\000\000\248\249\002\000\000\248\250\002\000\000\248\251\002\000\000\248\252\002\000\000\248\253\002\000\000\248\254\000\255" 0 : int array);;
-let adobe_symbol_encoding_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\t\000\000\000\000\000\000\005)\000\000\005)\008\000\004\000\000\144\160\001\"\000b@\144\160\001\"\002\001\000\182\144\160\001\"\003d@\144\160\001\"\005\001\000\198@\144\160\001\"\007\001\000\209\144\160\001\"\008\001\000\206\144\160\001\"\t\001\000\207@\144\160\001\"\011g@@@\144\160\001\"\015\001\000\213@\145\160\160\001!\017\001\000\193\160\160\001\"\017\001\000\229@\144\160\001\"\018m@@@@\144\160\001\"\023j\144\160\001!\024\001\000\195@\144\160\001\"\026\001\000\214@\144\160\001!\028\001\000\194\144\160\001\"\029\001\000\181\144\160\001\"\030\001\000\165@\145\160\160``\160\160\001\" \001\000\208\160\160\001# \001\000\243@\145\160\160aa\160\160\001#!\001\000\245@\144\160\001 \"\001\000\183\144\160cc@\144\160ee\145\160\160ff\160\160\001 &\001\000\188@\144\160\001\"'\001\000\217\145\160\160hh\160\160\001\"(\001\000\218@\145\160\160ii\160\160\001\")\001\000\199\160\160\001#)\001\000\225@\145\160\160\001\"*\001\000\200\160\160\001#*\001\000\241@\145\160\160kk\160\160\001\"+\001\000\242@\144\160ll@\144\160nn\144\160oo\144\160pp\144\160qq\145\160\160rr\160\160\001 2\001\000\162@\145\160\160ss\160\160\001 3\001\000\178@\145\160\160tt\160\160\001\"4\000\\@\145\160\160uu\160\160\001!5\001\000\192@\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001\"<\000~@\144\160}}\144\160~~\144\160\127\127@@@@\144\160\001 D\001\000\164\144\160\001\"E\000@@@\144\160\001\"H\001\000\187@@@@@@@@@@@@@@@@@@\144\160\000[\000[@\144\160\000]\000]@\144\160\000_\000_\145\160\160\001&`\001\000\170\160\160\001\"`\001\000\185@\144\160\001\"a\001\000\186@\144\160\001&c\001\000\167\144\160\001\"d\001\000\163\145\160\160\001&e\001\000\169\160\160\001\"e\001\000\179@\144\160\001&f\001\000\168@@@@@@@@@@@@@@@@@@@@\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}@@@@\144\160\001\"\130\001\000\204\144\160\001\"\131\001\000\201\144\160\001\"\132\001\000\203@\144\160\001\"\134\001\000\205\144\160\001\"\135\001\000\202@@@@@@@@\144\160\001!\144\001\000\172\145\160\160\001\003\145\000A\160\160\001!\145\001\000\173@\145\160\160\001\003\146\000B\160\160\001\001\146\001\000\166\160\160\001!\146\001\000\174@\145\160\160\001\003\147\000G\160\160\001!\147\001\000\175@\145\160\160\001\003\148\000D\160\160\001!\148\001\000\171@\145\160\160\001\003\149\000E\160\160\001\"\149\001\000\197@\144\160\001\003\150\000Z\145\160\160\001\003\151\000H\160\160\001\"\151\001\000\196@\144\160\001\003\152\000Q\144\160\001\003\153\000I\144\160\001\003\154\000K\144\160\001\003\155\000L\144\160\001\003\156\000M\144\160\001\003\157\000N\144\160\001\003\158\000X\144\160\001\003\159\000O\144\160\001\003\160\000P\144\160\001\003\161\000R@\144\160\001\003\163\000S\144\160\001\003\164\000T\145\160\160\001\003\165\000U\160\160\001\"\165\000^@\144\160\001\003\166\000F\144\160\001\003\167\000C\144\160\001\003\168\000Y\144\160\001\003\169\000W@@\145\160\160\001 
\172\001\000\160\160\160\001\000\172\001\000\216@@@@\144\160\001\000\176\001\000\176\145\160\160\001\003\177\000a\160\160\001\000\177\001\000\177@\144\160\001\003\178\000b\144\160\001\003\179\000g\144\160\001\003\180\000d\145\160\160\001\003\181\000e\160\160\001!\181\001\000\191@\144\160\001\003\182\000z\144\160\001\003\183\000h\144\160\001\003\184\000q\144\160\001\003\185\000i\144\160\001\003\186\000k\144\160\001\003\187\000l\144\160\001\003\188\000m\144\160\001\003\189\000n\144\160\001\003\190\000x\144\160\001\003\191\000o\144\160\001\003\192\000p\144\160\001\003\193\000r\144\160\001\003\194\000V\144\160\001\003\195\000s\144\160\001\003\196\000t\145\160\160\001\003\197\000u\160\160\001\"\197\001\000\215@\144\160\001\003\198\000f\144\160\001\003\199\000c\144\160\001\003\200\000y\144\160\001\003\201\000w\144\160\001%\202\001\000\224@@@@@\144\160\001!\208\001\000\220\145\160\160\001\003\209\000J\160\160\001!\209\001\000\221@\145\160\160\001\003\210\001\000\161\160\160\001!\210\001\000\222@\144\160\001!\211\001\000\223\144\160\001!\212\001\000\219\144\160\001\003\213\000j\144\160\001\003\214\000v\144\160\001\000\215\001\000\180@\144\160\002\000\000\246\217\001\000\211\144\160\002\000\000\246\218\001\000\210\144\160\002\000\000\246\219\001\000\212@@@@@@@@@\144\160\002\000\000\248\229\000`\144\160\002\000\000\248\230\001\000\189\144\160\002\000\000\248\231\001\000\190\144\160\002\000\000\248\232\001\000\226\144\160\002\000\000\248\233\001\000\227\144\160\002\000\000\248\234\001\000\228\144\160\002\000\000\248\235\001\000\230\144\160\002\000\000\248\236\001\000\231\144\160\002\000\000\248\237\001\000\232\144\160\002\000\000\248\238\001\000\233\144\160\002\000\000\248\239\001\000\234\144\160\002\000\000\248\240\001\000\235\144\160\002\000\000\248\241\001\000\236\144\160\002\000\000\248\242\001\000\237\144\160\002\000\000\248\243\001\000\238\144\160\002\000\000\248\244\001\000\239\144\160\002\000\000\248\245\001\000\244\144\160\002\000\000\248\246\001\000\246\145\160\160\001\000\247\001\000\184\160\160\002\000\000\248\247\001\000\247@\144\160\002\000\000\248\248\001\000\248\144\160\002\000\000\248\249\001\000\249\144\160\002\000\000\248\250\001\000\250\144\160\002\000\000\248\251\001\000\251\144\160\002\000\000\248\252\001\000\252\144\160\002\000\000\248\253\001\000\253\144\160\002\000\000\248\254\001\000\254@" 0 : Netmappings.from_uni_list array);;
- let adobe_zapf_dingbats_encoding_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\233\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`\001'\001\001'\002\001'\003\001'\004\001&\014\001'\006\001'\007\001'\008\001'\t\001&\027\001&\030\001'\012\001'\013\001'\014\001'\015\001'\016\001'\017\001'\018\001'\019\001'\020\001'\021\001'\022\001'\023\001'\024\001'\025\001'\026\001'\027\001'\028\001'\029\001'\030\001'\031\001' \001'!\001'\"\001'#\001'$\001'%\001'&\001''\001&\005\001')\001'*\001'+\001',\001'-\001'.\001'/\001'0\001'1\001'2\001'3\001'4\001'5\001'6\001'7\001'8\001'9\001':\001';\001'<\001'=\001'>\001'?\001'@\001'A\001'B\001'C\001'D\001'E\001'F\001'G\001'H\001'I\001'J\001'K\001%\207\001'M\001%\160\001'O\001'P\001'Q\001'R\001%\178\001%\188\001%\198\001'V\001%\215\001'X\001'Y\001'Z\001'[\001'\\\001']\001'^\000\255\002\000\000\248\215\002\000\000\248\216\002\000\000\248\217\002\000\000\248\218\002\000\000\248\219\002\000\000\248\220\002\000\000\248\221\002\000\000\248\222\002\000\000\248\223\002\000\000\248\224\002\000\000\248\225\002\000\000\248\226\002\000\000\248\227\002\000\000\248\228\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001'a\001'b\001'c\001'd\001'e\001'f\001'g\001&c\001&f\001&e\001&`\001$`\001$a\001$b\001$c\001$d\001$e\001$f\001$g\001$h\001$i\001'v\001'w\001'x\001'y\001'z\001'{\001'|\001'}\001'~\001'\127\001'\128\001'\129\001'\130\001'\131\001'\132\001'\133\001'\134\001'\135\001'\136\001'\137\001'\138\001'\139\001'\140\001'\141\001'\142\001'\143\001'\144\001'\145\001'\146\001'\147\001'\148\001!\146\001!\148\001!\149\001'\152\001'\153\001'\154\001'\155\001'\156\001'\157\001'\158\001'\159\001'\160\001'\161\001'\162\001'\163\001'\164\001'\165\001'\166\001'\167\001'\168\001'\169\001'\170\001'\171\001'\172\001'\173\001'\174\001'\175\000\255\001'\177\001'\178\001'\179\001'\180\001'\181\001'\182\001'\183\001'\184\001'\185\001'\186\001'\187\001'\188\001'\189\001'\190\000\255" 0 : int array);;
-let adobe_zapf_dingbats_encoding_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006_\000\000\000\000\000\000\005>\000\000\005>\008\000\004\000\000@\144\160\001'\001a\144\160\001'\002b\144\160\001'\003c\144\160\001'\004d\144\160\001&\005\000H\144\160\001'\006f\144\160\001'\007g\144\160\001'\008h\144\160\001'\ti@@\144\160\001'\012l\144\160\001'\013m\145\160\160\001&\014e\160\160\001'\014n@\144\160\001'\015o\144\160\001'\016p\144\160\001'\017q\144\160\001'\018r\144\160\001'\019s\144\160\001'\020t\144\160\001'\021u\144\160\001'\022v\144\160\001'\023w\144\160\001'\024x\144\160\001'\025y\144\160\001'\026z\145\160\160\001&\027j\160\160\001'\027{@\144\160\001'\028|\144\160\001'\029}\145\160\160\001&\030k\160\160\001'\030~@\144\160\001'\031\127\145\160\160``\160\160\001' \000@@\144\160\001'!\000A\144\160\001'\"\000B\144\160\001'#\000C\144\160\001'$\000D\144\160\001'%\000E\144\160\001'&\000F\144\160\001''\000G@\144\160\001')\000I\144\160\001'*\000J\144\160\001'+\000K\144\160\001',\000L\144\160\001'-\000M\144\160\001'.\000N\144\160\001'/\000O\144\160\001'0\000P\144\160\001'1\000Q\144\160\001'2\000R\144\160\001'3\000S\144\160\001'4\000T\144\160\001'5\000U\144\160\001'6\000V\144\160\001'7\000W\144\160\001'8\000X\144\160\001'9\000Y\144\160\001':\000Z\144\160\001';\000[\144\160\001'<\000\\\144\160\001'=\000]\144\160\001'>\000^\144\160\001'?\000_\144\160\001'@\000`\144\160\001'A\000a\144\160\001'B\000b\144\160\001'C\000c\144\160\001'D\000d\144\160\001'E\000e\144\160\001'F\000f\144\160\001'G\000g\144\160\001'H\000h\144\160\001'I\000i\144\160\001'J\000j\144\160\001'K\000k@\144\160\001'M\000m@\144\160\001'O\000o\144\160\001'P\000p\144\160\001'Q\000q\144\160\001'R\000r@@@\144\160\001'V\000v@\144\160\001'X\000x\144\160\001'Y\000y\144\160\001'Z\000z\144\160\001'[\000{\144\160\001'\\\000|\144\160\001']\000}\144\160\001'^\000~@\145\160\160\001&`\001\000\171\160\160\001$`\001\000\172@\145\160\160\001'a\001\000\161\160\160\001$a\001\000\173@\145\160\160\001'b\001\000\162\160\160\001$b\001\000\174@\145\160\160\001'c\001\000\163\160\160\001&c\001\000\168\160\160\001$c\001\000\175@\145\160\160\001'd\001\000\164\160\160\001$d\001\000\176@\145\160\160\001'e\001\000\165\160\160\001&e\001\000\170\160\160\001$e\001\000\177@\145\160\160\001'f\001\000\166\160\160\001&f\001\000\169\160\160\001$f\001\000\178@\145\160\160\001'g\001\000\167\160\160\001$g\001\000\179@\144\160\001$h\001\000\180\144\160\001$i\001\000\181@@@@@@@@@@@@\144\160\001'v\001\000\182\144\160\001'w\001\000\183\144\160\001'x\001\000\184\144\160\001'y\001\000\185\144\160\001'z\001\000\186\144\160\001'{\001\000\187\144\160\001'|\001\000\188\144\160\001'}\001\000\189\144\160\001'~\001\000\190\144\160\001'\127\001\000\191\144\160\001'\128\001\000\192\144\160\001'\129\001\000\193\144\160\001'\130\001\000\194\144\160\001'\131\001\000\195\144\160\001'\132\001\000\196\144\160\001'\133\001\000\197\144\160\001'\134\001\000\198\144\160\001'\135\001\000\199\144\160\001'\136\001\000\200\144\160\001'\137\001\000\201\144\160\001'\138\001\000\202\144\160\001'\139\001\000\203\144\160\001'\140\001\000\204\144\160\001'\141\001\000\205\144\160\001'\142\001\000\206\144\160\001'\143\001\000\207\144\160\001'\144\001\000\208\144\160\001'\145\001\000\209\145\160\160\001'\146\001\000\210\160\160\001!\146\001\000\213@\144\160\001'\147\001\000\211\145\160\160\001'\148\001\000\212\160\160\001!\148\001\000\214@\144\160\001!\149\001\000\215@@\144\160\001'\152\001\000\216\144\160\001'\153\001\000\217\144\160\001'\154\001\000\218\144\160\001'\155\001\000\219\144\160\001'\
156\001\000\220\144\160\001'\157\001\000\221\144\160\001'\158\001\000\222\144\160\001'\159\001\000\223\145\160\160\001%\160\000n\160\160\001'\160\001\000\224@\144\160\001'\161\001\000\225\144\160\001'\162\001\000\226\144\160\001'\163\001\000\227\144\160\001'\164\001\000\228\144\160\001'\165\001\000\229\144\160\001'\166\001\000\230\144\160\001'\167\001\000\231\144\160\001'\168\001\000\232\144\160\001'\169\001\000\233\144\160\001'\170\001\000\234\144\160\001'\171\001\000\235\144\160\001'\172\001\000\236\144\160\001'\173\001\000\237\144\160\001'\174\001\000\238\144\160\001'\175\001\000\239@\144\160\001'\177\001\000\241\145\160\160\001%\178\000s\160\160\001'\178\001\000\242@\144\160\001'\179\001\000\243\144\160\001'\180\001\000\244\144\160\001'\181\001\000\245\144\160\001'\182\001\000\246\144\160\001'\183\001\000\247\144\160\001'\184\001\000\248\144\160\001'\185\001\000\249\144\160\001'\186\001\000\250\144\160\001'\187\001\000\251\145\160\160\001%\188\000t\160\160\001'\188\001\000\252@\144\160\001'\189\001\000\253\144\160\001'\190\001\000\254@@@@@@@\144\160\001%\198\000u@@@@@@@@\144\160\001%\207\000l@@@@@@@\145\160\160\001%\215\000w\160\160\002\000\000\248\215\001\000\128@\144\160\002\000\000\248\216\001\000\129\144\160\002\000\000\248\217\001\000\130\144\160\002\000\000\248\218\001\000\131\144\160\002\000\000\248\219\001\000\132\144\160\002\000\000\248\220\001\000\133\144\160\002\000\000\248\221\001\000\134\144\160\002\000\000\248\222\001\000\135\144\160\002\000\000\248\223\001\000\136\144\160\002\000\000\248\224\001\000\137\144\160\002\000\000\248\225\001\000\138\144\160\002\000\000\248\226\001\000\139\144\160\002\000\000\248\227\001\000\140\144\160\002\000\000\248\228\001\000\141@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let jis0201_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\164\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\001\000\165\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\001 >\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\002\000\000\255a\002\000\000\255b\002\000\000\255c\002\000\000\255d\002\000\000\255e\002\000\000\255f\002\000\000\255g\002\000\000\255h\002\000\000\255i\002\000\000\255j\002\000\000\255k\002\000\000\255l\002\000\000\255m\002\000\000\255n\002\000\000\255o\002\000\000\255p\002\000\000\255q\002\000\000\255r\002\000\000\255s\002\000\000\255t\002\000\000\255u\002\000\000\255v\002\000\000\255w\002\000\000\255x\002\000\000\255y\002\000\000\255z\002\000\000\255{\002\000\000\255|\002\000\000\255}\002\000\000\255~\002\000\000\255\127\002\000\000\255\128\002\000\000\255\129\002\000\000\255\130\002\000\000\255\131\002\000\000\255\132\002\000\000\255\133\002\000\000\255\134\002\000\000\255\135\002\000\000\255\136\002\000\000\255\137\002\000\000\255\138\002\000\000\255\139\002\000\000\255\140\002\000\000\255\141\002\000\000\255\142\002\000\000\255\143\002\000\000\255\144\002\000\000\255\145\002\000\000\255\146\002\000\000\255\147\002\000\000\255\148\002\000\000\255\149\002\000\000\255\150\002\000\000\255\151\002\000\000\255\152\002\000\000\255\153\002\000\000\255\154\002\000\000\255\155\002\000\000\255\156\002\000\000\255\157\002\000\000\255\158\002\000\000\255\159\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255" 0 : int array);;
-let jis0201_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0053\000\000\000\000\000\000\004\143\000\000\004\143\008\000\004\000\000@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\145\160\160~~\160\160\001 >\000~@\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[@\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\145\160\160\000a\000a\160\160\002\000\000\255a\001\000\161@\145\160\160\000b\000b\160\160\002\000\000\255b\001\000\162@\145\160\160\000c\000c\160\160\002\000\000\255c\001\000\163@\145\160\160\000d\000d\160\160\002\000\000\255d\001\000\164@\145\160\160\000e\000e\160\160\002\000\000\255e\001\000\165@\145\160\160\000f\000f\160\160\002\000\000\255f\001\000\166@\145\160\160\000g\000g\160\160\002\000\000\255g\001\000\167@\145\160\160\000h\000h\160\160\002\000\000\255h\001\000\168@\145\160\160\000i\000i\160\160\002\000\000\255i\001\000\169@\145\160\160\000j\000j\160\160\002\000\000\255j\001\000\170@\145\160\160\000k\000k\160\160\002\000\000\255k\001\000\171@\145\160\160\000l\000l\160\160\002\000\000\255l\001\000\172@\145\160\160\000m\000m\160\160\002\000\000\255m\001\000\173@\145\160\160\000n\000n\160\160\002\000\000\255n\001\000\174@\145\160\160\000o\000o\160\160\002\000\000\255o\001\000\175@\145\160\160\000p\000p\160\160\002\000\000\255p\001\000\176@\145\160\160\000q\000q\160\160\002\000\000\255q\001\000\177@\145\160\160\000r\000r\160\160\002\000\000\255r\001\000\178@\145\160\160\000s\000s\160\160\002\000\000\255s\001\000\179@\145\160\160\000t\000t\160\160\002\000\000\255t\001\000\180@\145\160\160\000u\000u\160\160\002\000\000\255u\001\000\181@\145\160\160\000v\000v\160\160\002\000\000\255v\001\000\182@\145\160\160\000w\000w\160\160\002\000\000\255w\001\000\183@\145\160\160\000x\000x\160\160\002\000\000\255x\001\000\184@\145\160\160\000y\000y\160\160\002\000\000\255y\001\000\185@\145\160\160\000z\000z\160\160\002\000\000\255z\001\000\186@\145\160\160\000{\000{\160\160\002\000\000\255{\001\000\187@\145\160\160\000|\000|\160\160\002\000\000\255|\001\000\188@\145\160\160\000}\000}\160\160\002\000\000\255}\001\000\189@\144\160\002\000\000\255~\001\000\190\144\160\002\000\000\255\127\001\000\191\144\160\002\000\000\255\128\001\000\192\144\160\002\000\000\255\129\001\000\193\144\160\002\000\000\255\130\001\000\194\144\160\002\000\000\255\131\001\000\195\144\160\002\000\000\255\132\001\000\196\144\160\002\000\000\255\133\001\000\197\144\160\002\000\000\255\134\001\000\198\144\160\002\000\000\255\135\001\000\199\144\160\002\000\000\255\136\001\000\200\144\160\002\000\000\255\137\001\000\201\144\160\002\000\000\255\138\001\000\202\144\160\002\000\000\255\139\001\000\203\144\160\002\000\000\255\140\001\000\204\144\160\002\000\000\255\141\001\000\205\144\160\002\000\000\255\142\001\000\206\144\160\002\000\000\255\14
3\001\000\207\144\160\002\000\000\255\144\001\000\208\144\160\002\000\000\255\145\001\000\209\144\160\002\000\000\255\146\001\000\210\144\160\002\000\000\255\147\001\000\211\144\160\002\000\000\255\148\001\000\212\144\160\002\000\000\255\149\001\000\213\144\160\002\000\000\255\150\001\000\214\144\160\002\000\000\255\151\001\000\215\144\160\002\000\000\255\152\001\000\216\144\160\002\000\000\255\153\001\000\217\144\160\002\000\000\255\154\001\000\218\144\160\002\000\000\255\155\001\000\219\144\160\002\000\000\255\156\001\000\220\144\160\002\000\000\255\157\001\000\221\144\160\002\000\000\255\158\001\000\222\144\160\002\000\000\255\159\001\000\223@@@@@\144\160\001\000\165\000\\@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let koi8r_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001%\000\001%\002\001%\012\001%\016\001%\020\001%\024\001%\028\001%$\001%,\001%4\001%<\001%\128\001%\132\001%\136\001%\140\001%\144\001%\145\001%\146\001%\147\001# \001%\160\001\"\025\001\"\026\001\"H\001\"d\001\"e\001\000\160\001#!\001\000\176\001\000\178\001\000\183\001\000\247\001%P\001%Q\001%R\001\004Q\001%S\001%T\001%U\001%V\001%W\001%X\001%Y\001%Z\001%[\001%\\\001%]\001%^\001%_\001%`\001%a\001\004\001\001%b\001%c\001%d\001%e\001%f\001%g\001%h\001%i\001%j\001%k\001%l\001\000\169\001\004N\001\0040\001\0041\001\004F\001\0044\001\0045\001\004D\001\0043\001\004E\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001\004O\001\004@\001\004A\001\004B\001\004C\001\0046\001\0042\001\004L\001\004K\001\0047\001\004H\001\004M\001\004I\001\004G\001\004J\001\004.\001\004\016\001\004\017\001\004&\001\004\020\001\004\021\001\004$\001\004\019\001\004%\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004/\001\004 \001\004!\001\004\"\001\004#\001\004\022\001\004\018\001\004,\001\004+\001\004\023\001\004(\001\004-\001\004)\001\004'\001\004*" 0 : int array);;
-let koi8r_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\187\000\000\000\000\000\000\007\153\000\000\007\153\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\128@\145\160\160AA\160\160\001\004\001\001\000\179@\145\160\160BB\160\160\001%\002\001\000\129@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\130@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\131\160\160\001\004\016\001\000\225@\145\160\160QQ\160\160\001\004\017\001\000\226@\145\160\160RR\160\160\001\004\018\001\000\247@\145\160\160SS\160\160\001\004\019\001\000\231@\145\160\160TT\160\160\001%\020\001\000\132\160\160\001\004\020\001\000\228@\145\160\160UU\160\160\001\004\021\001\000\229@\145\160\160VV\160\160\001\004\022\001\000\246@\145\160\160WW\160\160\001\004\023\001\000\250@\145\160\160XX\160\160\001%\024\001\000\133\160\160\001\004\024\001\000\233@\145\160\160YY\160\160\001\"\025\001\000\149\160\160\001\004\025\001\000\234@\145\160\160ZZ\160\160\001\"\026\001\000\150\160\160\001\004\026\001\000\235@\145\160\160[[\160\160\001\004\027\001\000\236@\145\160\160\\\\\160\160\001%\028\001\000\134\160\160\001\004\028\001\000\237@\145\160\160]]\160\160\001\004\029\001\000\238@\145\160\160^^\160\160\001\004\030\001\000\239@\145\160\160__\160\160\001\004\031\001\000\240@\145\160\160``\160\160\001# \001\000\147\160\160\001\004 \001\000\242@\145\160\160aa\160\160\001#!\001\000\155\160\160\001\004!\001\000\243@\145\160\160bb\160\160\001\004\"\001\000\244@\145\160\160cc\160\160\001\004#\001\000\245@\145\160\160dd\160\160\001%$\001\000\135\160\160\001\004$\001\000\230@\145\160\160ee\160\160\001\004%\001\000\232@\145\160\160ff\160\160\001\004&\001\000\227@\145\160\160gg\160\160\001\004'\001\000\254@\145\160\160hh\160\160\001\004(\001\000\251@\145\160\160ii\160\160\001\004)\001\000\253@\145\160\160jj\160\160\001\004*\001\000\255@\145\160\160kk\160\160\001\004+\001\000\249@\145\160\160ll\160\160\001%,\001\000\136\160\160\001\004,\001\000\248@\145\160\160mm\160\160\001\004-\001\000\252@\145\160\160nn\160\160\001\004.\001\000\224@\145\160\160oo\160\160\001\004/\001\000\241@\145\160\160pp\160\160\001\0040\001\000\193@\145\160\160qq\160\160\001\0041\001\000\194@\145\160\160rr\160\160\001\0042\001\000\215@\145\160\160ss\160\160\001\0043\001\000\199@\145\160\160tt\160\160\001%4\001\000\137\160\160\001\0044\001\000\196@\145\160\160uu\160\160\001\0045\001\000\197@\145\160\160vv\160\160\001\0046\001\000\214@\145\160\160ww\160\160\001\0047\001\000\218@\145\160\160xx\160\160\001\0048\001\000\201@\145\160\160yy\160\160\001\0049\001\000\202@\145\160\160zz\160\160\001\004:\001\000\203@\145\160\160{{\160\160\001\004;\001\000\204@\145\160\160||\160\160\001%<\001\000\138\160\160\001\004<\001\000\205@\145\160\160}}\160\160\001\004=\001\000\206@\145\160\160~~\160\160\001\004>\001\000\207@\145\160\160\127\127\160\160\001\004?\001\000\208@\145\160\160\000@\000@\160\160\001\004@\001\000\210@\145\160\160\000A\000A\160\160\001\004A\001\000\211@\145\160\160\000B\000B\160\160\001\004B\001\000\212@\145\160\160\000C\000C\160\160\001\004C\001\000\213@\145\160\160\000D\000D\160\160\001\004D\001\000\198@\145\160\160\000E\000E\160\160\001\004E\001\000\200@\145\160\160\000F\000F\160\160\001\004F\001\000\195@\145\160\160\000G\000G\160\160\001\004G\001\000\222@\145\160\160\000H\000H\160\160\001\"H\001\000\151\160\160\001\004H\001\000\219@\145\160\160\000I\000I\160\160\001\004I\001\000\221@\145\160\160\000J\000J\160\160\001\004J\001\000\223@\14
5\160\160\000K\000K\160\160\001\004K\001\000\217@\145\160\160\000L\000L\160\160\001\004L\001\000\216@\145\160\160\000M\000M\160\160\001\004M\001\000\220@\145\160\160\000N\000N\160\160\001\004N\001\000\192@\145\160\160\000O\000O\160\160\001\004O\001\000\209@\145\160\160\000P\000P\160\160\001%P\001\000\160@\145\160\160\000Q\000Q\160\160\001%Q\001\000\161\160\160\001\004Q\001\000\163@\145\160\160\000R\000R\160\160\001%R\001\000\162@\145\160\160\000S\000S\160\160\001%S\001\000\164@\145\160\160\000T\000T\160\160\001%T\001\000\165@\145\160\160\000U\000U\160\160\001%U\001\000\166@\145\160\160\000V\000V\160\160\001%V\001\000\167@\145\160\160\000W\000W\160\160\001%W\001\000\168@\145\160\160\000X\000X\160\160\001%X\001\000\169@\145\160\160\000Y\000Y\160\160\001%Y\001\000\170@\145\160\160\000Z\000Z\160\160\001%Z\001\000\171@\145\160\160\000[\000[\160\160\001%[\001\000\172@\145\160\160\000\\\000\\\160\160\001%\\\001\000\173@\145\160\160\000]\000]\160\160\001%]\001\000\174@\145\160\160\000^\000^\160\160\001%^\001\000\175@\145\160\160\000_\000_\160\160\001%_\001\000\176@\145\160\160\000`\000`\160\160\001%`\001\000\177@\145\160\160\000a\000a\160\160\001%a\001\000\178@\145\160\160\000b\000b\160\160\001%b\001\000\180@\145\160\160\000c\000c\160\160\001%c\001\000\181@\145\160\160\000d\000d\160\160\001\"d\001\000\152\160\160\001%d\001\000\182@\145\160\160\000e\000e\160\160\001\"e\001\000\153\160\160\001%e\001\000\183@\145\160\160\000f\000f\160\160\001%f\001\000\184@\145\160\160\000g\000g\160\160\001%g\001\000\185@\145\160\160\000h\000h\160\160\001%h\001\000\186@\145\160\160\000i\000i\160\160\001%i\001\000\187@\145\160\160\000j\000j\160\160\001%j\001\000\188@\145\160\160\000k\000k\160\160\001%k\001\000\189@\145\160\160\000l\000l\160\160\001%l\001\000\190@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\139@@@\144\160\001%\132\001\000\140@@@\144\160\001%\136\001\000\141@@@\144\160\001%\140\001\000\142@@@\144\160\001%\144\001\000\143\144\160\001%\145\001\000\144\144\160\001%\146\001\000\145\144\160\001%\147\001\000\146@@@@@@@@@@@@\145\160\160\001%\160\001\000\148\160\160\001\000\160\001\000\154@@@@@@@@@\144\160\001\000\169\001\000\191@@@@@@\144\160\001\000\176\001\000\156@\144\160\001\000\178\001\000\157@@@@\144\160\001\000\183\001\000\158@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\247\001\000\159@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let macroman_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002k\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\255\001\000\196\001\000\197\001\000\199\001\000\201\001\000\209\001\000\214\001\000\220\001\000\225\001\000\224\001\000\226\001\000\228\001\000\227\001\000\229\001\000\231\001\000\233\001\000\232\001\000\234\001\000\235\001\000\237\001\000\236\001\000\238\001\000\239\001\000\241\001\000\243\001\000\242\001\000\244\001\000\246\001\000\245\001\000\250\001\000\249\001\000\251\001\000\252\001 \001\000\176\001\000\162\001\000\163\001\000\167\001 \"\001\000\182\001\000\223\001\000\174\001\000\169\001!\"\001\000\180\001\000\168\001\"`\001\000\198\001\000\216\001\"\030\001\000\177\001\"d\001\"e\001\000\165\001\000\181\001\"\002\001\"\017\001\"\015\001\003\192\001\"+\001\000\170\001\000\186\001\003\169\001\000\230\001\000\248\001\000\191\001\000\161\001\000\172\001\"\026\001\001\146\001\"H\001\"\006\001\000\171\001\000\187\001 &\001\000\160\001\000\192\001\000\195\001\000\213\001\001R\001\001S\001 \019\001 \020\001 \028\001 \029\001 \024\001 \025\001\000\247\001%\202\001\000\255\001\001x\001 D\001 \172\001 9\001 :\002\000\000\251\001\002\000\000\251\002\001 !\001\000\183\001 \026\001 \030\001 0\001\000\194\001\000\202\001\000\193\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\001\000\211\001\000\212\002\000\000\248\255\001\000\210\001\000\218\001\000\219\001\000\217\001\0011\001\002\198\001\002\220\001\000\175\001\002\216\001\002\217\001\002\218\001\000\184\001\002\221\001\002\219\001\002\199" 0 : int array);;
-let macroman_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\135\000\000\000\000\000\000\005\221\000\000\005\221\008\000\004\000\000@\144\160\002\000\000\251\001\001\000\222\145\160\160\001\"\002\001\000\182\160\160\002\000\000\251\002\001\000\223@@@@\144\160\001\"\006\001\000\198@@@@@@@@\144\160\001\"\015\001\000\184@\144\160\001\"\017\001\000\183@\144\160\001 \019\001\000\208\144\160\001 \020\001\000\209@@@\144\160\001 \024\001\000\212\144\160\001 \025\001\000\213\145\160\160\001\"\026\001\000\195\160\160\001 \026\001\000\226@@\144\160\001 \028\001\000\210\144\160\001 \029\001\000\211\145\160\160\001\"\030\001\000\176\160\160\001 \030\001\000\227@@\145\160\160``\160\160\001 \001\000\160@\145\160\160aa\160\160\001 !\001\000\224@\145\160\160bb\160\160\001 \"\001\000\165\160\160\001!\"\001\000\170@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\201@\144\160gg\144\160hh\144\160ii\144\160jj\145\160\160kk\160\160\001\"+\001\000\186@\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\228@\145\160\160qq\160\160\001\0011\001\000\245@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\220@\145\160\160zz\160\160\001 :\001\000\221@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\145\160\160\000D\000D\160\160\001 D\001\000\218@\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\197@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\206@\145\160\160\000S\000S\160\160\001\001S\001\000\207@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\"`\001\000\173@\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\145\160\160\000d\000d\160\160\001\"d\001\000\178@\145\160\160\000e\000e\160\160\001\"e\001\000\179@\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\217@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~@@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\196@@@@@@@@@@@@@\144\160\001\000\160\001\000\202\144\160\001\000\161\001\000\193\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163@\144\160\001\000\165\001\000\180@\144\160\001\000\167\001\000\164\144\160\001\000\168\001\000\172\145\160\160\001\000\169\001\000\169\160\160\001\003\169\001\000\189@\144\160\001\000\170\001\000\187\144\160\001\000\171\001\000\199\145\160\160\001\000\172\001\000\194\160\160\001 
\172\001\000\219@@\144\160\001\000\174\001\000\168\144\160\001\000\175\001\000\248\144\160\001\000\176\001\000\161\144\160\001\000\177\001\000\177@@\144\160\001\000\180\001\000\171\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\166\144\160\001\000\183\001\000\225\144\160\001\000\184\001\000\252@\144\160\001\000\186\001\000\188\144\160\001\000\187\001\000\200@@@\144\160\001\000\191\001\000\192\145\160\160\001\003\192\001\000\185\160\160\001\000\192\001\000\203@\144\160\001\000\193\001\000\231\144\160\001\000\194\001\000\229\144\160\001\000\195\001\000\204\144\160\001\000\196\001\000\128\144\160\001\000\197\001\000\129\145\160\160\001\000\198\001\000\174\160\160\001\002\198\001\000\246@\145\160\160\001\000\199\001\000\130\160\160\001\002\199\001\000\255@\144\160\001\000\200\001\000\233\144\160\001\000\201\001\000\131\145\160\160\001%\202\001\000\215\160\160\001\000\202\001\000\230@\144\160\001\000\203\001\000\232\144\160\001\000\204\001\000\237\144\160\001\000\205\001\000\234\144\160\001\000\206\001\000\235\144\160\001\000\207\001\000\236@\144\160\001\000\209\001\000\132\144\160\001\000\210\001\000\241\144\160\001\000\211\001\000\238\144\160\001\000\212\001\000\239\144\160\001\000\213\001\000\205\144\160\001\000\214\001\000\133@\145\160\160\001\000\216\001\000\175\160\160\001\002\216\001\000\249@\145\160\160\001\000\217\001\000\244\160\160\001\002\217\001\000\250@\145\160\160\001\000\218\001\000\242\160\160\001\002\218\001\000\251@\145\160\160\001\000\219\001\000\243\160\160\001\002\219\001\000\254@\145\160\160\001\000\220\001\000\134\160\160\001\002\220\001\000\247@\144\160\001\002\221\001\000\253@\144\160\001\000\223\001\000\167\144\160\001\000\224\001\000\136\144\160\001\000\225\001\000\135\144\160\001\000\226\001\000\137\144\160\001\000\227\001\000\139\144\160\001\000\228\001\000\138\144\160\001\000\229\001\000\140\144\160\001\000\230\001\000\190\144\160\001\000\231\001\000\141\144\160\001\000\232\001\000\143\144\160\001\000\233\001\000\142\144\160\001\000\234\001\000\144\144\160\001\000\235\001\000\145\144\160\001\000\236\001\000\147\144\160\001\000\237\001\000\146\144\160\001\000\238\001\000\148\144\160\001\000\239\001\000\149@\144\160\001\000\241\001\000\150\144\160\001\000\242\001\000\152\144\160\001\000\243\001\000\151\144\160\001\000\244\001\000\153\144\160\001\000\245\001\000\155\144\160\001\000\246\001\000\154\144\160\001\000\247\001\000\214\144\160\001\000\248\001\000\191\144\160\001\000\249\001\000\157\144\160\001\000\250\001\000\156\144\160\001\000\251\001\000\158\144\160\001\000\252\001\000\159@@\145\160\160\001\000\255\001\000\216\160\160\002\000\000\248\255\001\000\240@" 0 : Netmappings.from_uni_list array);;
- let windows1250_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002@\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\000\255\001 \030\001 &\001 \001 !\000\255\001 0\001\001`\001 9\001\001Z\001\001d\001\001}\001\001y\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\001\001a\001 :\001\001[\001\001e\001\001~\001\001z\001\000\160\001\002\199\001\002\216\001\001A\001\000\164\001\001\004\001\000\166\001\000\167\001\000\168\001\000\169\001\001^\001\000\171\001\000\172\001\000\173\001\000\174\001\001{\001\000\176\001\000\177\001\002\219\001\001B\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\001\005\001\001_\001\000\187\001\001=\001\002\221\001\001>\001\001|\001\001T\001\000\193\001\000\194\001\001\002\001\000\196\001\0019\001\001\006\001\000\199\001\001\012\001\000\201\001\001\024\001\000\203\001\001\026\001\000\205\001\000\206\001\001\014\001\001\016\001\001C\001\001G\001\000\211\001\000\212\001\001P\001\000\214\001\000\215\001\001X\001\001n\001\000\218\001\001p\001\000\220\001\000\221\001\001b\001\000\223\001\001U\001\000\225\001\000\226\001\001\003\001\000\228\001\001:\001\001\007\001\000\231\001\001\013\001\000\233\001\001\025\001\000\235\001\001\027\001\000\237\001\000\238\001\001\015\001\001\017\001\001D\001\001H\001\000\243\001\000\244\001\001Q\001\000\246\001\000\247\001\001Y\001\001o\001\000\250\001\001q\001\000\252\001\000\253\001\001c\001\002\217" 0 : int array);;
-let windows1250_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007+\000\000\000\000\000\000\006\242\000\000\006\242\008\000\004\000\000\144\160@@\144\160AA\145\160\160BB\160\160\001\001\002\001\000\195@\145\160\160CC\160\160\001\001\003\001\000\227@\145\160\160DD\160\160\001\001\004\001\000\165@\145\160\160EE\160\160\001\001\005\001\000\185@\145\160\160FF\160\160\001\001\006\001\000\198@\145\160\160GG\160\160\001\001\007\001\000\230@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\145\160\160NN\160\160\001\001\014\001\000\207@\145\160\160OO\160\160\001\001\015\001\000\239@\145\160\160PP\160\160\001\001\016\001\000\208@\145\160\160QQ\160\160\001\001\017\001\000\240@\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145\160\160\001\001\024\001\000\202@\145\160\160YY\160\160\001 \025\001\000\146\160\160\001\001\025\001\000\234@\145\160\160ZZ\160\160\001 \026\001\000\130\160\160\001\001\026\001\000\204@\145\160\160[[\160\160\001\001\027\001\000\236@\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139\160\160\001\0019\001\000\197@\145\160\160zz\160\160\001 
:\001\000\155\160\160\001\001:\001\000\229@\144\160{{\144\160||\145\160\160}}\160\160\001\001=\001\000\188@\145\160\160~~\160\160\001\001>\001\000\190@\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\163@\145\160\160\000B\000B\160\160\001\001B\001\000\179@\145\160\160\000C\000C\160\160\001\001C\001\000\209@\145\160\160\000D\000D\160\160\001\001D\001\000\241@\144\160\000E\000E\144\160\000F\000F\145\160\160\000G\000G\160\160\001\001G\001\000\210@\145\160\160\000H\000H\160\160\001\001H\001\000\242@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001\001P\001\000\213@\145\160\160\000Q\000Q\160\160\001\001Q\001\000\245@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001\001T\001\000\192@\145\160\160\000U\000U\160\160\001\001U\001\000\224@\144\160\000V\000V\144\160\000W\000W\145\160\160\000X\000X\160\160\001\001X\001\000\216@\145\160\160\000Y\000Y\160\160\001\001Y\001\000\248@\145\160\160\000Z\000Z\160\160\001\001Z\001\000\140@\145\160\160\000[\000[\160\160\001\001[\001\000\156@\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\170@\145\160\160\000_\000_\160\160\001\001_\001\000\186@\145\160\160\000`\000`\160\160\001\001`\001\000\138@\145\160\160\000a\000a\160\160\001\001a\001\000\154@\145\160\160\000b\000b\160\160\001\001b\001\000\222@\145\160\160\000c\000c\160\160\001\001c\001\000\254@\145\160\160\000d\000d\160\160\001\001d\001\000\141@\145\160\160\000e\000e\160\160\001\001e\001\000\157@\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\145\160\160\000n\000n\160\160\001\001n\001\000\217@\145\160\160\000o\000o\160\160\001\001o\001\000\249@\145\160\160\000p\000p\160\160\001\001p\001\000\219@\145\160\160\000q\000q\160\160\001\001q\001\000\251@\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\143@\145\160\160\000z\000z\160\160\001\001z\001\000\159@\145\160\160\000{\000{\160\160\001\001{\001\000\175@\145\160\160\000|\000|\160\160\001\001|\001\000\191@\145\160\160\000}\000}\160\160\001\001}\001\000\142@\145\160\160\000~\000~\160\160\001\001~\001\000\158@\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177@@\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184@@\144\160\001\000\187\001\000\187@@@@@\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194@\144\160\001\000\196\001\000\196@@\145\160\160\001\002\199\001\000\161\160\160\001\000\199\001\000\199@@\144\160\001\000\201\001\000\201@\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206@@@@\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212@\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\002\216\001\000\162\144\160\001\002\217\001\000\255\144\160\001\000\218\001\000\218\144\160\001\002\219\001\000\178\144\160\001\000\220\001\000\220\145\160\160\001\002\221\001\000\189\160\160\001\000\221\001\000\221@@\144\160\001\000\223\001\000\223@\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226@\144\160\001\000\228\001\000\228@@\144\160\001\000\231\001\000\231@\144\160\001\000\233\001\000\233@\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238@@@@\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244@\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247@@\144\160\001\000\250\001\000\250@\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253@@" 0 : Netmappings.from_uni_list array);;
- let windows1251_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002D\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\004\002\001\004\003\001 \026\001\004S\001 \030\001 &\001 \001 !\001 \172\001 0\001\004\t\001 9\001\004\n\001\004\012\001\004\011\001\004\015\001\004R\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\001\004Y\001 :\001\004Z\001\004\\\001\004[\001\004_\001\000\160\001\004\014\001\004^\001\004\008\001\000\164\001\004\144\001\000\166\001\000\167\001\004\001\001\000\169\001\004\004\001\000\171\001\000\172\001\000\173\001\000\174\001\004\007\001\000\176\001\000\177\001\004\006\001\004V\001\004\145\001\000\181\001\000\182\001\000\183\001\004Q\001!\022\001\004T\001\000\187\001\004X\001\004\005\001\004U\001\004W\001\004\016\001\004\017\001\004\018\001\004\019\001\004\020\001\004\021\001\004\022\001\004\023\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004 \001\004!\001\004\"\001\004#\001\004$\001\004%\001\004&\001\004'\001\004(\001\004)\001\004*\001\004+\001\004,\001\004-\001\004.\001\004/\001\0040\001\0041\001\0042\001\0043\001\0044\001\0045\001\0046\001\0047\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001\004@\001\004A\001\004B\001\004C\001\004D\001\004E\001\004F\001\004G\001\004H\001\004I\001\004J\001\004K\001\004L\001\004M\001\004N\001\004O" 0 : int array);;
-let windows1251_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\166\000\000\000\000\000\000\007\129\000\000\007\129\008\000\004\000\000\144\160@@\145\160\160AA\160\160\001\004\001\001\000\168@\145\160\160BB\160\160\001\004\002\001\000\128@\145\160\160CC\160\160\001\004\003\001\000\129@\145\160\160DD\160\160\001\004\004\001\000\170@\145\160\160EE\160\160\001\004\005\001\000\189@\145\160\160FF\160\160\001\004\006\001\000\178@\145\160\160GG\160\160\001\004\007\001\000\175@\145\160\160HH\160\160\001\004\008\001\000\163@\145\160\160II\160\160\001\004\t\001\000\138@\145\160\160JJ\160\160\001\004\n\001\000\140@\145\160\160KK\160\160\001\004\011\001\000\142@\145\160\160LL\160\160\001\004\012\001\000\141@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\161@\145\160\160OO\160\160\001\004\015\001\000\143@\145\160\160PP\160\160\001\004\016\001\000\192@\145\160\160QQ\160\160\001\004\017\001\000\193@\145\160\160RR\160\160\001\004\018\001\000\194@\145\160\160SS\160\160\001 \019\001\000\150\160\160\001\004\019\001\000\195@\145\160\160TT\160\160\001 \020\001\000\151\160\160\001\004\020\001\000\196@\145\160\160UU\160\160\001\004\021\001\000\197@\145\160\160VV\160\160\001!\022\001\000\185\160\160\001\004\022\001\000\198@\145\160\160WW\160\160\001\004\023\001\000\199@\145\160\160XX\160\160\001 \024\001\000\145\160\160\001\004\024\001\000\200@\145\160\160YY\160\160\001 \025\001\000\146\160\160\001\004\025\001\000\201@\145\160\160ZZ\160\160\001 \026\001\000\130\160\160\001\004\026\001\000\202@\145\160\160[[\160\160\001\004\027\001\000\203@\145\160\160\\\\\160\160\001 \028\001\000\147\160\160\001\004\028\001\000\204@\145\160\160]]\160\160\001 \029\001\000\148\160\160\001\004\029\001\000\205@\145\160\160^^\160\160\001 \030\001\000\132\160\160\001\004\030\001\000\206@\145\160\160__\160\160\001\004\031\001\000\207@\145\160\160``\160\160\001 \001\000\134\160\160\001\004 \001\000\208@\145\160\160aa\160\160\001 !\001\000\135\160\160\001\004!\001\000\209@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153\160\160\001\004\"\001\000\210@\145\160\160cc\160\160\001\004#\001\000\211@\145\160\160dd\160\160\001\004$\001\000\212@\145\160\160ee\160\160\001\004%\001\000\213@\145\160\160ff\160\160\001 &\001\000\133\160\160\001\004&\001\000\214@\145\160\160gg\160\160\001\004'\001\000\215@\145\160\160hh\160\160\001\004(\001\000\216@\145\160\160ii\160\160\001\004)\001\000\217@\145\160\160jj\160\160\001\004*\001\000\218@\145\160\160kk\160\160\001\004+\001\000\219@\145\160\160ll\160\160\001\004,\001\000\220@\145\160\160mm\160\160\001\004-\001\000\221@\145\160\160nn\160\160\001\004.\001\000\222@\145\160\160oo\160\160\001\004/\001\000\223@\145\160\160pp\160\160\001 0\001\000\137\160\160\001\0040\001\000\224@\145\160\160qq\160\160\001\0041\001\000\225@\145\160\160rr\160\160\001\0042\001\000\226@\145\160\160ss\160\160\001\0043\001\000\227@\145\160\160tt\160\160\001\0044\001\000\228@\145\160\160uu\160\160\001\0045\001\000\229@\145\160\160vv\160\160\001\0046\001\000\230@\145\160\160ww\160\160\001\0047\001\000\231@\145\160\160xx\160\160\001\0048\001\000\232@\145\160\160yy\160\160\001 9\001\000\139\160\160\001\0049\001\000\233@\145\160\160zz\160\160\001 
:\001\000\155\160\160\001\004:\001\000\234@\145\160\160{{\160\160\001\004;\001\000\235@\145\160\160||\160\160\001\004<\001\000\236@\145\160\160}}\160\160\001\004=\001\000\237@\145\160\160~~\160\160\001\004>\001\000\238@\145\160\160\127\127\160\160\001\004?\001\000\239@\145\160\160\000@\000@\160\160\001\004@\001\000\240@\145\160\160\000A\000A\160\160\001\004A\001\000\241@\145\160\160\000B\000B\160\160\001\004B\001\000\242@\145\160\160\000C\000C\160\160\001\004C\001\000\243@\145\160\160\000D\000D\160\160\001\004D\001\000\244@\145\160\160\000E\000E\160\160\001\004E\001\000\245@\145\160\160\000F\000F\160\160\001\004F\001\000\246@\145\160\160\000G\000G\160\160\001\004G\001\000\247@\145\160\160\000H\000H\160\160\001\004H\001\000\248@\145\160\160\000I\000I\160\160\001\004I\001\000\249@\145\160\160\000J\000J\160\160\001\004J\001\000\250@\145\160\160\000K\000K\160\160\001\004K\001\000\251@\145\160\160\000L\000L\160\160\001\004L\001\000\252@\145\160\160\000M\000M\160\160\001\004M\001\000\253@\145\160\160\000N\000N\160\160\001\004N\001\000\254@\145\160\160\000O\000O\160\160\001\004O\001\000\255@\144\160\000P\000P\145\160\160\000Q\000Q\160\160\001\004Q\001\000\184@\145\160\160\000R\000R\160\160\001\004R\001\000\144@\145\160\160\000S\000S\160\160\001\004S\001\000\131@\145\160\160\000T\000T\160\160\001\004T\001\000\186@\145\160\160\000U\000U\160\160\001\004U\001\000\190@\145\160\160\000V\000V\160\160\001\004V\001\000\179@\145\160\160\000W\000W\160\160\001\004W\001\000\191@\145\160\160\000X\000X\160\160\001\004X\001\000\188@\145\160\160\000Y\000Y\160\160\001\004Y\001\000\154@\145\160\160\000Z\000Z\160\160\001\004Z\001\000\156@\145\160\160\000[\000[\160\160\001\004[\001\000\158@\145\160\160\000\\\000\\\160\160\001\004\\\001\000\157@\144\160\000]\000]\145\160\160\000^\000^\160\160\001\004^\001\000\162@\145\160\160\000_\000_\160\160\001\004_\001\000\159@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@\144\160\001\004\144\001\000\165\144\160\001\004\145\001\000\180@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167@\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\145\160\160\001 \172\001\000\136\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177@@@\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183@@@\144\160\001\000\187\001\000\187@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let windows1252_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002@\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\001\001`\001 9\001\001R\000\255\001\001}\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\001\001a\001 :\001\001S\000\255\001\001~\001\001x\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\000\208\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\000\221\001\000\222\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\000\240\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\000\253\001\000\254\001\000\255" 0 : int array);;
-let windows1252_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\174\000\000\000\000\000\000\006M\000\000\006M\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 :\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\140@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\138@\145\160\160\000a\000a\160\160\001\001a\001\000\154@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\159@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\142@\145\160\160\000~\000~\160\160\001\001~\001\000\158@\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\131@@@@@@@@@@@@@\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\145\160\160\001\002\198\001\000\136\160\160\001\000\198\001\000\198@\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207\144\160\001\000\208\001\000\208\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\145\160\160\001\002\220\001\000\152\160\160\001\000\220\001\000\220@\144\160\001\000\221\001\000\221\144\160\001\000\222\001\000\222\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239\144\160\001\000\240\001\000\240\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253\144\160\001\000\254\001\000\254\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
- let windows1253_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0024\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001 \001 !\000\255\001 0\000\255\001 9\000\255\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\000\255\001 :\000\255\000\255\000\255\000\255\001\000\160\001\003\133\001\003\134\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\000\255\001\000\171\001\000\172\001\000\173\001\000\174\001 \021\001\000\176\001\000\177\001\000\178\001\000\179\001\003\132\001\000\181\001\000\182\001\000\183\001\003\136\001\003\137\001\003\138\001\000\187\001\003\140\001\000\189\001\003\142\001\003\143\001\003\144\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\000\255\001\003\163\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\170\001\003\171\001\003\172\001\003\173\001\003\174\001\003\175\001\003\176\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\194\001\003\195\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\001\003\201\001\003\202\001\003\203\001\003\204\001\003\205\001\003\206\000\255" 0 : int array);;
-let windows1253_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\130\000\000\000\000\000\000\006F\000\000\006F\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\145\160\160UU\160\160\001 \021\001\000\175@\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 :\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@\144\160\001\003\132\001\000\180\144\160\001\003\133\001\000\161\144\160\001\003\134\001\000\162@\144\160\001\003\136\001\000\184\144\160\001\003\137\001\000\185\144\160\001\003\138\001\000\186@\144\160\001\003\140\001\000\188@\144\160\001\003\142\001\000\190\144\160\001\003\143\001\000\191\144\160\001\003\144\001\000\192\144\160\001\003\145\001\000\193\145\160\160\001\001\146\001\000\131\160\160\001\003\146\001\000\194@\144\160\001\003\147\001\000\195\144\160\001\003\148\001\000\196\144\160\001\003\149\001\000\197\144\160\001\003\150\001\000\198\144\160\001\003\151\001\000\199\144\160\001\003\152\001\000\200\144\160\001\003\153\001\000\201\144\160\001\003\154\001\000\202\144\160\001\003\155\001\000\203\144\160\001\003\156\001\000\204\144\160\001\003\157\001\000\205\144\160\001\003\158\001\000\206\144\160\001\003\159\001\000\207\145\160\160\001\000\160\001\000\160\160\160\001\003\160\001\000\208@\144\160\001\003\161\001\000\209@\145\160\160\001\000\163\001\000\163\160\160\001\003\163\001\000\211@\145\160\160\001\000\164\001\
000\164\160\160\001\003\164\001\000\212@\145\160\160\001\000\165\001\000\165\160\160\001\003\165\001\000\213@\145\160\160\001\000\166\001\000\166\160\160\001\003\166\001\000\214@\145\160\160\001\000\167\001\000\167\160\160\001\003\167\001\000\215@\145\160\160\001\000\168\001\000\168\160\160\001\003\168\001\000\216@\145\160\160\001\000\169\001\000\169\160\160\001\003\169\001\000\217@\144\160\001\003\170\001\000\218\145\160\160\001\000\171\001\000\171\160\160\001\003\171\001\000\219@\145\160\160\001 \172\001\000\128\160\160\001\000\172\001\000\172\160\160\001\003\172\001\000\220@\145\160\160\001\000\173\001\000\173\160\160\001\003\173\001\000\221@\145\160\160\001\000\174\001\000\174\160\160\001\003\174\001\000\222@\144\160\001\003\175\001\000\223\145\160\160\001\000\176\001\000\176\160\160\001\003\176\001\000\224@\145\160\160\001\000\177\001\000\177\160\160\001\003\177\001\000\225@\145\160\160\001\000\178\001\000\178\160\160\001\003\178\001\000\226@\145\160\160\001\000\179\001\000\179\160\160\001\003\179\001\000\227@\144\160\001\003\180\001\000\228\145\160\160\001\000\181\001\000\181\160\160\001\003\181\001\000\229@\145\160\160\001\000\182\001\000\182\160\160\001\003\182\001\000\230@\145\160\160\001\000\183\001\000\183\160\160\001\003\183\001\000\231@\144\160\001\003\184\001\000\232\144\160\001\003\185\001\000\233\144\160\001\003\186\001\000\234\145\160\160\001\000\187\001\000\187\160\160\001\003\187\001\000\235@\144\160\001\003\188\001\000\236\145\160\160\001\000\189\001\000\189\160\160\001\003\189\001\000\237@\144\160\001\003\190\001\000\238\144\160\001\003\191\001\000\239\144\160\001\003\192\001\000\240\144\160\001\003\193\001\000\241\144\160\001\003\194\001\000\242\144\160\001\003\195\001\000\243\144\160\001\003\196\001\000\244\144\160\001\003\197\001\000\245\144\160\001\003\198\001\000\246\144\160\001\003\199\001\000\247\144\160\001\003\200\001\000\248\144\160\001\003\201\001\000\249\144\160\001\003\202\001\000\250\144\160\001\003\203\001\000\251\144\160\001\003\204\001\000\252\144\160\001\003\205\001\000\253\144\160\001\003\206\001\000\254@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let windows1254_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002>\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\001\001`\001 9\001\001R\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\001\001a\001 :\001\001S\000\255\000\255\001\001x\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\001\030\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\0010\001\001^\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\001\031\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\0011\001\001_\001\000\255" 0 : int array);;
-let windows1254_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\168\000\000\000\000\000\000\006M\000\000\006M\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132\160\160\001\001\030\001\000\208@\145\160\160__\160\160\001\001\031\001\000\240@\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137\160\160\001\0010\001\000\221@\145\160\160qq\160\160\001\0011\001\000\253@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 :\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\140@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\222@\145\160\160\000_\000_\160\160\001\001_\001\000\254@\145\160\160\000`\000`\160\160\001\001`\001\000\138@\145\160\160\000a\000a\160\160\001\001a\001\000\154@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\159@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\131@@@@@@@@@@@@@\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\145\160\160\001\002\198\001\000\136\160\160\001\000\198\001\000\198@\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\145\160\160\001\002\220\001\000\152\160\160\001\000\220\001\000\220@@@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
- let windows1255_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002.\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\000\255\001 9\000\255\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\000\255\001 :\000\255\000\255\000\255\000\255\001\000\160\001\000\161\001\000\162\001\000\163\001 \170\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\215\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\247\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\005\176\001\005\177\001\005\178\001\005\179\001\005\180\001\005\181\001\005\182\001\005\183\001\005\184\001\005\185\000\255\001\005\187\001\005\188\001\005\189\001\005\190\001\005\191\001\005\192\001\005\193\001\005\194\001\005\195\001\005\240\001\005\241\001\005\242\001\005\243\001\005\244\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\005\234\000\255\000\255\001 \014\001 \015\000\255" 0 : int array);;
-let windows1255_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006N\000\000\000\000\000\000\006\027\000\000\006\027\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\145\160\160NN\160\160\001 \014\001\000\253@\145\160\160OO\160\160\001 \015\001\000\254@\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 :\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\131@@@@@@@@@@@@@\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163@\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001 \170\001\000\164\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\145\160\160\001\000\176\001\000\176\160\160\001\005\176\001\000\192@\145\160\160\001\000\177\001\000\177\160\160\001\005\177\001\000\193@\145\160\160\001\000\178\001\000\178\160\160\001\005\178\001\000\194@\145\160\160\001\000\179\001\000\179\160\160\001\005\179\001\000\195@\145\160\160\001\000\180\001\000\180\160\160\001\005\180\001\000\196@\145\160\160\001\000\181\001\000\181\160\160\001\005\181\001\000\197@\145\160\160\001\000\182\001\000\182\160\160\001\005\182\001\000\198@\145\160\160\001\000\183\001\000\183\160\160\001\005\183\001\000\199@\145\160\160\001\000\184\001\000\184\160\160\001\005\184\001\000\200@\145\160\160\001\000\185\001\000\185\160\160\001\005\185\001\000\201@@\145\160\160\001\000\187\001\000\187\160\160\001\005\187\001\000\203@\145\160\160\001\000\188\001\000\188\160\160\001\005\188\001\000\204@\145\160\160\001\000\189\001\000\189\160\160\001\005\189\001\000\205@\145\160\160\001\000\190\001\000\190\160\160\001\005\190\001\000\206@\145\160\160\001\000\191\001\000\191\160\160\001\005\191\001\000\207@\144\160\001\005\192\001\000\208\144\160\001\005\193\001\000\209\144\160\001\005\194\001\000\210\144\160\001\005\195\001\000\211@@\144\160\001\002\198\001\000\136@@@@@@@@@\144\160\001\005\208\001\000\224\144\160\001\005\209\001\000\225\144\160\001\005\210\001\000\226\144\160\001\005\211\001\000\227\144\160\001\005\212\001\000\228\144\160\001\005\213\001\000\229\144\160\001\005\214\001\000\230\145\160\160\001\000\215\001\000\170\160\160\001\005\215\001\000\231@\144\160\001\005\216\001\000\232\144\160\001\005\217\001\000\233\144\160\001\005\218\001\000\234\144\160\001\005\219\001\000\235\145\160\160\001\002\220\001\000\152\160\160\001\005\220\001\000\236@\144\160\001\005\221\001\000\237\144\160\001\005\222\001\000\238\144\160\001\005\223\001\000\239\144\160\001\005\224\001\000\240\144\160\001\005\225\001\000\241\144\160\001\005\226\001\000\242\144\160\001\005\227\001\000\243\144\160\001\005\228\001\000\244\144\160\001\005\229\001\000\245\144\160\001\005\230\001\000\246\144\160\001\005\231\001\000\247\144\160\001\005\232\001\000\248\144\160\001\005\233\001\000\249\144\160\001\005\234\001\000\250@@@@@\144\160\001\005\240\001\000\212\144\160\001\005\241\001\000\213\144\160\001\005\242\001\000\214\144\160\001\005\243\001\000\215\144\160\001\005\244\001\000\216@@\144\160\001\000\247\001\000\186@@@@@@@@" 0 : Netmappings.from_uni_list array);;
- let windows1256_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\001\006~\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\001\006y\001 9\001\001R\001\006\134\001\006\152\001\006\136\001\006\175\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\006\169\001!\"\001\006\145\001 :\001\001S\001 \012\001 \013\001\006\186\001\000\160\001\006\012\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\006\190\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\006\027\001\000\187\001\000\188\001\000\189\001\000\190\001\006\031\001\006\193\001\006!\001\006\"\001\006#\001\006$\001\006%\001\006&\001\006'\001\006(\001\006)\001\006*\001\006+\001\006,\001\006-\001\006.\001\006/\001\0060\001\0061\001\0062\001\0063\001\0064\001\0065\001\0066\001\000\215\001\0067\001\0068\001\0069\001\006:\001\006@\001\006A\001\006B\001\006C\001\000\224\001\006D\001\000\226\001\006E\001\006F\001\006G\001\006H\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\006I\001\006J\001\000\238\001\000\239\001\006K\001\006L\001\006M\001\006N\001\000\244\001\006O\001\006P\001\000\247\001\006Q\001\000\249\001\006R\001\000\251\001\000\252\001 \014\001 \015\001\006\210" 0 : int array);;
-let windows1256_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007W\000\000\000\000\000\000\007\022\000\000\007\022\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001 \012\001\000\157\160\160\001\006\012\001\000\161@\145\160\160MM\160\160\001 \013\001\000\158@\145\160\160NN\160\160\001 \014\001\000\253@\145\160\160OO\160\160\001 \015\001\000\254@\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\145\160\160[[\160\160\001\006\027\001\000\186@\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\145\160\160__\160\160\001\006\031\001\000\191@\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135\160\160\001\006!\001\000\193@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153\160\160\001\006\"\001\000\194@\145\160\160cc\160\160\001\006#\001\000\195@\145\160\160dd\160\160\001\006$\001\000\196@\145\160\160ee\160\160\001\006%\001\000\197@\145\160\160ff\160\160\001 &\001\000\133\160\160\001\006&\001\000\198@\145\160\160gg\160\160\001\006'\001\000\199@\145\160\160hh\160\160\001\006(\001\000\200@\145\160\160ii\160\160\001\006)\001\000\201@\145\160\160jj\160\160\001\006*\001\000\202@\145\160\160kk\160\160\001\006+\001\000\203@\145\160\160ll\160\160\001\006,\001\000\204@\145\160\160mm\160\160\001\006-\001\000\205@\145\160\160nn\160\160\001\006.\001\000\206@\145\160\160oo\160\160\001\006/\001\000\207@\145\160\160pp\160\160\001 0\001\000\137\160\160\001\0060\001\000\208@\145\160\160qq\160\160\001\0061\001\000\209@\145\160\160rr\160\160\001\0062\001\000\210@\145\160\160ss\160\160\001\0063\001\000\211@\145\160\160tt\160\160\001\0064\001\000\212@\145\160\160uu\160\160\001\0065\001\000\213@\145\160\160vv\160\160\001\0066\001\000\214@\145\160\160ww\160\160\001\0067\001\000\216@\145\160\160xx\160\160\001\0068\001\000\217@\145\160\160yy\160\160\001 9\001\000\139\160\160\001\0069\001\000\218@\145\160\160zz\160\160\001 
:\001\000\155\160\160\001\006:\001\000\219@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\006@\001\000\220@\145\160\160\000A\000A\160\160\001\006A\001\000\221@\145\160\160\000B\000B\160\160\001\006B\001\000\222@\145\160\160\000C\000C\160\160\001\006C\001\000\223@\145\160\160\000D\000D\160\160\001\006D\001\000\225@\145\160\160\000E\000E\160\160\001\006E\001\000\227@\145\160\160\000F\000F\160\160\001\006F\001\000\228@\145\160\160\000G\000G\160\160\001\006G\001\000\229@\145\160\160\000H\000H\160\160\001\006H\001\000\230@\145\160\160\000I\000I\160\160\001\006I\001\000\236@\145\160\160\000J\000J\160\160\001\006J\001\000\237@\145\160\160\000K\000K\160\160\001\006K\001\000\240@\145\160\160\000L\000L\160\160\001\006L\001\000\241@\145\160\160\000M\000M\160\160\001\006M\001\000\242@\145\160\160\000N\000N\160\160\001\006N\001\000\243@\145\160\160\000O\000O\160\160\001\006O\001\000\245@\145\160\160\000P\000P\160\160\001\006P\001\000\246@\145\160\160\000Q\000Q\160\160\001\006Q\001\000\248@\145\160\160\000R\000R\160\160\001\001R\001\000\140\160\160\001\006R\001\000\250@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\006y\001\000\138@\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\145\160\160\000~\000~\160\160\001\006~\001\000\129@\144\160\000\127\000\127@@@@@@\144\160\001\006\134\001\000\141@\144\160\001\006\136\001\000\143@@@@@@@@\144\160\001\006\145\001\000\154\144\160\001\001\146\001\000\131@@@@@\144\160\001\006\152\001\000\142@@@@@@@\144\160\001\000\160\001\000\160@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\145\160\160\001\006\169\001\000\152\160\160\001\000\169\001\000\169@@\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\145\160\160\001\006\175\001\000\144\160\160\001\000\175\001\000\175@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\006\186\001\000\159\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\145\160\160\001\006\190\001\000\170\160\160\001\000\190\001\000\190@@@\144\160\001\006\193\001\000\192@@@@\144\160\001\002\198\001\000\136@@@@@@@@@@@\144\160\001\006\210\001\000\255@@@@\144\160\001\000\215\001\000\215@@@@@@@@\144\160\001\000\224\001\000\224@\144\160\001\000\226\001\000\226@@@@\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235@@\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@@@@\144\160\001\000\244\001\000\244@@\144\160\001\000\247\001\000\247@\144\160\001\000\249\001\000\249@\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
- let windows1257_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0029\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\000\255\001 \030\001 &\001 \001 !\000\255\001 0\000\255\001 9\000\255\001\000\168\001\002\199\001\000\184\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\000\255\001 :\000\255\001\000\175\001\002\219\000\255\001\000\160\000\255\001\000\162\001\000\163\001\000\164\000\255\001\000\166\001\000\167\001\000\216\001\000\169\001\001V\001\000\171\001\000\172\001\000\173\001\000\174\001\000\198\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\248\001\000\185\001\001W\001\000\187\001\000\188\001\000\189\001\000\190\001\000\230\001\001\004\001\001.\001\001\000\001\001\006\001\000\196\001\000\197\001\001\024\001\001\018\001\001\012\001\000\201\001\001y\001\001\022\001\001\"\001\0016\001\001*\001\001;\001\001`\001\001C\001\001E\001\000\211\001\001L\001\000\213\001\000\214\001\000\215\001\001r\001\001A\001\001Z\001\001j\001\000\220\001\001{\001\001}\001\000\223\001\001\005\001\001/\001\001\001\001\001\007\001\000\228\001\000\229\001\001\025\001\001\019\001\001\013\001\000\233\001\001z\001\001\023\001\001#\001\0017\001\001+\001\001<\001\001a\001\001D\001\001F\001\000\243\001\001M\001\000\245\001\000\246\001\000\247\001\001s\001\001B\001\001[\001\001k\001\000\252\001\001|\001\001~\001\002\217" 0 : int array);;
-let windows1257_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\234\000\000\000\000\000\000\006\186\000\000\006\186\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\194@\145\160\160AA\160\160\001\001\001\001\000\226@\144\160BB\144\160CC\145\160\160DD\160\160\001\001\004\001\000\192@\145\160\160EE\160\160\001\001\005\001\000\224@\145\160\160FF\160\160\001\001\006\001\000\195@\145\160\160GG\160\160\001\001\007\001\000\227@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\144\160NN\144\160OO\144\160PP\144\160QQ\145\160\160RR\160\160\001\001\018\001\000\199@\145\160\160SS\160\160\001 \019\001\000\150\160\160\001\001\019\001\000\231@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\145\160\160VV\160\160\001\001\022\001\000\203@\145\160\160WW\160\160\001\001\023\001\000\235@\145\160\160XX\160\160\001 \024\001\000\145\160\160\001\001\024\001\000\198@\145\160\160YY\160\160\001 \025\001\000\146\160\160\001\001\025\001\000\230@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153\160\160\001\001\"\001\000\204@\145\160\160cc\160\160\001\001#\001\000\236@\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\145\160\160jj\160\160\001\001*\001\000\206@\145\160\160kk\160\160\001\001+\001\000\238@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\193@\145\160\160oo\160\160\001\001/\001\000\225@\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\205@\145\160\160ww\160\160\001\0017\001\000\237@\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 
:\001\000\155@\145\160\160{{\160\160\001\001;\001\000\207@\145\160\160||\160\160\001\001<\001\000\239@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\217@\145\160\160\000B\000B\160\160\001\001B\001\000\249@\145\160\160\000C\000C\160\160\001\001C\001\000\209@\145\160\160\000D\000D\160\160\001\001D\001\000\241@\145\160\160\000E\000E\160\160\001\001E\001\000\210@\145\160\160\000F\000F\160\160\001\001F\001\000\242@\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\145\160\160\000L\000L\160\160\001\001L\001\000\212@\145\160\160\000M\000M\160\160\001\001M\001\000\244@\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\001\001V\001\000\170@\145\160\160\000W\000W\160\160\001\001W\001\000\186@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001\001Z\001\000\218@\145\160\160\000[\000[\160\160\001\001[\001\000\250@\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\208@\145\160\160\000a\000a\160\160\001\001a\001\000\240@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\145\160\160\000j\000j\160\160\001\001j\001\000\219@\145\160\160\000k\000k\160\160\001\001k\001\000\251@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\216@\145\160\160\000s\000s\160\160\001\001s\001\000\248@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\202@\145\160\160\000z\000z\160\160\001\001z\001\000\234@\145\160\160\000{\000{\160\160\001\001{\001\000\221@\145\160\160\000|\000|\160\160\001\001|\001\000\253@\145\160\160\000}\000}\160\160\001\001}\001\000\222@\145\160\160\000~\000~\160\160\001\001~\001\000\254@\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\141\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\157\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\143\144\160\001\000\185\001\000\185@\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190@@@@@\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\175\144\160\001\002\199\001\000\142@\144\160\001\000\201\001\000\201@@@@@@@@@\144\160\001\000\211\001\000\211@\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\168\144\160\001\002\217\001\000\255@\144\160\001\002\219\001\000\158\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223@@@@\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\191@@\144\160\001\000\233\001\000\233@@@@@@@@@\144\160\001\000\243\001\000\243@\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\184@@@\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
- let windows1258_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002<\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\000\255\001 9\001\001R\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\000\255\001 :\001\001S\000\255\000\255\001\001x\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\001\002\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\003\000\001\000\205\001\000\206\001\000\207\001\001\016\001\000\209\001\003\t\001\000\211\001\000\212\001\001\160\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\001\175\001\003\003\001\000\223\001\000\224\001\000\225\001\000\226\001\001\003\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\003\001\001\000\237\001\000\238\001\000\239\001\001\017\001\000\241\001\003#\001\000\243\001\000\244\001\001\161\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\001\176\001 \171\001\000\255" 0 : int array);;
-let windows1258_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\174\000\000\000\000\000\000\006^\000\000\006^\008\000\004\000\000\145\160\160@@\160\160\001\003\000\001\000\204@\145\160\160AA\160\160\001\003\001\001\000\236@\145\160\160BB\160\160\001\001\002\001\000\195@\145\160\160CC\160\160\001\003\003\001\000\222\160\160\001\001\003\001\000\227@\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\145\160\160II\160\160\001\003\t\001\000\210@\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001\001\016\001\000\208@\145\160\160QQ\160\160\001\001\017\001\000\240@\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\145\160\160cc\160\160\001\003#\001\000\242@\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 :\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\140@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\159@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\131@@@@@@@@@@@@@\145\160\160\001\000\160\001\000\160\160\160\001\001\160\001\000\213@\145\160\160\001\000\161\001\000\161\160\160\001\001\161\001\000\245@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\145\160\160\001\000\171\001\000\171\160\160\001 \171\001\000\254@\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\145\160\160\001\000\175\001\000\175\160\160\001\001\175\001\000\221@\145\160\160\001\000\176\001\000\176\160\160\001\001\176\001\000\253@\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194@\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\145\160\160\001\002\198\001\000\136\160\160\001\000\198\001\000\198@\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209@\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212@\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\145\160\160\001\002\220\001\000\152\160\160\001\000\220\001\000\220@@@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226@\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241@\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244@\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
- Hashtbl.add Netmappings.to_unicode `Enc_windows1258 windows1258_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_windows1258 windows1258_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_windows1257 windows1257_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_windows1257 windows1257_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_windows1256 windows1256_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_windows1256 windows1256_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_windows1255 windows1255_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_windows1255 windows1255_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_windows1254 windows1254_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_windows1254 windows1254_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_windows1253 windows1253_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_windows1253 windows1253_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_windows1252 windows1252_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_windows1252 windows1252_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_windows1251 windows1251_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_windows1251 windows1251_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_windows1250 windows1250_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_windows1250 windows1250_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_macroman macroman_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_macroman macroman_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_koi8r koi8r_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_koi8r koi8r_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_jis0201 jis0201_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_jis0201 jis0201_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_adobe_zapf_dingbats_encoding adobe_zapf_dingbats_encoding_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_adobe_zapf_dingbats_encoding adobe_zapf_dingbats_encoding_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_adobe_symbol_encoding adobe_symbol_encoding_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_adobe_symbol_encoding adobe_symbol_encoding_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_adobe_standard_encoding adobe_standard_encoding_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_adobe_standard_encoding adobe_standard_encoding_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp875 cp875_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp875 cp875_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp874 cp874_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp874 cp874_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp869 cp869_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp869 cp869_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp866 cp866_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp866 cp866_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp865 cp865_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp865 cp865_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp864 cp864_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp864 cp864_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp863 cp863_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp863 cp863_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp862 cp862_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp862 cp862_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp861 cp861_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp861 cp861_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp860 cp860_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp860 cp860_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp857 cp857_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp857 cp857_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp856 cp856_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp856 cp856_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp855 cp855_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp855 cp855_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp852 cp852_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp852 cp852_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp850 cp850_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp850 cp850_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp775 cp775_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp775 cp775_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp737 cp737_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp737 cp737_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp500 cp500_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp500 cp500_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp437 cp437_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp437 cp437_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp424 cp424_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp424 cp424_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp1026 cp1026_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp1026 cp1026_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp1006 cp1006_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp1006 cp1006_from_unicode;
-Hashtbl.add Netmappings.to_unicode `Enc_cp037 cp037_to_unicode;
-Hashtbl.add Netmappings.from_unicode `Enc_cp037 cp037_from_unicode;
-();;
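
The registrations above make each code page's conversion tables available through the Netmappings hash tables, with the Marshal.from_string decoding deferred behind lazy. As an illustration (not part of the deleted file, and the function name is hypothetical), a consumer might pull one of these tables out like this; the indexing convention of the resulting array is defined by Netmappings and is not restated here:

    (* Force the lazily unmarshalled windows-1252 to-Unicode table.
       Raises Not_found if the encoding was not registered. *)
    let windows1252_table () : int array =
      Lazy.force (Hashtbl.find Netmappings.to_unicode `Enc_windows1252)
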
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-type t =
- { s_channel : in_channel;
- s_maxlength : int option;
- s_blocksize : int;
- mutable s_current_length : int;
- mutable s_at_eos : bool;
- mutable s_win_pos : int;
- mutable s_win_len : int;
- s_netbuf : Netbuffer.t;
- s_iobuf : string;
- }
-;;
-
-
-let dump s text =
- print_string ("*** NETSTREAM DUMP " ^ text ^ "\n");
- Printf.printf "current_length=%d at_eos=%b win_pos=%d win_len=%d\n"
- s.s_current_length s.s_at_eos s.s_win_pos s.s_win_len;
- Printf.printf "netbuffer_length=%d netbuffer_size=%d\n"
- (Netbuffer.length s.s_netbuf)
- (String.length(Netbuffer.unsafe_buffer s.s_netbuf));
- Printf.printf "netbuffer=\"%s\"\n"
- (String.escaped(Netbuffer.contents s.s_netbuf));
- print_string "*** ---------------\n";
- flush stdout
-;;
-
-
-let want_another_block s =
- if not s.s_at_eos then begin
- (* How much are we allowed to read? *)
- let m =
- match s.s_maxlength with
- None -> s.s_blocksize
- | Some k -> min (k - s.s_current_length) s.s_blocksize
- in
- (* Read this. *)
- let rec read_block k =
- if k < m then
- let n =
- input s.s_channel s.s_iobuf k (m - k) in
- ( if n > 0 then
- read_block (k+n)
- else (* EOF *)
- k
- )
- else
- k
- in
- let n = read_block 0 in
- (* If n < blocksize, EOS is reached. *)
- Netbuffer.add_sub_string s.s_netbuf s.s_iobuf 0 n;
- s.s_win_len <- s.s_win_len + n;
- s.s_current_length <- s.s_current_length + n;
- s.s_at_eos <- n < s.s_blocksize;
-
- (* dump s "After appending block"; *)
- end
-;;
-
-
-let want s n =
- while not s.s_at_eos && s.s_win_len < n do
- want_another_block s
- done
-;;
-
-
-let want_minimum s =
- want s (s.s_blocksize + s.s_blocksize)
-;;
-
-
-let move s n =
- Netbuffer.delete s.s_netbuf 0 n;
- s.s_win_pos <- s.s_win_pos + n;
- s.s_win_len <- s.s_win_len - n;
- want_minimum s;
- (* dump s "After move"; *)
-;;
-
-
-let create_from_channel ch maxlength blocksize =
- let s =
- { s_channel = ch;
- s_maxlength = maxlength;
- s_blocksize = blocksize;
- s_current_length = 0;
- s_at_eos = false;
- s_win_pos = 0;
- s_win_len = 0;
- s_netbuf = Netbuffer.create (2*blocksize);
- s_iobuf = String.create blocksize;
- }
- in
- want_minimum s;
- s
-;;
-
-
-let create_from_string str =
- let l = String.length str in
- { s_channel = stdin;
- s_maxlength = None;
- s_blocksize = l;
- s_current_length = l;
- s_at_eos = true;
- s_win_pos = 0;
- s_win_len = l;
- s_netbuf =
- ( let nb = Netbuffer.create l in
- Netbuffer.add_string nb str;
- nb
- );
- s_iobuf = "";
- }
-;;
-
-
-let block_size s = s.s_blocksize;;
-
-let current_length s = s.s_current_length;;
-
-let at_eos s = s.s_at_eos;;
-
-let window_position s = s.s_win_pos;;
-
-let window_length s = s.s_win_len;;
-
-let window s = s.s_netbuf;;
-
-let print_stream s =
- Format.printf
- "<NETSTREAM window:%d/%d total_length:%d eof=%b>"
- s.s_win_pos
- s.s_win_len
- s.s_current_length
- s.s_at_eos
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/06/24 20:20:33 gerd
- * Added the toploop printer.
- *
- * Revision 1.1 2000/04/15 13:07:48 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-(* A netstream is an input channel that is read block by block. The
- * fragment of the channel currently loaded into memory is called the
- * current window of the netstream.
- *
- * PICTURE:
- *
- * 0                  window_position     current_length             EOS
- * +------------------+-------------------+--------------------------+
- *                    ====================
- *                     The current window
- *
- * window_length = current_length - window_position
- *
- * The stream automatically keeps the window at a certain minimum length:
- * if possible, the window is at least twice the block size long, where a
- * "block" is the amount of data that is read from the input channel in
- * one step.
- *
- * (The idea is that you choose as block size the number of bytes you want
- * to analyze at once and must therefore hold in memory. You can start
- * your analysis at window_position and proceed up to window_position +
- * blocksize without having to check whether the window is large enough.
- * Only after the first blocksize bytes of the window have been processed
- * does the window need to be enlarged by loading the next block.)
- *
- * If you need a larger window, you can call 'want' (to enlarge the
- * window to a given size) or 'want_another_block' (to load just one more
- * block from the input channel). Note that this affects only the current
- * window, not future windows.
- *
- * Once the first n bytes of the window are no longer needed, you can call
- * 'move' to advance the beginning of the window by n bytes. If the window
- * becomes too small after this operation, it is enlarged until it is
- * twice the block size long or until EOS is reached.
- *)
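
A small sketch of what this means in practice (not part of the deleted file; it assumes the interface below belongs to a module named Netstream, which is not stated here): a stream created from a string holds the whole string in its window and is already at EOS.

    let () =
      let s = Netstream.create_from_string "hello world" in
      assert (Netstream.window_position s = 0);
      assert (Netstream.window_length s = 11);
      assert (Netstream.at_eos s);
      assert (Netbuffer.contents (Netstream.window s) = "hello world")
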
-
-type t
-
-val create_from_channel : in_channel -> int option -> int -> t
- (* create_from_channel ch maxlength blocksize:
- * The new netstream reads from the channel 'ch'. If maxlength = None,
- * the channel is read until EOF. If maxlength = Some n, at most n bytes
- * are read; i.e. the netstream reads until n bytes have been read or
- * until EOF has been reached, whichever comes first. The blocksize
- * specifies the number of bytes to read at once.
- *)
-
-val create_from_string : string -> t
- (* Creates a new netstream from a string. The initial window of this
- * netstream is a copy of the passed string.
- *)
-
-val block_size : t -> int
- (* Returns the (immutable) block size. *)
-
-val current_length : t -> int
- (* Returns the number of bytes read so far. *)
-
-val at_eos : t -> bool
- (* True iff EOS (end of stream) is reached, i.e. the last byte of the
- * window is the last byte of the stream.
- *)
-
-val window_position : t -> int
- (* Returns the absolute position of the current window. *)
-
-val window_length : t -> int
- (* Returns the length of the current window. *)
-
-val window : t -> Netbuffer.t
- (* Returns the current window. *)
-
-val move : t -> int -> unit
- (* move s n:
- * Moves the window: The first n bytes of the current window are
- * discarded. If the window would become smaller than twice the
- * blocksize and if the end of the stream is not yet reached, another
- * block is read from the input channel and appended to the window.
- *
- * PRECONDITION:
- * - n <= window_length
- *)
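
The usual consumption pattern combines the window with move: analyze at most one block of the window, then discard it, which makes the stream refill the window automatically. A sketch (not part of the deleted file), again assuming the module is named Netstream and picking a 4096-byte block size for illustration:

    let scan_channel ch =
      let s = Netstream.create_from_channel ch None 4096 in
      while Netstream.window_length s > 0 do
        let n = min (Netstream.block_size s) (Netstream.window_length s) in
        (* [chunk] starts at absolute position Netstream.window_position s *)
        let chunk = String.sub (Netbuffer.contents (Netstream.window s)) 0 n in
        ignore chunk;                     (* ... analyze chunk here ... *)
        Netstream.move s n                (* discard it; refills the window *)
      done
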
-
-val want : t -> int -> unit
- (* want s n:
- * If the window is smaller than n bytes, the stream tries to enlarge
- * the window so that it is at least n bytes long. If the stream is too
- * short, the enlargement is impossible; in this case the window is made
- * as large as possible.
- *)
-
-val want_another_block : t -> unit
- (* Enlarges the window by another block (if possible, i.e. if the stream
- * is long enough).
- *)
-
-val print_stream : t -> unit
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/06/24 20:20:33 gerd
- * Added the toploop printer.
- *
- * Revision 1.1 2000/04/15 13:07:48 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(* Initialize multi-threading mode: *)
-
-let str_mutex = Mutex.create();;
-let cgi_mutex = Mutex.create();;
-let mappings_mutex = Mutex.create();;
-
-Netstring_str.init_mt
- (fun () -> Mutex.lock str_mutex)
- (fun () -> Mutex.unlock str_mutex);
-Cgi.init_mt
- (fun () -> Mutex.lock cgi_mutex)
- (fun () -> Mutex.unlock cgi_mutex);
-Netmappings.init_mt
- (fun () -> Mutex.lock mappings_mutex)
- (fun () -> Mutex.unlock mappings_mutex)
-;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/08/29 00:45:42 gerd
- * Initializing Netmappings, too
- *
- * Revision 1.1 2000/06/25 21:15:27 gerd
- * Initial revision
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(* This module initializes the multi-threading mode of
- * Netstring. You must link it with every application that
- * uses multi-threading.
- * PITFALL: Link this module _directly_ with the executable;
- * _don't_ put this module into a cma archive! Modules in an archive are
- * only linked in when they are actually referenced, so the initialization
- * code of this module would never run.
- *)
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/06/25 21:15:27 gerd
- * Initial revision
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-let lock = ref (fun () -> ());;
-let unlock = ref (fun () -> ());;
-
-let init_mt new_lock new_unlock =
- lock := new_lock;
- unlock := new_unlock
-;;
-
-let protect f =
- !lock();
- try
- let r = f() in
- !unlock();
- r
- with
- x ->
- !unlock();
- raise x
-;;
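
protect is the core of this module's thread safety: it takes the user-installed lock around a computation and releases it even when an exception escapes, so Str's global match state is never touched concurrently. A hypothetical helper (not in the original file) showing the intended usage for a compound Str operation:

    let count_matches pat s =
      protect
        (fun () ->
           let rec loop pos acc =
             if pos > String.length s then acc
             else
               try
                 ignore (Str.search_forward pat s pos);
                 (* advance past the match; max guards against empty matches *)
                 loop (max (Str.match_end ()) (pos + 1)) (acc + 1)
               with Not_found -> acc
           in
           loop 0 0)
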
-
-type regexp = Str.regexp;;
-type split_result = Str.split_result = Text of string | Delim of string;;
-
-type result =
- { pos : int;
- match_beg : int;
- match_end : int;
- group_beg : int array;
- group_end : int array;
- }
-;;
-
-let regexp s =
- protect
- (fun () -> Str.regexp s)
-;;
-
-let regexp_case_fold s =
- protect
- (fun () -> Str.regexp_case_fold s)
-;;
-
-let quote s =
- protect
- (fun () -> Str.quote s)
-;;
-
-let regexp_string s =
- protect
- (fun () -> Str.regexp_string s)
-;;
-
-let regexp_string_case_fold s =
- protect
- (fun () -> Str.regexp_string_case_fold s)
-;;
-
-let return_result pos n_groups =
- let r =
- { pos = pos;
- match_beg = (try Str.match_beginning() with Not_found -> -1);
- match_end = (try Str.match_end() with Not_found -> -1);
-      group_beg = Array.make n_groups (-1);
-      group_end = Array.make n_groups (-1);
- }
- in
- for g = 0 to n_groups - 1 do
- r.group_beg.(g) <- (try Str.group_beginning (g+1) with Not_found -> -1);
- r.group_end.(g) <- (try Str.group_end (g+1) with Not_found -> -1);
- done;
- r
-;;
-
-let string_match ?(groups = 9) ~pat s ~pos =
- protect
- (fun () ->
- if Str.string_match pat s pos then
- Some (return_result pos groups)
- else
- None
- )
-;;
-
-let string_partial_match ?(groups = 9) ~pat s ~pos =
- protect
- (fun () ->
- if Str.string_partial_match pat s pos then
- Some (return_result pos groups)
- else
- None
- )
-;;
-
-let search_forward ?(groups = 9) ~pat s ~pos =
- protect
- (fun () ->
- let i = Str.search_forward pat s pos in
- i, return_result pos groups
- )
-;;
-
-let search_backward ?(groups = 9) ~pat s ~pos =
- protect
- (fun () ->
- let i = Str.search_backward pat s pos in
- i, return_result pos groups
- )
-;;
-
-let matched_string result s =
- if result.match_beg < 0 or result.match_end < 0 then raise Not_found;
- String.sub s result.match_beg (result.match_end - result.match_beg)
-;;
-
-let match_beginning result =
- if result.match_beg < 0 then raise Not_found;
- result.match_beg
-;;
-
-let match_end result =
- if result.match_end < 0 then raise Not_found;
- result.match_end
-;;
-
-let matched_group result n s =
-  (* valid group numbers are 1 .. Array.length result.group_beg *)
-  if n < 1 || n > Array.length result.group_beg then raise Not_found;
- let gbeg = result.group_beg.(n-1) in
- let gend = result.group_end.(n-1) in
- if gbeg < 0 or gend < 0 then raise Not_found;
- String.sub s gbeg (gend - gbeg)
-;;
-
-let group_beginning result n =
-  if n < 1 || n > Array.length result.group_beg then raise Not_found;
- let gbeg = result.group_beg.(n-1) in
- if gbeg < 0 then raise Not_found else
- gbeg
-;;
-
-let group_end result n =
-  if n < 1 || n > Array.length result.group_end then raise Not_found;
- let gend = result.group_end.(n-1) in
- if gend < 0 then raise Not_found else
- gend
-;;
-
-let global_replace ~pat ~templ s =
- protect
- (fun () ->
- Str.global_replace pat templ s)
-;;
-
-let replace_first ~pat ~templ s =
- protect
- (fun () ->
- Str.replace_first pat templ s)
-;;
-
-let global_substitute ?(groups = 9) ~pat ~subst s =
- protect
- (fun () ->
- let xsubst s =
- let r = return_result 0 groups in
- subst r s
- in
- Str.global_substitute pat xsubst s)
-;;
-
-let substitute_first ?(groups = 9) ~pat ~subst s =
- protect
- (fun () ->
- let xsubst s =
- let r = return_result 0 groups in
- subst r s
- in
- Str.substitute_first pat xsubst s)
-;;
-
-(* replace_matched: n/a *)
-
-let split ~sep s =
- protect
- (fun () ->
- Str.split sep s)
-;;
-
-let bounded_split ~sep s ~max =
- protect
- (fun () ->
- Str.bounded_split sep s max)
-;;
-
-let split_delim ~sep s =
- protect
- (fun () ->
- Str.split_delim sep s)
-;;
-
-let bounded_split_delim ~sep s ~max =
- protect
- (fun () ->
- Str.bounded_split_delim sep s max)
-;;
-
-let full_split ~sep s =
- protect
- (fun () ->
- Str.full_split sep s)
-;;
-
-let bounded_full_split ~sep s ~max =
- protect
- (fun () ->
- Str.bounded_full_split sep s max)
-;;
-
-let string_before = Str.string_before;;
-let string_after = Str.string_after;;
-let first_chars = Str.first_chars;;
-let last_chars = Str.last_chars;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/06/25 21:15:48 gerd
- * Checked thread-safety.
- *
- * Revision 1.1 2000/06/25 20:48:19 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(* This module is a version of Str with a thread-safe interface *)
-
-type regexp = Str.regexp;;
-type split_result = Str.split_result = Text of string | Delim of string;;
-
-type result;;
- (* The type of matching results *)
-
-val regexp: string -> regexp
-val regexp_case_fold: string -> regexp
-val quote: string -> string
-val regexp_string: string -> regexp
-val regexp_string_case_fold: string -> regexp
-
-val string_match:
- ?groups:int -> pat:regexp -> string -> pos:int -> result option
-val search_forward:
- ?groups:int -> pat:regexp -> string -> pos:int -> (int * result)
-val search_backward:
- ?groups:int -> pat:regexp -> string -> pos:int -> (int * result)
-val string_partial_match:
- ?groups:int -> pat:regexp -> string -> pos:int -> result option
-
-(* The ~groups option specifies how many groups will be stored into
- * 'result'. Default: 9
- *)
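-
-(* Illustrative sketch (added for exposition): a successful match yields
- * Some result, which is then queried with the accessor functions below:
- *
- *   let s = "Hello world" in
- *   match string_match ~pat:(regexp_case_fold "h[aeiou]llo") s ~pos:0 with
- *     Some r -> matched_string r s        (* = "Hello" *)
- *   | None   -> raise Not_found
- *)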
-
-val matched_string : result -> string -> string
-val match_beginning : result -> int
-val match_end : result -> int
-val matched_group : result -> int -> string -> string
-val group_beginning : result -> int -> int
-val group_end : result -> int -> int
-
-val global_replace: pat:regexp -> templ:string -> string -> string
-val replace_first: pat:regexp -> templ:string -> string -> string
-val global_substitute:
- ?groups:int ->
- pat:regexp -> subst:(result -> string -> string) -> string -> string
-val substitute_first:
- ?groups:int ->
- pat:regexp -> subst:(result -> string -> string) -> string -> string
-
-(* replace_matched: not available *)
-
-val split: sep:regexp -> string -> string list
-val bounded_split: sep:regexp -> string -> max:int -> string list
-val split_delim: sep:regexp -> string -> string list
-val bounded_split_delim: sep:regexp -> string -> max:int -> string list
-val full_split: sep:regexp -> string -> split_result list
-val bounded_full_split: sep:regexp -> string -> max:int -> split_result list
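-
-(* Illustrative sketch (added for exposition):
- *   split ~sep:(regexp "[ \t]+") "a  b c"   = [ "a"; "b"; "c" ]
- *   split_delim ~sep:(regexp "/") "/a/b"    = [ ""; "a"; "b" ]
- *)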
-
-val string_before: string -> int -> string
-val string_after: string -> int -> string
-val first_chars: string -> len:int -> string
-val last_chars: string -> len:int -> string
-
-(* Private: *)
-
-val init_mt : (unit -> unit) -> (unit -> unit) -> unit
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/06/25 21:15:48 gerd
- * Checked thread-safety.
- *
- * Revision 1.1 2000/06/25 20:48:19 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-let exec s =
- let l = Lexing.from_string s in
- let ph = !Toploop.parse_toplevel_phrase l in
- assert(Toploop.execute_phrase false Format.err_formatter ph)
-;;
-
-(* Install the printers: *)
-
-exec "#install_printer Neturl.print_url;;";;
-exec "#install_printer Netbuffer.print_buffer;;";;
-exec "#install_printer Netstream.print_stream;;";;
-exec "#install_printer Cgi.print_argument;;";;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/06/25 22:34:43 gerd
- * Added labels to arguments.
- *
- * Revision 1.1 2000/06/24 20:20:58 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(* You may load this module into the toploop in order to install
- * the printers for the various opaque data types of Netstring.
- *)
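-
-(* Illustrative sketch (added for exposition; the load paths are
- * assumptions, adjust them with #directory if necessary):
- *
- *   #load "str.cma";;
- *   #load "netstring.cma";;
- *   #load "netstring_top.cmo";;
- *
- * Afterwards, values of type Neturl.url are printed as <URL:...>.
- *)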
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/06/25 22:53:45 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-exception Malformed_URL
-
-type url_syntax_option =
- Url_part_not_recognized
- | Url_part_allowed
- | Url_part_required
-
-
-type url_syntax =
- { url_enable_scheme : url_syntax_option;
- url_enable_user : url_syntax_option;
- url_enable_password : url_syntax_option;
- url_enable_host : url_syntax_option;
- url_enable_port : url_syntax_option;
- url_enable_path : url_syntax_option;
- url_enable_param : url_syntax_option;
- url_enable_query : url_syntax_option;
- url_enable_fragment : url_syntax_option;
- url_enable_other : url_syntax_option;
- url_accepts_8bits : bool;
- url_is_valid : url -> bool;
- }
-
-and url =
- {
- url_syntax : url_syntax;
- mutable url_validity : bool;
- url_scheme : string option;
- url_user : string option;
- url_password : string option;
- url_host : string option;
- url_port : int option;
- url_path : string list;
- url_param : string list;
- url_query : string option;
- url_fragment : string option;
- url_other : string option;
- }
-;;
-
-
-type char_category =
- Accepted
- | Rejected
- | Separator
-
-
-
-let scan_url_part s k_from k_to cats accept_8bits =
- (* Scans the longest word of accepted characters from position 'k_from'
- * in 's' until at most position 'k_to'. The character following the
- * word (if any) must be a separator character.
- * On success, the function returns the position of the last character
- * of the word + 1.
- * If there is any rejected character before the separator or the end
- * of the string (i.e. position 'k_to') is reached, the exception
- * Malformed_URL is raised.
-   * Furthermore, if the character '%' is accepted, the function checks
-   * whether two hexadecimal digits follow (which must be accepted, too);
-   * if not, the exception Malformed_URL is raised as well.
- * 'cats': contains for every character code (0 to 255) the category
- * of the character.
- *)
- let check_hex c =
- if cats.( Char.code c ) <> Accepted then raise Malformed_URL;
- match c with
- ('0'..'9'|'A'..'F'|'a'..'f') -> ()
- | _ -> raise Malformed_URL
- in
-
- let rec scan k =
- if k >= k_to then
- k
- else begin
- let c = s.[k] in
- let cat = cats.(Char.code c) in
- match cat with
- Accepted ->
- if c = '%' then begin
- if k+2 >= k_to then raise Malformed_URL;
- let c1 = s.[k+1] in
- let c2 = s.[k+2] in
- check_hex c1;
- check_hex c2;
- scan (k+3)
- end
- else
- scan (k+1)
- | Separator -> k
- | Rejected ->
- if accept_8bits && c >= '\128'
- then scan (k+1)
- else raise Malformed_URL
- end
- in
-
- assert (Array.length cats = 256);
- assert (k_from >= 0);
- assert (k_from <= k_to);
- assert (k_to <= String.length s);
-
- scan k_from
-;;
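-
-(* Illustrative sketch (added for exposition): with scheme_cats (defined
- * below), scanning "http://host" from position 0 stops at the ':'
- * separator, i.e. scan_url_part "http://host" 0 11 scheme_cats false = 4.
- *)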
-
-
-(* Create a categorization: *)
-
-let lalpha = [ 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
- 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z' ]
-
-let ualpha = [ 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
- 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z' ]
-
-let digit = [ '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9' ]
-
-let safe = [ '$'; '-'; '_'; '.'; '+' ]
-
-let extra = [ '!'; '*'; '\''; '('; ')'; ',' ]
-
-let make_cats accepted separators =
- (* create a categorization:
- * - All characters listed in 'separators' are separators.
- * - All characters listed in 'accepted' and which do not occur in
- * 'separators' are accepted characters.
- * - All other characters are rejected.
- *)
- let cats = Array.make 256 Rejected in
- List.iter
- (fun c ->
- cats.(Char.code c) <- Accepted
- )
- accepted;
-
- List.iter
- (fun c ->
- cats.(Char.code c) <- Separator
- )
- separators;
- cats
-;;
-
-
-let scheme_cats =
- make_cats (lalpha @ ualpha @ ['+'; '-'; '.']) [':'] ;;
-
- (* scheme_cats: character categorization to _extract_ the URL scheme *)
-
-
-let login_cats =
- make_cats
- (lalpha @ ualpha @ digit @ safe @ extra @ [';'; '?'; '&'; '='; '%'])
- [':'; '@'; '/'; '#' ]
-;;
-
- (* login_cats: character categorization to _extract_ user name, password,
- * host name, and port.
- *)
-
-let host_cats =
- make_cats
- (lalpha @ ualpha @ digit @ ['.'; '-'])
- []
-;;
-
- (* host_cats: character categorization to _check_ whether the host name
- * is formed only by legal characters.
- * Especially '%' is not allowed here!
- *)
-
-let port_cats =
- make_cats
- digit
- []
-;;
-
- (* port_cats: character categorization to _check_ whether the port number
- * is formed only by legal characters.
- * Especially '%' is not allowed here!
- *)
-
-let path_cats separators =
- make_cats
- (lalpha @ ualpha @ digit @ safe @ extra @
- ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'; '~'])
- separators
-;;
-
-
-let separators_from_syntax syn =
- let include_if syn_option clist =
- if syn_option <> Url_part_not_recognized then
- clist
- else
- []
- in
- (include_if syn.url_enable_param [';']) @
- (include_if syn.url_enable_query ['?']) @
- (include_if syn.url_enable_fragment ['#'])
-;;
-
-
-let path_cats_from_syntax syn extraseps =
- let separators = separators_from_syntax syn in
- path_cats (separators @ extraseps)
-;;
-
-(* path_cats_from_syntax:
- * Computes a character categorization to extract the path from an URL.
- * This depends on the syntax because the list of possible separators
- * contains the characters that may begin the next URL clause.
- *
- * Notes:
- * - The '#' is rejected unless fragments are enabled.
- * - The '~' is accepted although this violates RFC 1738.
- *)
-
-
-let other_cats_from_syntax syn =
- let include_if syn_option clist =
- if syn_option <> Url_part_not_recognized then
- clist
- else
- []
- in
- let separators =
- (include_if syn.url_enable_param [';']) @
- (include_if syn.url_enable_query ['?']) @
- (include_if syn.url_enable_fragment ['#'])
- in
-
- make_cats
- (lalpha @ ualpha @ digit @ safe @ extra @
- (separators @ ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/']))
- []
-;;
-
- (* other_cats: character categorization to extract or check the
- * "other" part of the URL.
- *)
-
-
-
-let extract_url_scheme s =
- let l = String.length s in
- let k = scan_url_part s 0 l scheme_cats false in
- (* or raise Malformed_URL *)
- if k = l then raise Malformed_URL;
- assert (s.[k] = ':');
- String.lowercase(String.sub s 0 k)
-;;
-
-
-let ( => ) a b = not a or b;; (* implication *)
-
-let ( <=> ) (a:bool) b = ( a = b );; (* equivalence *)
-
-let url_syntax_is_valid syn =
- let recognized x = x <> Url_part_not_recognized in
- let not_recognized x = x = Url_part_not_recognized in
- (recognized syn.url_enable_password => recognized syn.url_enable_user) &
- (recognized syn.url_enable_port => recognized syn.url_enable_host) &
- (recognized syn.url_enable_user => recognized syn.url_enable_host) &
- not ( (recognized syn.url_enable_user ||
- recognized syn.url_enable_password ||
- recognized syn.url_enable_host ||
- recognized syn.url_enable_port ||
- recognized syn.url_enable_path) &&
- (recognized syn.url_enable_other))
-;;
-
-
-let partial_url_syntax syn =
- let weaken =
- function
- Url_part_not_recognized -> Url_part_not_recognized
- | Url_part_allowed -> Url_part_allowed
- | Url_part_required -> Url_part_allowed
- in
- { url_enable_scheme = weaken syn.url_enable_scheme;
- url_enable_user = weaken syn.url_enable_user;
- url_enable_password = weaken syn.url_enable_password;
- url_enable_host = weaken syn.url_enable_host;
- url_enable_port = weaken syn.url_enable_port;
- url_enable_path = weaken syn.url_enable_path;
- url_enable_param = weaken syn.url_enable_param;
- url_enable_query = weaken syn.url_enable_query;
- url_enable_fragment = weaken syn.url_enable_fragment;
- url_enable_other = weaken syn.url_enable_other;
- url_accepts_8bits = syn.url_accepts_8bits;
- url_is_valid = syn.url_is_valid;
- }
-;;
-
-
-
-let file_url_syntax =
- { url_enable_scheme = Url_part_required;
- url_enable_user = Url_part_not_recognized;
- url_enable_password = Url_part_not_recognized;
- url_enable_host = Url_part_allowed;
- url_enable_port = Url_part_not_recognized;
- url_enable_path = Url_part_required;
- url_enable_param = Url_part_not_recognized;
- url_enable_query = Url_part_not_recognized;
- url_enable_fragment = Url_part_not_recognized;
- url_enable_other = Url_part_not_recognized;
- url_accepts_8bits = false;
- url_is_valid = (fun _ -> true);
- }
-;;
-
-
-let ftp_url_syntax =
- { url_enable_scheme = Url_part_required;
- url_enable_user = Url_part_allowed;
- url_enable_password = Url_part_allowed;
- url_enable_host = Url_part_required;
- url_enable_port = Url_part_allowed;
- url_enable_path = Url_part_allowed;
- url_enable_param = Url_part_allowed;
- url_enable_query = Url_part_not_recognized;
- url_enable_fragment = Url_part_not_recognized;
- url_enable_other = Url_part_not_recognized;
- url_accepts_8bits = false;
- url_is_valid = (fun _ -> true);
- }
-;;
-
-
-let http_url_syntax =
- { url_enable_scheme = Url_part_required;
- url_enable_user = Url_part_allowed;
- url_enable_password = Url_part_allowed;
- url_enable_host = Url_part_required;
- url_enable_port = Url_part_allowed;
- url_enable_path = Url_part_allowed;
- url_enable_param = Url_part_not_recognized;
- url_enable_query = Url_part_allowed;
- url_enable_fragment = Url_part_not_recognized;
- url_enable_other = Url_part_not_recognized;
- url_accepts_8bits = false;
- url_is_valid = (fun _ -> true);
- }
-;;
-
-
-let mailto_url_syntax =
- { url_enable_scheme = Url_part_required;
- url_enable_user = Url_part_not_recognized;
- url_enable_password = Url_part_not_recognized;
- url_enable_host = Url_part_not_recognized;
- url_enable_port = Url_part_not_recognized;
- url_enable_path = Url_part_not_recognized;
- url_enable_param = Url_part_not_recognized;
- url_enable_query = Url_part_not_recognized;
- url_enable_fragment = Url_part_not_recognized;
- url_enable_other = Url_part_required;
- url_accepts_8bits = false;
- url_is_valid = (fun _ -> true);
- }
-;;
-
-
-let null_url_syntax =
- { url_enable_scheme = Url_part_not_recognized;
- url_enable_user = Url_part_not_recognized;
- url_enable_password = Url_part_not_recognized;
- url_enable_host = Url_part_not_recognized;
- url_enable_port = Url_part_not_recognized;
- url_enable_path = Url_part_not_recognized;
- url_enable_param = Url_part_not_recognized;
- url_enable_query = Url_part_not_recognized;
- url_enable_fragment = Url_part_not_recognized;
- url_enable_other = Url_part_not_recognized;
- url_accepts_8bits = false;
- url_is_valid = (fun _ -> true);
- }
-;;
-
-
-let ip_url_syntax =
- { url_enable_scheme = Url_part_allowed;
- url_enable_user = Url_part_allowed;
- url_enable_password = Url_part_allowed;
- url_enable_host = Url_part_allowed;
- url_enable_port = Url_part_allowed;
- url_enable_path = Url_part_allowed;
- url_enable_param = Url_part_allowed;
- url_enable_query = Url_part_allowed;
- url_enable_fragment = Url_part_allowed;
- url_enable_other = Url_part_not_recognized;
- url_accepts_8bits = false;
- url_is_valid = (fun _ -> true);
- }
-;;
-
-
-let common_url_syntax =
- let h = Hashtbl.create 10 in
- Hashtbl.add h "file" file_url_syntax;
- Hashtbl.add h "ftp" ftp_url_syntax;
- Hashtbl.add h "http" http_url_syntax;
- Hashtbl.add h "mailto" mailto_url_syntax;
- h
-;;
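-
-(* Illustrative sketch (added for exposition): callers typically look up
- * the syntax for a scheme first and then parse with it, e.g.
- *
- *   let http = Hashtbl.find common_url_syntax "http" in
- *   url_of_string http "http://host/path"
- *
- * (url_of_string is defined further below in this file.)
- *)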
-
-
-let url_conforms_to_syntax url =
- let recognized x = x <> Url_part_not_recognized in
- let required x = x = Url_part_required in
- let present x = x <> None in
- let syn = url.url_syntax in
- (present url.url_scheme => recognized syn.url_enable_scheme) &
- (present url.url_user => recognized syn.url_enable_user) &
- (present url.url_password => recognized syn.url_enable_password) &
- (present url.url_host => recognized syn.url_enable_host) &
- (present url.url_port => recognized syn.url_enable_port) &
- ((url.url_path <> []) => recognized syn.url_enable_path) &
- ((url.url_param <> []) => recognized syn.url_enable_param) &
- (present url.url_query => recognized syn.url_enable_query) &
- (present url.url_fragment => recognized syn.url_enable_fragment) &
- (present url.url_other => recognized syn.url_enable_other) &
- (required syn.url_enable_scheme => present url.url_scheme) &
- (required syn.url_enable_user => present url.url_user) &
- (required syn.url_enable_password => present url.url_password) &
- (required syn.url_enable_host => present url.url_host) &
- (required syn.url_enable_port => present url.url_port) &
- (required syn.url_enable_path => (url.url_path <> [])) &
- (required syn.url_enable_param => (url.url_param <> [])) &
- (required syn.url_enable_query => present url.url_query) &
- (required syn.url_enable_fragment => present url.url_fragment) &
- (required syn.url_enable_other => present url.url_other) &
- (url.url_validity or syn.url_is_valid url)
-;;
-
-
-let url_syntax_of_url url = url.url_syntax
-;;
-
-
-let modify_url
- ?syntax
- ?(encoded = false)
- ?scheme
- ?user
- ?password
- ?host
- ?port
- ?path
- ?param
- ?query
- ?fragment
- ?other
- url
- =
-
- let encode = Netencoding.Url.encode in
- let enc x =
- if encoded then
- x
- else
- match x with
- None -> None
- | Some x' -> Some (encode x')
- in
- let enc_list l =
- if encoded then
- l
- else
- List.map encode l
- in
-
- let new_syntax =
- match syntax with
- None -> url.url_syntax
- | Some syn -> syn
- in
-
- let check_string s_opt cats =
- match s_opt with
- None -> ()
- | Some s ->
- let l = String.length s in
- let k = scan_url_part s 0 l cats new_syntax.url_accepts_8bits in
- (* or raise Malformed_URL *)
- if k <> l then raise Malformed_URL
- in
-
- let check_string_list p cats sep =
- List.iter
- (fun p_component ->
- let l = String.length p_component in
- let k =
- scan_url_part p_component 0 l cats new_syntax.url_accepts_8bits in
- (* or raise Malformed_URL *)
- if k <> l then raise Malformed_URL;
- if String.contains p_component sep then raise Malformed_URL;
- )
- p
- in
-
- (* Create the modified record: *)
- let url' =
- {
- url_syntax = new_syntax;
- url_validity = false;
- url_scheme = if scheme = None then url.url_scheme else scheme;
- url_user = if user = None then url.url_user else enc user;
- url_password = if password = None then url.url_password else enc password;
- url_host = if host = None then url.url_host else host;
- url_port = if port = None then url.url_port else port;
- url_path = (match path with
- None -> url.url_path
- | Some p -> enc_list p);
- url_param = (match param with
- None -> url.url_param
- | Some p -> enc_list p);
- url_query = if query = None then url.url_query else enc query;
- url_fragment = if fragment = None then url.url_fragment else enc fragment;
- url_other = if other = None then url.url_other else enc other;
- }
- in
- (* Check whether the URL conforms to the syntax:
- *)
- if not (url_conforms_to_syntax url') then raise Malformed_URL;
- if url'.url_password <> None && url'.url_user = None then raise Malformed_URL;
- if url'.url_user <> None && url'.url_host = None then raise Malformed_URL;
- if url'.url_port <> None && url'.url_host = None then raise Malformed_URL;
- (* Check every part: *)
- check_string url'.url_scheme scheme_cats;
- check_string url'.url_user login_cats;
- check_string url'.url_password login_cats;
- check_string url'.url_host host_cats;
- (match url'.url_port with
- None -> ()
- | Some p -> if p < 0 || p > 65535 then raise Malformed_URL
- );
- let path_cats = path_cats_from_syntax new_syntax [] in
- let other_cats = other_cats_from_syntax new_syntax in
- check_string url'.url_query path_cats;
- check_string url'.url_fragment path_cats;
- check_string url'.url_other other_cats;
- (* Check the lists: *)
- check_string_list url'.url_param path_cats ';';
- check_string_list url'.url_path path_cats '/';
- (* Further path checks: *)
- begin match url'.url_path with
- [] ->
- (* The path is empty: There must not be a 'param' or 'query' *)
- if url'.url_host <> None then begin
- if url'.url_param <> [] then raise Malformed_URL;
- if url'.url_query <> None then raise Malformed_URL;
- end
- | ["";""] ->
- (* This is illegal. *)
- raise Malformed_URL;
- | "" :: p' ->
- (* The path is absolute: always ok *)
- ()
- | _ ->
- (* The path is relative: there must not be a host *)
- if url'.url_host <> None then raise Malformed_URL;
- end;
- begin match url'.url_path with
- _ :: rest -> (* "//" ambiguity *)
- begin match List.rev rest with
- _ :: rest' ->
- if List.exists (fun p -> p = "") rest' then
- raise Malformed_URL;
- | [] ->
- ()
- end
- | [] ->
- ()
- end;
- (* Cache that the URL is valid: *)
- url'.url_validity <- true;
-
- url'
-;;
-
-
-let null_url =
- {
- url_syntax = null_url_syntax;
- url_validity = true;
- url_scheme = None;
- url_user = None;
- url_password = None;
- url_host = None;
- url_port = None;
- url_path = [];
- url_param = [];
- url_query = None;
- url_fragment = None;
- url_other = None;
- }
-;;
-
-
-let make_url
- ?(encoded = false)
- ?scheme
- ?user
- ?password
- ?host
- ?port
- ?path
- ?param
- ?query
- ?fragment
- ?other
- url_syntax
- =
-
- if not (url_syntax_is_valid url_syntax) then
- invalid_arg "Neturl.make_url";
-
- modify_url
- ~encoded:encoded
- ~syntax:url_syntax
- ?scheme:scheme
- ?user:user
- ?password:password
- ?host:host
- ?port:port
- ?path:path
- ?param:param
- ?query:query
- ?fragment:fragment
- ?other:other
- null_url
-;;
-
-
-let remove_from_url
- ?(scheme = false)
- ?(user = false)
- ?(password = false)
- ?(host = false)
- ?(port = false)
- ?(path = false)
- ?(param = false)
- ?(query = false)
- ?(fragment = false)
- ?(other = false)
- url
- =
-
- make_url
- ~encoded: true
- ?scheme: (if scheme then None else url.url_scheme)
- ?user: (if user then None else url.url_user)
- ?password: (if password then None else url.url_password)
- ?host: (if host then None else url.url_host)
- ?port: (if port then None else url.url_port)
- ?path: (if path then None else Some url.url_path)
- ?param: (if param then None else Some url.url_param)
- ?query: (if query then None else url.url_query)
- ?fragment: (if fragment then None else url.url_fragment)
- ?other: (if other then None else url.url_other)
- url.url_syntax
-;;
-
-
-let default_url
- ?(encoded = false)
- ?scheme
- ?user
- ?password
- ?host
- ?port
- ?(path = [])
- ?(param = [])
- ?query
- ?fragment
- ?other
- url
- =
-
- let encode = Netencoding.Url.encode in
-
- let enc x =
- if encoded then
- x
- else
- match x with
- None -> None
- | Some x' -> Some (encode x')
- in
-
- let enc_list l =
- if encoded then
- l
- else
- List.map encode l
- in
-
- let pass_if_missing current arg =
- match current with
- None -> arg
- | _ -> current
- in
-
- make_url
- ~encoded: true
- ?scheme: (pass_if_missing url.url_scheme scheme)
- ?user: (pass_if_missing url.url_user (enc user))
- ?password: (pass_if_missing url.url_password (enc password))
- ?host: (pass_if_missing url.url_host host)
- ?port: (pass_if_missing url.url_port port)
- ~path: (if url.url_path = [] then enc_list path else url.url_path)
- ~param: (if url.url_param = [] then enc_list param else url.url_param)
- ?query: (pass_if_missing url.url_query (enc query))
- ?fragment: (pass_if_missing url.url_fragment (enc fragment))
- ?other: (pass_if_missing url.url_other (enc other))
- url.url_syntax
-;;
-
-
-let undefault_url
- ?scheme
- ?user
- ?password
- ?host
- ?port
- ?path
- ?param
- ?query
- ?fragment
- ?other
- url
- =
-
- let remove_if_matching current arg =
- match current with
- None -> None
- | Some x ->
- (match arg with
- None -> current
- | Some x' ->
- if x=x' then
- None
- else
- current)
- in
-
- make_url
- ~encoded: true
- ?scheme: (remove_if_matching url.url_scheme scheme)
- ?user: (remove_if_matching url.url_user user)
- ?password: (remove_if_matching url.url_password password)
- ?host: (remove_if_matching url.url_host host)
- ?port: (remove_if_matching url.url_port port)
- ~path: (match path with
- None -> url.url_path
- | Some x ->
- if x = url.url_path then
- []
- else
- url.url_path)
- ~param: (match param with
- None -> url.url_param
- | Some x ->
- if x = url.url_param then
- []
- else
- url.url_param)
- ?query: (remove_if_matching url.url_query query)
- ?fragment: (remove_if_matching url.url_fragment fragment)
- ?other: (remove_if_matching url.url_other other)
- url.url_syntax
-;;
-
-
-let url_provides
- ?(scheme = false)
- ?(user = false)
- ?(password = false)
- ?(host = false)
- ?(port = false)
- ?(path = false)
- ?(param = false)
- ?(query = false)
- ?(fragment = false)
- ?(other = false)
- url
- =
-
- (scheme => (url.url_scheme <> None)) &
- (user => (url.url_user <> None)) &
- (password => (url.url_password <> None)) &
- (host => (url.url_host <> None)) &
- (port => (url.url_port <> None)) &
- (path => (url.url_path <> [])) &
- (param => (url.url_param <> [])) &
- (query => (url.url_query <> None)) &
- (fragment => (url.url_fragment <> None)) &
- (other => (url.url_other <> None))
-;;
-
-
-let return_if value =
- match value with
- None -> raise Not_found
- | Some x -> x
-;;
-
-
-let decode_if want_encoded value =
- let value' = return_if value in
- if want_encoded then
- value'
- else
- Netencoding.Url.decode value' (* WARNING: not thread-safe! *)
-;;
-
-
-let decode_path_if want_encoded value =
- if want_encoded then
- value
- else
- List.map Netencoding.Url.decode value (* WARNING: not thread-safe! *)
-;;
-
-
-let url_scheme url = return_if url.url_scheme;;
-let url_user ?(encoded=false) url = decode_if encoded url.url_user;;
-let url_password ?(encoded=false) url = decode_if encoded url.url_password;;
-let url_host url = return_if url.url_host;;
-let url_port url = return_if url.url_port;;
-let url_path ?(encoded=false) url = decode_path_if encoded url.url_path;;
-let url_param ?(encoded=false) url = decode_path_if encoded url.url_param;;
-let url_query ?(encoded=false) url = decode_if encoded url.url_query;;
-let url_fragment ?(encoded=false) url = decode_if encoded url.url_fragment;;
-let url_other ?(encoded=false) url = decode_if encoded url.url_other;;
-
-
-let string_of_url url =
- if not (url.url_validity) then
- failwith "Neturl.string_of_url: URL not flagged as valid";
- (match url.url_scheme with
- None -> ""
- | Some s -> s ^ ":") ^
- (match url.url_host with
- None -> ""
- | Some host ->
- "//" ^
- (match url.url_user with
- None -> ""
- | Some user ->
- user ^
- (match url.url_password with
- None -> ""
- | Some password ->
- ":" ^ password
- ) ^
- "@") ^
- host ^
- (match url.url_port with
- None -> ""
- | Some port ->
- ":" ^ string_of_int port)) ^
- (match url.url_path with
- | [""] ->
- "/"
- | x :: p when url.url_scheme = None &&
- url.url_host = None &&
- String.contains x ':'
- ->
-         (* Really a special case: the colon contained in 'x' may cause
-          * a prefix of 'x' to be interpreted as the URL scheme. In this
-          * case, "./" is prepended (as recommended in RFC 1808, 5.3).
- *)
- "./"
- | _ ->
- ""
- ) ^
- String.concat "/" url.url_path ^
- (match url.url_other with
- None -> ""
- | Some other ->
- other) ^
- String.concat "" (List.map (fun s -> ";" ^ s) url.url_param) ^
- (match url.url_query with
- None -> ""
- | Some query ->
- "?" ^ query) ^
- (match url.url_fragment with
- None -> ""
- | Some fragment ->
- "#" ^ fragment)
-;;
-
-
-let url_of_string url_syntax s =
- let l = String.length s in
- let recognized x = x <> Url_part_not_recognized in
-
- let rec collect_words terminators eof_char cats k =
- (* Collect words as recognized by 'cats', starting at position 'k' in
-     * 's'. Collection stops if one of the characters listed in 'terminators'
- * is found. If the end of the string is reached, it is treated as
- * 'eof_char'.
- *)
- let k' = scan_url_part s k l cats url_syntax.url_accepts_8bits in
- (* or raise Malformed_URL *)
- let word, sep =
- String.sub s k (k'-k), (if k'<l then s.[k'] else eof_char) in
- if List.mem sep terminators then
- [word, sep], k'
- else
- let word_sep_list', k'' =
- collect_words terminators eof_char cats (k'+1) in
- ((word, sep) :: word_sep_list'), k''
- in
-
- (* Try to extract the scheme name: *)
- let scheme, k1 =
- if recognized url_syntax.url_enable_scheme then
- try
- let k = scan_url_part s 0 l scheme_cats false in
- (* or raise Malformed_URL *)
- if k = l then raise Malformed_URL;
- assert (s.[k] = ':');
- Some (String.sub s 0 k), (k+1)
- with
- Malformed_URL -> None, 0
- else
- None, 0
- in
-
- (* If there is a "//", a host will follow: *)
- let host, port, user, password, k2 =
- if recognized url_syntax.url_enable_host &&
- k1 + 2 <= l && s.[k1]='/' && s.[k1+1]='/' then begin
-
- let word_sep_list, k' = collect_words [ '/'; '#' ] '/' login_cats (k1+2)
- in
- (* or raise Malformed_URL *)
-
- let int x =
- try int_of_string x with _ -> raise Malformed_URL in
-
- match word_sep_list with
- [ host, ('/'|'#') ] ->
- Some host, None, None, None, k'
- | [ host, ':'; port, ('/'|'#') ] ->
- Some host, Some (int port), None, None, k'
- | [ user, '@'; host, ('/'|'#') ] ->
- Some host, None, Some user, None, k'
- | [ user, '@'; host, ':'; port, ('/'|'#') ] ->
- Some host, Some (int port), Some user, None, k'
- | [ user, ':'; password, '@'; host, ('/'|'#') ] ->
- Some host, None, Some user, Some password, k'
- | [ user, ':'; password, '@'; host, ':'; port, ('/'|'#') ] ->
- Some host, Some (int port), Some user, Some password, k'
- | _ ->
- raise Malformed_URL
- end
- else
- None, None, None, None, k1
- in
-
- let path, k3 =
- if recognized url_syntax.url_enable_path &&
- k2 < l (* && s.[k2]='/' *)
- then begin
- let cats = path_cats_from_syntax url_syntax [ '/' ] in
- let seps = separators_from_syntax url_syntax in
-
- (* Note: '>' is not allowed within URLs; because of this we can use
- * it as end-of-string character.
- *)
-
- let word_sep_list, k' = collect_words ('>'::seps) '>' cats k2 in
- (* or raise Malformed_URL *)
- match word_sep_list with
- [ "", '/'; "", _ ] ->
- [ "" ], k'
- | [ "", _ ] ->
- [], k'
- | _ ->
- List.map fst word_sep_list, k'
- end
- else begin
- (* If there is a single '/': skip it *)
- if not (recognized url_syntax.url_enable_other) &&
- k2 < l && s.[k2]='/'
- then
- [], (k2+1)
- else
- [], k2
- end
- in
-
- let other, k4 =
- if recognized url_syntax.url_enable_other &&
- k3 < l
- then begin
-
- let cats = other_cats_from_syntax url_syntax in
-
- (* Note: '>' is not allowed within URLs; because of this we can use
- * it as end-of-string character.
- *)
-
- let word_sep_list, k' = collect_words ['>';'#'] '>' cats k3 in
- (* or raise Malformed_URL *)
-
- match word_sep_list with
- [ other, _ ] -> Some other, k'
- | _ -> assert false
- end
- else
- None, k3
- in
-
- let param, k5 =
- if recognized url_syntax.url_enable_param &&
- k4 < l && s.[k4]=';'
- then begin
- let cats = path_cats_from_syntax url_syntax [] in
- let seps = separators_from_syntax url_syntax in
- let seps' = List.filter (fun c -> c <> ';') seps in
-
- (* Note: '>' is not allowed within URLs; because of this we can use
- * it as end-of-string character.
- *)
-
- let word_sep_list, k' = collect_words ('>'::seps') '>' cats (k4+1) in
- (* or raise Malformed_URL *)
-
- List.map fst word_sep_list, k'
- end
- else
- [], k4
- in
-
- let query, k6 =
- if recognized url_syntax.url_enable_query &&
- k5 < l && s.[k5]='?'
- then begin
- let cats = path_cats_from_syntax url_syntax [] in
- let seps = separators_from_syntax url_syntax in
-
- (* Note: '>' is not allowed within URLs; because of this we can use
- * it as end-of-string character.
- *)
-
- let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k5+1) in
- (* or raise Malformed_URL *)
-
- match word_sep_list with
- [ query, _ ] -> Some query, k'
- | _ -> assert false
- end
- else
- None, k5
- in
-
- let fragment, k7 =
- if recognized url_syntax.url_enable_fragment &&
- k6 < l && s.[k6]='#'
- then begin
- let cats = path_cats_from_syntax url_syntax [] in
- let seps = separators_from_syntax url_syntax in
-
- (* Note: '>' is not allowed within URLs; because of this we can use
- * it as end-of-string character.
- *)
-
- let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k6+1) in
- (* or raise Malformed_URL *)
-
- match word_sep_list with
- [ fragment, _ ] -> Some fragment, k'
- | _ -> assert false
- end
- else
- None, k6
- in
-
- if k7 <> l then raise Malformed_URL;
-
- make_url
- ~encoded:true
- ?scheme:scheme
- ?user:user
- ?password:password
- ?host:host
- ?port:port
- ~path:path
- ~param:param
- ?query:query
- ?fragment:fragment
- ?other:other
- url_syntax
-;;
-
-
-let split_path s =
- let l = String.length s in
- let rec collect_words k =
- let k' =
- try
- String.index_from s k '/'
- with
- Not_found -> l
- in
- let word = String.sub s k (k'-k) in
- if k' >= l then
- [word]
- else
- word :: collect_words (k'+1)
- in
- match collect_words 0 with
- [ "" ] -> []
- | [ "";"" ] -> [ "" ]
- | other -> other
-;;
-
-
-let join_path l =
- match l with
- [ "" ] -> "/"
- | _ -> String.concat "/" l;;
-
-
-let norm_path l =
-
- let rec remove_slash_slash l first =
- match l with
- | [ "" ] ->
- [ "" ]
- | [ ""; "" ] when first ->
- [ "" ]
- | "" :: l' when not first ->
- remove_slash_slash l' false
- | x :: l' ->
- x :: remove_slash_slash l' false
- | [] ->
- []
- in
-
- let rec remove_dot l first =
- match l with
- | ([ "." ] | ["."; ""]) ->
- if first then [] else [ "" ]
- | "." :: x :: l' ->
- remove_dot (x :: l') false
- | x :: l' ->
- x :: remove_dot l' false
- | [] ->
- []
- in
-
- let rec remove_dot_dot_once l first =
- match l with
- x :: ".." :: [] when x <> "" && x <> ".." && not first ->
- [ "" ]
- | x :: ".." :: l' when x <> "" && x <> ".." ->
- l'
- | x :: l' ->
- x :: remove_dot_dot_once l' false
- | [] ->
- raise Not_found
- in
-
- let rec remove_dot_dot l =
- try
- let l' = remove_dot_dot_once l true in
- remove_dot_dot l'
- with
- Not_found -> l
- in
-
- let l' = remove_dot_dot (remove_dot (remove_slash_slash l true) true) in
- match l' with
- [".."] -> [".."; ""]
- | ["";""] -> [ "" ]
- | _ -> l'
-;;
-
-
-let apply_relative_url baseurl relurl =
- if not (baseurl.url_validity) or not (relurl.url_validity) then
- failwith "Neturl.apply_relative_url: URL not flagged as valid";
-
- if relurl.url_scheme <> None then
- modify_url
- ~syntax:baseurl.url_syntax (* inherit syntax *)
- relurl
- else
- if relurl.url_host <> None then
- modify_url
- ~syntax:baseurl.url_syntax (* inherit syntax and scheme *)
- ?scheme:baseurl.url_scheme
- relurl
- else
- match relurl.url_path with
- "" :: other ->
- (* An absolute path *)
- modify_url
- ~syntax:baseurl.url_syntax (* inherit syntax, scheme, and *)
- ~encoded:true
- ?scheme:baseurl.url_scheme (* login info *)
- ?host:baseurl.url_host
- ?port:baseurl.url_port
- ?user:baseurl.url_user
- ?password:baseurl.url_password
- relurl
- | [] ->
- (* Empty: Inherit also path, params, query, and fragment *)
- let new_params, new_query, new_fragment =
- match relurl.url_param, relurl.url_query, relurl.url_fragment
- with
- [], None, None ->
- (* Inherit all three *)
- baseurl.url_param, baseurl.url_query, baseurl.url_fragment
- | [], None, f ->
- (* Inherit params and query *)
- baseurl.url_param, baseurl.url_query, f
- | [], q, f ->
- (* Inherit params *)
- baseurl.url_param, q, f
- | p, q, f ->
- (* Inherit none of them *)
- p, q, f
- in
- modify_url
- ~syntax:baseurl.url_syntax
- ~encoded:true
- ?scheme:baseurl.url_scheme
- ?host:baseurl.url_host
- ?port:baseurl.url_port
- ?user:baseurl.url_user
- ?password:baseurl.url_password
- ~path:baseurl.url_path
- ~param:new_params
- ?query:new_query
- ?fragment:new_fragment
- relurl
- | relpath ->
- (* A relative path *)
- let rec change_path basepath =
- match basepath with
- | [] ->
- relpath
- | [ x ] ->
- relpath
- | x :: basepath' ->
- x :: change_path basepath'
- in
- let new_path = norm_path (change_path baseurl.url_path) in
- modify_url
- ~syntax:baseurl.url_syntax (* inherit syntax, scheme, and *)
- ~encoded:true
- ?scheme:baseurl.url_scheme (* login info *)
- ?host:baseurl.url_host
- ?port:baseurl.url_port
- ?user:baseurl.url_user
- ?password:baseurl.url_password
- ~path:new_path (* and change path *)
- relurl
-
-;;
-
-
-let print_url url =
- Format.print_string ("<URL:" ^ string_of_url url ^ ">")
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:28 lpadovan
- * Initial revision
- *
- * Revision 1.4 2000/07/04 21:50:51 gerd
- * Fixed typo.
- *
- * Revision 1.3 2000/06/26 22:57:49 gerd
- * Change: The record 'url_syntax' has an additional component
- * 'url_accepts_8bits'. Setting this option to 'true' causes that
- * the bytes >= 0x80 are no longer rejected.
- *
- * Revision 1.2 2000/06/25 19:39:48 gerd
- * Lots of Bugfixes.
- *
- * Revision 1.1 2000/06/24 20:19:59 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(* This module already uses O'Caml 3 features. *)
-
-(* Uniform Resource Locators (URLs):
- *
- * This module provides functions to parse URLs, to print URLs, to
- * store URLs, to modify URLs, and to apply relative URLs.
- *
- * URLs are strings formed according to pattern (1) or (2):
- *
- * (1) scheme://user:password@host:port/path;params?query#fragment
- * (2) scheme:other;params?query#fragment
- *
- * The word at the beginning of the URL identifies the URL scheme
- * (such as "http" or "file"). Depending on the scheme, not all of the
- * parts are allowed, or parts may be omitted. This module defines the
- * type 'url_syntax' whose values describe which parts are allowed/required/
- * not allowed for a concrete URL scheme (see below).
- *
- * Not all characters are allowed in a URL. Some characters are allowed,
- * but have the special task to separate the various parts of the URL
- * (reserved characters).
- * However, it is possible to include even invalid or reserved characters
- * as normal content by applying the '%'-encoding on these characters:
- * A '%' indicates that an encoded character follows, and the character
- * is denoted by a two-digit hexadecimal number (e.g. %2f for '/').
- * In the following descriptions, the term "encoded string" means a string
- * containing such %-encoded characters, and the "decoded string" means a
- * string not containing such characters.
- * See the module Netencoding.Url for functions encoding or decoding
- * strings.
- *
- * The type 'url' describes values storing the components of a URL,
- * and the 'url_syntax' for the URL. In general, the components are
- * stored as encoded strings; however, the '%'-encoding is not
- * applicable to all components.
- * For convenience, the functions creating, modifying, and accessing
- * URLs can handle both encoded and decoded strings: by default they
- * expect and return decoded strings, and ~encoded:true selects the
- * encoded form instead.
- *
- * Note that there is currently no function to compare URLs. The
- * canonical comparison ( = ) is not applicable because the same URL
- * may be written differently.
- *
- * Note that nothing is said about the character set/encoding of URLs.
- * Some protocols and standards prefer UTF-8 as fundamental encoding
- * and apply the '%'-encoding on top of it; i.e. the byte sequence
- * representing a character in UTF-8 is '%'-encoded. There is no special
- * support for this technique.
- *
- * For more information about URLs, see RFCs 1738 and 1808.
- *)
-
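-(* Illustrative sketch (added for exposition): the '%'-encoding itself is
- * provided by Netencoding.Url, e.g.
- *
- *   Netencoding.Url.encode "a/b#c"     = "a%2Fb%23c"
- *   Netencoding.Url.decode "a%2Fb%23c" = "a/b#c"
- *
- * Note that Netencoding.Url.encode also turns spaces into '+'.
- *)
-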
-exception Malformed_URL
-(* Is raised by a number of functions when encountering a badly formed
- * URL.
- *)
-
-val extract_url_scheme : string -> string
- (* Returns the URL scheme from the string representation of an URL.
- * E.g. extract_url_scheme "http://host/path" = "http".
- * The scheme name is always converted to lowercase characters.
- * Raises Malformed_URL if the scheme name is not found.
- *)
-
-type url_syntax_option =
- Url_part_not_recognized
- | Url_part_allowed
- | Url_part_required
-
-
-type url_syntax =
- { url_enable_scheme : url_syntax_option;
- url_enable_user : url_syntax_option;
- url_enable_password : url_syntax_option;
- url_enable_host : url_syntax_option;
- url_enable_port : url_syntax_option;
- url_enable_path : url_syntax_option;
- url_enable_param : url_syntax_option;
- url_enable_query : url_syntax_option;
- url_enable_fragment : url_syntax_option;
- url_enable_other : url_syntax_option;
- url_accepts_8bits : bool;
- url_is_valid : url -> bool;
- }
-
-and url
-;;
-
-(* Values of type 'url_syntax' describe which components of an URL are
- * recognized, which are allowed (and optional), and which are required.
- * Not all combinations are valid; the predicate expressed by the
- * function 'url_syntax_is_valid' must hold.
- * The function 'url_is_valid' is applied when a fresh URL is created
- * and must return 'true'. This function makes it possible to attach an
- * arbitrary additional validity criterion to a 'url_syntax'. (Note that
- * the URL passed to this function is not yet fully initialized (its
- * validity flag is not yet set); you can safely assume, however, that
- * the accessor functions url_scheme etc. can already be applied to it.)
- *
- * Switch 'url_accepts_8bits': If 'true', the bytes with code 128 to
- * 255 are treated like alphanumeric characters; if 'false' these bytes
- * are illegal (but it is still possible to include such bytes in their
- * encoded form: %80 to %FF).
- *
- * Values of type 'url' describe concrete URLs. Every URL must have
- * a fundamental 'url_syntax', and it is only possible to create URLs
- * conforming to the syntax. See 'make_url' for further information.
- *)
-
-
-val url_syntax_is_valid : url_syntax -> bool
- (* Checks whether the passed url_syntax is valid. This means:
- *
- * - If passwords are recognized, users (and hosts) must be recognized, too
- * - If ports are recognized, hosts must be recognized, too
- * - If users are recognized, hosts must be recognized, too
-   * - The syntax must not recognize both one of the parts
-   *   { user, password, host, port, path } and the part 'other'.
-   *)
-
-
-val partial_url_syntax : url_syntax -> url_syntax
- (* Transforms the syntax into another syntax where all required parts are
- * changed into optional parts.
- *)
-
-
-(* Note that none of the following url_syntaxes allows 8-bit bytes. *)
-
-val null_url_syntax : url_syntax
-
-val ip_url_syntax : url_syntax
- (* Maximum syntax for IP based protocols *)
-
-val common_url_syntax : (string, url_syntax) Hashtbl.t
- (* Syntax descriptions for common URL schemes:
- *
- * null_url_syntax: nothing is recognized
- *
- * common_url_syntax: Hashtable mapping from URL scheme names to
- * definitions of syntaxes:
- *
- * "file": scheme, host?, path
- * "ftp": scheme, user?, password?, host, port?, path?, param?
- * "http": scheme, user?, password?, host, port?, path?, query?
- * "mailto": scheme, other
- *
- * Notes:
- * (1) These syntax descriptions can be weakened for partial/relative URLs
- * by changing the required parts to optional parts: See the function
- * 'partial_url_syntax'.
- * (2) None of the descriptions allows fragments. These can be enabled by
- * setting 'url_enable_fragment' to Url_part_allowed. E.g.
-   *     { (Hashtbl.find common_url_syntax "file")
-   *         with url_enable_fragment = Url_part_allowed }
- *)
-
-val null_url : url
-  (* A URL without any components, using 'null_url_syntax' as its syntax
- *)
-
-val make_url :
- ?encoded:bool ->
- ?scheme:string ->
- ?user:string ->
- ?password:string ->
- ?host:string ->
- ?port:int ->
- ?path:string list ->
- ?param:string list ->
- ?query:string ->
- ?fragment:string ->
- ?other:string ->
- url_syntax ->
- url
- (* Creates a URL from components:
- *
- * - The components "scheme" and "host" are simple strings to which the
- * '%'-encoding is not applicable.
- * - The component "port" is a simple number. Of course, the '%'-encoding
- * is not applicable, too.
- * - The components "user", "password", "query", "fragment", and "other"
- *     are strings which may contain '%'-encoded characters. By default,
- * you can pass any string for these components, and problematic characters
- * are automatically encoded. If you set ~encoded:true, the passed
- * strings must already be encoded, but the function checks whether
- * the encoding is correct.
- * Note that for "query" even the characters '?' and '=' are encoded
- * by default, so you need to set ~encoded:true to pass a reasonable
- * query string.
- * - The components "path" and "param" are lists of strings which may
- * contain '%'-encoded characters. Again, the default is to pass
- * decoded strings to the function, and the function encodes them
- * automatically, and by setting ~encoded:true the caller is responsible
- * for encoding the strings.
- * path = [] and params = [] mean that no path and no parameters are
- * specified, respectively.
- *     See below for the representation of these components.
- *
- * Except of "path", the strings representing the components do not
- * contain the characters separating the components from each other.
- * The "path" component includes the '/' at the beginning of the path
- * (if present).
- *
- * The created URL must conform to the 'url_syntax', i.e.
- * - The URL must only contain components which are recognized by the
- * syntax
- * - The URL must contain components which are required by the syntax
- * - The URL must fulfill the predicate expressed by the 'url_is_valid'
- * function of the syntax.
- *
- * The path of a URL is represented as a list of '/'-separated path
- * components. i.e.
- * [ s1; s2; ...; sN ] represents the path
- * s1 ^ "/" ^ s2 ^ "/" ^ ... ^ "/" ^ sN
- * As special cases:
- * [] is the non-existing path
- * [ "" ] is "/"
- * [ "";"" ] is illegal
- *
- *    Except for s1 and sN, the path components must not be empty strings.
- *
- * To avoid ambiguities, it is illegal to create URLs with both relative
- * paths (s1 <> "") and host components.
- *
- * Parameters of URLs are components beginning with ';'. The list
- * of parameters is represented as list of strings where the strings
- * contain the value following ';'.
- *)
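-
-  (* Illustrative sketch (added for exposition; http_syntax is assumed to
-   * be Hashtbl.find common_url_syntax "http"):
-   *
-   *   let u = make_url ~scheme:"http" ~host:"h" ~path:[ ""; "a#b" ] http_syntax;;
-   *   string_of_url u;;
-   *   --> "http://h/a%23b"
-   *
-   * The '#' is encoded automatically because ~encoded defaults to false.
-   *)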
-
-val modify_url :
- ?syntax:url_syntax ->
- ?encoded:bool ->
- ?scheme:string ->
- ?user:string ->
- ?password:string ->
- ?host:string ->
- ?port:int ->
- ?path:string list ->
- ?param:string list ->
- ?query:string ->
- ?fragment:string ->
- ?other:string ->
- url ->
- url
- (* Modifies the passed components and returns the modified URL.
-   * The modified URL shares unmodified components with the original
- * URL.
- *)
-
-val remove_from_url :
- ?scheme:bool ->
- ?user:bool ->
- ?password:bool ->
- ?host:bool ->
- ?port:bool ->
- ?path:bool ->
- ?param:bool ->
- ?query:bool ->
- ?fragment:bool ->
- ?other:bool ->
- url ->
- url
- (* Removes the 'true' components from the URL, and returns the modified
- * URL.
-   * The modified URL shares unmodified components with the original
- * URL.
- *)
-
-val default_url :
- ?encoded:bool ->
- ?scheme:string ->
- ?user:string ->
- ?password:string ->
- ?host:string ->
- ?port:int ->
- ?path:string list ->
- ?param:string list ->
- ?query:string ->
- ?fragment:string ->
- ?other:string ->
- url ->
- url
- (* Adds missing components and returns the modified URL.
-   * The modified URL shares unmodified components with the original
- * URL.
- *)
-
-val undefault_url :
- ?scheme:string ->
- ?user:string ->
- ?password:string ->
- ?host:string ->
- ?port:int ->
- ?path:string list ->
- ?param:string list ->
- ?query:string ->
- ?fragment:string ->
- ?other:string ->
- url ->
- url
- (* Removes components from the URL if they have the passed value, and
- * returns the modified URL.
- * Note: The values must always be passed in _encoded_ form!
-   * The modified URL shares unmodified components with the original
- * URL.
- *)
-
-val url_syntax_of_url : url -> url_syntax
- (* Returns the 'url_syntax' record of a URL. *)
-
-val url_of_string : url_syntax -> string -> url
- (* Parses the passed string according to the passed url_syntax. *)
-
-val string_of_url : url -> string
- (* Returns the URL as string *)
-
-val url_provides :
- ?scheme:bool ->
- ?user:bool ->
- ?password:bool ->
- ?host:bool ->
- ?port:bool ->
- ?path:bool ->
- ?param:bool ->
- ?query:bool ->
- ?fragment:bool ->
- ?other:bool ->
- url ->
- bool
- (* Returns 'true' iff the URL has all of the components passed with
- * 'true' value.
- *)
-
-val url_scheme : url -> string
-val url_user : ?encoded:bool -> url -> string
-val url_password : ?encoded:bool -> url -> string
-val url_host : url -> string
-val url_port : url -> int
-val url_path : ?encoded:bool -> url -> string list
-val url_param : ?encoded:bool -> url -> string list
-val url_query : ?encoded:bool -> url -> string
-val url_fragment : ?encoded:bool -> url -> string
-val url_other : ?encoded:bool -> url -> string
- (* Return components of the URL. The functions return decoded strings
- * unless ~encoded:true is set.
- * If the component does not exist, the exception Not_found
- * is raised.
- *)
-
-val split_path : string -> string list
- (* Splits a '/'-separated path into components (e.g. to set up the
- * ~path argument of make_url).
- * E.g. split_path "a/b/c" = [ "a"; "b"; "c" ],
- * split_path "/a/b" = [ ""; "a"; "b" ],
- * split_path "a/b/" = [ "a"; "b"; "" ]
- *)
-
-val join_path : string list -> string
- (* Concatenates the path components (reverse function of split_path).
- *)
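-  (* Examples (sketch): join_path [ ""; "a"; "b" ] = "/a/b";
-   * join_path [ "a"; "b"; "" ] = "a/b/"; join_path [ "" ] = "/".
-   *)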
-
-val norm_path : string list -> string list
- (* Removes "." and ".." from the path if possible. Deletes double slashes.
- *
- * EXAMPLES:
- *
- * norm_path ["."] = []
- * means: "." = ""
- * norm_path ["."; ""] = []
- * means: "./" = ""
- * norm_path ["a"; "."] = ["a"; ""]
- * means: "a/." = "a/"
- * norm_path ["a"; "b"; "."] = ["a"; "b"; ""]
- * means: "a/b/." = "a/b/"
- * norm_path ["a"; "."; "b"; "."] = ["a"; "b"; ""]
- * means: "a/./b/." = "a/b/"
- * norm_path [".."] = [".."; ""]
- * means: ".." = "../"
- * norm_path [".."; ""] = [".."; ""]
- * means: "../" = "../"
- * norm_path ["a"; "b"; ".."; "c" ] = ["a"; "c"]
- * means: "a/b/../c" = "a/c"
- * norm_path ["a"; "b"; ".."; "c"; ""] = ["a"; "c"; ""]
- * means: "a/b/../c/" = "a/c/"
- * norm_path ["";"";"a";"";"b"] = [""; "a"; "b"]
- * means: "//a//b" = "/a/b"
- * norm_path ["a"; "b"; ""; ".."; "c"; ""] = ["a"; "c"; ""]
- * means: "a/b//../c/" = "a/c/"
- * norm_path ["a"; ".."] = []
- * means: "a/.." = ""
- *)
-
-
-val apply_relative_url : url -> url -> url
- (* apply_relative_url base rel:
- * Interprets 'rel' relative to 'base' and returns the new URL. This
- * function implements RFC 1808.
- *)
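-  (* For instance (sketch): if 'base' was parsed from "http://host/a/b"
-   * and 'rel' from "../c" (with a suitably weakened syntax, see
-   * partial_url_syntax), the result corresponds to "http://host/c".
-   *)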
-
-val print_url : url -> unit
- (* Printer for the toploop. *)
-
-(* ---------------------------------------------------------------------- *)
-
-(* EXAMPLES:
- *
- * let http = Hashtbl.find common_url_syntax "http";;
- * let u = url_of_string http "http://g:pw@host/a/%62/";;
- * string_of_url u;;
- * --> "http://g:pw@host/a/%62/"
- * url_scheme u;;
- * --> "http"
- * url_user u;;
- * --> "g"
- * url_password u;;
- * --> "pw"
- * url_host u;;
- * --> "host"
- * url_path u;;
- * --> [ ""; "a"; "b"; "" ] (* sic! *)
- * url_path ~encoded:true u;;
- * --> [ ""; "a"; "%62"; "" ]
- * let v = make_url
- * ~path:[ ".."; "c" ]
- * ~fragment:"near-the-#-character"
- * { (partial_url_syntax http) with url_enable_fragment = Url_part_allowed };;
- * string_of_url v;;
- * --> "../c#near-the-%23-character"
- * let u' = modify_url ~syntax:(url_syntax_of_url v) u;;
- * (* u does not permit fragments *)
- * let w = apply_relative_url u' v;;
- * string_of_url w;;
- * --> "http://g:pw@host/c#near-the-%23-character"
- *)
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:27 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/06/26 22:57:49 gerd
- * Change: The record 'url_syntax' has an additional component
- * 'url_accepts_8bits'. Setting this option to 'true' causes that
- * the bytes >= 0x80 are no longer rejected.
- *
- * Revision 1.2 2000/06/25 22:55:47 gerd
- * Doc update.
- *
- * Revision 1.1 2000/06/24 20:19:59 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-*.cmo
-*.cmx
-*.cmi
-
-*.o
-*.a
-
+++ /dev/null
-# Note: you need an appropriate toploop "ocamlfattop" to run the
-# tests.
-
-# 2nd note: "test_encoding.cgi" is a CGI script; you must invoke
-# it through a browser and a WWW server.
-
-test: test_recode
- ocamlfattop test_netencoding.ml
- ocamlfattop test_mimestring.ml
- ocamlfattop test_cgi.ml
- ocamlfattop test_neturl.ml
- ./test_recode
-
-test_recode: test_recode.ml
- ocamlc -custom -o test_recode unix.cma threads.cma str.cma \
- ../netstring.cma ../netmappings_iso.cmo \
- -I .. -thread test_recode.ml
-
-distclean: clean
- rm -f *~ test_recode
-
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
-
-CLEAN:
-
+++ /dev/null
-#require "str";;
-#directory "..";;
-#load "netstring.cma";;
-
-
-open Cgi;;
-
-(**********************************************************************)
-(* dest_form_encoded_parameters *)
-(**********************************************************************)
-
-let t001 f =
- let r =
- f
- "blah blah
---snip
-Content-Disposition: form-data; name=blupp
-
-This is a text
---snip--
-blah blah"
- "snip"
- in
- r = ["blupp", "text/plain", "This is a text"]
-;;
-
-
-let t002 f =
- let r =
- f
- "blah blah
---snip
-Content-Disposition: form-data; name=blupp
-
-This is a text
---snip--
-blah blah"
- "snip"
- in
- r = ["blupp", "text/plain", "This is a text"]
-;;
-
-
-let t003 f =
- let r =
- f
- "--snip
-Content-Disposition: form-data; name=blupp
-
-This is a text
---snip--"
- "snip"
- in
- r = ["blupp", "text/plain", "This is a text"]
-;;
-
-
-let t004 f =
- let r =
- f
- "--snip
-Content-Disposition: form-data; name=blupp
-
-This is a text
-
---snip--"
- "snip"
- in
- r = ["blupp", "text/plain", "This is a text\013\n"]
-;;
-
-
-let t005 f =
- let r =
- f
- "--snip
-Content-Disposition: form-data; name=blupp
-
-This is a text
-
---snip--"
- "snip"
- in
- r = ["blupp", "text/plain", "This is a text\n"]
-;;
-
-
-let t006 f =
- let r =
- f
- "blah blah
---snip
-Content-Disposition: form-data;name= \"blupp\"
-
-This is a text
---snip--
-blah blah"
- "snip"
- in
- r = ["blupp", "text/plain", "This is a text"]
-;;
-
-
-let t007 f =
- let r =
- f
- "blah blah
---snip
-Content-Disposition: form-data;name= \"name=blupp\"
-
-This is a text
---snip--
-blah blah"
- "snip"
- in
- r = ["name=blupp", "text/plain", "This is a text"]
-;;
-
-
-let t008 f =
- let r =
- f
- "blah blah
---snip
-Content-Disposition: form-data; strange=\"name=blop\"; name= \"blupp\"
-
-This is a text
---snip--
-blah blah"
- "snip"
- in
- r = ["blupp", "text/plain", "This is a text"]
-;;
-
-
-let t009 f =
- let r =
- f
- "blah blah
---snip
-Content-Disposition: form-data; strange=\" name=blop \"; name=blupp
-
-This is a text
---snip--
-blah blah"
- "snip"
- in
- r = ["blupp", "text/plain", "This is a text"]
-;;
-
-
-let t010 f =
- (* There is a space after "octet-stream"! *)
- let r =
- f
- "--snip
-Content-Disposition: form-data; name=blupp
-Content-type: application/octet-stream 
-
-This is a text
---snip--"
- "snip"
- in
- r = ["blupp", "application/octet-stream", "This is a text"]
-;;
-
-
-let t011 f =
- let r =
- f
- "blah blah
---snip
-Content-Disposition: form-data; name=blupp
-
-This is a text
---snip
-Content-Disposition: form-data; name=blipp
-
-Another line
---snip-- blah
-blah blah"
- "snip"
- in
- r = ["blupp", "text/plain", "This is a text";
- "blipp", "text/plain", "Another line" ]
-;;
-
-
-let t012 f =
- (* A real example *)
- let r =
- f
-"-----------------------------10843891265508332411092264958
-Content-Disposition: form-data; name=\"line\"
-
-aaa
------------------------------10843891265508332411092264958
-Content-Disposition: form-data; name=\"submit\"
-
-Submit
------------------------------10843891265508332411092264958--
-"
- "---------------------------10843891265508332411092264958"
- in
- r = [ "line", "text/plain", "aaa";
- "submit", "text/plain", "Submit";
- ]
-;;
-
-
-(**********************************************************************)
-(* encode/decode *)
-(**********************************************************************)
-
-let t100() =
- let s = String.create 256 in
- for i = 0 to 255 do s.[i] <- Char.chr i done;
- let r = encode s in
- r = ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F" ^
- "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F" ^
- "+!%22%23$%25%26'()*%2B,-.%2F" ^
- "0123456789%3A%3B%3C%3D%3E%3F" ^
- "%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_" ^
- "%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7E%7F" ^
- "%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" ^
- "%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" ^
- "%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" ^
- "%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" ^
- "%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" ^
- "%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" ^
- "%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" ^
- "%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF")
-;;
-
-
-let t101() =
- let r = String.create 256 in
- for i = 0 to 255 do r.[i] <- Char.chr i done;
- let s = decode
- ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F" ^
- "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F" ^
- "+!%22%23$%25%26'()*%2B,-.%2F" ^
- "0123456789%3A%3B%3C%3D%3E%3F" ^
- "%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_" ^
- "%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7E%7F" ^
- "%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" ^
- "%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" ^
- "%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" ^
- "%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" ^
- "%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" ^
- "%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" ^
- "%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" ^
- "%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF") in
- r = s
-;;
-
-
-let t102() =
- let r = String.create 256 in
- for i = 0 to 255 do r.[i] <- Char.chr i done;
- let s = decode
- ((String.lowercase
- ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F" ^
- "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F" ^
- "+!%22%23$%25%26'()*%2B,-.%2F" ^
- "0123456789%3A%3B%3C%3D%3E%3F")) ^
- "%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_" ^
- (String.lowercase
- ("%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7E%7F" ^
- "%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" ^
- "%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" ^
- "%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" ^
- "%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" ^
- "%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" ^
- "%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" ^
- "%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" ^
- "%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF"))) in
- r = s
-;;
-
-(**********************************************************************)
-(* dest_url_encoded_parameters *)
-(**********************************************************************)
-
-let t200() =
- let r = dest_url_encoded_parameters "a=b&c=d" in
- r = ["a", "b"; "c", "d" ]
-;;
-
-
-let t201() =
- let r = dest_url_encoded_parameters "a=&c=d" in
- r = ["a", ""; "c", "d" ]
-;;
-
-
-let t202() =
- let r = dest_url_encoded_parameters "a=&c=" in
- r = ["a", ""; "c", "" ]
-;;
-
-
-let t203() =
- let r = dest_url_encoded_parameters "" in
- r = []
-;;
-
-
-let t204() =
- let r = dest_url_encoded_parameters "%41=%42" in
- r = ["A", "B"]
-;;
-
-
-(**********************************************************************)
-
-let test f n =
- if f() then
- print_endline ("Test " ^ n ^ " ok")
- else
- print_endline ("Test " ^ n ^ " FAILED!!!!");
- flush stdout
-;;
-
-
-let test_dest_form_encoded_parameters f n =
- let dest s b =
- let args = dest_form_encoded_parameters s b default_config in
- List.map
- (fun a -> arg_name a, arg_mimetype a, arg_value a)
- args
- in
- if f dest then
- print_endline ("Test dest_form_encoded_parameters " ^ n ^ " ok")
- else
- print_endline ("Test dest_form_encoded_parameters " ^ n ^ " FAILED!!!!");
- flush stdout
-;;
-
-
-let fill_stream s =
- (* Returns a channel that reads from string s.
- * This requires forking.
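- * (If the string were written from this same process without forking,
- * the write could block as soon as the pipe buffer filled up, because
- * nothing would be reading the other end yet.)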
- *)
- let rd, wr = Unix.pipe() in
- let pid = Unix.fork() in
- if pid = 0 then begin
- Unix.close rd;
- let out = Unix.out_channel_of_descr wr in
- output_string out s;
- close_out out;
- exit(0);
- end;
- Unix.close wr;
- Unix.in_channel_of_descr rd
-;;
-
-
-let test_dest_form_encoded_parameters_from_netstream f n =
- let dest s b =
- let fd = fill_stream s in
- let bs = String.length b * 2 in
- let stream = Netstream.create_from_channel fd None bs in
- let args = dest_form_encoded_parameters_from_netstream
- stream b default_config in
-
-(*
- List.iter
- (fun a ->
- Printf.printf "name=%s mimetype=%s value=%s\n"
- (arg_name a) (arg_mimetype a) (arg_value a))
- args;
-*)
- List.map
- (fun a -> arg_name a, arg_mimetype a, arg_value a)
- args
- in
- if f dest then
- Printf.printf
- "Test dest_form_encoded_parameters_from_netstream %s ok\n"
- n
- else
- print_endline ("Test dest_form_encoded_parameters_from_netstream " ^ n ^ " FAILED!!!!");
- flush stdout
-;;
-
-
-
-test_dest_form_encoded_parameters t001 "001";;
-test_dest_form_encoded_parameters t002 "002";;
-test_dest_form_encoded_parameters t003 "003";;
-test_dest_form_encoded_parameters t004 "004";;
-test_dest_form_encoded_parameters t005 "005";;
-test_dest_form_encoded_parameters t006 "006";;
-test_dest_form_encoded_parameters t007 "007";;
-test_dest_form_encoded_parameters t008 "008";;
-test_dest_form_encoded_parameters t009 "009";;
-test_dest_form_encoded_parameters t010 "010";;
-test_dest_form_encoded_parameters t011 "011";;
-test_dest_form_encoded_parameters t012 "012";;
-
-test_dest_form_encoded_parameters_from_netstream t001 "001";;
-test_dest_form_encoded_parameters_from_netstream t002 "002";;
-test_dest_form_encoded_parameters_from_netstream t003 "003";;
-test_dest_form_encoded_parameters_from_netstream t004 "004";;
-test_dest_form_encoded_parameters_from_netstream t005 "005";;
-test_dest_form_encoded_parameters_from_netstream t006 "006";;
-test_dest_form_encoded_parameters_from_netstream t007 "007";;
-test_dest_form_encoded_parameters_from_netstream t008 "008";;
-test_dest_form_encoded_parameters_from_netstream t009 "009";;
-test_dest_form_encoded_parameters_from_netstream t010 "010";;
-test_dest_form_encoded_parameters_from_netstream t011 "011";;
-test_dest_form_encoded_parameters_from_netstream t012 "012";;
-
-test t100 "100";;
-test t101 "101";;
-test t102 "102";;
-
-test t200 "200";;
-test t201 "201";;
-test t202 "202";;
-test t203 "203";;
-test t204 "204";;
+++ /dev/null
-#! /bin/sh
-# (*
-exec /opt/ocaml-2.04/bin/ocamlfattop "$0" "$@"
-*) directory ".";;
-
-#directory "..";;
-#load "netstring.cma";;
-
-Cgi.header "";
-Cgi.parse_arguments
- { Cgi.default_config with
- Cgi.how_to_process_arguments = (fun _ -> Cgi.File)
- };
-let params = Cgi.arguments() in
-print_string "<html><body>\n";
-print_string "<h1>Parameters:</h1>\n";
-print_string "<ul>\n";
-List.iter
- (fun (n,a) ->
- print_string "<li>";
- print_string n;
- print_string ":";
- print_string (Cgi.arg_mimetype a);
- print_string "=";
- (match Cgi.arg_filename a with
- None -> ()
- | Some fn -> print_string ("[filename=" ^ fn ^ "]")
- );
- print_string (Cgi.arg_value a);
- print_string "</li>\n";
-
- )
- params;
-
-Cgi.cleanup();
-
-print_string "</ul>\n";
-
-print_string "<h1>GET URL-encoded form</h1>\n";
-print_string "<form action=\"test_encoding.cgi\" method=GET>\n";
-print_string "<input type=text name=line>\n";
-print_string "<input type=submit name=submit value=\"Submit\">\n";
-print_string "</form>\n";
-
-print_string "<h1>POST URL-encoded form</h1>\n";
-print_string "<form action=\"test_encoding.cgi\" method=POST>\n";
-print_string "<input type=text name=line>\n";
-print_string "<input type=submit name=submit value=\"Submit\">\n";
-print_string "</form>\n";
-
-print_string "<h1>POST FORM-encoded form</h1>\n";
-print_string "<form action=\"test_encoding.cgi\" method=POST enctype=\"multipart/form-data\">\n";
-print_string "<input type=text name=line>\n";
-print_string "<input type=text name=\"sträange\">\n";
-print_string "<input type=submit name=submit value=\"Submit\">\n";
-print_string "</form>\n";
-
-print_string "<h1>File upload</h1>\n";
-print_string "<form action=\"test_encoding.cgi\" method=POST enctype=\"multipart/form-data\">\n";
-print_string "<input type=text name=line>\n";
-print_string "<input type=file name=file>\n";
-print_string "<input type=submit name=submit value=\"Submit\">\n";
-print_string "</form>\n";
-
-
-
-print_string "</body></html>\n";
-
-flush stdout
-;;
-
-
+++ /dev/null
-#require "str";;
-#directory "..";;
-#load "netstring.cma";;
-
-open Mimestring;;
-
-(**********************************************************************)
-(* scan_structured_value *)
-(**********************************************************************)
-
-let t001() =
- let r = scan_structured_value "user@domain.com" [ '@'; '.' ] [] in
- r = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
-;;
-
-
-let t002() =
- let r = scan_structured_value "user @ domain . com" [ '@'; '.' ] [] in
- r = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
-;;
-
-
-let t003() =
- let r = scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ]
- [] in
- r = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
-;;
-
-
-let t004() =
- let r = scan_structured_value "user @ domain . com" [ '@'; '.'; ' ' ] [] in
- r = [ Atom "user"; Special ' '; Special '@'; Special ' '; Atom "domain";
- Special ' '; Special '.'; Special ' '; Atom "com" ]
-;;
-
-
-let t005() =
- let r = scan_structured_value "user(Do you know him?)@domain.com"
- ['@'; '.'; '('] [] in
- r = [ Atom "user"; Special '('; Atom "Do"; Atom "you"; Atom "know";
- Atom "him?)"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
-;;
-
-
-let t006() =
- let r = scan_structured_value "\"My.name\"@domain.com" [ '@'; '.' ] [] in
- r = [ QString "My.name"; Special '@'; Atom "domain"; Special '.';
- Atom "com" ]
-;;
-
-
-let t007() =
- let r = scan_structured_value "\"\\\"()@. \"@domain.com" [ '@'; '.' ] [] in
- r = [ QString "\"()@. "; Special '@'; Atom "domain"; Special '.';
- Atom "com" ]
-;;
-
-
-let t008() =
- let r = scan_structured_value "a(b(c(d)e)f)g" [] [] in
- r = [ Atom "a"; Atom "g" ]
-;;
-
-
-let t009() =
- let r = scan_structured_value "a(b(c(d)e)f" [] [] in
- r = [ Atom "a" ]
-;;
-
-
-let t010() =
- let r = scan_structured_value "a(b\\(c\\(d\\)e)f" [] [] in
- r = [ Atom "a"; Atom "f" ]
-;;
-
-
-let t011() =
- let r = scan_structured_value "a(b(c(d)e)f\\" [] [] in
- r = [ Atom "a" ]
-;;
-
-
-let t012() =
- let r = scan_structured_value "\"abc" [] [] in
- r = [ QString "abc" ]
-;;
-
-
-let t013() =
- let r = scan_structured_value "\"abc\\" [] [] in
- r = [ QString "abc\\" ]
-;;
-
-
-(* New tests for netstring-0.9: *)
-
-let t020() =
- let r = scan_structured_value "user(Do you know him?)@domain.com"
- [ '@'; '.' ] [ Return_comments ] in
- r = [ Atom "user"; Comment; Special '@'; Atom "domain"; Special '.';
- Atom "com" ]
-;;
-
-let t021() =
- let r = scan_structured_value "user (Do you know him?) @ domain . com"
- [ '@'; '.'; ' ' ] [] in
- r = [ Atom "user"; Special ' '; Special ' '; Special ' '; Special '@';
- Special ' '; Atom "domain";
- Special ' '; Special '.'; Special ' '; Atom "com" ]
-;;
-
-let t022() =
- let r = scan_structured_value "user (Do you know him?) @ domain . com"
- [ '@'; '.'; ' ' ] [ Return_comments ] in
- r = [ Atom "user"; Special ' '; Comment; Special ' '; Special '@';
- Special ' '; Atom "domain";
- Special ' '; Special '.'; Special ' '; Atom "com" ]
-;;
-
-let t023() =
- let r = scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?="
- [] [] in
- r = [ Atom "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" ]
-;;
-
-let t024() =
- let r = scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?="
- [ ] [ Recognize_encoded_words ] in
- r = [ EncodedWord("ISO-8859-1", "Q", "Keld_J=F8rn_Simonsen") ]
-;;
-
-let t025() =
- let r = scan_structured_value
- "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="
- []
- [ Recognize_encoded_words ] in
- r = [ EncodedWord
- ("ISO-8859-1", "B", "SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=");
- EncodedWord
- ("ISO-8859-2", "B", "dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==")
- ]
-;;
-
-(**********************************************************************)
-(* s_extended_token *)
-(**********************************************************************)
-
-let scan specials options str =
- let scn = create_mime_scanner specials options str in
- scan_token_list scn;;
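-(* Each element returned by scan is an (extended token, token) pair; the
- * tests below read get_pos, get_line, get_column and get_length off the
- * extended token of each pair. *)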
-
-let t100() =
- let r = scan [] [] "Two atoms" in
- match r with
- [ a1, Atom "Two"; a2, Atom "atoms" ] ->
-
- (get_pos a1 = 0) &&
- (get_line a1 = 1) &&
- (get_column a1 = 0) &&
- (get_length a1 = 3) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 4) &&
- (get_line a2 = 1) &&
- (get_column a2 = 4) &&
- (get_length a2 = 5) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-
-let t101() =
- let r = scan [] [] " Two atoms " in
- match r with
- [ a1, Atom "Two"; a2, Atom "atoms" ] ->
-
- (get_pos a1 = 2) &&
- (get_line a1 = 1) &&
- (get_column a1 = 2) &&
- (get_length a1 = 3) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 7) &&
- (get_line a2 = 1) &&
- (get_column a2 = 7) &&
- (get_length a2 = 5) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-
-let t102() =
- let r = scan [] [] " Two\n atoms " in
- match r with
- [ a1, Atom "Two"; a2, Atom "atoms" ] ->
-
- (get_pos a1 = 2) &&
- (get_line a1 = 1) &&
- (get_column a1 = 2) &&
- (get_length a1 = 3) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 7) &&
- (get_line a2 = 2) &&
- (get_column a2 = 1) &&
- (get_length a2 = 5) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-let t110() =
- let r = scan [] [] "\"Two\" \"qstrings\"" in
- match r with
- [ a1, QString "Two"; a2, QString "qstrings" ] ->
-
- (get_pos a1 = 0) &&
- (get_line a1 = 1) &&
- (get_column a1 = 0) &&
- (get_length a1 = 5) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 6) &&
- (get_line a2 = 1) &&
- (get_column a2 = 6) &&
- (get_length a2 = 10) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-let t111() =
- let r = scan [] [] " \"Two\" \"qstrings\" " in
- match r with
- [ a1, QString "Two"; a2, QString "qstrings" ] ->
-
- (get_pos a1 = 2) &&
- (get_line a1 = 1) &&
- (get_column a1 = 2) &&
- (get_length a1 = 5) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 9) &&
- (get_line a2 = 1) &&
- (get_column a2 = 9) &&
- (get_length a2 = 10) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-let t112() =
- let r = scan [] [] " \"Two\nlines\" \"and\nqstrings\" " in
- match r with
- [ a1, QString "Two\nlines"; a2, QString "and\nqstrings" ] ->
-
- (get_pos a1 = 2) &&
- (get_line a1 = 1) &&
- (get_column a1 = 2) &&
- (get_length a1 = 11) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 15) &&
- (get_line a2 = 2) &&
- (get_column a2 = 8) &&
- (get_length a2 = 14) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-let t113() =
- let r = scan [] [] " \"Two\\\nlines\" \"and\\\nqstrings\" " in
- match r with
- [ a1, QString "Two\nlines"; a2, QString "and\nqstrings" ] ->
-
- (get_pos a1 = 2) &&
- (get_line a1 = 1) &&
- (get_column a1 = 2) &&
- (get_length a1 = 12) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 16) &&
- (get_line a2 = 2) &&
- (get_column a2 = 8) &&
- (get_length a2 = 15) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-let t120() =
- (* Domain literals are implemented like quoted strings, so only the
- * most complicated test case is repeated here.
- *)
- let r = scan [] [] " [Two\\\nlines] [and\\\nliterals] " in
- match r with
- [ a1, DomainLiteral "Two\nlines"; a2, DomainLiteral "and\nliterals" ] ->
-
- (get_pos a1 = 2) &&
- (get_line a1 = 1) &&
- (get_column a1 = 2) &&
- (get_length a1 = 12) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 16) &&
- (get_line a2 = 2) &&
- (get_column a2 = 8) &&
- (get_length a2 = 15) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-let t130() =
- let r = scan [] [ Return_comments ] "(Two) (comments)" in
- match r with
- [ a1, Comment; a2, Comment ] ->
-
- (get_pos a1 = 0) &&
- (get_line a1 = 1) &&
- (get_column a1 = 0) &&
- (get_length a1 = 5) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 6) &&
- (get_line a2 = 1) &&
- (get_column a2 = 6) &&
- (get_length a2 = 10) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-let t131() =
- let r = scan [] [ Return_comments ] "(Two\nlines) (and\ncomments)" in
- match r with
- [ a1, Comment; a2, Comment ] ->
-
- (get_pos a1 = 0) &&
- (get_line a1 = 1) &&
- (get_column a1 = 0) &&
- (get_length a1 = 11) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 12) &&
- (get_line a2 = 2) &&
- (get_column a2 = 7) &&
- (get_length a2 = 14) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-let t132() =
- let r = scan [] [ Return_comments ] "(Two\\\nlines) (and\\\ncomments)" in
- match r with
- [ a1, Comment; a2, Comment ] ->
-
- (get_pos a1 = 0) &&
- (get_line a1 = 1) &&
- (get_column a1 = 0) &&
- (get_length a1 = 12) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 13) &&
- (get_line a2 = 2) &&
- (get_column a2 = 7) &&
- (get_length a2 = 15) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-let t133() =
- let r = scan [] [ Return_comments ] "(a\n(b\nc)d\ne(f)) atom" in
- match r with
- [ a1, Comment; a2, Atom "atom" ] ->
-
- (get_pos a1 = 0) &&
- (get_line a1 = 1) &&
- (get_column a1 = 0) &&
- (get_length a1 = 15) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 16) &&
- (get_line a2 = 4) &&
- (get_column a2 = 6) &&
- (get_length a2 = 4) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-let t140() =
- let r = scan [] [] "\031\031" in
- match r with
- [ a1, Control '\031'; a2, Control '\031' ] ->
-
- (get_pos a1 = 0) &&
- (get_line a1 = 1) &&
- (get_column a1 = 0) &&
- (get_length a1 = 1) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 1) &&
- (get_line a2 = 1) &&
- (get_column a2 = 1) &&
- (get_length a2 = 1) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-let t150() =
- let r = scan [ '\t'; '\n' ] [] " \t\n \n \t" in
- match r with
- [ a1, Special '\t'; _, Special '\n'; _, Special '\n'; a2, Special '\t'] ->
-
- (get_pos a1 = 1) &&
- (get_line a1 = 1) &&
- (get_column a1 = 1) &&
- (get_length a1 = 1) &&
- (separates_adjacent_encoded_words a1 = false) &&
-
- (get_pos a2 = 8) &&
- (get_line a2 = 3) &&
- (get_column a2 = 2) &&
- (get_length a2 = 1) &&
- (separates_adjacent_encoded_words a2 = false)
-
- | _ ->
- false
-;;
-
-let t160() =
- let r = scan [] [ Recognize_encoded_words ]
- "=?iso8859-1?q?G=F6rd?= =?iso8859-1?q?G=F6rd?=" in
- match r with
- [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd");
- a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] ->
-
- (get_pos a1 = 0) &&
- (get_line a1 = 1) &&
- (get_column a1 = 0) &&
- (get_length a1 = 22) &&
- (separates_adjacent_encoded_words a1 = false) &&
- (get_decoded_word a1 = "Görd") &&
- (get_charset a1 = "ISO8859-1") &&
-
- (get_pos a2 = 23) &&
- (get_line a2 = 1) &&
- (get_column a2 = 23) &&
- (get_length a2 = 22) &&
- (separates_adjacent_encoded_words a2 = false) &&
- (get_decoded_word a2 = "Görd") &&
- (get_charset a2 = "ISO8859-1")
-
- | _ ->
- false
-;;
-
-let t161() =
- let r = scan [ ' ' ] [ Recognize_encoded_words ]
- "=?iso8859-1?q?G=F6rd?= =?iso8859-1?q?G=F6rd?=" in
- match r with
- [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd");
- sp, Special ' ';
- a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] ->
-
- (get_pos a1 = 0) &&
- (get_line a1 = 1) &&
- (get_column a1 = 0) &&
- (get_length a1 = 22) &&
- (separates_adjacent_encoded_words a1 = false) &&
- (get_decoded_word a1 = "Görd") &&
- (get_charset a1 = "ISO8859-1") &&
-
- (get_pos a2 = 23) &&
- (get_line a2 = 1) &&
- (get_column a2 = 23) &&
- (get_length a2 = 22) &&
- (separates_adjacent_encoded_words a2 = false) &&
- (get_decoded_word a2 = "Görd") &&
- (get_charset a2 = "ISO8859-1") &&
-
- (separates_adjacent_encoded_words sp = true)
-
- | _ ->
- false
-;;
-
-let t162() =
- let r = scan [ ' ' ] [ Recognize_encoded_words ]
-	"=?iso8859-1?q?G=F6rd?=  =?iso8859-1?q?G=F6rd?=" in
- match r with
- [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd");
- sp1, Special ' ';
- sp2, Special ' ';
- a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] ->
-
- (get_pos a1 = 0) &&
- (get_line a1 = 1) &&
- (get_column a1 = 0) &&
- (get_length a1 = 22) &&
- (separates_adjacent_encoded_words a1 = false) &&
- (get_decoded_word a1 = "Görd") &&
- (get_charset a1 = "ISO8859-1") &&
-
- (get_pos a2 = 24) &&
- (get_line a2 = 1) &&
- (get_column a2 = 24) &&
- (get_length a2 = 22) &&
- (separates_adjacent_encoded_words a2 = false) &&
- (get_decoded_word a2 = "Görd") &&
- (get_charset a2 = "ISO8859-1") &&
-
- (separates_adjacent_encoded_words sp1 = true) &&
- (separates_adjacent_encoded_words sp2 = true)
-
- | _ ->
- false
-;;
-
-
-
-(**********************************************************************)
-
-let test f n =
- if f() then
- print_endline ("Test " ^ n ^ " ok")
- else
- print_endline ("Test " ^ n ^ " FAILED!!!!");
- flush stdout
-;;
-
-test t001 "001";;
-test t002 "002";;
-test t003 "003";;
-test t004 "004";;
-test t005 "005";;
-test t006 "006";;
-test t007 "007";;
-test t008 "008";;
-test t009 "009";;
-test t010 "010";;
-test t011 "011";;
-test t012 "012";;
-test t013 "013";;
-
-test t020 "020";;
-test t021 "021";;
-test t022 "022";;
-test t023 "023";;
-test t024 "024";;
-test t025 "025";;
-
-test t100 "100";;
-test t101 "101";;
-test t102 "102";;
-test t110 "110";;
-test t111 "111";;
-test t112 "112";;
-test t113 "113";;
-test t120 "120";;
-test t130 "130";;
-test t131 "131";;
-test t132 "132";;
-test t133 "133";;
-test t140 "140";;
-test t150 "150";;
-test t160 "160";;
-test t161 "161";;
-test t162 "162";;
+++ /dev/null
-#require "str";;
-#directory "..";;
-#load "netstring.cma";;
-
-
-open Netencoding;;
-
-(**********************************************************************)
-(* Base64 *)
-(**********************************************************************)
-
-(* Test strings:
- * "", "a", "ab", "abc", "abcd", "abcde",
- * "abcdefghijklmnopqrstuvwxyz".
- *)
-
-let t001() =
- (* ENCODE. No line breaks. *)
- Base64.encode "" = "" &
- Base64.encode "a" = "YQ==" &
- Base64.encode "ab" = "YWI=" &
- Base64.encode "abc" = "YWJj" &
- Base64.encode "abcd" = "YWJjZA==" &
- Base64.encode "abcde" = "YWJjZGU=" &
- Base64.encode "abcdefghijklmnopqrstuvwxyz" =
- "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="
-;;
-
-
-let t002() =
- (* ENCODE. Lines with length of 4, separated by LF *)
- let abc = "abcdefghijklmnopqrstuvwxyz" in
- Base64.encode_substring abc 0 0 4 false = "" &
- Base64.encode_substring abc 0 1 4 false = "YQ==\n" &
- Base64.encode_substring abc 0 2 4 false = "YWI=\n" &
- Base64.encode_substring abc 0 3 4 false = "YWJj\n" &
- Base64.encode_substring abc 0 4 4 false = "YWJj\nZA==\n" &
- Base64.encode_substring abc 0 5 4 false = "YWJj\nZGU=\n" &
- Base64.encode_substring abc 0 26 4 false =
- "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n"
-;;
-
-
-let t003() =
- (* ENCODE. Lines with length of 5, separated by LF *)
- let abc = "abcdefghijklmnopqrstuvwxyz" in
- Base64.encode_substring abc 0 0 5 false = "" &
- Base64.encode_substring abc 0 1 5 false = "YQ==\n" &
- Base64.encode_substring abc 0 2 5 false = "YWI=\n" &
- Base64.encode_substring abc 0 3 5 false = "YWJj\n" &
- Base64.encode_substring abc 0 4 5 false = "YWJj\nZA==\n" &
- Base64.encode_substring abc 0 5 5 false = "YWJj\nZGU=\n" &
- Base64.encode_substring abc 0 26 5 false =
- "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n"
-;;
-
-
-let t004() =
- (* ENCODE. Lines with length of 7, separated by LF *)
- let abc = "abcdefghijklmnopqrstuvwxyz" in
- Base64.encode_substring abc 0 0 7 false = "" &
- Base64.encode_substring abc 0 1 7 false = "YQ==\n" &
- Base64.encode_substring abc 0 2 7 false = "YWI=\n" &
- Base64.encode_substring abc 0 3 7 false = "YWJj\n" &
- Base64.encode_substring abc 0 4 7 false = "YWJj\nZA==\n" &
- Base64.encode_substring abc 0 5 7 false = "YWJj\nZGU=\n" &
- Base64.encode_substring abc 0 26 7 false =
- "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n"
-;;
-
-
-let t005() =
- (* ENCODE. Lines with length of 8, separated by LF *)
- let abc = "abcdefghijklmnopqrstuvwxyz" in
- Base64.encode_substring abc 0 0 8 false = "" &
- Base64.encode_substring abc 0 1 8 false = "YQ==\n" &
- Base64.encode_substring abc 0 2 8 false = "YWI=\n" &
- Base64.encode_substring abc 0 3 8 false = "YWJj\n" &
- Base64.encode_substring abc 0 4 8 false = "YWJjZA==\n" &
- Base64.encode_substring abc 0 5 8 false = "YWJjZGU=\n" &
- Base64.encode_substring abc 0 26 8 false =
- "YWJjZGVm\nZ2hpamts\nbW5vcHFy\nc3R1dnd4\neXo=\n"
-;;
-
-
-let t006() =
- (* ENCODE. Lines with length of 8, separated by CRLF *)
- let abc = "abcdefghijklmnopqrstuvwxyz" in
- Base64.encode_substring abc 0 0 8 true = "" &
- Base64.encode_substring abc 0 1 8 true = "YQ==\r\n" &
- Base64.encode_substring abc 0 2 8 true = "YWI=\r\n" &
- Base64.encode_substring abc 0 3 8 true = "YWJj\r\n" &
- Base64.encode_substring abc 0 4 8 true = "YWJjZA==\r\n" &
- Base64.encode_substring abc 0 5 8 true = "YWJjZGU=\r\n" &
- Base64.encode_substring abc 0 26 8 true =
- "YWJjZGVm\r\nZ2hpamts\r\nbW5vcHFy\r\nc3R1dnd4\r\neXo=\r\n"
-;;
-
-
-let t020() =
- (* DECODE. First test without spaces *)
- Base64.decode_substring "" 0 0 false false = "" &
- Base64.decode_substring "YQ==" 0 4 false false = "a" &
- Base64.decode_substring "YWI=" 0 4 false false = "ab" &
- Base64.decode_substring "YWJj" 0 4 false false = "abc" &
- Base64.decode_substring "YWJjZA==" 0 8 false false = "abcd" &
- Base64.decode_substring "YWJjZGU=" 0 8 false false = "abcde" &
- Base64.decode_substring
- "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo=" 0 36 false false =
- "abcdefghijklmnopqrstuvwxyz"
-;;
-
-
-let t021() =
- (* DECODE. With spaces *)
- Base64.decode_substring " \r\n\t" 0 4 false true = "" &
- Base64.decode_substring " Y W J j\n Z G U = " 0 18 false true = "abcde"
-;;
-
-
-let t022() =
- (* DECODE. With URL characters and spaces *)
- Base64.decode_substring " Y W J j\n Z G U = " 0 18 true true = "abcde" &
- Base64.decode_substring " Y W J j\n Z G U . " 0 18 true true = "abcde"
-;;
-
-(**********************************************************************)
-(* Quoted Printable *)
-(**********************************************************************)
-
-let t100() =
- (* ENCODE. *)
- QuotedPrintable.encode "a %= 12345 &$[]\"" = "a %=3D 12345 &=24=5B=5D=22" &
- QuotedPrintable.encode "\000\001\002" = "=00=01=02" &
- QuotedPrintable.encode "abc\r\ndef\nghi" = "abc\r\ndef\nghi" &
- QuotedPrintable.encode " abc\r\n def\n ghi" = " abc\r\n def\n ghi" &
- QuotedPrintable.encode "abc \r\n def\nghi " = "abc=20\r\n def\nghi=20"
-;;
-
-
-let t120() =
- (* DECODE. *)
- QuotedPrintable.decode "a %=3D 12345 &=24=5B=5D=22" = "a %= 12345 &$[]\"" &
- QuotedPrintable.decode "=00=01=02" = "\000\001\002" &
- QuotedPrintable.decode "abc\r\ndef\nghi" = "abc\r\ndef\nghi" &
- QuotedPrintable.decode " abc\r\n def\n ghi" = " abc\r\n def\n ghi" &
- QuotedPrintable.decode "abc=20\r\n def\nghi=20" = "abc \r\n def\nghi " &
- QuotedPrintable.decode "abc=\r\n def\nghi=20" = "abc def\nghi "
-;;
-
-(**********************************************************************)
-(* Q *)
-(**********************************************************************)
-
-let t200() =
- (* ENCODE. *)
- Q.encode "a %= 12345 &$[]\"" = "a=20=25=3D=2012345=20=26=24=5B=5D=22" &
- Q.encode "\000\001\002\r\n" = "=00=01=02=0D=0A"
-;;
-
-
-let t220() =
- (* DECODE. *)
- Q.decode "a=20=25=3D=2012345=20=26=24=5B=5D=22" = "a %= 12345 &$[]\"" &
- Q.decode "=00=01=02=0D=0A" = "\000\001\002\r\n" &
- Q.decode "a=20=25=3d=2012345=20=26=24=5b=5d=22" = "a %= 12345 &$[]\""
-;;
-
-(**********************************************************************)
-(* Url *)
-(**********************************************************************)
-
-(* Already tested for Cgi *)
-
-(**********************************************************************)
-(* Html *)
-(**********************************************************************)
-
-let t300() =
- Html.encode_from_latin1 "<>&\"abcdefäöÜ\160\025'" =
-    "&lt;&gt;&amp;&quot;abcdefäöÜ&nbsp;&#25;'"
-;;
-
-
-let t320() =
- Html.decode_to_latin1
-    "&lt;&gt;&amp;&quot;abcdefäöÜ&nbsp;&#25;" =
- "<>&\"abcdefäöÜ\160\025" &
-  Html.decode_to_latin1 "&#39;" = "'" &
- Html.decode_to_latin1 "&nonsense;" = "&nonsense;" &
-  Html.decode_to_latin1 "&#256;" = "&#256;"
-;;
-
-
-(**********************************************************************)
-
-let test f n =
- if f() then
- print_endline ("Test " ^ n ^ " ok")
- else
- print_endline ("Test " ^ n ^ " FAILED!!!!");
- flush stdout
-;;
-
-test t001 "001";
-test t002 "002";
-test t003 "003";
-test t004 "004";
-test t005 "005";
-test t006 "006";
-
-test t020 "020";
-test t021 "021";
-test t022 "022";
-
-test t100 "100";
-test t120 "120";
-
-test t200 "200";
-test t220 "220";
-
-test t300 "300";
-test t320 "320";
+++ /dev/null
-#directory "..";;
-#load "netstring.cma";;
-
-open Neturl;;
-
-
-let expect_malformed_url f =
- try ignore(f()); false with Malformed_URL -> true;;
-
-let works f =
- not (expect_malformed_url f)
-;;
-
-(**********************************************************************)
-(* extract_url_scheme *)
-(**********************************************************************)
-
-let t001 () =
- extract_url_scheme "a:bc" = "a" &&
- extract_url_scheme "A:bc" = "a" &&
- extract_url_scheme "a:b:c" = "a" &&
- extract_url_scheme "a+b-c:d:e" = "a+b-c"
-;;
-
-
-let t002 () =
- let test s =
- try ignore(extract_url_scheme s); false with Malformed_URL -> true
- in
- test "a" &&
- test "a/b:c" &&
- test "%61:b" &&
- test "a%3ab"
-;;
-
-(**********************************************************************)
-(* url_syntax *)
-(**********************************************************************)
-
-let hashtbl_for_all f h =
- let b = ref true in
- Hashtbl.iter
- (fun k v -> b := !b && f k v)
- h;
- !b
-;;
-
-let t010 () =
- url_syntax_is_valid null_url_syntax &&
- url_syntax_is_valid ip_url_syntax &&
- hashtbl_for_all
- (fun _ syn ->
- url_syntax_is_valid syn
- )
- common_url_syntax
-;;
-
-let t011 () =
- url_syntax_is_valid (partial_url_syntax null_url_syntax) &&
- url_syntax_is_valid (partial_url_syntax ip_url_syntax) &&
- hashtbl_for_all
- (fun _ syn ->
- url_syntax_is_valid (partial_url_syntax syn)
- )
- common_url_syntax
-;;
-
-let t012 () =
- let f = fun _ -> true in
- let syn =
- { url_enable_scheme = Url_part_not_recognized;
- url_enable_user = Url_part_required;
- url_enable_password = Url_part_allowed;
- url_enable_host = Url_part_required;
- url_enable_port = Url_part_not_recognized;
- url_enable_path = Url_part_required;
- url_enable_param = Url_part_not_recognized;
- url_enable_query = Url_part_not_recognized;
- url_enable_fragment = Url_part_required;
- url_enable_other = Url_part_not_recognized;
- url_accepts_8bits = false;
- url_is_valid = f;
- } in
- let syn' = partial_url_syntax syn in
-
- (syn'.url_enable_scheme = Url_part_not_recognized) &&
- (syn'.url_enable_user = Url_part_allowed) &&
- (syn'.url_enable_password = Url_part_allowed) &&
- (syn'.url_enable_host = Url_part_allowed) &&
- (syn'.url_enable_port = Url_part_not_recognized) &&
- (syn'.url_enable_path = Url_part_allowed) &&
- (syn'.url_enable_param = Url_part_not_recognized) &&
- (syn'.url_enable_query = Url_part_not_recognized) &&
- (syn'.url_enable_fragment = Url_part_allowed) &&
- (syn'.url_enable_other = Url_part_not_recognized) &&
- (syn'.url_is_valid == f) &&
-
- url_syntax_is_valid syn &&
- url_syntax_is_valid syn'
-;;
-
-(**********************************************************************)
-(* make_url *)
-(**********************************************************************)
-
-let t020 () =
- (* Basic functionality: *)
- let http_syn = Hashtbl.find common_url_syntax "http" in
-
- let u1 = make_url
- (* default: not encoded *)
- ~scheme:"http"
- ~user:"U"
- ~password:"%()~$@"
- ~host:"a.b.c"
- ~port:81
- ~path:["";"?";""]
- http_syn in
-
- url_provides
- ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
- u1 &&
-
- not
- (url_provides
- ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
- ~query:true u1) &&
-
- (url_syntax_of_url u1 == http_syn) &&
-
- (url_scheme u1 = "http") &&
- (url_user u1 = "U") &&
- (url_password u1 = "%()~$@") &&
- (url_host u1 = "a.b.c") &&
- (url_port u1 = 81) &&
- (url_path u1 = ["";"?";""]) &&
-
- (url_user ~encoded:true u1 = "U") &&
- (url_password ~encoded:true u1 = "%25()%7E$%40") &&
- (url_path ~encoded:true u1 = ["";"%3F";""]) &&
-
- string_of_url u1 = "http://U:%25()%7E$%40@a.b.c:81/%3F/"
-;;
-
-
-let t021 () =
- (* Basic functionality: *)
- let http_syn = Hashtbl.find common_url_syntax "http" in
-
- let u1 = make_url
- ~encoded:true
- ~scheme:"http"
- ~user:"%55"
- ~password:"%25()%7e$%40"
- ~host:"a.b.c"
- ~port:81
- ~path:["";"%3F";""]
- http_syn in
-
- url_provides
- ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
- u1 &&
-
- not
- (url_provides
- ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
- ~query:true u1) &&
-
- (url_syntax_of_url u1 == http_syn) &&
-
- (url_scheme u1 = "http") &&
- (url_user u1 = "U") &&
- (url_password u1 = "%()~$@") &&
- (url_host u1 = "a.b.c") &&
- (url_port u1 = 81) &&
- (url_path u1 = ["";"?";""]) &&
-
- (url_user ~encoded:true u1 = "%55") &&
- (url_password ~encoded:true u1 = "%25()%7e$%40") &&
- (url_path ~encoded:true u1 = ["";"%3F";""]) &&
-
- string_of_url u1 = "http://%55:%25()%7e$%40@a.b.c:81/%3F/"
-;;
-
-
-(* NEGATIVE TESTS *)
-
-let t030 () =
- (* It is not possible to add a component which is not recognized *)
- let http_syn = Hashtbl.find common_url_syntax "http" in
-
- expect_malformed_url
- (fun () ->
- make_url
- ~scheme:"http"
- ~user:"U"
- ~password:"%()~$@"
- ~host:"a.b.c"
- ~port:81
- ~path:["";"?";""]
- ~fragment:"abc"
- http_syn)
-;;
-
-
-let t031 () =
- (* It is not possible to put malformed '%'-encodings into the URL *)
- let http_syn = Hashtbl.find common_url_syntax "http" in
-
- works (* reference *)
- (fun () ->
- make_url
- ~encoded:true
- ~scheme:"http"
- ~user:"U"
- ~password:"XX"
- ~host:"a.b.c"
- ~port:81
- ~path:["";"a";""]
- http_syn) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~encoded:true
- ~scheme:"http"
- ~user:"U"
- ~password:"%XX"
- ~host:"a.b.c"
- ~port:81
- ~path:["";"a";""]
- http_syn) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~encoded:true
- ~scheme:"http"
- ~user:"U"
- ~password:"%X"
- ~host:"a.b.c"
- ~port:81
- ~path:["";"a";""]
- http_syn) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~encoded:true
- ~scheme:"http"
- ~user:"U"
- ~password:"%"
- ~host:"a.b.c"
- ~port:81
- ~path:["";"a";""]
- http_syn)
-;;
-
-let t032 () =
- (* It is not possible to put unsafe characters into the URL *)
- let http_syn = Hashtbl.find common_url_syntax "http" in
-
- let make c =
- make_url
- ~encoded:true
- ~scheme:"http"
- ~user:"U"
- ~password:(String.make 1 c)
- ~host:"a.b.c"
- ~port:81
- ~path:["";"a";""]
- http_syn
- in
-
- works (fun () -> make 'a') && (* reference *)
-
- (* List of unsafe characters taken from RFC1738: *)
- expect_malformed_url (fun () -> make '<') &&
- expect_malformed_url (fun () -> make '>') &&
- expect_malformed_url (fun () -> make '"') &&
- expect_malformed_url (fun () -> make '#') &&
- (* Note: '#' would be considered as reserved if fragments were enabled *)
- expect_malformed_url (fun () -> make '%') &&
- expect_malformed_url (fun () -> make '{') &&
- expect_malformed_url (fun () -> make '}') &&
- expect_malformed_url (fun () -> make '|') &&
- expect_malformed_url (fun () -> make '\\') &&
- expect_malformed_url (fun () -> make '^') &&
- expect_malformed_url (fun () -> make '[') &&
- expect_malformed_url (fun () -> make ']') &&
- expect_malformed_url (fun () -> make '`') &&
- expect_malformed_url (fun () -> make '~') &&
- (* Note: '~' is considered as safe in paths: *)
- works
- (fun () ->
- make_url
- ~encoded:true
- ~scheme:"http"
- ~user:"U"
- ~password:"a"
- ~host:"a.b.c"
- ~port:81
- ~path:["";"~";""]
- http_syn)
-;;
-
-let t033 () =
- (* It is not possible to put reserved characters into the URL *)
- let http_syn = Hashtbl.find common_url_syntax "http" in
-
- let make_password c =
- make_url
- ~encoded:true
- ~scheme:"http"
- ~user:"U"
- ~password:(String.make 1 c)
- ~host:"a.b.c"
- ~port:81
- ~path:["";"a";""]
- http_syn
- in
- let make_path c =
- make_url
- ~encoded:true
- ~scheme:"http"
- ~user:"U"
- ~password:"a"
- ~host:"a.b.c"
- ~port:81
- ~path:["";String.make 1 c;""]
- http_syn
- in
- let make_query c =
- make_url
- ~encoded:true
- ~scheme:"http"
- ~user:"U"
- ~password:"a"
- ~host:"a.b.c"
- ~port:81
- ~path:["";"a";""]
- ~query:(String.make 1 c)
- http_syn
- in
-
- (* Note: There is a difference between RFC 1738 and RFC 1808 regarding
- * which characters are reserved. RFC 1808 defines a fixed set of characters
- * as reserved while RFC 1738 defines the reserved characters depending
- * on the scheme.
- * This implementation of URLs follows RFC 1738 (because of practical
- * This implementation of URLs follows RFC 1738 (for practical
- * reasons).
-
- works (fun () -> make_password 'a') && (* reference *)
- works (fun () -> make_path 'a') &&
- works (fun () -> make_query 'a') &&
-
- expect_malformed_url (fun () -> make_password ':') &&
- expect_malformed_url (fun () -> make_password '@') &&
- expect_malformed_url (fun () -> make_password '/') &&
- works (fun () -> make_password ';') &&
- works (fun () -> make_password '?') &&
- works (fun () -> make_password '=') &&
- works (fun () -> make_password '&') &&
-
- (* Note: ';' is allowed in path and query because parameters are not
- * recognized in HTTP syntax.
- *)
-
- expect_malformed_url (fun () -> make_path '/') &&
- expect_malformed_url (fun () -> make_path '?') &&
- works (fun () -> make_path ':') &&
- works (fun () -> make_path '@') &&
- works (fun () -> make_path ';') &&
- works (fun () -> make_path '=') &&
- works (fun () -> make_path '&') &&
-
- expect_malformed_url (fun () -> make_query '?') &&
- works (fun () -> make_query '/') &&
- works (fun () -> make_query ':') &&
- works (fun () -> make_query '@') &&
- works (fun () -> make_query ';') &&
- works (fun () -> make_query '=') &&
- works (fun () -> make_query '&')
-;;
-
-
-let t034 () =
- (* It is not possible to create a URL with a password, but without user;
- * and neither to create a URL with a port, but without host;
- * and neither to create a URL with a user, but without host
- *)
-
- expect_malformed_url
- (fun () ->
- make_url
- ~scheme:"http"
- ~password:"a"
- ~host:"a.b.c"
- ~path:["";"a";""]
- ip_url_syntax) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~scheme:"http"
- ~user:"U"
- ~path:["";"a";""]
- ip_url_syntax) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~scheme:"http"
- ~port:81
- ~path:["";"a";""]
- ip_url_syntax)
-;;
-
-
-let t035 () =
- (* It is not possible to create a URL with illegal scheme prefix *)
-
- (* reference: *)
- works
- (fun () ->
- make_url
- ~scheme:"a"
- ip_url_syntax) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~scheme:":"
- ip_url_syntax) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~scheme:"a=b"
- ip_url_syntax) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~scheme:"a%62b"
- ip_url_syntax) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~scheme:"a&b"
- ip_url_syntax)
-;;
-
-
-let t036 () =
- (* It is not possible to have a path with double slashes *)
-
- (* reference: *)
- works
- (fun () ->
- make_url
- ~path:["";"a";""]
- ip_url_syntax) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~path:["";""]
- ip_url_syntax) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~path:["a";"";""]
- ip_url_syntax) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~path:["";"";"a"]
- ip_url_syntax) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~path:["a";"";"a"]
- ip_url_syntax)
-;;
-
-
-let t037 () =
- (* It is not possible to have port numbers outside 0..65535 *)
-
- (* reference: *)
- works
- (fun () ->
- make_url
- ~host:"a"
- ~port:1
- ip_url_syntax) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~host:"a"
- ~port:(-1)
- ip_url_syntax) &&
-
- expect_malformed_url
- (fun () ->
- make_url
- ~host:"a"
- ~port:65536
- ip_url_syntax)
-;;
-
-
-let t038 () =
- (* Several cases which are not allowed. *)
-
- expect_malformed_url
- (fun () ->
- make_url
- ~host:"a"
- ~path:["a"]
- ip_url_syntax
- ) && (* illegal: host + relative path *)
-
- expect_malformed_url
- (fun () ->
- make_url
- ~host:"a"
- ~path:[]
- ~param:["x"]
- ip_url_syntax
- ) && (* illegal: host + no path + params *)
-
- expect_malformed_url
- (fun () ->
- make_url
- ~host:"a"
- ~path:[]
- ~query:"x"
- ip_url_syntax
- ) (* illegal: host + no path + query *)
-;;
-
-(**********************************************************************)
-(* url_of_string *)
-(**********************************************************************)
-
-let t050 () =
- (* absolute URLs with ip_url_syntax *)
- let identical s =
- string_of_url (url_of_string ip_url_syntax s) = s in
-
- let fails s =
- try ignore(url_of_string ip_url_syntax s); false
- with Malformed_URL -> true
- in
-
- identical "http:" &&
-
- identical "http://host" &&
- identical "http://user@host" &&
- identical "http://user:password@host" &&
- identical "http://user@host:99" &&
- identical "http://user:password@host:99" &&
-
- identical "http://host/" &&
- identical "http://user@host/" &&
- identical "http://user:password@host/" &&
- identical "http://user@host:99/" &&
- identical "http://user:password@host:99/" &&
-
- identical "http://host/a/b" &&
- identical "http://user@host/a/b" &&
- identical "http://user:password@host/a/b" &&
- identical "http://user@host:99/a/b" &&
- identical "http://user:password@host:99/a/b" &&
-
- identical "http://host/a/b/" &&
- identical "http://user@host/a/b/" &&
- identical "http://user:password@host/a/b/" &&
- identical "http://user@host:99/a/b/" &&
- identical "http://user:password@host:99/a/b/" &&
-
- identical "http://host/?a=b&c=d" &&
- identical "http://user@host/?a=b&c=d" &&
- identical "http://user:password@host/?a=b&c=d" &&
- identical "http://user@host:99/?a=b&c=d" &&
- identical "http://user:password@host:99/?a=b&c=d" &&
-
- fails "http://host?a=b&c=d" &&
- fails "http://user@host?a=b&c=d" &&
- fails "http://user:password@host?a=b&c=d" &&
- fails "http://user@host:99?a=b&c=d" &&
- fails "http://user:password@host:99?a=b&c=d" &&
-
- identical "http://host/?a=/&c=/" &&
- identical "http://user@host/?a=/&c=/" &&
- identical "http://user:password@host/?a=/&c=/" &&
- identical "http://user@host:99/?a=/&c=/" &&
- identical "http://user:password@host:99/?a=/&c=/" &&
-
- identical "http://host/;a;b" &&
- identical "http://user@host/;a;b" &&
- identical "http://user:password@host/;a;b" &&
- identical "http://user@host:99/;a;b" &&
- identical "http://user:password@host:99/;a;b" &&
-
- fails "http://host;a;b" &&
- fails "http://user@host;a;b" &&
- fails "http://user:password@host;a;b" &&
- fails "http://user@host:99;a;b" &&
- fails "http://user:password@host:99;a;b" &&
-
- identical "http://host/;a;b?a=b&c=d" &&
- identical "http://user@host/;a;b?a=b&c=d" &&
- identical "http://user:password@host/;a;b?a=b&c=d" &&
- identical "http://user@host:99/;a;b?a=b&c=d" &&
- identical "http://user:password@host:99/;a;b?a=b&c=d" &&
-
- identical "http:#f" &&
-
- identical "http://host#f" &&
- identical "http://user@host#f" &&
- identical "http://user:password@host#f" &&
- identical "http://user@host:99#f" &&
- identical "http://user:password@host:99#f" &&
-
- identical "http://host/;a;b?a=b&c=d#f" &&
- identical "http://user@host/;a;b?a=b&c=d#f" &&
- identical "http://user:password@host/;a;b?a=b&c=d#f" &&
- identical "http://user@host:99/;a;b?a=b&c=d#f" &&
- identical "http://user:password@host:99/;a;b?a=b&c=d#f" &&
-
- true
-;;
-
-
-let t051 () =
- (* relative URLs with ip_url_syntax *)
- let identical s =
- string_of_url (url_of_string ip_url_syntax s) = s in
-
- let fails s =
- try ignore(url_of_string ip_url_syntax s); false
- with Malformed_URL -> true
- in
-
- identical "//host" &&
- identical "//user@host" &&
- identical "//user:password@host" &&
- identical "//user@host:99" &&
- identical "//user:password@host:99" &&
-
- identical "//host/" &&
- identical "//user@host/" &&
- identical "//user:password@host/" &&
- identical "//user@host:99/" &&
- identical "//user:password@host:99/" &&
-
- identical "//host#f" &&
- identical "//user@host#f" &&
- identical "//user:password@host#f" &&
- identical "//user@host:99#f" &&
- identical "//user:password@host:99#f" &&
-
- identical "/" &&
- identical "/a" &&
- identical "/a/" &&
- identical "/a/a" &&
-
- identical "/;a;b" &&
- identical "/a;a;b" &&
- identical "/a/;a;b" &&
- identical "/a/a;a;b" &&
-
- identical "/?a=b&c=d" &&
- identical "/a?a=b&c=d" &&
- identical "/a/?a=b&c=d" &&
- identical "/a/a?a=b&c=d" &&
-
- identical "/;a;b?a=b&c=d" &&
- identical "/a;a;b?a=b&c=d" &&
- identical "/a/;a;b?a=b&c=d" &&
- identical "/a/a;a;b?a=b&c=d" &&
-
- identical "/#f" &&
- identical "/a#f" &&
- identical "/a/#f" &&
- identical "/a/a#f" &&
-
- identical "/;a;b#f" &&
- identical "/a;a;b#f" &&
- identical "/a/;a;b#f" &&
- identical "/a/a;a;b#f" &&
-
- identical "/;a;b?a=b&c=d#f" &&
- identical "/a;a;b?a=b&c=d#f" &&
- identical "/a/;a;b?a=b&c=d#f" &&
- identical "/a/a;a;b?a=b&c=d#f" &&
-
- identical "" &&
- identical "a" &&
- identical "a/" &&
- identical "a/a" &&
-
- identical ";a;b" &&
- identical "a;a;b" &&
- identical "a/;a;b" &&
- identical "a/a;a;b" &&
-
- identical "?a=b&c=d" &&
- identical "a?a=b&c=d" &&
- identical "a/?a=b&c=d" &&
- identical "a/a?a=b&c=d" &&
-
- identical ";a;b?a=b&c=d" &&
- identical "a;a;b?a=b&c=d" &&
- identical "a/;a;b?a=b&c=d" &&
- identical "a/a;a;b?a=b&c=d" &&
-
- identical "#f" &&
- identical "a#f" &&
- identical "a/#f" &&
- identical "a/a#f" &&
-
- identical ";a;b#f" &&
- identical "a;a;b#f" &&
- identical "a/;a;b#f" &&
- identical "a/a;a;b#f" &&
-
- identical ";a;b?a=b&c=d#f" &&
- identical "a;a;b?a=b&c=d#f" &&
- identical "a/;a;b?a=b&c=d#f" &&
- identical "a/a;a;b?a=b&c=d#f" &&
-
- identical "." &&
- identical "./" &&
- identical "./a" &&
-
- identical ".;a;b" &&
- identical "./;a;b" &&
- identical "./a;a;b" &&
-
- identical ".?a=b&c=d" &&
- identical "./?a=b&c=d" &&
- identical "./a?a=b&c=d" &&
-
- identical ".;a;b?a=b&c=d" &&
- identical "./;a;b?a=b&c=d" &&
- identical "./a;a;b?a=b&c=d" &&
-
- identical ".#f" &&
- identical "./#f" &&
- identical "./a#f" &&
-
- identical ".;a;b#f" &&
- identical "./;a;b#f" &&
- identical "./a;a;b#f" &&
-
- identical ".;a;b?a=b&c=d#f" &&
- identical "./;a;b?a=b&c=d#f" &&
- identical "./a;a;b?a=b&c=d#f" &&
-
- identical ".." &&
- identical "../" &&
- identical "../a" &&
-
- identical "..;a;b" &&
- identical "../;a;b" &&
- identical "../a;a;b" &&
-
- identical "..?a=b&c=d" &&
- identical "../?a=b&c=d" &&
- identical "../a?a=b&c=d" &&
-
- identical "..;a;b?a=b&c=d" &&
- identical "../;a;b?a=b&c=d" &&
- identical "../a;a;b?a=b&c=d" &&
-
- identical "..#f" &&
- identical "../#f" &&
- identical "../a#f" &&
-
- identical "..;a;b#f" &&
- identical "../;a;b#f" &&
- identical "../a;a;b#f" &&
-
- identical "..;a;b?a=b&c=d#f" &&
- identical "../;a;b?a=b&c=d#f" &&
- identical "../a;a;b?a=b&c=d#f" &&
-
- string_of_url
- (make_url ~path:["a:b"] ip_url_syntax) = "a%3Ab" &&
-
- string_of_url
- (make_url ~encoded:true ~path:["a:b"] ip_url_syntax) = "./a:b" &&
-
- true
-;;
-
-
-let t052 () =
- (* mailto: URLs *)
- let mailto_syn = Hashtbl.find common_url_syntax "mailto" in
-
- let identical s =
- string_of_url (url_of_string mailto_syn s) = s in
-
- let fails s =
- try ignore(url_of_string mailto_syn s); false
- with Malformed_URL -> true
- in
-
- identical "mailto:user@host" &&
- identical "mailto:user@host;?;?" &&
- fails "mailto:user@host#f"
-;;
-
-(**********************************************************************)
-(* split_path/join_path/norm_path: *)
-(**********************************************************************)
-
-let t060 () =
- (split_path "" = []) &&
- (split_path "/" = [ "" ]) &&
- (split_path "/a" = [ ""; "a" ]) &&
- (split_path "a" = [ "a" ]) &&
- (split_path "a/" = [ "a"; "" ]) &&
- (split_path "/a/" = [ ""; "a"; "" ]) &&
- (split_path "/a/b" = [ ""; "a"; "b" ]) &&
- (split_path "/a/b/" = [ ""; "a"; "b"; "" ]) &&
- (split_path "/a/b/c" = [ ""; "a"; "b"; "c" ]) &&
-
- (join_path [] = "") &&
- (join_path [ "" ] = "/") &&
- (join_path [ ""; "a" ] = "/a") &&
- (join_path [ "a" ] = "a") &&
- (join_path [ "a"; "" ] = "a/") &&
- (join_path [ ""; "a"; "" ] = "/a/") &&
- (join_path [ ""; "a"; "b" ] = "/a/b") &&
- (join_path [ ""; "a"; "b"; "" ] = "/a/b/") &&
- (join_path [ ""; "a"; "b"; "c" ] = "/a/b/c") &&
-
- true
-;;
-
-
-let t061 () =
- (norm_path ["."] = []) &&
- (norm_path ["."; ""] = []) &&
- (norm_path ["a"; "."] = ["a"; ""]) &&
- (norm_path ["a"; "b"; "."] = ["a"; "b"; ""]) &&
- (norm_path ["a"; "b"; ".."] = ["a"; ""]) &&
- (norm_path ["a"; "."; "b"; "."] = ["a"; "b"; ""]) &&
- (norm_path [".."] = [".."; ""]) &&
- (norm_path [".."; ""] = [".."; ""]) &&
- (norm_path ["a"; "b"; ".."; "c" ] = ["a"; "c"]) &&
- (norm_path ["a"; "b"; ".."; "c"; ""] = ["a"; "c"; ""]) &&
- (norm_path ["";"";"a";"";"b"] = [""; "a"; "b"]) &&
- (norm_path ["a"; "b"; ""; ".."; "c"; ""] = ["a"; "c"; ""]) &&
- (norm_path ["a"; ".."] = []) &&
- (norm_path ["";""] = [""]) &&
- (norm_path [""] = [""]) &&
- (norm_path [] = []) &&
-
- true
-;;
-
-(**********************************************************************)
-(* apply_relative_url: *)
-(**********************************************************************)
-
-let t070() =
- (* Examples taken from RFC 1808 *)
- let url = url_of_string ip_url_syntax in
- let base = url "http://a/b/c/d;p?q#f" in
- let aru = apply_relative_url base in
-
- (aru (url "g:h") = url "g:h") &&
- (aru (url "g") = url "http://a/b/c/g") &&
- (aru (url "./g") = url "http://a/b/c/g") &&
- (aru (url "g/") = url "http://a/b/c/g/") &&
- (aru (url "/g") = url "http://a/g") &&
- (aru (url "//g") = url "http://g") &&
- (aru (url "?y") = url "http://a/b/c/d;p?y") &&
- (aru (url "g?y") = url "http://a/b/c/g?y") &&
- (aru (url "g?y/./x") = url "http://a/b/c/g?y/./x") &&
- (aru (url "#s") = url "http://a/b/c/d;p?q#s") &&
- (aru (url "g#s") = url "http://a/b/c/g#s") &&
- (aru (url "g#s/./x") = url "http://a/b/c/g#s/./x") &&
- (aru (url "g?y#s") = url "http://a/b/c/g?y#s") &&
- (aru (url ";x") = url "http://a/b/c/d;x") &&
- (aru (url "g;x") = url "http://a/b/c/g;x") &&
- (aru (url "g;x?y#s") = url "http://a/b/c/g;x?y#s") &&
- (aru (url ".") = url "http://a/b/c/") &&
- (aru (url "./") = url "http://a/b/c/") &&
- (aru (url "..") = url "http://a/b/") &&
- (aru (url "../") = url "http://a/b/") &&
- (aru (url "../g") = url "http://a/b/g") &&
- (aru (url "../..") = url "http://a/") &&
- (aru (url "../../") = url "http://a/") &&
- (aru (url "../../g") = url "http://a/g") &&
-
- (aru (url "") = url "http://a/b/c/d;p?q#f") &&
- (aru (url "../../../g") = url "http://a/../g") &&
- (aru (url "../../../../g") = url "http://a/../../g") &&
- (aru (url "/./g") = url "http://a/./g") &&
- (aru (url "/../g") = url "http://a/../g") &&
- (aru (url "g.") = url "http://a/b/c/g.") &&
- (aru (url ".g") = url "http://a/b/c/.g") &&
- (aru (url "g..") = url "http://a/b/c/g..") &&
- (aru (url "..g") = url "http://a/b/c/..g") &&
- (aru (url "./../g") = url "http://a/b/g") &&
- (aru (url "./g/.") = url "http://a/b/c/g/") &&
- (aru (url "g/./h") = url "http://a/b/c/g/h") &&
- (aru (url "g/../h") = url "http://a/b/c/h") &&
- (aru (url "http:g") = url "http:g") &&
- (aru (url "http:") = url "http:") &&
-
- true
-;;
-
-
-(**********************************************************************)
-
-let test f n =
- if f() then
- print_endline ("Test " ^ n ^ " ok")
- else
- print_endline ("Test " ^ n ^ " FAILED!!!!");
- flush stdout
-;;
-
-test t001 "001";
-test t002 "002";
-
-test t010 "010";
-test t011 "011";
-test t012 "012";
-
-test t020 "020";
-test t021 "021";
-
-test t030 "030";
-test t031 "031";
-test t032 "032";
-test t033 "033";
-test t034 "034";
-test t035 "035";
-test t036 "036";
-test t037 "037";
-test t038 "038";
-
-test t050 "050";
-test t051 "051";
-test t052 "052";
-
-test t060 "060";
-test t061 "061";
-
-test t070 "070";
-()
-;;
+++ /dev/null
-
-
-let make_iso enc =
- let s = ref "" in
- for i = 0 to 255 do
- let u = try Netconversion.makechar (enc :> Netconversion.encoding) i
- with Not_found -> "" in
- s := !s ^ u
- done;
- !s
-;;
-
-let make_ucs2 start stop =
- let s = String.create ((stop - start) * 2) in
- for i = 0 to stop-start-1 do
- let k = 2 * i in
- let c = i + start in
- s.[k] <- Char.chr(c lsr 8);
- s.[k+1] <- Char.chr(c land 0xff);
- done;
- s
-;;
-
-let make_ucs4 start stop =
- let s = String.create ((stop - start) * 4) in
- for i = 0 to stop-start-1 do
- let k = 4 * i in
- let c = i + start in
- s.[k] <- Char.chr(c lsr 24);
- s.[k+1] <- Char.chr((c lsr 16) land 0xff);
- s.[k+2] <- Char.chr((c lsr 8) land 0xff);
- s.[k+3] <- Char.chr(c land 0xff);
- done;
- s
-;;
-
-let name_of_encoding enc =
- match enc with
- `Enc_iso88591 -> "ISO_8859-1"
- | `Enc_iso88592 -> "ISO_8859-2"
- | `Enc_iso88593 -> "ISO_8859-3"
- | `Enc_iso88594 -> "ISO_8859-4"
- | `Enc_iso88595 -> "ISO_8859-5"
- | `Enc_iso88596 -> "ISO_8859-6"
- | `Enc_iso88597 -> "ISO_8859-7"
- | `Enc_iso88598 -> "ISO_8859-8"
- | `Enc_iso88599 -> "ISO_8859-9"
- | `Enc_iso885910 -> "ISO_8859-10"
- | `Enc_iso885913 -> "ISO_8859-13"
- | `Enc_iso885914 -> "ISO_8859-14"
- | `Enc_iso885915 -> "ISO_8859-15"
- | `Enc_utf8 -> "UTF-8"
- | `Enc_ucs4 -> "UCS-4"
- | `Enc_ucs2 -> "UCS-2"
- | `Enc_utf16 -> "UTF-16"
-
- (* Note: GNU-iconv assumes big endian byte order *)
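-  (* (Hence the plain "UCS-2", "UCS-4" and "UTF-16" names above line up
-   * with the big-endian data built by make_ucs2/make_ucs4 and with
-   * Netconversion's `Enc_utf16_be in the tests below.) *)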
-;;
-
-let iconv_recode_string in_enc out_enc in_s =
- let in_enc_name = name_of_encoding in_enc in
- let out_enc_name = name_of_encoding out_enc in
- let out_s = ref "" in
-
- let out_ch,in_ch = Unix.open_process ("iconv -f " ^ in_enc_name ^ " -t " ^
- out_enc_name) in
- (* Write in_s to in_ch in a new thread: *)
- ignore
- (Thread.create
- (fun () ->
- output_string in_ch in_s;
- close_out in_ch;
- )
- ()
- );
- (* Read the result in the current thread: *)
- let buf = String.create 1024 in
- let n = ref 1 in
- while !n <> 0 do
- let n' = input out_ch buf 0 1024 in
- out_s := !out_s ^ String.sub buf 0 n';
- n := n'
- done;
- ignore(Unix.close_process (out_ch,in_ch));
- !out_s
-;;
-
-let test_iso_and_utf8 enc =
- let name = name_of_encoding enc in
- print_string ("Recode: " ^ name ^ " and UTF-8... "); flush stdout;
- let s = make_iso enc in
- let s1' = Netconversion.recode_string (enc :> Netconversion.encoding)
- `Enc_utf8 s in
- let s2' = iconv_recode_string enc `Enc_utf8 s in
- assert(s1' = s2');
- let s1 = Netconversion.recode_string `Enc_utf8
- (enc :> Netconversion.encoding) s1' in
- let s2 = iconv_recode_string `Enc_utf8 enc s1' in
- assert(s1 = s2 && s1 = s);
- print_endline "OK"; flush stdout
-;;
-
-let test_utf16_and_utf8_0000_d7ff () =
- print_string "Recode: UTF-16-BE and UTF-8, #0000-#D7FF... ";
- flush stdout;
- let s = make_ucs2 0 0xd800 in
- let s1' = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 s in
- let s2' = iconv_recode_string `Enc_utf16 `Enc_utf8 s in
- assert(s1' = s2');
- let s1 = Netconversion.recode_string `Enc_utf8 `Enc_utf16_be s1' in
- let s2 = iconv_recode_string `Enc_utf8 `Enc_utf16 s1' in
- assert(s1 = s2 && s1 = s);
- print_endline "OK"; flush stdout
-;;
-
-let test_utf16_and_utf8_e000_fffd () =
- print_string "Recode: UTF-16-BE and UTF-8, #E000-#FFFD... ";
- flush stdout;
- let s = make_ucs2 0xe000 0xfffe in
- let s1' = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 s in
- let s2' = iconv_recode_string `Enc_utf16 `Enc_utf8 s in
- assert(s1' = s2');
- let s1 = Netconversion.recode_string `Enc_utf8 `Enc_utf16_be s1' in
- let s2 = iconv_recode_string `Enc_utf8 `Enc_utf16 s1' in
- assert(s1 = s2 && s1 = s);
- print_endline "OK"; flush stdout
-;;
-
-let test_utf16_and_utf8_10000_10FFFF () =
- print_string "Recode: UTF-16-BE and UTF-8, #10000-#10FFFF... ";
- flush stdout;
- for i = 1 to 16 do
- let s0 = make_ucs4 (i * 0x10000) (i * 0x10000 + 0x10000) in
- let s = iconv_recode_string `Enc_ucs4 `Enc_utf16 s0 in
- let s1' = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 s in
- let s2' = iconv_recode_string `Enc_utf16 `Enc_utf8 s in
- assert(s1' = s2');
- let s1 = Netconversion.recode_string `Enc_utf8 `Enc_utf16_be s1' in
- let s2 = iconv_recode_string `Enc_utf8 `Enc_utf16 s1' in
- assert(s1 = s2 && s1 = s);
- print_string "+"; flush stdout;
- done;
- print_endline "OK"; flush stdout
-;;
-
-
-print_endline "Warning: You need the command 'iconv' to run this test!";
-flush stdout;
-test_iso_and_utf8 `Enc_iso88591;
-test_iso_and_utf8 `Enc_iso88592;
-test_iso_and_utf8 `Enc_iso88593;
-test_iso_and_utf8 `Enc_iso88594;
-test_iso_and_utf8 `Enc_iso88595;
-test_iso_and_utf8 `Enc_iso88596;
-test_iso_and_utf8 `Enc_iso88597;
-(* test_iso_and_utf8 `Enc_iso88598; *)
-test_iso_and_utf8 `Enc_iso88599;
-test_iso_and_utf8 `Enc_iso885910;
-(* test_iso_and_utf8 `Enc_iso885913; *)
-(* test_iso_and_utf8 `Enc_iso885914; *)
-(* test_iso_and_utf8 `Enc_iso885915; *)
-test_utf16_and_utf8_0000_d7ff();
-test_utf16_and_utf8_e000_fffd();
-(* This test does not work because iconv does not support the surrogate
- * representation of UTF-16:
- * test_utf16_and_utf8_10000_10FFFF();
- *)
-()
-;;
+++ /dev/null
-all:
- $(MAKE) -C unimap_to_ocaml
-
-clean:
-
-CLEAN: clean
- $(MAKE) -C unimap_to_ocaml CLEAN
-
-distclean: clean
- $(MAKE) -C unimap_to_ocaml distclean
+++ /dev/null
-*.cmo
-*.cmx
-*.cmi
-
-*.o
-*.a
-
+++ /dev/null
-all: unimap_to_ocaml
-
-unimap_to_ocaml: unimap_to_ocaml.ml
- ocamlfind ocamlc -g -package str -linkpkg -custom \
- -o unimap_to_ocaml \
- unimap_to_ocaml.ml
-
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
-
-CLEAN: clean
-
-distclean: clean
- rm -f *~ unimap_to_ocaml
-
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Printf;;
-
-let comment_re = Str.regexp "#.*$";;
-let space_re = Str.regexp "[ \t\r\n]+";;
-
-let read_unimap_format_a fname f =
- (* Reads a Unicode mapping in format A from a "local" code to Unicode.
- * Returns a list of pairs (localcode, unicode).
- *)
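-  (* For illustration (a made-up sample, not taken from any particular
-   * mapping file), format A input consists of lines like:
-   *
-   *     0xA4    0x20AC    # EURO SIGN
-   *     0xA6    0x0160    # LATIN CAPITAL LETTER S WITH CARON
-   *
-   * i.e. a local code point, the corresponding Unicode code point, and
-   * an optional trailing comment.
-   *)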
-
- let read_unimap_line() =
- let s = input_line f in (* may raise End_of_file *)
- let s' = Str.global_replace comment_re "" s in
- let words = Str.split space_re s' in
- match words with
- [] -> raise Not_found
- | [ localcode; unicode ] ->
- int_of_string localcode, int_of_string unicode
- | _ ->
- failwith ("File " ^ fname ^ ": Do not know what to do with:\n" ^ s')
- in
-
- let rec read_following_lines() =
- try
- let localcode, unicode = read_unimap_line() in
- (* may raise End_of_file, Not_found *)
- (localcode, unicode) :: read_following_lines()
- with
- Not_found -> read_following_lines()
- | End_of_file -> []
- in
-
- read_following_lines()
-;;
-
-
-type from_uni_list =
- U_nil
- | U_single of (int * int)
- | U_list of (int * int) list
-
-type from_unicode =
- from_uni_list array;;
- (* A hashtable with fixed size (256). A pair (unicode, localcode) is
- * stored at the position unicode mod 256 in the array.
- *)
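-
-(* The following helper is only a sketch (it is not part of the original
- * code, and the name lookup_local is made up): it shows how a consumer
- * could find the local code for a Unicode code point in such a table by
- * inspecting the bucket selected by the low byte of the code point.
- *)
-let lookup_local (m : from_unicode) (unicode : int) : int option =
-  match m.(unicode land 255) with
-      U_nil           -> None
-    | U_single (u, l) -> if u = unicode then Some l else None
-    | U_list pairs    -> (try Some (List.assoc unicode pairs)
-                          with Not_found -> None)
-;;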
-
-
-let make_bijection unimap =
- (* unimap: a list of pairs (localcode, unicode)
- * returns a pair of arrays (m_to_unicode, m_from_unicode) with:
- * - m_to_unicode.(localcode) = Some unicode,
- * if the pair (localcode, unicode) exists
- * m_to_unicode.(x) = None otherwise
-   * - m_from_unicode.(unicode land 255) = [ ...; (unicode,localcode); ... ]
- *)
-
- let m_to_unicode = Array.create 256 None in
- let m_from_unicode = Array.create 256 [] in
-
- List.iter
- (fun (localcode, unicode) ->
- assert(localcode < 256);
-
- (* Update m_to_unicode: *)
- if m_to_unicode.(localcode) <> None then
- failwith ("Local code point " ^ string_of_int localcode ^
- " mapped twice");
- m_to_unicode.(localcode) <- Some unicode;
-
- (* Update m_from_unicode: *)
- let unilow = unicode land 255 in
- if List.mem_assoc unicode (m_from_unicode.(unilow)) then
- failwith ("Unicode code point " ^ string_of_int unicode ^
- " mapped twice");
- m_from_unicode.(unilow) <-
- m_from_unicode.(unilow) @ [unicode,localcode];
- )
- unimap;
-
- m_to_unicode, m_from_unicode
-;;
-
-
-let to_unimap_as_string to_unimap =
- let make_repr x =
- match x with
- None -> -1
- | Some u -> u
- in
- Marshal.to_string (Array.map make_repr to_unimap) [ Marshal.No_sharing ]
-;;
-
-
-let from_unimap_as_string from_unimap =
- let make_repr l =
- match l with
- [] -> U_nil
- | [u,l] -> U_single(u,l)
- | _ -> U_list l
- in
- let m = Array.map make_repr from_unimap in
- Marshal.to_string m [ Marshal.No_sharing ]
-;;
-
-
-let print_bijection f name m_to_unicode m_from_unicode =
- (* Prints on file f this O'Caml code:
- * let <name>_to_unicode = ...
- * let <name>_from_unicode = ...
- *)
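-  (* For example, for name = "iso88592" this generates (schematically,
-   * with the marshalled data abbreviated to "..."):
-   *   let iso88592_to_unicode = lazy (Marshal.from_string "..." 0 : int array);;
-   *   let iso88592_from_unicode = lazy (Marshal.from_string "..." 0 : Netmappings.from_uni_list array);;
-   *)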
- fprintf f "let %s_to_unicode = lazy (Marshal.from_string \"%s\" 0 : int array);;\n"
- name
- (String.escaped (to_unimap_as_string m_to_unicode));
-
- fprintf f "let %s_from_unicode = lazy (Marshal.from_string \"%s\" 0 : Netmappings.from_uni_list array);;\n "
- name
- (String.escaped (from_unimap_as_string m_from_unicode));
-;;
-
-
-let main() =
- let files = ref [] in
- let outch = ref (lazy stdout) in
- Arg.parse
- [ "-o", Arg.String (fun s -> outch := lazy (open_out s)),
- " <file> Write result to this file"]
- (fun s -> files := !files @ [s])
- "usage: unimap_to_ocaml file.unimap ...";
-
- (* First read in all unimaps: *)
- let unimaps =
- List.map
- (fun filename ->
-	 let mapname = Str.replace_first (Str.regexp "\\.unimap$") ""
- (Filename.basename filename) in
- let f = open_in filename in
- prerr_endline ("Reading " ^ filename);
- let unimap = read_unimap_format_a filename f in
- close_in f;
- mapname, unimap
- )
- !files
- in
-
- (* Second compute all bijections: *)
- let bijections =
- List.map
- (fun (mapname, unimap) ->
- prerr_endline ("Processing " ^ mapname);
- let to_unicode, from_unicode = make_bijection unimap in
- mapname, to_unicode, from_unicode
- )
- unimaps
- in
-
- let out = Lazy.force !outch in
- (* Third output all results: *)
- output_string out "(* WARNING! This is a generated file! *)\n";
-
- List.iter
- (fun (mapname, to_unicode, from_unicode) ->
- print_bijection out mapname to_unicode from_unicode)
- bijections;
- List.iter
- (fun (mapname, _, _) ->
- fprintf out "Hashtbl.add Netmappings.to_unicode `Enc_%s %s_to_unicode;\n"
- mapname mapname;
- fprintf out "Hashtbl.add Netmappings.from_unicode `Enc_%s %s_from_unicode;\n"
- mapname mapname;
- )
- (List.rev bijections);
- fprintf out "();;\n";
-
- close_out out
-;;
-
-
-main();;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/08/29 00:48:52 gerd
- * Conversion tables are now stored in marshalled form.
- * New type for the conversion table Unicode to 8bit.
- *
- * Revision 1.2 2000/08/12 23:54:56 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-%define toolname findlib
-
-Summary: OCAML FindLib package manager
-Name: ocaml-%{toolname}
-Version: 0.4.0
-Release: 1
-Copyright: GPL-like
-Group: Development/Languages
-Source: http://www.ocaml-programming.de/packages/%{toolname}-%{version}.tar.gz
-Vendor: Gerd Stolpmann
-Url: http://www.ocaml-programming.de/packages/documentation/%{toolname}/
-Prereq: fileutils, ocaml
-Requires: fileutils, ocaml
-
-%define dir_man usr/man
-%define dir_bin usr/bin
-%define dir_sitelib usr/lib/ocaml/site-lib
-%define files_doc LICENSE README INSTALL
-
-BuildRoot: /tmp/root-%{name}
-
-%description
-Provides a scheme to manage reusable software components (packages),
-as collections of OCaml modules for which metainformation can be stored.
-The library contains functions to look up the directory that
-stores a package, to query metainformation about a package, and
-to retrieve dependency information about multiple packages.
-Metainformation includes a version string, the archives the package
-consists of, additional linker options, and dependencies on other packages.
-
-%prep
-%setup -n %{toolname}
-./configure -mandir "/%{dir_man}" -bindir "/%{dir_bin}" -sitelib "/%{dir_sitelib}"
-
-%build
-make all
-make opt
-
-%install
-# Install binary, libs, manuals
-DIR_BIN="${RPM_BUILD_ROOT}/%{dir_bin}"
-DIR_MAN="${RPM_BUILD_ROOT}/%{dir_man}"
-DIR_SITELIB="${RPM_BUILD_ROOT}/%{dir_sitelib}"
-install -m 755 -d "${DIR_BIN}"
-install -m 755 -d "${DIR_MAN}"
-install -m 755 -d "${DIR_SITELIB}"
-make install "OCAML_SITELIB=${DIR_SITELIB}" "OCAMLFIND_BIN=${DIR_BIN}" "OCAMLFIND_MAN=${DIR_MAN}"
-# Move doc files to root, if needed
-for F in %{files_doc} ; do test -f "./doc/${F}" && mv -f "./doc/${F}" . ; done
-
-# HTML manual
-if test -d doc/html ; then mv doc/html htmlman; else mkdir htmlman ; fi
-
-%post
-# Create a symbolic link to version-specific HTML manual
-cd /usr/doc/HTML/
-if test -L "%{name}" ; then rm -f "%{name}" ; fi
-if test ! -e "%{name}" ; then ln -s "../%{name}-%{version}/htmlman" "%{name}" ; fi
-
-%postun
-cd /usr/doc/HTML/
-if test "$1" = "0" -a -L %{name} ; then rm -f %{name} ; fi
-
-
-%clean
-if test `dirname "${RPM_BUILD_ROOT}"` != "/" ; then rm -rf "${RPM_BUILD_ROOT}" ; fi
-
-%files
-%defattr(-,root,root)
-%doc %{files_doc} htmlman
-/%{dir_bin}
-/%{dir_man}/man1
-/%{dir_man}/man3
-/%{dir_man}/man5
-/%{dir_sitelib}
-
-
-%changelog
-* Fri Sep 1 2000 Olivier Montanuy <Olivier.Montanuy@wanadoo.fr>
-- created and tested package, with html manual
+++ /dev/null
-%define toolname netstring
-
-Summary: OCAML Netstring library
-Name: ocaml-%{toolname}
-Version: 0.9.3
-Release: 2
-Copyright: GPL-like
-Group: Development/Languages
-Source: http://www.ocaml-programming.de/packages/%{toolname}-%{version}.tar.gz
-Vendor: Gerd Stolpmann
-Url: http://www.ocaml-programming.de/packages/documentation/%{toolname}/
-Prereq: fileutils, ocaml, ocaml-findlib
-Requires: fileutils, ocaml
-
-%define dir_sitelib usr/lib/ocaml/site-lib
-%define files_doc LICENSE README INSTALL
-BuildRoot: /tmp/root-%{name}
-
-%description
-A collection of string processing functions for Internet protocols
-- Parse MIME messages
-- Encode/decode Base 64, Quoted Printable, Q, URL-encoding
-- CGI interface that allows users to upload files
-- Simple HTML parser
-- URL parsing, printing and processing
-
-%prep
-%setup -n %{toolname}
-make clean
-
-%build
-make all
-make opt
-
-%install
-# Install binary, libs, manuals
-
-DIR_INSTALL="${RPM_BUILD_ROOT}/%{dir_sitelib}/%{toolname}"
-install -m 755 -d "${DIR_INSTALL}"
-# Install in non-standard directory
-# replace "$(OCAMLFIND) install" or "ocamlfind install"
-cat Makefile | sed "s#[_A-Za-z\(\)\$]* install \$(NAME) #\$(TRICK) #" > Makefile.trick
-# Ugly trick: replace "findlib" by "cp"
-make -f Makefile.trick install "TRICK=cp -f --target-directory=${DIR_INSTALL}"
-# Reference to obsolete CGI and BASE64 packages
-for CMD in cgi base64 ; do
- cat Makefile | sed "s#[_A-Za-z\(\)\$]* install $CMD #\$(TRICK) #" > Makefile.trick
- DIR="${RPM_BUILD_ROOT}/%{dir_sitelib}/${CMD}"
- install -m 755 -d "${DIR}"
- make -f Makefile.trick "install-${CMD}" "TRICK=cp -f --target-directory=${DIR}"
-done
-
-# Move doc files to root, if needed
-for F in %{files_doc} ; do test -f "./doc/${F}" && mv -f "./doc/${F}" . ; done
-
-%clean
-#if test `dirname "${RPM_BUILD_ROOT}"` != "/" ; then rm -rf "${RPM_BUILD_ROOT}" ; fi
-
-%files
-%defattr(-,root,root)
-%doc %{files_doc}
-/%{dir_sitelib}/%{toolname}
-# Reference to obsolete CGI and BASE64 packages
-/%{dir_sitelib}/cgi
-/%{dir_sitelib}/base64
-
-%changelog
-* Wed Sep 6 2000 Olivier Montanuy <Olivier.Montanuy@wanadoo.fr>
-- attempt to install cgi and base64 modules, for obsolete Makefiles
-
-* Fri Sep 1 2000 Olivier Montanuy <Olivier.Montanuy@wanadoo.fr>
-- created and tested package
-
+++ /dev/null
-%define toolname pxp
-
-Summary: OCAML PXP XML library
-Name: ocaml-%{toolname}
-Version: 1.0
-Release: 1
-Copyright: GPL-like
-Group: Development/Languages
-Source: http://www.ocaml-programming.de/packages/%{toolname}-%{version}.tar.gz
-Vendor: Gerd Stolpmann
-Url: http://www.ocaml-programming.de/packages/documentation/%{toolname}/
-Prereq: fileutils, ocaml, ocaml-findlib, ocaml-netstring
-Requires: fileutils, ocaml, ocaml-netstring
-
-%define dir_sitelib usr/lib/ocaml/site-lib
-%define files_doc LICENSE README INSTALL SPEC EXTENSIONS
-
-BuildRoot: /tmp/root-%{name}
-
-%description
-PXP is a validating parser for XML-1.0 written entirely in Objective Caml.
-PXP stands for Polymorphic XML parser, a name that emphasizes its most useful property:
-the API is polymorphic and can be configured such that different objects are
-used to store different types of elements.
-PXP was formerly known as "Markup".
-
-%prep
-%setup -n %{toolname}
-make clean
-
-%build
-make all
-make opt
-
-%install
-# Install binary, libs, manuals
-
-DIR_INSTALL="${RPM_BUILD_ROOT}/%{dir_sitelib}/%{toolname}"
-install -m 755 -d "${DIR_INSTALL}"
-# Install in non-standard directory
-# replace "$(OCAMLFIND) install" or "ocamlfind install"
-cat Makefile | sed "s#[_A-Za-z\(\)\$]* install \$(NAME)#\$(TRICK)#" > Makefile.trick
-# Ugly trick: replace "findlib" by "cp"
-make -f Makefile.trick install "TRICK=cp -f --target-directory=${DIR_INSTALL}"
-
-# Move doc files to root, if needed
-for F in %{files_doc} ; do test -f "./doc/${F}" && mv -f "./doc/${F}" . ; done
-
-# HTML manual
-if test -d doc/manual/html ; then mv doc/manual/html htmlman; else mkdir htmlman ; fi
-
-
-# Create a symbolic link to version-specific HTML manual
-%post
-cd /usr/doc/HTML/
-if test -L "%{name}" ; then rm -f "%{name}" ; fi
-if test ! -e "%{name}" ; then ln -s "../%{name}-%{version}/htmlman" "%{name}" ; fi
-
-%postun
-cd /usr/doc/HTML/
-if test "$1" = "0" -a -L %{name} ; then rm -f %{name} ; fi
-
-%clean
-if test `dirname "${RPM_BUILD_ROOT}"` != "/" ; then rm -rf "${RPM_BUILD_ROOT}" ; fi
-
-%files
-%defattr(-,root,root)
-%doc %{files_doc} htmlman examples
-/%{dir_sitelib}/%{toolname}
-
-%changelog
-* Fri Sep 1 2000 Olivier Montanuy <Olivier.Montanuy@wanadoo.fr>
-- created and tested package
+++ /dev/null
-*.cmo
-*.cmx
-*.cmi
-
+++ /dev/null
-Copyright 1999 by Gerd Stolpmann
-
-The package "markup" is copyright by Gerd Stolpmann.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this document and the "markup" software (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be included
-in all copies or substantial portions of the Software.
-
-The Software is provided ``as is'', without warranty of any kind, express
-or implied, including but not limited to the warranties of
-merchantability, fitness for a particular purpose and noninfringement.
-In no event shall Gerd Stolpmann be liable for any claim, damages or
-other liability, whether in an action of contract, tort or otherwise,
-arising from, out of or in connection with the Software or the use or
-other dealings in the software.
+++ /dev/null
-version = "1.0"
-requires = "netstring"
-description = "Validating parser for XML-1.0"
-archive(byte) = "pxp_types.cma
- pxp_lex_iso88591.cma
- pxp_lex_utf8.cma
- pxp_engine.cma
- pxp_utf8.cmo"
-archive(byte, pxp_without_utf8) = "pxp_types.cma
- pxp_lex_iso88591.cma
- pxp_engine.cma"
-archive(native) = "pxp_types.cmxa
- pxp_lex_iso88591.cmxa
- pxp_lex_utf8.cmxa
- pxp_engine.cmxa
- pxp_utf8.cmx"
-archive(native, pxp_without_utf8) = "pxp_types.cmxa
- pxp_lex_iso88591.cmxa
- pxp_engine.cmxa"
-
+++ /dev/null
-# make all: make bytecode archive
-# make opt: make native archive
-# make install: install bytecode archive, and if present, native archive
-# make uninstall: uninstall package
-# make clean: remove intermediate files (in this directory)
-# make CLEAN: remove intermediate files (recursively)
-# make distclean: remove any superfluous files (recursively)
-# make release: cleanup, create archive, tag CVS module
-# (for developers)
-
-#----------------------------------------------------------------------
-
-include Makefile.conf
-
-.PHONY: all
-all:
- $(MAKE) -C m2parsergen all
- $(MAKE) -C tools/ucs2_to_utf8 all
- $(MAKE) -f Makefile.code all
- $(MAKE) -C compatibility all
-
-.PHONY: opt
-opt:
- $(MAKE) -C m2parsergen all
- $(MAKE) -C tools/ucs2_to_utf8 all
- $(MAKE) -f Makefile.code opt
- $(MAKE) -C compatibility opt
-
-.PHONY: install
-install: all tmp/pxp_entity.mli
- files=`tools/collect_files *.cmi *.cma *.cmxa *.a \
- pxp_utf8.cmo pxp_utf8.cmx pxp_utf8.o` && \
- ocamlfind install $(NAME) $(MLI) tmp/pxp_entity.mli $$files META
-
-.PHONY: uninstall
-uninstall:
- ocamlfind remove $(NAME)
-
-.PHONY: markup-install
-markup-install:
- $(MAKE) -C compatibility install
-
-.PHONY: markup-uninstall
-markup-uninstall:
- $(MAKE) -C compatibility uninstall
-
-tmp/pxp_entity.mli: pxp_entity.ml
- mkdir -p tmp
- rm -f tmp/pxp_entity.*
- cp pxp_entity.ml tmp
- echo '(* Sorry, this is currently undocumented *)' >tmp/mli
- ocamlc -i -c tmp/pxp_entity.ml >>tmp/mli
- mv tmp/mli tmp/pxp_entity.mli
-
-.PHONY: clean
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa *.new *.old
- rm -f pxp_yacc.ml
- touch lexers/objects_iso88591 lexers/objects_utf8 lexers/depend
- $(MAKE) -C lexers clean
- $(MAKE) -C compatibility clean
-
-.PHONY: CLEAN
-CLEAN: clean
- $(MAKE) -C doc CLEAN
- $(MAKE) -C examples CLEAN
- $(MAKE) -C rtests CLEAN
- $(MAKE) -C m2parsergen CLEAN
- touch tools/ucs2_to_utf8/depend
- $(MAKE) -C tools/ucs2_to_utf8 clean
-
-.PHONY: distclean
-distclean: clean
- rm -f *~ depend depend.pkg
- $(MAKE) -C doc distclean
- $(MAKE) -C examples distclean
- $(MAKE) -C rtests distclean
- $(MAKE) -C m2parsergen distclean
- touch tools/ucs2_to_utf8/depend
- $(MAKE) -C tools/ucs2_to_utf8 clean
- $(MAKE) -C compatibility distclean
-
-RELEASE: META
- awk '/version/ { print substr($$3,2,length($$3)-2) }' META >RELEASE
-
-.PHONY: dist
-dist: RELEASE
- r=`head -1 RELEASE`; cd ..; gtar czf $(NAME)-$$r.tar.gz --exclude='*/CVS*' --exclude="*~" --exclude="*/depend.pkg" --exclude="*/depend" --exclude="*/oo_questions*" --exclude="*/testsamples*" --exclude="*/tmp/*" --exclude="*reptil*" --exclude="*/doc/common.xml" --exclude="*/doc/config.xml" --exclude="*.fig.bak" --exclude="*/ps/pic*" --exclude="*/examples/panel*" --exclude="*/examples/xmlforms_gtk*" --exclude="*/Mail*" $(NAME)/*
-
-.PHONY: tag-release
-tag-release: RELEASE
- r=`head -1 RELEASE | sed -e s/\\\./-/g`; cd ..; cvs tag -F $(NAME)-$$r markup
-
-.PHONY: release
-release: distclean
- $(MAKE) tag-release
- $(MAKE) dist
-
-.PHONY: dev
-dev:
- $(MAKE) all
- -$(MAKE) uninstall
- $(MAKE) install
- $(MAKE) -C examples/validate distclean
- $(MAKE) -C examples/validate validate
+++ /dev/null
-# make all: make bytecode archives
-# make opt: make native archives
-#----------------------------------------------------------------------
-
-include Makefile.conf
-
-all:
- $(MAKE) -f Makefile.code pxp_types.cma
- $(MAKE) -f Makefile.code pxp_lex_iso88591.cma
- if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_lex_utf8.cma; else rm -f pxp_lex_utf8.cma; fi
- $(MAKE) -f Makefile.code pxp_engine.cma
- if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_utf8.cmo; else rm -f pxp_utf8.cmo; fi
-
-opt:
- $(MAKE) -f Makefile.code pxp_types.cmxa
- $(MAKE) -f Makefile.code pxp_lex_iso88591.cmxa
- if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_lex_utf8.cmxa; else rm -f pxp_lex_utf8.cmxa; fi
- $(MAKE) -f Makefile.code pxp_engine.cmxa
- if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_utf8.cmx; else rm -f pxp_utf8.cmx; fi
-
-#----------------------------------------------------------------------
-
-pxp_types.cma: $(OBJECTS_types)
- $(OCAMLC) -a -o pxp_types.cma $(OBJECTS_types)
-
-pxp_types.cmxa: $(XOBJECTS_types)
- $(OCAMLOPT) -a -o pxp_types.cmxa $(XOBJECTS_types)
-
-pxp_engine.cma: $(OBJECTS_engine)
- $(OCAMLC) -a -o pxp_engine.cma $(OBJECTS_engine)
-
-pxp_engine.cmxa: $(XOBJECTS_engine)
- $(OCAMLOPT) -a -o pxp_engine.cmxa $(XOBJECTS_engine)
-
-
-# The following rules are "phony" to force 'make' to go into the
-# "lexers" subdirectory.
-
-.PHONY: pxp_lex_iso88591.cma
-pxp_lex_iso88591.cma: $(CMI_types)
- $(MAKE) -C lexers all_iso88591
- cp lexers/pxp_lex_iso88591.cma .
-
-.PHONY: pxp_lex_iso88591.cmxa
-pxp_lex_iso88591.cmxa: $(CMI_types)
- $(MAKE) -C lexers opt_iso88591
- cp lexers/pxp_lex_iso88591.cmxa lexers/pxp_lex_iso88591.a .
-
-.PHONY: pxp_lex_utf8.cma
-pxp_lex_utf8.cma: $(CMI_types)
- $(MAKE) -C lexers all_utf8
- cp lexers/pxp_lex_utf8.cma .
-
-.PHONY: pxp_lex_utf8.cmxa
-pxp_lex_utf8.cmxa: $(CMI_types)
- $(MAKE) -C lexers opt_utf8
- cp lexers/pxp_lex_utf8.cmxa lexers/pxp_lex_utf8.a .
-
-#----------------------------------------------------------------------
-# general rules:
-
-OPTIONS =
-OCAMLC = $(OCAMLFIND) ocamlc -package "$(PACKAGES)" \
- -g -I lexers $(OPTIONS) $(ROPTIONS)
-OCAMLOPT = $(OCAMLFIND) ocamlopt -package "$(PACKAGES)" \
- -p -I lexers $(OPTIONS) $(ROPTIONS)
-OCAMLDEP = ocamldep $(OPTIONS)
-OCAMLFIND = ocamlfind
-
-depend: *.ml *.mli pxp_yacc.ml
- $(OCAMLDEP) *.ml *.mli >depend
-
-.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .m2y
-
-.ml.cmx:
- $(OCAMLOPT) -c $<
-
-.ml.cmo:
- $(OCAMLC) -c $<
-
-.mli.cmi:
- $(OCAMLC) -c $<
-
-.mll.ml:
- ocamllex $<
-
-.m2y.ml:
- ./m2parsergen/m2parsergen < $< >`basename $< .m2y`.ml || { rm -f `basename $< .m2y`.ml; false; }
-
-*.mli:
-
-
-# Generated dependencies:
-
-include depend
-
+++ /dev/null
-# User-configurable section:
-
-# yes or no: Should the parser support UTF-8 strings as internal
-# representation? "yes" is recommended, but the parser becomes much
-# bigger.
-UTF8_SUPPORT = yes
-
-# --- End of User-configurable section.
-
-# Settings.
-
-NAME = pxp
-PACKAGES = netstring
-
-# Caml objects that are needed by the lexers:
-OBJECTS_types = \
- pxp_types.cmo pxp_lexer_types.cmo
-
-CMI_types = $(OBJECTS_types:.cmo=.cmi)
-
-# Caml objects that depend on the lexers:
-OBJECTS_engine = \
- pxp_lexers.cmo \
- pxp_dfa.cmo \
- pxp_aux.cmo pxp_reader.cmo \
- pxp_entity.cmo pxp_dtd.cmo pxp_document.cmo \
- pxp_yacc.cmo pxp_codewriter.cmo
-
-# The same objects, as native code:
-XOBJECTS_types = $(OBJECTS_types:.cmo=.cmx)
-XOBJECTS_engine = $(OBJECTS_engine:.cmo=.cmx)
-
-# .mli files to install:
-
-MLI = pxp_document.mli pxp_dtd.mli \
- pxp_types.mli pxp_yacc.mli \
- pxp_codewriter.mli pxp_dfa.mli
+++ /dev/null
-*.cmo
-*.cmx
-*.cmi
-
+++ /dev/null
-version = "PXP-emulator"
-requires = "pxp"
-description = "Validating parser for XML-1.0"
-archive(byte) = "markup.cma"
-archive(native) = "markup.cmxa"
-
+++ /dev/null
-# make all: make bytecode archive
-# make opt: make native archive
-# make install: install bytecode archive, and if present, native archive
-# make uninstall: uninstall package
-# make clean: remove intermediate files (in this directory)
-# make CLEAN: remove intermediate files (recursively)
-# make distclean: remove any superfluous files (recursively)
-
-#----------------------------------------------------------------------
-
-include Makefile.conf
-
-.PHONY: all
-all:
- $(MAKE) -f Makefile.code all
-
-.PHONY: opt
-opt:
- $(MAKE) -f Makefile.code opt
-
-.PHONY: install
-install: all
- files=`../tools/collect_files *.cmi *.cma *.cmxa *.a` && \
- ocamlfind install $(NAME) $(MLI) $$files META
-
-.PHONY: uninstall
-uninstall:
- ocamlfind remove $(NAME)
-
-.PHONY: clean
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa *.new *.old
-
-.PHONY: CLEAN
-CLEAN: clean
-
-.PHONY: distclean
-distclean: clean
- rm -f *~ depend depend.pkg
-
+++ /dev/null
-# make all: make bytecode archives
-# make opt: make native archives
-#----------------------------------------------------------------------
-
-include Makefile.conf
-
-.PHONY: all
-all: markup.cma
-
-.PHONY: opt
-opt: markup.cmxa
-
-#----------------------------------------------------------------------
-
-markup.cma: $(OBJECTS)
- $(OCAMLC) -a -o markup.cma $(OBJECTS)
-
-markup.cmxa: $(XOBJECTS)
- $(OCAMLOPT) -a -o markup.cmxa $(XOBJECTS)
-
-#----------------------------------------------------------------------
-# general rules:
-
-OPTIONS =
-OCAMLC = ocamlfind ocamlc -g -I .. -package netstring $(OPTIONS) $(ROPTIONS)
-OCAMLOPT = ocamlfind ocamlopt -p -I .. -package netstring $(OPTIONS) $(ROPTIONS)
-OCAMLDEP = ocamldep $(OPTIONS)
-OCAMLFIND = ocamlfind
-
-depend: *.ml *.mli
- $(OCAMLDEP) *.ml *.mli >depend
-
-.SUFFIXES: .cmo .cmi .cmx .ml .mli
-
-.ml.cmx:
- $(OCAMLOPT) -c $<
-
-.ml.cmo:
- $(OCAMLC) -c $<
-
-.mli.cmi:
- $(OCAMLC) -c $<
-
-*.mli:
-
-
-# Generated dependencies:
-
-include depend
-
+++ /dev/null
-NAME = markup
-
-OBJECTS = markup_types.cmo markup_dtd.cmo markup_reader.cmo \
- markup_document.cmo markup_yacc.cmo
-XOBJECTS = $(OBJECTS:.cmo=.cmx)
-
-MLI = markup_document.mli markup_dtd.mli \
- markup_types.mli markup_yacc.mli markup_reader.mli
-
+++ /dev/null
-This directory contains the modules for Markup-0.2.10
-compatibility. The modules consist mainly of wrapper classes for the
-new PXP classes, and translate the old methods to the new ones.
-
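-For most methods the translation is plain delegation to the wrapped
-PXP object; markup_document.ml, for instance, implements much of the
-old node API with one-liners of the form
-
-   method data = self # pxp_node # data
-
-(this is only meant to illustrate the wrapping style; see the sources
-for the real definitions).
-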
-Please note that the compatibility is not perfect. Sometimes there are
-new methods which do not exist in Markup-0.2.10, and sometimes even
-existing methods changed their signature. I have tried to avoid that,
-but there are some ugly cases which are hard to solve without such
-modifications.
-
-Translating old methods into new methods costs time and
-memory. Because of this, it is best to consider the compatibility
-modules as a migration path to PXP: you can test whether PXP parses your
-input files, and you can compare the old API with the new API
-directly. (However, it is hard to test new features of PXP with the
-compatibility modules; the old API does not reflect the new features.)
-
-The compatibility modules are currently maintained, but that will stop
-once PXP has been established.
-
-(Gerd)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *)
-
-type node_type =
- T_element of string
- | T_data
-
-class type [ 'node ] extension = [ 'node ] Pxp_document.extension
-
-class type [ 'ext, 'node ] pxp_extension_type =
-object ('self)
- method clone : 'self
- method node : 'self Pxp_document.node
- method set_node : 'self Pxp_document.node -> unit
-
- method markup_node : 'node
- method set_markup_node : 'node -> unit
-
- method set_index : 'self Pxp_yacc.index -> unit
- method index : 'self Pxp_yacc.index
- end
-;;
-
-
-class type [ 'ext ] node =
- object ('self)
- constraint 'ext = 'ext node #extension
- method pxp_node : (('ext, 'ext node) pxp_extension_type) Pxp_document.node
-
- method extension : 'ext
- method delete : unit
- method parent : 'ext node
- method root : 'ext node
- method orphaned_clone : 'ext node
- method orphaned_flat_clone : 'ext node
- method add_node : 'ext node -> unit
- method add_pinstr : Markup_dtd.proc_instruction -> unit
- method pinstr : string -> Markup_dtd.proc_instruction list
- method pinstr_names : string list
- method sub_nodes : 'ext node list
- method iter_nodes : ('ext node -> unit) -> unit
- method iter_nodes_sibl :
- ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
- method set_nodes : 'ext node list -> unit
- method data : string
- method node_type : node_type
- method attribute : string -> Markup_types.att_value
- method attribute_names : string list
- method attribute_type : string -> Markup_types.att_type
- method attributes : (string * Markup_types.att_value) list
- method required_string_attribute : string -> string
- method required_list_attribute : string -> string list
- method optional_string_attribute : string -> string option
- method optional_list_attribute : string -> string list
- method quick_set_attributes : (string * Markup_types.att_value) list -> unit
- method find : string -> 'ext node
- method reset_finder : unit
- method dtd : Markup_dtd.dtd
- method create_element :
- Markup_dtd.dtd -> node_type -> (string * string) list -> 'ext node
- method create_data : Markup_dtd.dtd -> string -> 'ext node
- method local_validate : unit
- method keep_always_whitespace_mode : unit
- method write_compact_as_latin1 : Markup_types.output_stream -> unit
- method internal_adopt : 'ext node option -> unit
- method internal_delete : 'ext node -> unit
- method internal_init : Markup_dtd.dtd -> string -> (string * string) list -> unit
- end
-;;
-
-
-class [ 'ext ] pxp_extension init_markup_node =
- (object (self : 'self)
- (* constraint 'ext = 'ext node #extension *)
- val mutable pxp_node = (None :
- 'self Pxp_document.node option)
- (* 'ext pxp_extension Pxp_document.node option *)
- val mutable markup_node = (init_markup_node : 'ext node)
-
- val mutable index = (None : 'self Pxp_yacc.index option)
-
- method clone =
- {< >}
-
- method node =
- match pxp_node with
- None ->
- assert false
- | Some n -> n
-
- method set_node n =
- pxp_node <- Some n
-
- method markup_node = markup_node
-
- method set_markup_node n = markup_node <- n
-
- method set_index ix =
- index <- Some ix
-
- method index =
- match index with
- None -> assert false
- | Some x -> x
-
- end
- : ['ext, 'ext node] pxp_extension_type )
-;;
-
-
-class [ 'ext ] emulate_markup_node init_ext init_pxp_node =
- object (self)
- constraint 'ext = 'ext node #extension
- val mutable pxp_node = (init_pxp_node :
- ('ext, 'ext #node)
- pxp_extension_type Pxp_document.node option)
- val mutable extension = (init_ext : 'ext)
-
- method pxp_node =
- match pxp_node with
- None -> assert false
- | Some n -> n
-
- method extension = extension
- method delete = self # pxp_node # delete
- method parent = self # pxp_node # parent # extension # markup_node
- method root = self # pxp_node # root # extension # markup_node
-
- method orphaned_clone =
- let ext' = extension # clone in
- let pxp' = self # pxp_node # orphaned_clone in
- let n = new emulate_markup_node ext' (Some pxp') in
- ext' # set_node (n : 'ext #node :> 'ext node);
- pxp' # extension # set_markup_node n;
- n
-
- method orphaned_flat_clone =
- let ext' = extension # clone in
- let pxp' = self # pxp_node # orphaned_flat_clone in
- let n = new emulate_markup_node ext' (Some pxp') in
- ext' # set_node (n : 'ext #node :> 'ext node);
- pxp' # extension # set_markup_node n;
- n
-
- method dtd = self # pxp_node # dtd
-
- method add_node (n : 'ext node) =
- let n_pxp = n # pxp_node in
- self # pxp_node # add_node n_pxp
-
- method add_pinstr pi =
- self # pxp_node # add_pinstr pi
-
- method sub_nodes =
- let l = self # pxp_node # sub_nodes in
- List.map (fun n_pxp -> n_pxp # extension # markup_node) l
-
- method pinstr name =
- self # pxp_node # pinstr name
-
- method pinstr_names =
- self # pxp_node # pinstr_names
-
- method iter_nodes f =
- self # pxp_node # iter_nodes
- (fun n_pxp -> f (n_pxp # extension # markup_node))
-
- method iter_nodes_sibl f =
- self # pxp_node # iter_nodes_sibl
- (fun left_pxp node_pxp right_pxp ->
- let left =
- match left_pxp with
- None -> None
- | Some n_pxp -> Some (n_pxp # extension # markup_node) in
- let right =
- match right_pxp with
- None -> None
- | Some n_pxp -> Some (n_pxp # extension # markup_node) in
- let node =
- node_pxp # extension # markup_node in
- f left node right
- )
-
- method set_nodes (l : 'ext node list) =
- let l_pxp = List.map (fun n -> n # pxp_node) l in
- self # pxp_node # set_nodes l_pxp
-
- method data = self # pxp_node # data
-
- method node_type =
- match self # pxp_node # node_type with
- Pxp_document.T_data -> T_data
- | Pxp_document.T_element name -> T_element name
- | Pxp_document.T_super_root -> T_element "-vr"
- | Pxp_document.T_pinstr _ -> T_element "-pi"
- | _ -> assert false
-
- method attribute name =
- self # pxp_node # attribute name
-
- method attribute_names =
- self # pxp_node # attribute_names
-
- method attribute_type name =
- self # pxp_node # attribute_type name
-
- method attributes =
- self # pxp_node # attributes
-
- method required_string_attribute name =
- self # pxp_node # required_string_attribute name
-
- method required_list_attribute name =
- self # pxp_node # required_list_attribute name
-
- method optional_string_attribute name =
- self # pxp_node # optional_string_attribute name
-
- method optional_list_attribute name =
- self # pxp_node # optional_list_attribute name
-
- method quick_set_attributes l =
- self # pxp_node # quick_set_attributes l
-
- method find (name : string) =
- let index = self # root # pxp_node # extension # index in
- let n = index # find name in (* may raise Not_found *)
- n # extension # markup_node
-
- method reset_finder = ()
-
- method create_element dtd nt atts =
- let nt_pxp =
- match nt with
- T_data -> Pxp_document.T_data
- | T_element name -> Pxp_document.T_element name in
- let node_pxp =
- self # pxp_node # create_element dtd nt_pxp atts in
- let ext' = extension # clone in
- let n = new emulate_markup_node ext' (Some node_pxp) in
- ext' # set_node (n : 'ext #node :> 'ext node);
- node_pxp # extension # set_markup_node n;
- n
-
- method create_data dtd s =
- let node_pxp =
- self # pxp_node # create_data dtd s in
- let ext' = extension # clone in
- let n = new emulate_markup_node ext' (Some node_pxp) in
- ext' # set_node (n : 'ext #node :> 'ext node);
- node_pxp # extension # set_markup_node n;
- n
-
- method keep_always_whitespace_mode =
- self # pxp_node # keep_always_whitespace_mode
-
- method write_compact_as_latin1 out =
- self # pxp_node # write_compact_as_latin1 out
-
- method local_validate =
- self # pxp_node # local_validate()
-
- method internal_adopt (p:'ext node option) =
- assert false;
- ()
-
- method internal_delete (n:'ext node) =
- assert false;
- ()
-
- method internal_init (d:Markup_dtd.dtd) (s:string) (atts:(string*string)list) =
- assert false;
- ()
- end
-;;
-
-class [ 'ext ] data_impl ext data =
- object (self)
- inherit [ 'ext ] emulate_markup_node ext None
- constraint 'ext = 'ext node #extension
- initializer
- if data <> "" then
- failwith "Emulation of Markup_document: Cannot instantiate data node with non-empty string";
- let self' = (self : 'ext #node :> 'ext node ) in
- pxp_node <- Some (new Pxp_document.data_impl (new pxp_extension self'))
-
- end
-;;
-
-class [ 'ext ] element_impl ext =
- object (self)
- inherit [ 'ext ] emulate_markup_node ext None
- initializer
- let self' = (self : 'ext #node :> 'ext node ) in
- pxp_node <- Some (new Pxp_document.element_impl (new pxp_extension self'))
- end
-;;
-
-
-class [ 'ext ] document w =
- object (self)
- val pxp_doc = new Pxp_document.document
- (w : Markup_types.collect_warnings :> Pxp_types.collect_warnings)
-
- val mutable standalone_flag = false
-
- method init_xml_version v =
- pxp_doc # init_xml_version v
-
- method xml_version =
- pxp_doc # xml_version
-
- method init_xml_standalone b =
- standalone_flag <- b
-
- method xml_standalone = standalone_flag
-
- method init_root (r : 'ext node) =
- pxp_doc # init_root (r # pxp_node);
- self # dtd # set_standalone_declaration standalone_flag
- (* questionable *)
-
- method root =
- let pxp_root = pxp_doc # root in
- pxp_root # extension # markup_node
-
- method dtd =
- pxp_doc # dtd
-
- method add_pinstr pi =
- pxp_doc # add_pinstr pi
-
- method pinstr name =
- pxp_doc # pinstr name
-
- method pinstr_names =
- pxp_doc # pinstr_names
-
- method write_compact_as_latin1 out =
- pxp_doc # write_compact_as_latin1 out
-
- end
-;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:30 lpadovan
- * Initial revision
- *
- * Revision 1.6 2000/08/18 20:19:00 gerd
- * Changed the emulation: there are now wrapper objects for nodes.
- * This was necessary because node_type changed in PXP such that it became
- * incompatible with Markup's node_type.
- *
- * Revision 1.5 2000/07/14 21:35:35 gerd
- * Updated because of the simplification of Pxp_types.collect_warnings.
- *
- * Revision 1.4 2000/07/08 17:40:50 gerd
- * Updated the simulation.
- *
- * Revision 1.3 2000/06/14 22:19:27 gerd
- * Update because of additional 'encoding' methods.
- *
- * Revision 1.2 2000/05/30 00:08:40 gerd
- * Bugfix.
- *
- * Revision 1.1 2000/05/29 23:43:51 gerd
- * Initial compatibility revision.
- *
- *)
-
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * Markup! The validating XML parser for Objective Caml.
- * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
- *
- * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_document.mli.
- * It corresponds to revision 1.13 of markup_document.mli.
- *)
-
-(**********************************************************************)
-(* *)
-(* Markup_document: *)
-(* Object model of the document/element instances *)
-(* *)
-(**********************************************************************)
-
-
-(* ======================================================================
- * OVERVIEW
- *
- * class type node ............. The common class type of the nodes of
- * the element tree. Nodes are either
- * elements (inner nodes) or data nodes
- * (leaves)
- * class type extension ........ The minimal properties of the so-called
- * extensions of the nodes: Nodes can be
- * customized by applying a class parameter
- * that adds methods/values to nodes.
- * class data_impl : node ...... Implements data nodes.
- * class element_impl : node ... Implements element nodes
- * class document .............. A document is an element with some additional
- * properties
- *
- * ======================================================================
- *
- * THE STRUCTURE OF NODE TREES:
- *
- * Every node except the root node has a parent node. The parent node is
- * always an element, because data nodes never contain other nodes.
- * In the other direction, element nodes may have children; both elements
- * and data nodes are possible as children.
- * Every node knows its parent (if any) and all its children (if any);
- * the linkage is maintained in both directions. A node without a parent
- * is called a root.
- * A node cannot be the child of two nodes (neither of two different nodes,
- * nor a multiple child of the same node).
- * You can break the connection between a node and its parent; the method
- * "delete" performs this operation and deletes the node from the parent's
- * list of children. The node is now a root, for itself and for all
- * subordinate nodes. In this context, the node is also called an orphan,
- * because it has lost its parent (this is a bit misleading because the
- * parent is not always the creator of a node).
- * In order to simplify complex operations, you can also set the list of
- * children of an element. Nodes that have been children before are unchanged;
- * new nodes are added (and the linkage is set up), and nodes that no longer
- * occur in the list are handled as if they had been deleted.
- * If you try to add a node that is not a root (either by an "add" or by a
- * "set" operation) the operation fails.
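- *
- * A small example (only a sketch, not part of the original text; "e"
- * stands for some element node): the following call keeps all non-data
- * children of e and drops the rest; the removed nodes become roots:
- *
- *   e # set_nodes
- *     (List.filter (fun n -> n # node_type <> T_data) (e # sub_nodes))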
- *
- * CREATION OF NODES
- *
- * The class interface supports creation of nodes by cloning a so-called
- * exemplar. The idea is that it is sometimes useful to implement different
- * element types by different classes, and to implement this by looking up
- * exemplars.
- * Imagine you have three element types A, B, and C, and three classes
- * a, b, and c implementing the node interface (for example, by providing
- * different extensions, see below). The XML parser can be configured to
- * have a lookup table
- * { A --> a0, B --> b0, C --> c0 }
- * where a0, b0, c0 are exemplars of the classes a, b, and c, i.e. empty
- * objects belonging to these classes. If the parser finds an instance of
- * A, it looks up the exemplar a0 of A and clones it (actually, the method
- * "create_element" performs this for elements, and "create_data" for data
- * nodes). Clones belong to the same class as the original nodes, so the
- * instances of the elements have the same classes as the configured
- * exemplars.
- * Note: This technique assumes that the interface of all exemplars is the
- * same!
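- *
- * As a sketch (illustrative, not from the original text): with the
- * lookup table above, an instance of element type A could be created by
- *
- *   let a_node = a0 # create_element dtd (T_element "A") [ "att", "value" ]
- *
- * which clones the exemplar a0, so a_node belongs to the same class as a0.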
- *
- * THE EXTENSION
- *
- * The class type node and all its implementations have a class parameter
- * 'ext which must at least fulfil the properties of the class type "extension".
- * The idea is that you can add properties, for example:
- *
- * class my_extension =
- * object
- * (* minimal properties required by class type "extension": *)
- * method clone = ...
- * method node = ...
- * method set_node n = ...
- * (* here my own methods: *)
- * method do_this_and_that ...
- * end
- *
- * class my_element_impl = [ my_extension ] element_impl
- * class my_data_impl = [ my_extension ] data_impl
- *
- * The whole XML parser is parameterized with 'ext, so your extension is
- * visible everywhere (this is the reason why extensibility is solved by
- * parametric polymorphism and not by inclusive polymorphism (subtyping)).
- *
- *
- * SOME COMPLICATED TYPE EXPRESSIONS
- *
- * Sometimes the following type expressions turn out to be necessary:
- *
- * 'a node extension as 'a
- * This is the type of an extension that belongs to a node that
- * has an extension that is the same as we started with.
- *
- * 'a extension node as 'a
- * This is the type of a node that has an extension that belongs to a
- * node of the type we started with.
- *
- *
- * DOCUMENTS
- * ...
- *
- * ======================================================================
- *
- * SIMPLE USAGE: ...
- *)
-
-
-open Markup_dtd
-
-
-type node_type =
- T_element of string
- | T_data
-
-
-
-class type [ 'node ] extension =
- object ('self)
- method clone : 'self
- (* "clone" should return an exact deep copy of the object. *)
- method node : 'node
- (* "node" returns the corresponding node of this extension. This method
-       * is intended to return exactly what has previously been set by "set_node".
- *)
- method set_node : 'node -> unit
- (* "set_node" is invoked once the extension is associated to a new
- * node object.
- *)
- end
-;;
-
-class type [ 'ext, 'node ] pxp_extension_type =
-object ('self)
- method clone : 'self
- method node : 'self Pxp_document.node
- method set_node : 'self Pxp_document.node -> unit
-
- method markup_node : 'node
- method set_markup_node : 'node -> unit
-
- method set_index : 'self Pxp_yacc.index -> unit
- method index : 'self Pxp_yacc.index
- end
-;;
-
-class type [ 'ext ] node =
- object ('self)
- constraint 'ext = 'ext node #extension
- method pxp_node : (('ext, 'ext node) pxp_extension_type) Pxp_document.node
-
- method extension : 'ext
- (* Return the extension of this node: *)
-
- method delete : unit
- (* Delete this node from the parent's list of sub nodes. This node gets
- * orphaned.
- * 'delete' does nothing if this node does not have a parent.
- *)
-
- method parent : 'ext node
- (* Get the parent, or raise Not_found if this node is an orphan. *)
-
- method root : 'ext node
- (* Get the direct or indirect parent that does not have a parent itself,
- * i.e. the root of the tree.
- *)
-
- method orphaned_clone : 'ext node
- (* return an exact clone of this element and all sub nodes (deep copy)
- * except string values which are shared by this node and the clone.
- * The other exception is that the clone has no parent (i.e. it is now
- * a root).
- *)
-
- method orphaned_flat_clone : 'ext node
- (* return a clone of this element where all subnodes are omitted.
- * The type of the node, and the attributes are the same as in the
- * original node.
- * The clone has no parent.
- *)
-
- method add_node : 'ext node -> unit
- (* Append new sub nodes -- mainly used by the parser itself, but
- * of course open for everybody. If an element is added, it must be
- * an orphan (i.e. does not have a parent node); and after addition
- * *this* node is the new parent.
- *)
-
- method add_pinstr : proc_instruction -> unit
- (* Add a processing instruction to the set of processing instructions of
- * this node. Usually only elements contain processing instructions.
- *)
-
- method pinstr : string -> proc_instruction list
- (* Get all processing instructions with the passed name *)
-
- method pinstr_names : string list
- (* Get a list of all names of processing instructions *)
-
- method sub_nodes : 'ext node list
- (* Get the list of sub nodes *)
-
- method iter_nodes : ('ext node -> unit) -> unit
- (* iterate over the sub nodes *)
-
- method iter_nodes_sibl :
- ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
-      (* Here every iteration step can also access the previous and the
-       * following node, if present:
- *)
-
- method find : string -> 'ext node
- (* Get the node that has an ID attribute with this value, or raise
- * Not_found.
- * "find" may also cause a Validation_error if something is wrong
- * with the IDs.
- *)
-
- method reset_finder : unit
-      (* ensures that newly added nodes will also be found *)
-
- method set_nodes : 'ext node list -> unit
-      (* Set the list of sub nodes. Elements that are no longer sub nodes get
-       * orphaned, and all new elements that previously were not sub nodes
-       * must already be orphans.
- *)
-
- method data : string
- (* Get the data string of this node. For data nodes, this string is just
- * the content. For elements, this string is the concatenation of all
- * subordinate data nodes.
- *)
-
- method node_type : node_type
-      (* Get the type of this node: T_element name for element nodes, T_data for data nodes. *)
-
- method attribute : string -> Markup_types.att_value
- method attribute_names : string list
- method attribute_type : string -> Markup_types.att_type
- method attributes : (string * Markup_types.att_value) list
- (* Get a specific attribute; get the names of all attributes; get the
- * type of a specific attribute; get names and values of all attributes.
- * Only elements have attributes.
-       * Note: If the DTD allows arbitrary attributes for this element, "attribute_type"
- * raises Undeclared.
- *)
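-      (* Example (a sketch only; it assumes the usual constructors of
-       * Markup_types.att_value, i.e. Value, Valuelist and Implied_value):
-       *
-       *   match n # attribute "id" with
-       *       Value s       -> ...    (* a single string value *)
-       *     | Valuelist l   -> ...    (* a tokenized value, e.g. NMTOKENS *)
-       *     | Implied_value -> ...    (* attribute not specified, no default *)
-       *)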
-
- method required_string_attribute : string -> string
- method required_list_attribute : string -> string list
- (* Return the attribute or fail if the attribute is not present:
-       * The first version always passes the value back as a string;
-       * the second version always as a list.
- *)
-
- method optional_string_attribute : string -> string option
- method optional_list_attribute : string -> string list
- (* Return some attribute value or return None if the attribute is not
- * present:
-       * The first version always passes the value back as a string;
-       * the second version always as a list.
- *)
-
- method quick_set_attributes : (string * Markup_types.att_value) list -> unit
- (* Sets the attributes but does not check whether they match the DTD.
- *)
-
- method dtd : dtd
- (* Get the DTD *)
-
- method create_element : dtd -> node_type -> (string * string) list -> 'ext node
- (* create an "empty copy" of this element:
- * - new DTD
- * - new node type
- * - new attribute list
- * - empty list of nodes
- *)
-
- method create_data : dtd -> string -> 'ext node
- (* create an "empty copy" of this data node: *)
-
- method local_validate : unit
- (* Check that this element conforms to the DTD: *)
-
- method keep_always_whitespace_mode : unit
-      (* Normally, add_node does not accept data nodes when the DTD does not
-       * allow data nodes, or allows only whitespace ("ignorable whitespace").
-       * Once you have invoked this method, ignorable whitespace is forced
-       * to be included in the document.
- *)
-
- method write_compact_as_latin1 : Markup_types.output_stream -> unit
- (* Write the contents of this node and the subtrees to the passed
- * output stream; the character set ISO-8859-1 is used. The format
- * is compact (the opposite of "pretty printing").
- *)
-
- (* ---------------------------------------- *)
- (* internal methods: *)
- method internal_adopt : 'ext node option -> unit
- method internal_delete : 'ext node -> unit
- method internal_init : dtd -> string -> (string * string) list -> unit
- end
-;;
-
-class [ 'ext ] data_impl : 'ext -> string -> [ 'ext ] node
-
-class [ 'ext ] element_impl : 'ext -> [ 'ext ] node
-
-class [ 'ext ] document :
- Markup_types.collect_warnings ->
- object
- method init_xml_version : string -> unit
- method init_xml_standalone : bool -> unit
- method init_root : 'ext node -> unit
-
- method xml_version : string
- method xml_standalone : bool
- method dtd : dtd
- method root : 'ext node
-
- method add_pinstr : proc_instruction -> unit
- method pinstr : string -> proc_instruction list
- method pinstr_names : string list
-
- method write_compact_as_latin1 : Markup_types.output_stream -> unit
- (* Write the document to the passed
- * output stream; the character set ISO-8859-1 is used. The format
- * is compact (the opposite of "pretty printing").
- * If a DTD is present, the DTD is included into the internal subset.
- *)
-
- end
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:30 lpadovan
- * Initial revision
- *
- * Revision 1.4 2000/08/18 20:19:16 gerd
- * Updates in the emulation because of PXP changes.
- *
- * Revision 1.3 2000/07/16 16:35:06 gerd
- * Update because PXP interface contains now the method 'write'.
- *
- * Revision 1.2 2000/06/14 22:19:27 gerd
- * Update because of additional 'encoding' methods.
- *
- * Revision 1.1 2000/05/29 23:43:51 gerd
- * Initial compatibility revision.
- *
- * ======================================================================
- * OLD LOGS:
- *
- * Revision 1.13 2000/05/27 19:15:08 gerd
- * Removed the method init_xml_standalone.
- *
- * Revision 1.12 2000/05/01 20:42:34 gerd
- * New method write_compact_as_latin1.
- *
- * Revision 1.11 2000/04/30 18:15:57 gerd
- * Beautifications.
- * New method keep_always_whitespace_mode.
- *
- * Revision 1.10 2000/03/11 22:58:15 gerd
- * Updated to support Markup_codewriter.
- *
- * Revision 1.9 2000/01/27 21:51:56 gerd
- * Added method 'attributes'.
- *
- * Revision 1.8 2000/01/27 21:19:07 gerd
- * Added further methods.
- *
- * Revision 1.7 1999/11/09 22:20:14 gerd
- * Removed method init_dtd from class "document". The DTD is
- * implicitly passed to the document by the root element.
- *
- * Revision 1.6 1999/09/01 22:51:40 gerd
- * Added methods to store processing instructions.
- *
- * Revision 1.5 1999/09/01 16:19:57 gerd
- * The "document" class has now a "warner" as class argument.
- *
- * Revision 1.4 1999/08/19 21:59:13 gerd
- * Added method "reset_finder".
- *
- * Revision 1.3 1999/08/19 01:08:29 gerd
- * Added method "find".
- *
- * Revision 1.2 1999/08/15 02:19:41 gerd
- * Some new explanations: That unknown elements are not rejected
- * if the DTD allows them.
- *
- * Revision 1.1 1999/08/10 00:35:51 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *)
-
-class dtd w =
- Pxp_dtd.dtd
- (w : Markup_types.collect_warnings :> Pxp_types.collect_warnings)
- `Enc_iso88591;;
-
-class dtd_element dtd name =
- Pxp_dtd.dtd_element dtd name;;
-
-class dtd_notation name id =
- Pxp_dtd.dtd_notation name id `Enc_iso88591;;
-
-class proc_instruction target value =
- Pxp_dtd.proc_instruction target value `Enc_iso88591;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:30 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/07/14 21:35:35 gerd
- * Updated because of the simplification of Pxp_types.collect_warnings.
- *
- * Revision 1.2 2000/06/14 22:19:27 gerd
- * Update because of additional 'encoding' methods.
- *
- * Revision 1.1 2000/05/29 23:43:51 gerd
- * Initial compatibility revision.
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * Markup! The validating XML parser for Objective Caml.
- * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
- *
- * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_dtd.mli.
- * It corresponds to revision 1.11 of markup_dtd.mli.
- *)
-
-(**********************************************************************)
-(* *)
-(* Markup_dtd: *)
-(* Object model of document type declarations *)
-(* *)
-(**********************************************************************)
-
-(* ======================================================================
- * OVERVIEW
- *
- * class dtd ............... represents the whole DTD, including element
- * declarations, entity declarations, notation
- * declarations, and processing instructions
- * class dtd_element ....... represents an element declaration consisting
- * of a content model and an attribute list
- * declaration
- * class dtd_notation ...... represents a notation declaration
- * class proc_instruction .. represents a processing instruction
- * ======================================================================
- *
- *)
-
-
-class dtd :
- Markup_types.collect_warnings ->
- Pxp_dtd.dtd
- (* Incompatibilities:
- * add_gen_entity, gen_entity
- *)
-
-class dtd_element : dtd -> string -> Pxp_dtd.dtd_element
- (* Incompatibilities:
- * set_content_model, add_attribute
- *)
-
-class dtd_notation : string -> Markup_types.ext_id -> Pxp_dtd.dtd_notation
-
-class proc_instruction : string -> string -> Pxp_dtd.proc_instruction
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:30 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/05/29 23:43:51 gerd
- * Initial compatibility revision.
- *
- * ======================================================================
- * OLD LOGS:
- *
- * Revision 1.11 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.10 2000/05/27 19:20:38 gerd
- * Changed the interfaces for the standalone check: New
- * methods: standalone_declaration, set_standalone_declaration,
- * externally_declared, attribute_violates_standalone_declaration.
- * The method set_content_model has been renamed to
- * set_cm_and_extdecl; it now initializes also whether the element
- * has been declared in an external entity.
- * Methods add_gen_entity and gen_entity pass an additional
- * boolean argument containing whether the declaration of the
- * general entity happened in an external entity.
- * Method add_attribute expects this argument, too, which
- * states whether the declaration of the attribute happened in an
- * external entity.
- *
- * Revision 1.9 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.8 2000/05/06 23:10:26 gerd
- * allow_arbitrary for elements, too.
- *
- * Revision 1.7 2000/05/01 20:42:52 gerd
- * New method write_compact_as_latin1.
- *
- * Revision 1.6 2000/03/11 22:58:15 gerd
- * Updated to support Markup_codewriter.
- *
- * Revision 1.5 2000/02/22 02:32:02 gerd
- * Updated.
- *
- * Revision 1.4 1999/11/09 22:15:41 gerd
- * Added method "arbitrary_allowed".
- *
- * Revision 1.3 1999/09/01 16:21:56 gerd
- * "dtd" classes have now an argument that passes a "warner".
- *
- * Revision 1.2 1999/08/15 02:20:23 gerd
- * New feature: a DTD can allow arbitrary elements.
- *
- * Revision 1.1 1999/08/10 00:35:51 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *)
-
-open Markup_types;;
-
-class type resolver =
- object
- method open_in : ext_id -> Lexing.lexbuf
- method close_in : unit
- method change_encoding : string -> unit
- method clone : resolver
- end
-;;
-
-(* General note: close_in is simulated by close_all. Of course, this is
- * wrong, but it should not matter
- *)
-
-
-class resolve_read_channel ch the_warner =
- object (self)
- val pxp_resolver =
- new Pxp_reader.resolve_read_this_channel
- ~auto_close:false
- ch
- val warner = the_warner
-
- initializer
- pxp_resolver # init_warner
- (warner : Markup_types.collect_warnings :> Pxp_types.collect_warnings);
- pxp_resolver # init_rep_encoding `Enc_iso88591;
-
- method open_in xid =
- pxp_resolver # open_in xid
-
- method close_in =
- pxp_resolver # close_all (* sic! *)
-
- method change_encoding enc =
- pxp_resolver # change_encoding enc
-
- method clone =
- ( {< pxp_resolver = pxp_resolver # clone >} : #resolver :> resolver )
-
- end
-;;
-
-
-class resolve_read_string str =
- object (self)
- val pxp_resolver =
- new Pxp_reader.resolve_read_this_string str
- val warner = new Pxp_types.drop_warnings
-
- initializer
- pxp_resolver # init_warner warner;
- pxp_resolver # init_rep_encoding `Enc_iso88591;
-
- method open_in xid =
- pxp_resolver # open_in xid
-
- method close_in =
- pxp_resolver # close_all (* sic! *)
-
- method change_encoding enc =
- pxp_resolver # change_encoding enc
-
- method clone =
- ( {< pxp_resolver = pxp_resolver # clone >} : #resolver :> resolver )
- end
-;;
-
-
-class resolve_as_file the_warner =
- object (self)
- val pxp_resolver =
- new Pxp_reader.resolve_as_file
- ~system_encoding:`Enc_iso88591
- ()
- val warner = the_warner
-
- initializer
- pxp_resolver # init_warner
- (warner : Markup_types.collect_warnings :> Pxp_types.collect_warnings);
- pxp_resolver # init_rep_encoding `Enc_iso88591;
-
- method open_in xid =
- pxp_resolver # open_in xid
-
- method close_in =
- pxp_resolver # close_all (* sic! *)
-
- method change_encoding enc =
- pxp_resolver # change_encoding enc
-
- method clone =
- ( {< pxp_resolver = pxp_resolver # clone >} : #resolver :> resolver )
- end
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:30 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/07/14 21:35:35 gerd
- * Updated because of the simplification of Pxp_types.collect_warnings.
- *
- * Revision 1.2 2000/07/08 17:40:50 gerd
- * Updated the simulation.
- *
- * Revision 1.1 2000/05/29 23:43:51 gerd
- * Initial compatibility revision.
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * Markup! The validating XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *
- * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_reader.mli.
- * It corresponds to revision 1.3 of markup_reader.mli.
- *)
-
-open Markup_types;;
-
-
-(* The class type resolver is the official type of all "resolvers".
- * Resolvers get file names (or better, external identifiers) and
- * return lexbufs, scanning the file for tokens. Resolvers may be
- * cloned, and clones can interpret relative file names relative to
- * their creator.
- *)
-
-class type resolver =
- object
- (* A resolver can open a character source, and returns this source as
- * Lexing.lexbuf.
- * The resolver should recode the source into ISO-8859-1. By default,
- * a resolver should assume UTF-8 or UTF-16 encoding. Before
- * 'change_encoding' is invoked, the resolver should only return
- * lexbufs with one character. After 'change_encoding' has been invoked,
- * there is no character limit anymore.
- * 'change_encoding' can only be invoked once. This method is usually
- * called after the <? ... ?> prolog of the entity has been read.
- * If this method is not called, it is up to the resolver to find out
- * if UTF-8 or UTF-16 is used. It is recommended to invoke this method
- * with an empty string to indicate this situation.
- *)
- method open_in : ext_id -> Lexing.lexbuf
- method close_in : unit
- method change_encoding : string -> unit
-
-
- (* Every resolver can be cloned. The clone does not inherit the connection
- * with the external object, i.e. it is closed.
- *)
- method clone : resolver
-
- end
-;;
-
-
-(* The following class is the current main implementation of resolvers.
- * It fetches strings from an arbitrary source (by calling init_in, and
- * then repeatedly next_string), recodes them to ISO-8859-1, and creates
- * lexbufs for them.
- * It is not complete, as the source is missing.
- *
- * Note that 'resolve_general' may change in future revisions; it is ugly.
- *)
-
-(* -- This API simulation does not provide 'resolve_general' any longer
-
-class virtual resolve_general :
- collect_warnings ->
- object
- val mutable encoding : string
- val mutable encoding_requested : bool
- val warner : collect_warnings
-
- method clone : resolver
-
- method private warn : int -> unit
- method private autodetect : string -> unit
-
- method private virtual next_string : string -> int -> int -> int
- method private virtual init_in : ext_id -> unit
- method virtual close_in : unit
-
- method open_in : ext_id -> Lexing.lexbuf
-
- method change_encoding : string -> unit
- end
-*)
-
-
-(* The next classes are resolvers for concrete input sources. *)
-
-class resolve_read_channel :
- in_channel -> collect_warnings -> resolver;;
-
-  (* Reads from the passed channel (it may even be a pipe). Note that this
-   * resolver cannot handle file inclusions, as it is pre-bound to a
-   * specific channel and is not able to interpret file names.
-   * That means that if there is an entity reference (something like &name; or
-   * %name;) to parse, and the definition points to another file, the
-   * resolver will fail.
-   *)
-
-
-class resolve_read_string :
- string -> resolver;;
-
-  (* Reads from the passed string. Like 'resolve_read_channel', this
-   * resolver cannot handle file inclusions.
-   *)
-
-
-class resolve_as_file :
- collect_warnings -> resolver;;
-
- (* Reads from the local file system. Every file name is interpreted as
- * file name of the local file system, and the referred file is read.
- * This resolver can handle file inclusions as long as they do not
- * exceed the scope of the local file system (i.e. no URLs).
- *)
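-
-(* Usage sketch (not part of the original interface; "doc.xml" and the
- * pairing with Markup_yacc.ExtID are only illustrative assumptions):
- *
- *   let warner   = new Markup_types.collect_warnings in
- *   let resolver = new Markup_reader.resolve_as_file warner in
- *   let src      = Markup_yacc.ExtID (Markup_types.System "doc.xml", resolver) in
- *   ...
- *)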
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:30 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/07/08 17:40:50 gerd
- * Updated the simulation.
- *
- * Revision 1.1 2000/05/29 23:43:51 gerd
- * Initial compatibility revision.
- *
- * ======================================================================
- * OLD LOGS:
- *
- * Revision 1.3 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.2 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.1 2000/03/13 23:41:54 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *)
-
-
-type ext_id = Pxp_types.ext_id =
- System of string
- | Public of (string * string)
- | Anonymous
-type dtd_id = Pxp_types.dtd_id=
- External of ext_id
- | Derived of ext_id
- | Internal
-type content_model_type = Pxp_types.content_model_type =
- Unspecified
- | Empty
- | Any
- | Mixed of mixed_spec list
- | Regexp of regexp_spec
-and mixed_spec = Pxp_types.mixed_spec =
- MPCDATA
- | MChild of string
-and regexp_spec = Pxp_types.regexp_spec =
- Optional of regexp_spec
- | Repeated of regexp_spec
- | Repeated1 of regexp_spec
- | Alt of regexp_spec list
- | Seq of regexp_spec list
- | Child of string
-type att_type = Pxp_types.att_type =
- A_cdata
- | A_id
- | A_idref
- | A_idrefs
- | A_entity
- | A_entities
- | A_nmtoken
- | A_nmtokens
- | A_notation of string list
- | A_enum of string list
-type att_default = Pxp_types.att_default =
- D_required
- | D_implied
- | D_default of string
- | D_fixed of string
-type att_value = Pxp_types.att_value =
- Value of string
- | Valuelist of string list
- | Implied_value
-
-class collect_warnings =
-object
- val mutable w = Buffer.create 100
- method print_warnings =
- Buffer.contents w
- method reset =
- Buffer.clear w
- method warn s =
- Buffer.add_string w ("WARNING: " ^ s ^ "\n")
-end
-
-exception Illegal_character of int
-exception Validation_error = Pxp_types.Validation_error
-exception WF_error = Pxp_types.WF_error
-exception Character_not_supported = Pxp_types.Character_not_supported
-exception Bad_character_stream = Netconversion.Malformed_code
-exception At = Pxp_types.At
-exception Undeclared = Pxp_types.Undeclared
-
-let string_of_exn = Pxp_types.string_of_exn
-
-type output_stream = Pxp_types.output_stream =
- Out_buffer of Buffer.t
- | Out_channel of out_channel
- | Out_function of (string -> int -> int -> unit)
-
-let write = Pxp_types.write
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:30 lpadovan
- * Initial revision
- *
- * Revision 1.5 2000/08/18 20:19:16 gerd
- * Updates in the emulation because of PXP changes.
- *
- * Revision 1.4 2000/07/16 18:30:15 gerd
- * Updated because PXP does no longer have the exception
- * Illegal_character.
- *
- * Revision 1.3 2000/07/14 21:35:35 gerd
- * Updated because of the simplification of Pxp_types.collect_warnings.
- *
- * Revision 1.2 2000/07/08 17:40:50 gerd
- * Updated the simulation.
- *
- * Revision 1.1 2000/05/29 23:43:51 gerd
- * Initial compatibility revision.
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * Markup! The validating XML parser for Objective Caml.
- * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
- *
- * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_types.mli.
- * It corresponds to revision 1.7 of markup_types.mli.
- *)
-
-
-type ext_id = Pxp_types.ext_id =
- System of string
- | Public of (string * string)
- | Anonymous
-type dtd_id = Pxp_types.dtd_id =
- External of ext_id
- | Derived of ext_id
- | Internal
-type content_model_type = Pxp_types.content_model_type =
- Unspecified
- | Empty
- | Any
- | Mixed of mixed_spec list
- | Regexp of regexp_spec
-and mixed_spec = Pxp_types.mixed_spec =
- MPCDATA
- | MChild of string
-and regexp_spec = Pxp_types.regexp_spec =
- Optional of regexp_spec
- | Repeated of regexp_spec
- | Repeated1 of regexp_spec
- | Alt of regexp_spec list
- | Seq of regexp_spec list
- | Child of string
-type att_type = Pxp_types.att_type =
- A_cdata
- | A_id
- | A_idref
- | A_idrefs
- | A_entity
- | A_entities
- | A_nmtoken
- | A_nmtokens
- | A_notation of string list
- | A_enum of string list
-type att_default = Pxp_types.att_default =
- D_required
- | D_implied
- | D_default of string
- | D_fixed of string
-type att_value = Pxp_types.att_value =
- Value of string
- | Valuelist of string list
- | Implied_value
-
-class collect_warnings :
- object
- method warn : string -> unit
- method print_warnings : string
- method reset : unit
- end
-;;
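-
-(* Usage sketch (not part of the original interface): collect warnings
- * explicitly and print them afterwards.
- *
- *   let () =
- *     let w = new Markup_types.collect_warnings in
- *     w # warn "something suspicious";
- *     print_string (w # print_warnings)
- *)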
-
-
-exception Illegal_character of int
-exception Validation_error of string
-exception WF_error of string
-exception Character_not_supported
-exception Bad_character_stream
-exception At of (string * exn)
-exception Undeclared
-
-val string_of_exn : exn -> string
- (* Converts a Markup exception into a readable string *)
-
-
-type output_stream = Pxp_types.output_stream =
- Out_buffer of Buffer.t
- | Out_channel of out_channel
- | Out_function of (string -> int -> int -> unit)
-
-val write : output_stream -> string -> int -> int -> unit
- (* write os s pos len: Writes the string to the buffer/channel/stream *)
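-
-(* For example (a sketch, not part of the original interface): write the six
- * characters of "hello\n" to stdout through the Out_channel case.
- *
- *   let () = Markup_types.write (Markup_types.Out_channel stdout) "hello\n" 0 6
- *)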
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:30 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/07/08 17:40:50 gerd
- * Updated the simulation.
- *
- * Revision 1.1 2000/05/29 23:43:51 gerd
- * Initial compatibility revision.
- *
- * ======================================================================
- * OLD LOGS:
- *
- * Revision 1.7 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.6 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.5 2000/05/01 20:43:25 gerd
- * New type output_stream; new function 'write'.
- *
- * Revision 1.4 1999/09/01 16:25:35 gerd
- * Dropped Illegal_token and Content_not_allowed_here. WF_error can
- * be used instead.
- *
- * Revision 1.3 1999/08/15 02:22:40 gerd
- * Added exception Undeclared.
- *
- * Revision 1.2 1999/08/14 22:15:17 gerd
- * New class "collect_warnings".
- *
- * Revision 1.1 1999/08/10 00:35:52 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *)
-
-open Markup_types
-open Markup_dtd
-open Markup_document
-
-type config =
- { warner : collect_warnings;
- errors_with_line_numbers : bool;
- processing_instructions_inline : bool;
- virtual_root : bool;
- debugging_mode : bool;
- }
-
-
-type source =
- Entity of ((dtd -> Pxp_entity.entity) * Markup_reader.resolver)
- | Channel of in_channel
- | File of string
- | Latin1 of string
- | ExtID of (ext_id * Markup_reader.resolver)
-
-type 'ext domspec =
- { map : (node_type, 'ext node) Hashtbl.t;
- default_element : 'ext node;
- }
-
-
-class default_ext =
- object(self)
- val mutable node = (None : ('a extension node as 'a) option)
- method clone = {< >}
- method node =
- match node with
- None ->
- assert false
- | Some n -> n
- method set_node n =
- node <- Some n
- end
-;;
-
-
-let default_extension = new default_ext;;
-
-let default_config =
- { warner = new collect_warnings;
- errors_with_line_numbers = true;
- processing_instructions_inline = false;
- virtual_root = false;
- debugging_mode = false;
- }
-
-
-let default_dom =
- let d = Hashtbl.create 2 in
- Hashtbl.add d T_data (new data_impl default_extension "");
- { map = d;
- default_element = new element_impl default_extension
- }
-;;
-
-
-let pxp_config cfg =
- { Pxp_yacc.default_config with
- Pxp_yacc.warner = (cfg.warner :> Pxp_types.collect_warnings);
- Pxp_yacc.errors_with_line_numbers = cfg.errors_with_line_numbers;
- Pxp_yacc.enable_pinstr_nodes = cfg.processing_instructions_inline;
- Pxp_yacc.enable_super_root_node = cfg.virtual_root;
- Pxp_yacc.encoding = `Enc_iso88591;
- Pxp_yacc.recognize_standalone_declaration = false;
- Pxp_yacc.debugging_mode = cfg.debugging_mode;
- }
-;;
-
-
-class pxp_resolver r =
- object (self)
- val markup_resolver = r
-
- method init_rep_encoding enc =
- assert (enc = `Enc_iso88591 )
-
- method init_warner w =
- ()
-
- method rep_encoding = `Enc_iso88591
-
- method open_in xid =
- markup_resolver # open_in xid
-
- method close_in =
- markup_resolver # close_in
-
- method close_all =
- markup_resolver # close_in
-
- method change_encoding enc =
- markup_resolver # change_encoding enc
-
- method clone =
- ( {< markup_resolver = markup_resolver # clone >}
- : #Pxp_reader.resolver :> Pxp_reader.resolver )
- end
-;;
-
-
-let pxp_source src =
- match src with
- Entity (mkent, res) -> Pxp_yacc.Entity(mkent, new pxp_resolver res)
- | ExtID (id, res) -> Pxp_yacc.ExtID(id, new pxp_resolver res)
- | Channel ch -> Pxp_yacc.from_channel
- ~system_encoding:`Enc_iso88591 ch
- | File f -> Pxp_yacc.from_file
- ~system_encoding:`Enc_iso88591 f
- | Latin1 s -> Pxp_yacc.from_string ~fixenc:`Enc_iso88591 s
-;;
-
-
-let pxp_dom dom =
- let dex =
- try Hashtbl.find dom.map T_data
- with Not_found -> assert false
- in
- let eex = dom.default_element in
- let m = Hashtbl.create 100 in
- Hashtbl.iter
- (fun nt ex ->
- match nt with
- T_element name when name <> "-vr" && name <> "-pi" ->
- let pxp_ex = ex # pxp_node in
- Hashtbl.add m name pxp_ex
- | _ -> ()
- )
- dom.map;
- let srex =
- try
- Some ((Hashtbl.find dom.map (T_element "-vr")) # pxp_node)
- with
- Not_found -> None
- in
- let piex =
- try
- Some ((Hashtbl.find dom.map (T_element "-pi")) # pxp_node)
- with
- Not_found -> None
- in
- Pxp_document.make_spec_from_mapping
- ?super_root_exemplar:srex
- ?default_pinstr_exemplar:piex
- ~data_exemplar:(dex # pxp_node)
- ~default_element_exemplar:(eex # pxp_node)
- ~element_mapping:m
- ()
-;;
-
-
-let markup_document w index doc =
- let mdoc = new document w in
- mdoc # init_xml_version (doc # xml_version);
- mdoc # init_xml_standalone (doc # xml_standalone);
- let r = doc # root # extension in
- r # set_index index;
- mdoc # init_root (r # markup_node);
- List.iter
- (fun piname ->
- let l = doc # pinstr piname in
- List.iter
- (fun pi -> mdoc # add_pinstr pi)
- l)
- (doc # pinstr_names);
- mdoc
-;;
-
-
-
-let parse_dtd_entity cfg src =
- Pxp_yacc.parse_dtd_entity
- (pxp_config cfg)
- (pxp_source src)
-;;
-
-
-let parse_document_entity cfg src dom =
- let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in
- markup_document
- cfg.warner
- index
- (Pxp_yacc.parse_document_entity
- ~id_index:index
- (pxp_config cfg)
- (pxp_source src)
- (pxp_dom dom))
-;;
-
-
-let parse_content_entity cfg src dtd dom =
- let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in
- let n =
- (Pxp_yacc.parse_content_entity
- ~id_index:index
- (pxp_config cfg)
- (pxp_source src)
- dtd
- (pxp_dom dom)) # extension in
- n # set_index index;
- n # markup_node
-;;
-
-
-let parse_wf_entity cfg src dom =
- let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in
- (* Restriction: index is not filled! *)
- markup_document
- cfg.warner
- index
- (Pxp_yacc.parse_wfdocument_entity
- (pxp_config cfg)
- (pxp_source src)
- (pxp_dom dom))
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:30 lpadovan
- * Initial revision
- *
- * Revision 1.4 2000/08/18 20:19:16 gerd
- * Updates in the emulation because of PXP changes.
- *
- * Revision 1.3 2000/07/14 21:35:35 gerd
- * Updated because of the simplification of Pxp_types.collect_warnings.
- *
- * Revision 1.2 2000/07/08 17:40:50 gerd
- * Updated the simulation.
- *
- * Revision 1.1 2000/05/29 23:43:51 gerd
- * Initial compatibility revision.
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * Markup! The validating XML parser for Objective Caml.
- * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
- *
- * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_yacc.mli.
- * It corresponds to revision 1.4 of markup_yacc.mli.
- *)
-
-
-(*$ markup-yacc.mli *)
-
-open Markup_types
-open Markup_dtd
-open Markup_document
-
-type config =
- { warner : collect_warnings;
- (* An object that collects warnings. *)
-
- errors_with_line_numbers : bool;
- (* Whether error messages contain line numbers or not. The parser
- * is 10 to 20 per cent faster if line numbers are turned off;
- * you get only character positions in this case.
- *)
-
- processing_instructions_inline : bool;
- (* true: turns a special mode for processing instructions on. Normally,
- * you cannot determine the exact location of a PI; you only know
-      * in which element the PI occurs. The "inline" mode makes it possible
-      * to find out the exact location: Every PI is artificially wrapped
- * by a special element with name "-pi". For example, if the XML text
- * is <a><?x?><?y?></a>, the parser normally produces only an element
-      * object for "a", and puts the PIs "x" and "y" into it (in no
-      * particular order). In inline mode, the object "a" will contain two objects
- * with name "-pi", and the first object will contain "x", and the
- * second "y".
- * Notes:
- * (1) The name "-pi" is reserved. You cannot use it for your own
- * tags because tag names must not begin with '-'.
-      * (2) You need not add a declaration for "-pi" to the DTD. These
- * elements are handled separately.
- * (3) Of course, the "-pi" objects are created from exemplars of
- * your DOM map.
- *)
-
- virtual_root : bool;
- (* true: the topmost element of the XML tree is not the root element,
- * but the so-called virtual root. The root element is a son of the
- * virtual root. The virtual root is an ordinary element with name
- * "-vr".
- * The following behaviour changes, too:
- * - PIs occurring outside the root element and outside the DTD are
- * added to the virtual root instead of the document object
- * - If processing_instructions_inline is also turned on, these PIs
- * are added inline to the virtual root
- * Notes:
- * (1) The name "-vr" is reserved. You cannot use it for your own
- * tags because tag names must not begin with '-'.
-      * (2) You need not add a declaration for "-vr" to the DTD. These
- * elements are handled separately.
- * (3) Of course, the "-vr" objects are created from exemplars of
- * your DOM map.
- *)
-
- (* The following options are not implemented, or only for internal
- * use.
- *)
-
- debugging_mode : bool;
- }
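-
-(* A minimal configuration sketch (not part of the original interface):
- * start from default_config (declared below) and switch on the inline
- * PI mode described above.
- *
- *   let cfg =
- *     { Markup_yacc.default_config with
- *         Markup_yacc.processing_instructions_inline = true }
- *)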
-
-
-type source =
- Entity of ((dtd -> Pxp_entity.entity) * Markup_reader.resolver)
- | Channel of in_channel
- | File of string
- | Latin1 of string
- | ExtID of (ext_id * Markup_reader.resolver)
-
-(* Note on sources:
- *
- * The sources do not have all the same capabilities. Here the differences:
- *
- * - File: A File source reads from a file by name. This has the advantage
- * that references to external entities can be resolved. - The problem
- * with SYSTEM references is that they usually contain relative file
- * names; more exactly, a file name relative to the document containing it.
- * It is only possible to convert such names to absolute file names if the
- * name of the document containing such references is known; and File
- * denotes this name.
- *
- * - Channel, Latin1: These sources read from documents given as channels or
- * (Latin 1-encoded) strings. There is no file name, and because of this
- * the documents must not contain references to external files (even
- * if the file names are given as absolute names).
- *
- * - ExtID(x,r): The identifier x (either the SYSTEM or the PUBLIC name) of the
- * entity to read from is passed to the resolver r as-is.
- * The intention of this option is to allow customized
- * resolvers to interpret external identifiers without any restriction.
- * For example, you can assign the PUBLIC identifiers a meaning (they
- * currently do not have any), or you can extend the "namespace" of
- * identifiers.
- * ExtID is the interface of choice for own extensions to resolvers.
- *
- * - Entity(m,r): You can implement every behaviour by using a customized
- * entity class. Once the DTD object d is known that will be used during
- * parsing, the entity e = m d is determined and used together with the
- * resolver r.
- * This is only for hackers.
- *)
-
-
-type 'ext domspec =
- { map : (node_type, 'ext node) Hashtbl.t;
- default_element : 'ext node;
- }
- (* Specifies which node to use as exemplar for which node type. See the
- * manual for explanations.
- *)
-
-val default_config : config
- (* - The resolver is able to read from files by name
- * - Warnings are thrown away
-   * - Error messages will contain line numbers
- * - The internal encoding is ISO-8859-1
- * - standalone declaration is checked
- *)
-
-val default_extension : ('a node extension) as 'a
-  (* A "null" extension; an extension that does not extend the functionality *)
-
-val default_dom : ('a node extension as 'a) domspec
- (* Specifies that you do not want to use extensions. *)
-
-val parse_dtd_entity : config -> source -> dtd
- (* Parse an entity containing a DTD, and return this DTD. *)
-
-val parse_document_entity : config -> source -> 'ext domspec -> 'ext document
- (* Parse a closed document, i.e. a document beginning with <!DOCTYPE...>,
- * and validate the contents of the document against the DTD contained
- * and/or referenced in the document.
- *)
-
-val parse_content_entity : config ->
- source ->
- dtd ->
- 'ext domspec ->
- 'ext node
- (* Parse a file representing a well-formed fragment of a document. The
- * fragment must be a single element (i.e. something like <a>...</a>;
- * not a sequence like <a>...</a><b>...</b>). The element is validated
- * against the passed DTD, but it is not checked whether the element is
- * the root element specified in the DTD.
- * Note that you can create DTDs that specify not to validate at all
- * (invoke method allow_arbitrary on the DTD).
- *)
-
-val parse_wf_entity : config -> source -> 'ext domspec -> 'ext document
- (* Parse a closed document (see parse_document_entity), but do not
- * validate it. Only checks on well-formedness are performed.
- *)
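-
-(* Usage sketch (not part of the original interface; the document string is
- * only illustrative): parse a complete document from a Latin-1 string with
- * the defaults declared above.
- *
- *   let doc =
- *     Markup_yacc.parse_document_entity
- *       Markup_yacc.default_config
- *       (Markup_yacc.Latin1 "<!DOCTYPE x [ <!ELEMENT x EMPTY> ]> <x/>")
- *       Markup_yacc.default_dom
- *)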
-
-(*$-*)
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:30 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/05/29 23:43:51 gerd
- * Initial compatibility revision.
- *
- * ======================================================================
- * OLD LOGS:
- *
- * Revision 1.4 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.3 2000/05/27 19:24:01 gerd
- * New option: recognize_standalone_declaration.
- *
- * Revision 1.2 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.1 2000/05/06 23:21:49 gerd
- * Initial revision.
- *
- * Revision 1.9 2000/04/30 18:23:38 gerd
- * New config options 'processing_instructions_inline' and
- * 'virtual_root'.
- *
- * Revision 1.8 2000/03/13 23:46:46 gerd
- * Change: The 'resolver' component of the 'config' type has
- * disappeared. Instead, there is a new resolver component in the Entity
- * and ExtID values of 'source'. I hope that this makes clearer that the
- * resolver has only an effect if used together with Entity and ExtID
- * sources.
- * Change: The Entity value can now return the entity dependent
- * on the DTD that is going to be used.
- *
- * Revision 1.7 2000/02/22 02:32:02 gerd
- * Updated.
- *
- * Revision 1.6 2000/02/22 01:52:45 gerd
- * Added documentation.
- *
- * Revision 1.5 2000/01/20 20:54:43 gerd
- * New config.errors_with_line_numbers.
- *
- * Revision 1.4 1999/09/01 23:09:10 gerd
- * New function parse_wf_entity that simulates a well-formedness
- * parser.
- *
- * Revision 1.3 1999/09/01 16:26:36 gerd
- * Added an empty line. This is *really* a big change.
- *
- * Revision 1.2 1999/08/14 22:20:27 gerd
- * The "config" slot has now a component "warner"which is
- * an object with a "warn" method. This is used to warn about characters
- * that cannot be represented in the Latin 1 alphabet.
- * Furthermore, there is a new component "debugging_mode".
- *
- * Revision 1.1 1999/08/10 00:35:52 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-******************************************************************************
-ABOUT-FINDLIB - Package manager for O'Caml
-******************************************************************************
-
-
-==============================================================================
-Abstract
-==============================================================================
-
-The findlib library provides a scheme to manage reusable software components
-(packages), and includes tools that support this scheme. Packages are
-collections of OCaml modules for which metainformation can be stored. The
-packages are kept in the filesystem hierarchy, but with a strict directory
-structure. The library contains functions to look up the directory that stores
-a package, to query metainformation about a package, and to retrieve dependency
-information about multiple packages. There is also a tool that allows the user
-to enter queries on the command-line. In order to simplify compilation and
-linkage, there are new frontends of the various OCaml compilers that can
-directly deal with packages.
-
-Metainformation is stored together with the packages. This includes a version
-string, the archives the package consists of, and additional linker options.
-Packages can also be dependent on other packages. There is a query which finds
-out all predecessors of a list of packages and sorts them topologically. The
-new compiler frontends do this implicitly.
-
-Metainformation can be conditional, i.e. depend on a set of predicates. This is
-mainly used to be able to react to certain properties of the environment, such
-as whether the bytecode or the native compiler is invoked, whether the
-application is
-multi-threaded, and a few more. If the new compiler frontends are used, most
-predicates are found out automatically.
-
-There is special support for scripts. A new directive, "#require", loads
-packages into scripts. Of course, this works only with newly created toploops
-which include the findlib library.
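-
-For example, inside such a toploop one can load a package interactively
-(a sketch; the package name is only illustrative):
-
- #require "netstring";;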
-
-==============================================================================
-Where to get findlib
-==============================================================================
-
-The manual of findlib is available online [1]. You can download findlib here
-[2].
-
-
---------------------------
-
-[1] see http://www.ocaml-programming.de/packages/documentation/findlib/
-
-[2] see http://www.ocaml-programming.de/packages/findlib-0.3.1.tar.gz
-
-
-
+++ /dev/null
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE readme SYSTEM "readme.dtd" [
-
-<!ENTITY % common SYSTEM "common.xml">
-%common;
-
-<!ENTITY f "<em>findlib</em>">
-<!ENTITY F "<em>Findlib</em>">
-
-]>
-
-<readme title="ABOUT-FINDLIB - Package manager for O'Caml">
- <sect1>
- <title>Abstract</title>
-<p>
-The &f; library provides a scheme to manage reusable software
-components (packages), and includes tools that support this
-scheme. Packages are collections of OCaml modules for which
-metainformation can be stored. The packages are kept in the filesystem
-hierarchy, but with a strict directory structure. The library contains
-functions to look up the directory that stores a package, to query
-metainformation about a package, and to retrieve dependency
-information about multiple packages. There is also a tool that allows
-the user to enter queries on the command-line. In order to simplify
-compilation and linkage, there are new frontends of the various OCaml
-compilers that can directly deal with packages.
-</p>
-
-<p>
-Metainformation is stored together with the packages. This includes a
-version string, the archives the package consists of, and additional
-linker options. Packages can also be dependent on other
-packages. There is a query which finds out all predecessors of a list
-of packages and sorts them topologically. The new compiler frontends
-do this implicitly.
-</p>
-
-<p>
-Metainformation can be conditional, i.e. depend on a set of
-predicates. This is mainly used to be able to react to certain
-properties of the environment, such as whether the bytecode or the native
-compiler is invoked, whether the application is multi-threaded, and a few
-more. If the new compiler frontends are used, most predicates are
-found out automatically.
-</p>
-
-<p>
-There is special support for scripts. A new directive, "#require",
-loads packages into scripts. Of course, this works only with newly
-created toploops which include the &f; library.
-</p>
-
- </sect1>
-
- <sect1><title>Where to get findlib</title>
- <p>
-The manual of &f; is available <a href="&url.findlib-project;">online</a>.
-You can download &f; <a href="&url.findlib-download;">here</a>.
-</p>
- </sect1>
-</readme>
+++ /dev/null
-******************************************************************************
-Extensions of the XML specification
-******************************************************************************
-
-
-==============================================================================
-This document
-==============================================================================
-
-This parser has some options extending the XML specification. Here, the options
-are explained.
-
-==============================================================================
-Optional declarations instead of mandatory declarations
-==============================================================================
-
-The XML spec demands that elements, notations, and attributes must be declared.
-However, there are sometimes situations where a different rule would be better:
-If there is a declaration, the actual instance of the element type, notation
-reference or attribute must match the pattern of the declaration; but if the
-declaration is missing, a reasonable default declaration should be assumed.
-
-I have an example that seems to be typical: The inclusion of HTML into a meta
-language. Imagine you have defined some type of "generator" or other tool
-working with HTML fragments, and your document contains two types of elements:
-The generating elements (with a name like "gen:xxx"), and the object elements
-which are HTML. As HTML is still evolving, you do not want to declare the HTML
-elements; the HTML fragments should be treated as well-formed XML fragments. In
-contrast to this, the elements of the generator should be declared and
-validated because you can more easily detect errors.
-
-The following two processing instructions can be included into the DTD:
-
--
- <?pxp:dtd optional-element-and-notation-declarations?>
-
- References to unknown element types and notations no longer cause an error.
-  The element may contain anything, but it must still be well-formed. It may
- have arbitrary attributes, and every attribute is treated as an #IMPLIED
- CDATA attribute.
-
--
- <?pxp:dtd optional-attribute-declarations elements="x y ..."?>
-
- References to unknown attributes inside one of the enumerated elements no
- longer cause an error. Such an attribute is treated as an #IMPLIED CDATA
- attribute.
- If there are several "optional-attribute-declarations" PIs, they are all
- interpreted (implicitly merged).
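-
-For example (an illustrative fragment; the element name "gen:page" is made
-up), a document might carry such a PI in its internal DTD subset:
-
-  <!DOCTYPE gen:page [
-    <?pxp:dtd optional-element-and-notation-declarations?>
-    <!ELEMENT gen:page ANY>
-  ]>
-
-Here only "gen:page" is declared and validated; references to undeclared
-(e.g. HTML) element types no longer cause validation errors.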
-
+++ /dev/null
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE readme SYSTEM "readme.dtd" [
-
-<!ENTITY % common SYSTEM "common.xml">
-%common;
-
-<!-- Special HTML config: -->
-<!ENTITY % readme:html:up '<a href="../..">up</a>'>
-
-<!ENTITY % config SYSTEM "config.xml">
-%config;
-
-]>
-
-<readme title="Extensions of the XML specification">
-
- <sect1>
- <title>This document</title>
- <p>This parser has some options extending the XML specification. Here, the
-options are explained.
-</p>
- </sect1>
-
- <sect1>
- <title>Optional declarations instead of mandatory declarations</title>
-
-<p>The XML spec demands that elements, notations, and attributes must be
-declared. However, there are sometimes situations where a different rule would
-be better: <em>If</em> there is a declaration, the actual instance of the
-element type, notation reference or attribute must match the pattern of the
-declaration; but if the declaration is missing, a reasonable default declaration
-should be assumed.</p>
-
-<p>I have an example that seems to be typical: The inclusion of HTML into a
-meta language. Imagine you have defined some type of "generator" or other tool
-working with HTML fragments, and your document contains two types of elements:
-The generating elements (with a name like "gen:xxx"), and the object elements
-which are HTML. As HTML is still evolving, you do not want to declare the HTML
-elements; the HTML fragments should be treated as well-formed XML fragments. In
-contrast to this, the elements of the generator should be declared and
-validated because you can more easily detect errors.</p>
-
-<p>The following two processing instructions can be included into the DTD:</p>
- <ul>
- <li><p><code><![CDATA[<?pxp:dtd optional-element-and-notation-declarations?>]]></code>
- References to unknown element types and notations no longer cause an
-      error. The element may contain anything, but it must still be
-      well-formed. It may have arbitrary attributes, and every attribute is
- treated as an #IMPLIED CDATA attribute.</p>
- </li>
- <li><p><code><![CDATA[<?pxp:dtd optional-attribute-declarations elements="x y ..."?>]]></code>
- References to unknown attributes inside one of the enumerated elements
- no longer cause an error. Such an attribute is treated as an #IMPLIED
- CDATA attribute.
-</p>
-
-<p>If there are several "optional-attribute-declarations" PIs, they are all
-interpreted (implicitly merged).</p>
- </li>
- </ul>
- </sect1>
-</readme>
+++ /dev/null
-******************************************************************************
-INSTALL - PXP, the XML parser for O'Caml
-******************************************************************************
-
-
-==============================================================================
-The "pxp" package
-==============================================================================
-
-------------------------------------------------------------------------------
-Prerequisites
-------------------------------------------------------------------------------
-
-PXP requires that the netstring package [1] is already installed. PXP works
-only with O'Caml 3.00 (the support for 2.04 has been dropped). The installation
-procedure defined in the Makefile requires findlib [2] to work [3].
-
-------------------------------------------------------------------------------
-Configuration
-------------------------------------------------------------------------------
-
-It is not necessary to configure PXP; but you can switch off the UTF-8 support
-by setting the variable
-
-UTF8_SUPPORT = no
-
-in Makefile.conf. In this case, the UTF-8 modules are not even compiled. - By
-default, the UTF-8 support is enabled.
-
-Note: Compiling the UTF-8 modules takes 10 minutes on my 400 MHz Pentium II; if
-this is too long, you can set UTF8_SUPPORT to "no".
-
-------------------------------------------------------------------------------
-Compilation
-------------------------------------------------------------------------------
-
-The Makefile defines the following goals:
-
-- make all
- compiles with the bytecode compiler and creates the files pxp_types.cma,
- pxp_lex_iso88591.cma, pxp_lex_utf8.cma (*), pxp_engine.cma, and pxp_utf8.cmo
- (*). The (*) files are not built if the UTF-8 support is switched off.
-
-- make opt
- compiles with the native compiler and creates the files pxp_types.cmxa,
- pxp_lex_iso88591.cmxa, pxp_lex_utf8.cmxa (*), pxp_engine.cmxa, and
- pxp_utf8.cmx (*). The (*) files are not built if the UTF-8 support is
- switched off.
-
-------------------------------------------------------------------------------
-Installation
-------------------------------------------------------------------------------
-
-The Makefile defines the following goals:
-
-- make install
- installs the bytecode archives, the interface definitions, and if present,
- the native archives in the default location of findlib as package "pxp"
-
-- make uninstall
- removes the package "pxp"
-
-- make markup-install
- installs the Markup compatibility API as package "markup"
-
-- make markup-uninstall
- removes the package "markup"
-
-------------------------------------------------------------------------------
-Usage with the help of "findlib"
-------------------------------------------------------------------------------
-
-You can refer to the parser as the findlib package "pxp":
-
-ocamlfind ocamlc -package pxp ...
-
-By default, the UTF-8 support modules will be linked in. If you do not need
-them, you may define the predicate "pxp_without_utf8", which causes the
-UTF-8-relevant parts not to be linked with your program; the difference in size
-is about 1 MB:
-
-ocamlfind ocamlc -package pxp -predicates pxp_without_utf8 ...
-
-Note that you can also reduce the size of the resulting executable by
-specifying Netstring-related predicates (e.g. netstring_only_iso); see the
-documentation of Netstring.
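-
-For example, a single-module program can be compiled and linked in one step
-(a sketch; "prog.ml" is only a placeholder file name):
-
-ocamlfind ocamlc -package pxp -linkpkg -o prog prog.ml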
-
-------------------------------------------------------------------------------
-Linking with the archives directly
-------------------------------------------------------------------------------
-
-If you need UTF-8 support, you must link your program as follows:
-
-ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_lex_utf8.cma
- pxp_engine.cma pxp_utf8.cmo ...
-
-If you do not need UTF-8, the following suffices:
-
-ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_engine.cma ...
-
-
-
-==============================================================================
-The examples
-==============================================================================
-
-In the "examples" directory you find several applications of PXP. They require
-that PXP has been installed using findlib. See the Makefiles in the directories
-for descriptions of "make" goals.
-
-==============================================================================
-Trouble shooting
-==============================================================================
-
-------------------------------------------------------------------------------
-Solaris
-------------------------------------------------------------------------------
-
-The "make" utility of Solaris does not work properly enough; there is a bug in
-it that prevents the so-called suffix rules from being recognized. There are
-two solutions:
-
-- Install GNU make and use it instead of Solaris make. This is the recommended
- way to solve the problem, as GNU make can process almost every Makefile from
- open source projects, and you will never have problems with building
- software again.
-
-- Add the following lines to Makefile.code:
-
- %.cmx: %.ml
- $(OCAMLOPT) -c $<
-
- %.cmo: %.ml
- $(OCAMLC) -c $<
-
- %.cmi: %.mli
- $(OCAMLC) -c $<
-
- %.ml: %.mll
- ocamllex $<
-
-
-
-
---------------------------
-
-[1] see http://www.ocaml-programming.de/packages/documentation/netstring
-
-[2] see http://www.ocaml-programming.de/packages/documentation/findlib/
-
-[3] Findlib is a package manager, see the file ABOUT-FINDLIB.
-
-
-
+++ /dev/null
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE readme SYSTEM "readme.dtd" [
-
-<!ENTITY % common SYSTEM "common.xml">
-%common;
-
-<!ENTITY m "<em>PXP</em>">
-
-]>
-
-<readme title="INSTALL - PXP, the XML parser for O'Caml">
- <sect1><title>The "pxp" package</title>
- <sect2><title>Prerequisites</title>
- <p>
-&m; requires that the <a href="&url.netstring-project;">netstring package
-</a> is already installed. &m; works
-only with O'Caml 3.00 (the support for 2.04 has been dropped).
-The installation
-procedure defined in the Makefile requires <a
-href="&url.findlib-project;">findlib</a> to work<footnote><em>Findlib</em> is a
-package manager, see the file ABOUT-FINDLIB.</footnote>.
-</p>
- </sect2>
-
- <sect2><title>Configuration</title>
- <p>
-It is not necessary to configure PXP; but you can switch off the UTF-8
-support by setting the variable
-
-<code>
-UTF8_SUPPORT = no
-</code>
-
-in Makefile.conf. In this case, the UTF-8 modules are not even compiled.
-- By default, the UTF-8 support is enabled.
-</p>
-
- <p>
-Note: Compiling the UTF-8 modules takes 10 minutes on my 400 MHz Pentium II;
-if this is too long, you can set UTF8_SUPPORT to "no".</p>
- </sect2>
-
- <sect2><title>Compilation</title>
- <p>
-The Makefile defines the following goals:
-</p>
- <ul>
- <li>
- <p>make all</p>
- <p>compiles with the bytecode compiler and creates the files
-pxp_types.cma, pxp_lex_iso88591.cma, pxp_lex_utf8.cma (*), pxp_engine.cma,
-and pxp_utf8.cmo (*). The (*) files are not built if the UTF-8 support
-is switched off.</p>
- </li>
- <li>
- <p>make opt</p>
- <p>compiles with the native compiler and creates the files
-pxp_types.cmxa, pxp_lex_iso88591.cmxa, pxp_lex_utf8.cmxa (*), pxp_engine.cmxa,
-and pxp_utf8.cmx (*). The (*) files are not built if the UTF-8 support
-is switched off.</p>
- </li>
- </ul>
- </sect2>
-
- <sect2><title>Installation</title>
- <p>
-The Makefile defines the following goals:</p>
- <ul>
- <li>
- <p>make install</p>
- <p>installs the bytecode archives, the interface definitions, and if
-present, the native archives in the default location of <em>findlib</em> as
-package "pxp"
-</p>
- </li>
- <li>
- <p>make uninstall</p>
- <p>removes the package "pxp"</p>
- </li>
- <li>
- <p>make markup-install</p>
- <p>installs the Markup compatibility API as package "markup"</p>
- </li>
- <li>
- <p>make markup-uninstall</p>
- <p>removes the package "markup"</p>
- </li>
- </ul>
- </sect2>
-
- <sect2>
- <title>Usage with the help of "findlib"</title>
- <p>You can refer to the parser as the findlib package "pxp":
-
-<code>
-ocamlfind ocamlc -package pxp ...
-</code>
-
-By default, the UTF-8 support modules will be linked in. If you do not need
-them, you may define the predicate "pxp_without_utf8", which causes the
-UTF-8-relevant parts not to be linked with your program; the difference in size
-is about 1 MB:
-
-<code>
-ocamlfind ocamlc -package pxp -predicates pxp_without_utf8 ...
-</code>
-
-Note that you can also reduce the size of the resulting executable by
-specifying Netstring-related predicates (e.g. netstring_only_iso); see the
-documentation of Netstring.
-</p>
- </sect2>
-
- <sect2>
- <title>Linking with the archives directly</title>
- <p>If you need UTF-8 support, you must link your program as follows:
-
-<code>
-ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_lex_utf8.cma
- pxp_engine.cma pxp_utf8.cmo ...
-</code>
-
-If you do not need UTF-8, the following suffices:
-
-<code>
-ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_engine.cma ...
-</code>
-
-</p>
- </sect2>
-
- </sect1>
-
- <sect1><title>The examples</title>
- <p>
-In the "examples" directory you find several applications of &m;. They require
-that &m; has been installed using <em>findlib</em>. See the Makefiles in the
-directories for descriptions of "make" goals.
-</p>
- </sect1>
-
- <sect1><title>Trouble shooting</title>
- <sect2><title>Solaris</title>
- <p>
-The "make" utility of Solaris does not work properly enough; there is a bug
-in it that prevents the so-called suffix rules from being recognized. There
-are two solutions:</p>
- <ul>
- <li><p>Install GNU make and use it instead of Solaris make. This is
-the recommended way to solve the problem, as GNU make can process almost
-every Makefile from open source projects, and you will never have problems
-with building software again.</p></li>
- <li><p>Add the following lines to Makefile.code:
- <code>
-%.cmx: %.ml
- $(OCAMLOPT) -c $<
-
-%.cmo: %.ml
- $(OCAMLC) -c $<
-
-%.cmi: %.mli
- $(OCAMLC) -c $<
-
-%.ml: %.mll
- ocamllex $<
-</code>
-</p></li>
- </ul>
- </sect2>
- </sect1>
-</readme>
\ No newline at end of file
+++ /dev/null
-.PHONY: all
-all: README INSTALL ABOUT-FINDLIB SPEC PRERELEASE EXTENSIONS
-
-README: README.xml common.xml config.xml
- readme -text README.xml >README
-
-INSTALL: INSTALL.xml common.xml config.xml
- readme -text INSTALL.xml >INSTALL
-
-ABOUT-FINDLIB: ABOUT-FINDLIB.xml common.xml config.xml
- readme -text ABOUT-FINDLIB.xml >ABOUT-FINDLIB
-
-SPEC: SPEC.xml common.xml config.xml
- readme -text SPEC.xml >SPEC
-
-EXTENSIONS: EXTENSIONS.xml common.xml config.xml
- readme -text EXTENSIONS.xml >EXTENSIONS
-
-PRERELEASE: PRERELEASE.xml common.xml config.xml
- readme -text PRERELEASE.xml >PRERELEASE
-
-config.xml:
- touch config.xml
-
-common.xml:
- ln -s dist-common.xml common.xml
-
-.PHONY: clean
-clean:
-
-.PHONY: CLEAN
-CLEAN: clean
- $(MAKE) -C manual CLEAN
-
-.PHONY: distclean
-distclean: clean
- rm -f *~
- $(MAKE) -C manual distclean
-
-.PHONY: symlinks
-symlinks:
- ln -s ../examples/readme/readme.dtd .
-
+++ /dev/null
-******************************************************************************
-README - PXP, the XML parser for O'Caml
-******************************************************************************
-
-
-==============================================================================
-Pre-release of PXP, the XML parser for O'Caml
-==============================================================================
-
-PXP is the new, completely revised and partly rewritten validating XML parser
-for O'Caml; the old name, "Markup", has been dropped. The current version of
-PXP is still a bit experimental because it is not fully tested; however, it is
-now stable enough to be used in experimental applications.
-
-PXP will retain most parts of Markup's API; the name PXP emphasizes the
-strengths of the API: it is the Polymorphic XML Parser. The document objects
-representing the parsed file have an interesting polymorphism which allows
-the user of the parser to control which kind of objects are actually created.
-The current API supports the element type as criterion for object/class
-selection; future APIs will extend this concept such that arbitrary criteria
-are possible (e.g. you may want to have different classes for different
-namespaces).
-
-The current development goals of PXP are:
-
-- Full XML-1.0 conformance: The current pre-release is now very close to
- strict XML-1.0 conformance. The only bigger difference to the standard is
- that PXP sometimes accepts DTDs as legal while the standard forbids them
- (non-deterministic content models).
- One of the more important improvements since 0.2.10 is the possibility to
- represent XML documents internally as UTF-8 strings, not only as ISO-8859-1
- strings. Thanks to Claudio Sacerdoti Coen who contributed a special lexer
- preprocessor hiding the details of the UTF-8 encoding in the lexer
- definitions.
-
-- Correctness of validation: The well-formedness and validity constraints
-  must be implemented as correctly as possible. The last stable release
-  already had a regression test covering many aspects of XML. The test suite will
- be extended.
-
-- Parsing performance: It should be possible to process large amounts of data
-  in a reasonable period of time. The last stable release had many stages of
- processing that wasted time.
- The current pre-release is already 30 per cent faster than 0.2.10.
-
-- Simplicity of usage: Unlike parsers based on imperative languages and DOM,
-  the usage of PXP should be simple, even for complex tasks. The current
-  parser API already has many advantages over DOM; in particular, it is well
- integrated into the functional and object-oriented language O'Caml. You do
- not have to deal with artificial representations like "node lists" while the
- programming environment already provides good support for list structures.
- The fact that O'Caml allows a functional programming style is interesting
- for programs transforming XML trees.
-
-==============================================================================
-Download the PXP pre-release
-==============================================================================
-
-The current pre-release is available under
-http://www.ocaml-programming.de/packages/pxp-pre-0.99.8.tar.gz [1]. There is
-currently no documentation for this version of the software; it is recommended
-to use the Markup manual [2] and compare it with the current module interfaces.
-
-Please note that this is work in progress; it may still contain bugs and
-irregularities.
-
-The parser works only with OCaml-3. The parser needs the netstring package [3],
-at least version 0.9.1.
-
-I am very interested in your opinion of PXP; please contact me [4].
-
-==============================================================================
-Author, Credits, Copying
-==============================================================================
-
-PXP has been written by Gerd Stolpmann [5]; it contains contributions by
-Claudio Sacerdoti Coen. You may copy it as you like, and you may even use it for
-commercial purposes as long as the license conditions are respected; see the
-file LICENSE coming with the distribution. It allows almost everything.
-
-==============================================================================
-Where to find the stable release
-==============================================================================
-
-Here. [6]
-
-
---------------------------
-
-[1] see http://www.ocaml-programming.de/packages/pxp-pre-0.99.8.tar.gz
-
-[2] see http://www.ocaml-programming.de/packages/documentation/markup/manual
-
-[3] see http://www.ocaml-programming.de/packages/documentation/netstring
-
-[4] see mailto:gerd@gerd-stolpmann.de
-
-[5] see mailto:gerd@gerd-stolpmann.de
-
-[6] see http://www.ocaml-programming.de/packages/documentation/markup
-
-
-
+++ /dev/null
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE readme SYSTEM "readme.dtd" [
-
-<!ENTITY % common SYSTEM "common.xml">
-%common;
-
-<!-- Special HTML config: -->
-<!ENTITY % readme:html:up '<a href="../..">up</a>'>
-
-<!ENTITY % config SYSTEM "config.xml">
-%config;
-
-]>
-
-<readme title="README - PXP, the XML parser for O'Caml">
- <sect1>
- <title>Pre-release of PXP, the XML parser for O'Caml</title>
-
- <p>PXP is the new, completely revised and partly rewritten
-validating XML parser
-for O'Caml; the old name, "Markup", has been dropped. The current version
-of PXP is still a bit experimental because it is not fully tested; however,
-it is now stable enough to be used in experimental applications.
-</p>
-
- <p>PXP will retain most parts of Markup's API; the name PXP
-emphasizes the strengths of the API: it is the Polymorphic XML Parser.
-The document objects representing the parsed file have an interesting
-polymorphism which allows the user of the parser to control
-which kind of objects are actually created. The current API supports
-the element type as criterion for object/class selection; future APIs will
-extend this concept such that arbitrary criteria are possible
-(e.g. you may want to have different classes for different namespaces).
-</p>
-
- <p>The current development goals of PXP are:</p>
-
- <ul>
- <li><p><em>Full XML-1.0 conformance:</em> The current pre-release
-is now very close to strict XML-1.0 conformance. The only bigger
-difference to the standard is that PXP sometimes accepts DTDs as legal
-while the standard forbids them (non-deterministic content models).</p>
-
-<p>One of the more important improvements since 0.2.10 is the possibility to
-represent XML documents internally as UTF-8 strings, not only as ISO-8859-1
-strings. Thanks to Claudio Sacerdoti Coen who contributed a special lexer
-preprocessor hiding the details of the UTF-8 encoding in the lexer definitions.
-</p>
- </li>
-
- <li><p><em>Correctness of validation:</em> The well-formedness
-and validity constraints must be implemented as correctly as possible.
-The last stable release already had a regression test covering many
-aspects of XML. The test suite will be extended.</p>
- </li>
-
- <li><p><em>Parsing performance:</em> It should be possible to
-process large amounts of data in a reasonable period of time. The last
-stable release had many stages of processing that wasted time.</p>
-
- <p>The current pre-release is already 30 per cent faster than
-0.2.10.</p>
- </li>
-
-        <li><p><em>Simplicity of usage:</em> Unlike parsers based on
-imperative languages and DOM, the usage of PXP should be simple, even
-for complex tasks. The current parser API already has many advantages
-over DOM; in particular, it is well integrated into the functional and
-object-oriented language O'Caml. You do not have to deal with
-artificial representations like "node lists" while the programming
-environment already provides good support for list structures. The
-fact that O'Caml allows a functional programming style is interesting
-for programs transforming XML trees.</p>
- </li>
- </ul>
- </sect1>
-
- <sect1>
- <title>Download the PXP pre-release</title>
-
- <p>The current pre-release is available under
-<a href="&url.gps-ocaml-download;/pxp-pre-0.99.8.tar.gz">
-&url.gps-ocaml-download;/pxp-pre-0.99.8.tar.gz</a>. There is currently no
-documentation for this version of the software; it is recommended to use the <a
-href="&url.markup-manual;">Markup manual</a> and compare it with the current
-module interfaces.</p>
-
- <p>Please note that this is work in progress; it may still contain bugs
-and irregularities.</p>
-
- <p>The parser works only with OCaml-3. The parser needs the <a
-href="&url.netstring-project;">netstring package</a>, at least version 0.9.1.
-</p>
-
- <p>I am very interested in your opinion of PXP; please <a
-href="mailto:&person.gps.mail;">contact me</a>.</p>
- </sect1>
-
- <sect1>
- <title>Author, Credits, Copying</title>
- <p>
-<em>PXP</em> has been written by &person.gps;; it contains contributions by
-Claudio Sacerdoti Coen. You may copy it as you like,
-and you may even use it for commercial purposes as long as the license
-conditions are respected; see the file LICENSE coming with the distribution.
-It allows almost everything.
-</p>
- </sect1>
-
- <sect1>
- <title>Where to find the stable release</title>
- <p><a href="&url.markup-project;">Here.</a></p>
- </sect1>
-
-</readme>
-
+++ /dev/null
-******************************************************************************
-README - PXP, the XML parser for O'Caml
-******************************************************************************
-
-
-==============================================================================
-Abstract
-==============================================================================
-
-PXP is a validating parser for XML-1.0 which has been written entirely in
-Objective Caml.
-
-PXP is the new name of the parser formerly known as "Markup". PXP means
-"Polymorphic XML parser" and emphasizes its most useful property: that the API
-is polymorphic and can be configured such that different objects are used to
-store different types of elements.
-
-==============================================================================
-Download
-==============================================================================
-
-You can download PXP as a gzip'ed tarball [1]. The parser needs the Netstring [2]
-package (0.9.3). Note that PXP requires O'Caml 3.00.
-
-==============================================================================
-User's Manual
-==============================================================================
-
-The manual is included in the distribution both as a PostScript document and
-as a bunch of HTML files. An online version can be found here [3].
-
-==============================================================================
-Author, Credits, Copying
-==============================================================================
-
-PXP has been written by Gerd Stolpmann [4]; it contains contributions by
-Claudio Sacerdoti Coen. You may copy it as you like, and you may even use it
-for commercial purposes as long as the license conditions are respected; see
-the file LICENSE coming with the distribution. It allows almost everything.
-
-Thanks also to Alain Frisch and Haruo Hosoya for discussions and bug reports.
-
-==============================================================================
-Description
-==============================================================================
-
-PXP is a validating XML parser for O'Caml [5]. It strictly complies with the
-XML-1.0 [6] standard.
-
-The parser is simple to call: usually only one statement (function call) is
-sufficient to parse an XML document and to represent it as an object tree.
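-
-For illustration, such a call might look like this (a minimal sketch; the exact
-function and value names should be checked against the interface of the
-Pxp_yacc module):
-
-  let doc =
-    Pxp_yacc.parse_document_entity
-      Pxp_yacc.default_config             (* parser configuration *)
-      (Pxp_yacc.from_file "sample.xml")   (* the 'source' to read from *)
-      Pxp_yacc.default_spec               (* the 'spec' selecting node classes *)
-  ;;
-  (* doc is the document object; doc # root is the root element node. *)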
-
-Once the document is parsed, it can be accessed using a class interface. The
-interface allows arbitrary access including transformations. One of the
-features of the document representation is its polymorphic nature; it is simple
-to add custom methods to the document classes. Furthermore, the parser can be
-configured such that different XML elements are represented by objects created
-from different classes. This is a very powerful feature, because it simplifies
-the structure of programs processing XML documents.
-
-Note that the class interface does not comply with the DOM standard. It was not
-a development goal to realize a standard API (industrial developers can do this
-much better than I); however, the API is powerful enough to be considered
-equivalent to DOM. More importantly, the interface is compatible with the XML
-information model required by many XML-related standards.
-
-------------------------------------------------------------------------------
-Detailed feature list
-------------------------------------------------------------------------------
-
-- The XML instance is validated against the DTD; any violation of a validation
- constraint leads to the rejection of the instance. The validator has been
- carefully implemented, and conforms strictly to the standard. If needed, it
- is also possible to run the parser in a well-formedness mode.
-
-- If possible, the validator applies a deterministic finite automaton to
- validate the content models. This ensures that validation can always be
- performed in linear time. However, if the content models are not
- deterministic, the parser uses a backtracking algorithm, which can be
- much slower. It is also possible to reject non-deterministic content
- models.
-
-- In particular, the validator also checks the complicated rules governing
- whether parentheses are properly nested with respect to entities, and whether the
- standalone declaration is satisfied. On demand, it is checked whether the
- IDREF attributes only refer to existing nodes.
-
-- Entity references are automatically resolved while the XML text is being
- scanned. It is not possible to recognize in the object tree where a
- referenced entity begins or ends; the object tree only represents the
- logical structure.
-
-- External entities are loaded using a configurable resolver infrastructure.
- It is possible to connect the parser with an arbitrary XML source.
-
-- The parser can read XML text encoded in a variety of character sets.
- Independently of this, it is possible to choose the encoding of the internal
- representation of the tree nodes; the parser automatically converts the
- input text to this encoding. Currently, the parser supports UTF-8 and
- ISO-8859-1 as internal encodings.
-
-- The interface of the parser has been designed so that it integrates well
- into the language O'Caml. The first goal was simplicity of usage,
- which is achieved by many convenience methods and functions, and by allowing
- the user to select which parts of the XML text are actually represented in
- the tree. For example, it is possible to store processing instructions as
- tree nodes, but the parser can also be configured such that these
- instructions are put into hashtables. The information model is compatible
- with the requirements of XML-related standards such as XPath.
-
-- In particular, the node tree can optionally contain or leave out processing
- instructions and comments. It is also possible to generate a "super root"
- object which is the parent of the root element. The attributes of elements
- are normally not stored as nodes, but it is possible to get them wrapped
- into nodes.
-
-- There is also an interface for DTDs; you can parse and access sequences of
- declarations. The declarations are fully represented as recursive O'Caml
- values.
-
-------------------------------------------------------------------------------
-Code examples
-------------------------------------------------------------------------------
-
-This distribution contains several examples:
-
-- validate: simply parses a document and prints all error messages
-
-- readme: Defines a DTD for simple "README"-like documents, and offers
- conversion to HTML and text files [7].
-
-- xmlforms: This is already a sophisticated application that uses XML as a
- style sheet language and data storage format. It shows how a Tk user interface can
- be configured by an XML style, and how data records can be stored using XML.
-
-------------------------------------------------------------------------------
-Restrictions and missing features
-------------------------------------------------------------------------------
-
-The following restrictions apply; they are not violations of the standard:
-
-- The attributes "xml:space", and "xml:lang" are not supported specially. (The
- application can do this.)
-
-- The built-in support for SYSTEM and PUBLIC identifiers is limited to local
- file access. There is no support for catalogs. The parser offers a hook to
- add missing features.
-
-- It is currently not possible to check for interoperability with SGML.
-
-The following features are also missing:
-
-- There is no special support for namespaces. (Perhaps in the next release?)
-
-- There is no support for XPATH or XSLT.
-
-However, I hope that these features will be implemented soon, either by myself
-or by contributors (who are invited to do so).
-
-------------------------------------------------------------------------------
-Recent Changes
-------------------------------------------------------------------------------
-
-- Changed in 1.0:
- Support for document order.
-
-- Changed in 0.99.8:
- Several fixes of bugs reported by Haruo Hosoya and Alain Frisch.
- The class type "node" has been extended: you can go directly to the next and
- previous nodes in the list; you can refer to nodes by position.
- There are now some iterators for nodes: find, find_all, find_element,
- find_all_elements, map_tree, iter_tree.
- Experimental support for viewing attributes as nodes; I hope that helps
- Alain writing his XPath evaluator.
- The user's manual has been revised and is almost up to date.
-
-- Changed in 0.99.7:
- There are now additional node types T_super_root, T_pinstr and T_comment,
- and the parser is able to create the corresponding nodes.
- The functions for character set conversion have been moved to the Netstring
- package; they are not specific for XML.
-
-- Changed in 0.99.6:
- Implemented a check on deterministic content models. Added an alternate
- validator based on a DFA. This means that now all mandatory features for
- an XML-1.0 parser are implemented! The parser is now substantially complete.
-
-- Changed in 0.99.5:
- The handling of ID and IDREF attributes has changed. The index of nodes
- containing an ID attribute is now separated from the document. Optionally
- the parser now checks whether the IDREF attributes refer to existing
- elements.
- The element nodes can optionally store the location in the source XML code.
- The method 'write' writes the XML tree in every supported encoding.
- (Successor of 'write_compact_as_latin1'.)
- Several smaller changes and fixes.
-
-- Changed in 0.99.4:
- The module Pxp_reader has been modernized. The resolver classes are simpler
- to use. There is now support for URLs.
- The interface of Pxp_yacc has been improved: The type 'source' is now
- simpler. The type 'domspec' has gone; the new 'spec' is opaque and performs
- better. There are some new parsing modes.
- Many smaller changes.
-
-- Changed in 0.99.3:
- The markup_* modules have been renamed to pxp_*. There is a new
- compatibility API that tries to be compatible with markup-0.2.10.
- The type "encoding" is now a polymorphic variant.
-
-- Changed in 0.99.2:
- Added checks for the constraints about the standalone declaration.
- Added regression tests about attribute normalization, attribute checks,
- standalone checks.
- Fixed some minor errors of the attribute normalization function.
- The bytecode/native archives are now separated into a general part, an
- ISO-8859-1-relevant part, and a UTF-8-relevant part. The parser can again be
- compiled with ocamlopt.
-
-- Changed in 0.99.1:
- In general, this release is an early pre-release of the next stable version
- 1.00. I do not recommend using it for serious work; it is still very
- experimental!
- The core of the parser has been rewritten using a self-written parser
- generator.
- The lexer has been restructured, and can now handle UTF-8 encoded files.
- Numerous other changes.
-
-
---------------------------
-
-[1] see http://www.ocaml-programming.de/packages/pxp-1.0.tar.gz
-
-[2] see http://www.ocaml-programming.de/packages/documentation/netstring
-
-[3] see http://www.ocaml-programming.de/packages/documentation/pxp/manual
-
-[4] see mailto:gerd@gerd-stolpmann.de
-
-[5] see http://caml.inria.fr/
-
-[6] see http://www.w3.org/TR/1998/REC-xml-19980210.html
-
-[7] This particular document is an example of this DTD!
-
-
-
+++ /dev/null
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE readme SYSTEM "readme.dtd" [
-
-<!--
-<!ENTITY url.ocaml "http://caml.inria.fr/">
-<!ENTITY url.xml-spec "http://www.w3.org/TR/1998/REC-xml-19980210.html">
-<!ENTITY url.jclark-xmltdata "ftp://ftp.jclark.com/pub/xml/xmltest.zip">
-<!ENTITY url.gps-ocaml-download "http://people.darmstadt.netsurf.de/ocaml">
-<!ENTITY url.markup-download "&url.gps-ocaml-download;/markup-0.1.tar.gz">
-<!ENTITY person.gps '<a
- href="mailto:Gerd.Stolpmann@darmstadt.netsurf.de">Gerd Stolpmann</a>'>
--->
-
-<!ENTITY % common SYSTEM "common.xml">
-%common;
-
-<!-- Special HTML config: -->
-<!ENTITY % readme:html:up '<a href="../..">up</a>'>
-
-<!ENTITY % config SYSTEM "config.xml">
-%config;
-
-]>
-
-<readme title="README - PXP, the XML parser for O'Caml">
- <sect1>
- <title>Abstract</title>
- <p>
-<em>PXP</em> is a validating parser for XML-1.0 which has been written
-entirely in Objective Caml.
-</p>
-
- <p>PXP is the new name of the parser formerly known as "Markup".
-PXP means "Polymorphic XML parser" and emphasizes its most useful
-property: that the API is polymorphic and can be configured such that
-different objects are used to store different types of elements.</p>
- </sect1>
-
- <sect1>
- <title>Download</title>
- <p>
-You can download <em>PXP</em> as a gzip'ed <a
-href="&url.pxp-download;">tarball</a>. The parser needs the <a
-href="&url.netstring-project;">Netstring</a> package (0.9.3). Note that PXP
-requires O'Caml 3.00.
-</p>
- </sect1>
-
- <sect1>
- <title>User's Manual</title>
- <p>
-The manual is included in the distribution both as a PostScript document and
-as a bunch of HTML files. An online version can be found <a
- href="&url.pxp-manual;">here</a>.
-</p>
- </sect1>
-
- <sect1>
- <title>Author, Credits, Copying</title>
- <p>
-<em>PXP</em> has been written by &person.gps;; it contains contributions by
-Claudio Sacerdoti Coen. You may copy it as you like,
-and you may even use it for commercial purposes as long as the license
-conditions are respected; see the file LICENSE coming with the distribution.
-It allows almost everything.
-</p>
-
- <p>Thanks also to Alain Frisch and Haruo Hosoya for discussions and bug
-reports.</p>
- </sect1>
-
- <sect1>
- <title>Description</title>
- <p>
-<em>PXP</em> is a validating XML parser for <a
-href="&url.ocaml;">O'Caml</a>. It strictly complies to the
-<a href="&url.xml-spec;">XML-1.0</a> standard.
-</p>
-
- <p>The parser is simple to call: usually only one statement (function
-call) is sufficient to parse an XML document and to represent it as an object
-tree.</p>
-
- <p>
-Once the document is parsed, it can be accessed using a class interface.
-The interface allows arbitrary access including transformations. One of
-the features of the document representation is its polymorphic nature;
-it is simple to add custom methods to the document classes. Furthermore,
-the parser can be configured such that different XML elements are represented
-by objects created from different classes. This is a very powerful feature,
-because it simplifies the structure of programs processing XML documents.
-</p>
-
- <p>
-Note that the class interface does not comply with the DOM standard. It was not
-a development goal to realize a standard API (industrial developers can do this
-much better than I); however, the API is powerful enough to be considered
-equivalent to DOM. More importantly, the interface is compatible with the
-XML information model required by many XML-related standards.
-</p>
-
- <sect2>
- <title>Detailed feature list</title>
-
- <ul>
- <li><p>The XML instance is validated against the DTD; any violation of
-a validation constraint leads to the rejection of the instance. The validator
-has been carefully implemented, and conforms strictly to the standard. If
-needed, it is also possible to run the parser in a well-formedness mode.</p>
- </li>
- <li><p>If possible, the validator applies a deterministic finite
-automaton to validate the content models. This ensures that validation can
-always be performed in linear time. However, if the content
-models are not deterministic, the parser uses a backtracking algorithm, which
-can be much slower. It is also possible to reject non-deterministic content
-models.</p>
- </li>
- <li><p>In particular, the validator also checks the complicated rules
-governing whether parentheses are properly nested with respect to entities, and whether
-the standalone declaration is satisfied. On demand, it is checked whether the
-IDREF attributes only refer to existing nodes.</p>
- </li>
- <li><p>Entity references are automatically resolved while the XML text
-is being scanned. It is not possible to recognize in the object tree where a
-referenced entity begins or ends; the object tree only represents the logical structure.</p>
- </li>
- <li><p>External entities are loaded using a configurable resolver
-infrastructure. It is possible to connect the parser with an arbitrary XML source.</p>
- </li>
- <li><p>The parser can read XML text encoded in a variety of character
-sets. Independently of this, it is possible to choose the encoding of the
-internal representation of the tree nodes; the parser automatically converts
-the input text to this encoding. Currently, the parser supports UTF-8 and
-ISO-8859-1 as internal encodings.</p>
- </li>
- <li><p>The interface of the parser has been designed so that it
-integrates well into the language O'Caml. The first goal was simplicity of
-usage, which is achieved by many convenience methods and functions, and by
-allowing the user to select which parts of the XML text are actually
-represented in the tree. For example, it is possible to store processing
-instructions as tree nodes, but the parser can also be configured such that
-these instructions are put into hashtables. The information model is compatible
-with the requirements of XML-related standards such as XPath.</p>
- </li>
- <li><p>In particular, the node tree can optionally contain or leave out
-processing instructions and comments. It is also possible to generate a "super
-root" object which is the parent of the root element. The attributes of
-elements are normally not stored as nodes, but it is possible to get them
-wrapped into nodes.</p>
- </li>
- <li><p>There is also an interface for DTDs; you can parse and access
-sequences of declarations. The declarations are fully represented as recursive
-O'Caml values.
-</p>
- </li>
- </ul>
- </sect2>
-
-
- <sect2>
- <title>Code examples</title>
- <p>
-This distribution contains several examples:</p>
- <ul>
- <li><p>
-<em>validate:</em> simply parses a
-document and prints all error messages
-</p></li>
-
- <li><p>
-<em>readme:</em> Defines a DTD for simple "README"-like documents, and offers
-conversion to HTML and text files<footnote>This particular document is an
-example of this DTD!</footnote>.
-</p></li>
-
- <li><p>
-<em>xmlforms:</em> This is already a
-sophisticated application that uses XML as a style sheet language and data
-storage format. It shows how a Tk user interface can be configured by an
-XML style, and how data records can be stored using XML.
-</p></li>
- </ul>
- </sect2>
-
- <sect2>
- <title>Restrictions and missing features</title>
- <p>
-The following restrictions apply; they are not violations of the standard:
-</p>
- <ul>
- <li><p>
-The attributes "xml:space", and "xml:lang" are not supported specially.
- (The application can do this.)</p></li>
-
- <li><p>
-The built-in support for SYSTEM and PUBLIC identifiers is limited to
- local file access. There is no support for catalogs. The parser offers
- a hook to add missing features.</p></li>
-
- <li><p>
-It is currently not possible to check for interoperability with SGML.
-</p></li>
- </ul>
-
-<p>The following features are also missing:</p>
- <ul>
- <li><p>There is no special support for namespaces. (Perhaps in the next release?)</p>
- </li>
- <li><p>There is no support for XPATH or XSLT.</p>
- </li>
- </ul>
-<p>However, I hope that these features will be implemented soon, either by
-myself or by contributors (who are invited to do so).</p>
- </sect2>
-
- <sect2>
- <title>Recent Changes</title>
- <ul>
- <li>
- <p>Changed in 1.0:</p>
- <p>Support for document order.</p>
- </li>
- <li>
- <p>Changed in 0.99.8:</p>
- <p>Several fixes of bugs reported by Haruo Hosoya and Alain
-Frisch.</p>
- <p>The class type "node" has been extended: you can go directly to
-the next and previous nodes in the list; you can refer to nodes by
-position.</p>
- <p>There are now some iterators for nodes: find, find_all,
-find_element, find_all_elements, map_tree, iter_tree.</p>
- <p>Experimental support for viewing attributes as nodes; I hope that
-helps Alain writing his XPath evaluator.</p>
- <p>The user's manual has been revised and is almost up to date.</p>
- </li>
- <li>
- <p>Changed in 0.99.7:</p>
- <p>There are now additional node types T_super_root, T_pinstr and
-T_comment, and the parser is able to create the corresponding nodes.</p>
- <p>The functions for character set conversion have been moved to
-the Netstring package; they are not specific for XML.</p>
- </li>
- <li>
- <p>Changed in 0.99.6:</p>
- <p>Implemented a check on deterministic content models. Added
-an alternate validator based on a DFA. This means that now all mandatory
-features for an XML-1.0 parser are implemented! The parser is now substantially
-complete.</p>
- </li>
- <li>
- <p>Changed in 0.99.5:</p>
- <p>The handling of ID and IDREF attributes has changed. The
-index of nodes containing an ID attribute is now separated from the document.
-Optionally the parser now checks whether the IDREF attributes refer to
-existing elements.</p>
- <p>The element nodes can optionally store the location in the
-source XML code.</p>
- <p>The method 'write' writes the XML tree in every supported
-encoding. (Successor of 'write_compact_as_latin1'.)</p>
- <p>Several smaller changes and fixes.</p>
- </li>
- <li>
- <p>Changed in 0.99.4:</p>
- <p>The module Pxp_reader has been modernized. The resolver classes
-are simpler to use. There is now support for URLs.</p>
- <p>The interface of Pxp_yacc has been improved: The type 'source'
-is now simpler. The type 'domspec' has gone; the new 'spec' is opaque and
-performs better. There are some new parsing modes.</p>
- <p>Many smaller changes.</p>
- </li>
- <li>
- <p>Changed in 0.99.3:</p>
- <p>The markup_* modules have been renamed to pxp_*. There is a new
-compatibility API that tries to be compatible with markup-0.2.10.</p>
- <p>The type "encoding" is now a polymorphic variant.</p>
- </li>
- <li>
- <p>Changed in 0.99.2:</p>
- <p>Added checks for the constraints about the standalone
-declaration.</p>
- <p>Added regression tests about attribute normalization,
-attribute checks, standalone checks.</p>
- <p>Fixed some minor errors of the attribute normalization
-function.</p>
- <p>The bytecode/native archives are now separated into
-a general part, an ISO-8859-1-relevant part, and a UTF-8-relevant
-part. The parser can again be compiled with ocamlopt.</p>
- </li>
- <li>
- <p>Changed in 0.99.1:</p>
- <p>In general, this release is an early pre-release of the
-next stable version 1.00. I do not recommend using it for serious
-work; it is still very experimental!</p>
- <p>The core of the parser has been rewritten using a self-written
-parser generator.</p>
- <p>The lexer has been restructured, and can now handle UTF-8
-encoded files.</p>
- <p>Numerous other changes.</p>
- </li>
-
-<!--
- <li>
- <p>Changed in 0.2.10:</p>
- <p>Bugfix: in the "allow_undeclared_attributes" feature.</p>
- <p>Bugfix: in the methods write_compact_as_latin1.</p>
- <p>Improvement: The code produced by the codewriter module can be
-faster compiled and with less memory usage.</p>
- </li>
-
- <li>
- <p>Changed in 0.2.9:</p>
- <p>New: The module Markup_codewriter generates for a given XML
-tree O'Caml code that creates the same XML tree. This is useful for
-applications which use large, constant XML trees.</p>
- <p>New: Documents and DTDs have a method write_compact_as_latin1
-that writes an XML tree to a buffer or to a channel. (But it is not a pretty
-printer...)</p>
- <p>Enhancement: If a DTD contains the processing instruction
-<code>
-<?xml:allow_undeclared_attributes x?></code>
-where "x" is the name of an already declared element it is allowed that
-instances of this element type have attributes that have not been declared.
-</p>
- <p>New function Markup_types.string_of_exn that converts an
-exception from Markup into a readable string.</p>
- <p>Change: The module Markup_reader contains all resolvers.
-The resolver API is now stable.</p>
- <p>New parser modes processing_instructions_inline and
-virtual_root that help locating processing instructions exactly (if needed).
-</p>
- <p>Many bugs regarding CRLF handling have been fixed.</p>
- <p>The distributed tarball contains now the regression test suite.
-</p>
- <p>The manual has been extended (but it is still incomplete and
-still behind the code).</p>
- </li>
- <li>
- <p>Changed in 0.2.8:</p>
- <p>A bit more documentation (Markup_yacc).</p>
- <p>Bugfix: In previous versions, the second trial to refer to
-an entity caused a Bad_character_stream exception. The reason was improper
-re-initialization of the resolver object.</p>
- </li>
- <li>
- <p>Changed in 0.2.7:</p>
- <p>Added some methods in Markup_document.</p>
- <p>Bugfix: in method orphaned_clone</p>
- </li>
- <li>
- <p>Changed in 0.2.6:</p>
- <p>Enhancement: The config parameter has a new component
-"errors_with_line_numbers". If "true", error exceptions come with line numbers
-(the default; and the only option in the previous versions); if "false"
-the line numbers are left out (only character positions). The parser is 10 to
-20 percent faster if the lines are not tracked.</p>
- <p>Enhancement: If a DTD contains the processing instruction
-<code>
-<?xml:allow_undeclared_elements_and_notations?></code>
-it is allowed that
-elements and notations are undeclared. However, the elements for which
-declarations exist are still validated. The main effect is that the
-keyword ALL in element declarations means that also undeclared elements
-are permitted at this location.</p>
- <p>Bugfix in method "set_nodes" of class Markup_document.node_impl.
-</p>
- </li>
- <li>
- <p>Changed in 0.2.5:</p>
- <p>If the XML source is a string (i.e. Latin1 some_string is passed
-to the parser functions as source), resolving did not work properly in
-previous releases. This is now fixed.
-</p>
- </li>
- <li>
- <p>Changed in 0.2.4:</p>
- <p>A problem with some kind of DTD that does not specify the name
-of the root element was fixed. As a result, the "xmlforms" application works
-again. Again thanks to Haruo.</p>
- <p>Due to the XML specs it is forbidden that parameter entities are
-referenced within the internal subset if the referenced text is not a
-complete declaration itself. This is checked, but the check was too hard;
-even in external entities referenced from the internal subset this rule
-was enforced. This has been corrected; in external entities it is now possible
-to use parameter entities in an unrestricted way.
-</p>
- </li>
- <li>
- <p>Changed in 0.2.3:</p>
- <p>A fix for a problem when installing Markup on Solaris.
-Haruo detected the problem.</p>
- </li>
- <li>
- <p>Changed in 0.2.2:</p>
- <p>A single bugfix: The parser did not reject documents where the
-root element was not the element declared as root element. Again thanks
-to Claudio.</p>
- </li>
- <li>
- <p>Changed in 0.2.1:</p>
- <p>A single bugfix which reduces the number of warnings. Thanks
-to Claudio for detecting the bug.</p>
- </li>
- <li>
- <p>Changed in 0.2:</p>
- <p>
-Much more constraints are checked in the 0.2 release than in 0.1. Especially
-that entities are properly nested is now guaranteed; parsed entities now always
-match the corresponding production of the grammar.</p>
- <p>
-Many weak checks have been turned into strong checks. For example, it is now
-detected if the "version", "encoding", and "standalone" attributes of an XML
-declaration are ordered in the right way.
-</p>
- <p>
-The error messages have been improved.
-</p>
- </li>
--->
- </ul>
- </sect2>
- </sect1>
-</readme>
-
+++ /dev/null
-******************************************************************************
-Notes on the XML specification
-******************************************************************************
-
-
-==============================================================================
-This document
-==============================================================================
-
-There are some points in the XML specification which are ambiguous. The
-following notes discuss these points, and describe how this parser behaves.
-
-==============================================================================
-Conditional sections and the token ]]>
-==============================================================================
-
-It is unclear what happens if an ignored section contains the token ]]> at
-places where it is normally allowed, i.e. within string literals and comments,
-e.g.
-
-<![IGNORE[ <!-- ]]> --> ]]>
-
-On the one hand, the production rule of the XML grammar does not treat such
-tokens specially. Following the grammar, the first ]]> already ends the
-conditional section
-
-<![IGNORE[ <!-- ]]>
-
-and the other tokens are included into the DTD.
-
-On the other hand, we can read: "Like the internal and external DTD subsets, a
-conditional section may contain one or more complete declarations, comments,
-processing instructions, or nested conditional sections, intermingled with
-white space" (XML 1.0 spec, section 3.4). Complete declarations and comments
-may contain ]]>, so this contradicts the grammar.
-
-The intention of conditional sections is to include or exclude the section
-depending on the current replacement text of a parameter entity. Almost always
-such sections are used as in
-
-<!ENTITY % want.a.feature.or.not "INCLUDE"> (or "IGNORE")
-<![ %want.a.feature.or.not; [ ... ]]>
-
-This means that if it is possible to include a section it must also be legal to
-ignore the same section. This is a strong indication that the token ]]> must
-not count as section terminator if it occurs in a string literal or comment.
-
-This parser implements the latter interpretation.
-
-==============================================================================
-Conditional sections and the inclusion of parameter entities
-==============================================================================
-
-It is unclear what happens if an ignored section contains a reference to a
-parameter entity. In most cases, this is not problematic because nesting of
-parameter entities must respect declaration braces. The replacement text of
-parameter entities must either contain a whole number of declarations or only
-inner material of one declaration. Almost always it does not matter whether
-these references are resolved or not (the section is ignored).
-
-But there is one case which is not explicitly specified: Is it allowed that the
-replacement text of an entity contains the end marker ]]> of an ignored
-conditional section? Example:
-
-<!ENTITY % end "]]>">
-<![ IGNORE [ %end;
-
-The XML spec does not state that the ]]> must be contained in
-the same entity as the corresponding <![ (as for the tokens <! and > of
-declarations). So it is possible to conclude that ]]> may be in another entity.
-
-Of course, there are many arguments not to allow such constructs: The resulting
-code is incomprehensible, and parsing takes longer (especially if the entities
-are external). I think the best argument against this kind of XML is that the
-XML spec is not detailed enough, as it contains no rules about where entity
-references should be recognized and where not. For example:
-
-<!ENTITY % y "]]>">
-<!ENTITY % x "<!ENTITY z '<![CDATA[some text%y;'>">
-<![ IGNORE [ %x; ]]>
-
-Which token ]]> counts? From a logical point of view, the ]]> in the third line
-ends the conditional section. As already pointed out, the XML spec permits the
-interpretation that ]]> is recognized even in string literals, and this may
-also be true if it is "imported" from a separate entity; and so the first ]]>
-denotes the end of the section.
-
-As a practical solution, this parser does not expand parameter entities in
-ignored sections. Furthermore, it is also not allowed that the ending ]]> of
-ignored or included sections is contained in a different entity than the
-starting <![ token.
-
-==============================================================================
-Standalone documents and attribute normalization
-==============================================================================
-
-If a document is declared as stand-alone, a restriction on the effect of
-attribute normalization applies to attributes declared in external
-entities. Normally, the parser knows the type of the attribute from the ATTLIST
-declaration, and it can normalize attribute values depending on their types.
-For example, an NMTOKEN attribute can be written with leading or trailing
-spaces, but the parser always returns the nmtoken without such added spaces; in
-contrast to this, a CDATA attribute is not normalized in this way. For
-stand-alone documents the type information is not available if the ATTLIST
-declaration is located in an external entity. Because of this, the XML spec
-demands that attribute values must be written in their normal form in this
-case, i.e. without additional spaces.
-
-This parser interprets this restriction as follows. Obviously, the substitution
-of character and entity references is not considered a "change of the value"
-as a result of the normalization, because these operations will be performed
-identically if the ATTLIST declaration is not available. The same applies to
-the substitution of TABs, CRs, and LFs by space characters. Only the removal of
-spaces depending on the type of the attribute changes the value if the ATTLIST
-is not available.
-
-This means in detail: CDATA attributes never violate the stand-alone status.
-ID, IDREF, NMTOKEN, ENTITY, NOTATION and enumerator attributes must not be
-written with leading and/or trailing spaces. IDREFS, ENTITIES, and NMTOKENS
-attributes must not be written with extra spaces at the beginning or at the end
-of the value, or between the tokens of the list.
-
-The whole check is dubious, because the attribute type also expresses a
-semantic constraint, not only a syntactic one. At least this parser
-distinguishes strictly between single-value and list types, and returns the
-attribute values differently; the former are represented as Value s (where s is
-a string), the latter are represented as Valuelist [s1; s2; ...; sN]. The
-internal representation of the value depends on the attribute type, too,
-such that even normalized values are processed differently depending on whether
-the attribute has list type or not. For this parser, it still makes a
-difference whether a value is normalized and processed as if it were CDATA, or
-whether the value is processed according to its declared type.
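-
-As a type sketch, the distinction corresponds to a variant type of the
-following shape (constructor names as used above; the actual definition in the
-parser's interface may contain further cases, e.g. for implied values):
-
-type att_value =
-  | Value of string             (* single-value types: CDATA, ID, NMTOKEN, ... *)
-  | Valuelist of string list    (* list types: IDREFS, ENTITIES, NMTOKENS *)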
-
-The stand-alone check is included to be able to make a statement about whether
-other, well-formedness-only parsers can process the document. Of course, these
-parsers always process attributes as CDATA, and the stand-alone check
-guarantees that these parsers will always see the normalized values.
-
-==============================================================================
-Standalone documents and the restrictions on entity
-references
-==============================================================================
-
-Stand-alone documents must not refer to entities which are declared in an
-external entity. This parser applies this rule only: to general and NDATA
-entities when they occur in the document body (i.e. not in the DTD); and to
-general and NDATA entities occurring in default attribute values declared in the
-internal subset of the DTD.
-
-Parameter entities are not relevant to the stand-alone property. If there
-is a parameter entity reference in the internal subset which was declared in an
-external entity, it is unavailable in the same way as the external entity that
-contains its declaration is unavailable. Because of this "equivalence",
-parameter entity references are not checked for violations of the
-stand-alone declaration. It simply does not matter. Illustration:
-
-Main document:
-
-<!ENTITY % ext SYSTEM "ext">
-%ext;
-%ent;
-
-"ext" contains:
-
-<!ENTITY % ent "<!ELEMENT el (other*)>">
-
-
-
-Here, the reference %ent; would be illegal if the standalone declaration is
-strictly interpreted. This parser handles the references %ent; and %ext;
-equivalently which means that %ent; is allowed, but the element type "el" is
-treated as externally declared.
-
-General entities can occur within the DTD, but they can only be contained in
-the default value of attributes, or in the definition of other general
-entities. The latter can be ignored, because the check will be repeated when
-the entities are expanded. Though, general entities occuring in default
-attribute values are actually checked at the moment when the default is used in
-an element instance.
-
-General entities occurring in the document body are always checked.
-
-NDATA entities can occur in ENTITY attribute values; either in the element
-instance or in the default declaration. Both cases are checked.
-
+++ /dev/null
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE readme SYSTEM "readme.dtd" [
-
-<!ENTITY % common SYSTEM "common.xml">
-%common;
-
-<!-- Special HTML config: -->
-<!ENTITY % readme:html:up '<a href="../..">up</a>'>
-
-<!ENTITY % config SYSTEM "config.xml">
-%config;
-
-]>
-
-<readme title="Notes on the XML specification">
-
- <sect1>
- <title>This document</title>
- <p>There are some points in the XML specification which are ambiguous.
-The following notes discuss these points, and describe how this parser
-behaves.</p>
- </sect1>
-
- <sect1>
- <title>Conditional sections and the token ]]></title>
-
- <p>It is unclear what happens if an ignored section contains the
-token ]]> at places where it is normally allowed, i.e. within string
-literals and comments, e.g.
-
-<code>
-<![IGNORE[ <!-- ]]> --> ]]>
-</code>
-
-On the one hand, the production rule of the XML grammar does not treat such
-tokens specially. Following the grammar, the first ]]> already ends
-the conditional section
-
-<code>
-<![IGNORE[ <!-- ]]>
-</code>
-
-and the other tokens are included into the DTD.</p>
-
-<p>On the other hand, we can read: "Like the internal and external DTD subsets,
-a conditional section may contain one or more complete declarations, comments,
-processing instructions, or nested conditional sections, intermingled with
-white space" (XML 1.0 spec, section 3.4). Complete declarations and comments
-may contain ]]>, so this contradicts the grammar.</p>
-
-<p>The intention of conditional sections is to include or exclude the section
-depending on the current replacement text of a parameter entity. Almost
-always such sections are used as in
-
-<code>
-<!ENTITY % want.a.feature.or.not "INCLUDE"> (or "IGNORE")
-<![ %want.a.feature.or.not; [ ... ]]>
-</code>
-
-This means that if it is possible to include a section it must also be
-legal to ignore the same section. This is a strong indication that
-the token ]]> must not count as section terminator if it occurs
-in a string literal or comment.</p>
-
-<p>This parser implements the latter interpretation.</p>
-
- </sect1>
-
- <sect1>
- <title>Conditional sections and the inclusion of parameter entities</title>
-
- <p>It is unclear what happens if an ignored section contains a reference
-to a parameter entity. In most cases, this is not problematic because
-nesting of parameter entities must respect declaration braces. The
-replacement text of parameter entities must either contain a <em>whole</em>
-number of declarations or only inner material of one declaration. Almost always
-it does not matter whether these references are resolved or not
-(the section is ignored).</p>
-
- <p>But there is one case which is not explicitly specified: Is it allowed
-that the replacement text of an entity contains the end marker ]]>
-of an ignored conditional section? Example:
-
-<code>
-<!ENTITY % end "]]>">
-<![ IGNORE [ %end;
-</code>
-
-The XML spec does not state that the ]]> must be contained
-in the same entity as the corresponding <![ (as for the tokens <! and
-> of declarations). So it is possible to conclude that ]]> may be in
-another entity.</p>
-
- <p>Of course, there are many arguments not to allow such constructs: The
-resulting code is incomprehensible, and parsing takes longer (especially if the
-entities are external). I think the best argument against this kind of XML
-is that the XML spec is not detailed enough, as it contains no rules about where
-entity references should be recognized and where not. For example:
-
-<code>
-<!ENTITY % y "]]>">
-<!ENTITY % x "<!ENTITY z '<![CDATA[some text%y;'>">
-<![ IGNORE [ %x; ]]>
-</code>
-
-Which token ]]> counts? From a logical point of view, the ]]> in the
-third line ends the conditional section. As already pointed out, the XML spec
-permits the interpretation that ]]> is recognized even in string literals,
-and this may also be true if it is "imported" from a separate entity; and so
-the first ]]> denotes the end of the section.</p>
-
- <p>As a practical solution, this parser does not expand parameter entities
-in ignored sections. Furthermore, it is also not allowed that the ending ]]>
-of ignored or included sections is contained in a different entity than the
-starting <![ token.</p>
- </sect1>
-
-
- <sect1>
- <title>Standalone documents and attribute normalization</title>
-
- <p>
-If a document is declared as stand-alone, a restriction on the effect of
-attribute normalization applies to attributes declared in external
-entities. Normally, the parser knows the type of the attribute from
-the ATTLIST declaration, and it can normalize attribute values depending
-on their types. For example, an NMTOKEN attribute can be written with
-leading or trailing spaces, but the parser always returns the nmtoken
-without such added spaces; in contrast to this, a CDATA attribute is
-not normalized in this way. For stand-alone documents the type information is
-not available if the ATTLIST declaration is located in an external
-entity. Because of this, the XML spec demands that attribute values must
-be written in their normal form in this case, i.e. without additional
-spaces.
-</p>
- <p>This parser interprets this restriction as follows. Obviously,
-the substitution of character and entity references is not considered
-as a "change of the value" as a result of the normalization, because
-these operations will be performed identically if the ATTLIST declaration
-is not available. The same applies to the substitution of TABs, CRs,
-and LFs by space characters. Only the removal of spaces depending on
-the type of the attribute changes the value if the ATTLIST is not
-available.
-</p>
- <p>This means in detail: CDATA attributes never violate the
-stand-alone status. ID, IDREF, NMTOKEN, ENTITY, NOTATION and enumerator
-attributes must not be written with leading and/or trailing spaces. IDREFS,
-ENTITIES, and NMTOKENS attributes must not be written with extra spaces at the
-beginning or at the end of the value, or between the tokens of the list.
-</p>
- <p>The whole check is dubious, because the attribute type also expresses a
-semantic constraint, not only a syntactic one. At least this parser
-distinguishes strictly between single-value and list types, and returns the
-attribute values differently; the former are represented as Value s (where s is
-a string), the latter are represented as Valuelist [s1; s2; ...; sN]. The
-internal representation of the value depends on the attribute type, too,
-such that even normalized values are processed differently depending on
-whether the attribute has list type or not. For this parser, it still makes a
-difference whether a value is normalized and processed as if it were CDATA, or
-whether the value is processed according to its declared type.
-</p>
- <p>The stand-alone check is included to be able to make a statement
-about whether other, well-formedness-only parsers can process the document. Of course,
-these parsers always process attributes as CDATA, and the stand-alone check
-guarantees that these parsers will always see the normalized values.
-</p>
- </sect1>
-
- <sect1>
- <title>Standalone documents and the restrictions on entity
-references</title>
- <p>
-Stand-alone documents must not refer to entities which are declared in an
-external entity. This parser applies this rule only: to general and NDATA
-entities when they occur in the document body (i.e. not in the DTD); and to
-general and NDATA entities occurring in default attribute values declared in the
-internal subset of the DTD.
-</p>
- <p>
-Parameter entities are not relevant to the stand-alone property. If there
-is a parameter entity reference in the internal subset which was declared in an
-external entity, it is unavailable in the same way as the external entity that
-contains its declaration is unavailable. Because of this "equivalence",
-parameter entity references are not checked for violations of the
-stand-alone declaration. It simply does not matter. Illustration:
-</p>
-
- <p>
-Main document:
-
- <code><![CDATA[
-<!ENTITY % ext SYSTEM "ext">
-%ext;
-%ent;
-]]></code>
-
-"ext" contains:
-
- <code><![CDATA[
-<!ENTITY % ent "<!ELEMENT el (other*)>">
-]]></code>
-</p>
-
- <p>Here, the reference %ent; would be illegal if the standalone
-declaration is strictly interpreted. This parser handles the references
-%ent; and %ext; equivalently which means that %ent; is allowed, but the
-element type "el" is treated as externally declared.
-</p>
-
- <p>
-General entities can occur within the DTD, but they can only be contained in
-the default value of attributes, or in the definition of other general
-entities. The latter can be ignored, because the check will be repeated when
-the entities are expanded. However, general entities occurring in default
-attribute values are actually checked at the moment when the default is
-used in an element instance.
-</p>
- <p>
-General entities occurring in the document body are always checked.</p>
- <p>
-NDATA entities can occur in ENTITY attribute values; either in the element
-instance or in the default declaration. Both cases are checked.
-</p>
- </sect1>
-
-</readme>
+++ /dev/null
------------------------------------------------- -*- indented-text -*-
-Some Notes About the Design:
-----------------------------------------------------------------------
-
-----------------------------------------------------------------------
-Compilation
-----------------------------------------------------------------------
-
-Compilation is non-trivial because:
-
- - The lexer and parser generators (ocamllex and ocamlyacc, respectively) normally
- create code such that the parser module precedes the lexer module.
- THIS design requires that the lexer layer precedes the entity layer
- which precedes the parser layer, because the parsing results modify
- the behaviour of the lexer and entity layers. There is no way to get
- around this because of the nature of XML.
-
- So the dependency relation of the lexer and the parser is modified;
- in particular the "token" type that is normally defined by the
- generated parser is moved to a common predecessor of both lexer
- and parser.
-
- - Another modification of the standard way of handling parsers is that
- the parser is turned into an object. This is necessary because the
- whole parser is polymorphic, i.e. there is a type parameter (the
- type of the node extension).
-
-......................................................................
-
-First some modules are generated as illustrated by the following
-diagram:
-
-
- markup_yacc.mly
- | |
- \|/ \|/ [ocamlyacc, 1]
- V V
- markup_yacc.mli markup_yacc.ml
- | --> renamed into markup_yacc.ml0
- [awk, 2] \|/ |
- V \|/ [sed, 3]
- markup_yacc_token.mlf V
- | | markup_yacc.ml
- markup_lexer_types_ | |
- shadow.mli | | | markup_lexer_types_
- \|/ [sed, \|/ | shadow.ml
- V 4] V | |
- markup_lexer_types.mli | | [sed, 4]
- \|/ \|/
- V V
- markup_lexer_types.ml
-
-
- markup_yacc_shadow.mli
- |
- \|/ [replaces, 5]
- V
- markup_yacc.mli
-
-
-
- markup_lexers.mll
- |
- \|/ [ocamllex, 6]
- V
- markup_lexers.ml
-
-
-Notes:
-
- (1) ocamlyacc generates both a module and a module interface.
- The module is postprocessed in step (3). The interface cannot
- be used, but it contains the definition of the "token" type.
- This definition is extracted in step (2). The interface is
- completely replaced in step (5) by a different file.
-
- (2) An "awk" script extracts the definition of the type "token".
- "token" is created by ocamlyacc upon the %token directives
- in markup_yacc.mly, and normally "token" is defined in
- the module generated by ocamlyacc. This turned out not to be
- useful as the module dependency must be that the lexer is
- an antecedent of the parser and not vice versa (as usual),
- so the "token" type is "moved" to the module Markup_lexer_types
- which is an antecedent of both the lexer and the parser.
-
- (3) A "sed" script turns the generated parser into an object.
- This is rather simple; some "let" definitions must be rewritten
- as "val" definitions, the other "let" definitions as
- "method" definitions. The parser object is needed because
- the whole parser has a polymorphic type parameter.
-
- (4) The implementation and definition of Markup_lexer_types are
- both generated by inserting the "token" type definition
- (in markup_lexer_types.mlf) into two pattern files,
- markup_lexer_types_shadow.ml and -.mli, respectively. The point of insertion
- is marked by the string INCLUDE_HERE.
-
- (5) The generated interface of the Markup_yacc module is replaced
- by a hand-written file.
-
- (6) ocamllex generates the lexer; this process is not patched in any
- way.
-
-......................................................................
-
-After the additional modules have been generated, compilation proceeds
-in the usual manner.
-
-
-----------------------------------------------------------------------
-Hierarchy of parsing layers:
-----------------------------------------------------------------------
-
-From top to bottom:
-
- - Parser: Markup_yacc
- + gets input stream from the main entity object
- + checks most of the grammar
- + creates the DTD object as side-effect
- + creates the element tree as side-effect
- + creates further entity objects that are entered into the DTD
- - Entity layer: Markup_entity
- + gets input stream from the lexers, or another entity object
- + handles entity references: if a reference is encountered the
- input stream is redirected such that the tokens come from the
- referenced entity object
- + handles conditional sections
- - Lexer layer: Markup_lexers
- + gets input from lexbuffers created by resolvers
- + different lexers for different lexical contexts
- + a lexer returns pairs (token,lexid), where token is the scanned
- token, and lexid is the name of the lexer that must be used for
- the next token
- - Resolver layer: Markup_entity
- + a resolver creates the lexbuf from some character source
- + a resolver recodes the input and handles the encoding scheme
-
-----------------------------------------------------------------------
-The YACC based parser
-----------------------------------------------------------------------
-
-ocamlyacc allows an arbitrary 'next_token' function to be passed to the
-parsing functions. We always use 'en # next_token()' where 'en' is the
-main entity object representing the main file to be parsed.
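-
-Schematically, and ignoring the object wrapper described above (the module
-name and the start symbol 'document' are only placeholders for whatever the
-grammar actually defines):
-
-  let parse_main_entity en lexbuf =
-    (* An ocamlyacc entry point has the type
-         (Lexing.lexbuf -> token) -> Lexing.lexbuf -> <result>.
-       The lexbuf given to the token function is ignored here, because the
-       tokens really come from the main entity object 'en'. *)
-    Markup_yacc.document (fun _lexbuf -> en # next_token()) lexbuf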
-
-The parser is not functional, but mainly uses side-effects to accumulate
-the structures that have been recognized. This is very important for the
-entity definitions, because once an entity definition has been found there
-may be a reference to it which is handled by the entity layer (which is
-below the yacc layer). This means that such a definition modifies the
-token source of the parser, and this can only be handled by side-effects
-(at least in a sensible manner; a purely functional parser would have to
-pass unresolved entity references to its caller, which would have to
-resolve the reference and re-parse the whole document!).
-
-Note that element definitions also profit from the imperative style of
-the parser; an element instance can be validated directly once the end
-tag has been read in.
-
-----------------------------------------------------------------------
-The entity layer
-----------------------------------------------------------------------
-
-The parser gets the tokens from the main entity object. This object
-controls the underlying lexing mechanism (see below), and already
-interprets the following:
-
-- Conditional sections (if they are allowed in this entity):
- The structures <![ INCLUDE [ ... ]]> and <![ IGNORE [ ... ]]> are
- recognized and interpreted.
-
- This would be hard to realize by the yacc parser, because:
- - INCLUDE and IGNORE are not recognized as lexical keywords but as names.
- This means that the parser cannot select different rules for them.
- - The text after IGNORE requires a different lexical handling.
-
-- Entity references: &name; and %name;
- The named entity is looked up and the input source is redirected to it, i.e.
- if the main entity object gets the message 'next_token' this message is
- forwarded to the referenced entity. (This entity may choose to forward the
- message again to a third entity, and so on.)
-
- There are some fine points:
-
- - It is okay that redirection happens at token level, not at character level:
- + General entities must always match the 'content' production, and because
- of this they must always consist of a whole number of tokens.
- + If parameter entities are resolved, the XML specification states that
- a space character is inserted before and after the replacement text.
- This also means that such entities always consist of a whole number
- of tokens.
-
- - There are some "nesting constraints":
- + General entities must match the 'content' production. Because of this,
- the special token Begin_entity is inserted before the first token of
- the entity, and End_entity is inserted just before the Eof token. The
- brace Begin_entity...End_entity is recognized by the yacc parser, but
- only in the 'content' production.
- + External parameter entities must match 'extSubsetDecl'. Again,
- Begin_entity and End_entity tokens embrace the inner token stream.
- The brace Begin_entity...End_entity is recognized by the yacc parser
- at the appropriate position.
- (As general and parameter entities are used in different contexts
- (document vs. DTD), both kinds of entities can use the same brace
- Begin_entity...End_entity.)
- + TODO:
- The constraints for internal parameter entities are not yet checked.
-
- - Recursive references can be detected because entities must be opened
- before the 'next_token' method can be invoked.
-
-----------------------------------------------------------------------
-The lexer layer
-----------------------------------------------------------------------
-
-There are five main lexers, and a number of auxiliary lexers. The five
-main lexers are:
-
-- Document (function scan_document):
- Scans an XML document outside the DTD and outside the element instance.
-
-- Content (function scan_content):
- Scans an element instance, but not within tags.
-
-- Within_tag (function scan_within_tag):
- Scans within <...>, i.e. a tag denoting an element instance.
-
-- Document_type (function scan_document_type):
- Scans after <!DOCTYPE until the corresponding >.
-
-- Declaration (function scan_declaration):
- Scans sequences of declarations.
-
-Why several lexers? Because there are different lexical rules in these
-five regions of an XML document.
-
-Every lexer not only produces tokens, but also the name of the next lexer
-to use. For example, if the Document lexer scans "<!DOCTYPE", it also
-outputs that the next token must be scanned by Document_type.
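-
-As an interface sketch (the names are illustrative, not necessarily the real
-ones), every main scanning function therefore has a shape like:
-
-  type lexer_id =
-    | Document | Content | Within_tag | Document_type | Declaration
-
-  (* Each lexer returns the scanned token together with the name of the
-     lexer that must scan the next token: *)
-  val scan_document      : Lexing.lexbuf -> token * lexer_id
-  val scan_content       : Lexing.lexbuf -> token * lexer_id
-  val scan_within_tag    : Lexing.lexbuf -> token * lexer_id
-  val scan_document_type : Lexing.lexbuf -> token * lexer_id
-  val scan_declaration   : Lexing.lexbuf -> token * lexer_id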
-
-It is interesting that this really works. The beginning of every lexical
-context can be recognized by the lexer of the previous context, and there
-is always a token that unambiguously indicates that the context ends.
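-
-As an illustration (made-up names, not the real scanner interface), the
-contract between the lexers can be pictured like this:
-
-  (* Illustrative sketch: every scan step yields a token together with
-     the lexer that has to scan the following token. *)
-  type lexer_id =
-    | Document | Content | Within_tag | Document_type | Declaration
-
-  (* e.g. seeing "<!DOCTYPE" while in Document mode switches to
-     Document_type, and the closing ">" switches back. *)
-  let next_lexer current token =
-    match current, token with
-    | Document, "<!DOCTYPE" -> Document_type
-    | Document_type, ">"    -> Document
-    | _, _                  -> current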
-
-----------------------------------------------------------------------
-The DTD object
-----------------------------------------------------------------------
-
-There is usually one object that collects DTD declarations. All kinds of
-declarations are entered here:
-
-- element and attribute list declarations
-- entity declarations
-- notation declarations
-
-Some properties are validated directly after a declaration has been added
-to the DTD, but most validation is done by a 'validate' method.
-
-The result of 'validate' is stored such that another invocation is cheap.
-A DTD becomes 'unchecked' again if another declaration is added.
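-
-A sketch of this caching (hypothetical names, just to illustrate the idea):
-
-  class dtd_sketch =
-    object
-      val mutable validated = false
-      method add_element_declaration (_name : string) =
-        (* any new declaration invalidates the stored result *)
-        validated <- false
-      method validate =
-        if not validated then begin
-          (* ... the expensive global checks would run here ... *)
-          validated <- true
-        end
-    end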
-
-TODO: We need a special DTD object that allows every content.
-
-The DTD object is known by more or less every other object, i.e. entities
-know the DTD, element declarations and instances know the DTD, and so on.
-
-TODO: We need a method that deletes all entity declarations once the DTD
-is complete (to free memory).
-
-----------------------------------------------------------------------
-Element and Document objects
-----------------------------------------------------------------------
-
-The 'element' objects form the tree of the element instances.
-
-The 'document' object is a derivative of 'element' where properties of the
-whole document can be stored.
-
-New element objects are NOT created by the "new class" mechanism, but
-instead by an exemplar/instance scheme: a new instance is the duplicate
-of an exemplar. This has the advantage that the user can provide their
-own classes for the element instances. A hashtable contains the exemplars
-for every element type (tag name), and there is a default exemplar.
-The user can configure this hashtable such that elements A are represented
-by objects of class element_a, elements B by objects of class element_b,
-and so on.
-
-The object for the root element must already be created before parsing
-starts, and the parser returns the (filled) root object. Because of this,
-the user determines the *static* type of the object without needing a
-downward coercion (which is not possible in OCaml).
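-
-A sketch of the exemplar table (hypothetical; the real element classes
-have a much richer interface):
-
-  class type exemplar =
-    object ('self)
-      method duplicate : 'self      (* "new instance" = copy of exemplar *)
-    end
-
-  let make_instance (table : (string, exemplar) Hashtbl.t)
-                    (default : exemplar) (el_type : string) : exemplar =
-    let ex = try Hashtbl.find table el_type with Not_found -> default in
-    ex#duplicate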
-
-----------------------------------------------------------------------
-Newline normalization
-----------------------------------------------------------------------
-
-The XML spec states that all of \n, \r, and \r\n must be recognized
-as newline characters/character sequences (a small conversion sketch
-follows this list). Notes:
-- The replacement text of entities always contains the original text,
- i.e. \r and \r\n are NOT converted to \n.
- It is unclear if this is a violation of the standard or not.
-- Content of elements: Newline characters are converted to \n.
-- Attribute values: Newline characters are converted to spaces.
-- Processing instructions: Newline characters are not converted.
- It is unclear if this is a violation of the standard or not.
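-
-A small conversion sketch (illustrative only; it shows the treatment used
-for element content):
-
-  (* Map \r\n and bare \r to \n; leave all other characters alone. *)
-  let normalize_newlines s =
-    let b = Buffer.create (String.length s) in
-    let n = String.length s in
-    let i = ref 0 in
-    while !i < n do
-      (match s.[!i] with
-       | '\r' ->
-           Buffer.add_char b '\n';
-           if !i + 1 < n && s.[!i + 1] = '\n' then incr i
-       | c -> Buffer.add_char b c);
-      incr i
-    done;
-    Buffer.contents b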
-
-----------------------------------------------------------------------
-Empty entities
-----------------------------------------------------------------------
-
-Many entities are artificially surrounded by a Begin_entity/End_entity pair.
-This is sometimes not done if the entity is empty:
-
-- External parameter entities are parsed entities, i.e. they must match
- the markupdecl* production. If they are not empty, the Begin_entity/End_entity
- trick guarantees that they match markupdecl+, and that they are only
- referred to at positions where markupdecl+ is allowed.
- If they are empty, they are allowed everywhere just like internal
- parameter entities. Because of this, the Begin_entity/End_entity pair
- is dropped.
-
-- This does not apply to parameter entities (either external or internal)
- that are referred to in the internal subset, nor to internal
- parameter entities, nor to general entities:
-
- + References in the internal subset are only allowed at positions where
- markupdecl can occur, so Begin_entity/End_entity is added even if the
- entity is empty.
- + References to internal parameter entities are allowed anywhere, so
- Begin_entity/End_entity is never added.
- + References to general entities: An empty Begin_entity/End_entity pair
- is recognized by the yacc parser, so special handling is not required.
- Moreover, there is the situation that an empty entity is referred to
- after the toplevel element:
- <!DOCTYPE doc ...[
- <!ENTITY empty "">
- ]>
- <doc></doc>&empty;
- - This is illegal, and the presence of an empty Begin_entity/End_entity pair
- helps to recognize this.
+++ /dev/null
-<?xml encoding="ISO-8859-1"?>
-
-<!-- ************************************************************ -->
-<!-- EXTERNAL URLs -->
-<!-- ************************************************************ -->
-
-<!ENTITY url.ocaml
- "http://caml.inria.fr/">
-
-<!ENTITY url.ocaml.list
- "http://caml.inria.fr/caml-list-eng.html">
-
-<!ENTITY url.ocaml.download
- "ftp://ftp.inria.fr/lang/caml-light/">
-
-<!ENTITY url.ocaml.camlp4
- "http://caml.inria.fr/camlp4/">
-
-<!ENTITY url.ocaml.hump
- "http://caml.inria.fr/hump.html">
-
-<!ENTITY url.ocaml.mottl
- "http://miss.wu-wien.ac.at/~mottl/ocaml_sources/intro.html">
-
-<!ENTITY url.ocaml.mottl.pcre
- "http://miss.wu-wien.ac.at/~mottl/ocaml_sources/pcre_ocaml.tar.gz">
-
-<!ENTITY url.ocaml.lindig
- "http://www.cs.tu-bs.de/softech/people/lindig/software/index.html">
-
-<!ENTITY url.ocaml.lindig.ocmarkup
- "http://www.cs.tu-bs.de/softech/people/lindig/software/ocmarkup.html">
-
-<!ENTITY url.ocaml.lindig.tony
- "http://www.cs.tu-bs.de/softech/people/lindig/software/tony.html">
-
-<!ENTITY url.ocaml.filliatre
- "http://www.lri.fr/~filliatr/software.en.html">
-
-<!ENTITY url.ocaml.filliatre.cgi
- "http://www.lri.fr/~filliatr/ftp/ocaml/cgi/">
-
-<!ENTITY url.xml-spec
- "http://www.w3.org/TR/1998/REC-xml-19980210.html">
-
-<!ENTITY url.xml.oasis
- "http://www.oasis-open.org/cover/">
-
-<!ENTITY url.xml.w3c
- "http://www.w3c.org/XML/">
-
-<!ENTITY url.jclark-xmltdata
- "ftp://ftp.jclark.com/pub/xml/xmltest.zip">
-
-<!ENTITY urlprefix.ietf.rfc
- "http://www.ietf.org/rfc">
- <!-- No trailing "/"! -->
-
-<!ENTITY url.apache
- "http://www.apache.org/">
-
-
-<!-- ************************************************************ -->
-<!-- MY URLs -->
-<!-- ************************************************************ -->
-
-<!ENTITY url.linkdb
- "http://www.npc.de/ocaml/linkdb">
-
-<!-- ************************************************************ -->
-<!-- HOMEPAGE URLs -->
-<!-- ************************************************************ -->
-
-<!-- GENERIC -->
-
-<!ENTITY url.gps-ocaml-download
- "http://people.darmstadt.netsurf.de/Gerd.Stolpmann/ocaml">
-
-<!ENTITY url.gps-ocaml-projects
- "http://people.darmstadt.netsurf.de/Gerd.Stolpmann/ocaml/projects">
-
-<!ENTITY url.gps-old-download
- "http://people.darmstadt.netsurf.de/Gerd.Stolpmann/download">
-
-
-<!-- SPECIFIC -->
-
-<!ENTITY release.findlib
- "SOME-VERSION">
-
-<!ENTITY url.findlib-download
- "&url.gps-ocaml-download;/findlib-&release.findlib;.tar.gz">
-
-<!ENTITY url.findlib-project
- "&url.gps-ocaml-projects;/findlib/">
-
-<!ENTITY url.findlib-manual
- "&url.gps-ocaml-projects;/findlib/">
-
-
-
-<!ENTITY release.markup
- "SOME-VERSION">
-
-<!ENTITY url.markup-download
- "&url.gps-ocaml-download;/markup-&release.markup;.tar.gz">
-
-<!ENTITY url.markup-project
- "&url.gps-ocaml-projects;/markup">
-
-<!ENTITY url.markup-manual
- "&url.gps-ocaml-projects;/markup/manual">
-
-
-<!-- ************************************************************ -->
-<!-- MAIL URLs -->
-<!-- ************************************************************ -->
-
-<!ENTITY person.gps '<a href="mailto:&person.gps.mail;">Gerd Stolpmann</a>'>
-
-<!ENTITY person.gps.mail
- "Gerd.Stolpmann@darmstadt.netsurf.de">
-
+++ /dev/null
-DOCBOOK_HTML = /usr/share/sgml/docbkdsl/html
-DOCBOOK_PRINT = /usr/share/sgml/docbkdsl/print
-SRC = $(PWD)/src
-
-.PHONY: html ps
-
-default: html ps
-
-html: html/book1.htm html/pic/done
-
-ps: ps/markup.ps ps/pic/done
-
-
-src/readme.ent: ../../examples/readme/to_html.ml
- src/getcode.ml <../../examples/readme/to_html.ml >src/readme.ent
-
-src/yacc.mli.ent: ../../pxp_yacc.mli
- src/getcode.ml <../../pxp_yacc.mli >src/yacc.mli.ent
-
-src/dtd.mli.ent: ../../pxp_dtd.mli
- src/getcode.ml <../../pxp_dtd.mli >src/dtd.mli.ent
-
-html/book1.htm: src/*.sgml src/readme.ent src/yacc.mli.ent src/dtd.mli.ent
- mkdir -p html
- cp src/markup.css html; \
- cd html; \
- rm -f *.htm*; \
- jade -t sgml -D$(DOCBOOK_HTML) -D$(SRC) -ihtml markup.sgml; \
- true
- touch html/TIMESTAMP
-
-html/pic/done: src/pic/*.fig
- mkdir -p html/pic
- l=`cd src/pic; echo *.fig`; \
- for x in $$l; do fig2dev -L gif src/pic/$$x html/pic/`basename $$x .fig`.gif; done
- touch html/pic/done
-
-#man: src/findlib_reference.xml
-# mkdir -p man
-# cd man; \
-# rm -f *.[0-9]; \
-# db2man <../src/findlib_reference.xml
-
-ps/markup.tex: src/*.sgml src/readme.ent src/yacc.mli.ent src/dtd.mli.ent
- mkdir -p ps
- cd ps; \
- jade -t tex -D$(DOCBOOK_PRINT) -D$(SRC) markup.sgml; \
- true
-
-ps/markup.dvi: ps/markup.tex ps/pic/done
- cd ps; \
- jadetex markup.tex; \
- jadetex markup.tex; \
- jadetex markup.tex
-
-ps/markup.ps: ps/markup.dvi
- cd ps; \
- dvips -f <markup.dvi >markup.ps
-
-ps/pic/done: src/pic/*.fig
- mkdir -p ps/pic
- l=`cd src/pic; echo *.fig`; \
- for x in $$l; do fig2dev -L ps -m 0.8 src/pic/$$x ps/pic/`basename $$x .fig`.ps; done
- touch ps/pic/done
-
-.SUFFIXES: .xml .sgml
-
-.sgml.xml:
- sx -xndata $< >$@; true
-
-
-
-clean:
- rm -rf html man ps
- rm -f src/readme.ent
-
-CLEAN: clean
-
-distclean:
- rm -f src/*~
- rm -f *~
- rm -f ps/*.aux ps/*.dvi ps/*.log ps/*.tex
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->Configuring and calling the parser</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="User's guide"
-HREF="p34.html"><LINK
-REL="PREVIOUS"
-TITLE="Details of the mapping from XML text to the tree representation"
-HREF="x1496.html"><LINK
-REL="NEXT"
-TITLE="Resolvers and sources"
-HREF="x1629.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="CHAPTER"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="x1496.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
-></TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-><A
-HREF="x1629.html"
->Next</A
-></TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="CHAPTER"
-><H1
-><A
-NAME="AEN1567"
->Chapter 4. Configuring and calling the parser</A
-></H1
-><DIV
-CLASS="TOC"
-><DL
-><DT
-><B
->Table of Contents</B
-></DT
-><DT
->4.1. <A
-HREF="c1567.html#AEN1569"
->Overview</A
-></DT
-><DT
->4.2. <A
-HREF="x1629.html"
->Resolvers and sources</A
-></DT
-><DT
->4.3. <A
-HREF="x1812.html"
->The DTD classes</A
-></DT
-><DT
->4.4. <A
-HREF="x1818.html"
->Invoking the parser</A
-></DT
-><DT
->4.5. <A
-HREF="x1965.html"
->Updates</A
-></DT
-></DL
-></DIV
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN1569"
->4.1. Overview</A
-></H1
-><P
->There are the following main functions invoking the parser (in Pxp_yacc):
-
- <P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><I
-CLASS="EMPHASIS"
->parse_document_entity:</I
-> You want to
-parse a complete and closed document consisting of a DTD and the document body;
-the body is validated against the DTD. This mode is interesting if you have a
-file
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!DOCTYPE root ... [ ... ] > <root> ... </root></PRE
->
-
-and you can accept any DTD that is included in the file (e.g. because the file
-is under your control).</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><I
-CLASS="EMPHASIS"
->parse_wfdocument_entity:</I
-> You want to
-parse a complete and closed document consisting of a DTD and the document body;
-but the body is not validated, only checked for well-formedness. This mode is
-preferred if validation costs too much time or if the DTD is missing.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><I
-CLASS="EMPHASIS"
->parse_dtd_entity:</I
-> You want only to
-parse an entity (file) containing the external subset of a DTD. Sometimes it is
-interesting to read such a DTD, for example to compare it with the DTD included
-in a document, or to apply the next mode:</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><I
-CLASS="EMPHASIS"
->parse_content_entity:</I
-> You want only to
-parse an entity (file) containing a fragment of a document body; this fragment
-is validated against the DTD you pass to the function. In particular, the fragment
-must not have a <TT
-CLASS="LITERAL"
-> <!DOCTYPE></TT
-> clause, and must directly
-begin with an element. The element is validated against the DTD. This mode is
-interesting if you want to check documents against a fixed, immutable DTD.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><I
-CLASS="EMPHASIS"
->parse_wfcontent_entity:</I
-> This function
-also parses a single element without DTD, but does not validate it.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><I
-CLASS="EMPHASIS"
->extract_dtd_from_document_entity:</I
-> This
-function extracts the DTD from a closed document consisting of a DTD and a
-document body. Both the internal and the external subsets are extracted.</P
-></LI
-></UL
-></P
-><P
->In many cases, <TT
-CLASS="LITERAL"
->parse_document_entity</TT
-> is the preferred mode
-to parse a document in a validating way, and
-<TT
-CLASS="LITERAL"
->parse_wfdocument_entity</TT
-> is the mode of choice to parse a
-file while only checking for well-formedness.</P
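-><P
->For illustration only, a typical validating call might look like the
-following sketch; it assumes the values <TT
-CLASS="LITERAL"
->default_config</TT
->, <TT
-CLASS="LITERAL"
->from_file</TT
->, and <TT
-CLASS="LITERAL"
->default_spec</TT
-> of <TT
-CLASS="LITERAL"
->Pxp_yacc</TT
-> (see <TT
-CLASS="LITERAL"
->pxp_yacc.mli</TT
-> for the exact signatures):
-
-<PRE
-CLASS="PROGRAMLISTING"
->let doc =
-  Pxp_yacc.parse_document_entity
-    Pxp_yacc.default_config
-    (Pxp_yacc.from_file "doc.xml")
-    Pxp_yacc.default_spec</PRE
->
-
-The other parsing functions are invoked in a similar way.</P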
-><P
->There are a number of variations of these modes. One important application of a
-parser is to check documents of an untrusted source against a fixed DTD. One
-solution is to not allow the <TT
-CLASS="LITERAL"
-><!DOCTYPE></TT
-> clause in
-these documents, and treat the document like a fragment (using mode
-<I
-CLASS="EMPHASIS"
->parse_content_entity</I
->). This is very simple, but
-inflexible; users of such a system cannot even define additional entities to
-abbreviate frequent phrases of their text.</P
-><P
->It may be necessary to have a more intelligent checker. For example, it is also
-possible to parse the document fully, i.e. together with its DTD, and to compare
-this DTD with the prescribed one. To parse the document fully, the mode
-<I
-CLASS="EMPHASIS"
->parse_document_entity</I
-> is applied, and to obtain the DTD to
-compare against, the mode <I
-CLASS="EMPHASIS"
->parse_dtd_entity</I
-> can be used.</P
-><P
->There is another very important configurable aspect of the parser: the
-so-called resolver. The task of the resolver is to locate the contents of an
-(external) entity for a given entity name, and to make the contents accessible
-as a character stream. (Furthermore, it also normalizes the character set;
-but this is a detail we can ignore here.) Suppose you have a file called
-<TT
-CLASS="LITERAL"
->"main.xml"</TT
-> containing
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY % sub SYSTEM "sub/sub.xml">
-%sub;</PRE
->
-
-and a file stored in the subdirectory <TT
-CLASS="LITERAL"
->"sub"</TT
-> with name
-<TT
-CLASS="LITERAL"
->"sub.xml"</TT
-> containing
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY % subsub SYSTEM "subsub/subsub.xml">
-%subsub;</PRE
->
-
-and a file stored in the subdirectory <TT
-CLASS="LITERAL"
->"subsub"</TT
-> of
-<TT
-CLASS="LITERAL"
->"sub"</TT
-> with name <TT
-CLASS="LITERAL"
->"subsub.xml"</TT
-> (the
-contents of this file do not matter). Here, the resolver must track that
-the second entity <TT
-CLASS="LITERAL"
->subsub</TT
-> is located in the directory
-<TT
-CLASS="LITERAL"
->"sub/subsub"</TT
->, i.e. the difficulty is to interpret the
-system (file) names of entities relative to the entities containing them,
-even if the entities are deeply nested.</P
-><P
->There is no fixed resolver that already does everything right - resolving entity
-names is a task that highly depends on the environment. The XML specification
-only demands that <TT
-CLASS="LITERAL"
->SYSTEM</TT
-> entities are interpreted like URLs
-(which is not very precise, as there are lots of URL schemes in use), hoping
-that this helps overcome the local peculiarities of the environment; the idea
-is that if you do not know your environment you can refer to other entities by
-denoting URLs for them. I think that this interpretation of
-<TT
-CLASS="LITERAL"
->SYSTEM</TT
-> names may have some applications on the internet, but
-it is not the first choice in general. Because of this, the resolver is a
-separate module of the parser that can be exchanged for another one if
-necessary; more precisely, the parser already defines several resolvers.</P
-><P
->The following resolvers do already exist:
-
- <P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
->Resolvers reading from arbitrary input channels. These
-can be configured such that a certain ID is associated with the channel; in
-this case inner references to external entities can be resolved. There is also
-a special resolver that interprets SYSTEM IDs as URLs; this resolver can
-process relative SYSTEM names and determine the corresponding absolute URL.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->A resolver that always reads from a given O'Caml
-string. This resolver is not able to resolve further names because the string is
-not associated with any name, i.e. if the document contained in the string
-refers to an external entity, this reference cannot be followed in this
-case.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->A resolver for file names. The <TT
-CLASS="LITERAL"
->SYSTEM</TT
->
-name is interpreted as a file URL with the slash "/" as the separator for
-directories. - This resolver is derived from the generic URL resolver.</P
-></LI
-></UL
->
-
-The interface a resolver must have is documented, so it is possible to write
-your own resolver. For example, you could connect the parser with an HTTP
-client, and resolve URLs of the HTTP namespace. The resolver classes support
-combining several independent resolvers into one more powerful resolver;
-thus it is possible to combine a self-written resolver with the already
-existing resolvers.</P
-><P
->Note that the existing resolvers only interpret <TT
-CLASS="LITERAL"
->SYSTEM</TT
->
-names, not <TT
-CLASS="LITERAL"
->PUBLIC</TT
-> names. If it helps you, it is possible to
-define resolvers for <TT
-CLASS="LITERAL"
->PUBLIC</TT
-> names, too; for example, such a
-resolver could look up the public name in a hash table, and map it to a system
-name which is passed over to the existing resolver for system names. It is
-relatively simple to provide such a resolver.</P
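-><P
->A possible sketch of such a lookup (a purely hypothetical helper, not part
-of the distribution):
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* Hypothetical helper: map PUBLIC identifiers to SYSTEM identifiers
-   before handing them to an existing resolver for SYSTEM names. *)
-let public_map : (string, string) Hashtbl.t = Hashtbl.create 13
-
-let () =
-  Hashtbl.add public_map "-//EXAMPLE//DTD Simple//EN" "simple.dtd"
-
-let system_of_public pubid =
-  try Some (Hashtbl.find public_map pubid) with Not_found -> None</PRE
-></P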
-></DIV
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-><A
-HREF="x1496.html"
->Prev</A
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="index.html"
->Home</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-><A
-HREF="x1629.html"
->Next</A
-></TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
->Details of the mapping from XML text to the tree representation</TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="p34.html"
->Up</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
->Resolvers and sources</TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->What is XML?</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="User's guide"
-HREF="p34.html"><LINK
-REL="PREVIOUS"
-TITLE="User's guide"
-HREF="p34.html"><LINK
-REL="NEXT"
-TITLE="Highlights of XML"
-HREF="x107.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="CHAPTER"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="p34.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
-></TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-><A
-HREF="x107.html"
->Next</A
-></TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="CHAPTER"
-><H1
-><A
-NAME="AEN36"
->Chapter 1. What is XML?</A
-></H1
-><DIV
-CLASS="TOC"
-><DL
-><DT
-><B
->Table of Contents</B
-></DT
-><DT
->1.1. <A
-HREF="c36.html#AEN38"
->Introduction</A
-></DT
-><DT
->1.2. <A
-HREF="x107.html"
->Highlights of XML</A
-></DT
-><DT
->1.3. <A
-HREF="x468.html"
->A complete example: The <I
-CLASS="EMPHASIS"
->readme</I
-> DTD</A
-></DT
-></DL
-></DIV
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN38"
->1.1. Introduction</A
-></H1
-><P
->XML (short for <I
-CLASS="EMPHASIS"
->Extensible Markup Language</I
->)
-generalizes the idea that text documents are typically structured in sections,
-sub-sections, paragraphs, and so on. The format of the document is not fixed
-(as, for example, in HTML), but can be declared by a so-called DTD (document
-type definition). The DTD describes only the rules for how the document can be
-structured, but not how the document can be processed. For example, if you want
-to publish a book that uses XML markup, you will need a processor that converts
-the XML file into a printable format such as Postscript. On the one hand, the
-structure of XML documents is configurable; on the other hand, there is no
-longer a canonical interpretation of the elements of the document; for example
-one XML DTD might require that paragraphs are delimited by
-<TT
-CLASS="LITERAL"
->para</TT
-> tags, and another DTD expects <TT
-CLASS="LITERAL"
->p</TT
-> tags
-for the same purpose. As a result, for every DTD a new processor is required.</P
-><P
->Although XML can be used to express structured text documents, it is not limited
-to this kind of application. For example, XML can also be used to exchange
-structured data over a network, or to simply store structured data in
-files. Note that XML documents cannot contain arbitrary binary data because
-some characters are forbidden; for some applications you need to encode binary
-data as text (e.g. the base 64 encoding).</P
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN45"
->1.1.1. The "hello world" example</A
-></H2
-><P
->The following example shows a very simple DTD, and a corresponding document
-instance. The document is structured such that it consists of sections, and
-that sections consist of paragraphs, and that paragraphs contain plain text:</P
-><PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT document (section)+>
-<!ELEMENT section (paragraph)+>
-<!ELEMENT paragraph (#PCDATA)></PRE
-><P
->The following document is an instance of this DTD:</P
-><PRE
-CLASS="PROGRAMLISTING"
-><?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE document SYSTEM "simple.dtd">
-<document>
- <section>
- <paragraph>This is a paragraph of the first section.</paragraph>
- <paragraph>This is another paragraph of the first section.</paragraph>
- </section>
- <section>
- <paragraph>This is the only paragraph of the second section.</paragraph>
- </section>
-</document></PRE
-><P
->As in HTML (and, of course, in grandfather SGML), the "pieces" of
-the document are delimited by element braces, i.e. such a piece begins with
-<TT
-CLASS="LITERAL"
-><name-of-the-type-of-the-piece></TT
-> and ends with
-<TT
-CLASS="LITERAL"
-></name-of-the-type-of-the-piece></TT
->, and the pieces are
-called <I
-CLASS="EMPHASIS"
->elements</I
->. Unlike HTML and SGML, both start tags and
-end tags (i.e. the delimiters written in angle brackets) can never be left
-out. For example, HTML calls the paragraphs simply <TT
-CLASS="LITERAL"
->p</TT
->, and
-because paragraphs never contain paragraphs, a sequence of several paragraphs
-can be written as:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><p>First paragraph
-<p>Second paragraph</PRE
->
-
-This is not possible in XML; continuing our example above we must always write
-
-<PRE
-CLASS="PROGRAMLISTING"
-><paragraph>First paragraph</paragraph>
-<paragraph>Second paragraph</paragraph></PRE
->
-
-The rationale behind that is to (1) simplify the development of XML parsers
-(you need not convert the DTD into a deterministic finite automaton which is
-required to detect omitted tags), and to (2) make it possible to parse the
-document independently of whether the DTD is known or not.</P
-><P
->The first line of our sample document,
-
-<PRE
-CLASS="PROGRAMLISTING"
-><?xml version="1.0" encoding="ISO-8859-1"?></PRE
->
-
-is the so-called <I
-CLASS="EMPHASIS"
->XML declaration</I
->. It expresses that the
-document follows the conventions of XML version 1.0, and that the document is
-encoded using characters from the ISO-8859-1 character set (often known as
-"Latin 1", mostly used in Western Europe). Although the XML declaration is not
-mandatory, it is good style to include it; everybody sees at first glance
-that the document uses XML markup and not the similar-looking HTML and SGML
-markup languages. If you omit the XML declaration, the parser will assume
-that the document is encoded as UTF-8 or UTF-16 (there is a rule that makes
-it possible to distinguish between UTF-8 and UTF-16 automatically); these
-are encodings of Unicode's universal character set. (Note that <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
->, unlike its
-predecessor "Markup", fully supports Unicode.)</P
-><P
->The second line,
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!DOCTYPE document SYSTEM "simple.dtd"></PRE
->
-
-names the DTD that is going to be used for the rest of the document. In
-general, it is possible that the DTD consists of two parts, the so-called
-external and the internal subset. "External" means that the DTD exists as a
-second file; "internal" means that the DTD is included in the same file. In
-this example, there is only an external subset, and the system identifier
-"simple.dtd" specifies where the DTD file can be found. System identifiers are
-interpreted as URLs; for instance this would be legal:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!DOCTYPE document SYSTEM "http://host/location/simple.dtd"></PRE
->
-
-Please note that <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-> cannot interpret HTTP identifiers by default, but it is
-possible to change the interpretation of system identifiers.</P
-><P
->The word immediately following <TT
-CLASS="LITERAL"
->DOCTYPE</TT
-> determines which of
-the declared element types (here "document", "section", and "paragraph") is
-used for the outermost element, the <I
-CLASS="EMPHASIS"
->root element</I
->. In this
-example it is <TT
-CLASS="LITERAL"
->document</TT
-> because the outermost element is
-delimited by <TT
-CLASS="LITERAL"
-><document></TT
-> and
-<TT
-CLASS="LITERAL"
-></document></TT
->. </P
-><P
->The DTD consists of three declarations for element types:
-<TT
-CLASS="LITERAL"
->document</TT
->, <TT
-CLASS="LITERAL"
->section</TT
->, and
-<TT
-CLASS="LITERAL"
->paragraph</TT
->. Such a declaration has two parts:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT <TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
-> <TT
-CLASS="REPLACEABLE"
-><I
->content-model</I
-></TT
->></PRE
->
-
-The content model is a regular expression which describes the possible inner
-structure of the element. Here, <TT
-CLASS="LITERAL"
->document</TT
-> contains one or
-more sections, and a <TT
-CLASS="LITERAL"
->section</TT
-> contains one or more
-paragraphs. Note that these two element types are not allowed to contain
-arbitrary text. Only the <TT
-CLASS="LITERAL"
->paragraph</TT
-> element type is declared
-such that parsed character data (indicated by the symbol
-<TT
-CLASS="LITERAL"
->#PCDATA</TT
->) is permitted.</P
-><P
->See below for a detailed discussion of content models. </P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN84"
->1.1.2. XML parsers and processors</A
-></H2
-><P
->XML documents are human-readable, but this is not the main purpose of this
-language. XML has been designed such that documents can be read by a program
-called an <I
-CLASS="EMPHASIS"
->XML parser</I
->. The parser checks whether the document
-is correct, and it represents the document as objects of the programming
-language. There are two aspects of checking the document: First, the document
-must follow some basic syntactic rules, such as that tags are written in angle
-brackets, that for every start tag there must be a corresponding end tag and so
-on. A document respecting these rules is
-<I
-CLASS="EMPHASIS"
->well-formed</I
->. Second, the document must match the DTD in
-which case the document is <I
-CLASS="EMPHASIS"
->valid</I
->. Many parsers check only
-on well-formedness and ignore the DTD; <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-> is designed such that it can
-even validate the document.</P
-><P
->A parser alone does not make a sensible application; it only reads XML
-documents. The whole application working with XML-formatted data is called an
-<I
-CLASS="EMPHASIS"
->XML processor</I
->. Often XML processors convert documents into
-another format, such as HTML or Postscript. Sometimes processors extract data
-from the documents and output the processed data again in XML format. The parser
-can help the application processing the document; for example it can provide
-means to access the document in a specific manner. <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-> especially supports an
-object-oriented access layer.</P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN94"
->1.1.3. Discussion</A
-></H2
-><P
->As we have seen, there are two levels of description: On the one hand, XML can
-define rules about the format of a document (the DTD), on the other hand, XML
-expresses structured documents. There are a number of possible applications:</P
-><P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
->XML can be used to express structured texts. Unlike HTML, there is no canonical
-interpretation; one would have to write a backend for the DTD that translates
-the structured texts into a format that existing browsers, printers
-etc. understand. The advantage of a self-defined document format is that it is
-possible to design the format in a more problem-oriented way. For example, if
-the task is to extract reports from a database, one can use a DTD that reflects
-the structure of the report or the database. A possible approach would be to
-have an element type for every database table and for every column. Once the
-DTD has been designed, the report procedure can be split up into a part that
-selects the database rows and outputs them as an XML document according to the
-DTD, and into a part that translates the document into other formats. Of course,
-the latter part can be solved in a generic way, e.g. there may be configurable
-backends for all DTDs that follow the approach and have element types for
-tables and columns.</P
-><P
->XML plays the role of a configurable intermediate format. The database
-extraction function can be written without having to know the details of
-typesetting; the backends can be written without having to know the details of
-the database.</P
-><P
->Of course, there are traditional solutions. One can define an ad hoc
-intermediate text file format. The disadvantage is that there are no names for
-the pieces of the format, and that such formats usually lack documentation
-because of this. Another solution would be to have a binary representation,
-either as language-dependent or language-independent structure (examples of the
-latter can be found in RPC implementations). The disadvantage is that it is
-harder to view such representations; one has to write pretty printers for this
-purpose. It is also more difficult to enter test data; XML is plain text that
-can be written using an arbitrary editor (Emacs even has a good XML mode,
-PSGML). All these alternatives suffer from a missing structure checker,
-i.e. the programs processing these formats usually do not check the input file
-or input object in detail; XML parsers check the syntax of the input (the
-so-called well-formedness check), and the advanced parsers like <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-> even
-verify that the structure matches the DTD (the so-called validation).</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->XML can be used as a configurable communication language. A fundamental problem
-of every communication is that sender and receiver must follow the same
-conventions about the language. For data exchange, the question is usually
-which data records and fields are available, how they are syntactically
-composed, and which values are possible for the various fields. Similar
-questions arise for text document exchange. XML does not answer these problems
-completely, but it reduces the number of ambiguities for such conventions: The
-outlines of the syntax are specified by the DTD (but not necessarily the
-details), and XML introduces canonical names for the components of documents
-such that it is simpler to describe the rest of the syntax and the semantics
-informally.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->XML is a data storage format. Currently, every software product tends to use
-its own way to store data; commercial software often does not describe such
-formats, and it is a pain to integrate such software into a bigger project.
-XML can help to improve this situation when several applications share the same
-syntax of data files. DTDs are then neutral instances that check the format of
-data files independent of applications. </P
-></LI
-></UL
-></DIV
-></DIV
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-><A
-HREF="p34.html"
->Prev</A
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="index.html"
->Home</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-><A
-HREF="x107.html"
->Next</A
-></TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
->User's guide</TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="p34.html"
->Up</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
->Highlights of XML</TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->Using PXP</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="User's guide"
-HREF="p34.html"><LINK
-REL="PREVIOUS"
-TITLE="A complete example: The readme DTD"
-HREF="x468.html"><LINK
-REL="NEXT"
-TITLE="How to parse a document from an application"
-HREF="x550.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="CHAPTER"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="x468.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
-></TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-><A
-HREF="x550.html"
->Next</A
-></TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="CHAPTER"
-><H1
-><A
-NAME="AEN533"
->Chapter 2. Using <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-></A
-></H1
-><DIV
-CLASS="TOC"
-><DL
-><DT
-><B
->Table of Contents</B
-></DT
-><DT
->2.1. <A
-HREF="c533.html#AEN536"
->Validation</A
-></DT
-><DT
->2.2. <A
-HREF="x550.html"
->How to parse a document from an application</A
-></DT
-><DT
->2.3. <A
-HREF="x675.html"
->Class-based processing of the node tree</A
-></DT
-><DT
->2.4. <A
-HREF="x738.html"
->Example: An HTML backend for the <I
-CLASS="EMPHASIS"
->readme</I
->
-DTD</A
-></DT
-></DL
-></DIV
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN536"
->2.1. Validation</A
-></H1
-><P
->The parser can be used to <I
-CLASS="EMPHASIS"
->validate</I
-> a document. This means
-that all the constraints that must hold for a valid document are actually
-checked. Validation is the default mode of <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
->, i.e. every document is
-validated while it is being parsed.</P
-><P
->In the <TT
-CLASS="LITERAL"
->examples</TT
-> directory of the distribution you find the
-<TT
-CLASS="LITERAL"
->pxpvalidate</TT
-> application. It is invoked in the following way:
-
-<PRE
-CLASS="PROGRAMLISTING"
->pxpvalidate [ -wf ] <TT
-CLASS="REPLACEABLE"
-><I
->file</I
-></TT
->...</PRE
->
-
-The files mentioned on the command line are validated, and every warning and
-every error message is printed to stderr.</P
-><P
->The -wf switch modifies the behaviour such that a well-formedness parser is
-simulated. In this mode, the ELEMENT, ATTLIST, and NOTATION declarations of the
-DTD are ignored, and only the ENTITY declarations will take effect. This mode
-is intended for documents lacking a DTD. Please note that the parser still
-scans the DTD fully and will report all errors in the DTD; such checks are not
-required by a well-formedness parser.</P
-><P
->The <TT
-CLASS="LITERAL"
->pxpvalidate</TT
-> application is the simplest sensible program
-using <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
->; you may consider it the "hello world" program of PXP. </P
-></DIV
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-><A
-HREF="x468.html"
->Prev</A
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="index.html"
->Home</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-><A
-HREF="x550.html"
->Next</A
-></TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
->A complete example: The <I
-CLASS="EMPHASIS"
->readme</I
-> DTD</TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="p34.html"
->Up</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
->How to parse a document from an application</TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->The objects representing the document</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="User's guide"
-HREF="p34.html"><LINK
-REL="PREVIOUS"
-TITLE="Example: An HTML backend for the readme
-DTD"
-HREF="x738.html"><LINK
-REL="NEXT"
-TITLE="The class type node"
-HREF="x939.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="CHAPTER"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="x738.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
-></TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-><A
-HREF="x939.html"
->Next</A
-></TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="CHAPTER"
-><H1
-><A
-NAME="AEN893"
->Chapter 3. The objects representing the document</A
-></H1
-><DIV
-CLASS="TOC"
-><DL
-><DT
-><B
->Table of Contents</B
-></DT
-><DT
->3.1. <A
-HREF="c893.html#AEN897"
->The <TT
-CLASS="LITERAL"
->document</TT
-> class</A
-></DT
-><DT
->3.2. <A
-HREF="x939.html"
->The class type <TT
-CLASS="LITERAL"
->node</TT
-></A
-></DT
-><DT
->3.3. <A
-HREF="x1439.html"
->The class type <TT
-CLASS="LITERAL"
->extension</TT
-></A
-></DT
-><DT
->3.4. <A
-HREF="x1496.html"
->Details of the mapping from XML text to the tree representation</A
-></DT
-></DL
-></DIV
-><P
-><I
-CLASS="EMPHASIS"
->This description might be out-of-date. See the module interface files
-for updated information.</I
-></P
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN897"
->3.1. The <TT
-CLASS="LITERAL"
->document</TT
-> class</A
-></H1
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->class [ 'ext ] document :
- Pxp_types.collect_warnings ->
- object
- method init_xml_version : string -> unit
- method init_root : 'ext node -> unit
-
- method xml_version : string
- method xml_standalone : bool
- method dtd : dtd
- method root : 'ext node
-
- method encoding : Pxp_types.rep_encoding
-
- method add_pinstr : proc_instruction -> unit
- method pinstr : string -> proc_instruction list
- method pinstr_names : string list
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
-
- end
-;;</PRE
->
-
-The methods beginning with <TT
-CLASS="LITERAL"
->init_</TT
-> are only for internal use
-of the parser.</P
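-><P
->As a quick illustration (only a sketch; the document object is obtained
-from one of the parsing functions described in the next chapter), the
-accessors can be used like this:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let describe d =
-  Printf.printf "XML version: %s\n" d#xml_version;
-  List.iter
-    (fun target -> Printf.printf "PI target: %s\n" target)
-    d#pinstr_names;
-  d#root</PRE
->
-
-The function prints some properties and returns the root node of the
-document tree.</P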
-><P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->xml_version</TT
->: returns the version string at the beginning of
-the document. For example, "1.0" is returned if the document begins with
-<TT
-CLASS="LITERAL"
-><?xml version="1.0"?></TT
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->xml_standalone</TT
->: returns the boolean value of the
-<TT
-CLASS="LITERAL"
->standalone</TT
-> declaration in the XML declaration. If the
-<TT
-CLASS="LITERAL"
->standalone</TT
-> attribute is missing, <TT
-CLASS="LITERAL"
->false</TT
-> is
-returned. </P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->dtd</TT
->: returns a reference to the global DTD object.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->root</TT
->: returns a reference to the root element.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->encoding</TT
->: returns the internal encoding of the
-document. This means that all strings of which the document consists are
-encoded in this character set.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->pinstr</TT
->: returns the processing instructions outside the DTD
-and outside the root element. The argument passed to the method names a
-<I
-CLASS="EMPHASIS"
->target</I
->, and the method returns all instructions with this
-target. The target is the first word inside <TT
-CLASS="LITERAL"
-><?</TT
-> and
-<TT
-CLASS="LITERAL"
->?></TT
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->pinstr_names</TT
->: returns the names of the processing instructions</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->add_pinstr</TT
->: adds another processing instruction. This method
-is used by the parser itself to enter the instructions returned by
-<TT
-CLASS="LITERAL"
->pinstr</TT
->, but you can also enter additional instructions.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->write</TT
->: writes the document to the passed stream as XML
-text using the passed (external) encoding. The generated text is always valid
-XML and can be parsed by PXP; however, the text is badly formatted (this is not
-a pretty printer).</P
-></LI
-></UL
-></DIV
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-><A
-HREF="x738.html"
->Prev</A
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="index.html"
->Home</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-><A
-HREF="x939.html"
->Next</A
-></TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
->Example: An HTML backend for the <I
-CLASS="EMPHASIS"
->readme</I
->
-DTD</TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="p34.html"
->Up</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
->The class type <TT
-CLASS="LITERAL"
->node</TT
-></TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->The PXP user's guide</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="NEXT"
-TITLE="User's guide"
-HREF="p34.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="BOOK"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="BOOK"
-><A
-NAME="AEN1"
-></A
-><DIV
-CLASS="TITLEPAGE"
-><H1
-CLASS="TITLE"
-><A
-NAME="AEN1"
->The PXP user's guide</A
-></H1
-><H3
-CLASS="AUTHOR"
->Gerd Stolpmann</H3
-><P
-CLASS="COPYRIGHT"
->Copyright © 1999, 2000 by <SPAN
-CLASS="HOLDER"
->Gerd Stolpmann</SPAN
-></P
-><DIV
-><DIV
-CLASS="ABSTRACT"
-><P
-></P
-><P
-><SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-> is a validating parser for XML-1.0 which has been
-written entirely in Objective Caml.</P
-><DIV
-CLASS="FORMALPARA"
-><P
-><H1
-CLASS="TITLE"
-><A
-NAME="AEN18"
->Download <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
->:</A
-></H1
->The free <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-> library can be downloaded at
-<A
-HREF="http://www.ocaml-programming.de/packages/"
-TARGET="_top"
->http://www.ocaml-programming.de/packages/</A
->. This user's guide is included.
-Newest releases of <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-> will be announced in
-<A
-HREF="http://www.npc.de/ocaml/linkdb/"
-TARGET="_top"
->The OCaml Link
-Database</A
->.</P
-></DIV
-><P
-></P
-></DIV
-></DIV
-><DIV
-CLASS="LEGALNOTICE"
-><P
-><B
->License</B
-></P
-><P
->This document, and the described software, "<SPAN
-CLASS="ACRONYM"
->PXP</SPAN
->", are copyright by
-Gerd Stolpmann. </P
-><P
->Permission is hereby granted, free of charge, to any person obtaining
-a copy of this document and the "<SPAN
-CLASS="ACRONYM"
->PXP</SPAN
->" software (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:</P
-><P
->The above copyright notice and this permission notice shall be included
-in all copies or substantial portions of the Software.</P
-><P
->The Software is provided ``as is'', without warranty of any kind, express
-or implied, including but not limited to the warranties of
-merchantability, fitness for a particular purpose and noninfringement.
-In no event shall Gerd Stolpmann be liable for any claim, damages or
-other liability, whether in an action of contract, tort or otherwise,
-arising from, out of or in connection with the Software or the use or
-other dealings in the software.</P
-></DIV
-><HR></DIV
-><DIV
-CLASS="TOC"
-><DL
-><DT
-><B
->Table of Contents</B
-></DT
-><DT
->I. <A
-HREF="p34.html"
->User's guide</A
-></DT
-><DD
-><DL
-><DT
->1. <A
-HREF="c36.html"
->What is XML?</A
-></DT
-><DD
-><DL
-><DT
->1.1. <A
-HREF="c36.html#AEN38"
->Introduction</A
-></DT
-><DT
->1.2. <A
-HREF="x107.html"
->Highlights of XML</A
-></DT
-><DT
->1.3. <A
-HREF="x468.html"
->A complete example: The <I
-CLASS="EMPHASIS"
->readme</I
-> DTD</A
-></DT
-></DL
-></DD
-><DT
->2. <A
-HREF="c533.html"
->Using <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-></A
-></DT
-><DD
-><DL
-><DT
->2.1. <A
-HREF="c533.html#AEN536"
->Validation</A
-></DT
-><DT
->2.2. <A
-HREF="x550.html"
->How to parse a document from an application</A
-></DT
-><DT
->2.3. <A
-HREF="x675.html"
->Class-based processing of the node tree</A
-></DT
-><DT
->2.4. <A
-HREF="x738.html"
->Example: An HTML backend for the <I
-CLASS="EMPHASIS"
->readme</I
->
-DTD</A
-></DT
-></DL
-></DD
-><DT
->3. <A
-HREF="c893.html"
->The objects representing the document</A
-></DT
-><DD
-><DL
-><DT
->3.1. <A
-HREF="c893.html#AEN897"
->The <TT
-CLASS="LITERAL"
->document</TT
-> class</A
-></DT
-><DT
->3.2. <A
-HREF="x939.html"
->The class type <TT
-CLASS="LITERAL"
->node</TT
-></A
-></DT
-><DT
->3.3. <A
-HREF="x1439.html"
->The class type <TT
-CLASS="LITERAL"
->extension</TT
-></A
-></DT
-><DT
->3.4. <A
-HREF="x1496.html"
->Details of the mapping from XML text to the tree representation</A
-></DT
-></DL
-></DD
-><DT
->4. <A
-HREF="c1567.html"
->Configuring and calling the parser</A
-></DT
-><DD
-><DL
-><DT
->4.1. <A
-HREF="c1567.html#AEN1569"
->Overview</A
-></DT
-><DT
->4.2. <A
-HREF="x1629.html"
->Resolvers and sources</A
-></DT
-><DT
->4.3. <A
-HREF="x1812.html"
->The DTD classes</A
-></DT
-><DT
->4.4. <A
-HREF="x1818.html"
->Invoking the parser</A
-></DT
-><DT
->4.5. <A
-HREF="x1965.html"
->Updates</A
-></DT
-></DL
-></DD
-></DL
-></DD
-></DL
-></DIV
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-> </TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-> </TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-><A
-HREF="p34.html"
->Next</A
-></TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-> </TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-> </TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
->User's guide</TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-.acronym {
- font-weight: bold;
- color: #c71585
-}
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->User's guide</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="PREVIOUS"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="NEXT"
-TITLE="What is XML?"
-HREF="c36.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="PART"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="index.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
-></TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-><A
-HREF="c36.html"
->Next</A
-></TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="PART"
-><A
-NAME="AEN34"
-></A
-><DIV
-CLASS="TITLEPAGE"
-><H1
-CLASS="TITLE"
->I. User's guide</H1
-><DIV
-CLASS="TOC"
-><DL
-><DT
-><B
->Table of Contents</B
-></DT
-><DT
->1. <A
-HREF="c36.html"
->What is XML?</A
-></DT
-><DT
->2. <A
-HREF="c533.html"
->Using <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-></A
-></DT
-><DT
->3. <A
-HREF="c893.html"
->The objects representing the document</A
-></DT
-><DT
->4. <A
-HREF="c1567.html"
->Configuring and calling the parser</A
-></DT
-></DL
-></DIV
-></DIV
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-><A
-HREF="index.html"
->Prev</A
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="index.html"
->Home</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-><A
-HREF="c36.html"
->Next</A
-></TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
->The PXP user's guide</TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-> </TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
->What is XML?</TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->Highlights of XML</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="What is XML?"
-HREF="c36.html"><LINK
-REL="PREVIOUS"
-TITLE="What is XML?"
-HREF="c36.html"><LINK
-REL="NEXT"
-TITLE="A complete example: The readme DTD"
-HREF="x468.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="SECT1"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="c36.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
->Chapter 1. What is XML?</TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-><A
-HREF="x468.html"
->Next</A
-></TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN107"
->1.2. Highlights of XML</A
-></H1
-><P
->This section explains many of the features of XML, but not all of them, and
-some features only briefly. For a complete description, see the <A
-HREF="http://www.w3.org/TR/1998/REC-xml-19980210.html"
-TARGET="_top"
->XML
-specification</A
->.</P
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN111"
->1.2.1. The DTD and the instance</A
-></H2
-><P
->The DTD contains various declarations; in general you can only use a feature if
-you have previously declared it. The document instance file may contain the
-full DTD, but it is also possible to split the DTD into an internal and an
-external subset. A document must begin as follows if the full DTD is included:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><?xml version="1.0" encoding="<TT
-CLASS="REPLACEABLE"
-><I
->Your encoding</I
-></TT
->"?>
-<!DOCTYPE <TT
-CLASS="REPLACEABLE"
-><I
->root</I
-></TT
-> [
- <TT
-CLASS="REPLACEABLE"
-><I
->Declarations</I
-></TT
->
-]></PRE
->
-
-These declarations are called the <I
-CLASS="EMPHASIS"
->internal subset</I
->. Note
-that the usage of entities and conditional sections is restricted within the
-internal subset.</P
-><P
->If the declarations are located in a different file, you can refer to this file
-as follows:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><?xml version="1.0" encoding="<TT
-CLASS="REPLACEABLE"
-><I
->Your encoding</I
-></TT
->"?>
-<!DOCTYPE <TT
-CLASS="REPLACEABLE"
-><I
->root</I
-></TT
-> SYSTEM "<TT
-CLASS="REPLACEABLE"
-><I
->file name</I
-></TT
->"></PRE
->
-
-The declarations in the file are called the <I
-CLASS="EMPHASIS"
->external
-subset</I
->. The file name is called the <I
-CLASS="EMPHASIS"
->system
-identifier</I
->.
-It is also possible to refer to the file by a so-called
-<I
-CLASS="EMPHASIS"
->public identifier</I
->, but most XML applications won't use
-this feature.</P
-><P
->You can also specify both internal and external subsets. In this case, the
-declarations of both subsets are mixed, and if there are conflicts, the
-declarations of the internal subset override those of the external subset with
-the same name. This looks as follows:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><?xml version="1.0" encoding="<TT
-CLASS="REPLACEABLE"
-><I
->Your encoding</I
-></TT
->"?>
-<!DOCTYPE <TT
-CLASS="REPLACEABLE"
-><I
->root</I
-></TT
-> SYSTEM "<TT
-CLASS="REPLACEABLE"
-><I
->file name</I
-></TT
->" [
- <TT
-CLASS="REPLACEABLE"
-><I
->Declarations</I
-></TT
->
-]></PRE
-></P
-><P
->The XML declaration (the string beginning with <TT
-CLASS="LITERAL"
-><?xml</TT
-> and
-ending at <TT
-CLASS="LITERAL"
->?></TT
->) should specify the encoding of the
-file. Common values are UTF-8, and the ISO-8859 series of character sets. Note
-that every file parsed by the XML processor can begin with an XML declaration
-and that every file may have its own encoding.</P
-><P
->The name of the root element must be mentioned directly after the
-<TT
-CLASS="LITERAL"
->DOCTYPE</TT
-> string. This means that a full document instance
-looks like
-
-<PRE
-CLASS="PROGRAMLISTING"
-><?xml version="1.0" encoding="<TT
-CLASS="REPLACEABLE"
-><I
->Your encoding</I
-></TT
->"?>
-<!DOCTYPE <TT
-CLASS="REPLACEABLE"
-><I
->root</I
-></TT
-> SYSTEM "<TT
-CLASS="REPLACEABLE"
-><I
->file name</I
-></TT
->" [
- <TT
-CLASS="REPLACEABLE"
-><I
->Declarations</I
-></TT
->
-]>
-
-<<TT
-CLASS="REPLACEABLE"
-><I
->root</I
-></TT
->>
- <TT
-CLASS="REPLACEABLE"
-><I
->inner contents</I
-></TT
->
-</<TT
-CLASS="REPLACEABLE"
-><I
->root</I
-></TT
->></PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN146"
->1.2.2. Reserved characters</A
-></H2
-><P
->Some characters are generally reserved to indicate markup such that they cannot
-be used for character data. These characters are <, >, and
-&. Furthermore, single and double quotes are sometimes reserved. If you
-want to include such a character as ordinary character data, write it as follows:
-
-<P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->&lt;</TT
-> instead of <</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->&gt;</TT
-> instead of ></P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->&amp;</TT
-> instead of &</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->&apos;</TT
-> instead of '</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->&quot;</TT
-> instead of "</P
-></LI
-></UL
->
-
-All other characters are free in the document instance. It is possible to
-include a character by its position in the Unicode alphabet:
-
-<PRE
-CLASS="PROGRAMLISTING"
->&#<TT
-CLASS="REPLACEABLE"
-><I
->n</I
-></TT
->;</PRE
->
-
-where <TT
-CLASS="REPLACEABLE"
-><I
->n</I
-></TT
-> is the decimal number of the
-character. Alternatively, you can specify the character by its hexadecimal
-number:
-
-<PRE
-CLASS="PROGRAMLISTING"
->&#x<TT
-CLASS="REPLACEABLE"
-><I
->n</I
-></TT
->;</PRE
->
-
-In the scope of declarations, the character % is no longer free. To include it
-as a character, you must use the notations <TT
-CLASS="LITERAL"
->&#37;</TT
-> or
-<TT
-CLASS="LITERAL"
->&#x25;</TT
->.</P
-><P
->Note that besides &lt;, &gt;, &amp;,
-&apos;, and &quot; there are no predefined character entities. This is
-different from HTML, which defines a list of characters that can be referenced
-by name (e.g. &auml; for ä); however, if you prefer named characters, you
-can declare such entities yourself (see below).</P
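-><P
->For instance, the following declaration makes the named entity auml available
-as an abbreviation for the character with code 228 (ä):
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY auml "&#228;"></PRE
-></P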
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN173"
->1.2.3. Elements and ELEMENT declarations</A
-></H2
-><P
->Elements structure the document instance in a hierarchical way. There is a
-top-level element, the <I
-CLASS="EMPHASIS"
->root element</I
->, which contains a
-sequence of inner elements and character sections. The inner elements are
-structured in the same way. Every element has an <I
-CLASS="EMPHASIS"
->element
-type</I
->. The beginning of the element is indicated by a <I
-CLASS="EMPHASIS"
->start
-tag</I
->, written
-
-<PRE
-CLASS="PROGRAMLISTING"
-><<TT
-CLASS="REPLACEABLE"
-><I
->element-type</I
-></TT
->></PRE
->
-
-and the element continues until the corresponding <I
-CLASS="EMPHASIS"
->end tag</I
->
-is reached:
-
-<PRE
-CLASS="PROGRAMLISTING"
-></<TT
-CLASS="REPLACEABLE"
-><I
->element-type</I
-></TT
->></PRE
->
-
-In XML, it is not allowed to omit start or end tags, even if the DTD would
-permit this. Note that there are no special rules about how to interpret spaces or
-newlines near start or end tags; all spaces and newlines count.</P
-><P
->Every element type must be declared before it can be used. The declaration
-consists of two parts: the ELEMENT declaration describes the content model,
-i.e. which inner elements are allowed; the ATTLIST declaration describes the
-attributes of the element.</P
-><P
->An element can simply allow everything as content. This is written:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT <TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
-> ANY></PRE
->
-
-In contrast, an element can be forced to be empty; this is declared by:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT <TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
-> EMPTY></PRE
->
-
-Note that there is an abbreviated notation for empty element instances:
-<TT
-CLASS="LITERAL"
-><<TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
->/></TT
->. </P
-><P
->There are two more sophisticated forms of declarations: so-called
-<I
-CLASS="EMPHASIS"
->mixed declarations</I
->, and <I
-CLASS="EMPHASIS"
->regular
-expressions</I
->. An element with mixed content contains character data
-interspersed with inner elements, and the set of allowed inner elements can be
-specified. In contrast to this, a regular expression declaration does not allow
-character data, but the inner elements can be described by the more powerful
-means of regular expressions.</P
-><P
->A declaration for mixed content looks as follows:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT <TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
-> (#PCDATA | <TT
-CLASS="REPLACEABLE"
-><I
->element<SUB
->1</SUB
-></I
-></TT
-> | ... | <TT
-CLASS="REPLACEABLE"
-><I
->element<SUB
->n</SUB
-></I
-></TT
-> )*></PRE
->
-
-or if you do not want to allow any inner element, simply
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT <TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
-> (#PCDATA)></PRE
-></P
-><BLOCKQUOTE
-CLASS="BLOCKQUOTE"
-><P
-><B
->Example</B
-></P
-><P
->If element type <TT
-CLASS="LITERAL"
->q</TT
-> is declared as
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT q (#PCDATA | r | s)*></PRE
->
-
-this is a legal instance:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><q>This is character data<r></r>with <s></s>inner elements</q></PRE
->
-
-But this is illegal because <TT
-CLASS="LITERAL"
->t</TT
-> has not been enumerated in the
-declaration:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><q>This is character data<r></r>with <t></t>inner elements</q></PRE
-></P
-></BLOCKQUOTE
-><P
->The other form uses a regular expression to describe the possible contents:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT <TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
-> <TT
-CLASS="REPLACEABLE"
-><I
->regexp</I
-></TT
->></PRE
->
-
-The following well-known regexp operators are allowed:
-
-<P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
-><TT
-CLASS="REPLACEABLE"
-><I
->element-name</I
-></TT
-></TT
-></P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->(<TT
-CLASS="REPLACEABLE"
-><I
->subexpr<SUB
->1</SUB
-></I
-></TT
-> ,</TT
-> ... <TT
-CLASS="LITERAL"
->, <TT
-CLASS="REPLACEABLE"
-><I
->subexpr<SUB
->n</SUB
-></I
-></TT
-> )</TT
-></P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->(<TT
-CLASS="REPLACEABLE"
-><I
->subexpr<SUB
->1</SUB
-></I
-></TT
-> |</TT
-> ... <TT
-CLASS="LITERAL"
->| <TT
-CLASS="REPLACEABLE"
-><I
->subexpr<SUB
->n</SUB
-></I
-></TT
-> )</TT
-></P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
-><TT
-CLASS="REPLACEABLE"
-><I
->subexpr</I
-></TT
->*</TT
-></P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
-><TT
-CLASS="REPLACEABLE"
-><I
->subexpr</I
-></TT
->+</TT
-></P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
-><TT
-CLASS="REPLACEABLE"
-><I
->subexpr</I
-></TT
->?</TT
-></P
-></LI
-></UL
->
-
-The <TT
-CLASS="LITERAL"
->,</TT
-> operator indicates a sequence of sub-models, the
-<TT
-CLASS="LITERAL"
->|</TT
-> operator describes alternative sub-models. The
-<TT
-CLASS="LITERAL"
->*</TT
-> indicates zero or more repetitions, and
-<TT
-CLASS="LITERAL"
->+</TT
-> one or more repetitions. Finally, <TT
-CLASS="LITERAL"
->?</TT
-> can
-be used for optional sub-models. As atoms, the regexp can contain the names of
-elements; note that it is not allowed to include <TT
-CLASS="LITERAL"
->#PCDATA</TT
->.</P
-><P
->The exact syntax of the regular expressions is rather strange. This can be
-explained best by a list of constraints:
-
-<P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
->The outermost expression must not be
-<TT
-CLASS="LITERAL"
-><TT
-CLASS="REPLACEABLE"
-><I
->element-name</I
-></TT
-></TT
->. </P
-><P
-><I
-CLASS="EMPHASIS"
->Illegal:</I
->
-<TT
-CLASS="LITERAL"
-><!ELEMENT x y></TT
->; this must be written as
-<TT
-CLASS="LITERAL"
-><!ELEMENT x (y)></TT
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->For the unary operators <TT
-CLASS="LITERAL"
-><TT
-CLASS="REPLACEABLE"
-><I
->subexpr</I
-></TT
->*</TT
->,
-<TT
-CLASS="LITERAL"
-><TT
-CLASS="REPLACEABLE"
-><I
->subexpr</I
-></TT
->+</TT
->, and
-<TT
-CLASS="LITERAL"
-><TT
-CLASS="REPLACEABLE"
-><I
->subexpr</I
-></TT
->?</TT
->, the
-<TT
-CLASS="LITERAL"
-><TT
-CLASS="REPLACEABLE"
-><I
->subexpr</I
-></TT
-></TT
-> must not be again an
-unary operator.</P
-><P
-><I
-CLASS="EMPHASIS"
->Illegal:</I
->
-<TT
-CLASS="LITERAL"
-><!ELEMENT x y**></TT
->; this must be written as
-<TT
-CLASS="LITERAL"
-><!ELEMENT x (y*)*></TT
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->Between <TT
-CLASS="LITERAL"
->)</TT
-> and one of the unary operators
-<TT
-CLASS="LITERAL"
->*</TT
->, <TT
-CLASS="LITERAL"
->+</TT
->, or <TT
-CLASS="LITERAL"
->?</TT
->, there must
-not be whitespace.</P
-><P
-><I
-CLASS="EMPHASIS"
->Illegal:</I
->
-<TT
-CLASS="LITERAL"
-><!ELEMENT x (y|z) *></TT
->; this must be written as
-<TT
-CLASS="LITERAL"
-><!ELEMENT x (y|z)*></TT
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->There is the additional constraint that the
-right parenthesis must be contained in the same entity as the left parenthesis;
-see the section about parsed entities below.</P
-></LI
-></UL
-> </P
-><P
->Note that there is another restriction on regular expressions: they must be
-deterministic. This means that the parser must be able to see, by looking at the
-next token, which alternative is actually used, or whether the repetition
-stops. The reason for this is simply compatibility with SGML (there is no
-intrinsic reason for this rule; XML can live without this restriction).</P
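-><P
->For example, the following content model is not deterministic, because after
-reading an "a" element the parser cannot yet decide which of the two
-alternatives applies:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT x ((a, b) | (a, c))></PRE
->
-
-A deterministic equivalent factors out the common prefix:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT x (a, (b | c))></PRE
-></P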
-><BLOCKQUOTE
-CLASS="BLOCKQUOTE"
-><P
-><B
->Example</B
-></P
-><P
->The elements are declared as follows:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT q (r?, (s | t)+)>
-<!ELEMENT r (#PCDATA)>
-<!ELEMENT s EMPTY>
-<!ELEMENT t (q | r)></PRE
->
-
-This is a legal instance:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><q><r>Some characters</r><s/></q></PRE
->
-
-(Note: <TT
-CLASS="LITERAL"
-><s/></TT
-> is an abbreviation for
-<TT
-CLASS="LITERAL"
-><s></s></TT
->.)
-
-It would be illegal to leave <TT
-CLASS="LITERAL"
-><s/></TT
-> out because at
-least one instance of <TT
-CLASS="LITERAL"
->s</TT
-> or <TT
-CLASS="LITERAL"
->t</TT
-> must be
-present. It would be illegal, too, if characters existed outside the
-<TT
-CLASS="LITERAL"
->r</TT
-> element; the only exception is white space. -- This is
-legal, too:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><q><s/><t><q><s/></q></t></q></PRE
-></P
-></BLOCKQUOTE
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN304"
->1.2.4. Attribute lists and ATTLIST declarations</A
-></H2
-><P
->Elements may have attributes. These are put into the start tag of an element as
-follows:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><<TT
-CLASS="REPLACEABLE"
-><I
->element-name</I
-></TT
-> <TT
-CLASS="REPLACEABLE"
-><I
->attribute<SUB
->1</SUB
-></I
-></TT
->="<TT
-CLASS="REPLACEABLE"
-><I
->value<SUB
->1</SUB
-></I
-></TT
->" ... <TT
-CLASS="REPLACEABLE"
-><I
->attribute<SUB
->n</SUB
-></I
-></TT
->="<TT
-CLASS="REPLACEABLE"
-><I
->value<SUB
->n</SUB
-></I
-></TT
->"></PRE
->
-
-Instead of
-<TT
-CLASS="LITERAL"
->"<TT
-CLASS="REPLACEABLE"
-><I
->value<SUB
->k</SUB
-></I
-></TT
->"</TT
->
-it is also possible to use single quotes as in
-<TT
-CLASS="LITERAL"
->'<TT
-CLASS="REPLACEABLE"
-><I
->value<SUB
->k</SUB
-></I
-></TT
->'</TT
->.
-Note that you cannot use double quotes literally within the value of the
-attribute if double quotes are the delimiters; the same applies to single
-quotes. In general, you cannot use < and & as characters in attribute
-values. It is possible to include the paraphrases &lt;, &gt;,
-&amp;, &apos;, and &quot; (and any other reference to a general
-entity as long as the entity is not defined by an external file) as well as
-&#<TT
-CLASS="REPLACEABLE"
-><I
->n</I
-></TT
->;.</P
-><P
->Before you can use an attribute you must declare it. An ATTLIST declaration
-looks as follows:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ATTLIST <TT
-CLASS="REPLACEABLE"
-><I
->element-name</I
-></TT
->
- <TT
-CLASS="REPLACEABLE"
-><I
->attribute-name</I
-></TT
-> <TT
-CLASS="REPLACEABLE"
-><I
->attribute-type</I
-></TT
-> <TT
-CLASS="REPLACEABLE"
-><I
->attribute-default</I
-></TT
->
- ...
- <TT
-CLASS="REPLACEABLE"
-><I
->attribute-name</I
-></TT
-> <TT
-CLASS="REPLACEABLE"
-><I
->attribute-type</I
-></TT
-> <TT
-CLASS="REPLACEABLE"
-><I
->attribute-default</I
-></TT
->
-></PRE
->
-
-There are a lot of types, but the most important are:
-
-<P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->CDATA</TT
->: Every string is allowed as attribute value.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->NMTOKEN</TT
->: Every nametoken is allowed as attribute
-value. Nametokens consist (mainly) of letters, digits, ., :, -, _ in arbitrary
-order.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->NMTOKENS</TT
->: A space-separated list of nametokens is allowed as
-attribute value.</P
-></LI
-></UL
->
-
-The most interesting default declarations are:
-
-<P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->#REQUIRED</TT
->: The attribute must be specified.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->#IMPLIED</TT
->: The attribute can be specified but also can be
-left out. The application can find out whether the attribute was present or
-not. </P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->"<TT
-CLASS="REPLACEABLE"
-><I
->value</I
-></TT
->"</TT
-> or
-<TT
-CLASS="LITERAL"
->'<TT
-CLASS="REPLACEABLE"
-><I
->value</I
-></TT
->'</TT
->: This particular value is
-used as default if the attribute is omitted in the element.</P
-></LI
-></UL
-></P
-><BLOCKQUOTE
-CLASS="BLOCKQUOTE"
-><P
-><B
->Example</B
-></P
-><P
->This is a valid attribute declaration for element type <TT
-CLASS="LITERAL"
->r</TT
->:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ATTLIST r
- x CDATA #REQUIRED
- y NMTOKEN #IMPLIED
- z NMTOKENS "one two three"></PRE
->
-
-This means that <TT
-CLASS="LITERAL"
->x</TT
-> is a required attribute that cannot be
-left out, while <TT
-CLASS="LITERAL"
->y</TT
-> and <TT
-CLASS="LITERAL"
->z</TT
-> are optional. The
-XML parser indicates to the application whether <TT
-CLASS="LITERAL"
->y</TT
-> is present or
-not, but if <TT
-CLASS="LITERAL"
->z</TT
-> is missing the default value
-"one two three" is returned automatically. </P
-><P
->This is a valid example of these attributes:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><r x="He said: &quot;I don't like quotes!&quot;" y='1'></PRE
-></P
-></BLOCKQUOTE
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN368"
->1.2.5. Parsed entities</A
-></H2
-><P
->Elements describe the logical structure of the document, while
-<I
-CLASS="EMPHASIS"
->entities</I
-> determine the physical structure. Entities are
-the pieces of text the parser operates on, mostly files and macros. Entities
-may be <I
-CLASS="EMPHASIS"
->parsed</I
-> in which case the parser reads the text and
-interprets it as XML markup, or <I
-CLASS="EMPHASIS"
->unparsed</I
-> which simply
-means that the data of the entity has a foreign format (e.g. a GIF icon).</P
-><P
->If the parsed entity is going to be used as part of the DTD, it
-is called a <I
-CLASS="EMPHASIS"
->parameter entity</I
->. You can declare a parameter
-entity with a fixed text as content by:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY % <TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
-> "<TT
-CLASS="REPLACEABLE"
-><I
->value</I
-></TT
->"></PRE
->
-
-Within the DTD, you can <I
-CLASS="EMPHASIS"
->refer to</I
-> this entity, i.e. read
-the text of the entity, by:
-
-<PRE
-CLASS="PROGRAMLISTING"
->%<TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
->;</PRE
->
-
-Such entities behave like macros, i.e. when they are referred to, the
-macro text is inserted and read instead of the original text.
-
-<BLOCKQUOTE
-CLASS="BLOCKQUOTE"
-><P
-><B
->Example</B
-></P
-><P
->For example, you can declare two elements with the same content model by:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY % model "a | b | c">
-<!ELEMENT x (%model;)>
-<!ELEMENT y (%model;)></PRE
-> </P
-></BLOCKQUOTE
->
-
-If the contents of the entity are given as a string constant, the entity is
-called an <I
-CLASS="EMPHASIS"
->internal</I
-> entity. It is also possible to name a
-file to be used as content (an <I
-CLASS="EMPHASIS"
->external</I
-> entity):
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY % <TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
-> SYSTEM "<TT
-CLASS="REPLACEABLE"
-><I
->file name</I
-></TT
->"></PRE
->
-
-There are some restrictions for parameter entities:
-
-<P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
->If the internal parameter entity contains the first token of a declaration
-(i.e. <TT
-CLASS="LITERAL"
-><!</TT
->), it must also contain the last token of the
-declaration, i.e. the <TT
-CLASS="LITERAL"
->></TT
->. This means that the entity
-either contains a whole number of complete declarations, or some text from the
-middle of one declaration.</P
-><P
-><I
-CLASS="EMPHASIS"
->Illegal:</I
->
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY % e "(a | b | c)>">
-<!ELEMENT x %e;</PRE
-> Because <TT
-CLASS="LITERAL"
-><!</TT
-> is contained in the main
-entity, and the corresponding <TT
-CLASS="LITERAL"
->></TT
-> is contained in the
-entity <TT
-CLASS="LITERAL"
->e</TT
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->If the internal parameter entity contains a left parenthesis, it must also
-contain the corresponding right parenthesis.</P
-><P
-><I
-CLASS="EMPHASIS"
->Illegal:</I
->
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY % e "(a | b | c">
-<!ELEMENT x %e;)></PRE
-> Because <TT
-CLASS="LITERAL"
->(</TT
-> is contained in the entity
-<TT
-CLASS="LITERAL"
->e</TT
->, and the corresponding <TT
-CLASS="LITERAL"
->)</TT
-> is
-contained in the main entity.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->When reading text from an entity, the parser automatically inserts one space
-character before the entity text and one space character after the entity
-text. However, this rule is not applied within the definition of another
-entity.</P
-><P
-><I
-CLASS="EMPHASIS"
->Legal:</I
->
-<PRE
-CLASS="PROGRAMLISTING"
->
-<!ENTITY % suffix "gif">
-<!ENTITY iconfile 'icon.%suffix;'></PRE
-> Because <TT
-CLASS="LITERAL"
->%suffix;</TT
-> is referenced within
-the definition text for <TT
-CLASS="LITERAL"
->iconfile</TT
->, no additional spaces are
-added.</P
-><P
-><I
-CLASS="EMPHASIS"
->Illegal:</I
->
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY % suffix "test">
-<!ELEMENT x.%suffix; ANY></PRE
->
-Because <TT
-CLASS="LITERAL"
->%suffix;</TT
-> is referenced outside the definition
-text of another entity, the parser replaces <TT
-CLASS="LITERAL"
->%suffix;</TT
-> by
-<TT
-CLASS="LITERAL"
-><TT
-CLASS="REPLACEABLE"
-><I
->space</I
-></TT
->test<TT
-CLASS="REPLACEABLE"
-><I
->space</I
-></TT
-></TT
->. </P
-><P
-><I
-CLASS="EMPHASIS"
->Illegal:</I
->
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY % e "(a | b | c)">
-<!ELEMENT x %e;*></PRE
-> Because there is whitespace between <TT
-CLASS="LITERAL"
->)</TT
->
-and <TT
-CLASS="LITERAL"
->*</TT
->, which is illegal.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->An external parameter entity must always consist of a whole number of complete
-declarations.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->In the internal subset of the DTD, a reference to a parameter entity (internal
-or external) is only allowed at positions where a new declaration can start.</P
-></LI
-></UL
-></P
-><P
->If the parsed entity is going to be used in the document instance, it is called
-a <I
-CLASS="EMPHASIS"
->general entity</I
->. Such entities can be used as
-abbreviations for frequent phrases, or to include external files. Internal
-general entities are declared as follows:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY <TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
-> "<TT
-CLASS="REPLACEABLE"
-><I
->value</I
-></TT
->"></PRE
->
-
-External general entities are declared this way:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY <TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
-> SYSTEM "<TT
-CLASS="REPLACEABLE"
-><I
->file name</I
-></TT
->"></PRE
->
-
-References to general entities are written as:
-
-<PRE
-CLASS="PROGRAMLISTING"
->&<TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
->;</PRE
->
-
-The main difference between parameter and general entities is that the former
-are only recognized in the DTD and that the latter are only recognized in the
-document instance. As the DTD is parsed before the document, the parameter
-entities are expanded first; for example it is possible to use the content of a
-parameter entity as the name of a general entity:
-<TT
-CLASS="LITERAL"
->&#38;%name;;</TT
-><A
-NAME="AEN445"
-HREF="#FTN.AEN445"
->[1]</A
->.</P
-><P
->General entities must respect the element hierarchy. This means that there must
-be an end tag for every start tag in the entity value, and that end tags
-without corresponding start tags are not allowed.</P
-><BLOCKQUOTE
-CLASS="BLOCKQUOTE"
-><P
-><B
->Example</B
-></P
-><P
->If the author of a document changes from time to time, it is worthwhile to set up a
-general entity containing the names of the authors. If the author changes, you
-need only change the definition of the entity, and do not need to check all
-occurrences of authors' names:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY authors "Gerd Stolpmann"></PRE
->
-
-In the document text, you can now refer to the author names by writing
-<TT
-CLASS="LITERAL"
->&authors;</TT
->.</P
-><P
-><I
-CLASS="EMPHASIS"
->Illegal:</I
->
-The following two entities are illegal because the elements in the definition
-do not nest properly:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY lengthy-tag "<section textcolor='white' background='graphic'>">
-<!ENTITY nonsense "<a></b>"></PRE
-></P
-></BLOCKQUOTE
-><P
->Earlier in this introduction we explained that there are substitutes for
-reserved characters: &lt;, &gt;, &amp;, &apos;, and
-&quot;. These are simply predefined general entities; note that they are
-the only predefined entities. It is allowed to define these entities again
-as long as the meaning is unchanged.</P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN463"
->1.2.6. Notations and unparsed entities</A
-></H2
-><P
->Unparsed entities have a foreign format and can thus not be read by the XML
-parser. Unparsed entities are always external. The format of an unparsed entity
-must have been declared; such a format is called a
-<I
-CLASS="EMPHASIS"
->notation</I
->. The entity can then be declared by referring to
-this notation. As unparsed entities do not contain XML text, it is not possible
-to include them directly into the document; you can only declare attributes
-such that names of unparsed entities are acceptable values.</P
-><P
->As you can see, unparsed entities are too complicated to be of much practical
-use. It is almost always better to simply pass the name of the data file as
-normal attribute value, and let the application recognize and process the
-foreign format. </P
-></DIV
-></DIV
-><H3
-CLASS="FOOTNOTES"
->Notes</H3
-><TABLE
-BORDER="0"
-CLASS="FOOTNOTES"
-WIDTH="100%"
-><TR
-><TD
-ALIGN="LEFT"
-VALIGN="TOP"
-WIDTH="5%"
-><A
-NAME="FTN.AEN445"
-HREF="x107.html#AEN445"
->[1]</A
-></TD
-><TD
-ALIGN="LEFT"
-VALIGN="TOP"
-WIDTH="95%"
-><P
->This construct is only
-allowed within the definition of another entity; otherwise extra spaces would
-be added (as explained above). Such indirection is not recommended.</P
-><P
->Complete example:
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY % variant "a"> <!-- or "b" -->
-<!ENTITY text-a "This is text A.">
-<!ENTITY text-b "This is text B.">
-<!ENTITY text "&#38;text-%variant;;"></PRE
->
-You can now write <TT
-CLASS="LITERAL"
->&text;</TT
-> in the document instance, and
-depending on the value of <TT
-CLASS="LITERAL"
->variant</TT
-> either
-<TT
-CLASS="LITERAL"
->text-a</TT
-> or <TT
-CLASS="LITERAL"
->text-b</TT
-> is inserted.</P
-></TD
-></TR
-></TABLE
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-><A
-HREF="c36.html"
->Prev</A
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="index.html"
->Home</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-><A
-HREF="x468.html"
->Next</A
-></TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
->What is XML?</TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="c36.html"
->Up</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
->A complete example: The <I
-CLASS="EMPHASIS"
->readme</I
-> DTD</TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->The class type extension</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="The objects representing the document"
-HREF="c893.html"><LINK
-REL="PREVIOUS"
-TITLE="The class type node"
-HREF="x939.html"><LINK
-REL="NEXT"
-TITLE="Details of the mapping from XML text to the tree representation"
-HREF="x1496.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="SECT1"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="x939.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
->Chapter 3. The objects representing the document</TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-><A
-HREF="x1496.html"
->Next</A
-></TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN1439"
->3.3. The class type <TT
-CLASS="LITERAL"
->extension</TT
-></A
-></H1
-><P
-> <PRE
-CLASS="PROGRAMLISTING"
->class type [ 'node ] extension =
- object ('self)
- method clone : 'self
- (* "clone" should return an exact deep copy of the object. *)
- method node : 'node
- (* "node" returns the corresponding node of this extension. This method
- * is intended to return exactly what has previously been set by "set_node".
- *)
- method set_node : 'node -> unit
- (* "set_node" is invoked once the extension is associated to a new
- * node object.
- *)
- end</PRE
->
-
-This is the type of classes used for node extensions. For every node of the
-document tree, there is not only the <TT
-CLASS="LITERAL"
->node</TT
-> object, but also
-an <TT
-CLASS="LITERAL"
->extension</TT
-> object. The latter has minimal
-functionality; it has only the necessary methods to be attached to the node
-object containing the details of the node instance. The extension object is
-called extension because its purpose is extensibility.</P
-><P
->For various reasons, it is impossible to derive the
-<TT
-CLASS="LITERAL"
->node</TT
-> classes (i.e. <TT
-CLASS="LITERAL"
->element_impl</TT
-> and
-<TT
-CLASS="LITERAL"
->data_impl</TT
->) such that the subclasses can be extended by new
-methods. But
-subclassing nodes is a great feature, because it allows the user to provide
-different classes for different types of nodes. The extension objects are a
-workaround that is as powerful as direct subclassing; the cost is
-some notational overhead.</P
-><DIV
-CLASS="FIGURE"
-><A
-NAME="EXTENSION-GENERAL"
-></A
-><P
-><B
->Figure 3-6. The structure of nodes and extensions</B
-></P
-><P
-><IMG
-SRC="pic/extension_general.gif"></P
-></DIV
-><P
->The picture shows how the nodes and extensions are linked
-together. Every node has a reference to its extension, and every extension has
-a reference to its node. The methods <TT
-CLASS="LITERAL"
->extension</TT
-> and
-<TT
-CLASS="LITERAL"
->node</TT
-> follow these references; a typical phrase is
-
-<PRE
-CLASS="PROGRAMLISTING"
->self # node # attribute "xy"</PRE
->
-
-to get the value of an attribute from a method defined in the extension object;
-or
-
-<PRE
-CLASS="PROGRAMLISTING"
->self # node # iter
- (fun n -> n # extension # my_method ...)</PRE
->
-
-to iterate over the subnodes and to call <TT
-CLASS="LITERAL"
->my_method</TT
-> of the
-corresponding extension objects.</P
-><P
->Note that extension objects do not have references to subnodes
-(or "subextensions") themselves; in order to get one of the children of an
-extension you must first go to the node object, then get the child node, and
-finally reach the extension that is logically the child of the extension you
-started with.</P
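-><P
->The following small sketch shows this navigation in code; it assumes that
-sub_nodes is the node method returning the list of child nodes (this method is
-not introduced in this section, see the node class type):
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* From an extension, collect the extensions of the logical children:
- * go to the node object, get the child nodes, and take their extensions.
- *)
-let child_extensions ext =
-  List.map (fun n -> n # extension) (ext # node # sub_nodes)</PRE
-></P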
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1460"
->3.3.1. How to define an extension class</A
-></H2
-><P
->At a minimum, you must define the methods
-<TT
-CLASS="LITERAL"
->clone</TT
->, <TT
-CLASS="LITERAL"
->node</TT
->, and
-<TT
-CLASS="LITERAL"
->set_node</TT
-> such that your class is compatible with the type
-<TT
-CLASS="LITERAL"
->extension</TT
->. The method <TT
-CLASS="LITERAL"
->set_node</TT
-> is called
-during the initialization of the node, or after a node has been cloned; the
-node object invokes <TT
-CLASS="LITERAL"
->set_node</TT
-> on the extension object to tell
-it that this node is now the object the extension is linked to. The extension
-must return the node object passed as argument of <TT
-CLASS="LITERAL"
->set_node</TT
->
-when the <TT
-CLASS="LITERAL"
->node</TT
-> method is called.</P
-><P
->The <TT
-CLASS="LITERAL"
->clone</TT
-> method must return a copy of the
-extension object; at least the object itself must be duplicated, but if
-required, the copy should deeply duplicate all objects and values that are
-referred to by the extension, too. Whether this is required depends on the
-application; <TT
-CLASS="LITERAL"
->clone</TT
-> is invoked by the node object when one of
-its cloning methods is called.</P
-><P
->A good starting point for an extension class:
-
-<PRE
-CLASS="PROGRAMLISTING"
->class custom_extension =
- object (self)
-
- val mutable node = (None : custom_extension node option)
-
- method clone = {< >}
-
- method node =
- match node with
- None ->
- assert false
- | Some n -> n
-
- method set_node n =
- node <- Some n
-
- end</PRE
->
-
-This class is compatible with <TT
-CLASS="LITERAL"
->extension</TT
->. The purpose of
-defining such a class is, of course, to add further methods; and you can do so
-without restriction. </P
-><P
->Often you want more than one extension class. In this case,
-the simplest approach is that all your classes (for one kind of document) have
-the same type (with respect to the interface; i.e. it does not matter if your
-classes differ in the defined private methods and instance variables, but
-public methods count). This approach avoids lots of coercions and problems with
-type incompatibilities. It is simple to implement:
-
-<PRE
-CLASS="PROGRAMLISTING"
->class virtual custom_extension =
- object (self)
- val mutable node = (None : custom_extension node option)
-
- method clone = ... (* see above *)
- method node = ... (* see above *)
- method set_node n = ... (* see above *)
-
- method virtual my_method1 : ...
- method virtual my_method2 : ...
- ... (* etc. *)
- end
-
-class custom_extension_kind_A =
- object (self)
- inherit custom_extension
-
- method my_method1 = ...
- method my_method2 = ...
- end
-
-class custom_extension_kind_B =
- object (self)
- inherit custom_extension
-
- method my_method1 = ...
- method my_method2 = ...
- end</PRE
->
-
-If a class does not need a method (e.g. because it does not make sense, or it
-would violate some important condition), it is possible to define the method
-and to always raise an exception when the method is invoked
-(e.g. <TT
-CLASS="LITERAL"
->assert false</TT
->).</P
-><P
->This leads to a strong recommendation: do not try to further
-specialize the types of extension objects. It is difficult, sometimes even
-impossible, and almost never worthwhile.</P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1481"
->3.3.2. How to bind extension classes to element types</A
-></H2
-><P
->Once you have defined your extension classes, you can bind them
-to element types. The simplest case is that you have only one class and that
-this class is to be always used. The parsing functions in the module
-<TT
-CLASS="LITERAL"
->Pxp_yacc</TT
-> take a <TT
-CLASS="LITERAL"
->spec</TT
-> argument which
-can be customized. If your single class has the name <TT
-CLASS="LITERAL"
->c</TT
->,
-this argument should be
-
-<PRE
-CLASS="PROGRAMLISTING"
->let spec =
- make_spec_from_alist
- ~data_exemplar: (new data_impl c)
- ~default_element_exemplar: (new element_impl c)
- ~element_alist: []
- ()</PRE
->
-
-This means that data nodes will be created from the exemplar passed by
-~data_exemplar and that all element nodes will be made from the exemplar
-specified by ~default_element_exemplar. In ~element_alist, you can
-specify that different exemplars are to be used for different element types; but
-this is an optional feature. If you do not need it, pass the empty list.</P
-><P
->Remember that an exemplar is a (node, extension) pair that serves as a pattern
-when new nodes (and the corresponding extension objects) are added to the
-document tree. In this case, the exemplar contains <TT
-CLASS="LITERAL"
->c</TT
-> as
-extension, and when nodes are created, the exemplar is cloned, and cloning
-also makes a copy of <TT
-CLASS="LITERAL"
->c</TT
-> such that all nodes of the document
-tree will have a copy of <TT
-CLASS="LITERAL"
->c</TT
-> as extension.</P
-><P
->The <TT
-CLASS="LITERAL"
->~element_alist</TT
-> argument can bind
-specific element types to specific exemplars; as exemplars may be instances of
-different classes it is effectively possible to bind element types to
-classes. For example, if the element type "p" is implemented by class "c_p",
-and "q" is realized by "c_q", you can pass the following value:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let spec =
- make_spec_from_alist
- ~data_exemplar: (new data_impl c)
- ~default_element_exemplar: (new element_impl c)
- ~element_alist:
- [ "p", new element_impl c_p;
- "q", new element_impl c_q;
- ]
- ()</PRE
->
-
-The extension object <TT
-CLASS="LITERAL"
->c</TT
-> is still used for all data nodes and
-for all other element types.</P
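-><P
->To actually use such a spec, it is passed to one of the parsing functions. A
-minimal sketch (the names parse_document_entity, default_config, and from_file
-are assumed to be available in Pxp_yacc, and doc # root is assumed to return
-the root node; see pxp_yacc.mli and pxp_document.mli for the exact signatures):
-
-<PRE
-CLASS="PROGRAMLISTING"
->open Pxp_yacc
-
-(* Parse a file with the spec defined above; every node of the resulting
- * tree carries a copy of the corresponding exemplar's extension object.
- *)
-let doc = parse_document_entity default_config (from_file "doc.xml") spec
-let root_ext = doc # root # extension   (* extension of the root element *)</PRE
-></P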
-></DIV
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-><A
-HREF="x939.html"
->Prev</A
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="index.html"
->Home</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-><A
-HREF="x1496.html"
->Next</A
-></TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
->The class type <TT
-CLASS="LITERAL"
->node</TT
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="c893.html"
->Up</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
->Details of the mapping from XML text to the tree representation</TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->Details of the mapping from XML text to the tree representation</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="The objects representing the document"
-HREF="c893.html"><LINK
-REL="PREVIOUS"
-TITLE="The class type extension"
-HREF="x1439.html"><LINK
-REL="NEXT"
-TITLE="Configuring and calling the parser"
-HREF="c1567.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="SECT1"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="x1439.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
->Chapter 3. The objects representing the document</TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-><A
-HREF="c1567.html"
->Next</A
-></TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN1496"
->3.4. Details of the mapping from XML text to the tree representation</A
-></H1
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1498"
->3.4.1. The representation of character-free elements</A
-></H2
-><P
->If an element declaration does not allow the element to
-contain character data, the following rules apply.</P
-><P
->If the element must be empty, i.e. it is declared with the
-keyword <TT
-CLASS="LITERAL"
->EMPTY</TT
->, the element instance must be effectively
-empty (it must not even contain whitespace characters). The parser guarantees
-that a declared <TT
-CLASS="LITERAL"
->EMPTY</TT
-> element never contains a data
-node, even if the data node represents the empty string.</P
-><P
->If the element declaration only permits other elements to occur
-within that element but not character data, it is still possible to insert
-whitespace characters between the subelements. The parser ignores these
-characters, too, and does not create data nodes for them.</P
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->Example. </B
->Consider the following element types:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT x ( #PCDATA | z )* >
-<!ELEMENT y ( z )* >
-<!ELEMENT z EMPTY></PRE
->
-
-Only <TT
-CLASS="LITERAL"
->x</TT
-> may contain character data, the keyword
-<TT
-CLASS="LITERAL"
->#PCDATA</TT
-> indicates this. The other types are character-free. </P
-></DIV
-><P
->The XML term
-
-<PRE
-CLASS="PROGRAMLISTING"
-><x><z/> <z/></x></PRE
->
-
-will be internally represented by an element node for <TT
-CLASS="LITERAL"
->x</TT
->
-with three subnodes: the first <TT
-CLASS="LITERAL"
->z</TT
-> element, a data node
-containing the space character, and the second <TT
-CLASS="LITERAL"
->z</TT
-> element.
-In contrast to this, the term
-
-<PRE
-CLASS="PROGRAMLISTING"
-><y><z/> <z/></y></PRE
->
-
-is represented by an element node for <TT
-CLASS="LITERAL"
->y</TT
-> with only
-<I
-CLASS="EMPHASIS"
->two</I
-> subnodes, the two <TT
-CLASS="LITERAL"
->z</TT
-> elements. There
-is no data node for the space character because spaces are ignored in the
-character-free element <TT
-CLASS="LITERAL"
->y</TT
->.</P
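-><P
->A short sketch of how this difference becomes visible in code (assuming that
-sub_nodes is the node method returning the list of children):
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* For the <x>...</x> term above this returns 3 (z, data node, z);
- * for the <y>...</y> term it returns 2 (the two z elements). *)
-let number_of_children n = List.length (n # sub_nodes)</PRE
-></P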
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1521"
->3.4.2. The representation of character data</A
-></H2
-><P
->The XML specification allows all Unicode characters in XML
-texts. This parser can be configured such that UTF-8 is used to represent the
-characters internally; however, the default character encoding is
-ISO-8859-1. (Currently, no other encodings are possible for the internal string
-representation; the type <TT
-CLASS="LITERAL"
->Pxp_types.rep_encoding</TT
-> enumerates
-the possible encodings. In principle, the parser could use any encoding that is
-ASCII-compatible, but there are currently only lexical analyzers for UTF-8 and
-ISO-8859-1. It is currently impossible to use UTF-16 or UCS-4 as internal
-encodings (or other multibyte encodings which are not ASCII-compatible) unless
-major parts of the parser are rewritten - unlikely...)</P
-><P
->The internal encoding may be different from the external encoding (specified
-in the XML declaration <TT
-CLASS="LITERAL"
-><?xml ... encoding="..."?></TT
->); in
-this case the strings are automatically converted to the internal encoding.</P
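-><P
->A sketch of how the internal encoding can be selected (this assumes that the
-parser configuration is a record with a field called encoding, and that
-default_config is provided by Pxp_yacc; see pxp_types.mli for the
-authoritative definition):
-
-<PRE
-CLASS="PROGRAMLISTING"
->open Pxp_yacc
-
-(* Represent all character data internally as UTF-8 instead of ISO-8859-1: *)
-let config = { default_config with encoding = `Enc_utf8 }</PRE
-></P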
-><P
->If the internal encoding is ISO-8859-1, it is possible that there are
-characters that cannot be represented. In this case, the parser ignores such
-characters and prints a warning (to the <TT
-CLASS="LITERAL"
->collect_warnings</TT
->
-object that must be passed when the parser is called).</P
-><P
->The XML specification allows lines to be separated by single LF
-characters, by CR LF character sequences, or by single CR
-characters. Internally, these separators are always converted to single LF
-characters.</P
-><P
->The parser guarantees that there are never two adjacent data
-nodes; if necessary, data material that would otherwise be represented by
-several nodes is collapsed into one node. Note that you can still create node
-trees with adjacent data nodes; however, the parser does not return such trees.</P
-><P
->Note that CDATA sections are not represented specially; such
-sections are added to the current data material that is being collected for the
-next data node.</P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1532"
->3.4.3. The representation of entities within documents</A
-></H2
-><P
-><I
-CLASS="EMPHASIS"
->Entities are not represented within
-documents!</I
-> If the parser finds an entity reference in the document
-content, the reference is immediately expanded, and the parser reads the
-expansion text instead of the reference.</P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1536"
->3.4.4. The representation of attributes</A
-></H2
-><P
->As attribute
-values are composed of Unicode characters, too, the same problems with the
-character encoding arise as for character material. Attribute values are
-converted to the internal encoding, too; and if there are characters that
-cannot be represented, these are dropped, and a warning is printed.</P
-><P
->Attribute values are normalized before they are returned by
-methods like <TT
-CLASS="LITERAL"
->attribute</TT
->. First, any remaining entity
-references are expanded; if necessary, expansion is performed recursively.
-Second, newline characters (any of LF, CR LF, or CR characters) are converted
-to single space characters. Note that especially the latter action is
-prescribed by the XML standard (but <TT
-CLASS="LITERAL"
->&#10;</TT
-> is not converted,
-so that it is still possible to include line feeds in attributes).</P
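-><P
->A small sketch of reading a normalized attribute value (the constructors
-Value, Valuelist, and Implied_value are assumed to belong to the attribute
-value type declared in Pxp_types; see pxp_types.mli):
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* Return attribute "x" of node n as a string, or a replacement text
- * if the attribute was not specified. *)
-let get_x n =
-  match n # attribute "x" with
-      Value s       -> s
-    | Valuelist l   -> String.concat " " l
-    | Implied_value -> "(not specified)"</PRE
-></P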
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1542"
->3.4.5. The representation of processing instructions</A
-></H2
-><P
->Processing instructions are parsed to some extent: The first word of the
-PI is called the target, and it is stored separated from the rest of the PI:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><?target rest?></PRE
->
-
-The exact location where a PI occurs is not represented (by default). The
-parser puts the PI into the object that represents the embracing construct (an
-element, a DTD, or the whole document); that means you can find out which PIs
-occur in a certain element, in the DTD, or in the whole document, but you
-cannot look up the exact position within the construct.</P
-><P
->If you require the exact location of PIs, it is possible to
-create extra nodes for them. This mode is controlled by the option
-<TT
-CLASS="LITERAL"
->enable_pinstr_nodes</TT
->. The additional nodes have the node type
-<TT
-CLASS="LITERAL"
->T_pinstr <TT
-CLASS="REPLACEABLE"
-><I
->target</I
-></TT
-></TT
->, and are created
-from special exemplars contained in the <TT
-CLASS="LITERAL"
->spec</TT
-> (see
-pxp_document.mli).</P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1551"
->3.4.6. The representation of comments</A
-></H2
-><P
->Normally, comments are not represented; they are dropped by
-default. However, if you require them, it is possible to create
-<TT
-CLASS="LITERAL"
->T_comment</TT
-> nodes for them. This mode can be specified by the
-option <TT
-CLASS="LITERAL"
->enable_comment_nodes</TT
->. Comment nodes are created from
-special exemplars contained in the <TT
-CLASS="LITERAL"
->spec</TT
-> (see
-pxp_document.mli). You can access the contents of comments through the
-method <TT
-CLASS="LITERAL"
->comment</TT
->.</P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1558"
->3.4.7. The attributes <TT
-CLASS="LITERAL"
->xml:lang</TT
-> and
-<TT
-CLASS="LITERAL"
->xml:space</TT
-></A
-></H2
-><P
->These attributes are not supported specially; they are handled
-like any other attribute.</P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1563"
->3.4.8. And what about namespaces?</A
-></H2
-><P
->Currently, there is no special support for namespaces.
-However, the parser allows the colon to occur in names, so that it is
-possible to implement namespaces on top of the current API.</P
-><P
->Some future release of PXP will support namespaces as a built-in
-feature...</P
-></DIV
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-><A
-HREF="x1439.html"
->Prev</A
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="index.html"
->Home</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-><A
-HREF="c1567.html"
->Next</A
-></TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
->The class type <TT
-CLASS="LITERAL"
->extension</TT
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="c893.html"
->Up</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
->Configuring and calling the parser</TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->Resolvers and sources</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="Configuring and calling the parser"
-HREF="c1567.html"><LINK
-REL="PREVIOUS"
-TITLE="Configuring and calling the parser"
-HREF="c1567.html"><LINK
-REL="NEXT"
-TITLE="The DTD classes"
-HREF="x1812.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="SECT1"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="c1567.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
->Chapter 4. Configuring and calling the parser</TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-><A
-HREF="x1812.html"
->Next</A
-></TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN1629"
->4.2. Resolvers and sources</A
-></H1
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1631"
->4.2.1. Using the built-in resolvers (called sources)</A
-></H2
-><P
->The type <TT
-CLASS="LITERAL"
->source</TT
-> enumerates the two
-possibilities for where the document to parse comes from.
-
-<PRE
-CLASS="PROGRAMLISTING"
->type source =
- Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
- | ExtID of (ext_id * Pxp_reader.resolver)</PRE
->
-
-You normally need not worry about this type, as there are convenience
-functions that create <TT
-CLASS="LITERAL"
->source</TT
-> values:
-
-
- <P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->from_file s</TT
->: The document is read from
-file <TT
-CLASS="LITERAL"
->s</TT
->; you may specify absolute or relative path names.
-The file name must be encoded as a UTF-8 string.</P
-><P
->There is an optional argument <TT
-CLASS="LITERAL"
->~system_encoding</TT
->
-specifying the character encoding that is used for file names in the file
-system. For example, if this encoding is ISO-8859-1 and <TT
-CLASS="LITERAL"
->s</TT
-> is
-also an ISO-8859-1 string, you can form the source:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let s_utf8 = recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s in
-from_file ~system_encoding:`Enc_iso88591 s_utf8</PRE
-></P
-><P
->This <TT
-CLASS="LITERAL"
->source</TT
-> has the advantage that
-it is able to resolve inner external entities; i.e. if your document includes
-data from another file (using the <TT
-CLASS="LITERAL"
->SYSTEM</TT
-> attribute), this
-mode will find that file. However, this mode cannot resolve
-<TT
-CLASS="LITERAL"
->PUBLIC</TT
-> identifiers nor <TT
-CLASS="LITERAL"
->SYSTEM</TT
-> identifiers
-other than "file:".</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->from_channel ch</TT
->: The document is read
-from the channel <TT
-CLASS="LITERAL"
->ch</TT
->. In general, this source also supports
-file URLs found in the document; however, by default only absolute URLs are
-understood. It is possible to associate an ID with the channel such that the
-resolver knows how to interpret relative URLs:
-
-<PRE
-CLASS="PROGRAMLISTING"
->from_channel ~id:(System "file:///dir/dir1/") ch</PRE
->
-
-There is also the ~system_encoding argument specifying how file names are
-encoded. - The example from above can also be written (but it is no
-longer possible to interpret relative URLs because there is no ~id argument,
-and computing this argument is relatively complicated because it must
-be a valid URL):
-
-<PRE
-CLASS="PROGRAMLISTING"
->let ch = open_in s in
-let src = from_channel ~system_encoding:`Enc_iso88591 ch in
-...;
-close_in ch</PRE
-></P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->from_string s</TT
->: The string
-<TT
-CLASS="LITERAL"
->s</TT
-> is the document to parse. This mode is not able to
-interpret file names of <TT
-CLASS="LITERAL"
->SYSTEM</TT
-> clauses, nor it can look up
-<TT
-CLASS="LITERAL"
->PUBLIC</TT
-> identifiers. </P
-><P
->Normally, the encoding of the string is detected as usual
-by analyzing the XML declaration, if any. However, it is also possible to
-specify the encoding directly:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let src = from_string ~fixenc:`Enc_iso88592 s</PRE
-></P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->ExtID (id, r)</TT
->: The document to parse
-is denoted by the identifier <TT
-CLASS="LITERAL"
->id</TT
-> (either a
-<TT
-CLASS="LITERAL"
->SYSTEM</TT
-> or <TT
-CLASS="LITERAL"
->PUBLIC</TT
-> clause), and this
-identifier is interpreted by the resolver <TT
-CLASS="LITERAL"
->r</TT
->. Use this mode
-if you have written your own resolver.</P
-><P
->Which character sets are possible depends on the passed
-resolver <TT
-CLASS="LITERAL"
->r</TT
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->Entity (get_entity, r)</TT
->: The document
-to parse is returned by the function invocation <TT
-CLASS="LITERAL"
->get_entity
-dtd</TT
->, where <TT
-CLASS="LITERAL"
->dtd</TT
-> is the DTD object to use (it may be
-empty). Inner external references occurring in this entity are resolved using
-the resolver <TT
-CLASS="LITERAL"
->r</TT
->.</P
-><P
->Which character sets are possible depends on the passed
-resolver <TT
-CLASS="LITERAL"
->r</TT
->.</P
-></LI
-></UL
-></P
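-><P
->For example, the simplest way to parse a string with an inline DTD looks
-roughly as follows (default_config and default_spec are assumed to be the
-defaults provided by Pxp_yacc):
-
-<PRE
-CLASS="PROGRAMLISTING"
->open Pxp_yacc
-
-let doc =
-  parse_document_entity
-    default_config
-    (from_string
-       "<?xml version='1.0'?>
-        <!DOCTYPE x [ <!ELEMENT x (#PCDATA)> ]>
-        <x>Hello world</x>")
-    default_spec</PRE
-></P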
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1682"
->4.2.2. The resolver API</A
-></H2
-><P
->A resolver is an object that can be opened like a file, but you
-do not pass the file name to the resolver, but the XML identifier of the entity
-to read from (either a <TT
-CLASS="LITERAL"
->SYSTEM</TT
-> or <TT
-CLASS="LITERAL"
->PUBLIC</TT
->
-clause). When opened, the resolver must return the
-<TT
-CLASS="LITERAL"
->Lexing.lexbuf</TT
-> that reads the characters. The resolver can
-be closed, and it can be cloned. Furthermore, it is possible to tell the
-resolver which character set it should assume. The following is taken from Pxp_reader:
-
-<PRE
-CLASS="PROGRAMLISTING"
->exception Not_competent
-exception Not_resolvable of exn
-
-class type resolver =
- object
- method init_rep_encoding : rep_encoding -> unit
- method init_warner : collect_warnings -> unit
- method rep_encoding : rep_encoding
- method open_in : ext_id -> Lexing.lexbuf
- method close_in : unit
- method change_encoding : string -> unit
- method clone : resolver
- method close_all : unit
- end</PRE
->
-
-The resolver object must work as follows:</P
-><P
-> <P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
->When the parser is called, it passes the warner object and
-the internal encoding to the resolver by invoking
-<TT
-CLASS="LITERAL"
->init_warner</TT
-> and <TT
-CLASS="LITERAL"
->init_rep_encoding</TT
->. The
-resolver should store these values. The method <TT
-CLASS="LITERAL"
->rep_encoding</TT
->
-should return the internal encoding.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->If the parser wants to read from the resolver, it invokes
-the method <TT
-CLASS="LITERAL"
->open_in</TT
->. Either the resolver succeeds, in which
-case the <TT
-CLASS="LITERAL"
->Lexing.lexbuf</TT
-> reading from the file or stream must
-be returned, or opening fails. In the latter case the method implementation
-should raise an exception (see below).</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->If the parser finishes reading, it calls the
-<TT
-CLASS="LITERAL"
->close_in</TT
-> method.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->If the parser finds a reference to another external
-entity in the input stream, it calls <TT
-CLASS="LITERAL"
->clone</TT
-> to get a second
-resolver which must be initially closed (not yet connected with an input
-stream). The parser then invokes <TT
-CLASS="LITERAL"
->open_in</TT
-> and the other
-methods as described.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->If you already know the character set of the input
-stream, you should recode it to the internal encoding, and define the method
-<TT
-CLASS="LITERAL"
->change_encoding</TT
-> as an empty method.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->If you want to support multiple external character sets,
-the object must follow a much more complicated protocol. Directly after
-<TT
-CLASS="LITERAL"
->open_in</TT
-> has been called, the resolver must return a lexical
-buffer that only reads one byte at a time. This is only possible if you create
-the lexical buffer with <TT
-CLASS="LITERAL"
->Lexing.from_function</TT
->; the function
-must then always return 1 if the EOF is not yet reached, and 0 if EOF is
-reached. If the parser has read the first line of the document, it will invoke
-<TT
-CLASS="LITERAL"
->change_encoding</TT
-> to tell the resolver which character set to
-assume. From this moment, the object can return more than one byte at once. The
-argument of <TT
-CLASS="LITERAL"
->change_encoding</TT
-> is either the parameter of the
-"encoding" attribute of the XML declaration, or the empty string if there is
-not any XML declaration or if the declaration does not contain an encoding
-attribute. </P
-><P
->At the beginning the resolver must only return one
-character every time something is read from the lexical buffer. The reason for
-this is that otherwise you would not know exactly at which position in the
-input stream the character set changes.</P
-><P
->If you want automatic recognition of the character set,
-it is up to the resolver object to implement this.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->If an error occurs, the parser calls the method
-<TT
-CLASS="LITERAL"
->close_all</TT
-> for the top-level resolver; this method should
-close itself (if not already done) and all clones.</P
-></LI
-></UL
-></P
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->Exceptions. </B
->It is possible to chain resolvers such that when the first resolver is not able
-to open the entity, the other resolvers of the chain are tried in turn. The
-method <TT
-CLASS="LITERAL"
->open_in</TT
-> should raise the exception
-<TT
-CLASS="LITERAL"
->Not_competent</TT
-> to indicate that the next resolver should try
-to open the entity. If the resolver is able to handle the ID, but some other
-error occurs, the exception <TT
-CLASS="LITERAL"
->Not_resolvable</TT
-> should be raised
-to force that the chain breaks.
- </P
-></DIV
-><P
->Example: How to define a resolver that is equivalent to
-from_string: ...</P
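-><P
->A rough sketch of such a resolver is shown below. It ignores the character set
-protocol completely (it behaves as if ~fixenc had been specified), and the
-method and type names are taken from the class type above; the details may
-need adjustment against the real pxp_reader.mli.
-
-<PRE
-CLASS="PROGRAMLISTING"
->open Pxp_types
-
-(* Always reads from the given string, no matter which ext_id is passed.
- * The string is assumed to be already encoded in the internal encoding,
- * therefore change_encoding does nothing.
- *)
-class resolve_this_string the_string =
-  object (self)
-    val mutable encoding = (`Enc_iso88591 : rep_encoding)
-    method init_rep_encoding e = encoding <- e
-    method init_warner (_ : collect_warnings) = ()
-    method rep_encoding = encoding
-    method open_in (_ : ext_id) = Lexing.from_string the_string
-    method close_in = ()
-    method change_encoding (_ : string) = ()
-    method clone = {< >}
-    method close_all = ()
-  end</PRE
-></P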
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1728"
->4.2.3. Predefined resolver components</A
-></H2
-><P
->There are some classes in Pxp_reader that define common resolver behaviour.
-
-<PRE
-CLASS="PROGRAMLISTING"
->class resolve_read_this_channel :
- ?id:ext_id ->
- ?fixenc:encoding ->
- ?auto_close:bool ->
- in_channel ->
- resolver</PRE
->
-
-Reads from the passed channel (it may even be a pipe). If the
-<TT
-CLASS="LITERAL"
->~id</TT
-> argument is passed to the object, the created resolver
-accepts only this ID. Otherwise all IDs are accepted. - Once the resolver has
-been cloned, it does not accept any ID. This means that this resolver cannot
-handle inner references to external entities. Note that you can combine this
-resolver with another resolver that can handle inner references (such as
-resolve_as_file); see class 'combine' below. - If you pass the
-<TT
-CLASS="LITERAL"
->~fixenc</TT
-> argument, the encoding of the channel is set to the
-passed value, regardless of any auto-recognition or any XML declaration. - If
-<TT
-CLASS="LITERAL"
->~auto_close = true</TT
-> (which is the default), the channel is
-closed after use. If <TT
-CLASS="LITERAL"
->~auto_close = false</TT
->, the channel is
-left open.
- </P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->class resolve_read_any_channel :
- ?auto_close:bool ->
- channel_of_id:(ext_id -> (in_channel * encoding option)) ->
- resolver</PRE
->
-
-This resolver calls the function <TT
-CLASS="LITERAL"
->~channel_of_id</TT
-> to open a
-new channel for the passed <TT
-CLASS="LITERAL"
->ext_id</TT
->. This function must either
-return the channel and the encoding, or it must fail with Not_competent. The
-function must return <TT
-CLASS="LITERAL"
->None</TT
-> as encoding if the default
-mechanism to recognize the encoding should be used. It must return
-<TT
-CLASS="LITERAL"
->Some e</TT
-> if it is already known that the encoding of the
-channel is <TT
-CLASS="LITERAL"
->e</TT
->. If <TT
-CLASS="LITERAL"
->~auto_close = true</TT
->
-(which is the default), the channel is closed after use. If
-<TT
-CLASS="LITERAL"
->~auto_close = false</TT
->, the channel is left open.</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->class resolve_read_url_channel :
- ?base_url:Neturl.url ->
- ?auto_close:bool ->
- url_of_id:(ext_id -> Neturl.url) ->
- channel_of_url:(Neturl.url -> (in_channel * encoding option)) ->
- resolver</PRE
->
-
-When this resolver gets an ID to read from, it calls the function
-<TT
-CLASS="LITERAL"
->~url_of_id</TT
-> to get the corresponding URL. This URL may be a
-relative URL; however, a URL scheme must be used which contains a path. The
-resolver converts the URL to an absolute URL if necessary. The second
-function, <TT
-CLASS="LITERAL"
->~channel_of_url</TT
->, is fed with the absolute URL as
-input. This function opens the resource to read from, and returns the channel
-and the encoding of the resource.</P
-><P
->Both functions, <TT
-CLASS="LITERAL"
->~url_of_id</TT
-> and
-<TT
-CLASS="LITERAL"
->~channel_of_url</TT
->, can raise Not_competent to indicate that
-the object is not able to read from the specified resource. However, there is a
-difference: A Not_competent from <TT
-CLASS="LITERAL"
->~url_of_id</TT
-> is left as it
-is, but a Not_competent from <TT
-CLASS="LITERAL"
->~channel_of_url</TT
-> is converted to
-Not_resolvable. So only <TT
-CLASS="LITERAL"
->~url_of_id</TT
-> decides which URLs are
-accepted by the resolver and which not.</P
-><P
->The function <TT
-CLASS="LITERAL"
->~channel_of_url</TT
-> must return
-<TT
-CLASS="LITERAL"
->None</TT
-> as encoding if the default mechanism to recognize the
-encoding should be used. It must return <TT
-CLASS="LITERAL"
->Some e</TT
-> if it is
-already known that the encoding of the channel is <TT
-CLASS="LITERAL"
->e</TT
->.</P
-><P
->If <TT
-CLASS="LITERAL"
->~auto_close = true</TT
-> (which is the default), the channel is
-closed after use. If <TT
-CLASS="LITERAL"
->~auto_close = false</TT
->, the channel is
-left open.</P
-><P
->Objects of this class contain a base URL relative to which relative URLs are
-interpreted. When creating a new object, you can specify the base URL by
-passing it as <TT
-CLASS="LITERAL"
->~base_url</TT
-> argument. When an existing object is
-cloned, the base URL of the clone is the URL of the original object. - Note
-that the term "base URL" has a strict definition in RFC 1808.</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->class resolve_read_this_string :
- ?id:ext_id ->
- ?fixenc:encoding ->
- string ->
- resolver</PRE
->
-
-Reads from the passed string. If the <TT
-CLASS="LITERAL"
->~id</TT
-> argument is passed
-to the object, the created resolver accepts only this ID. Otherwise all IDs are
-accepted. - Once the resolver has been cloned, it does not accept any ID. This
-means that this resolver cannot handle inner references to external
-entities. Note that you can combine this resolver with another resolver that
-can handle inner references (such as resolve_as_file); see class 'combine'
-below. - If you pass the <TT
-CLASS="LITERAL"
->~fixenc</TT
-> argument, the encoding of
-the string is set to the passed value, regardless of any auto-recognition or
-any XML declaration.</P
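->
-<P
->As a hedged illustration (not part of the original class description), the
-following sketch creates such a resolver for a fixed in-memory string and
-forces its encoding; the XML text is only a placeholder:
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* serve a fixed string; ~fixenc overrides any auto-recognition *)
-let r =
-  new Pxp_reader.resolve_read_this_string
-    ~fixenc:`Enc_iso88591
-    "<?xml version='1.0' encoding='ISO-8859-1'?><x>sample</x>"</PRE
-></P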
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->class resolve_read_any_string :
- string_of_id:(ext_id -> (string * encoding option)) ->
- resolver</PRE
->
-
-This resolver calls the function <TT
-CLASS="LITERAL"
->~string_of_id</TT
-> to get the
-string for the passed <TT
-CLASS="LITERAL"
->ext_id</TT
->. This function must either
-return the string and the encoding, or it must fail with Not_competent. The
-function must return <TT
-CLASS="LITERAL"
->None</TT
-> as encoding if the default
-mechanism to recognize the encoding should be used. It must return
-<TT
-CLASS="LITERAL"
->Some e</TT
-> if it is already known that the encoding of the
-string is <TT
-CLASS="LITERAL"
->e</TT
->.</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->class resolve_as_file :
- ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
- ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
- ?system_encoding:encoding ->
- ?url_of_id:(ext_id -> Neturl.url) ->
- ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
- unit ->
- resolver</PRE
->
-Reads from the local file system. Every file name is interpreted as
-a file name of the local file system, and the referenced file is read.</P
-><P
->The full form of a file URL is: file://host/path, where
-'host' specifies the host system where the file identified by 'path'
-resides. host = "" or host = "localhost" are accepted; other values
-will raise Not_competent. The standard for file URLs is
-defined in RFC 1738.</P
-><P
->Option <TT
-CLASS="LITERAL"
->~file_prefix</TT
->: Specifies how the "file:" prefix of
-file names is handled:
- <P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->`Not_recognized:</TT
-> The prefix is not
-recognized.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->`Allowed:</TT
-> The prefix is allowed but
-not required (the default).</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->`Required:</TT
-> The prefix is
-required.</P
-></LI
-></UL
-></P
-><P
->Option <TT
-CLASS="LITERAL"
->~host_prefix:</TT
-> Specifies how the "//host" phrase of
-file names is handled:
- <P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->`Not_recognized:</TT
-> The prefix is not
-recognized.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->`Allowed:</TT
-> The prefix is allowed but
-not required (the default).</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->`Required:</TT
-> The prefix is
-required.</P
-></LI
-></UL
-></P
-><P
->Option <TT
-CLASS="LITERAL"
->~system_encoding:</TT
-> Specifies the encoding of file
-names of the local file system. Default: UTF-8.</P
-><P
->Options <TT
-CLASS="LITERAL"
->~url_of_id</TT
->, <TT
-CLASS="LITERAL"
->~channel_of_url</TT
->: Not
-for the casual user!</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->class combine :
- ?prefer:resolver ->
- resolver list ->
- resolver</PRE
->
-
-Combines several resolver objects. If a concrete entity with an
-<TT
-CLASS="LITERAL"
->ext_id</TT
-> is to be opened, the combined resolver tries the
-contained resolvers in turn until a resolver accepts opening the entity
-(i.e. it does not raise Not_competent on open_in).</P
-><P
->Clones: If the 'clone' method is invoked before 'open_in', all contained
-resolvers are cloned separately and combined again. If the 'clone' method is
-invoked after 'open_in' (i.e. while the resolver is open), additionally the
-clone of the active resolver is flagged as being preferred, i.e. it is tried
-first. </P
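->
-<P
->As a hedged sketch (the file name "doc.xml" is only a placeholder), a channel
-resolver can be combined with resolve_as_file, so that the main document is
-read from the channel while external entities referenced inside the document
-are still loaded from the file system:
-
-<PRE
-CLASS="PROGRAMLISTING"
->open Pxp_reader
-
-let ch = open_in "doc.xml"
-
-(* the channel resolver is tried first; resolve_as_file handles every
- * entity the channel resolver is not competent for *)
-let r =
-  new combine
-    [ new resolve_read_this_channel ch;
      new resolve_as_file () ]</PRE
-></P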
-></DIV
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-><A
-HREF="c1567.html"
->Prev</A
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="index.html"
->Home</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-><A
-HREF="x1812.html"
->Next</A
-></TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
->Configuring and calling the parser</TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="c1567.html"
->Up</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
->The DTD classes</TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->The DTD classes</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="Configuring and calling the parser"
-HREF="c1567.html"><LINK
-REL="PREVIOUS"
-TITLE="Resolvers and sources"
-HREF="x1629.html"><LINK
-REL="NEXT"
-TITLE="Invoking the parser"
-HREF="x1818.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="SECT1"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="x1629.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
->Chapter 4. Configuring and calling the parser</TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-><A
-HREF="x1818.html"
->Next</A
-></TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN1812"
->4.3. The DTD classes</A
-></H1
-><P
-><I
-CLASS="EMPHASIS"
->Sorry, not yet
-written. Perhaps the interface definition of Pxp_dtd conveys the same information:</I
-></P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
-> (**********************************************************************)
-(* *)
-(* Pxp_dtd: *)
-(* Object model of document type declarations *)
-(* *)
-(**********************************************************************)
-
-(* ======================================================================
- * OVERVIEW
- *
- * class dtd ............... represents the whole DTD, including element
- * declarations, entity declarations, notation
- * declarations, and processing instructions
- * class dtd_element ....... represents an element declaration consisting
- * of a content model and an attribute list
- * declaration
- * class dtd_notation ...... represents a notation declaration
- * class proc_instruction .. represents a processing instruction
- * ======================================================================
- *
- *)
-
-
-class dtd :
- (* Creation:
- * new dtd
- * creates a new, empty DTD object without any declaration, without a root
- * element, without an ID.
- *)
- Pxp_types.collect_warnings ->
- Pxp_types.rep_encoding ->
- object
- method root : string option
- (* get the name of the root element if present *)
-
- method set_root : string -> unit
- (* set the name of the root element. This method can be invoked
- * only once
- *)
-
- method id : Pxp_types.dtd_id option
- (* get the identifier for this DTD *)
-
- method set_id : Pxp_types.dtd_id -> unit
- (* set the identifier. This method can be invoked only once *)
-
- method encoding : Pxp_types.rep_encoding
- (* returns the encoding used for character representation *)
-
-
- method allow_arbitrary : unit
- (* After this method has been invoked, the object changes its behaviour:
- * - elements and notations that have not been added may be used in an
- * arbitrary way; the methods "element" and "notation" indicate this
- * by raising Undeclared instead of Validation_error.
- *)
-
- method disallow_arbitrary : unit
-
- method arbitrary_allowed : bool
- (* Returns whether arbitrary contents are allowed or not. *)
-
- method standalone_declaration : bool
- (* Whether there is a 'standalone' declaration or not. Strictly
- * speaking, this declaration is not part of the DTD, but it is
- * included here because of practical reasons.
- * If not set, this property defaults to 'false'.
- *)
-
- method set_standalone_declaration : bool -> unit
- (* Sets the 'standalone' declaration. *)
-
-
- method add_element : dtd_element -> unit
- (* add the given element declaration to this DTD. Raises Not_found
- * if there is already an element declaration with the same name.
- *)
-
- method add_gen_entity : Pxp_entity.entity -> bool -> unit
- (* add_gen_entity e extdecl:
- * add the entity 'e' as general entity to this DTD (general entities
- * are those represented by &name;). If there is already a declaration
- * with the same name, the second definition is ignored; as an exception to
- * this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
- * may only be redeclared with a definition that is equivalent to the
- * standard definition; otherwise a Validation_error is raised.
- *
- * 'extdecl': 'true' indicates that the entity declaration occurs in
- * an external entity. (Used for the standalone check.)
- *)
-
- method add_par_entity : Pxp_entity.entity -> unit
- (* add the given entity as parameter entity to this DTD (parameter
- * entities are those represented by %name;). If there is already a
- * declaration with the same name, the second definition is ignored.
- *)
-
- method add_notation : dtd_notation -> unit
- (* add the given notation to this DTD. If there is already a declaration
- * with the same name, a Validation_error is raised.
- *)
-
- method add_pinstr : proc_instruction -> unit
- (* add the given processing instruction to this DTD. *)
-
- method element : string -> dtd_element
- (* looks up the element declaration with the given name. Raises
- * Validation_error if the element cannot be found. (If "allow_arbitrary"
- * has been invoked before, Unrestricted is raised instead.)
- *)
-
- method element_names : string list
- (* returns the list of the names of all element declarations. *)
-
- method gen_entity : string -> (Pxp_entity.entity * bool)
- (* let e, extdecl = obj # gen_entity n:
- * looks up the general entity 'e' with the name 'n'. Raises
- * WF_error if the entity cannot be found.
- * 'extdecl': indicates whether the entity declaration occurred in an
- * external entity.
- *)
-
- method gen_entity_names : string list
- (* returns the list of all general entity names *)
-
- method par_entity : string -> Pxp_entity.entity
- (* looks up the parameter entity with the given name. Raises
- * WF_error if the entity cannot be found.
- *)
-
- method par_entity_names : string list
- (* returns the list of all parameter entity names *)
-
- method notation : string -> dtd_notation
- (* looks up the notation declaration with the given name. Raises
- * Validation_error if the notation cannot be found. (If "allow_arbitrary"
- * has been invoked before, Unrestricted is raised instead.)
- *)
-
- method notation_names : string list
- (* Returns the list of the names of all added notations *)
-
- method pinstr : string -> proc_instruction list
- (* looks up all processing instructions with the given target.
- * The "target" is the identifier following "<?".
- * Note: It is not possible to find out the exact position of the
- * processing instruction.
- *)
-
- method pinstr_names : string list
- (* Returns the list of the names (targets) of all added pinstrs *)
-
- method validate : unit
- (* ensures that the DTD is valid. This method is optimized such that
- * actual validation is only performed if the DTD has changed.
- * If the DTD is invalid, mostly a Validation_error is raised,
- * but other exceptions are possible, too.
- *)
-
- method only_deterministic_models : unit
- (* Succeeds if all regexp content models are deterministic.
- * Otherwise Validation_error.
- *)
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> bool -> unit
- (* write_compact_as_latin1 os enc doctype:
- * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a
- * DTD like <!DOCTYPE root [ ... ]> is written. If 'not doctype',
- * only the declarations are written (the material within the
- * square brackets).
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> bool -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
-
-
- (*----------------------------------------*)
- method invalidate : unit
- (* INTERNAL METHOD *)
- method warner : Pxp_types.collect_warnings
- (* INTERNAL METHOD *)
- end
-
-
-
-(* ---------------------------------------------------------------------- *)
-
-and dtd_element : dtd -> string ->
- (* Creation:
- * new dtd_element init_dtd init_name:
- * creates a new dtd_element object for init_dtd with init_name.
- * The strings are represented in the same encoding as init_dtd.
- *)
- object
-
- method name : string
- (* returns the name of the declared element *)
-
- method externally_declared : bool
- (* returns whether the element declaration occurs in an external
- * entity.
- *)
-
- method content_model : Pxp_types.content_model_type
- (* get the content model of this element declaration, or Unspecified *)
-
- method content_dfa : Pxp_dfa.dfa_definition option
- (* return the DFA of the content model if there is a DFA, or None.
- * A DFA exists only for regexp style content models which are
- * deterministic.
- *)
-
- method set_cm_and_extdecl : Pxp_types.content_model_type -> bool -> unit
- (* set_cm_and_extdecl cm extdecl:
- * set the content model to 'cm'. Once the content model is not
- * Unspecified, it cannot be set to a different value again.
- * Furthermore, it is set whether the element occurs in an external
- * entity ('extdecl').
- *)
-
- method encoding : Pxp_types.rep_encoding
- (* Return the encoding of the strings *)
-
- method allow_arbitrary : unit
- (* After this method has been invoked, the object changes its behaviour:
- * - attributes that have not been added may be used in an
- * arbitrary way; the method "attribute" indicates this
- * by raising Undeclared instead of Validation_error.
- *)
-
- method disallow_arbitrary : unit
-
- method arbitrary_allowed : bool
- (* Returns whether arbitrary attributes are allowed or not. *)
-
- method attribute : string ->
- Pxp_types.att_type * Pxp_types.att_default
- (* get the type and default value of a declared attribute, or raise
- * Validation_error if the attribute does not exist.
- * If 'arbitrary_allowed', the exception Undeclared is raised instead
- * of Validation_error.
- *)
-
- method attribute_violates_standalone_declaration :
- string -> string option -> bool
- (* attribute_violates_standalone_declaration name v:
- * Checks whether the attribute 'name' violates the "standalone"
- * declaration if it has value 'v'.
- * The method returns true if:
- * - The attribute declaration occurs in an external entity,
- * and if one of the two conditions holds:
- * - v = None, and there is a default for the attribute value
- * - v = Some s, and the type of the attribute is not CDATA,
- * and s changes if normalized according to the rules of the
- * attribute type.
- *
- * The method raises Validation_error if the attribute does not exist.
- * If 'arbitrary_allowed', the exception Undeclared is raised instead
- * of Validation_error.
- *)
-
- method attribute_names : string list
- (* get the list of all declared attributes *)
-
- method names_of_required_attributes : string list
- (* get the list of all attributes that are specified as required
- * attributes
- *)
-
- method id_attribute_name : string option
- (* Returns the name of the attribute with type ID, or None. *)
-
- method idref_attribute_names : string list
- (* Returns the names of the attributes with type IDREF or IDREFS. *)
-
- method add_attribute : string ->
- Pxp_types.att_type ->
- Pxp_types.att_default ->
- bool ->
- unit
- (* add_attribute name type default extdecl:
- * add an attribute declaration for an attribute with the given name,
- * type, and default value. If there is more than one declaration for
- * an attribute name, the first declaration counts; the other declarations
- * are ignored.
- * 'extdecl': if true, the attribute declaration occurs in an external
- * entity. This property is used to check the "standalone" attribute.
- *)
-
- method validate : unit
- (* checks whether this element declaration (i.e. the content model and
- * all attribute declarations) is valid for the associated DTD.
- * Raises mostly Validation_error if the validation fails.
- *)
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
- (* write_compact_as_latin1 os enc:
- * Writes the <!ELEMENT ... > declaration to 'os' as 'enc'-encoded string.
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
- end
-
-(* ---------------------------------------------------------------------- *)
-
-and dtd_notation : string -> Pxp_types.ext_id -> Pxp_types.rep_encoding ->
- (* Creation:
- * new dtd_notation a_name an_external_ID init_encoding
- * creates a new dtd_notation object with the given name and the given
- * external ID.
- *)
- object
- method name : string
- method ext_id : Pxp_types.ext_id
- method encoding : Pxp_types.rep_encoding
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
- (* write_compact_as_latin1 os enc:
- * Writes the <!NOTATION ... > declaration to 'os' as 'enc'-encoded
- * string.
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
-
- end
-
-(* ---------------------------------------------------------------------- *)
-
-and proc_instruction : string -> string -> Pxp_types.rep_encoding ->
- (* Creation:
- * new proc_instruction a_target a_value
- * creates a new proc_instruction object with the given target string and
- * the given value string.
- * Note: A processing instruction is written as <?target value?>.
- *)
- object
- method target : string
- method value : string
- method encoding : Pxp_types.rep_encoding
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
- (* write os enc:
- * Writes the <?...?> PI to 'os' as 'enc'-encoded string.
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
-
- method parse_pxp_option : (string * string * (string * string) list)
- (* Parses a PI containing a PXP option. Such PIs are formed like:
- * <?target option-name option-att="value" option-att="value" ... ?>
- * The method returns a triple
- * (target, option-name, [option-att, value; ...])
- * or raises Error.
- *)
-
- end
-
-;; </PRE
-></P
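->
-<P
->As a small usage sketch (it relies on the parsing functions of Pxp_yacc
-described in the next section; "readme.dtd" is only a placeholder file name),
-a DTD can be parsed and then inspected through the interface listed above:
-
-<PRE
-CLASS="PROGRAMLISTING"
->open Pxp_yacc
-
-let () =
-  (* parse the declarations of an external DTD subset *)
-  let dtd = parse_dtd_entity default_config (from_file "readme.dtd") in
-  (* print the names of all declared element types *)
-  List.iter print_endline (dtd # element_names);
-  (match dtd # root with
-   | Some r -> print_endline ("Root element: " ^ r)
-   | None   -> print_endline "No root element declared")</PRE
-></P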
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-><A
-HREF="x1629.html"
->Prev</A
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="index.html"
->Home</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-><A
-HREF="x1818.html"
->Next</A
-></TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
->Resolvers and sources</TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="c1567.html"
->Up</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
->Invoking the parser</TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->Invoking the parser</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="Configuring and calling the parser"
-HREF="c1567.html"><LINK
-REL="PREVIOUS"
-TITLE="The DTD classes"
-HREF="x1812.html"><LINK
-REL="NEXT"
-TITLE="Updates"
-HREF="x1965.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="SECT1"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="x1812.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
->Chapter 4. Configuring and calling the parser</TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-><A
-HREF="x1965.html"
->Next</A
-></TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN1818"
->4.4. Invoking the parser</A
-></H1
-><P
->Here is a description of Pxp_yacc.</P
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1821"
->4.4.1. Defaults</A
-></H2
-><P
->The following defaults are available:
-
-<PRE
-CLASS="PROGRAMLISTING"
->val default_config : config
-val default_extension : ('a node extension) as 'a
-val default_spec : ('a node extension as 'a) spec</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1825"
->4.4.2. Parsing functions</A
-></H2
-><P
->In the following, the term "closed document" refers to
-an XML structure like
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!DOCTYPE ... [ <TT
-CLASS="REPLACEABLE"
-><I
->declarations</I
-></TT
-> ] >
-<<TT
-CLASS="REPLACEABLE"
-><I
->root</I
-></TT
->>
-...
-</<TT
-CLASS="REPLACEABLE"
-><I
->root</I
-></TT
->></PRE
->
-
-The term "fragment" refers to an XML structure like
-
-<PRE
-CLASS="PROGRAMLISTING"
-><<TT
-CLASS="REPLACEABLE"
-><I
->root</I
-></TT
->>
-...
-</<TT
-CLASS="REPLACEABLE"
-><I
->root</I
-></TT
->></PRE
->
-
-i.e. to a single isolated element instance.</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->val parse_dtd_entity : config -> source -> dtd</PRE
->
-
-Parses the declarations which are contained in the entity, and returns them as
-<TT
-CLASS="LITERAL"
->dtd</TT
-> object.</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->val extract_dtd_from_document_entity : config -> source -> dtd</PRE
->
-
-Extracts the DTD from a closed document. Both the internal and the external
-subsets are extracted and combined into one <TT
-CLASS="LITERAL"
->dtd</TT
-> object. This
-function does not parse the whole document, but only the parts that are
-necessary to extract the DTD.</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->val parse_document_entity :
- ?transform_dtd:(dtd -> dtd) ->
- ?id_index:('ext index) ->
- config ->
- source ->
- 'ext spec ->
- 'ext document</PRE
->
-
-Parses a closed document and validates it against the DTD that is contained in
-the document (internal and external subsets). The option
-<TT
-CLASS="LITERAL"
->~transform_dtd</TT
-> can be used to transform the DTD in the
-document, and to use the transformed DTD for validation. If
-<TT
-CLASS="LITERAL"
->~id_index</TT
-> is specified, an index of all ID attributes is
-created.</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->val parse_wfdocument_entity :
- config ->
- source ->
- 'ext spec ->
- 'ext document</PRE
->
-
-Parses a closed document, but checks it only on well-formedness.</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->val parse_content_entity :
- ?id_index:('ext index) ->
- config ->
- source ->
- dtd ->
- 'ext spec ->
- 'ext node</PRE
->
-
-Parses a fragment, and validates the element.</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->val parse_wfcontent_entity :
- config ->
- source ->
- 'ext spec ->
- 'ext node</PRE
->
-
-Parses a fragment, but checks it only on well-formedness.</P
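->
-<P
->For example (a hedged sketch; "doc.xml" is only a placeholder file name), a
-closed document can be parsed in well-formedness mode as follows:
-
-<PRE
-CLASS="PROGRAMLISTING"
->open Pxp_yacc
-
-(* no validation against the DTD; only well-formedness is checked *)
-let doc =
-  parse_wfdocument_entity default_config (from_file "doc.xml") default_spec</PRE
-></P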
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1851"
->4.4.3. Configuration options</A
-></H2
-><P
-> <PRE
-CLASS="PROGRAMLISTING"
->type config =
- { warner : collect_warnings;
- errors_with_line_numbers : bool;
- enable_pinstr_nodes : bool;
- enable_super_root_node : bool;
- enable_comment_nodes : bool;
- encoding : rep_encoding;
- recognize_standalone_declaration : bool;
- store_element_positions : bool;
- idref_pass : bool;
- validate_by_dfa : bool;
- accept_only_deterministic_models : bool;
- ...
- }</PRE
->
-
-<P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->warner:</TT
-> The parser prints
-warnings by invoking the method <TT
-CLASS="LITERAL"
->warn</TT
-> for this warner
-object. (Default: all warnings are dropped)</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->errors_with_line_numbers:</TT
-> If
-true, errors contain line numbers; if false, errors contain only byte
-positions. The latter mode is faster. (Default: true)</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->enable_pinstr_nodes:</TT
-> If true,
-the parser creates extra nodes for processing instructions. If false,
-processing instructions are simply added to the element or document surrounding
-the instructions. (Default: false)</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->enable_super_root_node:</TT
-> If
-true, the parser creates an extra node which is the parent of the root of the
-document tree. This node is called super root; it is an element with type
-<TT
-CLASS="LITERAL"
->T_super_root</TT
->. - If there are processing instructions outside
-the root element and outside the DTD, they are added to the super root instead
-of the document. - If false, the super root node is not created. (Default:
-false)</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->enable_comment_nodes:</TT
-> If true,
-the parser creates nodes for comments with type <TT
-CLASS="LITERAL"
->T_comment</TT
->;
-if false, such nodes are not created. (Default: false)</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->encoding:</TT
-> Specifies the
-internal encoding of the parser. Most strings are then represented according to
-this encoding; however, there are some exceptions (especially
-<TT
-CLASS="LITERAL"
->ext_id</TT
-> values which are always UTF-8 encoded).
-(Default: `Enc_iso88591)</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->recognize_standalone_declaration:</TT
-> If true and if the parser is
-validating, the <TT
-CLASS="LITERAL"
->standalone="yes"</TT
-> declaration forces a check
-whether the document really is a standalone document. - If false, or if the
-parser is in well-formedness mode, such declarations are ignored.
-(Default: true)</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->store_element_positions:</TT
-> If
-true, for every non-data node the source position is stored. If false, the
-position information is lost. If available, you can get the positions of nodes
-by invoking the <TT
-CLASS="LITERAL"
->position</TT
-> method.
-(Default: true)</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->idref_pass:</TT
->If true and if
-there is an ID index, the parser checks whether every IDREF or IDREFS attribute
-refer to an existing node; this requires that the parser traverses the whole
-doument tree. If false, this check is left out. (Default: false)</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->validate_by_dfa:</TT
->If true and if
-the content model for an element type is deterministic, a deterministic finite
-automaton is used to validate whether the element contents match the content
-model of the type. If false, or if a DFA is not available, a backtracking
-algorithm is used for validation. (Default: true)</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->accept_only_deterministic_models:</TT
-> If true, only deterministic content
-models are accepted; if false, any syntactically correct content models can be
-processed. (Default: true)</P
-></LI
-></UL
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1895"
->4.4.4. Which configuration should I use?</A
-></H2
-><P
->First, I recommend varying the default configuration instead of
-creating a new configuration record. For instance, to set
-<TT
-CLASS="LITERAL"
->idref_pass</TT
-> to <TT
-CLASS="LITERAL"
->true</TT
->, change the default
-as in:
-<PRE
-CLASS="PROGRAMLISTING"
->let config = { default_config with idref_pass = true }</PRE
->
-The background is that I can add more options to the record in future versions
-of the parser without breaking your programs.</P
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->Do I need extra nodes for processing instructions? </B
->By default, such nodes are not created. This does not mean that the
-processing instructions are lost; however, you cannot find out the exact
-location where they occur. For example, the following XML text
-
-<PRE
-CLASS="PROGRAMLISTING"
-><x><?pi1?><y/><?pi2?></x> </PRE
->
-
-will normally create one element node for <TT
-CLASS="LITERAL"
->x</TT
-> containing
-<I
-CLASS="EMPHASIS"
->one</I
-> subnode for <TT
-CLASS="LITERAL"
->y</TT
->. The processing
-instructions are attached to <TT
-CLASS="LITERAL"
->x</TT
-> in a separate hash table; you
-can access them using <TT
-CLASS="LITERAL"
->x # pinstr "pi1"</TT
-> and <TT
-CLASS="LITERAL"
->x #
-pinstr "pi2"</TT
->, respectively. However, the information about where the
-instructions occur within <TT
-CLASS="LITERAL"
->x</TT
-> is lost.</P
-></DIV
-><P
->If the option <TT
-CLASS="LITERAL"
->enable_pinstr_nodes</TT
-> is
-turned on, the parser creates extra nodes <TT
-CLASS="LITERAL"
->pi1</TT
-> and
-<TT
-CLASS="LITERAL"
->pi2</TT
-> such that the subnodes of <TT
-CLASS="LITERAL"
->x</TT
-> are now:
-
-<PRE
-CLASS="PROGRAMLISTING"
->x # sub_nodes = [ pi1; y; pi2 ]</PRE
->
-
-The extra nodes contain the processing instructions in the usual way, i.e. you
-can access them using <TT
-CLASS="LITERAL"
->pi1 # pinstr "pi1"</TT
-> and <TT
-CLASS="LITERAL"
->pi2 #
-pinstr "pi2"</TT
->, respectively.</P
-><P
->Note that you will need an exemplar for the PI nodes (see
-<TT
-CLASS="LITERAL"
->make_spec_from_alist</TT
->).</P
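->
-<P
->A possible way to enable these extra nodes is again a variation of the default
-configuration (the exemplar mentioned above must still be provided via the
-spec):
-
-<PRE
-CLASS="PROGRAMLISTING"
->let config = { default_config with enable_pinstr_nodes = true }</PRE
-></P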
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->Do I need a super root node? </B
->By default, there is no super root node. The
-<TT
-CLASS="LITERAL"
->document</TT
-> object refers directly to the node representing the
-root element of the document, i.e.
-
-<PRE
-CLASS="PROGRAMLISTING"
->doc # root = r</PRE
->
-
-if <TT
-CLASS="LITERAL"
->r</TT
-> is the root node. This is sometimes inconvenient: (1)
-Some algorithms become simpler if every node has a parent, even the root
-node. (2) Some standards such as XPath call the "root node" the node whose
-child represents the root of the document. (3) The super root node can serve
-as a container for processing instructions outside the root element. Because of
-these reasons, it is possible to create an extra super root node, whose child
-is the root node:
-
-<PRE
-CLASS="PROGRAMLISTING"
->doc # root = sr &&
-sr # sub_nodes = [ r ]</PRE
->
-
-When extra nodes are also created for processing instructions, these nodes can
-be added to the super root node if they occur outside the root element (reason
-(3)), and the order reflects the order in the source text.</P
-></DIV
-><P
->Note that you will need an exemplar for the super root node
-(see <TT
-CLASS="LITERAL"
->make_spec_from_alist</TT
->).</P
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->What is the effect of the UTF-8 encoding? </B
->By default, the parser represents strings (with few
-exceptions) as ISO-8859-1 strings. These are well-known, and there are tools
-and fonts for this encoding.</P
-></DIV
-><P
->However, internationalization may require that you switch over
-to UTF-8 encoding. In most environments, the immediate effect will be that you
-cannot read strings with character codes >= 160 any longer; your terminal will
-only show funny glyph combinations. It is strongly recommended to install
-Unicode fonts (<A
-HREF="http://czyborra.com/unifont/"
-TARGET="_top"
->GNU Unifont</A
->,
-<A
-HREF="http://www.cl.cam.ac.uk/~mgk25/download/ucs-fonts.tar.gz"
-TARGET="_top"
->Markus Kuhn's fonts</A
->) and <A
-HREF="http://myweb.clark.net/pub/dickey/xterm/xterm.html"
-TARGET="_top"
->terminal emulators
-that can handle UTF-8 byte sequences</A
->. Furthermore, a Unicode editor may
-be helpful (such as <A
-HREF="ftp://metalab.unc.edu/pub/Linux/apps/editors/X/"
-TARGET="_top"
->Yudit</A
->). There are
-also <A
-HREF="http://www.cl.cam.ac.uk/~mgk25/unicode.html"
-TARGET="_top"
->FAQ</A
-> by
-Markus Kuhn.</P
-><P
->By setting <TT
-CLASS="LITERAL"
->encoding</TT
-> to
-<TT
-CLASS="LITERAL"
->`Enc_utf8</TT
-> all strings originating from the parsed XML
-document are represented as UTF-8 strings. This includes not only character
-data and attribute values but also element names, attribute names and so on, as
-it is possible to use any Unicode letter to form such names. Strictly
-speaking, PXP is only XML-compliant if the UTF-8 mode is used; otherwise it
-will have difficulties when validating documents containing
-non-ISO-8859-1-names.</P
-><P
->This mode does not have any impact on the external
-representation of documents. The character set assumed when reading a document
-is set in the XML declaration, and the character set used when writing a document must
-be passed to the <TT
-CLASS="LITERAL"
->write</TT
-> method.</P
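->
-<P
->Switching the internal representation to UTF-8 is again only a variation of
-the default configuration, for example:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let config = { default_config with encoding = `Enc_utf8 }</PRE
-></P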
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->How do I check that nodes exist which are referred by IDREF attributes? </B
->First, you must create an index of all occurring ID
-attributes:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let index = new hash_index</PRE
->
-
-This index must be passed to the parsing function:
-
-<PRE
-CLASS="PROGRAMLISTING"
->parse_document_entity
- ~id_index:(index :> index)
- config source spec</PRE
->
-
-Next, you must turn on the <TT
-CLASS="LITERAL"
->idref_pass</TT
-> mode:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let config = { default_config with idref_pass = true }</PRE
->
-
-Note that now the whole document tree will be traversed, and every node will be
-checked for IDREF and IDREFS attributes. If the tree is big, this may take some
-time.</P
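->
-<P
->Put together, the three steps sketched above look like this ("doc.xml" is only
-a placeholder file name):
-
-<PRE
-CLASS="PROGRAMLISTING"
->open Pxp_yacc
-
-let index = new hash_index
-let config = { default_config with idref_pass = true }
-
-(* parse, filling the index and checking IDREF/IDREFS attributes *)
-let doc =
-  parse_document_entity
-    ~id_index:(index :> index)
-    config (from_file "doc.xml") default_spec</PRE
-></P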
-></DIV
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->What are deterministic content models? </B
->This type of model can speed up the validation checks;
-furthermore they ensure SGML-compatibility. In particular, a content model is
-deterministic if the parser can determine the alternative actually used by
-inspecting only the current token. For example, this element has
-non-deterministic contents:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT x ((u,v) | (u,y+) | v)></PRE
->
-
-If the first element in <TT
-CLASS="LITERAL"
->x</TT
-> is <TT
-CLASS="LITERAL"
->u</TT
->, the
-parser does not know which of the alternatives <TT
-CLASS="LITERAL"
->(u,v)</TT
-> or
-<TT
-CLASS="LITERAL"
->(u,y+)</TT
-> will work; the parser must also inspect the second
-element to be able to distinguish between the alternatives. Because such
-look-ahead (or "guessing") is required, this example is
-non-deterministic.</P
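->
-<P
->For illustration, the same content can be described deterministically by
-factoring out the common prefix <TT
-CLASS="LITERAL"
->u</TT
->, for example:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT x ((u,(v|y+)) | v)></PRE
->
-
-Now the parser can always decide which branch applies by inspecting the
-current token alone.</P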
-></DIV
-><P
->The XML standard demands that content models must be
-deterministic. So it is recommended to turn the option
-<TT
-CLASS="LITERAL"
->accept_only_deterministic_models</TT
-> on; however, PXP can also
-process non-deterministic models using a backtracking algorithm.</P
-><P
->Deterministic models ensure that validation can be performed in
-linear time. In order to get the maximum benefits, PXP also implements a
-special validator that profits from deterministic models; this is the
-deterministic finite automaton (DFA). This validator is enabled per element
-type if the element type has a deterministic model and if the option
-<TT
-CLASS="LITERAL"
->validate_by_dfa</TT
-> is turned on.</P
-><P
->In general, I expect that the DFA method is faster than the
-backtracking method; in particular, in the worst case the DFA takes only linear
-time. However, if the content model has only a few alternatives and the
-alternatives do not nest, the backtracking algorithm may be better.</P
-></DIV
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-><A
-HREF="x1812.html"
->Prev</A
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="index.html"
->Home</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-><A
-HREF="x1965.html"
->Next</A
-></TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
->The DTD classes</TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="c1567.html"
->Up</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
->Updates</TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->Updates</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="Configuring and calling the parser"
-HREF="c1567.html"><LINK
-REL="PREVIOUS"
-TITLE="Invoking the parser"
-HREF="x1818.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="SECT1"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="x1818.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
->Chapter 4. Configuring and calling the parser</TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-> </TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN1965"
->4.5. Updates</A
-></H1
-><P
-><I
-CLASS="EMPHASIS"
->Some features (often added later) that are not otherwise
-explained in the manual but are worth mentioning.</I
-></P
-><P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
->Methods node_position, node_path, nth_node,
-previous_node, next_node for nodes: See pxp_document.mli</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
->Functions to determine the document order of nodes:
-compare, create_ord_index, ord_number, ord_compare: See pxp_document.mli</P
-></LI
-></UL
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-><A
-HREF="x1818.html"
->Prev</A
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="index.html"
->Home</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-> </TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
->Invoking the parser</TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="c1567.html"
->Up</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-> </TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->A complete example: The readme DTD</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="What is XML?"
-HREF="c36.html"><LINK
-REL="PREVIOUS"
-TITLE="Highlights of XML"
-HREF="x107.html"><LINK
-REL="NEXT"
-TITLE="Using PXP"
-HREF="c533.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="SECT1"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="x107.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
->Chapter 1. What is XML?</TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-><A
-HREF="c533.html"
->Next</A
-></TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="SECT.README.DTD"
->1.3. A complete example: The <I
-CLASS="EMPHASIS"
->readme</I
-> DTD</A
-></H1
-><P
->The reason for <I
-CLASS="EMPHASIS"
->readme</I
-> was that I often wrote two versions
-of files such as README and INSTALL which explain aspects of a distributed
-software archive; one version was ASCII-formatted, the other was written in
-HTML. Maintaining both versions means twice the amount of work, and changes
-to one version may be forgotten in the other. To improve this situation
-I invented the <I
-CLASS="EMPHASIS"
->readme</I
-> DTD which allows me to maintain only
-one source written as XML document, and to generate the ASCII and the HTML
-version from it.</P
-><P
->In this section, I explain only the DTD. The <I
-CLASS="EMPHASIS"
->readme</I
-> DTD is
-contained in the <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-> distribution together with the two converters to
-produce ASCII and HTML. Another <A
-HREF="x738.html"
->section</A
-> of this manual describes the HTML
-converter.</P
-><P
->The documents have a simple structure: There are up to three levels of nested
-sections, paragraphs, item lists, footnotes, hyperlinks, and text emphasis. The
-outermost element has usually the type <TT
-CLASS="LITERAL"
->readme</TT
->, it is
-declared by
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT readme (sect1+)>
-<!ATTLIST readme
- title CDATA #REQUIRED></PRE
->
-
-This means that this element contains one or more sections of the first level
-(element type <TT
-CLASS="LITERAL"
->sect1</TT
->), and that the element has a required
-attribute <TT
-CLASS="LITERAL"
->title</TT
-> containing character data (CDATA). Note that
-<TT
-CLASS="LITERAL"
->readme</TT
-> elements must not contain text data.</P
-><P
->The three levels of sections are declared as follows:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT sect1 (title,(sect2|p|ul)+)>
-
-<!ELEMENT sect2 (title,(sect3|p|ul)+)>
-
-<!ELEMENT sect3 (title,(p|ul)+)></PRE
->
-
-Every section has a <TT
-CLASS="LITERAL"
->title</TT
-> element as first subelement. After
-the title an arbitrary but non-empty sequence of inner sections, paragraphs and
-item lists follows. Note that the inner sections must belong to the next higher
-section level; <TT
-CLASS="LITERAL"
->sect3</TT
-> elements must not contain inner
-sections because there is no next higher level.</P
-><P
->Obviously, all three declarations allow paragraphs (<TT
-CLASS="LITERAL"
->p</TT
->) and
-item lists (<TT
-CLASS="LITERAL"
->ul</TT
->). The definition can be simplified at this
-point by using a parameter entity:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY % p.like "p|ul">
-
-<!ELEMENT sect1 (title,(sect2|%p.like;)+)>
-
-<!ELEMENT sect2 (title,(sect3|%p.like;)+)>
-
-<!ELEMENT sect3 (title,(%p.like;)+)></PRE
->
-
-Here, the entity <TT
-CLASS="LITERAL"
->p.like</TT
-> is nothing but a macro abbreviating
-the same sequence of declarations; if new elements on the same level as
-<TT
-CLASS="LITERAL"
->p</TT
-> and <TT
-CLASS="LITERAL"
->ul</TT
-> are later added, it is
-sufficient to change only the entity definition. Note that there are some
-restrictions on the usage of entities in this context; most importantly, entities
-containing a left parenthesis must also contain the corresponding right
-parenthesis. </P
-><P
->Note that the entity <TT
-CLASS="LITERAL"
->p.like</TT
-> is a
-<I
-CLASS="EMPHASIS"
->parameter</I
-> entity, i.e. the ENTITY declaration contains a
-percent sign, and the entity is referred to by
-<TT
-CLASS="LITERAL"
->%p.like;</TT
->. This kind of entity must be used to abbreviate
-parts of the DTD; the <I
-CLASS="EMPHASIS"
->general</I
-> entities declared without
-percent sign and referred to as <TT
-CLASS="LITERAL"
->&name;</TT
-> are not allowed
-in this context.</P
-><P
->The <TT
-CLASS="LITERAL"
->title</TT
-> element specifies the title of the section in
-which it occurs. The title is given as character data, optionally interspersed
-with line breaks (<TT
-CLASS="LITERAL"
->br</TT
->):
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT title (#PCDATA|br)*></PRE
->
-
-Compared with the <TT
-CLASS="LITERAL"
->title</TT
-> <I
-CLASS="EMPHASIS"
->attribute</I
-> of
-the <TT
-CLASS="LITERAL"
->readme</TT
-> element, this element allows inner markup
-(i.e. <TT
-CLASS="LITERAL"
->br</TT
->) while attribute values do not: It is an error if
-an attribute value contains the left angle bracket < literally, so it
-is impossible to include inner elements. </P
-><P
->The paragraph element <TT
-CLASS="LITERAL"
->p</TT
-> has a structure similar to
-<TT
-CLASS="LITERAL"
->title</TT
->, but it allows more inner elements:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ENTITY % text "br|code|em|footnote|a">
-
-<!ELEMENT p (#PCDATA|%text;)*></PRE
->
-
-Line breaks do not have inner structure, so they are declared as being empty:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT br EMPTY></PRE
->
-
-This means that really nothing is allowed within <TT
-CLASS="LITERAL"
->br</TT
->; you
-must always write <TT
-CLASS="LITERAL"
-><br></br></TT
-> or abbreviated
-<TT
-CLASS="LITERAL"
-><br/></TT
->.</P
-><P
->Code samples should be marked up by the <TT
-CLASS="LITERAL"
->code</TT
-> tag; emphasized
-text can be indicated by <TT
-CLASS="LITERAL"
->em</TT
->:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT code (#PCDATA)>
-
-<!ELEMENT em (#PCDATA|%text;)*></PRE
->
-
-That <TT
-CLASS="LITERAL"
->code</TT
-> elements are not allowed to contain further markup
-while <TT
-CLASS="LITERAL"
->em</TT
-> elements do is a design decision by the author of
-the DTD.</P
-><P
->Unordered lists simply consist of one or more list items, and a list item may
-contain paragraph-level material:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT ul (li+)>
-
-<!ELEMENT li (%p.like;)*></PRE
->
-
-Footnotes are described by the text of the note; this text may contain
-text-level markup. There is no mechanism to describe the numbering scheme of
-footnotes, or to specify how footnote references are printed.
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT footnote (#PCDATA|%text;)*></PRE
->
-
-Hyperlinks are written as in HTML. The anchor tag contains the text describing
-where the link points to, and the <TT
-CLASS="LITERAL"
->href</TT
-> attribute is the
-pointer (as a URL). There is no way to describe locations of "hash marks". If the
-link refers to another <I
-CLASS="EMPHASIS"
->readme</I
-> document, the attribute
-<TT
-CLASS="LITERAL"
->readmeref</TT
-> should be used instead of <TT
-CLASS="LITERAL"
->href</TT
->.
-The reason is that the converted document usually has a different system
-identifier (file name), and the link to a converted document must be
-converted, too.
-
-<PRE
-CLASS="PROGRAMLISTING"
-><!ELEMENT a (#PCDATA)*>
-<!ATTLIST a
- href CDATA #IMPLIED
- readmeref CDATA #IMPLIED
-></PRE
->
-
-Note that although it is only sensible to specify one of the two attributes,
-the DTD has no means to express this restriction.</P
-><P
->So far the DTD. Finally, here is a document for it:
-
-<PRE
-CLASS="PROGRAMLISTING"
-><?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE readme SYSTEM "readme.dtd">
-<readme title="How to use the readme converters">
-<sect1>
- <title>Usage</title>
- <p>
- The <em>readme</em> converter is invoked on the command line by:
- </p>
- <p>
- <code>readme [ -text | -html ] input.xml</code>
- </p>
- <p>
- Here a list of options:
- </p>
- <ul>
- <li>
- <p><code>-text</code>: specifies that ASCII output should be produced</p>
- </li>
- <li>
- <p><code>-html</code>: specifies that HTML output should be produced</p>
- </li>
- </ul>
- <p>
- The input file must be given on the command line. The converted output is
- printed to <em>stdout</em>.
- </p>
-</sect1>
-<sect1>
- <title>Author</title>
- <p>
- The program has been written by
- <a href="mailto:Gerd.Stolpmann@darmstadt.netsurf.de">Gerd Stolpmann</a>.
- </p>
-</sect1>
-</readme></PRE
-> </P
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><HR
-ALIGN="LEFT"
-WIDTH="100%"><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
-><A
-HREF="x107.html"
->Prev</A
-></TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="index.html"
->Home</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
-><A
-HREF="c533.html"
->Next</A
-></TD
-></TR
-><TR
-><TD
-WIDTH="33%"
-ALIGN="left"
-VALIGN="top"
->Highlights of XML</TD
-><TD
-WIDTH="34%"
-ALIGN="center"
-VALIGN="top"
-><A
-HREF="c36.html"
->Up</A
-></TD
-><TD
-WIDTH="33%"
-ALIGN="right"
-VALIGN="top"
->Using <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-></TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->How to parse a document from an application</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="Using PXP"
-HREF="c533.html"><LINK
-REL="PREVIOUS"
-TITLE="Using PXP"
-HREF="c533.html"><LINK
-REL="NEXT"
-TITLE="Class-based processing of the node tree"
-HREF="x675.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="SECT1"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="NAVHEADER"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-CELLPADDING="0"
-CELLSPACING="0"
-><TR
-><TH
-COLSPAN="3"
-ALIGN="center"
->The PXP user's guide</TH
-></TR
-><TR
-><TD
-WIDTH="10%"
-ALIGN="left"
-VALIGN="bottom"
-><A
-HREF="c533.html"
->Prev</A
-></TD
-><TD
-WIDTH="80%"
-ALIGN="center"
-VALIGN="bottom"
->Chapter 2. Using <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-></TD
-><TD
-WIDTH="10%"
-ALIGN="right"
-VALIGN="bottom"
-><A
-HREF="x675.html"
->Next</A
-></TD
-></TR
-></TABLE
-><HR
-ALIGN="LEFT"
-WIDTH="100%"></DIV
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN550"
->2.2. How to parse a document from an application</A
-></H1
-><P
->Let me first give a rough overview of the object model of the parser. The
-following items are represented by objects:
-
-<P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><I
-CLASS="EMPHASIS"
->Documents:</I
-> The document representation is more or less the
-anchor for the application; all accesses to the parsed entities start here. It
-is described by the class <TT
-CLASS="LITERAL"
->document</TT
-> contained in the module
-<TT
-CLASS="LITERAL"
->Pxp_document</TT
->. You can get some global information, such
-as the XML declaration the document begins with, the DTD of the document,
-global processing instructions, and most important, the document tree. </P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><I
-CLASS="EMPHASIS"
->The contents of documents:</I
-> The contents have the structure
-of a tree: Elements contain other elements and text<A
-NAME="AEN562"
-HREF="#FTN.AEN562"
->[1]</A
->.
-
-The common type to represent both kinds of content is <TT
-CLASS="LITERAL"
->node</TT
->
-which is a class type that unifies the properties of elements and character
-data. Every node has a list of children (which is empty if the element is empty
-or the node represents text); nodes may have attributes; nodes have always text
-contents. There are two implementations of <TT
-CLASS="LITERAL"
->node</TT
->, the class
-<TT
-CLASS="LITERAL"
->element_impl</TT
-> for elements, and the class
-<TT
-CLASS="LITERAL"
->data_impl</TT
-> for text data. You find these classes and class
-types in the module <TT
-CLASS="LITERAL"
->Pxp_document</TT
->, too.</P
-><P
->Note that attribute lists are represented by non-class values.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><I
-CLASS="EMPHASIS"
->The node extension:</I
-> For advanced usage, every node of the
-document may have an associated <I
-CLASS="EMPHASIS"
->extension</I
-> which is simply
-a second object. This object must have the three methods
-<TT
-CLASS="LITERAL"
->clone</TT
->, <TT
-CLASS="LITERAL"
->node</TT
->, and
-<TT
-CLASS="LITERAL"
->set_node</TT
-> as bare minimum, but you are free to add methods as
-you want. This is the preferred way to add functionality to the document
-tree<A
-NAME="AEN582"
-HREF="#FTN.AEN582"
->[2]</A
->. The class type <TT
-CLASS="LITERAL"
->extension</TT
-> is
-defined in <TT
-CLASS="LITERAL"
->Pxp_document</TT
->, too.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><I
-CLASS="EMPHASIS"
->The DTD:</I
-> Sometimes it is necessary to access the DTD of a
-document; the average application does not need this feature. The class
-<TT
-CLASS="LITERAL"
->dtd</TT
-> describes DTDs, and makes it possible to get
-representations of element, entity, and notation declarations as well as
-processing instructions contained in the DTD. This class, and
-<TT
-CLASS="LITERAL"
->dtd_element</TT
->, <TT
-CLASS="LITERAL"
->dtd_notation</TT
->, and
-<TT
-CLASS="LITERAL"
->proc_instruction</TT
-> can be found in the module
-<TT
-CLASS="LITERAL"
->Pxp_dtd</TT
->. There are a couple of classes representing
-different kinds of entities; these can be found in the module
-<TT
-CLASS="LITERAL"
->Pxp_entity</TT
->. </P
-></LI
-></UL
->
-
-Additionally, the following modules play a role:
-
-<P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><I
-CLASS="EMPHASIS"
->Pxp_yacc:</I
-> Here the main parsing functions such as
-<TT
-CLASS="LITERAL"
->parse_document_entity</TT
-> are located. Some additional types and
-functions allow the parser to be configured in a non-standard way.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><I
-CLASS="EMPHASIS"
->Pxp_types:</I
-> This is a collection of basic types and
-exceptions. </P
-></LI
-></UL
->
-
-There are some further modules that are needed internally but are not part of
-the API.</P
-><P
->Let the document to be parsed be stored in a file called
-<TT
-CLASS="LITERAL"
->doc.xml</TT
->. The parsing process is started by calling the
-function
-
-<PRE
-CLASS="PROGRAMLISTING"
->val parse_document_entity : config -> source -> 'ext spec -> 'ext document</PRE
->
-
-defined in the module <TT
-CLASS="LITERAL"
->Pxp_yacc</TT
->. The first argument
-specifies some global properties of the parser; it is recommended to start with
-the <TT
-CLASS="LITERAL"
->default_config</TT
->. The second argument determines where the
-document to be parsed comes from; this may be a file, a channel, or an entity
-ID. To parse <TT
-CLASS="LITERAL"
->doc.xml</TT
->, it is sufficient to pass
-<TT
-CLASS="LITERAL"
->from_file "doc.xml"</TT
->. </P
-><P
->The third argument passes the object specification to use. Roughly
-speaking, it determines which classes implement the node objects of which
-element types, and which extensions are to be used. The <TT
-CLASS="LITERAL"
->'ext</TT
->
-polymorphic type variable is the type of the extension. For the moment, let us
-simply pass <TT
-CLASS="LITERAL"
->default_spec</TT
-> as this argument, and ignore it.</P
-><P
->So the following expression parses <TT
-CLASS="LITERAL"
->doc.xml</TT
->:
-
-<PRE
-CLASS="PROGRAMLISTING"
->open Pxp_yacc
-let d = parse_document_entity default_config (from_file "doc.xml") default_spec</PRE
->
-
-Note that <TT
-CLASS="LITERAL"
->default_config</TT
-> implies that warnings are collected
-but not printed. Errors raise one of the exceptions defined in
-<TT
-CLASS="LITERAL"
->Pxp_types</TT
->; to get readable errors and warnings catch the
-exceptions as follows:
-
-<PRE
-CLASS="PROGRAMLISTING"
->class warner =
- object
- method warn w =
- print_endline ("WARNING: " ^ w)
- end
-;;
-
-try
- let config = { default_config with warner = new warner } in
- let d = parse_document_entity config (from_file "doc.xml") default_spec
- in
- ...
-with
- e ->
- print_endline (Pxp_types.string_of_exn e)</PRE
->
-
-Now <TT
-CLASS="LITERAL"
->d</TT
-> is an object of the <TT
-CLASS="LITERAL"
->document</TT
->
-class. If you want the node tree, you can get the root element by
-
-<PRE
-CLASS="PROGRAMLISTING"
->let root = d # root</PRE
->
-
-and if you would rather access the DTD, you can obtain it by
-
-<PRE
-CLASS="PROGRAMLISTING"
->let dtd = d # dtd</PRE
->
-
-As it is more interesting, let us investigate the node tree now. Given the root
-element, it is possible to recursively traverse the whole tree. The children of
-a node <TT
-CLASS="LITERAL"
->n</TT
-> are returned by the method
-<TT
-CLASS="LITERAL"
->sub_nodes</TT
->, and the type of a node is returned by
-<TT
-CLASS="LITERAL"
->node_type</TT
->. This function traverses the tree, and prints the
-type of each node:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let rec print_structure n =
- let ntype = n # node_type in
- match ntype with
- T_element name ->
- print_endline ("Element of type " ^ name);
- let children = n # sub_nodes in
- List.iter print_structure children
- | T_data ->
- print_endline "Data"
- | _ ->
- (* Other node types are not possible unless the parser is configured
- differently.
- *)
- assert false</PRE
->
-
-You can call this function by
-
-<PRE
-CLASS="PROGRAMLISTING"
->print_structure root</PRE
->
-
-The type returned by <TT
-CLASS="LITERAL"
->node_type</TT
-> is either <TT
-CLASS="LITERAL"
->T_element
-name</TT
-> or <TT
-CLASS="LITERAL"
->T_data</TT
->. The <TT
-CLASS="LITERAL"
->name</TT
-> of the
-element type is the string included in the angle brackets. Note that only
-elements have children; data nodes are always leaves of the tree.</P
-><P
->There are some more methods for accessing a parsed node tree:
-
-<P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->n # parent</TT
->: Returns the parent node, or raises
-<TT
-CLASS="LITERAL"
->Not_found</TT
-> if the node is already the root</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->n # root</TT
->: Returns the root of the node tree. </P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->n # attribute a</TT
->: Returns the value of the attribute with
-name <TT
-CLASS="LITERAL"
->a</TT
->. The method returns a value for every
-<I
-CLASS="EMPHASIS"
->declared</I
-> attribute, independently of whether the attribute
-instance is defined or not. If the attribute is not declared,
-<TT
-CLASS="LITERAL"
->Not_found</TT
-> will be raised. (In well-formedness mode, every
-attribute is considered as being implicitly declared with type
-<TT
-CLASS="LITERAL"
->CDATA</TT
->.) </P
-><P
->The following return values are possible: <TT
-CLASS="LITERAL"
->Value s</TT
->,
-<TT
-CLASS="LITERAL"
->Valuelist sl</TT
->, and <TT
-CLASS="LITERAL"
->Implied_value</TT
->.
-The first two value types indicate that the attribute value is available,
-either because there is a definition
-<TT
-CLASS="LITERAL"
-><TT
-CLASS="REPLACEABLE"
-><I
->a</I
-></TT
->="<TT
-CLASS="REPLACEABLE"
-><I
->value</I
-></TT
->"</TT
->
-in the XML text, or because there is a default value (declared in the
-DTD). Only if both the instance definition and the default declaration are
-missing is the value <TT
-CLASS="LITERAL"
->Implied_value</TT
-> returned (a short sketch covering all three cases follows after this list).</P
-><P
->In the DTD, every attribute is typed. There are single-value types (CDATA, ID,
-IDREF, ENTITY, NMTOKEN, enumerations), in which case the method passes
-<TT
-CLASS="LITERAL"
->Value s</TT
-> back, where <TT
-CLASS="LITERAL"
->s</TT
-> is the normalized
-string value of the attribute. The other types (IDREFS, ENTITIES, NMTOKENS)
-represent list values, and the parser splits the XML literal into several
-tokens and returns these tokens as <TT
-CLASS="LITERAL"
->Valuelist sl</TT
->.</P
-><P
->Normalization means that entity references (the
-<TT
-CLASS="LITERAL"
->&<TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
->;</TT
-> tokens) and
-character references
-(<TT
-CLASS="LITERAL"
->&#<TT
-CLASS="REPLACEABLE"
-><I
->number</I
-></TT
->;</TT
->) are replaced
-by the text they represent, and that white space characters are converted into
-plain spaces.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->n # data</TT
->: Returns the character data contained in the
-node. For data nodes, the meaning is obvious as this is the main content of
-data nodes. For element nodes, this method returns the concatenated contents of
-all inner data nodes.</P
-><P
->Note that entity references included in the text are resolved while they are
-being parsed; for example the text "a &lt;&gt; b" will be returned
-as "a <> b" by this method. Spaces of data nodes are always
-preserved. Newlines are preserved, but always converted to \n characters even
-if newlines are encoded as \r\n or \r. Normally you will never see two adjacent
-data nodes because the parser collapses all data material at one location into
-one node. (However, if you create your own tree or transform the parsed tree,
-it is possible to have adjacent data nodes.)</P
-><P
->Note that elements that do <I
-CLASS="EMPHASIS"
->not</I
-> allow #PCDATA as content
-will not have data nodes as children. This means that spaces and newlines, the
-only character material allowed for such elements, are silently dropped.</P
-></LI
-></UL
->
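-
-The following helper is not part of the parser API; it is only a hedged sketch
-showing how the three possible results of the <TT
-CLASS="LITERAL"
->attribute</TT
-> method can be turned into a single string:
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* illustrative helper, not a PXP function; Value, Valuelist, and
-   Implied_value are the constructors of att_value *)
-let string_of_att v =
-  match v with
-    Value s       -> s
-  | Valuelist sl  -> String.concat " " sl
-  | Implied_value -> ""</PRE
->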
-
-For example, if the task is to print all contents of elements with type
-"valuable" whose attribute "priority" is "1", this function can help:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let rec print_valuable_prio1 n =
- let ntype = n # node_type in
- match ntype with
- T_element "valuable" when n # attribute "priority" = Value "1" ->
- print_endline "Valuable node with priority 1 found:";
- print_endline (n # data)
- | (T_element _ | T_data) ->
- let children = n # sub_nodes in
- List.iter print_valuable_prio1 children
- | _ ->
- assert false</PRE
->
-
-You can call this function by:
-
-<PRE
-CLASS="PROGRAMLISTING"
->print_valuable_prio1 root</PRE
->
-
-If you like a DSSSL-like style, you can make the function
-<TT
-CLASS="LITERAL"
->process_children</TT
-> explicit:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let rec print_valuable_prio1 n =
-
- let process_children n =
- let children = n # sub_nodes in
- List.iter print_valuable_prio1 children
- in
-
- let ntype = n # node_type in
- match ntype with
- T_element "valuable" when n # attribute "priority" = Value "1" ->
- print_endline "Valuable node with priority 1 found:";
- print_endline (n # data)
- | (T_element _ | T_data) ->
- process_children n
- | _ ->
- assert false</PRE
->
-
-Used in this way, O'Caml becomes a simple "style-sheet language": You can form a big
-"match" expression to distinguish between all significant cases, and provide
-different reactions to different conditions. But this technique has
-limitations; the "match" expression tends to get larger and larger, and it is
-difficult to store intermediate values as there is only one big
-recursion. Alternatively, it is also possible to represent the various cases as
-classes, and to use dynamic method lookup to find the appropriate class. The
-next section explains this technique in detail. </P
-></DIV
-><H3
-CLASS="FOOTNOTES"
->Notes</H3
-><TABLE
-BORDER="0"
-CLASS="FOOTNOTES"
-WIDTH="100%"
-><TR
-><TD
-ALIGN="LEFT"
-VALIGN="TOP"
-WIDTH="5%"
-><A
-NAME="FTN.AEN562"
-HREF="x550.html#AEN562"
->[1]</A
-></TD
-><TD
-ALIGN="LEFT"
-VALIGN="TOP"
-WIDTH="95%"
-><P
->Elements may
-also contain processing instructions. Unlike other document models, <SPAN
-CLASS="ACRONYM"
->PXP</SPAN
->
-separates processing instructions from the rest of the text and provides a
-second interface to access them (method <TT
-CLASS="LITERAL"
->pinstr</TT
->). However,
-there is a parser option (<TT
-CLASS="LITERAL"
->enable_pinstr_nodes</TT
->) which changes
-the behaviour of the parser such that extra nodes for processing instructions
-are included in the tree.</P
-><P
->Furthermore, the tree normally does not contain nodes for XML comments;
-they are ignored by default. Again, there is an option
-(<TT
-CLASS="LITERAL"
->enable_comment_nodes</TT
->) changing this.</P
-></TD
-></TR
-><TR
-><TD
-ALIGN="LEFT"
-VALIGN="TOP"
-WIDTH="5%"
-><A
-NAME="FTN.AEN582"
-HREF="x550.html#AEN582"
->[2]</A
-></TD
-><TD
-ALIGN="LEFT"
-VALIGN="TOP"
-WIDTH="95%"
-><P
->Due to the typing system it is more or less impossible to
-derive recursive classes in O'Caml. To get around this, it is common practice
-to put the modifiable or extensible part of recursive objects into parallel
-objects.</P
-></TD
-></TR
-></TABLE
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->Class-based processing of the node tree</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="Using PXP"
-HREF="c533.html"><LINK
-REL="PREVIOUS"
-TITLE="How to parse a document from an application"
-HREF="x550.html"><LINK
-REL="NEXT"
-TITLE="Example: An HTML backend for the readme
-DTD"
-HREF="x738.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="SECT1"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN675"
->2.3. Class-based processing of the node tree</A
-></H1
-><P
->By default, the parsed node tree consists of objects of the same class; this is
-a good design as long as you only want to access selected parts of the
-document. For complex transformations, it may be better to use different
-classes for objects describing different element types.</P
-><P
->For example, if the DTD declares the element types <TT
-CLASS="LITERAL"
->a</TT
->,
-<TT
-CLASS="LITERAL"
->b</TT
->, and <TT
-CLASS="LITERAL"
->c</TT
->, and if the task is to convert
-an arbitrary document into a printable format, the idea is to define for every
-element type a separate class that has a method <TT
-CLASS="LITERAL"
->print</TT
->. The
-classes are <TT
-CLASS="LITERAL"
->eltype_a</TT
->, <TT
-CLASS="LITERAL"
->eltype_b</TT
->, and
-<TT
-CLASS="LITERAL"
->eltype_c</TT
->, and every class implements
-<TT
-CLASS="LITERAL"
->print</TT
-> such that elements of the type corresponding to the
-class are converted to the output format.</P
-><P
->The parser supports such a design directly. As it is impossible to derive
-recursive classes in O'Caml<A
-NAME="AEN688"
-HREF="#FTN.AEN688"
->[1]</A
->, the specialized element classes cannot be formed by
-simply inheriting from the built-in classes of the parser and adding methods
-for customized functionality. To get around this limitation, every node of the
-document tree is represented by <I
-CLASS="EMPHASIS"
->two</I
-> objects, one called
-"the node" and containing the recursive definition of the tree, one called "the
-extension". Every node object has a reference to the extension, and the
-extension has a reference to the node. The advantage of this model is that it
-is now possible to customize the extension without affecting the typing
-constraints of the recursive node definition.</P
-><P
->Every extension must have the three methods <TT
-CLASS="LITERAL"
->clone</TT
->,
-<TT
-CLASS="LITERAL"
->node</TT
->, and <TT
-CLASS="LITERAL"
->set_node</TT
->. The method
-<TT
-CLASS="LITERAL"
->clone</TT
-> creates a deep copy of the extension object and
-returns it; <TT
-CLASS="LITERAL"
->node</TT
-> returns the node object for this extension
-object; and <TT
-CLASS="LITERAL"
->set_node</TT
-> is used to tell the extension object
-which node is associated with it; this method is automatically called when the
-node tree is initialized. The following definition is a good starting point
-for these methods; usually <TT
-CLASS="LITERAL"
->clone</TT
-> must be further refined
-when instance variables are added to the class:
-
-<PRE
-CLASS="PROGRAMLISTING"
->class custom_extension =
- object (self)
-
- val mutable node = (None : custom_extension node option)
-
- method clone = {< >}
- method node =
- match node with
- None ->
- assert false
- | Some n -> n
- method set_node n =
- node <- Some n
-
- end</PRE
->
-
-This part of the extension is usually the same for all classes, so it is a good
-idea to consider <TT
-CLASS="LITERAL"
->custom_extension</TT
-> as the super-class of the
-further class definitions. Continuing the example from above, we can define the
-element type classes as follows:
-
-<PRE
-CLASS="PROGRAMLISTING"
->class virtual custom_extension =
- object (self)
- ... clone, node, set_node defined as above ...
-
- method virtual print : out_channel -> unit
- end
-
-class eltype_a =
- object (self)
- inherit custom_extension
- method print ch = ...
- end
-
-class eltype_b =
- object (self)
- inherit custom_extension
- method print ch = ...
- end
-
-class eltype_c =
- object (self)
- inherit custom_extension
- method print ch = ...
- end</PRE
->
-
-The method <TT
-CLASS="LITERAL"
->print</TT
-> can now be implemented for every element
-type separately. Note that you get the associated node by invoking
-
-<PRE
-CLASS="PROGRAMLISTING"
->self # node</PRE
->
-
-and you get the extension object of a node <TT
-CLASS="LITERAL"
->n</TT
-> by writing
-
-<PRE
-CLASS="PROGRAMLISTING"
->n # extension</PRE
->
-
-It is guaranteed that
-
-<PRE
-CLASS="PROGRAMLISTING"
->self # node # extension == self</PRE
->
-
-always holds.</P
-><P
->Here are sample definitions of the <TT
-CLASS="LITERAL"
->print</TT
->
-methods:
-
-<PRE
-CLASS="PROGRAMLISTING"
->class eltype_a =
- object (self)
- inherit custom_extension
- method print ch =
- (* Nodes <a>...</a> are only containers: *)
- output_string ch "(";
- List.iter
- (fun n -> n # extension # print ch)
- (self # node # sub_nodes);
- output_string ch ")";
- end
-
-class eltype_b =
- object (self)
- inherit custom_extension
- method print ch =
- (* Print the value of the CDATA attribute "print": *)
- match self # node # attribute "print" with
- Value s -> output_string ch s
- | Implied_value -> output_string ch "<missing>"
- | Valuelist l -> assert false
- (* not possible because the att is CDATA *)
- end
-
-class eltype_c =
- object (self)
- inherit custom_extension
- method print ch =
- (* Print the contents of this element: *)
- output_string ch (self # node # data)
- end
-
-class null_extension =
- object (self)
- inherit custom_extension
- method print ch = assert false
- end</PRE
-></P
-><P
->The remaining task is to configure the parser such that these extension classes
-are actually used. Here another problem arises: It is not possible to
-dynamically select the class of an object to be created. As workaround,
-<SPAN
-CLASS="ACRONYM"
->PXP</SPAN
-> allows the user to specify <I
-CLASS="EMPHASIS"
->exemplar objects</I
-> for
-the various element types; instead of creating the nodes of the tree by
-applying the <TT
-CLASS="LITERAL"
->new</TT
-> operator the nodes are produced by
-duplicating the exemplars. As object duplication preserves the class of the
-object, one can create fresh objects of every class for which previously an
-exemplar has been registered.</P
-><P
->Exemplars are meant as objects without contents; the only interesting thing is
-that exemplars are instances of a certain class. The creation of an exemplar
-for an element node can be done by:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let element_exemplar = new element_impl extension_exemplar</PRE
->
-
-And a data node exemplar is created by:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let data_exemplar = new data_impl extension_exemplar</PRE
->
-
-The classes <TT
-CLASS="LITERAL"
->element_impl</TT
-> and <TT
-CLASS="LITERAL"
->data_impl</TT
->
-are defined in the module <TT
-CLASS="LITERAL"
->Pxp_document</TT
->. The constructors
-initialize the fresh objects as empty objects, i.e. without children, without
-data contents, and so on. The <TT
-CLASS="LITERAL"
->extension_exemplar</TT
-> is the
-initial extension object the exemplars are associated with. </P
-><P
->Once the exemplars are created and stored somewhere (e.g. in a hash table), you
-can take an exemplar and create a concrete instance (with contents) by
-duplicating it. As a user of the parser you are normally not concerned with this,
-as this is part of the internal logic of the parser, but as background knowledge
-it is worthwhile to mention that the two methods
-<TT
-CLASS="LITERAL"
->create_element</TT
-> and <TT
-CLASS="LITERAL"
->create_data</TT
-> actually
-perform the duplication of the exemplar for which they are invoked,
-additionally apply modifications to the clone, and finally return the new
-object. Moreover, the extension object is copied, too, and the new node object
-is associated with the fresh extension object. Note that this is the reason why
-every extension object must have a <TT
-CLASS="LITERAL"
->clone</TT
-> method.</P
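-><P
->The same two methods can also be used from application code to build small
-trees by hand. The following lines are only a hedged sketch: they assume that <TT
-CLASS="LITERAL"
->element_exemplar</TT
-> and <TT
-CLASS="LITERAL"
->data_exemplar</TT
-> are the exemplars created above, and that <TT
-CLASS="LITERAL"
->dtd</TT
-> is a DTD value (for example taken from a parsed document) which declares an
-element type "x" with a CDATA attribute "att":
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* hedged sketch: building a tiny tree from the exemplars by hand;
-   "x" and "att" are assumed to be declared in dtd *)
-let el  = element_exemplar # create_element dtd (T_element "x") ["att", "value"] in
-let txt = data_exemplar # create_data dtd "some character data" in
-el # add_node txt;                  (* txt becomes the only child of el *)
-print_endline (el # data)</PRE
-></P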
-><P
->The configuration of the set of exemplars is passed to the
-<TT
-CLASS="LITERAL"
->parse_document_entity</TT
-> function as third argument. In our
-example, this argument can be set up as follows:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let spec =
- make_spec_from_alist
- ~data_exemplar: (new data_impl (new null_extension))
- ~default_element_exemplar: (new element_impl (new null_extension))
- ~element_alist:
- [ "a", new element_impl (new eltype_a);
- "b", new element_impl (new eltype_b);
- "c", new element_impl (new eltype_c);
- ]
- ()</PRE
->
-
-The <TT
-CLASS="LITERAL"
->~element_alist</TT
-> function argument defines the mapping
-from element types to exemplars as an associative list. The argument
-<TT
-CLASS="LITERAL"
->~data_exemplar</TT
-> specifies the exemplar for data nodes, and
-the <TT
-CLASS="LITERAL"
->~default_element_exemplar</TT
-> is used whenever the parser
-finds an element type for which the associative list does not define an
-exemplar. </P
-><P
->The configuration is now complete. You can still use the same parsing
-functions, only the initialization is a bit different. For example, call the
-parser by:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let d = parse_document_entity default_config (from_file "doc.xml") spec</PRE
->
-
-Note that the resulting document <TT
-CLASS="LITERAL"
->d</TT
-> has a usable type;
-especially the <TT
-CLASS="LITERAL"
->print</TT
-> method we added is visible. So you can
-print your document by
-
-<PRE
-CLASS="PROGRAMLISTING"
->d # root # extension # print stdout</PRE
-></P
-><P
->This object-oriented approach looks rather complicated; this is mostly caused
-by working around some problems of the strict typing system of O'Caml. Some
-auxiliary concepts such as extensions were needed, but the practical
-consequences are low. In the next section, one of the examples of the
-distribution is explained, a converter from <I
-CLASS="EMPHASIS"
->readme</I
->
-documents to HTML.</P
-></DIV
-><H3
-CLASS="FOOTNOTES"
->Notes</H3
-><TABLE
-BORDER="0"
-CLASS="FOOTNOTES"
-WIDTH="100%"
-><TR
-><TD
-ALIGN="LEFT"
-VALIGN="TOP"
-WIDTH="5%"
-><A
-NAME="FTN.AEN688"
-HREF="x675.html#AEN688"
->[1]</A
-></TD
-><TD
-ALIGN="LEFT"
-VALIGN="TOP"
-WIDTH="95%"
-><P
->The problem is that the subclass is
-usually not a subtype in this case because O'Caml has a contravariant subtyping
-rule. </P
-></TD
-></TR
-></TABLE
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->Example: An HTML backend for the readme
-DTD</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="Using PXP"
-HREF="c533.html"><LINK
-REL="PREVIOUS"
-TITLE="Class-based processing of the node tree"
-HREF="x675.html"><LINK
-REL="NEXT"
-TITLE="The objects representing the document"
-HREF="c893.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="SECT1"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="SECT.README.TO-HTML"
->2.4. Example: An HTML backend for the <I
-CLASS="EMPHASIS"
->readme</I
->
-DTD</A
-></H1
-><P
->The converter from <I
-CLASS="EMPHASIS"
->readme</I
-> documents to HTML
-documents follows strictly the approach to define one class per element
-type. The HTML code is similar to the <I
-CLASS="EMPHASIS"
->readme</I
-> source;
-because of this, most elements can be converted in the following way: Given the
-input element
-
-<PRE
-CLASS="PROGRAMLISTING"
-><e>content</e></PRE
->
-
-the conversion text is the concatenation of a computed prefix, the recursively
-converted content, and a computed suffix. </P
-><P
->Only one element type cannot be handled by this scheme:
-<TT
-CLASS="LITERAL"
->footnote</TT
->. Footnotes are collected as they are found in
-the input text, and they are printed after the main text has been converted and
-printed. </P
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN747"
->2.4.1. Header</A
-></H2
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->open Pxp_types
-open Pxp_document</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN751"
->2.4.2. Type declarations</A
-></H2
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->class type footnote_printer =
- object
- method footnote_to_html : store_type -> out_channel -> unit
- end
-
-and store_type =
- object
- method alloc_footnote : footnote_printer -> int
- method print_footnotes : out_channel -> unit
- end
-;;</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN755"
->2.4.3. Class <TT
-CLASS="LITERAL"
->store</TT
-></A
-></H2
-><P
->The <TT
-CLASS="LITERAL"
->store</TT
-> is a container for footnotes. You can add a
-footnote by invoking <TT
-CLASS="LITERAL"
->alloc_footnote</TT
->; the argument is an
-object of the class <TT
-CLASS="LITERAL"
->footnote_printer</TT
->, and the method returns the
-number of the footnote. The interesting property of a footnote is that it can
-be converted to HTML, so a <TT
-CLASS="LITERAL"
->footnote_printer</TT
-> is an object
-with a method <TT
-CLASS="LITERAL"
->footnote_to_html</TT
->. The class
-<TT
-CLASS="LITERAL"
->footnote</TT
-> which is defined below has a compatible method
-<TT
-CLASS="LITERAL"
->footnote_to_html</TT
-> such that objects created from it can be
-used as <TT
-CLASS="LITERAL"
->footnote_printer</TT
->s.</P
-><P
->The other method, <TT
-CLASS="LITERAL"
->print_footnotes</TT
->, prints the footnotes as a
-definition list, and is typically invoked after the main material of the page
-has already been printed. Every item of the list is printed by
-<TT
-CLASS="LITERAL"
->footnote_to_html</TT
->.</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->class store =
- object (self)
-
- val mutable footnotes = ( [] : (int * footnote_printer) list )
- val mutable next_footnote_number = 1
-
- method alloc_footnote n =
- let number = next_footnote_number in
- next_footnote_number <- number+1;
- footnotes <- footnotes @ [ number, n ];
- number
-
- method print_footnotes ch =
- if footnotes <> [] then begin
- output_string ch "<hr align=left noshade=noshade width=\"30%\">\n";
- output_string ch "<dl>\n";
- List.iter
- (fun (_,n) ->
- n # footnote_to_html (self : #store_type :> store_type) ch)
- footnotes;
- output_string ch "</dl>\n";
- end
-
- end
-;;</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN772"
->2.4.4. Function <TT
-CLASS="LITERAL"
->escape_html</TT
-></A
-></H2
-><P
->This function converts the characters <, >, &, and " to their HTML
-representation. For example,
-<TT
-CLASS="LITERAL"
->escape_html "<>" = "&lt;&gt;"</TT
->. Other
-characters are left unchanged.
-
-<PRE
-CLASS="PROGRAMLISTING"
->let escape_html s =
- Str.global_substitute
- (Str.regexp "<\\|>\\|&\\|\"")
- (fun s ->
- match Str.matched_string s with
- "<" -> "&lt;"
- | ">" -> "&gt;"
- | "&" -> "&amp;"
- | "\"" -> "&quot;"
- | _ -> assert false)
- s
-;;</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN778"
->2.4.5. Virtual class <TT
-CLASS="LITERAL"
->shared</TT
-></A
-></H2
-><P
->This virtual class is the abstract superclass of the extension classes shown
-below. It defines the standard methods <TT
-CLASS="LITERAL"
->clone</TT
->,
-<TT
-CLASS="LITERAL"
->node</TT
->, and <TT
-CLASS="LITERAL"
->set_node</TT
->, and declares the type
-of the virtual method <TT
-CLASS="LITERAL"
->to_html</TT
->. This method recursively
-traverses the whole element tree, and prints the converted HTML code to the
-output channel passed as second argument. The first argument is the reference
-to the global <TT
-CLASS="LITERAL"
->store</TT
-> object which collects the footnotes.
-
-<PRE
-CLASS="PROGRAMLISTING"
->class virtual shared =
- object (self)
-
- (* --- default_ext --- *)
-
- val mutable node = (None : shared node option)
-
- method clone = {< >}
- method node =
- match node with
- None ->
- assert false
- | Some n -> n
- method set_node n =
- node <- Some n
-
- (* --- virtual --- *)
-
- method virtual to_html : store -> out_channel -> unit
-
- end
-;;</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN788"
->2.4.6. Class <TT
-CLASS="LITERAL"
->only_data</TT
-></A
-></H2
-><P
->This class defines <TT
-CLASS="LITERAL"
->to_html</TT
-> such that the character data of
-the current node is converted to HTML. Note that <TT
-CLASS="LITERAL"
->self</TT
-> is an
-extension object, <TT
-CLASS="LITERAL"
->self # node</TT
-> is the node object, and
-<TT
-CLASS="LITERAL"
->self # node # data</TT
-> returns the character data of the node.
-
-<PRE
-CLASS="PROGRAMLISTING"
->class only_data =
- object (self)
- inherit shared
-
- method to_html store ch =
- output_string ch (escape_html (self # node # data))
- end
-;;</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN797"
->2.4.7. Class <TT
-CLASS="LITERAL"
->readme</TT
-></A
-></H2
-><P
->This class converts elements of type <TT
-CLASS="LITERAL"
->readme</TT
-> to HTML. Such an
-element is (by definition) always the root element of the document. First, the
-HTML header is printed; the <TT
-CLASS="LITERAL"
->title</TT
-> attribute of the element
-determines the title of the HTML page. Some aspects of the HTML page can be
-configured by setting certain parameter entities, for example the background
-color, the text color, and link colors. After the header, the
-<TT
-CLASS="LITERAL"
->body</TT
-> tag, and the headline have been printed, the contents
-of the page are converted by invoking <TT
-CLASS="LITERAL"
->to_html</TT
-> on all
-children of the current node (which is the root node). Then, the footnotes are
-appended to this by telling the global <TT
-CLASS="LITERAL"
->store</TT
-> object to print
-the footnotes. Finally, the end tags of the HTML pages are printed.</P
-><P
->This class is an example of how to access the value of an attribute: The value is
-determined by invoking <TT
-CLASS="LITERAL"
->self # node # attribute "title"</TT
->. As
-this attribute has been declared as CDATA and as being required, the value always
-has the form <TT
-CLASS="LITERAL"
->Value s</TT
-> where <TT
-CLASS="LITERAL"
->s</TT
-> is the
-string value of the attribute. </P
-><P
->You can also see how entity contents can be accessed. A parameter entity object
-can be looked up by <TT
-CLASS="LITERAL"
->self # node # dtd # par_entity "name"</TT
->,
-and by invoking <TT
-CLASS="LITERAL"
->replacement_text</TT
-> the value of the entity
-is returned after inner parameter and character entities have been
-processed. Note that you must use <TT
-CLASS="LITERAL"
->gen_entity</TT
-> instead of
-<TT
-CLASS="LITERAL"
->par_entity</TT
-> to access general entities.</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->class readme =
- object (self)
- inherit shared
-
- method to_html store ch =
- (* output header *)
- output_string
- ch "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">";
- output_string
- ch "<!-- WARNING! This is a generated file, do not edit! -->\n";
- let title =
- match self # node # attribute "title" with
- Value s -> s
- | _ -> assert false
- in
- let html_header, _ =
- try (self # node # dtd # par_entity "readme:html:header")
- # replacement_text
- with WF_error _ -> "", false in
- let html_trailer, _ =
- try (self # node # dtd # par_entity "readme:html:trailer")
- # replacement_text
- with WF_error _ -> "", false in
- let html_bgcolor, _ =
- try (self # node # dtd # par_entity "readme:html:bgcolor")
- # replacement_text
- with WF_error _ -> "white", false in
- let html_textcolor, _ =
- try (self # node # dtd # par_entity "readme:html:textcolor")
- # replacement_text
- with WF_error _ -> "", false in
- let html_alinkcolor, _ =
- try (self # node # dtd # par_entity "readme:html:alinkcolor")
- # replacement_text
- with WF_error _ -> "", false in
- let html_vlinkcolor, _ =
- try (self # node # dtd # par_entity "readme:html:vlinkcolor")
- # replacement_text
- with WF_error _ -> "", false in
- let html_linkcolor, _ =
- try (self # node # dtd # par_entity "readme:html:linkcolor")
- # replacement_text
- with WF_error _ -> "", false in
- let html_background, _ =
- try (self # node # dtd # par_entity "readme:html:background")
- # replacement_text
- with WF_error _ -> "", false in
-
- output_string ch "<html><head><title>\n";
- output_string ch (escape_html title);
- output_string ch "</title></head>\n";
- output_string ch "<body ";
- List.iter
- (fun (name,value) ->
- if value <> "" then
- output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
- [ "bgcolor", html_bgcolor;
- "text", html_textcolor;
- "link", html_linkcolor;
- "alink", html_alinkcolor;
- "vlink", html_vlinkcolor;
- ];
- output_string ch ">\n";
- output_string ch html_header;
- output_string ch "<h1>";
- output_string ch (escape_html title);
- output_string ch "</h1>\n";
- (* process main content: *)
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes);
- (* now process footnotes *)
- store # print_footnotes ch;
- (* trailer *)
- output_string ch html_trailer;
- output_string ch "</body></html>\n";
-
- end
-;;</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN817"
->2.4.8. Classes <TT
-CLASS="LITERAL"
->section</TT
->, <TT
-CLASS="LITERAL"
->sect1</TT
->,
-<TT
-CLASS="LITERAL"
->sect2</TT
->, and <TT
-CLASS="LITERAL"
->sect3</TT
-></A
-></H2
-><P
->As the conversion process is very similar, the conversion classes of the three
-section levels are derived from the more general <TT
-CLASS="LITERAL"
->section</TT
->
-class. The HTML code of the section levels only differs in the type of the
-headline, and because of this the classes describing the section levels can be
-computed by replacing the class argument <TT
-CLASS="LITERAL"
->the_tag</TT
-> of
-<TT
-CLASS="LITERAL"
->section</TT
-> by the HTML name of the headline tag.</P
-><P
->Section elements are converted to HTML by printing a headline and then
-converting the contents of the element recursively. More precisely, the first
-sub-element is always a <TT
-CLASS="LITERAL"
->title</TT
-> element, and the other
-elements are the contents of the section. This structure is declared in the
-DTD, and it is guaranteed that the document matches the DTD. Because of this
-the title node can be separated from the rest without any checks.</P
-><P
->Both the title node and the body nodes are then converted to HTML by calling
-<TT
-CLASS="LITERAL"
->to_html</TT
-> on them.</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->class section the_tag =
- object (self)
- inherit shared
-
- val tag = the_tag
-
- method to_html store ch =
- let sub_nodes = self # node # sub_nodes in
- match sub_nodes with
- title_node :: rest ->
- output_string ch ("<" ^ tag ^ ">\n");
- title_node # extension # to_html store ch;
- output_string ch ("\n</" ^ tag ^ ">");
- List.iter
- (fun n -> n # extension # to_html store ch)
- rest
- | _ ->
- assert false
- end
-;;
-
-class sect1 = section "h1";;
-class sect2 = section "h3";;
-class sect3 = section "h4";;</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN833"
->2.4.9. Classes <TT
-CLASS="LITERAL"
->map_tag</TT
->, <TT
-CLASS="LITERAL"
->p</TT
->,
-<TT
-CLASS="LITERAL"
->em</TT
->, <TT
-CLASS="LITERAL"
->ul</TT
->, <TT
-CLASS="LITERAL"
->li</TT
-></A
-></H2
-><P
->Several element types are converted to HTML by simply mapping them to
-corresponding HTML element types. The class <TT
-CLASS="LITERAL"
->map_tag</TT
->
-implements this, and the class argument <TT
-CLASS="LITERAL"
->the_target_tag</TT
->
-determines the tag name to map to. The output consists of the start tag, the
-recursively converted inner elements, and the end tag.
-
-<PRE
-CLASS="PROGRAMLISTING"
->class map_tag the_target_tag =
- object (self)
- inherit shared
-
- val target_tag = the_target_tag
-
- method to_html store ch =
- output_string ch ("<" ^ target_tag ^ ">\n");
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes);
- output_string ch ("\n</" ^ target_tag ^ ">");
- end
-;;
-
-class p = map_tag "p";;
-class em = map_tag "b";;
-class ul = map_tag "ul";;
-class li = map_tag "li";;</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN844"
->2.4.10. Class <TT
-CLASS="LITERAL"
->br</TT
-></A
-></H2
-><P
->Elements of type <TT
-CLASS="LITERAL"
->br</TT
-> are mapped to the same HTML type. Note
-that HTML forbids the end tag of <TT
-CLASS="LITERAL"
->br</TT
->.
-
-<PRE
-CLASS="PROGRAMLISTING"
->class br =
- object (self)
- inherit shared
-
- method to_html store ch =
- output_string ch "<br>\n";
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes);
- end
-;;</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN851"
->2.4.11. Class <TT
-CLASS="LITERAL"
->code</TT
-></A
-></H2
-><P
->The <TT
-CLASS="LITERAL"
->code</TT
-> type is converted to a <TT
-CLASS="LITERAL"
->pre</TT
->
-section (preformatted text). As the meaning of tabs is unspecified in HTML,
-tabs are expanded to spaces.
-
-<PRE
-CLASS="PROGRAMLISTING"
->class code =
- object (self)
- inherit shared
-
- method to_html store ch =
- let data = self # node # data in
- (* convert tabs *)
- let l = String.length data in
- let rec preprocess i column =
- (* this is very inefficient but easy to understand: *)
- if i < l then
- match data.[i] with
- '\t' ->
- let n = 8 - (column mod 8) in
- String.make n ' ' ^ preprocess (i+1) (column + n)
- | '\n' ->
- "\n" ^ preprocess (i+1) 0
- | c ->
- String.make 1 c ^ preprocess (i+1) (column + 1)
- else
- ""
- in
- output_string ch "<p><pre>";
- output_string ch (escape_html (preprocess 0 0));
- output_string ch "</pre></p>";
-
- end
-;;</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN858"
->2.4.12. Class <TT
-CLASS="LITERAL"
->a</TT
-></A
-></H2
-><P
->Hyperlinks, expressed by the <TT
-CLASS="LITERAL"
->a</TT
-> element type, are converted
-to the HTML <TT
-CLASS="LITERAL"
->a</TT
-> type. If the target of the hyperlink is given
-by <TT
-CLASS="LITERAL"
->href</TT
->, the URL of this attribute can be used
-directly. Alternatively, the target can be given by
-<TT
-CLASS="LITERAL"
->readmeref</TT
-> in which case the ".html" suffix must be added to
-the file name. </P
-><P
->Note that within <TT
-CLASS="LITERAL"
->a</TT
-> only #PCDATA is allowed, so the contents
-can be converted directly by applying <TT
-CLASS="LITERAL"
->escape_html</TT
-> to the
-character data contents.
-
-<PRE
-CLASS="PROGRAMLISTING"
->class a =
- object (self)
- inherit shared
-
- method to_html store ch =
- output_string ch "<a ";
- let href =
- match self # node # attribute "href" with
- Value v -> escape_html v
- | Valuelist _ -> assert false
- | Implied_value ->
- begin match self # node # attribute "readmeref" with
- Value v -> escape_html v ^ ".html"
- | Valuelist _ -> assert false
- | Implied_value ->
- ""
- end
- in
- if href <> "" then
- output_string ch ("href=\"" ^ href ^ "\"");
- output_string ch ">";
- output_string ch (escape_html (self # node # data));
- output_string ch "</a>";
-
- end
-;;</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN870"
->2.4.13. Class <TT
-CLASS="LITERAL"
->footnote</TT
-></A
-></H2
-><P
->The <TT
-CLASS="LITERAL"
->footnote</TT
-> class has two methods:
-<TT
-CLASS="LITERAL"
->to_html</TT
-> to convert the footnote reference to HTML, and
-<TT
-CLASS="LITERAL"
->footnote_to_html</TT
-> to convert the footnote text itself.</P
-><P
->The footnote reference is converted to a local hyperlink; more precisely, to
-two anchor tags which are connected with each other. The text anchor points to
-the footnote anchor, and the footnote anchor points to the text anchor.</P
-><P
->The footnote must be allocated in the <TT
-CLASS="LITERAL"
->store</TT
-> object. By
-allocating the footnote, you get the number of the footnote, and the text of
-the footnote is stored until the end of the HTML page is reached when the
-footnotes can be printed. The <TT
-CLASS="LITERAL"
->to_html</TT
-> method simply stores
-the object itself, such that the <TT
-CLASS="LITERAL"
->footnote_to_html</TT
-> method is
-invoked on the same object that encountered the footnote.</P
-><P
->The <TT
-CLASS="LITERAL"
->to_html</TT
-> method only allocates the footnote and prints the
-reference anchor, but it neither prints nor converts the contents of the
-note. This is deferred until the footnotes actually get printed, i.e. the
-recursive call of <TT
-CLASS="LITERAL"
->to_html</TT
-> on the sub nodes is done by
-<TT
-CLASS="LITERAL"
->footnote_to_html</TT
->. </P
-><P
->Note that this technique does not work if you make another footnote within a
-footnote; the second footnote gets allocated but not printed.</P
-><P
-><PRE
-CLASS="PROGRAMLISTING"
->class footnote =
- object (self)
- inherit shared
-
- val mutable footnote_number = 0
-
- method to_html store ch =
- let number =
- store # alloc_footnote (self : #shared :> footnote_printer) in
- let foot_anchor =
- "footnote" ^ string_of_int number in
- let text_anchor =
- "textnote" ^ string_of_int number in
- footnote_number <- number;
- output_string ch ( "<a name=\"" ^ text_anchor ^ "\" href=\"#" ^
- foot_anchor ^ "\">[" ^ string_of_int number ^
- "]</a>" )
-
- method footnote_to_html store ch =
- (* prerequisite: we are in a definition list <dl>...</dl> *)
- let foot_anchor =
- "footnote" ^ string_of_int footnote_number in
- let text_anchor =
- "textnote" ^ string_of_int footnote_number in
- output_string ch ("<dt><a name=\"" ^ foot_anchor ^ "\" href=\"#" ^
- text_anchor ^ "\">[" ^ string_of_int footnote_number ^
- "]</a></dt>\n<dd>");
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes);
- output_string ch ("\n</dd>")
-
- end
-;;</PRE
-></P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN889"
->2.4.14. The specification of the document model</A
-></H2
-><P
->This code sets up the hash table that connects element types with the exemplars
-of the extension classes that convert the elements to HTML.
-
-<PRE
-CLASS="PROGRAMLISTING"
->open Pxp_yacc
-
-let tag_map =
- make_spec_from_alist
- ~data_exemplar:(new data_impl (new only_data))
- ~default_element_exemplar:(new element_impl (new no_markup))
- ~element_alist:
- [ "readme", (new element_impl (new readme));
- "sect1", (new element_impl (new sect1));
- "sect2", (new element_impl (new sect2));
- "sect3", (new element_impl (new sect3));
- "title", (new element_impl (new no_markup));
- "p", (new element_impl (new p));
- "br", (new element_impl (new br));
- "code", (new element_impl (new code));
- "em", (new element_impl (new em));
- "ul", (new element_impl (new ul));
- "li", (new element_impl (new li));
- "footnote", (new element_impl (new footnote : #shared :> shared));
- "a", (new element_impl (new a));
- ]
- ()
-;;</PRE
-></P
-></DIV
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-<HTML
-><HEAD
-><TITLE
->The class type node</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
-REL="HOME"
-TITLE="The PXP user's guide"
-HREF="index.html"><LINK
-REL="UP"
-TITLE="The objects representing the document"
-HREF="c893.html"><LINK
-REL="PREVIOUS"
-TITLE="The objects representing the document"
-HREF="c893.html"><LINK
-REL="NEXT"
-TITLE="The class type extension"
-HREF="x1439.html"><LINK
-REL="STYLESHEET"
-TYPE="text/css"
-HREF="markup.css"></HEAD
-><BODY
-CLASS="SECT1"
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-LINK="#0000FF"
-VLINK="#840084"
-ALINK="#0000FF"
-><DIV
-CLASS="SECT1"
-><H1
-CLASS="SECT1"
-><A
-NAME="AEN939"
->3.2. The class type <TT
-CLASS="LITERAL"
->node</TT
-></A
-></H1
-><P
-> From <TT
-CLASS="LITERAL"
->Pxp_document</TT
->:
-
-<PRE
-CLASS="PROGRAMLISTING"
->type node_type =
- T_data
-| T_element of string
-| T_super_root
-| T_pinstr of string
-| T_comment
-<TT
-CLASS="REPLACEABLE"
-><I
->and some other, reserved types</I
-></TT
->
-;;
-
-class type [ 'ext ] node =
- object ('self)
- constraint 'ext = 'ext node #extension
-
- <A
-NAME="TYPE-NODE-GENERAL.SIG"
-></A
->(* <A
-HREF="x939.html#TYPE-NODE-GENERAL"
-><I
-><I
->General observers</I
-></I
-></A
-> *)
-
- method extension : 'ext
- method dtd : dtd
- method parent : 'ext node
- method root : 'ext node
- method sub_nodes : 'ext node list
- method iter_nodes : ('ext node -> unit) -> unit
- method iter_nodes_sibl :
- ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
- method node_type : node_type
- method encoding : Pxp_types.rep_encoding
- method data : string
- method position : (string * int * int)
- method comment : string option
- method pinstr : string -> proc_instruction list
- method pinstr_names : string list
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
-
- <A
-NAME="TYPE-NODE-ATTS.SIG"
-></A
->(* <A
-HREF="x939.html#TYPE-NODE-ATTS"
-><I
-><I
->Attribute observers</I
-></I
-></A
-> *)
-
- method attribute : string -> Pxp_types.att_value
- method required_string_attribute : string -> string
- method optional_string_attribute : string -> string option
- method required_list_attribute : string -> string list
- method optional_list_attribute : string -> string list
- method attribute_names : string list
- method attribute_type : string -> Pxp_types.att_type
- method attributes : (string * Pxp_types.att_value) list
- method id_attribute_name : string
- method id_attribute_value : string
- method idref_attribute_names : string
-
- <A
-NAME="TYPE-NODE-MODS.SIG"
-></A
->(* <A
-HREF="x939.html#TYPE-NODE-MODS"
-><I
-><I
->Modifying methods</I
-></I
-></A
-> *)
-
- method add_node : ?force:bool -> 'ext node -> unit
- method add_pinstr : proc_instruction -> unit
- method delete : unit
- method set_nodes : 'ext node list -> unit
- method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
- method set_comment : string option -> unit
-
- <A
-NAME="TYPE-NODE-CLONING.SIG"
-></A
->(* <A
-HREF="x939.html#TYPE-NODE-CLONING"
-><I
-><I
->Cloning methods</I
-></I
-></A
-> *)
-
- method orphaned_clone : 'self
- method orphaned_flat_clone : 'self
- method create_element :
- ?position:(string * int * int) ->
- dtd -> node_type -> (string * string) list ->
- 'ext node
- method create_data : dtd -> string -> 'ext node
- method keep_always_whitespace_mode : unit
-
- <A
-NAME="TYPE-NODE-WEIRD.SIG"
-></A
->(* <A
-HREF="x939.html#TYPE-NODE-WEIRD"
-><I
-><I
->Validating methods</I
-></I
-></A
-> *)
-
- method local_validate : ?use_dfa:bool -> unit -> unit
-
- (* ... Internal methods are undocumented. *)
-
- end
-;;</PRE
->
-
-In the module <TT
-CLASS="LITERAL"
->Pxp_types</TT
-> you can find another type
-definition that is important in this context:
-
-<PRE
-CLASS="PROGRAMLISTING"
->type Pxp_types.att_value =
- Value of string
- | Valuelist of string list
- | Implied_value
-;;</PRE
-></P
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN958"
->3.2.1. The structure of document trees</A
-></H2
-><P
->A node represents either an element or a character data section. There are two
-classes implementing the two aspects of nodes: <TT
-CLASS="LITERAL"
->element_impl</TT
->
-and <TT
-CLASS="LITERAL"
->data_impl</TT
->. The latter class does not implement all
-methods because some methods do not make sense for data nodes.</P
-><P
->(Note: PXP also supports a mode which forces processing instructions and
-comments to be represented as nodes of the document tree. However, these nodes
-are instances of <TT
-CLASS="LITERAL"
->element_impl</TT
-> with node types
-<TT
-CLASS="LITERAL"
->T_pinstr</TT
-> and <TT
-CLASS="LITERAL"
->T_comment</TT
->,
-respectively. This mode must be explicitly configured; the basic representation
-knows only element and data nodes.)</P
-><P
->The following figure
-(<A
-HREF="x939.html#NODE-TERM"
-><I
-><I
->A tree with element nodes, data nodes, and attributes</I
-><I
-></I
-></I
-></A
->) shows an example how
-a tree is constructed from element and data nodes. The circular areas
-represent element nodes whereas the ovals denote data nodes. Only elements
-may have subnodes; data nodes are always leaves of the tree. The subnodes
-of an element can be either element or data nodes; in both cases the O'Caml
-objects storing the nodes have the class type <TT
-CLASS="LITERAL"
->node</TT
->.</P
-><P
->Attributes (the clouds in the picture) are not directly
-integrated into the tree; there is always an extra link to the attribute
-list. This is also true for processing instructions (not shown in the
-picture). This means that there are separate access methods for attributes and
-processing instructions.</P
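-><P
->As a hedged illustration of these separate access paths (the attribute name
-"id" is only an assumption of this sketch and must be declared for the element,
-otherwise <TT
-CLASS="LITERAL"
->attribute</TT
-> raises <TT
-CLASS="LITERAL"
->Not_found</TT
->), one might write:
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* assumes open Pxp_types (or Pxp_yacc); n is an element node of a parsed tree *)
-let show_non_tree_parts n =
-  (match n # attribute "id" with
-     Value s       -> print_endline ("id = " ^ s)
-   | Valuelist sl  -> print_endline ("id = " ^ String.concat " " sl)
-   | Implied_value -> print_endline "no id given");
-  (* the targets of the processing instructions contained in this element: *)
-  List.iter print_endline (n # pinstr_names)</PRE
-></P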
-><DIV
-CLASS="FIGURE"
-><A
-NAME="NODE-TERM"
-></A
-><P
-><B
->Figure 3-1. A tree with element nodes, data nodes, and attributes</B
-></P
-><P
-><IMG
-SRC="pic/node_term.gif"></P
-></DIV
-><P
->Only elements, data sections, attributes and processing
-instructions (and comments, if configured) can, directly or indirectly, occur
-in the document tree. It is impossible to add entity references to the tree; if
-the parser finds such a reference, not the reference as such but the referenced
-text (i.e. the tree representing the structured text) is included in the
-tree.</P
-><P
->Note that the parser collapses as much data material into one
-data node as possible such that there are normally never two adjacent data
-nodes. This invariant is enforced even if data material is included by entity
-references or CDATA sections, or if a data sequence is interrupted by
-comments. So <TT
-CLASS="LITERAL"
->a &amp; b <!-- comment --> c <![CDATA[
-<> d]]></TT
-> is represented by only one data node, for
-instance. However, you can create document trees manually which break this
-invariant; it is only the way the parser forms the tree.</P
-><DIV
-CLASS="FIGURE"
-><A
-NAME="NODE-GENERAL"
-></A
-><P
-><B
->Figure 3-2. Nodes are doubly linked trees</B
-></P
-><P
-><IMG
-SRC="pic/node_general.gif"></P
-></DIV
-><P
->The node tree has links in both directions: Every node has a link to its parent
-(if any), and it has links to the subnodes (see
-figure <A
-HREF="x939.html#NODE-GENERAL"
-><I
-><I
->Nodes are doubly linked trees</I
-><I
-></I
-></I
-></A
->). Obviously,
-this doubly-linked structure simplifies the navigation in the tree, but it also
-has some consequences for the possible operations on trees (a short sketch of
-these operations follows at the end of this section).</P
-><P
->Because every node must have at most <I
-CLASS="EMPHASIS"
->one</I
-> parent node,
-operations are illegal if they violate this condition. The following figure
-(<A
-HREF="x939.html#NODE-ADD"
-><I
-><I
->A node can only be added if it is a root</I
-><I
-></I
-></I
-></A
->) shows on the left side
-that node <TT
-CLASS="LITERAL"
->y</TT
-> is added to <TT
-CLASS="LITERAL"
->x</TT
-> as a new subnode
-which is allowed because <TT
-CLASS="LITERAL"
->y</TT
-> does not have a parent yet. The
-right side of the picture illustrates what would happen if <TT
-CLASS="LITERAL"
->y</TT
->
-had a parent node; this is illegal because <TT
-CLASS="LITERAL"
->y</TT
-> would have two
-parents after the operation.</P
-><DIV
-CLASS="FIGURE"
-><A
-NAME="NODE-ADD"
-></A
-><P
-><B
->Figure 3-3. A node can only be added if it is a root</B
-></P
-><P
-><IMG
-SRC="pic/node_add.gif"></P
-></DIV
-><P
->The "delete" operation simply removes the links between two nodes. In the
-picture (<A
-HREF="x939.html#NODE-DELETE"
-><I
-><I
->A deleted node becomes the root of the subtree</I
-><I
-></I
-></I
-></A
->) the node
-<TT
-CLASS="LITERAL"
->x</TT
-> is deleted from the list of subnodes of
-<TT
-CLASS="LITERAL"
->y</TT
->. After that, <TT
-CLASS="LITERAL"
->x</TT
-> becomes the root of the
-subtree starting at this node.</P
-><DIV
-CLASS="FIGURE"
-><A
-NAME="NODE-DELETE"
-></A
-><P
-><B
->Figure 3-4. A deleted node becomes the root of the subtree</B
-></P
-><P
-><IMG
-SRC="pic/node_delete.gif"></P
-></DIV
-><P
->It is also possible to make a clone of a subtree, as illustrated in
-<A
-HREF="x939.html#NODE-CLONE"
-><I
-><I
->The clone of a subtree</I
-><I
-></I
-></I
-></A
->. In this case, the
-clone is a copy of the original subtree except that it is no longer a
-subnode. Because cloning never keeps the connection to the parent, the clones
-are called <I
-CLASS="EMPHASIS"
->orphaned</I
->.</P
-><DIV
-CLASS="FIGURE"
-><A
-NAME="NODE-CLONE"
-></A
-><P
-><B
->Figure 3-5. The clone of a subtree</B
-></P
-><P
-><IMG
-SRC="pic/node_clone.gif"></P
-></DIV
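-><P
->The following lines summarize these operations as a hedged sketch. Here <TT
-CLASS="LITERAL"
->x</TT
-> and <TT
-CLASS="LITERAL"
->y</TT
-> are assumed to be nodes sharing the same DTD, and <TT
-CLASS="LITERAL"
->y</TT
-> is assumed to be a root when it is added:
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* hedged sketch of the tree operations discussed above *)
-let rearrange x y =
-  x # add_node y;                     (* y becomes the last child of x *)
-  let y_copy = y # orphaned_clone in  (* deep copy without a parent *)
-  y # delete;                         (* unlink y; y becomes a root again *)
-  y_copy</PRE
-></P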
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1007"
->3.2.2. The methods of the class type <TT
-CLASS="LITERAL"
->node</TT
-></A
-></H2
-><A
-NAME="TYPE-NODE-GENERAL"
-></A
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
-> <A
-HREF="x939.html#TYPE-NODE-GENERAL.SIG"
->General observers</A
->
- . </B
-> <P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->extension</TT
->: The reference to the extension object which
-belongs to this node (see ...).</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->dtd</TT
->: Returns a reference to the global DTD. All nodes
-of a tree must share the same DTD.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->parent</TT
->: Gets the parent node. Raises
-<TT
-CLASS="LITERAL"
->Not_found</TT
-> if the node does not have a
-parent, i.e. if the node is the root.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->root</TT
->: Gets the reference to the root node of the tree.
-Every node is contained in a tree with a root, so this method always
-succeeds. Note that this method <I
-CLASS="EMPHASIS"
->searches</I
-> the root,
-which costs time proportional to the length of the path to the root.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->sub_nodes</TT
->: Returns references to the children. The returned
-list reflects the order of the children. For data nodes, this method returns
-the empty list.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->iter_nodes f</TT
->: Iterates over the children, and calls
-<TT
-CLASS="LITERAL"
->f</TT
-> for every child in turn. </P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->iter_nodes_sibl f</TT
->: Iterates over the children, and calls
-<TT
-CLASS="LITERAL"
->f</TT
-> for every child in turn. <TT
-CLASS="LITERAL"
->f</TT
-> gets as
-arguments the previous node, the current node, and the next node.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->node_type</TT
->: Returns either <TT
-CLASS="LITERAL"
->T_data</TT
-> which
-means that the node is a data node, or <TT
-CLASS="LITERAL"
->T_element n</TT
->
-which means that the node is an element of type <TT
-CLASS="LITERAL"
->n</TT
->.
-If configured, possible node types are also <TT
-CLASS="LITERAL"
->T_pinstr t</TT
->
-indicating that the node represents a processing instruction with target
-<TT
-CLASS="LITERAL"
->t</TT
->, and <TT
-CLASS="LITERAL"
->T_comment</TT
-> in which case the node
-is a comment.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->encoding</TT
->: Returns the encoding of the strings.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->data</TT
->: Returns the character data of this node and all
-children, concatenated as one string. The encoding of the string is what
-the method <TT
-CLASS="LITERAL"
->encoding</TT
-> returns.
-- For data nodes, this method simply returns the represented characters.
-For elements, the meaning of the method has been extended such that it
-returns something useful, i.e. the effectively contained characters, without
-markup. (For <TT
-CLASS="LITERAL"
->T_pinstr</TT
-> and <TT
-CLASS="LITERAL"
->T_comment</TT
->
-nodes, the method returns the empty string.)</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->position</TT
->: If configured, this method returns the position of
-the element as a triple (entity, line, byteposition). For data nodes, the
-position is not stored. If the position is not available, the triple
-<TT
-CLASS="LITERAL"
->"?", 0, 0</TT
-> is returned.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->comment</TT
->: Returns <TT
-CLASS="LITERAL"
->Some text</TT
-> for comment
-nodes, and <TT
-CLASS="LITERAL"
->None</TT
-> for other nodes. The <TT
-CLASS="LITERAL"
->text</TT
->
-is everything between the comment delimiters <TT
-CLASS="LITERAL"
->&lt;!--</TT
-> and
-<TT
-CLASS="LITERAL"
->--></TT
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->pinstr n</TT
->: Returns all processing instructions that are
-directly contained in this element and that have a <I
-CLASS="EMPHASIS"
->target</I
->
-specification of <TT
-CLASS="LITERAL"
->n</TT
->. The target is the first word after
-the <TT
-CLASS="LITERAL"
->&lt;?</TT
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->pinstr_names</TT
->: Returns the list of all targets of processing
-instructions directly contained in this element.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->write s enc</TT
->: Prints the node and all subnodes to the passed
-output stream as valid XML text, using the passed external encoding.</P
-></LI
-></UL
->
- </P
-></DIV
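-><P
->As a small illustration (a sketch only; <TT
-CLASS="LITERAL"
->tree</TT
-> is assumed to be an element node of a parsed document), the observers
-above can be combined to list the types of the direct element children and
-to print the character data of the subtree:
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* Sketch: tree is assumed to be an element node of a parsed document. *)
-let direct_element_types t =
-  List.concat
-    (List.map
-       (fun n -> match n # node_type with
-          | T_element name -> [ name ]   (* element children *)
-          | _              -> [])        (* data, comments, PIs *)
-       (t # sub_nodes))
-
-let () = print_endline (tree # data)     (* character data, without markup *)
-let () = List.iter print_endline (direct_element_types tree)</PRE
-></P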
-><A
-NAME="TYPE-NODE-ATTS"
-></A
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
-> <A
-HREF="x939.html#TYPE-NODE-ATTS.SIG"
->Attribute observers</A
->
- . </B
-> <P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->attribute n</TT
->: Returns the value of the attribute with name
-<TT
-CLASS="LITERAL"
->n</TT
->. This method returns a value for every declared
-attribute, and it raises <TT
-CLASS="LITERAL"
->Not_found</TT
-> for any undeclared
-attribute. Note that it even returns a value if the attribute is actually
-missing but is declared as <TT
-CLASS="LITERAL"
->#IMPLIED</TT
-> or has a default
-value. - Possible values are:
- <P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->Implied_value</TT
->: The attribute has been declared with the
-keyword <TT
-CLASS="LITERAL"
->#IMPLIED</TT
->, and the attribute is missing in the
-attribute list of this element.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->Value s</TT
->: The attribute has been declared as type
-<TT
-CLASS="LITERAL"
->CDATA</TT
->, as <TT
-CLASS="LITERAL"
->ID</TT
->, as
-<TT
-CLASS="LITERAL"
->IDREF</TT
->, as <TT
-CLASS="LITERAL"
->ENTITY</TT
->, or as
-<TT
-CLASS="LITERAL"
->NMTOKEN</TT
->, or as enumeration or notation, and one of the two
-conditions holds: (1) The attribute value is present in the attribute list in
-which case the value is returned in the string <TT
-CLASS="LITERAL"
->s</TT
->. (2) The
-attribute has been omitted, and the DTD declared the attribute with a default
-value. The default value is returned in <TT
-CLASS="LITERAL"
->s</TT
->.
-- Summarized, <TT
-CLASS="LITERAL"
->Value s</TT
-> is returned for non-implied, non-list
-attribute values.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->Valuelist l</TT
->: The attribute has been declared as type
-<TT
-CLASS="LITERAL"
->IDREFS</TT
->, as <TT
-CLASS="LITERAL"
->ENTITIES</TT
->, or
-as <TT
-CLASS="LITERAL"
->NMTOKENS</TT
->, and one of the two conditions holds: (1) The
-attribute value is present in the attribute list in which case the
-space-separated tokens of the value are returned in the string list
-<TT
-CLASS="LITERAL"
->l</TT
->. (2) The attribute has been omitted, and the DTD declared
-the attribute with a default value. The default value is returned in
-<TT
-CLASS="LITERAL"
->l</TT
->.
-- Summarized, <TT
-CLASS="LITERAL"
->Valuelist l</TT
-> is returned for all list-type
-attribute values.</P
-></LI
-></UL
->
-
-Note that before the attribute value is returned, the value is normalized. This
-means that newlines are converted to spaces, and that references to character
-entities (i.e. <TT
-CLASS="LITERAL"
->&#<TT
-CLASS="REPLACEABLE"
-><I
->n</I
-></TT
->;</TT
->) and
-general entities
-(i.e. <TT
-CLASS="LITERAL"
->&<TT
-CLASS="REPLACEABLE"
-><I
->name</I
-></TT
->;</TT
->) are expanded;
-if necessary, expansion is performed recursively.</P
-><P
->In well-formedness mode, there is no DTD which could declare an
-attribute. Because of this, every occurring attribute is treated as a CDATA
-attribute.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->required_string_attribute n</TT
->: returns the Value attribute
-called n, or the Valuelist attribute as a string where the list elements
-are separated by spaces. If the attribute value is implied, or if the
-attribute does not exist, the method will fail. - This method is convenient
-if you expect a non-implied and non-list attribute value.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->optional_string_attribute n</TT
->: returns the Value attribute
-called n, or the Valuelist attribute as a string where the list elements
-are separated by spaces. If the attribute value is implied, or if the
-attribute does not exist, the method returns None. - This method is
-convenient if you expect a non-list attribute value including the implied
-value.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->required_list_attribute n</TT
->: returns the Valuelist attribute
-called n, or the Value attribute as a list with a single element.
-If the attribute value is implied, or if the
-attribute does not exist, the method will fail. - This method is
-convenient if you expect a list attribute value.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->optional_list_attribute n</TT
->: returns the Valuelist attribute
-called n, or the Value attribute as a list with a single element.
-If the attribute value is implied, or if the
-attribute does not exist, an empty list will be returned. - This method
-is convenient if you expect a list attribute value or the implied value.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->attribute_names</TT
->: returns the list of all attribute names of
-this element. As this is a validating parser, this list is equal to the
-list of declared attributes.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->attribute_type n</TT
->: returns the type of the attribute called
-<TT
-CLASS="LITERAL"
->n</TT
->. See the module <TT
-CLASS="LITERAL"
->Pxp_types</TT
-> for a
-description of the encoding of the types.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->attributes</TT
->: returns the list of pairs of names and values
-for all attributes of
-this element.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->id_attribute_name</TT
->: returns the name of the attribute that is
-declared with type ID. There is at most one such attribute. The method raises
-<TT
-CLASS="LITERAL"
->Not_found</TT
-> if there is no declared ID attribute for the
-element type.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->id_attribute_value</TT
->: returns the value of the attribute that
-is declared with type ID. There is at most one such attribute. The method raises
-<TT
-CLASS="LITERAL"
->Not_found</TT
-> if there is no declared ID attribute for the
-element type.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->idref_attribute_names</TT
->: returns the list of attribute names
-that are declared as IDREF or IDREFS.</P
-></LI
-></UL
->
- </P
-></DIV
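-><P
->For example (a sketch; <TT
-CLASS="LITERAL"
->n</TT
-> is assumed to be an element node), the attribute observers above can be
-used to print all attributes of an element in a readable form:
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* Sketch: n is assumed to be an element node. *)
-let print_attributes n =
-  List.iter
-    (fun (name, value) ->
-       let text =
-         match value with
-         | Value s       -> s                    (* plain string value *)
-         | Valuelist l   -> String.concat " " l  (* list-type attribute *)
-         | Implied_value -> "(implied)"          (* declared #IMPLIED, missing *)
-       in
-       print_endline (name ^ "=" ^ text))
-    (n # attributes)</PRE
-></P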
-><A
-NAME="TYPE-NODE-MODS"
-></A
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
-> <A
-HREF="x939.html#TYPE-NODE-MODS.SIG"
->Modifying methods</A
->
- . </B
->The following methods are only defined for element nodes (more exactly:
-the methods are defined for data nodes, too, but they always fail).
-
- <P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->add_node sn</TT
->: Adds sub node <TT
-CLASS="LITERAL"
->sn</TT
-> to the list
-of children. This operation is illustrated in the picture
-<A
-HREF="x939.html#NODE-ADD"
-><I
-><I
->A node can only be added if it is a root</I
-><I
-></I
-></I
-></A
->. This method expects that
-<TT
-CLASS="LITERAL"
->sn</TT
-> is a root, and it requires that <TT
-CLASS="LITERAL"
->sn</TT
-> and
-the current object share the same DTD.</P
-><P
->Because <TT
-CLASS="LITERAL"
->add_node</TT
-> is the method the parser itself uses
-to add new nodes to the tree, it performs by default some simple validation
-checks: If the content model is a regular expression, it is not allowed to add
-data nodes to this node unless the new nodes consist only of whitespace. In
-this case, the new data nodes are silently dropped (you can change this by
-invoking <TT
-CLASS="LITERAL"
->keep_always_whitespace_mode</TT
->).</P
-><P
->If the document is flagged as stand-alone, such whitespace-only data
-nodes are even forbidden if the element declaration is contained in an
-external entity. This case is detected and rejected.</P
-><P
->If the content model is <TT
-CLASS="LITERAL"
->EMPTY</TT
->, it is not allowed to
-add any data node unless the data node is empty. In this case, the new data
-node is silently dropped.</P
-><P
->These checks only apply if there is a DTD. In well-formedness mode, it is
-assumed that every element is declared with content model
-<TT
-CLASS="LITERAL"
->ANY</TT
-> which disables any validation check. Furthermore, you can
-turn these checks off by passing <TT
-CLASS="LITERAL"
->~force:true</TT
-> as first
-argument.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->add_pinstr pi</TT
->: Adds the processing instruction
-<TT
-CLASS="LITERAL"
->pi</TT
-> to the list of processing instructions.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->delete</TT
->: Deletes this node from the tree. After this
-operation, this node is no longer a child of the former father node, and the
-node loses the connection to the father as well. This operation is illustrated
-by the figure <A
-HREF="x939.html#NODE-DELETE"
-><I
-><I
->A deleted node becomes the root of the subtree</I
-><I
-></I
-></I
-></A
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->set_nodes nl</TT
->: Sets the list of children to
-<TT
-CLASS="LITERAL"
->nl</TT
->. It is required that every member of <TT
-CLASS="LITERAL"
->nl</TT
->
-is a root, and that all members and the current object share the same DTD.
-Unlike <TT
-CLASS="LITERAL"
->add_node</TT
->, no validation checks are performed.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->quick_set_attributes atts</TT
->: sets the attributes of this
-element to <TT
-CLASS="LITERAL"
->atts</TT
->. It is <I
-CLASS="EMPHASIS"
->not</I
-> checked
-whether <TT
-CLASS="LITERAL"
->atts</TT
-> matches the DTD or not; it is up to the
-caller of this method to ensure this. (This method may be useful to transform
-the attribute values, i.e. apply a mapping to every attribute.)</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->set_comment text</TT
->: This method is only applicable to
-<TT
-CLASS="LITERAL"
->T_comment</TT
-> nodes; it sets the comment text contained by such
-nodes. </P
-></LI
-></UL
-></P
-></DIV
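-><P
->As an illustration (a sketch; <TT
-CLASS="LITERAL"
->x</TT
-> and <TT
-CLASS="LITERAL"
->y</TT
-> are assumed to be nodes of the same document), a subtree can be moved to
-another place of the tree by combining <TT
-CLASS="LITERAL"
->delete</TT
-> and <TT
-CLASS="LITERAL"
->add_node</TT
->:
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* Sketch: x is a non-root node, y an element node; both share the same DTD. *)
-let move_under y x =
-  x # delete;       (* unlink x from its parent; x becomes a root *)
-  y # add_node x    (* now x can be added because it has no parent anymore *)</PRE
-></P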
-><A
-NAME="TYPE-NODE-CLONING"
-></A
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
-> <A
-HREF="x939.html#TYPE-NODE-CLONING.SIG"
->Cloning methods</A
->
- . </B
-> <P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->orphaned_clone</TT
->: Returns a clone of the node and the complete
-tree below this node (deep clone). The clone does not have a parent (i.e. the
-reference to the parent node is <I
-CLASS="EMPHASIS"
->not</I
-> cloned). While
-copying the subtree, strings are not copied; it is likely that the original tree
-and the copy tree share strings. Extension objects are cloned by invoking
-the <TT
-CLASS="LITERAL"
->clone</TT
-> method on the original objects; how much of
-the extension object is cloned depends on the implementation of this method.</P
-><P
->This operation is illustrated by the figure
-<A
-HREF="x939.html#NODE-CLONE"
-><I
-><I
->The clone of a subtree</I
-><I
-></I
-></I
-></A
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->orphaned_flat_clone</TT
->: Returns a clone of the node,
-but sets the list of sub nodes to [], i.e. the sub nodes are not cloned.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><A
-NAME="TYPE-NODE-METH-CREATE-ELEMENT"
-></A
->
-<TT
-CLASS="LITERAL"
->create_element dtd nt al</TT
->: Returns a flat copy of this node
-(which must be an element) with the following modifications: The DTD is set to
-<TT
-CLASS="LITERAL"
->dtd</TT
->; the node type is set to <TT
-CLASS="LITERAL"
->nt</TT
->, and the
-new attribute list is set to <TT
-CLASS="LITERAL"
->al</TT
-> (given as list of
-(name,value) pairs). The copy has neither children nor a parent. It does not
-contain processing instructions. See
-<A
-HREF="x939.html#TYPE-NODE-EX-CREATE-ELEMENT"
->the example below</A
->.</P
-><P
->Note that you can specify the position of the new node
-by the optional argument <TT
-CLASS="LITERAL"
->~position</TT
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><A
-NAME="TYPE-NODE-METH-CREATE-DATA"
-></A
->
-<TT
-CLASS="LITERAL"
->create_data dtd cdata</TT
->: Returns a flat copy of this node
-(which must be a data node) with the following modifications: The DTD is set to
-<TT
-CLASS="LITERAL"
->dtd</TT
->; the node type is set to <TT
-CLASS="LITERAL"
->T_data</TT
->; the
-attribute list is empty (data nodes never have attributes); the list of
-children and PIs is empty, too (same reason). The new node does not have a
-parent. The value <TT
-CLASS="LITERAL"
->cdata</TT
-> is the new character content of the
-node. See
-<A
-HREF="x939.html#TYPE-NODE-EX-CREATE-DATA"
->the example below</A
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->keep_always_whitespace_mode</TT
->: Even data nodes which are
-normally dropped because they only contain ignorable whitespace, can added to
-this node once this mode is turned on. (This mode is useful to produce
-canonical XML.)</P
-></LI
-></UL
-></P
-></DIV
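-><P
->For example (a sketch; <TT
-CLASS="LITERAL"
->x</TT
-> and <TT
-CLASS="LITERAL"
->y</TT
-> are again assumed to be element nodes of the same document), a subtree can
-be duplicated and the copy attached somewhere else:
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* Sketch: copy the subtree below x and attach the copy to y.
-   The clone is orphaned, so add_node accepts it as a new child. *)
-let copy_under y x =
-  let x' = x # orphaned_clone in
-  y # add_node x'</PRE
-></P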
-><A
-NAME="TYPE-NODE-WEIRD"
-></A
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
-> <A
-HREF="x939.html#TYPE-NODE-WEIRD.SIG"
->Validating methods</A
->
- . </B
->There is one method which locally validates the node, i.e. checks whether the
-subnodes match the content model of this node.
-
- <P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->local_validate</TT
->: Checks that this node conforms to the
-DTD by comparing the type of the subnodes with the content model for this
-node. (Applications need not call this method unless they add new nodes
-themselves to the tree.)</P
-></LI
-></UL
-></P
-></DIV
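-><P
->For example (a sketch), after the children of a node have been replaced with
-<TT
-CLASS="LITERAL"
->set_nodes</TT
->, which does not validate, the node can be re-checked:
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* Sketch: n is an element node whose children have just been replaced.
-   Depending on the release, local_validate may expect further (optional)
-   arguments; see the mli file for the exact signature. *)
-let replace_children_checked n new_children =
-  n # set_nodes new_children;   (* set_nodes performs no validation checks *)
-  n # local_validate            (* compare the children with the content model *)</PRE
-></P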
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1252"
->3.2.3. The class <TT
-CLASS="LITERAL"
->element_impl</TT
-></A
-></H2
-><P
->This class is an implementation of <TT
-CLASS="LITERAL"
->node</TT
-> which
-realizes element nodes:
-
-<PRE
-CLASS="PROGRAMLISTING"
->class [ 'ext ] element_impl : 'ext -> [ 'ext ] node</PRE
-> </P
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->Constructor. </B
->You can create a new instance by
-
-<PRE
-CLASS="PROGRAMLISTING"
->new element_impl <TT
-CLASS="REPLACEABLE"
-><I
->extension_object</I
-></TT
-></PRE
->
-
-which creates a special form of element that already contains a
-reference to the <TT
-CLASS="REPLACEABLE"
-><I
->extension_object</I
-></TT
->, but is
-otherwise empty. This special form is called an
-<I
-CLASS="EMPHASIS"
->exemplar</I
->. The purpose of exemplars is that they serve as
-patterns that can be duplicated and filled with data. The method
-<A
-HREF="x939.html#TYPE-NODE-METH-CREATE-ELEMENT"
-><TT
-CLASS="LITERAL"
->create_element</TT
-></A
-> is designed to perform this action.</P
-></DIV
-><A
-NAME="TYPE-NODE-EX-CREATE-ELEMENT"
-></A
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->Example. </B
->First, create an exemplar by
-
-<PRE
-CLASS="PROGRAMLISTING"
->let exemplar_ext = ... in
-let exemplar = new element_impl exemplar_ext in</PRE
->
-
-The <TT
-CLASS="LITERAL"
->exemplar</TT
-> is not used in node trees, but only as
-a pattern when the element nodes are created:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let element = exemplar # <A
-HREF="x939.html#TYPE-NODE-METH-CREATE-ELEMENT"
->create_element</A
-> dtd (T_element name) attlist </PRE
->
-
-The <TT
-CLASS="LITERAL"
->element</TT
-> is a copy of <TT
-CLASS="LITERAL"
->exemplar</TT
->
-(even the extension <TT
-CLASS="LITERAL"
->exemplar_ext</TT
-> has been copied)
-which ensures that <TT
-CLASS="LITERAL"
->element</TT
-> and its extension are objects
-of the same class as the exemplars; note that you need not pass a
-class name or other meta information. The copy is initially connected
-with the <TT
-CLASS="LITERAL"
->dtd</TT
->, it gets a node type, and the attribute list
-is filled. The <TT
-CLASS="LITERAL"
->element</TT
-> is now fully functional; it can
-be added to another element as child, and it can contain references to
-subnodes.</P
-></DIV
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1281"
->3.2.4. The class <TT
-CLASS="LITERAL"
->data_impl</TT
-></A
-></H2
-><P
->This class is an implementation of <TT
-CLASS="LITERAL"
->node</TT
-> which
-should be used for all character data nodes:
-
-<PRE
-CLASS="PROGRAMLISTING"
->class [ 'ext ] data_impl : 'ext -> [ 'ext ] node</PRE
-> </P
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->Constructor. </B
->You can create a new instance by
-
-<PRE
-CLASS="PROGRAMLISTING"
->new data_impl <TT
-CLASS="REPLACEABLE"
-><I
->extension_object</I
-></TT
-></PRE
->
-
-which creates an empty exemplar node that is connected to
-<TT
-CLASS="REPLACEABLE"
-><I
->extension_object</I
-></TT
->. The node does not contain a
-reference to any DTD, and because of this it cannot be added to node trees.</P
-></DIV
-><P
->To get a fully working data node, apply the method
-<A
-HREF="x939.html#TYPE-NODE-METH-CREATE-DATA"
-><TT
-CLASS="LITERAL"
->create_data</TT
-></A
-> to the exemplar (see example).</P
-><A
-NAME="TYPE-NODE-EX-CREATE-DATA"
-></A
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->Example. </B
->First, create an exemplar by
-
-<PRE
-CLASS="PROGRAMLISTING"
->let exemplar_ext = ... in
-let exemplar = new data_impl exemplar_ext in</PRE
->
-
-The <TT
-CLASS="LITERAL"
->exemplar</TT
-> is not used in node trees, but only as
-a pattern when the data nodes are created:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let data_node = exemplar # <A
-HREF="x939.html#TYPE-NODE-METH-CREATE-DATA"
->create_data</A
-> dtd "The characters contained in the data node" </PRE
->
-
-The <TT
-CLASS="LITERAL"
->data_node</TT
-> is a copy of <TT
-CLASS="LITERAL"
->exemplar</TT
->.
-The copy is initially connected
-with the <TT
-CLASS="LITERAL"
->dtd</TT
->, and it is filled with character material.
-The <TT
-CLASS="LITERAL"
->data_node</TT
-> is now fully functional; it can
-be added to an element as child.</P
-></DIV
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1308"
->3.2.5. The type <TT
-CLASS="LITERAL"
->spec</TT
-></A
-></H2
-><P
->The type <TT
-CLASS="LITERAL"
->spec</TT
-> defines a way to handle the details of
-creating nodes from exemplars.
-
-<PRE
-CLASS="PROGRAMLISTING"
->type 'ext spec
-constraint 'ext = 'ext node #extension
-
-val make_spec_from_mapping :
- ?super_root_exemplar : 'ext node ->
- ?comment_exemplar : 'ext node ->
- ?default_pinstr_exemplar : 'ext node ->
- ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
- data_exemplar: 'ext node ->
- default_element_exemplar: 'ext node ->
- element_mapping: (string, 'ext node) Hashtbl.t ->
- unit ->
- 'ext spec
-
-val make_spec_from_alist :
- ?super_root_exemplar : 'ext node ->
- ?comment_exemplar : 'ext node ->
- ?default_pinstr_exemplar : 'ext node ->
- ?pinstr_alist : (string * 'ext node) list ->
- data_exemplar: 'ext node ->
- default_element_exemplar: 'ext node ->
- element_alist: (string * 'ext node) list ->
- unit ->
- 'ext spec</PRE
->
-
-The two functions <TT
-CLASS="LITERAL"
->make_spec_from_mapping</TT
-> and
-<TT
-CLASS="LITERAL"
->make_spec_from_alist</TT
-> create <TT
-CLASS="LITERAL"
->spec</TT
->
-values. Both functions are functionally equivalent and the only difference is
-that the first function prefers hashtables and the latter associative lists to
-describe mappings from names to exemplars.</P
-><P
->You can specify exemplars for the various kinds of nodes that need to be
-generated when an XML document is parsed:
-
-<P
-></P
-><UL
-COMPACT="COMPACT"
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->~super_root_exemplar</TT
->: This exemplar
-is used to create the super root. This special node is only created if the
-corresponding configuration option has been selected; it is the parent node of
-the root node, which may be convenient if every working node must have a parent.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->~comment_exemplar</TT
->: This exemplar is
-used when a comment node must be created. Note that such nodes are only created
-if the corresponding configuration option is "on".</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->~default_pinstr_exemplar</TT
->: If a node
-for a processing instruction must be created, and the instruction is not listed
-in the table passed by <TT
-CLASS="LITERAL"
->~pinstr_mapping</TT
-> or
-<TT
-CLASS="LITERAL"
->~pinstr_alist</TT
->, this exemplar is used.
-Again the configuration option must be "on" in order to create such nodes at
-all. </P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->~pinstr_mapping</TT
-> or
-<TT
-CLASS="LITERAL"
->~pinstr_alist</TT
->: Map the target names of processing
-instructions to exemplars. These mappings are only used when nodes for
-processing instructions are created.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->~data_exemplar</TT
->: The exemplar for
-ordinary data nodes.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->~default_element_exemplar</TT
->: This
-exemplar is used if an element node must be created, but the element type
-cannot be found in the tables <TT
-CLASS="LITERAL"
->element_mapping</TT
-> or
-<TT
-CLASS="LITERAL"
->element_alist</TT
->.</P
-></LI
-><LI
-STYLE="list-style-type: disc"
-><P
-><TT
-CLASS="LITERAL"
->~element_mapping</TT
-> or
-<TT
-CLASS="LITERAL"
->~element_alist</TT
->: Map the element types to exemplars. These
-mappings are used to create element nodes.</P
-></LI
-></UL
->
-
-In most cases, you only want to create <TT
-CLASS="LITERAL"
->spec</TT
-> values to pass
-them to the parser functions found in <TT
-CLASS="LITERAL"
->Pxp_yacc</TT
->. However, it
-might be useful to apply <TT
-CLASS="LITERAL"
->spec</TT
-> values directly.</P
-><P
->The following functions create various types of nodes by selecting the
-corresponding exemplar from the passed <TT
-CLASS="LITERAL"
->spec</TT
-> value, and by
-calling <TT
-CLASS="LITERAL"
->create_element</TT
-> or <TT
-CLASS="LITERAL"
->create_data</TT
-> on
-the exemplar.
-
-<PRE
-CLASS="PROGRAMLISTING"
->val create_data_node :
- 'ext spec ->
- dtd ->
- (* data material: *) string ->
- 'ext node
-
-val create_element_node :
- ?position:(string * int * int) ->
- 'ext spec ->
- dtd ->
- (* element type: *) string ->
- (* attributes: *) (string * string) list ->
- 'ext node
-
-val create_super_root_node :
- ?position:(string * int * int) ->
- 'ext spec ->
- dtd ->
- 'ext node
-
-val create_comment_node :
- ?position:(string * int * int) ->
- 'ext spec ->
- dtd ->
- (* comment text: *) string ->
- 'ext node
-
-val create_pinstr_node :
- ?position:(string * int * int) ->
- 'ext spec ->
- dtd ->
- proc_instruction ->
- 'ext node</PRE
-></P
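-><P
->For illustration (a sketch; as in the examples of the next section, the
-extension object and the DTD are assumed to exist, and the element type
-<TT
-CLASS="LITERAL"
->p</TT
-> and its attribute are freely invented names), a minimal <TT
-CLASS="LITERAL"
->spec</TT
-> can be built from two exemplars and then be used to create nodes directly:
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* Sketch: exemplar_ext and dtd are assumed to exist; "p" and "align"
-   are made-up names for this example. *)
-let spec =
-  make_spec_from_alist
-    ~data_exemplar: (new data_impl exemplar_ext)
-    ~default_element_exemplar: (new element_impl exemplar_ext)
-    ~element_alist: []       (* no special per-element exemplars *)
-    ()
-
-let para = create_element_node spec dtd "p" [ "align", "left" ]
-let text = create_data_node spec dtd "Hello world"
-let () = para # add_node text</PRE
-></P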
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1354"
->3.2.6. Examples</A
-></H2
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->Building trees. </B
->Here is the piece of code that creates the tree of
-the figure <A
-HREF="x939.html#NODE-TERM"
-><I
-><I
->A tree with element nodes, data nodes, and attributes</I
-><I
-></I
-></I
-></A
->. The extension
-object and the DTD are beyond the scope of this example.
-
-<PRE
-CLASS="PROGRAMLISTING"
->let exemplar_ext = ... (* some extension *) in
-let dtd = ... (* some DTD *) in
-
-let element_exemplar = new element_impl exemplar_ext in
-let data_exemplar = new data_impl exemplar_ext in
-
-let a1 = element_exemplar # create_element dtd (T_element "a") ["att", "apple"]
-and b1 = element_exemplar # create_element dtd (T_element "b") []
-and c1 = element_exemplar # create_element dtd (T_element "c") []
-and a2 = element_exemplar # create_element dtd (T_element "a") ["att", "orange"]
-in
-
-let cherries = data_exemplar # create_data dtd "Cherries" in
-let orange = data_exemplar # create_data dtd "An orange" in
-
-a1 # add_node b1;
-a1 # add_node c1;
-b1 # add_node a2;
-b1 # add_node cherries;
-a2 # add_node orange;</PRE
->
-
-Alternatively, the last block of statements could also be written as:
-
-<PRE
-CLASS="PROGRAMLISTING"
->a1 # set_nodes [b1; c1];
-b1 # set_nodes [a2; cherries];
-a2 # set_nodes [orange];</PRE
->
-
-The root of the tree is <TT
-CLASS="LITERAL"
->a1</TT
->, i.e. it is true that
-
-<PRE
-CLASS="PROGRAMLISTING"
->x # root == a1</PRE
->
-
-for every x from { <TT
-CLASS="LITERAL"
->a1</TT
->, <TT
-CLASS="LITERAL"
->a2</TT
->,
-<TT
-CLASS="LITERAL"
->b1</TT
->, <TT
-CLASS="LITERAL"
->c1</TT
->, <TT
-CLASS="LITERAL"
->cherries</TT
->,
-<TT
-CLASS="LITERAL"
->orange</TT
-> }.</P
-></DIV
-><P
->Furthermore, the following properties hold:
-
-<PRE
-CLASS="PROGRAMLISTING"
-> a1 # attribute "att" = Value "apple"
-& a2 # attribute "att" = Value "orange"
-
-& cherries # data = "Cherries"
-& orange # data = "An orange"
-& a1 # data = "CherriesAn orange"
-
-& a1 # node_type = T_element "a"
-& a2 # node_type = T_element "a"
-& b1 # node_type = T_element "b"
-& c1 # node_type = T_element "c"
-& cherries # node_type = T_data
-& orange # node_type = T_data
-
-& a1 # sub_nodes = [ b1; c1 ]
-& a2 # sub_nodes = [ orange ]
-& b1 # sub_nodes = [ a2; cherries ]
-& c1 # sub_nodes = []
-& cherries # sub_nodes = []
-& orange # sub_nodes = []
-
-& a2 # parent == a1
-& b1 # parent == a1
-& c1 # parent == a1
-& cherries # parent == b1
-& orange # parent == a2</PRE
-></P
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->Searching nodes. </B
->The following function searches all nodes of a tree
-for which a certain condition holds:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let rec search p t =
- if p t then
- t :: search_list p (t # sub_nodes)
- else
- search_list p (t # sub_nodes)
-
-and search_list p l =
- match l with
- [] -> []
- | t :: l' -> (search p t) @ (search_list p l')
-;;</PRE
-></P
-></DIV
-><P
->For example, if you want to search all elements of a certain
-type <TT
-CLASS="LITERAL"
->et</TT
->, the function <TT
-CLASS="LITERAL"
->search</TT
-> can be
-applied as follows:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let search_element_type et t =
- search (fun x -> x # node_type = T_element et) t
-;;</PRE
-></P
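-><P
->For instance, applied to the tree <TT
-CLASS="LITERAL"
->a1</TT
-> of the &quot;Building trees&quot; example above, the search finds exactly the
-element <TT
-CLASS="LITERAL"
->b1</TT
-> (a sketch):
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* Sketch, continuing the example tree of the "Building trees" paragraph. *)
-let bs = search_element_type "b" a1 in
-match bs with
-  [ b ] -> assert (b == b1)    (* exactly the element b1 is found *)
-| _     -> assert false</PRE
-></P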
-><DIV
-CLASS="FORMALPARA"
-><P
-><B
->Getting attribute values. </B
->Suppose we have the declaration:
-
-<PRE
-CLASS="PROGRAMLISTING"
->&lt;!ATTLIST e a CDATA #REQUIRED
- b CDATA #IMPLIED
- c CDATA "12345"></PRE
->
-
-In this case, every element <TT
-CLASS="LITERAL"
->e</TT
-> must have an attribute
-<TT
-CLASS="LITERAL"
->a</TT
->, otherwise the parser would indicate an error. If
-the O'Caml variable <TT
-CLASS="LITERAL"
->n</TT
-> holds the node of the tree
-corresponding to the element, you can get the value of the attribute
-<TT
-CLASS="LITERAL"
->a</TT
-> by
-
-<PRE
-CLASS="PROGRAMLISTING"
->let value_of_a = n # required_string_attribute "a"</PRE
->
-
-which is more or less an abbreviation for
-
-<PRE
-CLASS="PROGRAMLISTING"
->let value_of_a =
- match n # attribute "a" with
- Value s -> s
- | _ -> assert false</PRE
->
-
-- as the attribute is required, the <TT
-CLASS="LITERAL"
->attribute</TT
-> method always
-returns a <TT
-CLASS="LITERAL"
->Value</TT
->.</P
-></DIV
-><P
->In contrast to this, the attribute <TT
-CLASS="LITERAL"
->b</TT
-> can be
-omitted. In this case, the method <TT
-CLASS="LITERAL"
->required_string_attribute</TT
->
-works only if the attribute is there, and the method will fail if the attribute
-is missing. To get the value, you can apply the method
-<TT
-CLASS="LITERAL"
->optional_string_attribute</TT
->:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let value_of_b = n # optional_string_attribute "b"</PRE
->
-
-Now, <TT
-CLASS="LITERAL"
->value_of_b</TT
-> is of type <TT
-CLASS="LITERAL"
->string option</TT
->,
-and <TT
-CLASS="LITERAL"
->None</TT
-> represents the omitted attribute. Alternatively,
-you could also use <TT
-CLASS="LITERAL"
->attribute</TT
->:
-
-<PRE
-CLASS="PROGRAMLISTING"
->let value_of_b =
- match n # attribute "b" with
- Value s -> Some s
- | Implied_value -> None
- | _ -> assert false</PRE
-></P
-><P
->The attribute <TT
-CLASS="LITERAL"
->c</TT
-> behaves much like
-<TT
-CLASS="LITERAL"
->a</TT
->, because it always has a value. If the attribute is
-omitted, the default, here "12345", will be returned instead. Because of this,
-you can again use <TT
-CLASS="LITERAL"
->required_string_attribute</TT
-> to get the
-value.</P
-><P
->The type <TT
-CLASS="LITERAL"
->CDATA</TT
-> is the most general string
-type. The types <TT
-CLASS="LITERAL"
->NMTOKEN</TT
->, <TT
-CLASS="LITERAL"
->ID</TT
->,
-<TT
-CLASS="LITERAL"
->IDREF</TT
->, <TT
-CLASS="LITERAL"
->ENTITY</TT
->, and all enumerators and
-notations are special forms of string types that restrict the possible
-values. From O'Caml, they behave like <TT
-CLASS="LITERAL"
->CDATA</TT
->, i.e. you can
-use the methods <TT
-CLASS="LITERAL"
->required_string_attribute</TT
-> and
-<TT
-CLASS="LITERAL"
->optional_string_attribute</TT
->, too.</P
-><P
->In contrast to this, the types <TT
-CLASS="LITERAL"
->NMTOKENS</TT
->,
-<TT
-CLASS="LITERAL"
->IDREFS</TT
->, and <TT
-CLASS="LITERAL"
->ENTITIES</TT
-> mean lists of
-strings. Suppose we have the declaration:
-
-<PRE
-CLASS="PROGRAMLISTING"
->&lt;!ATTLIST f d NMTOKENS #REQUIRED
- e NMTOKENS #IMPLIED></PRE
->
-
-The type <TT
-CLASS="LITERAL"
->NMTOKENS</TT
-> stands for lists of space-separated
-tokens; for example the value <TT
-CLASS="LITERAL"
->"1 abc 23ef"</TT
-> means the list
-<TT
-CLASS="LITERAL"
->["1"; "abc"; "23ef"]</TT
->. (Again, <TT
-CLASS="LITERAL"
->IDREFS</TT
->
-and <TT
-CLASS="LITERAL"
->ENTITIES</TT
-> have more restricted values.) To get the
-value of attribute <TT
-CLASS="LITERAL"
->d</TT
->, one can use
-
-<PRE
-CLASS="PROGRAMLISTING"
->let value_of_d = n # required_list_attribute "d"</PRE
->
-
-or
-
-<PRE
-CLASS="PROGRAMLISTING"
->let value_of_d =
- match n # attribute "d" with
- Valuelist l -> l
- | _ -> assert false</PRE
->
-
-As <TT
-CLASS="LITERAL"
->d</TT
-> is required, the attribute cannot be omitted, and
-the <TT
-CLASS="LITERAL"
->attribute</TT
-> method always returns a
-<TT
-CLASS="LITERAL"
->Valuelist</TT
->. </P
-><P
->For optional attributes like <TT
-CLASS="LITERAL"
->e</TT
->, apply
-
-<PRE
-CLASS="PROGRAMLISTING"
->let value_of_e = n # optional_list_attribute "e"</PRE
->
-
-or
-
-<PRE
-CLASS="PROGRAMLISTING"
->let value_of_e =
- match n # attribute "e" with
- Valuelist l -> l
- | Implied_value -> []
- | _ -> assert false</PRE
->
-
-Here, the case that the attribute is missing counts like the empty list.</P
-></DIV
-><DIV
-CLASS="SECT2"
-><H2
-CLASS="SECT2"
-><A
-NAME="AEN1435"
->3.2.7. Iterators</A
-></H2
-><P
->There are also several iterators in Pxp_document; please see
-the mli file for details. You can find examples for them in the
-"simple_transformation" directory.
-
-<PRE
-CLASS="PROGRAMLISTING"
->val find : ?deeply:bool ->
- f:('ext node -> bool) -> 'ext node -> 'ext node
-
-val find_all : ?deeply:bool ->
- f:('ext node -> bool) -> 'ext node -> 'ext node list
-
-val find_element : ?deeply:bool ->
- string -> 'ext node -> 'ext node
-
-val find_all_elements : ?deeply:bool ->
- string -> 'ext node -> 'ext node list
-
-exception Skip
-val map_tree : pre:('exta node -> 'extb node) ->
- ?post:('extb node -> 'extb node) ->
- 'exta node ->
- 'extb node
-
-
-val map_tree_sibl :
- pre: ('exta node option -> 'exta node -> 'exta node option ->
- 'extb node) ->
- ?post:('extb node option -> 'extb node -> 'extb node option ->
- 'extb node) ->
- 'exta node ->
- 'extb node
-
-val iter_tree : ?pre:('ext node -> unit) ->
- ?post:('ext node -> unit) ->
- 'ext node ->
- unit
-
-val iter_tree_sibl :
- ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) ->
- ?post:('ext node option -> 'ext node -> 'ext node option -> unit) ->
- 'ext node ->
- unit</PRE
-></P
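-><P
->For example (a sketch, reusing the tree <TT
-CLASS="LITERAL"
->a1</TT
-> of the previous section), one could collect all <TT
-CLASS="LITERAL"
->b</TT
-> elements of the tree and print a simple trace of the whole tree:
-
-<PRE
-CLASS="PROGRAMLISTING"
->(* Sketch: a1 is the example tree of the previous section. *)
-let all_b_elements = find_all_elements ~deeply:true "b" a1
-
-let () =
-  iter_tree
-    ~pre:(fun n ->
-            match n # node_type with
-              T_element name -> print_endline ("element " ^ name)
-            | T_data         -> print_endline ("data: " ^ n # data)
-            | _              -> ())
-    a1</PRE
-></P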
-></DIV
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
+++ /dev/null
-%!PS-Adobe-2.0
-%%Creator: dvips(k) 5.86 Copyright 1999 Radical Eye Software
-%%Pages: 96
-%%PageOrder: Ascend
-%%BoundingBox: 0 0 596 842
-%%DocumentFonts: Helvetica-Bold Times-Roman Times-Bold Times-Italic
-%%+ Courier Courier-Oblique Helvetica-BoldOblique Courier-Bold
-%%DocumentPaperSizes: a4
-%%EndComments
-%DVIPSWebPage: (www.radicaleye.com)
-%DVIPSCommandLine: dvips -f
-%DVIPSParameters: dpi=600, compressed
-%DVIPSSource: TeX output 2000.08.30:1757
-%%BeginProcSet: texc.pro
-%!
-/TeXDict 300 dict def TeXDict begin/N{def}def/B{bind def}N/S{exch}N/X{S
-N}B/A{dup}B/TR{translate}N/isls false N/vsize 11 72 mul N/hsize 8.5 72
-mul N/landplus90{false}def/@rigin{isls{[0 landplus90{1 -1}{-1 1}ifelse 0
-0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{
-landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize
-mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[
-matrix currentmatrix{A A round sub abs 0.00001 lt{round}if}forall round
-exch round exch]setmatrix}N/@landscape{/isls true N}B/@manualfeed{
-statusdict/manualfeed true put}B/@copies{/#copies X}B/FMat[1 0 0 -1 0 0]
-N/FBB[0 0 0 0]N/nn 0 N/IEn 0 N/ctr 0 N/df-tail{/nn 8 dict N nn begin
-/FontType 3 N/FontMatrix fntrx N/FontBBox FBB N string/base X array
-/BitMaps X/BuildChar{CharBuilder}N/Encoding IEn N end A{/foo setfont}2
-array copy cvx N load 0 nn put/ctr 0 N[}B/sf 0 N/df{/sf 1 N/fntrx FMat N
-df-tail}B/dfs{div/sf X/fntrx[sf 0 0 sf neg 0 0]N df-tail}B/E{pop nn A
-definefont setfont}B/Cw{Cd A length 5 sub get}B/Ch{Cd A length 4 sub get
-}B/Cx{128 Cd A length 3 sub get sub}B/Cy{Cd A length 2 sub get 127 sub}
-B/Cdx{Cd A length 1 sub get}B/Ci{Cd A type/stringtype ne{ctr get/ctr ctr
-1 add N}if}B/id 0 N/rw 0 N/rc 0 N/gp 0 N/cp 0 N/G 0 N/CharBuilder{save 3
-1 roll S A/base get 2 index get S/BitMaps get S get/Cd X pop/ctr 0 N Cdx
-0 Cx Cy Ch sub Cx Cw add Cy setcachedevice Cw Ch true[1 0 0 -1 -.1 Cx
-sub Cy .1 sub]/id Ci N/rw Cw 7 add 8 idiv string N/rc 0 N/gp 0 N/cp 0 N{
-rc 0 ne{rc 1 sub/rc X rw}{G}ifelse}imagemask restore}B/G{{id gp get/gp
-gp 1 add N A 18 mod S 18 idiv pl S get exec}loop}B/adv{cp add/cp X}B
-/chg{rw cp id gp 4 index getinterval putinterval A gp add/gp X adv}B/nd{
-/cp 0 N rw exit}B/lsh{rw cp 2 copy get A 0 eq{pop 1}{A 255 eq{pop 254}{
-A A add 255 and S 1 and or}ifelse}ifelse put 1 adv}B/rsh{rw cp 2 copy
-get A 0 eq{pop 128}{A 255 eq{pop 127}{A 2 idiv S 128 and or}ifelse}
-ifelse put 1 adv}B/clr{rw cp 2 index string putinterval adv}B/set{rw cp
-fillstr 0 4 index getinterval putinterval adv}B/fillstr 18 string 0 1 17
-{2 copy 255 put pop}for N/pl[{adv 1 chg}{adv 1 chg nd}{1 add chg}{1 add
-chg nd}{adv lsh}{adv lsh nd}{adv rsh}{adv rsh nd}{1 add adv}{/rc X nd}{
-1 add set}{1 add clr}{adv 2 chg}{adv 2 chg nd}{pop nd}]A{bind pop}
-forall N/D{/cc X A type/stringtype ne{]}if nn/base get cc ctr put nn
-/BitMaps get S ctr S sf 1 ne{A A length 1 sub A 2 index S get sf div put
-}if put/ctr ctr 1 add N}B/I{cc 1 add D}B/bop{userdict/bop-hook known{
-bop-hook}if/SI save N @rigin 0 0 moveto/V matrix currentmatrix A 1 get A
-mul exch 0 get A mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N/eop{
-SI restore userdict/eop-hook known{eop-hook}if showpage}N/@start{
-userdict/start-hook known{start-hook}if pop/VResolution X/Resolution X
-1000 div/DVImag X/IEn 256 array N 2 string 0 1 255{IEn S A 360 add 36 4
-index cvrs cvn put}for pop 65781.76 div/vsize X 65781.76 div/hsize X}N
-/p{show}N/RMat[1 0 0 -1 0 0]N/BDot 260 string N/Rx 0 N/Ry 0 N/V{}B/RV/v{
-/Ry X/Rx X V}B statusdict begin/product where{pop false[(Display)(NeXT)
-(LaserWriter 16/600)]{A length product length le{A length product exch 0
-exch getinterval eq{pop true exit}if}{pop}ifelse}forall}{false}ifelse
-end{{gsave TR -.1 .1 TR 1 1 scale Rx Ry false RMat{BDot}imagemask
-grestore}}{{gsave TR -.1 .1 TR Rx Ry scale 1 1 false RMat{BDot}
-imagemask grestore}}ifelse B/QV{gsave newpath transform round exch round
-exch itransform moveto Rx 0 rlineto 0 Ry neg rlineto Rx neg 0 rlineto
-fill grestore}B/a{moveto}B/delta 0 N/tail{A/delta X 0 rmoveto}B/M{S p
-delta add tail}B/b{S p tail}B/c{-4 M}B/d{-3 M}B/e{-2 M}B/f{-1 M}B/g{0 M}
-B/h{1 M}B/i{2 M}B/j{3 M}B/k{4 M}B/w{0 rmoveto}B/l{p -4 w}B/m{p -3 w}B/n{
-p -2 w}B/o{p -1 w}B/q{p 1 w}B/r{p 2 w}B/s{p 3 w}B/t{p 4 w}B/x{0 S
-rmoveto}B/y{3 2 roll p a}B/bos{/SS save N}B/eos{SS restore}B end
-
-%%EndProcSet
-%%BeginProcSet: 8r.enc
-% @@psencodingfile@{
-% author = "S. Rahtz, P. MacKay, Alan Jeffrey, B. Horn, K. Berry",
-% version = "0.6",
-% date = "1 July 1998",
-% filename = "8r.enc",
-% email = "tex-fonts@@tug.org",
-% docstring = "Encoding for TrueType or Type 1 fonts
-% to be used with TeX."
-% @}
-%
-% Idea is to have all the characters normally included in Type 1 fonts
-% available for typesetting. This is effectively the characters in Adobe
-% Standard Encoding + ISO Latin 1 + extra characters from Lucida.
-%
-% Character code assignments were made as follows:
-%
-% (1) the Windows ANSI characters are almost all in their Windows ANSI
-% positions, because some Windows users cannot easily reencode the
-% fonts, and it makes no difference on other systems. The only Windows
-% ANSI characters not available are those that make no sense for
-% typesetting -- rubout (127 decimal), nobreakspace (160), softhyphen
-% (173). quotesingle and grave are moved just because it's such an
-% irritation not having them in TeX positions.
-%
-% (2) Remaining characters are assigned arbitrarily to the lower part
-% of the range, avoiding 0, 10 and 13 in case we meet dumb software.
-%
-% (3) Y&Y Lucida Bright includes some extra text characters; in the
-% hopes that other PostScript fonts, perhaps created for public
-% consumption, will include them, they are included starting at 0x12.
-%
-% (4) Remaining positions left undefined are for use in (hopefully)
-% upward-compatible revisions, if someday more characters are generally
-% available.
-%
-% (5) hyphen appears twice for compatibility with both
-% ASCII and Windows.
-%
-/TeXBase1Encoding [
-% 0x00 (encoded characters from Adobe Standard not in Windows 3.1)
- /.notdef /dotaccent /fi /fl
- /fraction /hungarumlaut /Lslash /lslash
- /ogonek /ring /.notdef
- /breve /minus /.notdef
-% These are the only two remaining unencoded characters, so may as
-% well include them.
- /Zcaron /zcaron
-% 0x10
- /caron /dotlessi
-% (unusual TeX characters available in, e.g., Lucida Bright)
- /dotlessj /ff /ffi /ffl
- /.notdef /.notdef /.notdef /.notdef
- /.notdef /.notdef /.notdef /.notdef
- % very contentious; it's so painful not having quoteleft and quoteright
- % at 96 and 145 that we move the things normally found there to here.
- /grave /quotesingle
-% 0x20 (ASCII begins)
- /space /exclam /quotedbl /numbersign
- /dollar /percent /ampersand /quoteright
- /parenleft /parenright /asterisk /plus /comma /hyphen /period /slash
-% 0x30
- /zero /one /two /three /four /five /six /seven
- /eight /nine /colon /semicolon /less /equal /greater /question
-% 0x40
- /at /A /B /C /D /E /F /G /H /I /J /K /L /M /N /O
-% 0x50
- /P /Q /R /S /T /U /V /W
- /X /Y /Z /bracketleft /backslash /bracketright /asciicircum /underscore
-% 0x60
- /quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o
-% 0x70
- /p /q /r /s /t /u /v /w
- /x /y /z /braceleft /bar /braceright /asciitilde
- /.notdef % rubout; ASCII ends
-% 0x80
- /.notdef /.notdef /quotesinglbase /florin
- /quotedblbase /ellipsis /dagger /daggerdbl
- /circumflex /perthousand /Scaron /guilsinglleft
- /OE /.notdef /.notdef /.notdef
-% 0x90
- /.notdef /.notdef /.notdef /quotedblleft
- /quotedblright /bullet /endash /emdash
- /tilde /trademark /scaron /guilsinglright
- /oe /.notdef /.notdef /Ydieresis
-% 0xA0
- /.notdef % nobreakspace
- /exclamdown /cent /sterling
- /currency /yen /brokenbar /section
- /dieresis /copyright /ordfeminine /guillemotleft
- /logicalnot
- /hyphen % Y&Y (also at 45); Windows' softhyphen
- /registered
- /macron
-% 0xD0
- /degree /plusminus /twosuperior /threesuperior
- /acute /mu /paragraph /periodcentered
- /cedilla /onesuperior /ordmasculine /guillemotright
- /onequarter /onehalf /threequarters /questiondown
-% 0xC0
- /Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /AE /Ccedilla
- /Egrave /Eacute /Ecircumflex /Edieresis
- /Igrave /Iacute /Icircumflex /Idieresis
-% 0xD0
- /Eth /Ntilde /Ograve /Oacute
- /Ocircumflex /Otilde /Odieresis /multiply
- /Oslash /Ugrave /Uacute /Ucircumflex
- /Udieresis /Yacute /Thorn /germandbls
-% 0xE0
- /agrave /aacute /acircumflex /atilde
- /adieresis /aring /ae /ccedilla
- /egrave /eacute /ecircumflex /edieresis
- /igrave /iacute /icircumflex /idieresis
-% 0xF0
- /eth /ntilde /ograve /oacute
- /ocircumflex /otilde /odieresis /divide
- /oslash /ugrave /uacute /ucircumflex
- /udieresis /yacute /thorn /ydieresis
-] def
-
-%%EndProcSet
-%%BeginProcSet: texps.pro
-%!
-TeXDict begin/rf{findfont dup length 1 add dict begin{1 index/FID ne 2
-index/UniqueID ne and{def}{pop pop}ifelse}forall[1 index 0 6 -1 roll
-exec 0 exch 5 -1 roll VResolution Resolution div mul neg 0 0]/Metrics
-exch def dict begin Encoding{exch dup type/integertype ne{pop pop 1 sub
-dup 0 le{pop}{[}ifelse}{FontMatrix 0 get div Metrics 0 get div def}
-ifelse}forall Metrics/Metrics currentdict end def[2 index currentdict
-end definefont 3 -1 roll makefont/setfont cvx]cvx def}def/ObliqueSlant{
-dup sin S cos div neg}B/SlantFont{4 index mul add}def/ExtendFont{3 -1
-roll mul exch}def/ReEncodeFont{CharStrings rcheck{/Encoding false def
-dup[exch{dup CharStrings exch known not{pop/.notdef/Encoding true def}
-if}forall Encoding{]exch pop}{cleartomark}ifelse}if/Encoding exch def}
-def end
-
-%%EndProcSet
-%%BeginProcSet: special.pro
-%!
-TeXDict begin/SDict 200 dict N SDict begin/@SpecialDefaults{/hs 612 N
-/vs 792 N/ho 0 N/vo 0 N/hsc 1 N/vsc 1 N/ang 0 N/CLIP 0 N/rwiSeen false N
-/rhiSeen false N/letter{}N/note{}N/a4{}N/legal{}N}B/@scaleunit 100 N
-/@hscale{@scaleunit div/hsc X}B/@vscale{@scaleunit div/vsc X}B/@hsize{
-/hs X/CLIP 1 N}B/@vsize{/vs X/CLIP 1 N}B/@clip{/CLIP 2 N}B/@hoffset{/ho
-X}B/@voffset{/vo X}B/@angle{/ang X}B/@rwi{10 div/rwi X/rwiSeen true N}B
-/@rhi{10 div/rhi X/rhiSeen true N}B/@llx{/llx X}B/@lly{/lly X}B/@urx{
-/urx X}B/@ury{/ury X}B/magscale true def end/@MacSetUp{userdict/md known
-{userdict/md get type/dicttype eq{userdict begin md length 10 add md
-maxlength ge{/md md dup length 20 add dict copy def}if end md begin
-/letter{}N/note{}N/legal{}N/od{txpose 1 0 mtx defaultmatrix dtransform S
-atan/pa X newpath clippath mark{transform{itransform moveto}}{transform{
-itransform lineto}}{6 -2 roll transform 6 -2 roll transform 6 -2 roll
-transform{itransform 6 2 roll itransform 6 2 roll itransform 6 2 roll
-curveto}}{{closepath}}pathforall newpath counttomark array astore/gc xdf
-pop ct 39 0 put 10 fz 0 fs 2 F/|______Courier fnt invertflag{PaintBlack}
-if}N/txpose{pxs pys scale ppr aload pop por{noflips{pop S neg S TR pop 1
--1 scale}if xflip yflip and{pop S neg S TR 180 rotate 1 -1 scale ppr 3
-get ppr 1 get neg sub neg ppr 2 get ppr 0 get neg sub neg TR}if xflip
-yflip not and{pop S neg S TR pop 180 rotate ppr 3 get ppr 1 get neg sub
-neg 0 TR}if yflip xflip not and{ppr 1 get neg ppr 0 get neg TR}if}{
-noflips{TR pop pop 270 rotate 1 -1 scale}if xflip yflip and{TR pop pop
-90 rotate 1 -1 scale ppr 3 get ppr 1 get neg sub neg ppr 2 get ppr 0 get
-neg sub neg TR}if xflip yflip not and{TR pop pop 90 rotate ppr 3 get ppr
-1 get neg sub neg 0 TR}if yflip xflip not and{TR pop pop 270 rotate ppr
-2 get ppr 0 get neg sub neg 0 S TR}if}ifelse scaleby96{ppr aload pop 4
--1 roll add 2 div 3 1 roll add 2 div 2 copy TR .96 dup scale neg S neg S
-TR}if}N/cp{pop pop showpage pm restore}N end}if}if}N/normalscale{
-Resolution 72 div VResolution 72 div neg scale magscale{DVImag dup scale
-}if 0 setgray}N/psfts{S 65781.76 div N}N/startTexFig{/psf$SavedState
-save N userdict maxlength dict begin/magscale true def normalscale
-currentpoint TR/psf$ury psfts/psf$urx psfts/psf$lly psfts/psf$llx psfts
-/psf$y psfts/psf$x psfts currentpoint/psf$cy X/psf$cx X/psf$sx psf$x
-psf$urx psf$llx sub div N/psf$sy psf$y psf$ury psf$lly sub div N psf$sx
-psf$sy scale psf$cx psf$sx div psf$llx sub psf$cy psf$sy div psf$ury sub
-TR/showpage{}N/erasepage{}N/copypage{}N/p 3 def @MacSetUp}N/doclip{
-psf$llx psf$lly psf$urx psf$ury currentpoint 6 2 roll newpath 4 copy 4 2
-roll moveto 6 -1 roll S lineto S lineto S lineto closepath clip newpath
-moveto}N/endTexFig{end psf$SavedState restore}N/@beginspecial{SDict
-begin/SpecialSave save N gsave normalscale currentpoint TR
-@SpecialDefaults count/ocount X/dcount countdictstack N}N/@setspecial{
-CLIP 1 eq{newpath 0 0 moveto hs 0 rlineto 0 vs rlineto hs neg 0 rlineto
-closepath clip}if ho vo TR hsc vsc scale ang rotate rwiSeen{rwi urx llx
-sub div rhiSeen{rhi ury lly sub div}{dup}ifelse scale llx neg lly neg TR
-}{rhiSeen{rhi ury lly sub div dup scale llx neg lly neg TR}if}ifelse
-CLIP 2 eq{newpath llx lly moveto urx lly lineto urx ury lineto llx ury
-lineto closepath clip}if/showpage{}N/erasepage{}N/copypage{}N newpath}N
-/@endspecial{count ocount sub{pop}repeat countdictstack dcount sub{end}
-repeat grestore SpecialSave restore end}N/@defspecial{SDict begin}N
-/@fedspecial{end}B/li{lineto}B/rl{rlineto}B/rc{rcurveto}B/np{/SaveX
-currentpoint/SaveY X N 1 setlinecap newpath}N/st{stroke SaveX SaveY
-moveto}N/fil{fill SaveX SaveY moveto}N/ellipse{/endangle X/startangle X
-/yrad X/xrad X/savematrix matrix currentmatrix N TR xrad yrad scale 0 0
-1 startangle endangle arc savematrix setmatrix}N end
-
-%%EndProcSet
-%%BeginProcSet: color.pro
-%!
-TeXDict begin/setcmykcolor where{pop}{/setcmykcolor{dup 10 eq{pop
-setrgbcolor}{1 sub 4 1 roll 3{3 index add neg dup 0 lt{pop 0}if 3 1 roll
-}repeat setrgbcolor pop}ifelse}B}ifelse/TeXcolorcmyk{setcmykcolor}def
-/TeXcolorrgb{setrgbcolor}def/TeXcolorgrey{setgray}def/TeXcolorgray{
-setgray}def/TeXcolorhsb{sethsbcolor}def/currentcmykcolor where{pop}{
-/currentcmykcolor{currentrgbcolor 10}B}ifelse/DC{exch dup userdict exch
-known{pop pop}{X}ifelse}B/GreenYellow{0.15 0 0.69 0 setcmykcolor}DC
-/Yellow{0 0 1 0 setcmykcolor}DC/Goldenrod{0 0.10 0.84 0 setcmykcolor}DC
-/Dandelion{0 0.29 0.84 0 setcmykcolor}DC/Apricot{0 0.32 0.52 0
-setcmykcolor}DC/Peach{0 0.50 0.70 0 setcmykcolor}DC/Melon{0 0.46 0.50 0
-setcmykcolor}DC/YellowOrange{0 0.42 1 0 setcmykcolor}DC/Orange{0 0.61
-0.87 0 setcmykcolor}DC/BurntOrange{0 0.51 1 0 setcmykcolor}DC
-/Bittersweet{0 0.75 1 0.24 setcmykcolor}DC/RedOrange{0 0.77 0.87 0
-setcmykcolor}DC/Mahogany{0 0.85 0.87 0.35 setcmykcolor}DC/Maroon{0 0.87
-0.68 0.32 setcmykcolor}DC/BrickRed{0 0.89 0.94 0.28 setcmykcolor}DC/Red{
-0 1 1 0 setcmykcolor}DC/OrangeRed{0 1 0.50 0 setcmykcolor}DC/RubineRed{
-0 1 0.13 0 setcmykcolor}DC/WildStrawberry{0 0.96 0.39 0 setcmykcolor}DC
-/Salmon{0 0.53 0.38 0 setcmykcolor}DC/CarnationPink{0 0.63 0 0
-setcmykcolor}DC/Magenta{0 1 0 0 setcmykcolor}DC/VioletRed{0 0.81 0 0
-setcmykcolor}DC/Rhodamine{0 0.82 0 0 setcmykcolor}DC/Mulberry{0.34 0.90
-0 0.02 setcmykcolor}DC/RedViolet{0.07 0.90 0 0.34 setcmykcolor}DC
-/Fuchsia{0.47 0.91 0 0.08 setcmykcolor}DC/Lavender{0 0.48 0 0
-setcmykcolor}DC/Thistle{0.12 0.59 0 0 setcmykcolor}DC/Orchid{0.32 0.64 0
-0 setcmykcolor}DC/DarkOrchid{0.40 0.80 0.20 0 setcmykcolor}DC/Purple{
-0.45 0.86 0 0 setcmykcolor}DC/Plum{0.50 1 0 0 setcmykcolor}DC/Violet{
-0.79 0.88 0 0 setcmykcolor}DC/RoyalPurple{0.75 0.90 0 0 setcmykcolor}DC
-/BlueViolet{0.86 0.91 0 0.04 setcmykcolor}DC/Periwinkle{0.57 0.55 0 0
-setcmykcolor}DC/CadetBlue{0.62 0.57 0.23 0 setcmykcolor}DC
-/CornflowerBlue{0.65 0.13 0 0 setcmykcolor}DC/MidnightBlue{0.98 0.13 0
-0.43 setcmykcolor}DC/NavyBlue{0.94 0.54 0 0 setcmykcolor}DC/RoyalBlue{1
-0.50 0 0 setcmykcolor}DC/Blue{1 1 0 0 setcmykcolor}DC/Cerulean{0.94 0.11
-0 0 setcmykcolor}DC/Cyan{1 0 0 0 setcmykcolor}DC/ProcessBlue{0.96 0 0 0
-setcmykcolor}DC/SkyBlue{0.62 0 0.12 0 setcmykcolor}DC/Turquoise{0.85 0
-0.20 0 setcmykcolor}DC/TealBlue{0.86 0 0.34 0.02 setcmykcolor}DC
-/Aquamarine{0.82 0 0.30 0 setcmykcolor}DC/BlueGreen{0.85 0 0.33 0
-setcmykcolor}DC/Emerald{1 0 0.50 0 setcmykcolor}DC/JungleGreen{0.99 0
-0.52 0 setcmykcolor}DC/SeaGreen{0.69 0 0.50 0 setcmykcolor}DC/Green{1 0
-1 0 setcmykcolor}DC/ForestGreen{0.91 0 0.88 0.12 setcmykcolor}DC
-/PineGreen{0.92 0 0.59 0.25 setcmykcolor}DC/LimeGreen{0.50 0 1 0
-setcmykcolor}DC/YellowGreen{0.44 0 0.74 0 setcmykcolor}DC/SpringGreen{
-0.26 0 0.76 0 setcmykcolor}DC/OliveGreen{0.64 0 0.95 0.40 setcmykcolor}
-DC/RawSienna{0 0.72 1 0.45 setcmykcolor}DC/Sepia{0 0.83 1 0.70
-setcmykcolor}DC/Brown{0 0.81 1 0.60 setcmykcolor}DC/Tan{0.14 0.42 0.56 0
-%%EndProlog
-%%BeginSetup
-%%Feature: *Resolution 600dpi
-TeXDict begin
-%%BeginPaperSize: a4
-a4
-%%EndPaperSize
-
-%%EndSetup
-%%Page: 1 1
-The PXP user's guide
-
-Gerd Stolpmann
-%%Page: 2 2
-The PXP user's guide
-by Gerd Stolpmann
-
-Copyright © 1999, 2000 by Gerd Stolpmann
-
-PXP is a validating parser for XML-1.0 which has been written entirely in
-Objective Caml.
-
-Download PXP: The free PXP library can be downloaded at
-http://www.ocaml-programming.de/packages/. This user's guide is included.
-Newest releases of PXP will be announced in The OCaml Link Database
-(http://www.npc.de/ocaml/linkdb/).
-
-License
-
-This document, and the described software, "PXP", are copyright by Gerd
-Stolpmann.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this document and the "PXP" software (the "Software"), to deal in the
-Software without restriction, including without limitation the rights to use,
-copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-The Software is provided "as is", without warranty of any kind, express or
-implied, including but not limited to the warranties of merchantability,
-fitness for a particular purpose and noninfringement. In no event shall Gerd
-Stolpmann be liable for any claim, damages or other liability, whether in an
-action of contract, tort or otherwise, arising from, out of or in connection
-with the Software or the use or other dealings in the software.
-%%Page: 3 3
-Table of Contents
-
-I. User's guide ........ 6
-  1. What is XML? ........ 7
-    1.1. Introduction ........ 7
-      1.1.1. The "hello world" example ........ 7
-      1.1.2. XML parsers and processors ........ 9
-      1.1.3. Discussion ........ 9
-    1.2. Highlights of XML ........ 11
-      1.2.1. The DTD and the instance ........ 11
-      1.2.2. Reserved characters ........ 12
-      1.2.3. Elements and ELEMENT declarations ........ 13
-      1.2.4. Attribute lists and ATTLIST declarations ........ 15
-      1.2.5. Parsed entities ........ 16
-      1.2.6. Notations and unparsed entities ........ 19
-    1.3. A complete example: The readme DTD ........ 20
-  2. Using PXP ........ 24
-    2.1. Validation ........ 24
-    2.2. How to parse a document from an application ........ 24
-    2.3. Class-based processing of the node tree ........ 29
-    2.4. Example: An HTML backend for the readme DTD ........ 33
-      2.4.1. Header ........ 33
-      2.4.2. Type declarations ........ 33
-      2.4.3. Class store ........ 34
-      2.4.4. Function escape_html ........ 35
-      2.4.5. Virtual class shared ........ 35
-      2.4.6. Class only_data ........ 36
-      2.4.7. Class readme ........ 36
-      2.4.8. Classes section, sect1, sect2, and sect3 ........ 39
-      2.4.9. Classes map_tag, p, em, ul, li ........ 39
-      2.4.10. Class br ........ 40
-      2.4.11. Class code ........ 40
-      2.4.12. Class a ........ 41
-      2.4.13. Class footnote ........ 42
-      2.4.14. The specification of the document model
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black 4 w(43)596 4397 y(3.)h(The)f(objects)h
-(representing)e(the)j(document)p Black 4 w(.)p Black
-Black -3 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-4 w(46)795 4505 y(3.1.)e(The)h Fq(document)f Fv(class)p
-Black 7 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black 4 w(46)795 4613 y(3.2.)g(The)h(class)h(type)f
-Fq(node)p Black 2 w Fv(.)p Black Black -2 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-4 w(47)994 4721 y(3.2.1.)f(The)g(structure)h(of)g(document)e(trees)p
-Black 3 w(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black 4 w(49)994
-4829 y(3.2.2.)h(The)g(methods)h(of)f(the)i(class)g(type)f
-Fq(node)p Black 13 w Fv(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black 4 w(52)p Black 3842
-5278 a Fr(3)p Black eop
-%%Page: 4 4
-4 3 bop Black Black 994 579 a Fv(3.2.3.)19 b(The)g(class)j
-Fq(element_impl)p Black 2 w Fv(.)p Black Black -3 w(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black 4 w(56)994 687 y(3.2.4.)d(The)g(class)j Fq(data_impl)p
-Black 12 w Fv(.)p Black Black -2 w(.)p Black Black -1
-w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black 4 w(57)994
-795 y(3.2.5.)d(The)g(type)h Fq(spec)p Black 5 w Fv(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black 4 w(58)994 903 y(3.2.6.)f(Examples)p Black
-5 w(.)p Black Black -3 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black 4 w(60)994
-1011 y(3.2.7.)g(Iterators)p Black 12 w(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black 4 w(64)795 1119 y(3.3.)g(The)h(class)h(type)f Fq(extension)p
-Black 6 w Fv(.)p Black Black -2 w(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black 4 w(65)994 1226 y(3.3.1.)f(Ho)n(w)h(to)g(de\002ne)
-g(an)g(e)o(xtension)f(class)p Black 13 w(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black 4 w(66)994 1334
-y(3.3.2.)g(Ho)n(w)h(to)g(bind)f(e)o(xtension)g(classes)i(to)g(element)e
-(types)p Black 10 w(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-4 w(68)795 1442 y(3.4.)g(Details)i(of)f(the)g(mapping)e(from)i(XML)g
-(te)o(xt)g(to)g(the)g(tree)h(representation)p Black 13
-w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black 4 w(69)994
-1550 y(3.4.1.)e(The)g(representation)g(of)g(character)n(-free)f
-(elements)p Black 9 w(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-4 w(69)994 1658 y(3.4.2.)h(The)g(representation)g(of)g(character)g
-(data)p Black 10 w(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black 4 w(70)994 1766
-y(3.4.3.)g(The)g(representation)g(of)g(entities)i(within)f(documents)p
-Black 12 w(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black 4 w(70)994 1874 y(3.4.4.)f(The)g(representation)g
-(of)g(attrib)n(utes)p Black 20 w(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black 4 w(71)994 1982 y(3.4.5.)g(The)g(representation)g(of)g
-(processing)g(instructions)p Black(.)p Black Black -1
-w(.)p Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black 4 w(71)994 2090 y(3.4.6.)g(The)g
-(representation)g(of)g(comments)p Black 7 w(.)p Black
-Black -1 w(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black 4 w(71)994 2198 y(3.4.7.)g(The)g(attrib)n(utes)i
-Fq(xml:lang)e Fv(and)h Fq(xml:space)p Black 10 w Fv(.)p
-Black Black -2 w(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black 4 w(72)994 2306 y(3.4.8.)f(And)g(what)h(about)g(namespaces?)p
-Black 12 w(.)p Black Black -2 w(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black 4 w(72)596
-2414 y(4.)g(Con\002guring)e(and)h(calling)h(the)g(parser)p
-Black 11 w(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black 4 w(73)795 2522 y(4.1.)f(Ov)o(ervie)n(w)p
-Black 19 w(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black 4 w(73)795
-2630 y(4.2.)g(Resolv)o(ers)h(and)g(sources)p Black 2
-w(.)p Black Black -1 w(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black 4 w(75)994 2737
-y(4.2.1.)f(Using)h(the)g(b)n(uilt-in)f(resolv)o(ers)h(\(called)f
-(sources\))p Black 5 w(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-4 w(75)994 2845 y(4.2.2.)g(The)g(resolv)o(er)g(API)p
-Black 11 w(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black 4 w(76)994 2953 y(4.2.3.)g(Prede\002ned)f(resolv)o(er)h
-(components)p Black 13 w(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black 4 w(78)795 3061
-y(4.3.)g(The)h(DTD)g(classes)p Black 1 w(.)p Black Black
-1 w(.)p Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black 4 w(81)795
-3169 y(4.4.)f(In)m(v)n(oking)f(the)i(parser)p Black 14
-w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black 4 w(89)994
-3277 y(4.4.1.)f(Def)o(aults)p Black 10 w(.)p Black Black
--1 w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black 4 w(89)994 3385 y(4.4.2.)g(P)o(arsing)g(functions)p
-Black 4 w(.)p Black Black -3 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
-Black Black(.)p Black Black -1 w(.)p Black Black(.)p
-Black Black(.)p Black Black(.)p Black Black -1 w(.)p
-Black Black(.)p Black Black(.)p Black Black(.)p Black
-Black -1 w(.)p Black Black(.)p Black Black(.)p Black
-Black(.)p Black Black -1 w(.)p Black Black(.)p Black
-Black(.)p Black Black(.)p Black Black -1 w(.)p Black
-Black(.)p Black Black(.)p Black Black(.)p Black Black
--1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
-Black Black -1 w(.)p Black Black(.)p Black Black(.)p
... 90
4.4.3. Configuration options ... 91
4.4.4. Which configuration should I use? ... 93
4.5. Updates ... 95
List of Figures

3-1. A tree with element nodes, data nodes, and attributes ... 49
3-2. Nodes are doubly linked trees ... 50
3-3. A node can only be added if it is a root ... 51
3-4. A deleted node becomes the root of the subtree ... 51
3-5. The clone of a subtree ... 52
3-6. The structure of nodes and extensions ... 65

I. User's guide
Chapter 1. What is XML?

1.1. Introduction

XML (short for Extensible Markup Language) generalizes the idea that text
documents are typically structured in sections, sub-sections, paragraphs, and
so on. The format of the document is not fixed (as, for example, in HTML), but
can be declared by a so-called DTD (document type definition). The DTD
describes only the rules of how the document can be structured, but not how
the document can be processed. For example, if you want to publish a book that
uses XML markup, you will need a processor that converts the XML file into a
printable format such as Postscript. On the one hand, the structure of XML
documents is configurable; on the other hand, there is no longer a canonical
interpretation of the elements of the document; for example, one XML DTD might
want paragraphs to be delimited by para tags, while another DTD expects p tags
for the same purpose. As a result, a new processor is required for every DTD.

Although XML can be used to express structured text documents, it is not
limited to this kind of application. For example, XML can also be used to
exchange structured data over a network, or simply to store structured data in
files. Note that XML documents cannot contain arbitrary binary data because
some characters are forbidden; for some applications you need to encode binary
data as text (e.g. the base 64 encoding).

1.1.1. The "hello world" example

The following example shows a very simple DTD, and a corresponding document
instance. The document is structured such that it consists of sections, that
sections consist of paragraphs, and that paragraphs contain plain text:

<!ELEMENT document (section)+>
<!ELEMENT section (paragraph)+>
<!ELEMENT paragraph (#PCDATA)>

The following document is an instance of this DTD:

<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE document SYSTEM "simple.dtd">
<document>
  <section>
    <paragraph>This is a paragraph of the first section.</paragraph>
    <paragraph>This is another paragraph of the first section.</paragraph>
  </section>
  <section>
    <paragraph>This is the only paragraph of the second section.</paragraph>
  </section>
</document>

As in HTML (and, of course, in grandfather SGML), the "pieces" of the document
are delimited by element braces, i.e. such a piece begins with
<name-of-the-type-of-the-piece> and ends with
</name-of-the-type-of-the-piece>, and the pieces are called elements. Unlike
HTML and SGML, both start tags and end tags (i.e. the delimiters written in
angle brackets) can never be left out. For example, HTML calls the paragraphs
simply p, and because paragraphs never contain paragraphs, a sequence of
several paragraphs can be written as:

<p>First paragraph
<p>Second paragraph

This is not possible in XML; continuing our example above, we must always
write

<paragraph>First paragraph</paragraph>
<paragraph>Second paragraph</paragraph>

The rationale behind that is (1) to simplify the development of XML parsers
(you need not convert the DTD into a deterministic finite automaton, which is
required to detect omitted tags), and (2) to make it possible to parse the
document independently of whether the DTD is known or not.

The first line of our sample document,

<?xml version="1.0" encoding="ISO-8859-1"?>

is the so-called XML declaration. It expresses that the document follows the
conventions of XML version 1.0, and that the document is encoded using
characters from the ISO-8859-1 character set (often known as "Latin 1", mostly
used in Western Europe). Although the XML declaration is not mandatory, it is
good style to include it; everybody sees at first glance that the document
uses XML markup and not the similar-looking HTML and SGML markup languages. If
you omit the XML declaration, the parser will assume that the document is
encoded as UTF-8 or UTF-16 (there is a rule that makes it possible to
distinguish between UTF-8 and UTF-16 automatically); these are encodings of
Unicode's universal character set. (Note that PXP, unlike its predecessor
"Markup", fully supports Unicode.)

The second line,

<!DOCTYPE document SYSTEM "simple.dtd">

names the DTD that is going to be used for the rest of the document. In
general, it is possible that the DTD consists of two parts, the so-called
external and the internal subset. "External" means that the DTD exists as a
second file; "internal" means that the DTD is included in the same file. In
this example, there
is only an external subset, and the system identifier "simple.dtd" specifies
where the DTD file can be found. System identifiers are interpreted as URLs;
for instance, this would be legal:

<!DOCTYPE document SYSTEM "http://host/location/simple.dtd">

Please note that PXP cannot interpret HTTP identifiers by default, but it is
possible to change the interpretation of system identifiers.

The word immediately following DOCTYPE determines which of the declared
element types (here "document", "section", and "paragraph") is used for the
outermost element, the root element. In this example it is document because
the outermost element is delimited by <document> and </document>.

The DTD consists of three declarations for element types: document, section,
and paragraph. Such a declaration has two parts:

<!ELEMENT name content-model>

The content model is a regular expression which describes the possible inner
structure of the element. Here, document contains one or more sections, and a
section contains one or more paragraphs. Note that these two element types are
not allowed to contain arbitrary text. Only the paragraph element type is
declared such that parsed character data (indicated by the symbol #PCDATA) is
permitted.

See below for a detailed discussion of content models.

1.1.2. XML parsers and processors

XML documents are human-readable, but this is not the main purpose of this
language. XML has been designed such that documents can be read by a program
called an XML parser. The parser checks that the document is well-formatted,
and it represents the document as objects of the programming language. There
are two aspects to checking the document: First, the document must follow some
basic syntactic rules, such as that tags are written in angle brackets, and
that for every start tag there must be a corresponding end tag, and so on. A
document respecting these rules is well-formed. Second, the document must
match the DTD, in which case the document is valid. Many parsers check only
for well-formedness and ignore the DTD; PXP is designed such that it can even
validate the document.

A parser alone does not make a sensible application; it only reads XML
documents. The whole application working with XML-formatted data is called an
XML processor. Often XML processors convert documents into another format,
such as HTML or Postscript. Sometimes processors extract data from the
documents and output the processed data again XML-formatted. The parser can
help the application process the document; for example, it can provide means
to access the document in a specific manner. PXP especially supports an
object-oriented access layer.
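To make the well-formed/valid distinction concrete, here is a small
illustration (added here, not part of the original text), reusing the
simple.dtd declarations from the "hello world" example. The following instance
is well-formed, but it is not valid, because the DTD requires that document
contains sections rather than paragraphs directly:

<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE document SYSTEM "simple.dtd">
<document>
  <!-- well-formed: all tags nest properly and are closed -->
  <paragraph>Invalid: "document" may only contain "section" elements.</paragraph>
</document>

A parser that checks only well-formedness accepts this document; a validating
parser such as PXP rejects it.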
1.1.3. Discussion

As we have seen, there are two levels of description: On the one hand, XML can
define rules about the format of a document (the DTD); on the other hand, XML
expresses structured documents. There are a number of possible applications:

• XML can be used to express structured texts. Unlike HTML, there is no
  canonical interpretation; one would have to write a backend for the DTD that
  translates the structured texts into a format that existing browsers,
  printers etc. understand. The advantage of a self-defined document format is
  that it is possible to design the format in a more problem-oriented way. For
  example, if the task is to extract reports from a database, one can use a
  DTD that reflects the structure of the report or the database. A possible
  approach would be to have an element type for every database table and for
  every column. Once the DTD has been designed, the report procedure can be
  split up into a part that selects the database rows and outputs them as an
  XML document according to the DTD, and a part that translates the document
  into other formats. Of course, the latter part can be solved in a generic
  way; e.g. there may be configurable backends for all DTDs that follow the
  approach and have element types for tables and columns.

  XML plays the role of a configurable intermediate format. The database
  extraction function can be written without having to know the details of
  typesetting; the backends can be written without having to know the details
  of the database.

  Of course, there are traditional solutions. One can define an ad hoc
  intermediate text file format. The disadvantage is that there are no names
  for the pieces of the format, and that such formats usually lack
  documentation because of this. Another solution would be to have a binary
  representation, either as a language-dependent or a language-independent
  structure (an example of the latter can be found in RPC implementations).
  The disadvantage is that it is harder to view such representations; one has
  to write pretty printers for this purpose. It is also more difficult to
  enter test data; XML is plain text that can be written using an arbitrary
  editor (Emacs even has a good XML mode, PSGML). All these alternatives
  suffer from a missing structure checker, i.e. the programs processing these
  formats usually do not check the input file or input object in detail; XML
  parsers check the syntax of the input (the so-called well-formedness check),
  and advanced parsers like PXP even verify that the structure matches the DTD
  (the so-called validation).

• XML can be used as a configurable communication language. A fundamental
  problem of every communication is that sender and receiver must follow the
  same conventions about the language. For data exchange, the question is
  usually which data records and fields are available, how they are
  syntactically composed, and which values are possible for the various
  fields. Similar questions arise for text document exchange. XML does not
  answer these problems completely, but it reduces the number of ambiguities
  for such conventions: The outlines of the syntax are specified by the DTD
  (but not necessarily the details), and XML introduces canonical names for
  the components of documents such that it is simpler to describe the rest of
  the syntax and the semantics informally.
• XML is a data storage format. Currently, every software product tends to use
  its own way to store data; commercial software often does not describe such
  formats, and it is a pain to integrate such software into a bigger project.
  XML can help to improve this situation when several applications share the
  same syntax of data files. DTDs are then neutral instances that check the
  format of data files independently of applications.

1.2. Highlights of XML

This section explains many of the features of XML, but not all of them, and
some features not in detail. For a complete description, see the XML
specification (http://www.w3.org/TR/1998/REC-xml-19980210.html).

1.2.1. The DTD and the instance

The DTD contains various declarations; in general you can only use a feature
if you have previously declared it. The document instance file may contain the
full DTD, but it is also possible to split the DTD into an internal and an
external subset. A document must begin as follows if the full DTD is included:

<?xml version="1.0" encoding="Your encoding"?>
<!DOCTYPE root [
  Declarations
]>

These declarations are called the internal subset. Note that the usage of
entities and conditional sections is restricted within the internal subset.

If the declarations are located in a different file, you can refer to this
file as follows:

<?xml version="1.0" encoding="Your encoding"?>
<!DOCTYPE root SYSTEM "file name">

The declarations in the file are called the external subset. The file name is
called the system identifier. It is also possible to refer to the file by a
so-called public identifier, but most XML applications won't use this feature.

You can also specify both internal and external subsets. In this case, the
declarations of both subsets are mixed, and if there are conflicts, the
declaration of the internal subset overrides those of the external subset with
the same name. This looks as follows:

<?xml version="1.0" encoding="Your encoding"?>
<!DOCTYPE root SYSTEM "file name" [
  Declarations
]>

The XML declaration (the string beginning with <?xml and ending at ?>) should
specify the encoding of the file. Common values are UTF-8 and the ISO-8859
series of character sets. Note that every file parsed by the XML processor can
begin with an XML declaration, and that every file may have its own encoding.

The name of the root element must be mentioned directly after the DOCTYPE
string. This means that a full document instance looks like

<?xml version="1.0" encoding="Your encoding"?>
<!DOCTYPE root SYSTEM "file name" [
  Declarations
]>
<root>
  inner contents
</root>

1.2.2. Reserved characters

Some characters are generally reserved to indicate markup, such that they
cannot be used for character data. These characters are <, >, and &.
Furthermore, single and double quotes are sometimes reserved. If you want to
include such a character as a character, write it as follows:

• &lt; instead of <
• &gt; instead of >
• &amp; instead of &
• &apos; instead of '
• &quot; instead of "

All other characters are free in the document instance. It is possible to
include a character by its position in the Unicode alphabet:

&#n;

where n is the decimal number of the character. Alternatively, you can specify
the character by its hexadecimal number:

&#xn;
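As a small illustration (added here, not from the original text): 228 is the
decimal and E4 the hexadecimal code of the letter ä, so both forms denote the
same character:

<paragraph>The references &#228; and &#xE4; both stand for the letter ä.</paragraph>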
In the scope of declarations, the character % is no longer free. To include it
as a character, you must use the notations &#37; or &#x25;.

Note that besides &lt;, &gt;, &amp;, &apos;, and &quot; there are no
predefined character entities. This is different from HTML, which defines a
list of characters that can be referenced by name (e.g. &auml; for ä);
however, if you prefer named characters, you can declare such entities
yourself (see below).

1.2.3. Elements and ELEMENT declarations

Elements structure the document instance in a hierarchical way. There is a
top-level element, the root element, which contains a sequence of inner
elements and character sections. The inner elements are structured in the same
way. Every element has an element type. The beginning of the element is
indicated by a start tag, written

<element-type>

and the element continues until the corresponding end tag is reached:

</element-type>

In XML, it is not allowed to omit start or end tags, even if the DTD would
permit this. Note that there are no special rules for how to interpret spaces
or newlines near start or end tags; all spaces and newlines count.

Every element type must be declared before it can be used. The declaration
consists of two parts: the ELEMENT declaration describes the content model,
i.e. which inner elements are allowed; the ATTLIST declaration describes the
attributes of the element.

An element can simply allow everything as content. This is written:

<!ELEMENT name ANY>

At the other extreme, an element can be forced to be empty, declared by:

<!ELEMENT name EMPTY>

Note that there is an abbreviated notation for empty element instances:
<name/>.

There are two more sophisticated forms of declarations: so-called mixed
declarations, and regular expressions. An element with mixed content contains
character data interspersed with inner elements, and the set of allowed inner
elements can be specified. In contrast to this, a regular expression
declaration does not allow character data, but the inner elements can be
described by the more powerful means of regular expressions.

A declaration for mixed content looks as follows:
<!ELEMENT name (#PCDATA | element_1 | ... | element_n)*>

or, if you do not want to allow any inner element, simply

<!ELEMENT name (#PCDATA)>

Example

If element type q is declared as

<!ELEMENT q (#PCDATA | r | s)*>

this is a legal instance:

<q>This is character data<r></r>with <s></s>inner elements</q>

But this is illegal because t has not been enumerated in the declaration:

<q>This is character data<r></r>with <t></t>inner elements</q>

The other form uses a regular expression to describe the possible contents:

<!ELEMENT name regexp>

The following well-known regexp operators are allowed:

• element-name
• (subexpr_1, ..., subexpr_n)
• (subexpr_1 | ... | subexpr_n)
• subexpr*
• subexpr+
• subexpr?

The , operator indicates a sequence of sub-models, the | operator describes
alternative sub-models. The * indicates zero or more repetitions, and + one or
more repetitions. Finally, ? can be used for optional sub-models. As atoms the
regexp can contain names of elements; note that it is not allowed to include
#PCDATA.

The exact syntax of the regular expressions is rather strange. This can be
explained best by a list of constraints:

• The outermost expression must not be element-name.
  Illegal: <!ELEMENT x y>; this must be written as <!ELEMENT x (y)>.

• For the unary operators subexpr*, subexpr+, and subexpr?, the subexpr must
  not again be a unary operator.

  Illegal: <!ELEMENT x y**>; this must be written as <!ELEMENT x (y*)*>.

• Between ) and one of the unary operators *, +, or ?, there must not be
  whitespace.

  Illegal: <!ELEMENT x (y|z) *>; this must be written as <!ELEMENT x (y|z)*>.

• There is the additional constraint that the right parenthesis must be
  contained in the same entity as the left parenthesis; see the section about
  parsed entities below.

Note that there is another restriction: regular expressions must be
deterministic. This means that the parser must be able to see, by looking at
the next token, which alternative is actually used, or whether the repetition
stops. The reason for this is simply compatibility with SGML (there is no
intrinsic reason for this rule; XML can live without this restriction).

Example

The elements are declared as follows:

<!ELEMENT q (r?, (s | t)+)>
<!ELEMENT r (#PCDATA)>
<!ELEMENT s EMPTY>
<!ELEMENT t (q | r)>

This is a legal instance:

<q><r>Some characters</r><s/></q>

(Note: <s/> is an abbreviation for <s></s>.) It would be illegal to leave
<s/> out because at least one instance of s or t must be present. It would be
illegal, too, if characters existed outside the r element; the only exception
is white space. This is legal, too:

<q><s/><t><q><s/></q></t></q>

1.2.4. Attribute lists and ATTLIST declarations

Elements may have attributes. These are put into the start tag of an element
as follows:

<element-name attribute_1="value_1" ... attribute_n="value_n">

Instead of "value_k" it is also possible to use single quotes as in
'value_k'. Note that you cannot use double quotes literally within the value
of the attribute if double quotes are the delimiters; the same
applies to single quotes. You can generally not use < and & as characters in
attribute values. It is possible to include the paraphrases &lt;, &gt;,
&amp;, &apos;, and &quot; (and any other reference to a general entity, as
long as the entity is not defined by an external file), as well as &#n;.

Before you can use an attribute you must declare it. An ATTLIST declaration
looks as follows:

<!ATTLIST element-name
          attribute-name attribute-type attribute-default
          ...
          attribute-name attribute-type attribute-default
>

There are a lot of types, but the most important are:

• CDATA: Every string is allowed as attribute value.
• NMTOKEN: Every nametoken is allowed as attribute value. Nametokens consist
  (mainly) of letters, digits, ., :, -, _ in arbitrary order.
• NMTOKENS: A space-separated list of nametokens is allowed as attribute
  value.

The most interesting default declarations are:

• #REQUIRED: The attribute must be specified.
• #IMPLIED: The attribute can be specified but can also be left out. The
  application can find out whether the attribute was present or not.
• "value" or 'value': This particular value is used as the default if the
  attribute is omitted in the element.

Example

This is a valid attribute declaration for element type r:

<!ATTLIST r
  x CDATA    #REQUIRED
  y NMTOKEN  #IMPLIED
  z NMTOKENS "one two three">

This means that x is a required attribute that cannot be left out, while y and
z are optional. The XML parser indicates to the application whether y is
present or not, but if z is missing the default value "one two three" is
returned automatically.

This is a valid example of these attributes:

<r x="He said: &quot;I don't like quotes!&quot;" y='1'>
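An equivalent start tag (an added illustration, not from the original text)
delimits x with single quotes instead, so the inner double quotes can stay
literal, while the apostrophe must now be written as &apos; because ' is the
delimiter:

<r x='He said: "I don&apos;t like quotes!"' y='1'>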
1.2.5. Parsed entities

Elements describe the logical structure of the document, while entities
determine the physical structure. Entities are the pieces of text the parser
operates on, mostly files and macros. Entities may be parsed, in which case
the parser reads the text and interprets it as XML markup, or unparsed, which
simply means that the data of the entity has a foreign format (e.g. a GIF
icon).

If the parsed entity is going to be used as part of the DTD, it is called a
parameter entity. You can declare a parameter entity with a fixed text as
content by:

<!ENTITY % name "value">

Within the DTD, you can refer to this entity, i.e. read the text of the
entity, by:

%name;

Such entities behave like macros, i.e. when they are referred to, the macro
text is inserted and read instead of the original text.

Example

For example, you can declare two elements with the same content model by:

<!ENTITY % model "a | b | c">
<!ELEMENT x (%model;)>
<!ELEMENT y (%model;)>

If the contents of the entity are given as a string constant, the entity is
called an internal entity. It is also possible to name a file to be used as
content (an external entity):

<!ENTITY % name SYSTEM "file name">

There are some restrictions for parameter entities:

• If the internal parameter entity contains the first token of a declaration
  (i.e. <!), it must also contain the last token of the declaration, i.e. the
  >. This means that the entity either contains a whole number of complete
  declarations, or some text from the middle of one declaration.

  Illegal:

  <!ENTITY % e "(a | b | c)>">
  <!ELEMENT x %e;

  Because <! is contained in the main entity, and the corresponding > is
  contained in the entity e.
• If the internal parameter entity contains a left parenthesis, it must also
  contain the corresponding right parenthesis.

  Illegal:

  <!ENTITY % e "(a | b | c">
  <!ELEMENT x %e;)>

  Because ( is contained in the entity e, and the corresponding ) is contained
  in the main entity.

• When reading text from an entity, the parser automatically inserts one space
  character before the entity text and one space character after the entity
  text. However, this rule is not applied within the definition of another
  entity.

  Legal:

  <!ENTITY % suffix "gif">
  <!ENTITY iconfile 'icon.%suffix;'>

  Because %suffix; is referenced within the definition text for iconfile, no
  additional spaces are added.

  Illegal:

  <!ENTITY % suffix "test">
  <!ELEMENT x.%suffix; ANY>

  Because %suffix; is referenced outside the definition text of another
  entity, the parser replaces %suffix; by space test space.

  Illegal:

  <!ENTITY % e "(a | b | c)">
  <!ELEMENT x %e;*>

  Because there is whitespace between ) and *, which is illegal.

• An external parameter entity must always consist of a whole number of
  complete declarations.

• In the internal subset of the DTD, a reference to a parameter entity
  (internal or external) is only allowed at positions where a new declaration
  can start.

If the parsed entity is going to be used in the document instance, it is
called a general entity. Such entities can be used as abbreviations for
frequent phrases, or to include external files. Internal general entities are
declared as follows:

<!ENTITY name "value">

External general entities are declared this way:

<!ENTITY name SYSTEM "file name">
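A hypothetical illustration (the file name is invented, not taken from the
manual): an external general entity can pull a boilerplate fragment out of its
own file,

<!ENTITY disclaimer SYSTEM "disclaimer.xml">

so that a reference to disclaimer in the instance inserts the contents of
disclaimer.xml at that point.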
References to general entities are written as:

&name;

The main difference between parameter and general entities is that the former
are only recognized in the DTD and that the latter are only recognized in the
document instance. As the DTD is parsed before the document, the parameter
entities are expanded first; for example, it is possible to use the content of
a parameter entity as the name of a general entity: &%name;; [1]

General entities must respect the element hierarchy. This means that there
must be an end tag for every start tag in the entity value, and that end tags
without corresponding start tags are not allowed.

Example

If the author of a document changes sometimes, it is worthwhile to set up a
general entity containing the names of the authors. If the author changes, you
need only change the definition of the entity, and do not need to check all
occurrences of the authors' names:

<!ENTITY authors "Gerd Stolpmann">

In the document text, you can now refer to the author names by writing
&authors;.

Illegal: The following two entities are illegal because the elements in the
definition do not nest properly:

<!ENTITY lengthy-tag "<section textcolor='white' background='graphic'>">
<!ENTITY nonsense    "<a></b>">

Earlier in this introduction we explained that there are substitutes for
reserved characters: &lt;, &gt;, &amp;, &apos;, and &quot;. These are simply
predefined general entities; note that they are the only predefined entities.
It is allowed to define these entities again as long as the meaning is
unchanged.

1.2.6. Notations and unparsed entities

Unparsed entities have a foreign format and can thus not be read by the XML
parser. Unparsed entities are always external. The format of an unparsed
entity must have been declared; such a format is called a notation. The entity
can then be declared by referring to this notation. As unparsed entities do
not contain XML text, it is not possible to include them directly into the
document; you can only declare attributes such that names of unparsed entities
are acceptable values.

As you can see, unparsed entities are too complicated to have any real
purpose. It is almost always better to simply pass the name of the data file
as a normal attribute value, and let the application recognize and process the
foreign format.
1.3. A complete example: The readme DTD

The reason for readme was that I often wrote two versions of files such as
README and INSTALL which explain aspects of a distributed software
archive; one version was ASCII-formatted, the other was written in HTML.
Maintaining both versions means twice the work, and changes to one version
may be forgotten in the other. To improve this situation I invented the
readme DTD, which allows me to maintain only one source written as an XML
document, and to generate the ASCII and the HTML versions from it.

In this section, I explain only the DTD. The readme DTD is contained in
the PXP distribution together with the two converters that produce ASCII
and HTML. Another section of this manual describes the HTML converter.

The documents have a simple structure: there are up to three levels of
nested sections, paragraphs, item lists, footnotes, hyperlinks, and text
emphasis. The outermost element usually has the type readme; it is
declared by

  <!ELEMENT readme (sect1+)>
  <!ATTLIST readme
            title CDATA #REQUIRED>

This means that this element contains one or more sections of the first
level (element type sect1), and that the element has a required attribute
title containing character data (CDATA). Note that readme elements must
not contain text data.

The three levels of sections are declared as follows:

  <!ELEMENT sect1 (title,(sect2|p|ul)+)>
  <!ELEMENT sect2 (title,(sect3|p|ul)+)>
  <!ELEMENT sect3 (title,(p|ul)+)>

Every section has a title element as its first subelement. After the title
an arbitrary but non-empty sequence of inner sections, paragraphs, and
item lists follows. Note that the inner sections must belong to the next
higher section level; sect3 elements must not contain inner sections
because there is no next higher level.

Obviously, all three declarations allow paragraphs (p) and item lists
(ul). The definition can be simplified at this point by using a parameter
entity:

  <!ENTITY % p.like "p|ul">

  <!ELEMENT sect1 (title,(sect2|%p.like;)+)>
  <!ELEMENT sect2 (title,(sect3|%p.like;)+)>
  <!ELEMENT sect3 (title,(%p.like;)+)>

Here, the entity p.like is nothing but a macro abbreviating the same
sequence of declarations; if new elements on the same level as p and ul
are added later, it is sufficient to change only the entity definition.
Note that there are some restrictions on the usage of entities in this
context; most importantly, entities containing a left parenthesis must
also contain the corresponding right parenthesis.

Note that the entity p.like is a parameter entity, i.e. the ENTITY
declaration contains a percent sign, and the entity is referred to by
%p.like;. This kind of entity must be used to abbreviate parts of the DTD;
the general entities declared without a percent sign and referred to as
&name; are not allowed in this context.

The title element specifies the title of the section in which it occurs.
The title is given as character data, optionally interspersed with line
breaks (br):

  <!ELEMENT title (#PCDATA|br)*>

Compared with the title attribute of the readme element, this element
allows inner markup (i.e. br) while attribute values do not: it is an
error if an attribute value contains the left angle bracket < literally,
so it is impossible to include inner elements.

The paragraph element p has a structure similar to title, but it allows
more inner elements:

  <!ENTITY % text "br|code|em|footnote|a">

  <!ELEMENT p (#PCDATA|%text;)*>

Line breaks do not have inner structure, so they are declared as being
empty:

  <!ELEMENT br EMPTY>

This means that really nothing is allowed within br; you must always write
<br></br> or, abbreviated, <br/>.

Code samples should be marked up with the code tag; emphasized text can be
indicated by em:

  <!ELEMENT code (#PCDATA)>

  <!ELEMENT em (#PCDATA|%text;)*>

That code elements are not allowed to contain further markup while em
elements are is a design decision by the author of the DTD.

Unordered lists simply consist of one or more list items, and a list item
may contain paragraph-level material:
  <!ELEMENT ul (li+)>

  <!ELEMENT li (%p.like;)*>

Footnotes are described by the text of the note; this text may contain
text-level markup. There is no mechanism to describe the numbering scheme
of footnotes, or to specify how footnote references are printed.

  <!ELEMENT footnote (#PCDATA|%text;)*>

Hyperlinks are written as in HTML. The anchor tag contains the text
describing where the link points to, and the href attribute is the pointer
(as a URL). There is no way to describe locations of "hash marks". If the
link refers to another readme document, the attribute readmeref should be
used instead of href. The reason is that the converted document usually
has a different system identifier (file name), and the link to a converted
document must be converted, too.

  <!ELEMENT a (#PCDATA)*>
  <!ATTLIST a
            href      CDATA #IMPLIED
            readmeref CDATA #IMPLIED
  >

Note that although it is only sensible to specify one of the two
attributes, the DTD has no means to express this restriction.

So far the DTD. Finally, here is a document for it:

  <?xml version="1.0" encoding="ISO-8859-1"?>
  <!DOCTYPE readme SYSTEM "readme.dtd">
  <readme title="How to use the readme converters">
  <sect1>
    <title>Usage</title>
    <p>
      The <em>readme</em> converter is invoked on the command line by:
    </p>
    <p>
      <code>readme [ -text | -html ] input.xml</code>
    </p>
    <p>
      Here a list of options:
    </p>
    <ul>
      <li>
        <p><code>-text</code>: specifies that ASCII output should be produced</p>
      </li>
      <li>
        <p><code>-html</code>: specifies that HTML output should be produced</p>
      </li>
    </ul>
    <p>
      The input file must be given on the command line. The converted output is
      printed to <em>stdout</em>.
    </p>
  </sect1>
  <sect1>
    <title>Author</title>
    <p>
      The program has been written by
      <a href="mailto:Gerd.Stolpmann@darmstadt.netsurf.de">Gerd Stolpmann</a>.
    </p>
  </sect1>
  </readme>

Notes

1. This construct is only allowed within the definition of another entity;
   otherwise extra spaces would be added (as explained above). Such
   indirection is not recommended.

   Complete example:

     <!ENTITY % variant "a">    <!-- or "b" -->
     <!ENTITY text-a "This is text A.">
     <!ENTITY text-b "This is text B.">
     <!ENTITY text "&text-%variant;;">

   You can now write &text; in the document instance, and depending on the
   value of variant either text-a or text-b is inserted.
Chapter 2. Using PXP

2.1. Validation

The parser can be used to validate a document. This means that all the
constraints that must hold for a valid document are actually checked.
Validation is the default mode of PXP, i.e. every document is validated
while it is being parsed.

In the examples directory of the distribution you find the pxpvalidate
application. It is invoked in the following way:

  pxpvalidate [ -wf ] file...

The files mentioned on the command line are validated, and every warning
and every error message is printed to stderr.

The -wf switch modifies the behaviour such that a well-formedness parser
is simulated. In this mode, the ELEMENT, ATTLIST, and NOTATION
declarations of the DTD are ignored, and only the ENTITY declarations take
effect. This mode is intended for documents lacking a DTD. Please note
that the parser still scans the DTD fully and will report all errors in
the DTD; such checks are not required by a well-formedness parser.

The pxpvalidate application is the simplest sensible program using PXP;
you may consider it the "hello world" program.
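The following is only a minimal sketch of such a validator, written
against the parsing functions introduced in the next section
(parse_document_entity, default_config, from_file, default_spec, and
Pxp_types.string_of_exn); it is not the actual pxpvalidate source, just an
illustration of how little code a validating run needs:

  open Pxp_yacc

  (* Sketch only: parse (and thereby validate) every file named on the
     command line; report errors on stderr, as pxpvalidate does. *)
  let () =
    for i = 1 to Array.length Sys.argv - 1 do
      let file = Sys.argv.(i) in
      try
        ignore (parse_document_entity default_config (from_file file) default_spec)
      with
        e -> prerr_endline (file ^ ": " ^ Pxp_types.string_of_exn e)
    done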
2.2. How to parse a document from an application

Let me first give a rough overview of the object model of the parser. The
following items are represented by objects:

* Documents: The document representation is more or less the anchor for
  the application; all accesses to the parsed entities start here. It is
  described by the class document contained in the module Pxp_document.
  You can get some global information, such as the XML declaration the
  document begins with, the DTD of the document, global processing
  instructions, and, most important, the document tree.

* The contents of documents: The contents have the structure of a tree:
  elements contain other elements and text [1]. The common type to
  represent both kinds of content is node, a class type that unifies the
  properties of elements and character data. Every node has a list of
  children (which is empty if the element is empty or the node represents
  text); nodes may have attributes; nodes always have text contents. There
  are two implementations of node, the class element_impl for elements and
  the class data_impl for text data. You find these classes and class
  types in the module Pxp_document, too. Note that attribute lists are
  represented by non-class values.

* The node extension: For advanced usage, every node of the document may
  have an associated extension, which is simply a second object. This
  object must have the three methods clone, node, and set_node as a bare
  minimum, but you are free to add methods as you want. This is the
  preferred way to add functionality to the document tree [2]. The class
  type extension is defined in Pxp_document, too.

* The DTD: Sometimes it is necessary to access the DTD of a document; the
  average application does not need this feature. The class dtd describes
  DTDs and makes it possible to get representations of element, entity,
  and notation declarations as well as processing instructions contained
  in the DTD. This class, and dtd_element, dtd_notation, and
  proc_instruction, can be found in the module Pxp_dtd. There are a couple
  of classes representing different kinds of entities; these can be found
  in the module Pxp_entity.

Additionally, the following modules play a role:

* Pxp_yacc: Here the main parsing functions such as parse_document_entity
  are located. Some additional types and functions allow the parser to be
  configured in a non-standard way.

* Pxp_types: This is a collection of basic types and exceptions.

There are some further modules that are needed internally but are not part
of the API.

Let the document to be parsed be stored in a file called doc.xml. The
parsing process is started by calling the function

  val parse_document_entity : config -> source -> 'ext spec -> 'ext document

defined in the module Pxp_yacc. The first argument specifies some global
properties of the parser; it is recommended to start with default_config.
The second argument determines where the document to be parsed comes from;
this may be a file, a channel, or an entity ID. To parse doc.xml, it is
sufficient to pass from_file "doc.xml".

The third argument passes the object specification to use. Roughly
speaking, it determines which classes implement the node objects of which
element types, and which extensions are to be used. The 'ext polymorphic
variable is the type of the extension. For the moment, let us simply pass
default_spec as this argument, and ignore it.

So the following expression parses doc.xml:

  open Pxp_yacc
  let d = parse_document_entity default_config (from_file "doc.xml") default_spec
Note that default_config implies that warnings are collected but not
printed. Errors raise one of the exceptions defined in Pxp_types; to get
readable errors and warnings, catch the exceptions as follows:

  class warner =
    object
      method warn w =
        print_endline ("WARNING: " ^ w)
    end
  ;;

  try
    let config = { default_config with warner = new warner } in
    let d = parse_document_entity config (from_file "doc.xml") default_spec
    in
      ...
  with
    e ->
      print_endline (Pxp_types.string_of_exn e)

Now d is an object of the document class. If you want the node tree, you
can get the root element by

  let root = d # root

and if you would rather like to access the DTD, determine it by

  let dtd = d # dtd

As it is more interesting, let us investigate the node tree now. Given the
root element, it is possible to recursively traverse the whole tree. The
children of a node n are returned by the method sub_nodes, and the type of
a node is returned by node_type. This function traverses the tree and
prints the type of each node:

  let rec print_structure n =
    let ntype = n # node_type in
    match ntype with
      T_element name ->
        print_endline ("Element of type " ^ name);
        let children = n # sub_nodes in
        List.iter print_structure children
    | T_data ->
        print_endline "Data"
    | _ ->
        (* Other node types are not possible unless the parser is
           configured differently. *)
        assert false

You can call this function by

  print_structure root

The type returned by node_type is either T_element name or T_data. The
name of the element type is the string included in the angle brackets.
Note that only elements have children; data nodes are always leaves of the
tree.

There are some more methods in order to access a parsed node tree:

* n # parent: Returns the parent node, or raises Not_found if the node is
  already the root.

* n # root: Returns the root of the node tree.

* n # attribute a: Returns the value of the attribute with name a. The
  method returns a value for every declared attribute, independently of
  whether the attribute instance is defined or not. If the attribute is
  not declared, Not_found will be raised. (In well-formedness mode, every
  attribute is considered as being implicitly declared with type CDATA.)

  The following return values are possible: Value s, Valuelist sl, and
  Implied_value. The first two value types indicate that the attribute
  value is available, either because there is a definition a="value" in
  the XML text, or because there is a default value (declared in the DTD).
  Only if both the instance definition and the default declaration are
  missing will the latter value Implied_value be returned. (A small sketch
  that handles all three cases follows after this list.)

  In the DTD, every attribute is typed. There are single-value types
  (CDATA, ID, IDREF, ENTITY, NMTOKEN, enumerations), in which case the
  method passes Value s back, where s is the normalized string value of
  the attribute. The other types (IDREFS, ENTITIES, NMTOKENS) represent
  list values, and the parser splits the XML literal into several tokens
  and returns these tokens as Valuelist sl.

  Normalization means that entity references (the &name; tokens) and
  character references (&#number;) are replaced by the text they
  represent, and that white space characters are converted into plain
  spaces.

* n # data: Returns the character data contained in the node. For data
  nodes, the meaning is obvious as this is the main content of data nodes.
  For element nodes, this method returns the concatenated contents of all
  inner data nodes.

  Note that entity references included in the text are resolved while they
  are being parsed; for example, the text "a &lt;&gt; b" will be returned
  as "a <> b" by this method. Spaces of data nodes are always preserved.
  Newlines are preserved, but always converted to \n characters even if
  newlines are encoded as \r\n or \r. Normally you will never see two
  adjacent data nodes because the parser collapses all data material at
  one location into one node. (However, if you create your own tree or
  transform the parsed tree, it is possible to have adjacent data nodes.)
  Note that elements that do not allow #PCDATA as content will not have
  data nodes as children. This means that spaces and newlines, the only
  character material allowed for such elements, are silently dropped.
-(type)h("v)n(aluable")e(whose)h(attrib)n(ute)g("priority")396
-1094 y(is)i("1",)f(this)h(function)d(can)i(help:)396
-1274 y Fq(let)45 b(rec)f(print_valuable_prio1)d(n)k(=)486
-1371 y(let)f(ntype)g(=)h(n)g(#)f(node_type)g(in)486 1468
-y(match)g(ntype)g(with)576 1565 y(T_element)f("valuable")g(when)h(n)h
-(#)g(attribute)e("priority")g(=)i(Value)f("1")g(->)665
-1662 y(print_endline)f("Valuable)g(node)h(with)h(priotity)e(1)i
-(found:";)665 1759 y(print_endline)e(\(n)h(#)h(data\))486
-1857 y(|)g(\(T_element)e(_)h(|)h(T_data\))f(->)665 1954
-y(let)h(children)e(=)i(n)f(#)h(sub_nodes)e(in)665 2051
-y(List.iter)h(print_valuable_prio1)d(children)486 2148
-y(|)k(_)f(->)665 2245 y(assert)g(false)396 2436 y Fv(Y)-9
-b(ou)20 b(can)g(call)g(this)h(function)e(by:)396 2616
-y Fq(print_valuable_prio1)42 b(root)396 2807 y Fv(If)20
-b(you)g(lik)o(e)g(a)h(DSSSL-lik)o(e)f(style,)g(you)g(can)g(mak)o(e)f
-(the)h(function)f Fq(process_children)f Fv(e)o(xplicit:)396
-2987 y Fq(let)45 b(rec)f(print_valuable_prio1)d(n)k(=)486
-3182 y(let)f(process_children)e(n)j(=)576 3279 y(let)f(children)f(=)i
-(n)g(#)f(sub_nodes)g(in)576 3376 y(List.iter)f(print_valuable_prio1)e
-(children)486 3473 y(in)486 3667 y(let)j(ntype)g(=)h(n)g(#)f(node_type)
-g(in)486 3764 y(match)g(ntype)g(with)576 3862 y(T_element)f("valuable")
-g(when)h(n)h(#)g(attribute)e("priority")g(=)i(Value)f("1")g(->)665
-3959 y(print_endline)f("Valuable)g(node)h(with)h(priority)e(1)i
-(found:";)665 4056 y(print_endline)e(\(n)h(#)h(data\))486
-4153 y(|)g(\(T_element)e(_)h(|)h(T_data\))f(->)665 4250
-y(process_children)e(n)486 4347 y(|)j(_)f(->)665 4444
-y(assert)g(false)396 4635 y Fv(So)21 b(f)o(ar)m(,)e(O'Caml)h(is)i(no)n
-(w)d(a)i(simple)f("style-sheet)g(language":)e(Y)-9 b(ou)20
-b(can)g(form)f(a)h(big)g("match")g(e)o(xpression)e(to)396
-4743 y(distinguish)h(between)h(all)h(signi\002cant)e(cases,)i(and)f
-(pro)o(vide)e(dif)n(ferent)g(reactions)i(on)g(dif)n(ferent)e
-(conditions.)h(But)h(this)396 4851 y(technique)f(has)h(limitations;)g
-(the)h("match")e(e)o(xpression)g(tends)h(to)g(get)g(lar)o(ger)f(and)h
-(lar)o(ger)m(,)e(and)i(it)g(is)i(dif)n(\002cult)d(to)i(store)p
-Black 3800 5278 a Fr(28)p Black eop
-%%Page: 29 29
-29 28 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
-Black 396 579 a Fv(intermediate)f(v)n(alues)h(as)h(there)e(is)j(only)d
-(one)h(big)f(recursion.)g(Alternati)n(v)o(ely)-5 b(,)18
-b(it)j(is)g(also)f(possible)g(to)h(represent)e(the)396
-687 y(v)n(arious)g(cases)i(as)g(classes,)g(and)f(to)g(use)h(dynamic)d
-(method)h(lookup)g(to)h(\002nd)g(the)g(appropiate)e(class.)j(The)f(ne)o
-(xt)f(section)396 795 y(e)o(xplains)g(this)i(technique)e(in)h(detail.)
--2 1213 y Fx(2.3.)39 b(Class-based)e(pr)m(ocessing)g(of)j(the)f(node)f
-(tree)396 1393 y Fv(By)21 b(def)o(ault,)e(the)h(parsed)g(node)f(tree)h
-(consists)h(of)f(objects)g(of)g(the)g(same)g(class;)h(this)g(is)g(a)g
-(good)e(design)g(as)i(long)e(as)i(you)396 1501 y(w)o(ant)g(only)e(to)h
-(access)h(selected)f(parts)g(of)g(the)h(document.)c(F)o(or)j(comple)o
-(x)f(transformations,)e(it)k(may)f(be)g(better)g(to)g(use)396
-1609 y(dif)n(ferent)f(classes)i(for)f(objects)g(describing)e(dif)n
-(ferent)h(element)g(types.)396 1758 y(F)o(or)h(e)o(xample,)f(if)h(the)g
-(DTD)h(declares)e(the)i(element)e(types)h Fq(a)p Fv(,)h
-Fq(b)p Fv(,)f(and)g Fq(c)p Fv(,)g(and)g(if)g(the)g(task)h(is)g(to)f
-(con)m(v)o(ert)e(an)j(arbitrary)396 1866 y(document)d(into)i(a)h
-(printable)e(format,)g(the)h(idea)g(is)h(to)f(de\002ne)g(for)g(e)n(v)o
-(ery)f(element)g(type)h(a)g(separate)g(class)h(that)g(has)f(a)396
-1974 y(method)f Fq(print)p Fv(.)h(The)g(classes)h(are)f
-Fq(eltype_a)p Fv(,)f Fq(eltype_b)p Fv(,)g(and)h Fq(eltype_c)p
-Fv(,)f(and)h(e)n(v)o(ery)f(class)i(implements)396 2082
-y Fq(print)f Fv(such)g(that)g(elements)g(of)g(the)g(type)g
-(corresponding)d(to)j(the)g(class)i(are)e(con)m(v)o(erted)d(to)k(the)f
-(output)f(format.)396 2232 y(The)h(parser)g(supports)f(such)h(a)g
-(design)g(directly)-5 b(.)19 b(As)i(it)g(is)g(impossible)e(to)i(deri)n
-(v)o(e)d(recursi)n(v)o(e)h(classes)i(in)g(O'Caml)3703
-2198 y Ff(3)3727 2232 y Fv(,)g(the)396 2340 y(specialized)f(element)f
-(classes)j(cannot)d(be)h(formed)f(by)g(simply)h(inheriting)f(from)g
-(the)h(b)n(uilt-in)g(classes)h(of)f(the)g(parser)396
-2447 y(and)g(adding)f(methods)g(for)g(customized)g(functionality)-5
-b(.)18 b(T)-7 b(o)20 b(get)g(around)f(this)h(limitation,)g(e)n(v)o(ery)
-f(node)g(of)h(the)396 2555 y(document)e(tree)j(is)g(represented)d(by)i
-Fr(two)h Fv(objects,)e(one)h(called)g("the)g(node")f(and)h(containing)e
-(the)i(recursi)n(v)o(e)396 2663 y(de\002nition)f(of)h(the)g(tree,)g
-(one)g(called)g("the)g(e)o(xtension".)e(Ev)o(ery)h(node)g(object)h(has)
-g(a)h(reference)e(to)h(the)g(e)o(xtension,)f(and)396
-2771 y(the)h(e)o(xtension)f(has)i(a)f(reference)f(to)h(the)g(node.)f
-(The)h(adv)n(antage)e(of)i(this)h(model)e(is)i(that)g(it)g(is)g(no)n(w)
-e(possible)h(to)396 2879 y(customize)g(the)g(e)o(xtension)f(without)g
-(af)n(fecting)g(the)h(typing)f(constraints)g(of)h(the)h(recursi)n(v)o
-(e)d(node)h(de\002nition.)396 3029 y(Ev)o(ery)g(e)o(xtension)g(must)h
-(ha)n(v)o(e)g(the)g(three)g(methods)f Fq(clone)p Fv(,)g
-Fq(node)p Fv(,)h(and)g Fq(set_node)p Fv(.)f(The)h(method)f
-Fq(clone)h Fv(creates)396 3137 y(a)h(deep)e(cop)o(y)h(of)g(the)g(e)o
-(xtension)f(object)g(and)h(returns)f(it;)i Fq(node)f
-Fv(returns)g(the)g(node)f(object)h(for)f(this)i(e)o(xtension)e(object;)
-396 3244 y(and)h Fq(set_node)f Fv(is)i(used)f(to)h(tell)g(the)f(e)o
-(xtension)f(object)g(which)h(node)f(is)i(associated)f(with)g(it,)h
-(this)g(method)e(is)396 3352 y(automatically)g(called)h(when)g(the)g
-(node)f(tree)h(is)h(initialized.)f(The)g(follo)n(wing)e(de\002nition)h
-(is)i(a)g(good)e(starting)h(point)396 3460 y(for)g(these)g(methods;)f
-(usually)h Fq(clone)g Fv(must)g(be)g(further)f(re\002ned)g(when)h
-(instance)g(v)n(ariables)f(are)h(added)f(to)h(the)h(class:)396
-3640 y Fq(class)44 b(custom_extension)e(=)486 3738 y(object)i(\(self\))
-576 3932 y(val)g(mutable)g(node)g(=)g(\(None)g(:)h(custom_extension)d
-(node)i(option\))576 4126 y(method)f(clone)h(=)h({<)g(>})576
-4223 y(method)e(node)i(=)665 4320 y(match)f(node)g(with)845
-4418 y(None)g(->)934 4515 y(assert)g(false)755 4612 y(|)h(Some)f(n)g
-(->)h(n)576 4709 y(method)e(set_node)h(n)h(=)665 4806
-y(node)f(<-)h(Some)f(n)p Black 3800 5278 a Fr(29)p Black
-eop
    end

This part of the extension is usually the same for all classes, so it is a
good idea to consider custom_extension as the super-class of the further
class definitions. Continuing the example from above, we can define the
element type classes as follows:

  class virtual custom_extension =
    object (self)
      ... clone, node, set_node defined as above ...

      method virtual print : out_channel -> unit
    end

  class eltype_a =
    object (self)
      inherit custom_extension
      method print ch = ...
    end

  class eltype_b =
    object (self)
      inherit custom_extension
      method print ch = ...
    end

  class eltype_c =
    object (self)
      inherit custom_extension
      method print ch = ...
    end

The method print can now be implemented for every element type separately.
Note that you get the associated node by invoking

  self # node

and you get the extension object of a node n by writing

  n # extension

It is guaranteed that

  self # node # extension == self
-31 30 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
-Black 396 579 a Fv(al)o(w)o(ays)h(holds.)396 728 y(Here)f(are)g(sample)
-g(de\002nitions)g(of)g(the)g Fq(print)g Fv(methods:)396
-909 y Fq(class)44 b(eltype_a)g(=)486 1006 y(object)g(\(self\))576
-1103 y(inherit)f(custom_extension)576 1200 y(method)g(print)h(ch)h(=)
-665 1297 y(\(*)g(Nodes)f(<a>...</a>)f(are)h(only)g(containers:)f(*\))
-665 1394 y(output_string)g(ch)h("\(";)665 1491 y(List.iter)755
-1588 y(\(fun)g(n)h(->)f(n)h(#)g(extension)e(#)i(print)f(ch\))755
-1686 y(\(self)g(#)h(node)f(#)g(sub_nodes\);)665 1783
-y(output_string)f(ch)h("\)";)486 1880 y(end)396 2074
-y(class)g(eltype_b)g(=)486 2171 y(object)g(\(self\))576
-2268 y(inherit)f(custom_extension)576 2366 y(method)g(print)h(ch)h(=)
-665 2463 y(\(*)g(Print)f(the)g(value)g(of)h(the)f(CDATA)g(attribute)f
-("print":)h(*\))665 2560 y(match)g(self)g(#)h(node)f(#)h(attribute)e
-("print")h(with)755 2657 y(Value)g(s)314 b(->)44 b(output_string)f(ch)h
-(s)665 2754 y(|)h(Implied_value)e(->)h(output_string)f(ch)h
-("<missing>")665 2851 y(|)h(Valuelist)e(l)135 b(->)44
-b(assert)g(false)1517 2948 y(\(*)h(not)f(possible)f(because)h(the)g
-(att)h(is)f(CDATA)g(*\))486 3045 y(end)396 3240 y(class)g(eltype_c)g(=)
-486 3337 y(object)g(\(self\))576 3434 y(inherit)f(custom_extension)576
-3531 y(method)g(print)h(ch)h(=)665 3628 y(\(*)g(Print)f(the)g(contents)
-g(of)g(this)g(element:)g(*\))665 3725 y(output_string)f(ch)h(\(self)g
-(#)h(node)f(#)h(data\))486 3823 y(end)396 4017 y(class)f
-(null_extension)f(=)486 4114 y(object)h(\(self\))576
-4211 y(inherit)f(custom_extension)576 4308 y(method)g(print)h(ch)h(=)g
-(assert)e(false)486 4405 y(end)396 4638 y Fv(The)20 b(remaining)f(task)
-h(is)h(to)g(con\002gure)d(the)i(parser)g(such)g(that)g(these)g(e)o
-(xtension)f(classes)i(are)f(actually)g(used.)g(Here)396
-4746 y(another)f(problem)f(arises:)j(It)g(is)g(not)f(possible)g(to)g
-(dynamically)e(select)j(the)f(class)h(of)f(an)g(object)g(to)g(be)h
-(created.)e(As)396 4854 y(w)o(orkaround,)e(PXP)k(allo)n(ws)g(the)f
-(user)g(to)g(specify)g Fr(e)n(xemplar)g(objects)g Fv(for)f(the)h(v)n
-(arious)g(element)f(types;)h(instead)g(of)p Black 3800
-5278 a Fr(31)p Black eop
creating the nodes of the tree by applying the new operator, the nodes are
produced by duplicating the exemplars. As object duplication preserves the
class of the object, one can create fresh objects of every class for which
an exemplar has previously been registered.

Exemplars are meant as objects without contents; the only interesting
thing about them is that they are instances of a certain class. The
creation of an exemplar for an element node can be done by:

  let element_exemplar = new element_impl extension_exemplar

And a data node exemplar is created by:

  let data_exemplar = new data_impl extension_exemplar

The classes element_impl and data_impl are defined in the module
Pxp_document. The constructors initialize the fresh objects as empty
objects, i.e. without children, without data contents, and so on. The
extension_exemplar is the initial extension object the exemplars are
associated with.

Once the exemplars are created and stored somewhere (e.g. in a hash
table), you can take an exemplar and create a concrete instance (with
contents) by duplicating it. As a user of the parser you are normally not
concerned with this, as it is part of the internal logic of the parser,
but as background knowledge it is worthwhile to mention that the two
methods create_element and create_data actually perform the duplication of
the exemplar for which they are invoked, additionally apply modifications
to the clone, and finally return the new object. Moreover, the extension
object is copied, too, and the new node object is associated with the
fresh extension object. Note that this is the reason why every extension
object must have a clone method.

The configuration of the set of exemplars is passed to the
parse_document_entity function as the third argument. In our example, this
argument can be set up as follows:

  let spec =
    make_spec_from_alist
      ~data_exemplar:            (new data_impl (new null_extension))
      ~default_element_exemplar: (new element_impl (new null_extension))
      ~element_alist:
         [ "a",  new element_impl (new eltype_a);
           "b",  new element_impl (new eltype_b);
           "c",  new element_impl (new eltype_c);
         ]
      ()

The ~element_alist function argument defines the mapping from element
types to exemplars as an associative list. The argument ~data_exemplar
specifies the exemplar for data nodes, and the ~default_element_exemplar
is used whenever the parser finds an element type for which the
associative list does not define an exemplar.
The configuration is now complete. You can still use the same parsing
functions, only the initialization is a bit different. For example, call
the parser by:

  let d = parse_document_entity default_config (from_file "doc.xml") spec

Note that the resulting document d has a usable type; especially the print
method we added is visible. So you can print your document by

  d # root # extension # print stdout
This object-oriented approach looks rather complicated; this is mostly
caused by working around some problems of the strict typing system of
O'Caml. Some auxiliary concepts such as extensions were needed, but the
practical consequences are low. In the next section, one of the examples
of the distribution is explained, a converter from readme documents to
HTML.

2.4. Example: An HTML backend for the readme DTD

The converter from readme documents to HTML documents strictly follows the
approach of defining one class per element type. The HTML code is similar
to the readme source; because of this most elements can be converted in
the following way: given the input element

  <e>content</e>

the conversion text is the concatenation of a computed prefix, the
recursively converted content, and a computed suffix.

Only one element type cannot be handled by this scheme: footnote.
Footnotes are collected while they are found in the input text, and they
are printed after the main text has been converted and printed.

2.4.1. Header

  open Pxp_types
  open Pxp_document

2.4.2. Type declarations

  class type footnote_printer =
    object
      method footnote_to_html : store_type -> out_channel -> unit
    end

  and store_type =
    object
      method alloc_footnote : footnote_printer -> int
      method print_footnotes : out_channel -> unit
    end
  ;;

2.4.3. Class store

The store is a container for footnotes. You can add a footnote by invoking
alloc_footnote; the argument is an object of the class footnote_printer,
the method returns the number of the footnote. The interesting property of
a footnote is that it can be converted to HTML, so a footnote_printer is
an object with a method footnote_to_html. The class footnote which is
defined below has a compatible method footnote_to_html such that objects
created from it can be used as footnote_printers.

The other method, print_footnotes, prints the footnotes as a definition
list, and is typically invoked after the main material of the page has
already been printed. Every item of the list is printed by
footnote_to_html.

  class store =
    object (self)

      val mutable footnotes = ( [] : (int * footnote_printer) list )
      val mutable next_footnote_number = 1

      method alloc_footnote n =
        let number = next_footnote_number in
        next_footnote_number <- number+1;
        footnotes <- footnotes @ [ number, n ];
        number

      method print_footnotes ch =
        if footnotes <> [] then begin
          output_string ch "<hr align=left noshade=noshade width=\"30%\">\n";
          output_string ch "<dl>\n";
          List.iter
            (fun (_,n) ->
-35 34 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
-Black 620 579 a Fq(n)45 b(#)g(footnote_to_html)d(\(self)i(:)h
-(#store_type)e(:)p Fo(>)h Fq(store_type\))f(ch\))486
-676 y(footnotes;)396 773 y(output_string)g(ch)h(")p Fo(<)p
-Fq(/dl)p Fo(>)p Fq(\\n";)665 870 y(end)486 1065 y(end)396
-1162 y(;;)-2 1614 y Fp(2.4.4.)35 b(Function)f Fc(escape_html)396
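As a quick illustration of the protocol (not part of the converter
itself), the following sketch allocates one throw-away footnote_printer in
a store and prints the collected footnotes; the real footnote class of
this example is the one defined further below:

  (* Illustrative sketch only: the dummy object and its output text are
     made up; it merely satisfies the footnote_printer class type. *)
  let demo_store () =
    let st = new store in
    let dummy =
      object
        method footnote_to_html (_ : store_type) ch =
          output_string ch "<dt>[1]</dt>\n<dd>An example footnote.</dd>\n"
      end
    in
    let number = st # alloc_footnote dummy in
    Printf.printf "The footnote got number %d\n" number;
    st # print_footnotes stdout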
-1782 y Fv(This)21 b(function)d(con)m(v)o(erts)h(the)h(characters)f
-Fm(<)p Fv(,)h Fm(>)p Fv(,)g(&,)g(and)g(")h(to)f(their)g(HTML)g
-(representation.)e(F)o(or)h(e)o(xample,)396 1890 y Fq(escape_html)43
-b(")p Fo(<>)p Fq(")h(=)h("<>")p Fv(.)19 b(Other)g(characters)h
-(are)g(left)g(unchanged.)396 2070 y Fq(let)45 b(escape_html)e(s)h(=)486
-2167 y(Str.global_substitute)576 2264 y(\(Str.regexp)f(")p
-Fo(<)p Fq(\\\\|)p Fo(>)p Fq(\\\\|&\\\\|\\""\))576 2362
-y(\(fun)h(s)g(-)p Fo(>)665 2459 y Fq(match)g(Str.matched_string)e(s)j
-(with)755 2556 y(")p Fo(<)p Fq(")f(-)p Fo(>)h Fq("<")665
-2653 y(|)g(")p Fo(>)p Fq(")f(-)p Fo(>)h Fq(">")665
-2750 y(|)g("&")f(-)p Fo(>)h Fq("&")665 2847 y(|)g("\\"")f(-)p
-Fo(>)g Fq(""")665 2944 y(|)h(_)g(-)p Fo(>)f Fq(assert)g(false\))
-576 3042 y(s)396 3139 y(;;)-2 3591 y Fp(2.4.5.)35 b(Vir)r(tual)f(c)n
-(lass)h Fc(shared)396 3759 y Fv(This)21 b(virtual)e(class)i(is)g(the)g
-(abstract)f(superclass)g(of)f(the)i(e)o(xtension)d(classes)k(sho)n(wn)d
-(belo)n(w)-5 b(.)19 b(It)i(de\002nes)f(the)g(standard)396
-3867 y(methods)f Fq(clone)p Fv(,)h Fq(node)p Fv(,)g(and)g
-Fq(set_node)p Fv(,)f(and)g(declares)h(the)g(type)g(of)g(the)g(virtual)g
-(method)e Fq(to_html)p Fv(.)i(This)396 3975 y(method)f(recursi)n(v)o
-(ely)f(tra)n(v)o(erses)i(the)g(whole)g(element)g(tree,)g(and)f(prints)h
-(the)g(con)m(v)o(erted)e(HTML)i(code)f(to)i(the)f(output)396
-4083 y(channel)f(passed)h(as)h(second)f(ar)o(gument.)d(The)j(\002rst)h
-(ar)o(gument)d(is)j(the)f(reference)f(to)h(the)g(global)f
-Fq(store)h Fv(object)g(which)396 4191 y(collects)h(the)f(footnotes.)396
-4371 y Fq(class)44 b(virtual)g(shared)g(=)486 4468 y(object)g(\(self\))
-576 4662 y(\(*)g(--)h(default_ext)e(--)h(*\))576 4857
-y(val)g(mutable)g(node)g(=)g(\(None)g(:)h(shared)f(node)g(option\))p
-Black 3800 5278 a Fr(35)p Black eop

      method clone = {< >}
      method node =
        match node with
          None ->
            assert false
        | Some n -> n
      method set_node n =
        node <- Some n

      (* -- virtual -- *)

      method virtual to_html : store -> out_channel -> unit

    end
  ;;

2.4.6. Class only_data

This class defines to_html such that the character data of the current
node is converted to HTML. Note that self is an extension object,
self # node is the node object, and self # node # data returns the
character data of the node.

  class only_data =
    object (self)
      inherit shared

      method to_html store ch =
        output_string ch (escape_html (self # node # data))
    end
  ;;

2.4.7. Class readme

This class converts elements of type readme to HTML. Such an element is
(by definition) always the root element of the document. First, the HTML
header is printed; the title attribute of the element determines the title
of the HTML page. Some aspects of the HTML page can be configured by
setting certain parameter entities, for example the background color, the
text color, and the link colors. After the header, the body tag, and the
headline have been printed, the contents of the page are converted by
invoking to_html on all children of the current node (which is the root
node). Then the footnotes are appended by telling the global store object
to print them. Finally, the end tags of the HTML page are printed.

This class is an example of how to access the value of an attribute: the
value is determined by invoking self # node # attribute "title". As this
attribute has been declared as CDATA and as being required, the value
always has the form Value s where s is the string value of the attribute.

You can also see how entity contents can be accessed. A parameter entity
object can be looked up by self # node # dtd # par_entity "name", and by
invoking replacement_text the value of the entity is returned after inner
parameter and character entities have been processed. Note that you must
use gen_entity instead of par_entity to access general entities.

  class readme =
    object (self)
      inherit shared

      method to_html store ch =
        (* output header *)
        output_string
          ch "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">";
        output_string
          ch "<!-- WARNING! This is a generated file, do not edit! -->\n";
        let title =
          match self # node # attribute "title" with
            Value s -> s
          | _ -> assert false
        in
        let html_header, _ =
          try (self # node # dtd # par_entity "readme:html:header")
                # replacement_text
          with WF_error _ -> "", false in
        let html_trailer, _ =
          try (self # node # dtd # par_entity "readme:html:trailer")
                # replacement_text
          with WF_error _ -> "", false in
        let html_bgcolor, _ =
          try (self # node # dtd # par_entity "readme:html:bgcolor")
                # replacement_text
          with WF_error _ -> "white", false in
        let html_textcolor, _ =
          try (self # node # dtd # par_entity "readme:html:textcolor")
                # replacement_text
          with WF_error _ -> "", false in
        let html_alinkcolor, _ =
          try (self # node # dtd # par_entity "readme:html:alinkcolor")
                # replacement_text
          with WF_error _ -> "", false in
        let html_vlinkcolor, _ =
          try (self # node # dtd # par_entity "readme:html:vlinkcolor")
                # replacement_text
          with WF_error _ -> "", false in
        let html_linkcolor, _ =
          try (self # node # dtd # par_entity "readme:html:linkcolor")
                # replacement_text
          with WF_error _ -> "", false in
        let html_background, _ =
          try (self # node # dtd # par_entity "readme:html:background")
                # replacement_text
          with WF_error _ -> "", false in

        output_string ch "<html><header><title>\n";
        output_string ch (escape_html title);
        output_string ch "</title></header>\n";
        output_string ch "<body ";
        List.iter
          (fun (name,value) ->
             if value <> "" then
               output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
          [ "bgcolor",   html_bgcolor;
            "text",      html_textcolor;
            "link",      html_linkcolor;
            "alink",     html_alinkcolor;
            "vlink",     html_vlinkcolor;
          ];
        output_string ch ">\n";
        output_string ch html_header;
        output_string ch "<h1>";
        output_string ch (escape_html title);
        output_string ch "</h1>\n";
        (* process main content: *)
        List.iter
          (fun n -> n # extension # to_html store ch)
          (self # node # sub_nodes);
        (* now process footnotes *)
        store # print_footnotes ch;
        (* trailer *)
        output_string ch html_trailer;
        output_string ch "</html>\n";
-%%Page: 39 39
-39 38 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
-Black 486 579 a Fq(end)396 676 y(;;)-2 1129 y Fp(2.4.8.)35
-b(Classes)h Fc(section)p Fp(,)31 b Fc(sect1)p Fp(,)g
-Fc(sect2)p Fp(,)g(and)j Fc(sect3)396 1296 y Fv(As)21
-b(the)f(con)m(v)o(ersion)e(process)i(is)h(v)o(ery)e(similar)m(,)h(the)g
-(con)m(v)o(ersion)d(classes)22 b(of)e(the)g(three)g(section)f(le)n(v)o
-(els)i(are)f(deri)n(v)o(ed)396 1404 y(from)f(the)i(more)e(general)g
-Fq(section)h Fv(class.)h(The)e(HTML)h(code)g(of)g(the)g(section)g(le)n
-(v)o(els)g(only)f(dif)n(fers)h(in)g(the)g(type)g(of)396
-1512 y(the)g(headline,)f(and)h(because)f(of)h(this)h(the)f(classes)i
-(describing)c(the)i(section)g(le)n(v)o(els)g(can)g(be)h(computed)d(by)i
-(replacing)396 1620 y(the)g(class)i(ar)o(gument)17 b
-Fq(the_tag)j Fv(of)g Fq(section)g Fv(by)f(the)i(HTML)e(name)h(of)g(the)
-g(headline)f(tag.)396 1770 y(Section)h(elements)g(are)g(con)m(v)o
-(erted)e(to)i(HTML)g(by)g(printing)e(a)j(headline)e(and)h(then)f(con)m
-(v)o(erting)f(the)i(contents)f(of)h(the)396 1878 y(element)g(recursi)n
-(v)o(ely)-5 b(.)18 b(More)h(precisely)-5 b(,)19 b(the)h(\002rst)h
-(sub-element)e(is)i(al)o(w)o(ays)f(a)h Fq(title)f Fv(element,)f(and)h
-(the)g(other)396 1985 y(elements)g(are)g(the)g(contents)g(of)g(the)g
-(section.)g(This)g(structure)f(is)j(declared)c(in)j(the)f(DTD,)g(and)g
-(it)h(is)g(guaranteed)d(that)396 2093 y(the)i(document)f(matches)g(the)
-i(DTD.)f(Because)g(of)g(this)h(the)f(title)h(node)e(can)h(be)g
-(separated)f(from)g(the)h(rest)h(without)f(an)o(y)396
-2201 y(checks.)396 2351 y(Both)g(the)h(title)g(node,)e(and)g(the)h
-(body)f(nodes)h(are)g(then)f(con)m(v)o(erted)f(to)i(HTML)g(by)g
-(calling)g Fq(to_html)f Fv(on)h(them.)396 2572 y Fq(class)44
-b(section)g(the_tag)g(=)486 2670 y(object)g(\(self\))576
-2767 y(inherit)f(shared)576 2961 y(val)h(tag)g(=)h(the_tag)576
-3155 y(method)e(to_html)h(store)g(ch)h(=)665 3252 y(let)g(sub_nodes)e
-(=)i(self)f(#)g(node)h(#)f(sub_nodes)g(in)665 3350 y(match)g(sub_nodes)
-g(with)486 3447 y(title_node)f(::)i(rest)f(-)p Fo(>)576
-3544 y Fq(output_string)e(ch)j(\(")p Fo(<)p Fq(")f(^)g(tag)h(^)f(")p
-Fo(>)p Fq(\\n"\);)576 3641 y(title_node)f(#)h(extension)g(#)g(to_html)g
-(store)g(ch;)576 3738 y(output_string)e(ch)j(\("\\n)p
-Fo(<)p Fq(/")e(^)i(tag)f(^)h(")p Fo(>)p Fq("\);)576 3835
-y(List.iter)665 3932 y(\(fun)f(n)h(-)p Fo(>)f Fq(n)h(#)g(extension)e(#)
-i(to_html)e(store)h(ch\))665 4029 y(rest)396 4127 y(|)h(_)g(-)p
-Fo(>)576 4224 y Fq(assert)e(false)486 4321 y(end)396
-4418 y(;;)396 4612 y(class)h(sect1)g(=)h(section)f("h1";;)396
-4709 y(class)g(sect2)g(=)h(section)f("h3";;)396 4807
-y(class)g(sect3)g(=)h(section)f("h4";;)p Black 3800 5278
-a Fr(39)p Black eop
-2.4.9. Classes map_tag, p, em, ul, li
-
-Several element types are converted to HTML by simply mapping them to corresponding HTML
-element types. The class map_tag implements this, and the class argument the_target_tag
-determines the tag name to map to. The output consists of the start tag, the recursively
-converted inner elements, and the end tag.
-
-class map_tag the_target_tag =
-  object (self)
-    inherit shared
-
-    val target_tag = the_target_tag
-
-    method to_html store ch =
-      output_string ch ("<" ^ target_tag ^ ">\n");
-      List.iter
-        (fun n -> n # extension # to_html store ch)
-        (self # node # sub_nodes);
-      output_string ch ("\n</" ^ target_tag ^ ">");
-  end
-;;
-
-class p  = map_tag "p";;
-class em = map_tag "b";;
-class ul = map_tag "ul";;
-class li = map_tag "li";;
-
-2.4.10. Class br
-
-Elements of type br are mapped to the same HTML type. Note that HTML forbids the end tag
-of br.
-
-class br =
-  object (self)
-    inherit shared
-
-    method to_html store ch =
-      output_string ch "<br>\n";
-      List.iter
-        (fun n -> n # extension # to_html store ch)
-        (self # node # sub_nodes);
-  end
-;;
-2.4.11. Class code
-
-The code type is converted to a pre section (preformatted text). As the meaning of tabs is
-unspecified in HTML, tabs are expanded to spaces.
-
-class code =
-  object (self)
-    inherit shared
-
-    method to_html store ch =
-      let data = self # node # data in
-      (* convert tabs *)
-      let l = String.length data in
-      let rec preprocess i column =
-        (* this is very ineffective but comprehensive: *)
-        if i < l then
-          match data.[i] with
-              '\t' ->
-                let n = 8 - (column mod 8) in
-                String.make n ' ' ^ preprocess (i+1) (column + n)
-            | '\n' ->
-                "\n" ^ preprocess (i+1) 0
-            | c ->
-                String.make 1 c ^ preprocess (i+1) (column + 1)
-        else
-          ""
-      in
-      output_string ch "<p><pre>";
-      output_string ch (escape_html (preprocess 0 0));
-      output_string ch "</pre></p>";
-  end
-;;
-
-2.4.12. Class a
-
-Hyperlinks, expressed by the a element type, are converted to the HTML a type. If the target
-of the hyperlink is given by href, the URL of this attribute can be used directly.
-Alternatively, the target can be given by readmeref, in which case the ".html" suffix must be
-added to the file name.
-
-Note that within a only #PCDATA is allowed, so the contents can be converted directly by
-applying escape_html to the character data contents.
-class a =
-  object (self)
-    inherit shared
-
-    method to_html store ch =
-      output_string ch "<a ";
-      let href =
-        match self # node # attribute "href" with
-            Value v -> escape_html v
-          | Valuelist _ -> assert false
-          | Implied_value ->
-              begin match self # node # attribute "readmeref" with
-                  Value v -> escape_html v ^ ".html"
-                | Valuelist _ -> assert false
-                | Implied_value ->
-                    ""
-              end
-      in
-      if href <> "" then
-        output_string ch ("href=\"" ^ href ^ "\"");
-      output_string ch ">";
-      output_string ch (escape_html (self # node # data));
-      output_string ch "</a>";
-  end
-;;
-
-2.4.13. Class footnote
-
-The footnote class has two methods: to_html to convert the footnote reference to HTML, and
-footnote_to_html to convert the footnote text itself.
-
-The footnote reference is converted to a local hyperlink; more precisely, to two anchor tags
-which are connected with each other. The text anchor points to the footnote anchor, and the
-footnote anchor points to the text anchor.
-
-The footnote must be allocated in the store object. By allocating the footnote, you get the
-number of the footnote, and the text of the footnote is stored until the end of the HTML page
-is reached, when the footnotes can be printed. The to_html method simply stores the object
-itself, such that the footnote_to_html method is invoked on the same object that encountered
-the footnote.
-The to_html method only allocates the footnote and prints the reference anchor, but it does
-not print or convert the contents of the note. This is deferred until the footnotes actually
-get printed, i.e. the recursive call of to_html on the sub nodes is done by footnote_to_html.
-
-Note that this technique does not work if you make another footnote within a footnote; the
-second footnote gets allocated but not printed.
-
-class footnote =
-  object (self)
-    inherit shared
-
-    val mutable footnote_number = 0
-
-    method to_html store ch =
-      let number =
-        store # alloc_footnote (self : #shared :> footnote_printer) in
-      let foot_anchor =
-        "footnote" ^ string_of_int number in
-      let text_anchor =
-        "textnote" ^ string_of_int number in
-      footnote_number <- number;
-      output_string ch ( "<a name=\"" ^ text_anchor ^ "\" href=\"#" ^
-                         foot_anchor ^ "\">[" ^ string_of_int number ^
-                         "]</a>" )
-
-    method footnote_to_html store ch =
-      (* prerequisite: we are in a definition list <dl>...</dl> *)
-      let foot_anchor =
-        "footnote" ^ string_of_int footnote_number in
-      let text_anchor =
-        "textnote" ^ string_of_int footnote_number in
-      output_string ch ("<dt><a name=\"" ^ foot_anchor ^ "\" href=\"#" ^
-                        text_anchor ^ "\">[" ^ string_of_int footnote_number ^
-                        "]</a></dt>\n<dd>");
-      List.iter
-        (fun n -> n # extension # to_html store ch)
-        (self # node # sub_nodes);
-      output_string ch ("\n</dd>")
-  end
-;;
-2.4.14. The specification of the document model
-
-This code sets up the hash table that connects element types with the exemplars of the
-extension classes that convert the elements to HTML.
-
-open Pxp_yacc
-
-let tag_map =
-  make_spec_from_alist
-    ~data_exemplar:(new data_impl (new only_data))
-    ~default_element_exemplar:(new element_impl (new no_markup))
-    ~element_alist:
-      [ "readme",   (new element_impl (new readme));
-        "sect1",    (new element_impl (new sect1));
-        "sect2",    (new element_impl (new sect2));
-        "sect3",    (new element_impl (new sect3));
-        "title",    (new element_impl (new no_markup));
-        "p",        (new element_impl (new p));
-        "br",       (new element_impl (new br));
-        "code",     (new element_impl (new code));
-        "em",       (new element_impl (new em));
-        "ul",       (new element_impl (new ul));
-        "li",       (new element_impl (new li));
-        "footnote", (new element_impl (new footnote : #shared :> shared));
-        "a",        (new element_impl (new a));
-      ]
-    ()
-;;
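-The following is a minimal sketch of a main program that ties the pieces together; it is not
-part of the original manual text. It assumes the Pxp_yacc entry points default_config,
-from_file and parse_document_entity, and it assumes that the footnote store class defined
-earlier in the example is called store_type (the actual name may differ).
-
-let convert_readme_file in_filename ch =
-  let doc =
-    Pxp_yacc.parse_document_entity
-      Pxp_yacc.default_config
-      (Pxp_yacc.from_file in_filename)
-      tag_map                        (* the specification defined above *)
-  in
-  let store = new store_type in      (* assumed name of the footnote store class *)
-  (* The root is the readme element; its extension generates the whole page: *)
-  doc # root # extension # to_html store ch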
-Notes
-
-1. Elements may also contain processing instructions. Unlike other document models, PXP
-   separates processing instructions from the rest of the text and provides a second interface
-   to access them (method pinstr). However, there is a parser option (enable_pinstr_nodes)
-   which changes the behaviour of the parser such that extra nodes for processing instructions
-   are included into the tree.
-   Furthermore, the tree normally does not contain nodes for XML comments; they are ignored by
-   default. Again, there is an option (enable_comment_nodes) changing this.
-
-2. Due to the typing system it is more or less impossible to derive recursive classes in
-   O'Caml. To get around this, it is common practice to put the modifiable or extensible part
-   of recursive objects into parallel objects.
-
-3. The problem is that the subclass is usually not a subtype in this case, because O'Caml has
-   a contravariant subtyping rule.
-Chapter 3. The objects representing the document
-
-This description might be out-of-date. See the module interface files for updated information.
-
-3.1. The document class
-
-class [ 'ext ] document :
-  Pxp_types.collect_warnings ->
-  object
-    method init_xml_version : string -> unit
-    method init_root : 'ext node -> unit
-
-    method xml_version : string
-    method xml_standalone : bool
-    method dtd : dtd
-    method root : 'ext node
-
-    method encoding : Pxp_types.rep_encoding
-
-    method add_pinstr : proc_instruction -> unit
-    method pinstr : string -> proc_instruction list
-    method pinstr_names : string list
-
-    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
-  end
-;;
-
-The methods beginning with init_ are only for internal use of the parser.
-
-• xml_version: returns the version string at the beginning of the document. For example,
-  "1.0" is returned if the document begins with <?xml version="1.0"?>.
-
-• xml_standalone: returns the boolean value of the standalone declaration in the XML
-  declaration. If the standalone attribute is missing, false is returned.
-
-• dtd: returns a reference to the global DTD object.
-
-• root: returns a reference to the root element.
-
-• encoding: returns the internal encoding of the document. This means that all strings of
-  which the document consists are encoded in this character set.
-
-• pinstr: returns the processing instructions outside the DTD and outside the root element.
-  The argument passed to the method names a target, and the method returns all instructions
-  with this target. The target is the first word inside <? and ?>.
-
-• pinstr_names: returns the names of the processing instructions.
-
-• add_pinstr: adds another processing instruction. This method is used by the parser itself to
-  enter the instructions returned by pinstr, but you can also enter additional instructions.
-
-• write: writes the document to the passed stream as XML text, using the passed (external)
-  encoding. The generated text is always valid XML and can be parsed by PXP; however, the text
-  is badly formatted (this is not a pretty printer).
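-As a usage illustration (this example is not from the manual), the following sketch parses a
-file and queries some of the observer methods listed above. It assumes the Pxp_yacc entry
-points default_config, from_file, parse_document_entity and default_spec; "sample.xml" is a
-hypothetical input file.
-
-let () =
-  let doc =
-    Pxp_yacc.parse_document_entity
-      Pxp_yacc.default_config
-      (Pxp_yacc.from_file "sample.xml")
-      Pxp_yacc.default_spec
-  in
-  print_endline ("XML version: " ^ doc # xml_version);
-  print_endline ("standalone:  " ^ string_of_bool (doc # xml_standalone));
-  (* The root element is reached through the root method: *)
-  match doc # root # node_type with
-      Pxp_document.T_element name -> print_endline ("root element: " ^ name)
-    | _ -> ()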
-3.2. The class type node
-
-From Pxp_document:
-
-type node_type =
-    T_data
-  | T_element of string
-  | T_super_root
-  | T_pinstr of string
-  | T_comment
-  (* and some other, reserved types *)
-;;
-
-class type [ 'ext ] node =
-  object ('self)
-    constraint 'ext = 'ext node #extension
-
-    (* General observers *)
-
-    method extension : 'ext
-    method dtd : dtd
-    method parent : 'ext node
-    method root : 'ext node
-    method sub_nodes : 'ext node list
-    method iter_nodes : ('ext node -> unit) -> unit
-    method iter_nodes_sibl :
-      ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
-    method node_type : node_type
-    method encoding : Pxp_types.rep_encoding
-    method data : string
-    method position : (string * int * int)
-    method comment : string option
-    method pinstr : string -> proc_instruction list
-    method pinstr_names : string list
-    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
-
-    (* Attribute observers *)
-
-    method attribute : string -> Pxp_types.att_value
-    method required_string_attribute : string -> string
-    method optional_string_attribute : string -> string option
-    method required_list_attribute : string -> string list
-    method optional_list_attribute : string -> string list
-    method attribute_names : string list
-    method attribute_type : string -> Pxp_types.att_type
-    method attributes : (string * Pxp_types.att_value) list
-    method id_attribute_name : string
-    method id_attribute_value : string
-    method idref_attribute_names : string
-
-    (* Modifying methods *)
-
-    method add_node : ?force:bool -> 'ext node -> unit
-    method add_pinstr : proc_instruction -> unit
-    method delete : unit
-    method set_nodes : 'ext node list -> unit
-    method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
-    method set_comment : string option -> unit
-
-    (* Cloning methods *)
-
-    method orphaned_clone : 'self
-    method orphaned_flat_clone : 'self
-    method create_element :
-      ?position:(string * int * int) ->
-      dtd -> node_type -> (string * string) list ->
-        'ext node
-    method create_data : dtd -> string -> 'ext node
-    method keep_always_whitespace_mode : unit
-
-    (* Validating methods *)
-
-    method local_validate : ?use_dfa:bool -> unit -> unit
-
-    (* ... Internal methods are undocumented. *)
-  end
-;;
-
-In the module Pxp_types you can find another type definition that is important in this
-context:
-
-type Pxp_types.att_value =
-    Value of string
-  | Valuelist of string list
-  | Implied_value
-;;
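-As a small illustration (not from the manual), the constructors of att_value are typically
-consumed with a pattern match; the attribute method of the node class above is assumed here
-(the convenience methods such as required_string_attribute wrap the same logic):
-
-let string_attribute n name =
-  match n # attribute name with
-      Pxp_types.Value s       -> s                     (* single string value *)
-    | Pxp_types.Valuelist l   -> String.concat " " l   (* tokenized value *)
-    | Pxp_types.Implied_value -> ""                    (* attribute was omitted *)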
-3.2.1. The structure of document trees
-
-A node represents either an element or a character data section. There are two classes
-implementing the two aspects of nodes: element_impl and data_impl. The latter class does not
-implement all methods because some methods do not make sense for data nodes.
-
-(Note: PXP also supports a mode which forces that processing instructions and comments are
-represented as nodes of the document tree. However, these nodes are instances of element_impl
-with the node types T_pinstr and T_comment, respectively. This mode must be explicitly
-configured; the basic representation knows only element and data nodes.)
-
-The following figure (A tree with element nodes, data nodes, and attributes) shows an example
-of how a tree is constructed from element and data nodes. The circular areas represent element
-nodes whereas the ovals denote data nodes. Only elements may have subnodes; data nodes are
-always leaves of the tree. The subnodes of an element can be either element or data nodes; in
-both cases the O'Caml objects storing the nodes have the class type node.
-
-Attributes (the clouds in the picture) are not directly integrated into the tree; there is
-always an extra link to the attribute list. This is also true for processing instructions (not
-shown in the picture). This means that there are separate access methods for attributes and
-processing instructions.
-Figure 3-1. A tree with element nodes, data nodes, and attributes
-[Embedded EPS figure pic/node_term.ps. It depicts the tree of the document
-<a att="apple"><b><a att="orange">An orange</a>Cherries</b><c/></a>: element nodes <a>, <b>,
-<c> and <a>, data nodes "An orange" and "Cherries", and attribute clouds
-"att" -> Value "apple" and "att" -> Value "orange" attached to the two a elements.]
-Only elements, data sections, attributes and processing instructions (and comments, if
-configured) can, directly or indirectly, occur in the document tree. It is impossible to add
-entity references to the tree; if the parser finds such a reference, not the reference as such
-but the referenced text (i.e. the tree representing the structured text) is included in the
-tree.
-
-Note that the parser collapses as much data material into one data node as possible, such that
-there are normally never two adjacent data nodes. This invariant is enforced even if data
-material is included by entity references or CDATA sections, or if a data sequence is
-interrupted by comments. So a & b <!-- comment --> c <![CDATA[ <> d]]> is represented by only
-one data node, for instance. However, you can create document trees manually which break this
-invariant; it is only the way the parser forms the tree.
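-The following sketch (not from the manual) walks such a tree with the observer methods of the
-node class from the previous section, printing element names and the collapsed data nodes:
-
-let rec dump_tree n =
-  match n # node_type with
-      Pxp_document.T_element name ->
-        print_endline ("element: " ^ name);
-        List.iter dump_tree (n # sub_nodes)
-    | Pxp_document.T_data ->
-        print_endline ("data:    " ^ n # data)
-    | _ ->
-        ()   (* T_pinstr, T_comment etc. only occur in specially configured trees *)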
-Figure 3-2. Nodes are doubly linked trees
-[Embedded EPS figure pic/node_general.ps: a parent node and its subnodes, with arrows labelled
-"parent" pointing upwards and arrows labelled "sub_nodes" pointing downwards between them.]
-The node tree has links in both directions: every node has a link to its parent (if any), and
-it has links to its subnodes (see figure Nodes are doubly linked trees). Obviously, this
-doubly-linked structure simplifies navigation in the tree, but it also has some consequences
-for the possible operations on trees.
-
-Because every node must have at most one parent node, operations are illegal if they violate
-this condition. The following figure (A node can only be added if it is a root) shows on the
-left side that node y is added to x as a new subnode, which is allowed because y does not have
-a parent yet. The right side of the picture illustrates what would happen if y had a parent
-node; this is illegal because y would have two parents after the operation.
-
-Figure 3-3. A node can only be added if it is a root
-[Embedded EPS figure pic/node_add.ps: two trees; on the left a parentless node y is attached
-to x, on the right the same operation is shown as illegal because y already has a parent.]
-clip
-n 6164 2010 m 6173 1575 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 6140 1698 m 6173 1575 l 6201 1699 l 6170 1699 l 6140 1698 l cp gs col7 1.00 shd ef gr col0 s
-% Polyline
-gs clippath
-6404 1588 m 6355 1470 l 6454 1551 l 6371 1440 l 6321 1476 l cp
-clip
-n 6768 2025 m 6355 1470 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 6404 1588 m 6355 1470 l 6454 1551 l 6429 1569 l 6404 1588 l cp gs col7 1.00 shd ef gr col0 s
-% Polyline
-gs clippath
-7784 2499 m 7880 2415 l 7835 2534 l 7914 2420 l 7863 2385 l cp
-clip
-n 7673 2715 m 7880 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 7784 2499 m 7880 2415 l 7835 2534 l 7810 2517 l 7784 2499 l cp gs col7 1.00 shd ef gr col0 s
-% Polyline
-gs clippath
-8263 2535 m 8222 2415 l 8315 2502 l 8240 2386 l 8188 2419 l cp
-clip
-n 8412 2707 m 8222 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 8263 2535 m 8222 2415 l 8315 2502 l 8289 2519 l 8263 2535 l cp gs col7 1.00 shd ef gr col0 s
-% Polyline
-gs clippath
-1232 1945 m 1127 2017 l 1185 1904 l 1094 2008 l 1140 2049 l cp
-clip
-n 1635 1440 m 1127 2017 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 1232 1945 m 1127 2017 l 1185 1904 l 1209 1924 l 1232 1945 l cp gs 0.00 setgray ef gr col0 s
-% Polyline
-gs clippath
-1859 1902 m 1826 2025 l 1797 1901 l 1795 2039 l 1857 2041 l cp
-clip
-n 1834 1590 m 1826 2025 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 1859 1902 m 1826 2025 l 1797 1901 l 1828 1902 l 1859 1902 l cp gs 0.00 setgray ef gr col0 s
-% Polyline
-gs clippath
-2374 1952 m 2422 2070 l 2324 1989 l 2406 2100 l 2456 2064 l cp
-clip
-n 2032 1537 m 2422 2070 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 2374 1952 m 2422 2070 l 2324 1989 l 2349 1970 l 2374 1952 l cp gs 0.00 setgray ef gr col0 s
-% Polyline
-gs clippath
-3421 2606 m 3327 2692 l 3370 2572 l 3293 2687 l 3344 2722 l cp
-clip
-n 3557 2347 m 3327 2692 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 3421 2606 m 3327 2692 l 3370 2572 l 3396 2589 l 3421 2606 l cp gs 0.00 setgray ef gr col0 s
-% Polyline
-gs clippath
-4031 2632 m 4074 2752 l 3980 2666 l 4057 2782 l 4108 2747 l cp
-clip
-n 3875 2452 m 4074 2752 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 4031 2632 m 4074 2752 l 3980 2666 l 4006 2649 l 4031 2632 l cp gs 0.00 setgray ef gr col0 s
-% Polyline
-gs clippath
-1578 1564 m 1683 1492 l 1624 1605 l 1716 1501 l 1670 1460 l cp
-clip
-n 1215 2017 m 1683 1492 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 1578 1564 m 1683 1492 l 1624 1605 l 1601 1584 l 1578 1564 l cp gs col7 1.00 shd ef gr col0 s
-% Polyline
-gs clippath
-1865 1698 m 1898 1575 l 1926 1699 l 1929 1561 l 1867 1559 l cp
-clip
-n 1889 2010 m 1898 1575 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 1865 1698 m 1898 1575 l 1926 1699 l 1895 1698 l 1865 1698 l cp gs col7 1.00 shd ef gr col0 s
-% Polyline
-gs clippath
-2129 1588 m 2080 1470 l 2179 1551 l 2096 1440 l 2046 1476 l cp
-clip
-n 2493 2025 m 2080 1470 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 2129 1588 m 2080 1470 l 2179 1551 l 2154 1569 l 2129 1588 l cp gs col7 1.00 shd ef gr col0 s
-% Polyline
-gs clippath
-3509 2499 m 3605 2415 l 3560 2534 l 3639 2420 l 3588 2385 l cp
-clip
-n 3398 2715 m 3605 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 3509 2499 m 3605 2415 l 3560 2534 l 3535 2517 l 3509 2499 l cp gs col7 1.00 shd ef gr col0 s
-% Polyline
-gs clippath
-3988 2535 m 3947 2415 l 4040 2502 l 3965 2386 l 3913 2419 l cp
-clip
-n 4137 2707 m 3947 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr
-
-% arrowhead
-n 3988 2535 m 3947 2415 l 4040 2502 l 4014 2519 l 3988 2535 l cp gs col7 1.00 shd ef gr col0 s
-% Polyline
- [60] 0 sd
-n 6387 1372 m 8023 2017 l gs col7 0.75 shd ef gr gs col0 s gr [] 0 sd
-% Polyline
-n 4950 900 m 9000 900 l 9000 3375 l 4950 3375 l cp gs col0 s gr
-% Polyline
- [60] 0 sd
-n 2112 1372 m 3748 2017 l gs col7 0.75 shd ef gr gs col0 s gr [] 0 sd
-% Polyline
-n 675 900 m 4725 900 l 4725 3375 l 675 3375 l cp gs col0 s gr
-% Polyline
-gs clippath
-8119 1904 m 8055 2010 l 8061 1886 l 8022 2016 l 8079 2033 l cp
-clip
-n 8197 1545 m 8055 2010 l gs col0 s gr gr
-
-% arrowhead
-n 8119 1904 m 8055 2010 l 8061 1886 l 8090 1895 l 8119 1904 l cp gs 0.00 setgray ef gr col0 s
-% Polyline
-gs clippath
-8214 1695 m 8280 1590 l 8271 1713 l 8313 1585 l 8256 1566 l cp
-clip
-n 8137 2025 m 8280 1590 l gs col0 s gr gr
-
-% arrowhead
-n 8214 1695 m 8280 1590 l 8271 1713 l 8243 1704 l 8214 1695 l cp gs col7 1.00 shd ef gr col0 s
-% Polyline
-30.000 slw
-gs clippath
-7687 2205 m 7502 2333 l 7594 2129 l 7410 2351 l 7503 2428 l cp
-clip
-n 7875 1500 m 7620 1965 l 7845 1920 l 7485 2355 l gs col0 s gr gr
-
-% arrowhead
-15.000 slw
-n 7687 2205 m 7502 2333 l 7594 2129 l 7618 2195 l 7687 2205 l cp gs 0.00 setgray ef gr col0 s
-/Courier-Bold ff 195.00 scf sf
-6094 1379 m
-gs 1 -1 sc (x) col0 sh gr
-/Courier-Bold ff 195.00 scf sf
-7991 2265 m
-gs 1 -1 sc (y) col0 sh gr
-/Courier-Bold ff 195.00 scf sf
-1819 1379 m
-gs 1 -1 sc (x) col0 sh gr
-/Courier-Bold ff 195.00 scf sf
-3716 2265 m
-gs 1 -1 sc (y) col0 sh gr
-/Courier ff 180.00 scf sf
-6459 1335 m
-gs 1 -1 sc (x # add_node y) col0 sh gr
-/Courier ff 180.00 scf sf
-2214 1365 m
-gs 1 -1 sc (x # add_node y) col0 sh gr
-$F2psEnd
-rs
-
-%%EndDocument
-The "delete" operation simply removes the links between two nodes. In the picture (A
-deleted node becomes the root of the subtree) the node x is deleted from the list of
-subnodes of y. After that, x becomes the root of the subtree starting at this node.
-
-Figure 3-4. A deleted node becomes the root of the subtree
-
-[Embedded EPS figure pic/node_delete.ps (fig2dev output): two framed tree diagrams labelled
-"x # delete", showing x detached from its parent y and left as the root of its own subtree.]
-It is also possible to make a clone of a subtree; this is illustrated in The clone of a
-subtree. In this case, the clone is a copy of the original subtree except that it is no
-longer a subnode. Because cloning never keeps the connection to the parent, the clones are
-called orphaned.
-
-Figure 3-5. The clone of a subtree
-
-[Embedded EPS figure pic/node_clone.ps (fig2dev output): two framed tree diagrams labelled
-"let x' = x # orphaned_clone", showing the subtree below x copied to an independent tree
-rooted at x'.]
-3.2.2. The methods of the class type node
-
-General observers.
-
-• extension: The reference to the extension object which belongs to this node (see ...).
-
-• dtd: Returns a reference to the global DTD. All nodes of a tree must share the same DTD.
-
-• parent: Gets the father node. Raises Not_found if the node does not have a parent, i.e.
-  if the node is the root.
-
-• root: Gets the reference to the root node of the tree. Every node is contained in a tree
-  with a root, so this method always succeeds. Note that this method searches the root,
-  which costs time proportional to the length of the path to the root.
-
-• sub_nodes: Returns references to the children. The returned list reflects the order of
-  the children. For data nodes, this method returns the empty list. (A small traversal
-  sketch follows this list.)
-
-• iter_nodes f: Iterates over the children, and calls f for every child in turn.
-
-• iter_nodes_sibl f: Iterates over the children, and calls f for every child in turn.
-  f gets as arguments the previous node, the current node, and the next node.
-
-• node_type: Returns either T_data, which means that the node is a data node, or
-  T_element n, which means that the node is an element of type n. If configured, possible
-  node types are also T_pinstr t, indicating that the node represents a processing
-  instruction with target t, and T_comment, in which case the node is a comment.
-
-• encoding: Returns the encoding of the strings.
-
-• data: Returns the character data of this node and all children, concatenated as one
-  string. The encoding of the string is what the method encoding returns. For data nodes,
-  this method simply returns the represented characters. For elements, the meaning of the
-  method has been extended such that it returns something useful, i.e. the effectively
-  contained characters, without markup. (For T_pinstr and T_comment nodes, the method
-  returns the empty string.)
-
-• position: If configured, this method returns the position of the element as a triple
-  (entity, line, byteposition). For data nodes, the position is not stored. If the
-  position is not available, the triple "?", 0, 0 is returned.
-
-• comment: Returns Some text for comment nodes, and None for other nodes. The text is
-  everything between the comment delimiters <!-- and -->.
-
-• pinstr n: Returns all processing instructions that are directly contained in this
-  element and that have a target specification of n. The target is the first word after
-  the <?.
-
-• pinstr_names: Returns the list of all targets of processing instructions directly
-  contained in this element.
-
-• write s enc: Prints the node and all subnodes to the passed output stream as valid XML
-  text, using the passed external encoding.
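-The observers above already allow a simple recursive traversal. The following sketch is
-illustrative only: the function name dump is made up, and it assumes that the node_type
-constructors described above (T_data, T_element, ...) are in scope, e.g. by opening the
-module that defines them.
-
-let rec dump n =
-  match n # node_type with
-      T_data ->
-        print_endline ("data:    " ^ n # data)
-    | T_element name ->
-        print_endline ("element: " ^ name);
-        List.iter dump (n # sub_nodes)
-    | _ ->
-        ()    (* T_pinstr and T_comment nodes, if configured *)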
-Attribute observers.
-
-• attribute n: Returns the value of the attribute with name n. This method returns a
-  value for every declared attribute, and it raises Not_found for any undeclared
-  attribute. Note that it even returns a value if the attribute is actually missing but
-  is declared as #IMPLIED or has a default value. Possible values are:
-
-  • Implied_value: The attribute has been declared with the keyword #IMPLIED, and the
-    attribute is missing in the attribute list of this element.
-
-  • Value s: The attribute has been declared as type CDATA, as ID, as IDREF, as ENTITY,
-    as NMTOKEN, or as enumeration or notation, and one of the two conditions holds:
-    (1) The attribute value is present in the attribute list, in which case the value is
-    returned in the string s. (2) The attribute has been omitted, and the DTD declared
-    the attribute with a default value. The default value is returned in s. Summarized,
-    Value s is returned for non-implied, non-list attribute values.
-
-  • Valuelist l: The attribute has been declared as type IDREFS, as ENTITIES, or as
-    NMTOKENS, and one of the two conditions holds: (1) The attribute value is present in
-    the attribute list, in which case the space-separated tokens of the value are
-    returned in the string list l. (2) The attribute has been omitted, and the DTD
-    declared the attribute with a default value. The default value is returned in l.
-    Summarized, Valuelist l is returned for all list-type attribute values.
-
-  Note that before the attribute value is returned, the value is normalized. This means
-  that newlines are converted to spaces, and that references to character entities
-  (i.e. &#n;) and general entities (i.e. &name;) are expanded; if necessary, expansion is
-  performed recursively.
-
-  In well-formedness mode, there is no DTD which could declare an attribute. Because of
-  this, every occurring attribute is considered as a CDATA attribute. (An example of
-  matching on these values follows this list.)
-
-• required_string_attribute n: returns the Value attribute called n, or the Valuelist
-  attribute as a string where the list elements are separated by spaces. If the attribute
-  value is implied, or if the attribute does not exist, the method will fail. This method
-  is convenient if you expect a non-implied, non-list attribute value.
-
-• optional_string_attribute n: returns the Value attribute called n, or the Valuelist
-  attribute as a string where the list elements are separated by spaces. If the attribute
-  value is implied, or if the attribute does not exist, the method returns None. This
-  method is convenient if you expect a non-list attribute value including the implied
-  value.
-
-• required_list_attribute n: returns the Valuelist attribute called n, or the Value
-  attribute as a list with a single element. If the attribute value is implied, or if the
-  attribute does not exist, the method will fail. This method is convenient if you expect
-  a list attribute value.
-
-• optional_list_attribute n: returns the Valuelist attribute called n, or the Value
-  attribute as a list with a single element. If the attribute value is implied, or if the
-  attribute does not exist, an empty list will be returned. This method is convenient if
-  you expect a list attribute value or the implied value.
-
-• attribute_names: returns the list of all attribute names of this element. As this is a
-  validating parser, this list is equal to the list of declared attributes.
-
-• attribute_type n: returns the type of the attribute called n. See the module Pxp_types
-  for a description of the encoding of the types.
-
-• attributes: returns the list of pairs of names and values for all attributes of this
-  element.
-
-• id_attribute_name: returns the name of the attribute that is declared with type ID.
-  There is at most one such attribute. The method raises Not_found if there is no
-  declared ID attribute for the element type.
-
-• id_attribute_value: returns the value of the attribute that is declared with type ID.
-  There is at most one such attribute. The method raises Not_found if there is no
-  declared ID attribute for the element type.
-
-• idref_attribute_names: returns the list of attribute names that are declared as IDREF
-  or IDREFS.
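-A small, hypothetical sketch of reading attributes with these observers: the attribute
-name "color" and both function names are made up; Implied_value, Value and Valuelist are
-the constructors described above and are assumed to be in scope (e.g. from Pxp_types).
-
-let describe_color node =
-  match node # attribute "color" with
-      Implied_value -> "no color given"
-    | Value s       -> "color: " ^ s
-    | Valuelist l   -> "colors: " ^ String.concat ", " l
-
-let color_of node =
-  (* fails if "color" is implied or not declared *)
-  node # required_string_attribute "color"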
-Modifying methods. The following methods are only defined for element nodes (more
-exactly: the methods are defined for data nodes, too, but always fail).
-
-• add_node sn: Adds the sub node sn to the list of children. This operation is
-  illustrated in the picture A node can only be added if it is a root. The method expects
-  that sn is a root, and it requires that sn and the current object share the same DTD.
-
-  Because add_node is the method the parser itself uses to add new nodes to the tree, it
-  performs by default some simple validation checks: If the content model is a regular
-  expression, it is not allowed to add data nodes to this node unless the new nodes
-  consist only of whitespace. In this case, the new data nodes are silently dropped (you
-  can change this by invoking keep_always_whitespace_mode).
-
-  If the document is flagged as stand-alone, these data nodes containing only whitespace
-  are even forbidden if the element declaration is contained in an external entity. This
-  case is detected and rejected.
-
-  If the content model is EMPTY, it is not allowed to add any data node unless the data
-  node is empty. In this case, the new data node is silently dropped.
-
-  These checks only apply if there is a DTD. In well-formedness mode, it is assumed that
-  every element is declared with content model ANY, which prohibits any validation check.
-  Furthermore, you can turn these checks off by passing ~force:true as first argument.
-
-• add_pinstr pi: Adds the processing instruction pi to the list of processing
-  instructions.
-
-• delete: Deletes this node from the tree. After this operation, this node is no longer
-  the child of the former father node, and the node loses the connection to the father as
-  well. This operation is illustrated by the figure A deleted node becomes the root of
-  the subtree. (A small sketch combining add_node and delete follows this list.)
-
-• set_nodes nl: Sets the list of children to nl. It is required that every member of nl
-  is a root, and that all members and the current object share the same DTD. Unlike
-  add_node, no validation checks are performed.
-
-• quick_set_attributes atts: sets the attributes of this element to atts. It is not
-  checked whether atts matches the DTD or not; it is up to the caller of this method to
-  ensure this. (This method may be useful to transform the attribute values, i.e. to
-  apply a mapping to every attribute.)
-
-• set_comment text: This method is only applicable to T_comment nodes; it sets the
-  comment text contained by such nodes.
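-A two-line sketch tying this list back to the figures above; x and y stand for element
-nodes of the same document, as in figures 3-3 and 3-4:
-
-x # add_node y;     (* figure 3-3: y must still be a root and must share x's DTD *)
-y # delete          (* undoes the addition; y is again the root of its own subtree *)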
-Cloning methods.
-
-• orphaned_clone: Returns a clone of the node and of the complete tree below this node
-  (deep clone). The clone does not have a parent (i.e. the reference to the parent node
-  is not cloned). While copying the subtree, strings are skipped; it is likely that the
-  original tree and the copied tree share strings. Extension objects are cloned by
-  invoking the clone method on the original objects; how much of the extension objects is
-  cloned depends on the implementation of this method.
-
-  This operation is illustrated by the figure The clone of a subtree.
-
-• orphaned_flat_clone: Returns a clone of the node, but sets the list of sub nodes to [],
-  i.e. the sub nodes are not cloned.
-
-• create_element dtd nt al: Returns a flat copy of this node (which must be an element)
-  with the following modifications: The DTD is set to dtd; the node type is set to nt,
-  and the new attribute list is set to al (given as a list of (name, value) pairs). The
-  copy has neither children nor a parent. It does not contain processing instructions.
-  See the example below.
-
-  Note that you can specify the position of the new node by the optional argument
-  ~position.
-
-• create_data dtd cdata: Returns a flat copy of this node (which must be a data node)
-  with the following modifications: The DTD is set to dtd; the node type is set to
-  T_data; the attribute list is empty (data nodes never have attributes); the list of
-  children and PIs is empty, too (same reason). The new node does not have a parent. The
-  value cdata is the new character content of the node. See the example below.
-
-• keep_always_whitespace_mode: Even data nodes which are normally dropped because they
-  only contain ignorable whitespace can be added to this node once this mode is turned
-  on. (This mode is useful to produce canonical XML.)
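-And the cloning operation of figure 3-5 as a one-line sketch (x is an assumed existing
-node; the clone x' has no parent and may be inserted elsewhere in a tree sharing the same
-DTD):
-
-let x' = x # orphaned_clone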
-Fv(There)f(is)j(one)d(method)g(which)h(locally)f(v)n(alidates)h(the)g
-(node,)f(i.e.)i(checks)e(whether)g(the)396 3435 y(subnodes)g(match)h
-(the)g(content)f(model)g(of)h(this)h(node.)p Black 396
-3667 a Ft(\225)p Black 60 w Fq(local_validate)p Fv(:)e(Checks)h(that)g
-(this)h(node)e(conforms)f(to)j(the)f(DTD)g(by)g(comparing)e(the)i(type)
-g(of)g(the)479 3775 y(subnodes)e(with)i(the)g(content)e(model)h(for)g
-(this)h(node.)e(\(Applications)g(need)h(not)g(call)h(this)h(method)d
-(unless)h(the)o(y)g(add)479 3883 y(ne)n(w)h(nodes)g(themselv)o(es)f(to)
-i(the)f(tree.\))-2 4294 y Fp(3.2.3.)35 b(The)f(c)n(lass)h
-Fc(element_impl)396 4462 y Fv(This)21 b(class)g(is)g(an)f
-(implementation)e(of)i Fq(node)g Fv(which)g(realizes)g(element)g
-(nodes:)396 4642 y Fq(class)44 b([)h('ext)f(])h(element_impl)e(:)h
-('ext)g(->)h([)g('ext)f(])g(node)396 4875 y Fu(Constructor)-8
-b(.)19 b Fv(Y)-9 b(ou)20 b(can)g(create)f(a)i(ne)n(w)f(instance)g(by)p
-Black 3798 5278 a Fr(56)p Black eop
-%%Page: 57 57
-57 56 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
-m(esenting)g(the)g(document)p Black 396 579 a Fq(new)45
-b(element_impl)d Fn(extension_object)396 770 y Fv(which)20
-b(creates)g(a)h(special)f(form)f(of)h(empty)f(element)h(which)g
-(already)f(contains)g(a)i(reference)d(to)j(the)396 878
-y Fl(extension_object)p Fv(,)d(b)n(ut)i(is)h(otherwise)f(empty)-5
-b(.)18 b(This)j(special)f(form)f(is)i(called)f(an)g Fr(e)n(xemplar)r
-Fv(.)g(The)g(purpose)f(of)396 986 y(e)o(x)o(emplars)g(is)i(that)f(the)o
-(y)g(serv)o(e)f(as)i(patterns)f(that)g(can)g(be)g(duplicated)f(and)g
-(\002lled)i(with)f(data.)g(The)g(method)396 1094 y Fq(create_element)f
-Fv(is)i(designed)e(to)h(perform)e(this)j(action.)396
-1243 y Fu(Example.)f Fv(First,)h(create)f(an)g(e)o(x)o(emplar)e(by)396
-1423 y Fq(let)45 b(exemplar_ext)d(=)j(...)f(in)396 1520
-y(let)h(exemplar)222 b(=)45 b(new)f(element_impl)f(exemplar_ext)g(in)
-396 1711 y Fv(The)20 b Fq(exemplar)g Fv(is)h(not)f(used)f(in)i(node)e
-(trees,)h(b)n(ut)g(only)g(as)h(a)f(pattern)g(when)f(the)h(element)g
-(nodes)f(are)i(created:)396 1891 y Fq(let)45 b(element)e(=)i(exemplar)e
-(#)i(create_element)e(dtd)h(\(T_element)f(name\))h(attlist)396
-2082 y Fv(The)20 b Fq(element)g Fv(is)h(a)f(cop)o(y)g(of)g
-Fq(exemplar)f Fv(\(e)n(v)o(en)g(the)h(e)o(xtension)f
-Fq(exemplar_ext)g Fv(has)h(been)g(copied\))e(which)396
-2190 y(ensures)h(that)h Fq(element)f Fv(and)g(its)i(e)o(xtension)d(are)
-i(objects)f(of)h(the)f(same)h(class)h(as)f(the)g(e)o(x)o(emplars;)e
-(note)h(that)h(you)e(need)396 2298 y(not)i(to)g(pass)h(a)g(class)g
-(name)f(or)f(other)h(meta)g(information.)d(The)j(cop)o(y)g(is)h
-(initially)f(connected)e(with)j(the)f Fq(dtd)p Fv(,)g(it)h(gets)f(a)396
-2406 y(node)f(type,)h(and)g(the)g(attrib)n(ute)g(list)h(is)g(\002lled.)
-f(The)g Fq(element)g Fv(is)h(no)n(w)e(fully)h(functional;)e(it)j(can)f
-(be)g(added)f(to)i(another)396 2514 y(element)f(as)h(child,)e(and)h(it)
-h(can)f(contain)f(references)g(to)h(subnodes.)-2 2884
-y Fp(3.2.4.)35 b(The)f(c)n(lass)h Fc(data_impl)396 3051
-y Fv(This)21 b(class)g(is)g(an)f(implementation)e(of)i
-Fq(node)g Fv(which)g(should)f(be)h(used)g(for)f(all)i(character)e(data)
-h(nodes:)396 3232 y Fq(class)44 b([)h('ext)f(])h(data_impl)e(:)i('ext)f
-(->)g([)h('ext)f(])h(node)396 3464 y Fu(Constructor)-8
-b(.)19 b Fv(Y)-9 b(ou)20 b(can)g(create)f(a)i(ne)n(w)f(instance)g(by)
-396 3644 y Fq(new)45 b(data_impl)e Fn(extension_object)396
-3835 y Fv(which)20 b(creates)g(an)g(empty)g(e)o(x)o(emplar)e(node)h
-(which)h(is)h(connected)d(to)i Fl(extension_object)p
-Fv(.)e(The)i(node)f(does)396 3943 y(not)h(contain)f(a)i(reference)d(to)
-j(an)o(y)e(DTD,)h(and)g(because)f(of)h(this)h(it)g(cannot)e(be)h(added)
-f(to)i(node)e(trees.)396 4093 y(T)-7 b(o)21 b(get)f(a)g(fully)g(w)o
-(orking)f(data)h(node,)f(apply)g(the)h(method)f Fq(create_data)g
-Fv(to)h(the)g(e)o(x)o(emplar)f(\(see)h(e)o(xample\).)396
-4242 y Fu(Example.)g Fv(First,)h(create)f(an)g(e)o(x)o(emplar)e(by)396
-4422 y Fq(let)45 b(exemplar_ext)d(=)j(...)f(in)396 4519
-y(let)h(exemplar)222 b(=)45 b(new)f(exemplar_ext)f(data_impl)h(in)396
-4710 y Fv(The)20 b Fq(exemplar)g Fv(is)h(not)f(used)f(in)i(node)e
-(trees,)h(b)n(ut)g(only)g(as)h(a)f(pattern)g(when)f(the)h(data)g(nodes)
-g(are)g(created:)p Black 3797 5278 a Fr(57)p Black eop
-%%Page: 58 58
-58 57 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
-m(esenting)g(the)g(document)p Black 396 579 a Fq(let)45
-b(data_node)e(=)i(exemplar)e(#)i(create_data)e(dtd)h("The)g(characters)
-f(con-)396 676 y(tained)h(in)h(the)f(data)g(node")396
-867 y Fv(The)20 b Fq(data_node)f Fv(is)i(a)g(cop)o(y)e(of)h
-Fq(exemplar)p Fv(.)g(The)f(cop)o(y)h(is)h(initially)f(connected)e(with)
-j(the)f Fq(dtd)p Fv(,)g(and)f(it)i(is)h(\002lled)396
-975 y(with)f(character)e(material.)g(The)h Fq(data_node)f
-Fv(is)i(no)n(w)f(fully)g(functional;)e(it)j(can)f(be)g(added)f(to)h(an)
-h(element)e(as)i(child.)-2 1345 y Fp(3.2.5.)35 b(The)f(type)g
-3.2.5. The type spec
-
-The type spec defines a way to handle the details of creating nodes from
-exemplars.
-
-    type 'ext spec
-    constraint 'ext = 'ext node #extension
-
-    val make_spec_from_mapping :
-      ?super_root_exemplar : 'ext node ->
-      ?comment_exemplar : 'ext node ->
-      ?default_pinstr_exemplar : 'ext node ->
-      ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
-      data_exemplar: 'ext node ->
-      default_element_exemplar: 'ext node ->
-      element_mapping: (string, 'ext node) Hashtbl.t ->
-      unit ->
-        'ext spec
-
-    val make_spec_from_alist :
-      ?super_root_exemplar : 'ext node ->
-      ?comment_exemplar : 'ext node ->
-      ?default_pinstr_exemplar : 'ext node ->
-      ?pinstr_alist : (string * 'ext node) list ->
-      data_exemplar: 'ext node ->
-      default_element_exemplar: 'ext node ->
-      element_alist: (string * 'ext node) list ->
-      unit ->
-        'ext spec
-
-The two functions make_spec_from_mapping and make_spec_from_alist create spec
-values. Both functions are functionally equivalent; the only difference is
-that the first prefers hashtables and the latter associative lists to describe
-mappings from names to exemplars.
-
-You can specify exemplars for the various kinds of nodes that need to be
-generated when an XML document is parsed:
-
-  * ~super_root_exemplar: This exemplar is used to create the super root. This
-    special node is only created if the corresponding configuration option has
-    been selected; it is the parent node of the root node, which may be
-    convenient if every working node must have a parent.
-
-  * ~comment_exemplar: This exemplar is used when a comment node must be
-    created. Note that such nodes are only created if the corresponding
-    configuration option is "on".
-
-  * ~default_pinstr_exemplar: If a node for a processing instruction must be
-    created, and the instruction is not listed in the table passed by
-    ~pinstr_mapping or ~pinstr_alist, this exemplar is used. Again, the
-    configuration option must be "on" in order to create such nodes at all.
-
-  * ~pinstr_mapping or ~pinstr_alist: Map the target names of processing
-    instructions to exemplars. These mappings are only used when nodes for
-    processing instructions are created.
-
-  * ~data_exemplar: The exemplar for ordinary data nodes.
-
-  * ~default_element_exemplar: This exemplar is used if an element node must
-    be created, but the element type cannot be found in the tables
-    element_mapping or element_alist.
-
-  * ~element_mapping or ~element_alist: Map the element types to exemplars.
-    These mappings are used to create element nodes.
-
-In most cases, you only want to create spec values to pass them to the parser
-functions found in Pxp_yacc. However, it might be useful to apply spec values
-directly.
-
-The following functions create various types of nodes by selecting the
-corresponding exemplar from the passed spec value, and by calling
-create_element or create_data on the exemplar.
-
-    val create_data_node :
-      'ext spec ->
-      dtd ->
-      (* data material: *) string ->
-        'ext node
-
-    val create_element_node :
-      ?position:(string * int * int) ->
-      'ext spec ->
-      dtd ->
-      (* element type: *) string ->
-      (* attributes: *) (string * string) list ->
-        'ext node
-
-    val create_super_root_node :
-      ?position:(string * int * int) ->
-      'ext spec ->
-      dtd ->
-        'ext node
-
-    val create_comment_node :
-      ?position:(string * int * int) ->
-      'ext spec ->
-      dtd ->
-      (* comment text: *) string ->
-        'ext node
-
-    val create_pinstr_node :
-      ?position:(string * int * int) ->
-      'ext spec ->
-      dtd ->
-      proc_instruction ->
-        'ext node
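-For instance, given a dtd value and a spec value, a small fragment can be
-built directly from the spec with these functions. This is only a sketch; the
-names my_dtd and my_spec are placeholders, and "p"/"align" are made-up names:
-
-    (* create an element node and a data node from the exemplars in my_spec *)
-    let p   = create_element_node my_spec my_dtd "p" ["align", "left"] in
-    let txt = create_data_node my_spec my_dtd "Hello world" in
-    p # add_node txt
-    (* p now represents <p align="left">Hello world</p> *)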
-3.2.6. Examples
-
-Building trees. Here is the piece of code that creates the tree of the figure
-"A tree with element nodes, data nodes, and attributes". The extension object
-and the DTD are beyond the scope of this example.
-
-    let exemplar_ext = ...  (* some extension *) in
-    let dtd          = ...  (* some DTD *) in
-
-    let element_exemplar = new element_impl exemplar_ext in
-    let data_exemplar    = new data_impl    exemplar_ext in
-
-    let a1 = element_exemplar # create_element dtd (T_element "a") ["att", "apple"]
-    and b1 = element_exemplar # create_element dtd (T_element "b") []
-    and c1 = element_exemplar # create_element dtd (T_element "c") []
-    and a2 = element_exemplar # create_element dtd (T_element "a") ["att", "orange"]
-    in
-
-    let cherries = data_exemplar # create_data dtd "Cherries" in
-    let orange   = data_exemplar # create_data dtd "An orange" in
-
-    a1 # add_node b1;
-    a1 # add_node c1;
-    b1 # add_node a2;
-    b1 # add_node cherries;
-    a2 # add_node orange;
-
-Alternatively, the last block of statements could also be written as:
-
-    a1 # set_nodes [b1; c1];
-    b1 # set_nodes [a2; cherries];
-    a2 # set_nodes [orange];
-
-The root of the tree is a1, i.e. it is true that
-
-    x # root == a1
-
-for every x from { a1, a2, b1, c1, cherries, orange }.
-
-Furthermore, the following properties hold:
-
-      a1 # attribute "att" = Value "apple"
-    & a2 # attribute "att" = Value "orange"
-
-    & cherries # data = "Cherries"
-    &   orange # data = "An orange"
-    &       a1 # data = "CherriesAn orange"
-
-    &       a1 # node_type = T_element "a"
-    &       a2 # node_type = T_element "a"
-    &       b1 # node_type = T_element "b"
-    &       c1 # node_type = T_element "c"
-    & cherries # node_type = T_data
-    &   orange # node_type = T_data
-
-    &       a1 # sub_nodes = [ b1; c1 ]
-    &       a2 # sub_nodes = [ orange ]
-    &       b1 # sub_nodes = [ a2; cherries ]
-    &       c1 # sub_nodes = []
-    & cherries # sub_nodes = []
-    &   orange # sub_nodes = []
-
-    &       a2 # parent == b1
-    &       b1 # parent == a1
-    &       c1 # parent == a1
-    & cherries # parent == b1
-    &   orange # parent == a2
-
-Searching nodes. The following function searches all nodes of a tree for which
-a certain condition holds:
-
-    let rec search p t =
-      if p t then
-        t :: search_list p (t # sub_nodes)
-      else
-        search_list p (t # sub_nodes)
-
-    and search_list p l =
-      match l with
-        []      -> []
-      | t :: l' -> (search p t) @ (search_list p l')
-    ;;
-
-For example, if you want to search all elements of a certain type et, the
-function search can be applied as follows:
-
-    let search_element_type et t =
-      search (fun x -> x # node_type = T_element et) t
-    ;;
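-Applied to the tree built at the beginning of this section, the generic search
-function can, for instance, collect the two data nodes (a small usage sketch):
-
-    let data_nodes = search (fun n -> n # node_type = T_data) a1
-    (* data_nodes = [ orange; cherries ]: the data nodes in the order in
-       which the depth-first recursion encounters them *)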
-Getting attribute values. Suppose we have the declaration:
-
-    <!ATTLIST e a CDATA #REQUIRED
-                b CDATA #IMPLIED
-                c CDATA "12345">
-
-In this case, every element e must have an attribute a, otherwise the parser
-would indicate an error. If the O'Caml variable n holds the node of the tree
-corresponding to the element, you can get the value of the attribute a by
-
-    let value_of_a = n # required_string_attribute "a"
-
-which is more or less an abbreviation for
-
-    let value_of_a =
-      match n # attribute "a" with
-        Value s -> s
-      | _       -> assert false
-
-- as the attribute is required, the attribute method always returns a Value.
-
-In contrast to this, the attribute b can be omitted. In this case, the method
-required_string_attribute works only if the attribute is there, and the method
-will fail if the attribute is missing. To get the value, you can apply the
-method optional_string_attribute:
-
-    let value_of_b = n # optional_string_attribute "b"
-
-Now, value_of_b is of type string option, and None represents the omitted
-attribute. Alternatively, you could also use attribute:
-
-    let value_of_b =
-      match n # attribute "b" with
-        Value s       -> Some s
-      | Implied_value -> None
-      | _             -> assert false
-
-The attribute c behaves much like a, because it always has a value. If the
-attribute is omitted, the default, here "12345", will be returned instead.
-Because of this, you can again use required_string_attribute to get the value.
-
-The type CDATA is the most general string type. The types NMTOKEN, ID, IDREF,
-ENTITY, and all enumerators and notations are special forms of string types
-that restrict the possible values. From O'Caml, they behave like CDATA, i.e.
-you can use the methods required_string_attribute and
-optional_string_attribute, too.
-
-In contrast to this, the types NMTOKENS, IDREFS, and ENTITIES mean lists of
-strings. Suppose we have the declaration:
-
-    <!ATTLIST f d NMTOKENS #REQUIRED
-                e NMTOKENS #IMPLIED>
-
-The type NMTOKENS stands for lists of space-separated tokens; for example the
-value "1 abc 23ef" means the list ["1"; "abc"; "23ef"]. (Again, IDREFS and
-ENTITIES have more restricted values.) To get the value of attribute d, one
-can use
-
-    let value_of_d = n # required_list_attribute "d"
-
-or
-
-    let value_of_d =
-      match n # attribute "d" with
-        Valuelist l -> l
-      | _           -> assert false
-
-As d is required, the attribute cannot be omitted, and the attribute method
-always returns a Valuelist.
-
-For optional attributes like e, apply
-
-    let value_of_e = n # optional_list_attribute "e"
-
-or
-
-    let value_of_e =
-      match n # attribute "e" with
-        Valuelist l   -> l
-      | Implied_value -> []
-      | _             -> assert false
-
-Here, the case that the attribute is missing counts like the empty list.
-3.2.7. Iterators
-
-There are also several iterators in Pxp_document; please see the mli file for
-details. You can find examples for them in the "simple_transformation"
-directory.
-
-    val find : ?deeply:bool ->
-               f:('ext node -> bool) -> 'ext node -> 'ext node
-
-    val find_all : ?deeply:bool ->
-                   f:('ext node -> bool) -> 'ext node -> 'ext node list
-
-    val find_element : ?deeply:bool ->
-                       string -> 'ext node -> 'ext node
-
-    val find_all_elements : ?deeply:bool ->
-                            string -> 'ext node -> 'ext node list
-
-    exception Skip
-
-    val map_tree :  pre:('exta node -> 'extb node) ->
-                   ?post:('extb node -> 'extb node) ->
-                   'exta node ->
-                     'extb node
-
-    val map_tree_sibl :
-       pre: ('exta node option -> 'exta node -> 'exta node option ->
-             'extb node) ->
-      ?post:('extb node option -> 'extb node -> 'extb node option ->
-             'extb node) ->
-      'exta node ->
-        'extb node
-
-    val iter_tree : ?pre:('ext node -> unit) ->
-                    ?post:('ext node -> unit) ->
-                    'ext node ->
-                      unit
-
-    val iter_tree_sibl :
-      ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) ->
-      ?post:('ext node option -> 'ext node -> 'ext node option -> unit) ->
-      'ext node ->
-        unit
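-As a quick illustration of these iterators (a sketch only: root stands for any
-node of a tree, and "title" is a made-up element type):
-
-    (* collect all "title" elements in the subtree below root *)
-    let titles = find_all_elements ~deeply:true "title" root in
-    List.iter (fun n -> print_endline (n # data)) titles;
-
-    (* walk the whole tree and count the data nodes *)
-    let count = ref 0 in
-    iter_tree ~pre:(fun n -> if n # node_type = T_data then incr count) root;
-    Printf.printf "%d data nodes\n" !count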
-3.3. The class type extension
-
-    class type [ 'node ] extension =
-      object ('self)
-        method clone : 'self
-          (* "clone" should return an exact deep copy of the object. *)
-        method node : 'node
-          (* "node" returns the corresponding node of this extension. This
-           * method is intended to return exactly what previously has been
-           * set by "set_node".
-           *)
-        method set_node : 'node -> unit
-          (* "set_node" is invoked once the extension is associated to a new
-           * node object.
-           *)
-      end
-
-This is the type of classes used for node extensions. For every node of the
-document tree, there is not only the node object, but also an extension
-object. The latter has minimal functionality; it has only the necessary
-methods to be attached to the node object containing the details of the node
-instance. The extension object is called extension because its purpose is
-extensibility.
-
-For some reasons, it is impossible to derive the node classes (i.e.
-element_impl and data_impl) such that the subclasses can be extended by new
-methods. But subclassing nodes is a great feature, because it allows the user
-to provide different classes for different types of nodes. The extension
-objects are a workaround that is as powerful as direct subclassing; the costs
-are some notational overhead.
-
-Figure 3-6. The structure of nodes and extensions
-
-[Figure: two linked groups of objects, "The node tree" and "The extensions".
-Dashed arrows labelled "n # extension" lead from a node n to its extension x,
-and arrows labelled "x # node" lead back from the extension to the node.]
-The picture shows how the nodes and extensions are linked together. Every node
-has a reference to its extension, and every extension has a reference to its
-node. The methods extension and node follow these references; a typical phrase
-is
-
-    self # node # attribute "xy"
-
-to get the value of an attribute from a method defined in the extension
-object; or
-
-    self # node # iter
-      (fun n -> n # extension # my_method ...)
-
-to iterate over the subnodes and to call my_method of the corresponding
-extension objects.
-
-Note that extension objects do not have references to subnodes (or
-"subextensions") themselves; in order to get one of the children of an
-extension you must first go to the node object, then get the child node, and
-finally reach the extension that is logically the child of the extension you
-started with.
-
-3.3.1. How to define an extension class
-
-At minimum, you must define the methods clone, node, and set_node such that
-your class is compatible with the type extension. The method set_node is
-called during the initialization of the node, or after a node has been cloned;
-the node object invokes set_node on the extension object to tell it that this
-node is now the object the extension is linked to. The extension must return
-the node object passed as argument of set_node when the node method is called.
-
-The clone method must return a copy of the extension object; at least the
-object itself must be duplicated, but if required, the copy should deeply
-duplicate all objects and values that are referred to by the extension, too.
-Whether this is required depends on the application; clone is invoked by the
-node object when one of its cloning methods is called.
-
-A good starting point for an extension class:
-
-    class custom_extension =
-      object (self)
-        val mutable node = (None : custom_extension node option)
-
-        method clone = {< >}
-
-        method node =
-          match node with
-            None ->
-              assert false
-          | Some n -> n
-
-        method set_node n =
-          node <- Some n
-      end
-
-This class is compatible with extension. The purpose of defining such a class
-is, of course, adding further methods; and you can do it without restriction.
-
-Often you want more than one extension class. In this case, the simplest
-approach is to give all your classes (for one kind of document) the same type
-(with respect to the interface; i.e. it does not matter if your classes differ
-in the defined private methods and instance variables, but public methods
-count). This approach avoids lots of coercions and problems with type
-incompatibilities. It is simple to implement:
-
-    class virtual custom_extension =
-      object (self)
-        val mutable node = (None : custom_extension node option)
-
-        method clone = ...       (* see above *)
-        method node = ...        (* see above *)
-        method set_node n = ...  (* see above *)
-
-        method virtual my_method1 : ...
-        method virtual my_method2 : ...
-        ... (* etc. *)
-      end
-
-    class custom_extension_kind_A =
-      object (self)
-        inherit custom_extension
-
-        method my_method1 = ...
-        method my_method2 = ...
-      end
-
-    class custom_extension_kind_B =
-      object (self)
-        inherit custom_extension
-
-        method my_method1 = ...
-        method my_method2 = ...
-      end
-
-If a class does not need a method (e.g. because it does not make sense, or it
-would violate some important condition), it is possible to define the method
-and to always raise an exception when the method is invoked (e.g. assert
-false).
-
-The latter is a strong recommendation: do not try to further specialize the
-types of extension objects. It is difficult, sometimes even impossible, and
-almost never worth-while.
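-As an illustration of such an added method, the following sketch (the class
-name and the method are invented for this example) extends the minimal
-template from above by one concrete method that returns the character data of
-the attached node in upper case:
-
-    class uppercase_extension =
-      object (self)
-        val mutable node = (None : uppercase_extension node option)
-
-        method clone = {< >}
-        method node =
-          match node with
-            None   -> assert false
-          | Some n -> n
-        method set_node n = node <- Some n
-
-        (* added functionality: character data of the node, upper-cased *)
-        method upcased_data = String.uppercase (self # node # data)
-      end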
-(almost)g(ne)n(v)o(er)f(w)o(orth-while.)-2 3070 y Fp(3.3.2.)35
-b(Ho)n(w)f(to)f(bind)h(e)n(xtension)h(c)n(lasses)h(to)d(element)i
-(types)396 3237 y Fv(Once)20 b(you)f(ha)n(v)o(e)h(de\002ned)f(your)g(e)
-o(xtension)g(classes,)i(you)e(can)h(bind)g(them)f(to)i(element)e
-(types.)h(The)g(simplest)h(case)f(is)396 3345 y(that)h(you)e(ha)n(v)o
-(e)g(only)h(one)f(class)j(and)d(that)i(this)f(class)h(is)h(to)e(be)g
-(al)o(w)o(ays)h(used.)e(The)h(parsing)f(functions)g(in)h(the)h(module)
-396 3453 y Fq(Pxp_yacc)f Fv(tak)o(e)g(a)h Fq(spec)f Fv(ar)o(gument)d
-(which)j(can)g(be)g(customized.)f(If)h(your)f(single)h(class)h(has)g
-(the)f(name)f Fq(c)p Fv(,)i(this)396 3561 y(ar)o(gument)d(should)h(be)
-396 3741 y Fq(let)45 b(spec)f(=)486 3839 y(make_spec_from_alist)576
-3936 y(~data_exemplar:)535 b(\(new)44 b(data_impl)g(c\))576
-4033 y(~default_element_exemplar:)c(\(new)k(element_impl)f(c\))576
-4130 y(~element_alist:)535 b([])576 4227 y(\(\))396 4418
-y Fv(This)21 b(means)f(that)g(data)g(nodes)f(will)i(be)f(created)g
-(from)f(the)h(e)o(x)o(emplar)e(passed)i(by)g(~data_e)o(x)o(emplar)d
-(and)j(that)g(all)396 4526 y(element)g(nodes)f(will)i(be)f(made)g(from)
-f(the)h(e)o(x)o(emplar)e(speci\002ed)i(by)g(~def)o(ault_element_e)o(x)o
-(emplar)-5 b(.)15 b(In)396 4634 y(~element_alist,)k(you)h(can)g(pass)g
-(that)h(dif)n(ferent)d(e)o(x)o(emplars)h(are)h(to)g(be)g(used)g(for)g
-(dif)n(ferent)e(element)i(types;)g(b)n(ut)g(this)396
-4742 y(is)h(an)g(optional)d(feature.)h(If)h(you)g(do)g(not)f(need)h
-(it,)h(pass)f(the)g(empty)g(list.)p Black 3800 5278 a
-Fr(68)p Black eop
-%%Page: 69 69
-69 68 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
-m(esenting)g(the)g(document)p Black 396 579 a Fv(Remember)f(that)i(an)f
-(e)o(x)o(emplar)e(is)j(a)g(\(node,)d(e)o(xtension\))h(pair)g(that)i
-(serv)o(es)f(as)h(pattern)e(when)h(ne)n(w)g(nodes)f(\(and)g(the)396
-687 y(corresponding)e(e)o(xtension)i(objects\))g(are)h(added)f(to)i
-(the)f(document)e(tree.)i(In)g(this)h(case,)f(the)g(e)o(x)o(emplar)f
-(contains)g Fq(c)i Fv(as)396 795 y(e)o(xtension,)e(and)g(when)h(nodes)f
-(are)i(created,)e(the)h(e)o(x)o(emplar)e(is)j(cloned,)e(and)h(cloning)f
-(mak)o(es)h(also)g(a)h(cop)o(y)e(of)h Fq(c)h Fv(such)396
-903 y(that)g(all)f(nodes)g(of)g(the)g(document)e(tree)i(will)h(ha)n(v)o
-(e)f(a)g(cop)o(y)g(of)g Fq(c)g Fv(as)h(e)o(xtension.)396
-1052 y(The)f Fq(~element_alist)f Fv(ar)o(gument)e(can)j(bind)g
-(speci\002c)g(element)g(types)g(to)g(speci\002c)g(e)o(x)o(emplars;)f
-(as)i(e)o(x)o(emplars)396 1160 y(may)f(be)g(instances)g(of)g(dif)n
-(ferent)f(classes)i(it)g(is)g(ef)n(fecti)n(v)o(ely)d(possible)i(to)h
-(bind)e(element)h(types)g(to)g(classes.)h(F)o(or)396
-1268 y(e)o(xample,)e(if)h(the)g(element)g(type)g("p")g(is)h
-(implemented)d(by)i(class)h("c_p",)e(and)h("q")g(is)h(realized)f(by)f
-("c_q",)h(you)f(can)396 1376 y(pass)i(the)f(follo)n(wing)f(v)n(alue:)
-396 1556 y Fq(let)45 b(spec)f(=)486 1653 y(make_spec_from_alist)576
-1750 y(~data_exemplar:)535 b(\(new)44 b(data_impl)g(c\))576
-1847 y(~default_element_exemplar:)c(\(new)k(element_impl)f(c\))576
-1945 y(~element_alist:)665 2042 y([)i("p",)f(new)g(element_impl)f(c_p;)
-755 2139 y("q",)h(new)g(element_impl)f(c_q;)665 2236
-y(])576 2333 y(\(\))396 2524 y Fv(The)20 b(e)o(xtension)f(object)h
-Fq(c)g Fv(is)h(still)h(used)e(for)f(all)i(data)f(nodes)f(and)h(for)g
-(all)g(other)g(element)f(types.)-2 3026 y Fx(3.4.)39
-b(Details)f(of)i(the)f(mapping)e(fr)m(om)i(XML)g(te)n(xt)g(to)g(the)g
-(tree)-2 3212 y(representation)-2 3540 y Fp(3.4.1.)c(The)f
-(representation)h(of)e(c)o(haracter)n(-free)h(elements)396
-3708 y Fv(If)20 b(an)g(element)g(declaration)f(does)h(not)f(allo)n(w)i
-(the)f(element)f(to)i(contain)e(character)g(data,)h(the)g(follo)n(wing)
-e(rules)j(apply)-5 b(.)396 3858 y(If)20 b(the)h(element)e(must)h(be)g
-(empty)-5 b(,)19 b(i.e.)h(it)h(is)g(declared)e(with)i(the)f(k)o(e)o(yw)
-o(ord)e Fq(EMPTY)p Fv(,)i(the)g(element)g(instance)g(must)g(be)396
-3965 y(ef)n(fecti)n(v)o(ely)f(empty)g(\(it)h(must)h(not)f(e)n(v)o(en)f
-(contain)g(whitespace)h(characters\).)e(The)i(parser)g(guarantees)e
-(that)j(a)f(declared)396 4073 y Fq(EMPTY)g Fv(element)g(does)g(ne)n(v)o
-(er)f(contain)g(a)h(data)g(node,)f(e)n(v)o(en)g(if)i(the)f(data)g(node)
-f(represents)h(the)g(empty)f(string.)396 4223 y(If)h(the)h(element)e
-(declaration)g(only)g(permits)h(other)f(elements)h(to)h(occur)e(within)
-h(that)g(element)g(b)n(ut)g(not)g(character)396 4331
-y(data,)g(it)h(is)g(still)g(possible)f(to)h(insert)f(whitespace)g
-(characters)f(between)g(the)h(subelements.)f(The)h(parser)g(ignores)f
-(these)396 4439 y(characters,)g(too,)h(and)g(does)f(not)h(create)g
-(data)g(nodes)g(for)f(them.)396 4588 y Fu(Example.)h
-Fv(Consider)g(the)g(follo)n(wing)f(element)g(types:)396
-4768 y Fq(<!ELEMENT)44 b(x)g(\()h(#PCDATA)f(|)g(z)h(\)*)f(>)396
-4865 y(<!ELEMENT)g(y)g(\()h(z)g(\)*)f(>)p Black 3800
-5278 a Fr(69)p Black eop
-%%Page: 70 70
-70 69 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
-m(esenting)g(the)g(document)p Black 396 579 a Fq(<!ELEMENT)44
-b(z)g(EMPTY>)396 770 y Fv(Only)20 b Fq(x)h Fv(may)e(contain)h
-(character)e(data,)i(the)h(k)o(e)o(yw)o(ord)d Fq(#PCDATA)h
-Fv(indicates)h(this.)h(The)f(other)f(types)h(are)396
-878 y(character)n(-free.)396 1027 y(The)g(XML)g(term)396
-1207 y Fq(<x><z/>)44 b(<z/></x>)396 1398 y Fv(will)21
-b(be)f(internally)f(represented)g(by)g(an)i(element)e(node)g(for)h
-Fq(x)g Fv(with)h(three)f(subnodes:)e(the)j(\002rst)g
-Fq(z)f Fv(element,)g(a)g(data)396 1506 y(node)f(containing)g(the)h
-(space)g(character)m(,)e(and)i(the)g(second)g Fq(z)g
-Fv(element.)g(In)f(contrast)h(to)g(this,)h(the)f(term)396
-1686 y Fq(<y><z/>)44 b(<z/></y>)396 1877 y Fv(is)21 b(represented)e(by)
-h(an)g(element)f(node)g(for)h Fq(y)h Fv(with)f(only)f
-Fr(two)i Fv(subnodes,)e(the)h(tw)o(o)g Fq(z)h Fv(elements.)e(There)h
-(is)h(no)f(data)396 1985 y(node)f(for)h(the)g(space)g(character)f
-(because)h(spaces)g(are)g(ignored)f(in)h(the)g(character)n(-free)e
-(element)i Fq(y)p Fv(.)-2 2355 y Fp(3.4.2.)35 b(The)f(representation)h
-(of)e(c)o(haracter)h(data)396 2523 y Fv(The)20 b(XML)g(speci\002cation)
-g(allo)n(ws)g(all)h(Unicode)e(characters)g(in)i(XML)f(te)o(xts.)g(This)
-g(parser)g(can)g(be)g(con\002gured)e(such)396 2631 y(that)j(UTF-8)e(is)
-i(used)f(to)h(represent)e(the)h(characters)f(internally;)g(ho)n(we)n(v)
-o(er)m(,)f(the)i(def)o(ault)g(character)e(encoding)h(is)396
-2738 y(ISO-8859-1.)e(\(Currently)-5 b(,)18 b(no)i(other)f(encodings)g
-(are)h(possible)g(for)f(the)i(internal)e(string)h(representation;)e
-(the)i(type)396 2846 y Fq(Pxp_types.rep_encoding)d Fv(enumerates)i(the)
-h(possible)g(encodings.)e(Principially)-5 b(,)19 b(the)h(parser)g
-(could)f(use)h(an)o(y)396 2954 y(encoding)e(that)j(is)g
-(ASCII-compatible,)d(b)n(ut)i(there)g(are)g(currently)e(only)i(le)o
-(xical)f(analyzers)h(for)f(UTF-8)h(and)396 3062 y(ISO-8859-1.)d(It)k
-(is)g(currently)d(impossible)i(to)g(use)h(UTF-16)e(or)h(UCS-4)g(as)h
-(internal)f(encodings)e(\(or)i(other)f(multibyte)396
-3170 y(encodings)g(which)g(are)h(not)g(ASCII-compatible\))e(unless)i
-(major)g(parts)g(of)g(the)g(parser)g(are)g(re)n(written)f(-)i(unlik)o
-(ely)-5 b(...\))396 3320 y(The)20 b(internal)g(encoding)e(may)h(be)h
-(dif)n(ferent)f(from)g(the)h(e)o(xternal)f(encoding)f(\(speci\002ed)i
-(in)g(the)g(XML)h(declaration)396 3428 y Fo(<)p Fq(?xml)44
-b(...)g(encoding="..."?)p Fo(>)p Fv(\);)18 b(in)j(this)f(case)h(the)f
-(strings)g(are)g(automatically)f(con)m(v)o(erted)f(to)i(the)g(internal)
-396 3535 y(encoding.)396 3685 y(If)g(the)h(internal)e(encoding)f(is)j
-(ISO-8859-1,)c(it)k(is)g(possible)f(that)g(there)g(are)g(characters)g
-(that)g(cannot)f(be)h(represented.)396 3793 y(In)g(this)h(case,)f(the)g
-(parser)g(ignores)f(such)h(characters)f(and)h(prints)g(a)h(w)o(arning)e
-(\(to)h(the)g Fq(collect_warning)e Fv(object)396 3901
-y(that)j(must)f(be)g(passed)g(when)g(the)g(parser)f(is)i(called\).)396
-4050 y(The)f(XML)g(speci\002cation)g(allo)n(ws)g(lines)h(to)f(be)g
-(separated)g(by)f(single)h(LF)h(characters,)e(by)h(CR)h(LF)g(character)
-396 4158 y(sequences,)e(or)h(by)g(single)g(CR)i(characters.)d
-(Internally)-5 b(,)18 b(these)i(separators)f(are)h(al)o(w)o(ays)h(con)m
-(v)o(erted)d(to)i(single)g(LF)396 4266 y(characters.)396
-4416 y(The)g(parser)g(guarantees)e(that)j(there)e(are)i(ne)n(v)o(er)d
-(tw)o(o)j(adjacent)e(data)h(nodes;)g(if)g(necessary)-5
-b(,)19 b(data)h(material)g(that)g(w)o(ould)396 4523 y(otherwise)g(be)g
-(represented)e(by)i(se)n(v)o(eral)g(nodes)f(is)i(collapsed)f(into)f
-(one)h(node.)f(Note)h(that)g(you)g(can)g(still)h(create)f(node)396
-4631 y(trees)h(with)f(adjacent)g(data)g(nodes;)f(ho)n(we)n(v)o(er)m(,)f
-(the)i(parser)g(does)f(not)h(return)f(such)h(trees.)p
-Black 3800 5278 a Fr(70)p Black eop
-%%Page: 71 71
-71 70 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
-m(esenting)g(the)g(document)p Black 396 579 a Fv(Note)g(that)h(CD)m(A)
--9 b(T)h(A)20 b(sections)g(are)g(not)g(represented)f(specially;)h(such)
-g(sections)g(are)g(added)f(to)h(the)h(current)d(data)396
-687 y(material)i(that)g(being)g(collected)f(for)h(the)g(ne)o(xt)f(data)
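-If UTF-8 is wanted as the internal representation, this is selected through
-the parser configuration described in the next chapter. A minimal sketch,
-assuming that the config record of Pxp_yacc has an encoding field of type
-Pxp_types.rep_encoding (please check the mli for the exact field list):
-
-    (* assumption: Pxp_yacc.config provides an "encoding" field *)
-    let config =
-      { Pxp_yacc.default_config with
-          Pxp_yacc.encoding = `Enc_utf8 }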
-3.4.3. The representation of entities within documents
-
-Entities are not represented within documents! If the parser finds an entity
-reference in the document content, the reference is immediately expanded, and
-the parser reads the expansion text instead of the reference.
-
-3.4.4. The representation of attributes
-
-As attribute values are composed of Unicode characters, too, the same problems
-with the character encoding arise as for character material. Attribute values
-are converted to the internal encoding, too; and if there are characters that
-cannot be represented, these are dropped, and a warning is printed.
-
-Attribute values are normalized before they are returned by methods like
-attribute. First, any remaining entity references are expanded; if necessary,
-expansion is performed recursively. Second, newline characters (any of LF, CR
-LF, or CR characters) are converted to single space characters. Note that
-especially the latter action is prescribed by the XML standard (but a
-character reference to the newline character is not converted, such that it is
-still possible to include line feeds in attributes).
-
-3.4.5. The representation of processing instructions
-
-Processing instructions are parsed to some extent: The first word of the PI is
-called the target, and it is stored separately from the rest of the PI:
-
-    <?target rest?>
-
-The exact location where a PI occurs is not represented (by default). The
-parser puts the PI into the object that represents the embracing construct (an
-element, a DTD, or the whole document); that means you can find out which PIs
-occur in a certain element, in the DTD, or in the whole document, but you
-cannot look up the exact position within the construct.
-
-If you require the exact location of PIs, it is possible to create extra nodes
-for them. This mode is controlled by the option enable_pinstr_nodes. The
-additional nodes have the node type T_pinstr target, and are created from
-special exemplars contained in the spec (see pxp_document.mli).
-
-3.4.6. The representation of comments
-
-Normally, comments are not represented; they are dropped by default. However,
-if you require them, it is possible to create T_comment nodes for them. This
-mode can be specified by the option enable_comment_nodes. Comment nodes are
-created from special exemplars contained in the spec (see pxp_document.mli).
-You can access the contents of comments through the method comment.
-
-3.4.7. The attributes xml:lang and xml:space
-
-These attributes are not supported specially; they are handled like any other
-attribute.
-
-3.4.8. And what about namespaces?
-
-Currently, there is no special support for namespaces. However, the parser
-allows the colon to occur in names, such that it is possible to implement
-namespaces on top of the current API.
-
-Some future release of PXP will support namespaces as a built-in feature...
-5278 a Fr(72)p Black eop
-%%Page: 73 73
-73 72 bop Black Black -2 621 a Fs(Chapter)48 b(4.)f(Con\002guring)j
-(and)e(calling)f(the)h(par)m(ser)-2 1055 y Fx(4.1.)39
-b(Over)q(vie)n(w)396 1235 y Fv(There)20 b(are)g(the)g(follo)n(wing)f
-(main)g(functions)g(in)m(v)n(oking)f(the)i(parser)g(\(in)g(Pxp_yacc\):)
-p Black 396 1558 a Ft(\225)p Black 60 w Fr(par)o(se_document_entity:)d
-Fv(Y)-9 b(ou)19 b(w)o(ant)i(to)f(parse)g(a)g(complete)g(and)f(closed)h
-(document)e(consisting)i(of)g(a)g(DTD)h(and)479 1666
-y(the)f(document)f(body;)g(the)h(body)f(is)i(v)n(alidated)e(against)g
-(the)h(DTD.)h(This)f(mode)f(is)i(interesting)f(if)g(you)f(ha)n(v)o(e)h
-(a)h(\002le)479 1835 y Fq(<!DOCTYPE)44 b(root)g(...)g([)h(...)f(])h(>)f
-(<root>)g(...)h(</root>)396 1984 y Fv(and)20 b(you)f(can)h(accept)g(an)
-o(y)f(DTD)i(that)f(is)h(included)e(in)h(the)g(\002le)h(\(e.g.)f
-(because)f(the)h(\002le)h(is)g(under)e(your)g(control\).)p
-Black 396 2092 a Ft(\225)p Black 60 w Fr(par)o(se_wfdocument_entity:)e
-Fv(Y)-9 b(ou)20 b(w)o(ant)g(to)g(parse)g(a)h(complete)e(and)h(closed)f
-(document)g(consisting)g(of)h(a)h(DTD)479 2200 y(and)f(the)g(document)e
-(body;)h(b)n(ut)h(the)h(body)d(is)k(not)d(v)n(alidated,)g(only)h(check)
-o(ed)e(for)i(well-formedness.)e(This)i(mode)f(is)479
-2308 y(preferred)f(if)j(v)n(alidation)d(costs)j(too)f(much)f(time)i(or)
-f(if)g(the)g(DTD)h(is)g(missing.)p Black 396 2416 a Ft(\225)p
-Black 60 w Fr(par)o(se_dtd_entity:)d Fv(Y)-9 b(ou)20
-b(w)o(ant)g(only)f(to)i(parse)e(an)i(entity)e(\(\002le\))i(containing)d
-(the)i(e)o(xternal)f(subset)h(of)g(a)h(DTD.)479 2524
-y(Sometimes)f(it)h(is)g(interesting)e(to)i(read)e(such)h(a)h(DTD,)f
-(for)g(e)o(xample)e(to)j(compare)d(it)j(with)g(the)f(DTD)g(included)f
-(in)h(a)479 2632 y(document,)e(or)i(to)g(apply)g(the)g(ne)o(xt)f(mode:)
-p Black 396 2740 a Ft(\225)p Black 60 w Fr(par)o(se_content_entity:)e
-Fv(Y)-9 b(ou)20 b(w)o(ant)g(only)g(to)g(parse)g(an)g(entity)g
-(\(\002le\))g(containing)e(a)j(fragment)d(of)i(a)h(document)479
-2848 y(body;)e(this)i(fragment)d(is)j(v)n(alidated)f(against)f(the)h
-(DTD)h(you)e(pass)i(to)f(the)g(function.)e(Especially)-5
-b(,)19 b(the)i(fragment)479 2956 y(must)g(not)e(ha)n(v)o(e)h(a)65
-b Fo(<)p Fq(!DOCTYPE)p Fo(>)19 b Fv(clause,)h(and)g(must)g(directly)g
-(be)o(gin)f(with)h(an)g(element.)f(The)h(element)g(is)479
-3064 y(v)n(alidated)f(against)h(the)g(DTD.)g(This)h(mode)e(is)i
-(interesting)e(if)i(you)e(w)o(ant)h(to)h(check)e(documents)f(against)i
-(a)h(\002x)o(ed,)479 3172 y(immutable)e(DTD.)p Black
-396 3280 a Ft(\225)p Black 60 w Fr(par)o(se_wfcontent_entity:)f
-Fv(This)i(function)f(also)h(parses)g(a)h(single)f(element)g(without)f
-(DTD,)h(b)n(ut)g(does)g(not)g(v)n(alidate)479 3388 y(it.)p
-Black 396 3495 a Ft(\225)p Black 60 w Fr(e)n(xtr)o(act_dtd_fr)l
-(om_document_entity:)15 b Fv(This)20 b(function)f(e)o(xtracts)g(the)i
-(DTD)f(from)f(a)i(closed)f(document)479 3603 y(consisting)g(of)g(a)g
-(DTD)h(and)e(a)i(document)d(body)-5 b(.)18 b(Both)j(the)f(internal)f
-(and)h(the)g(e)o(xternal)f(subsets)h(are)h(e)o(xtracted.)396
-3794 y(In)f(man)o(y)f(cases,)i Fq(parse_document_entity)c
-Fv(is)k(the)f(preferred)e(mode)i(to)g(parse)g(a)g(document)f(in)h(a)h
-(v)n(alidating)396 3902 y(w)o(ay)-5 b(,)20 b(and)g Fq
-(parse_wfdocument_entity)c Fv(is)22 b(the)e(mode)f(of)h(choice)f(to)i
-(parse)f(a)g(\002le)h(while)f(only)g(checking)e(for)396
-4010 y(well-formedness.)396 4160 y(There)i(are)g(a)g(number)f(of)h(v)n
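-For instance, a validating parser run typically looks roughly like this. This
-is only a sketch, assuming open Pxp_yacc and open Pxp_document, the
-conventional default_config and default_spec values, and a file "doc.xml":
-
-    let () =
-      let doc =
-        parse_document_entity default_config (from_file "doc.xml") default_spec
-      in
-      (* doc # root is the node of the root element *)
-      match doc # root # node_type with
-        T_element name -> print_endline ("root element: " ^ name)
-      | _              -> ()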
-(ariations)f(of)h(these)g(modes.)f(One)h(important)f(application)g(of)h
-(a)g(parser)g(is)h(to)f(check)396 4268 y(documents)f(of)h(an)g
-(untrusted)f(source)g(against)h(a)g(\002x)o(ed)g(DTD.)g(One)g(solution)
-f(is)i(to)g(not)f(allo)n(w)g(the)g Fo(<)p Fq(!DOCTYPE)p
-Fo(>)396 4375 y Fv(clause)g(in)h(these)f(documents,)e(and)i(treat)g
-(the)h(document)d(lik)o(e)i(a)h(fragment)d(\(using)i(mode)f
-Fr(par)o(se_content_entity)p Fv(\).)396 4483 y(This)i(is)g(v)o(ery)e
-(simple,)h(b)n(ut)g(in\003e)o(xible;)f(users)i(of)e(such)h(a)h(system)f
-(cannot)f(e)n(v)o(en)h(de\002ne)f(additional)g(entities)i(to)396
-4591 y(abbre)n(viate)e(frequent)f(phrases)i(of)g(their)g(te)o(xt.)396
-4741 y(It)h(may)e(be)i(necessary)e(to)h(ha)n(v)o(e)g(a)h(more)e
-(intelligent)g(check)o(er)-5 b(.)20 b(F)o(or)g(e)o(xample,)e(it)j(is)g
-(also)g(possible)e(to)i(parse)f(the)396 4849 y(document)e(to)j(check)e
-(fully)-5 b(,)19 b(i.e.)h(with)h(DTD,)f(and)f(to)i(compare)d(this)j
-(DTD)f(with)h(the)f(prescribed)f(one.)g(In)h(order)f(to)p
-Black 3800 5278 a Fr(73)p Black eop
-%%Page: 74 74
-74 73 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
-(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(fully)g(parse)g(the)g
-(document,)e(mode)h Fr(par)o(se_document_entity)e Fv(is)k(applied,)e
-(and)h(to)g(get)g(the)g(DTD)h(to)f(compare)f(with)396
-687 y(mode)g Fr(par)o(se_dtd_entity)f Fv(can)i(be)h(used.)396
-836 y(There)f(is)h(another)d(v)o(ery)i(important)e(con\002gurable)g
-(aspect)i(of)g(the)g(parser:)g(the)g(so-called)g(resolv)o(er)-5
-b(.)19 b(The)h(task)g(of)g(the)396 944 y(resolv)o(er)f(is)i(to)g
-(locate)f(the)g(contents)f(of)h(an)g(\(e)o(xternal\))f(entity)g(for)h
-(a)h(gi)n(v)o(en)e(entity)g(name,)h(and)f(to)i(mak)o(e)e(the)i
-(contents)396 1052 y(accessible)g(as)f(a)h(character)e(stream.)h
-(\(Furthermore,)d(it)k(also)f(normalizes)g(the)g(character)f(set;)i(b)n
-(ut)f(this)h(is)g(a)f(detail)h(we)396 1160 y(can)f(ignore)f(here.\))g
-(Consider)h(you)f(ha)n(v)o(e)h(a)g(\002le)h(called)f
-Fq("main.xml")f Fv(containing)396 1340 y Fq(<!ENTITY)44
-b(\045)g(sub)h(SYSTEM)f("sub/sub.xml">)396 1437 y(\045sub;)396
-1628 y Fv(and)20 b(a)h(\002le)f(stored)g(in)g(the)h(subdirectory)c
-Fq("sub")j Fv(with)h(name)e Fq("sub.xml")g Fv(containing)396
-1808 y Fq(<!ENTITY)44 b(\045)g(subsub)g(SYSTEM)g("subsub/subsub.xml">)
-396 1906 y(\045subsub;)396 2097 y Fv(and)20 b(a)g(\002le)h(stored)e(in)
-h(the)g(subdirectory)d Fq("subsub")j Fv(of)f Fq("sub")h
-Fv(with)g(name)f Fq("subsub.xml")g Fv(\(the)g(contents)h(of)f(this)396
-2204 y(\002le)i(do)f(not)g(matter\).)f(Here,)h(the)g(resolv)o(er)f
-(must)h(track)g(that)g(the)g(second)g(entity)g Fq(subsub)f
-Fv(is)i(located)f(in)g(the)h(directory)396 2312 y Fq("sub/subsub")p
-Fv(,)e(i.e.)h(the)g(dif)n(\002culty)f(is)i(to)g(interpret)e(the)h
-(system)g(\(\002le\))h(names)e(of)h(entities)h(relati)n(v)o(e)e(to)i
-(the)f(entities)396 2420 y(containing)f(them,)g(e)n(v)o(en)g(if)i(the)f
-(entities)h(are)f(deeply)f(nested.)396 2570 y(There)h(is)h(not)f(a)g
-(\002x)o(ed)g(resolv)o(er)f(already)g(doing)g(e)n(v)o(erything)e(right)
-j(-)g(resolving)f(entity)h(names)g(is)h(a)f(task)h(that)f(highly)396
-2678 y(depends)f(on)h(the)g(en)m(vironment.)d(The)j(XML)g
-(speci\002cation)f(only)h(demands)f(that)h Fq(SYSTEM)g
-Fv(entities)g(are)g(interpreted)396 2786 y(lik)o(e)h(URLs)g(\(which)e
-(is)i(not)f(v)o(ery)f(precise,)h(as)h(there)e(are)i(lots)f(of)g(URL)h
-(schemes)f(in)g(use\),)g(hoping)f(that)h(this)h(helps)396
-2894 y(o)o(v)o(ercoming)c(the)j(local)g(peculiarities)g(of)g(the)g(en)m
-(vironment;)d(the)k(idea)f(is)h(that)f(if)h(you)e(do)h(not)f(kno)n(w)h
-(your)396 3001 y(en)m(vironment)d(you)j(can)g(refer)f(to)h(other)g
-(entities)g(by)g(denoting)e(URLs)k(for)d(them.)h(I)g(think)g(that)g
-(this)h(interpretation)d(of)396 3109 y Fq(SYSTEM)i Fv(names)g(may)g(ha)
-n(v)o(e)f(some)h(applications)f(in)i(the)f(internet,)f(b)n(ut)h(it)h
-(is)g(not)f(the)g(\002rst)h(choice)f(in)g(general.)396
-3217 y(Because)h(of)f(this,)g(the)g(resolv)o(er)f(is)i(a)g(separate)f
-(module)e(of)i(the)h(parser)e(that)h(can)g(be)h(e)o(xchanged)c(by)j
-(another)f(one)g(if)396 3325 y(necessary;)h(more)f(precisely)-5
-b(,)19 b(the)h(parser)g(already)f(de\002nes)h(se)n(v)o(eral)f(resolv)o
-(ers.)396 3475 y(The)h(follo)n(wing)f(resolv)o(ers)g(do)h(already)f(e)o
-(xist:)p Black 396 3707 a Ft(\225)p Black 60 w Fv(Resolv)o(ers)h
-(reading)f(from)g(arbitrary)g(input)g(channels.)g(These)h(can)g(be)g
-(con\002gured)e(such)i(that)g(a)h(certain)f(ID)g(is)479
-3815 y(associated)g(with)h(the)f(channel;)f(in)h(this)h(case)g(inner)e
-(references)g(to)h(e)o(xternal)f(entities)i(can)f(be)g(resolv)o(ed.)e
-(There)i(is)479 3923 y(also)h(a)f(special)h(resolv)o(er)e(that)h
-(interprets)f(SYSTEM)i(IDs)f(as)h(URLs;)g(this)g(resolv)o(er)e(can)h
-(process)g(relati)n(v)o(e)479 4031 y(SYSTEM)h(names)e(and)h(determine)f
-(the)h(corresponding)d(absolute)i(URL.)p Black 396 4139
-a Ft(\225)p Black 60 w Fv(A)i(resolv)o(er)e(that)h(reads)g(al)o(w)o
-(ays)h(from)e(a)i(gi)n(v)o(en)d(O'Caml)j(string.)e(This)i(resolv)o(er)e
-(is)i(not)f(able)g(to)g(resolv)o(e)f(further)479 4247
-y(names)h(unless)g(the)h(string)f(is)h(not)f(associated)g(with)g(an)o
-(y)f(name,)h(i.e.)g(if)g(the)g(document)f(contained)f(in)j(the)f
-(string)479 4355 y(refers)g(to)g(an)g(e)o(xternal)f(entity)-5
-b(,)20 b(this)g(reference)f(cannot)g(be)h(follo)n(wed)f(in)h(this)h
-(case.)p Black 396 4463 a Ft(\225)p Black 60 w Fv(A)g(resolv)o(er)e
-(for)g(\002le)i(names.)f(The)g Fq(SYSTEM)g Fv(name)f(is)i(interpreted)e
-(as)i(\002le)f(URL)h(with)g(the)f(slash)h("/")f(as)h(separator)479
-4571 y(for)f(directories.)f(-)h(This)h(resolv)o(er)d(is)k(deri)n(v)o
-(ed)c(from)h(the)h(generic)f(URL)i(resolv)o(er)-5 b(.)396
-4720 y(The)20 b(interf)o(ace)f(a)i(resolv)o(er)e(must)h(ha)n(v)o(e)g
-(is)h(documented,)c(so)k(it)g(is)g(possible)f(to)g(write)g(your)f(o)n
-(wn)h(resolv)o(er)-5 b(.)19 b(F)o(or)396 4828 y(e)o(xample,)g(you)g
-(could)g(connect)g(the)h(parser)g(with)g(an)h(HTTP)f(client,)g(and)f
-(resolv)o(e)h(URLs)h(of)f(the)g(HTTP)g(namespace.)p Black
-3800 5278 a Fr(74)p Black eop
-%%Page: 75 75
-75 74 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
The resolver classes support combining several independent resolvers into one more powerful resolver; thus it is possible to combine a self-written resolver with the already existing resolvers.

Note that the existing resolvers only interpret SYSTEM names, not PUBLIC names. If it helps you, it is possible to define resolvers for PUBLIC names, too; for example, such a resolver could look up the public name in a hash table and map it to a system name which is then passed over to the existing resolver for system names. It is relatively simple to provide such a resolver.
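The lookup idea can be sketched in a few lines. This is only an illustration, not part of the library: the catalog entry is invented, and the constructors Public and System are assumed to be those of the ext_id type in Pxp_types. A complete solution would wrap such a mapping in a resolver object with the interface described in section 4.2.2 below.

let public_catalog = Hashtbl.create 7
let () =
  Hashtbl.add public_catalog "-//EXAMPLE//DTD Note//EN" "note.dtd"

(* Map a PUBLIC identifier to the corresponding SYSTEM name before handing
 * it to a resolver that only understands SYSTEM names; unknown PUBLIC
 * names and all other IDs are passed through unchanged. *)
let map_public_id id =
  match id with
  | Public (public_name, _) ->
      (try System (Hashtbl.find public_catalog public_name)
       with Not_found -> id)
  | other -> other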
4.2. Resolvers and sources

4.2.1. Using the built-in resolvers (called sources)

The type source enumerates the two possibilities where the document to parse comes from:

type source =
    Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
  | ExtID of (ext_id * Pxp_reader.resolver)

You normally need not worry about this type, as there are convenience functions that create source values:

• from_file s: The document is read from the file s; you may specify absolute or relative path names. The file name must be encoded as a UTF-8 string.

  There is an optional argument ~system_encoding specifying the character encoding which is used for the names of the file system. For example, if this encoding is ISO-8859-1 and s is also an ISO-8859-1 string, you can form the source:

  let s_utf8 = recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s in
  from_file ~system_encoding:`Enc_iso88591 s_utf8

  This source has the advantage that it is able to resolve inner external entities; i.e. if your document includes data from another file (using the SYSTEM attribute), this mode will find that file. However, this mode cannot resolve PUBLIC identifiers, nor SYSTEM identifiers other than "file:".

• from_channel ch: The document is read from the channel ch. In general, this source also supports file URLs found in the document; however, by default only absolute URLs are understood. It is possible to associate an ID with the channel such that the resolver knows how to interpret relative URLs:

  from_channel ~id:(System "file:///dir/dir1/") ch

  There is also the ~system_encoding argument specifying how file names are encoded. The example from above can also be written as follows (but it is no longer possible to interpret relative URLs because there is no ~id argument, and computing this argument is relatively complicated because it must be a valid URL):

  let ch = open_in s in
  let src = from_channel ~system_encoding:`Enc_iso88591 ch in
  ...;
  close_in ch

• from_string s: The string s is the document to parse. This mode is not able to interpret file names of SYSTEM clauses, nor can it look up PUBLIC identifiers.

  Normally, the encoding of the string is detected as usual by analyzing the XML declaration, if any. However, it is also possible to specify the encoding directly:

  let src = from_string ~fixenc:`Enc_iso88592 s

• ExtID (id, r): The document to parse is denoted by the identifier id (either a SYSTEM or PUBLIC clause), and this identifier is interpreted by the resolver r. Use this mode if you have written your own resolver.

  Which character sets are possible depends on the passed resolver r.

• Entity (get_entity, r): The document to parse is returned by the function invocation get_entity dtd, where dtd is the DTD object to use (it may be empty). Inner external references occurring in this entity are resolved using the resolver r.

  Which character sets are possible depends on the passed resolver r.
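For quick reference, a small sketch of the three simplest ways to obtain a source value; the file name, URL, and document content are placeholders. Any of these values can later be handed to the parsing functions of section 4.4.

let src1 = from_file "doc.xml"
let src2 = from_channel ~id:(System "file:///data/doc.xml") (open_in "doc.xml")
let src3 = from_string "<?xml version='1.0' encoding='ISO-8859-1'?><note>data</note>"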
4.2.2. The resolver API

A resolver is an object that can be opened like a file, but you do not pass a file name to the resolver; instead, you pass the XML identifier of the entity to read from (either a SYSTEM or PUBLIC clause). When opened, the resolver must return the Lexing.lexbuf that reads the characters. The resolver can be closed, and it can be cloned. Furthermore, it is possible to tell the resolver which character set it should assume. The following is from Pxp_reader:

exception Not_competent
exception Not_resolvable of exn

class type resolver =
  object
    method init_rep_encoding : rep_encoding -> unit
    method init_warner : collect_warnings -> unit
    method rep_encoding : rep_encoding
    method open_in : ext_id -> Lexing.lexbuf
    method close_in : unit
    method change_encoding : string -> unit
    method clone : resolver
    method close_all : unit
  end

The resolver object must work as follows:

• When the parser is called, it tells the resolver the warner object and the internal encoding by invoking init_warner and init_rep_encoding. The resolver should store these values. The method rep_encoding should return the internal encoding.

• If the parser wants to read from the resolver, it invokes the method open_in. Either the resolver succeeds, in which case the Lexing.lexbuf reading from the file or stream must be returned, or opening fails. In the latter case the method implementation should raise an exception (see below).

• If the parser finishes reading, it calls the close_in method.

• If the parser finds a reference to another external entity in the input stream, it calls clone to get a second resolver which must be initially closed (not yet connected with an input stream). The parser then invokes open_in and the other methods as described.

• If you already know the character set of the input stream, you should recode it to the internal encoding, and define the method change_encoding as an empty method.

• If you want to support multiple external character sets, the object must follow a much more complicated protocol. Directly after open_in has been called, the resolver must return a lexical buffer that only reads one byte at a time. This is only possible if you create the lexical buffer with Lexing.from_function; the function must then always return 1 if EOF is not yet reached, and 0 if EOF is reached. Once the parser has read the first line of the document, it will invoke change_encoding to tell the resolver which character set to assume. From this moment on, the object can return more than one byte at once. The argument of change_encoding is either the parameter of the "encoding" attribute of the XML declaration, or the empty string if there is no XML declaration or if the declaration does not contain an encoding attribute.

  At the beginning the resolver must only return one character every time something is read from the lexical buffer. The reason for this is that you would otherwise not know exactly at which position in the input stream the character set changes.

  If you want automatic recognition of the character set, it is up to the resolver object to implement this.

• If an error occurs, the parser calls the method close_all for the top-level resolver; this method should close itself (if not already done) and all clones.

Exceptions. It is possible to chain resolvers such that when the first resolver is not able to open the entity, the other resolvers of the chain are tried in turn. The method open_in should raise the exception Not_competent to indicate that the next resolver should try to open the entity. If the resolver is able to handle the ID, but some other error occurs, the exception Not_resolvable should be raised to force the chain to break.

Example: How to define a resolver that is equivalent to from_string: ...
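The elided example could be sketched as follows. This is only an illustrative sketch, not the parser's own from_string implementation: it assumes that the string is already in the internal encoding (so change_encoding does nothing), it ignores the one-byte-at-a-time protocol described above, and the names rep_encoding, collect_warnings, ext_id, and Not_competent are assumed to be visible from Pxp_types and Pxp_reader.

class resolve_this_string (str : string) =
  object (self)
    val mutable enc      = (`Enc_iso88591 : rep_encoding)
    val mutable warner   = (None : collect_warnings option)
    val mutable is_open  = false
    val mutable is_clone = false

    (* Store what the parser tells us. *)
    method init_rep_encoding e = enc <- e
    method init_warner w       = warner <- Some w
    method rep_encoding        = enc

    (* Accept any ID and hand the parser a lexbuf over the string.
     * A clone cannot resolve further names, so it refuses every ID. *)
    method open_in (_ : ext_id) =
      if is_clone then raise Not_competent;
      if is_open then failwith "resolve_this_string: already open";
      is_open <- true;
      Lexing.from_string str

    method close_in  = is_open <- false
    method close_all = self#close_in   (* this sketch does not track clones *)

    (* The string has a fixed, known encoding, so there is nothing to switch. *)
    method change_encoding (_ : string) = ()

    (* Clones start out closed and not connected with any input stream. *)
    method clone = {< is_open = false; is_clone = true >}
  end

Note that in this sketch the clone method returns the object's own type, whereas the class type resolver declares clone : resolver; an instance may therefore need a coercion to the resolver type when it is passed to the ExtID or Entity constructors.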
4.2.3. Predefined resolver components

There are some classes in Pxp_reader that define common resolver behaviour.

class resolve_read_this_channel :
  ?id:ext_id ->
  ?fixenc:encoding ->
  ?auto_close:bool ->
  in_channel ->
    resolver

Reads from the passed channel (which may even be a pipe). If the ~id argument is passed to the object, the created resolver accepts only this ID; otherwise all IDs are accepted. Once the resolver has been cloned, it does not accept any ID; this means that this resolver cannot handle inner references to external entities. Note that you can combine this resolver with another resolver that can handle inner references (such as resolve_as_file); see the class 'combine' below. If you pass the ~fixenc argument, the encoding of the channel is set to the passed value, regardless of any auto-recognition or any XML declaration. If ~auto_close = true (which is the default), the channel is closed after use; if ~auto_close = false, the channel is left open.

class resolve_read_any_channel :
  ?auto_close:bool ->
  channel_of_id:(ext_id -> (in_channel * encoding option)) ->
    resolver

This resolver calls the function ~channel_of_id to open a new channel for the passed ext_id. This function must either return the channel and the encoding, or it must fail with Not_competent. The function must return None as encoding if the default mechanism to recognize the encoding should be used; it must return Some e if it is already known that the encoding of the channel is e. If ~auto_close = true (which is the default), the channel is closed after use; if ~auto_close = false, the channel is left open.
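For illustration, a possible ~channel_of_id function might treat SYSTEM names as plain file names of the local file system and leave every other kind of ID to the next resolver in a chain. This is only a sketch under that assumption (SYSTEM names as ordinary paths rather than "file:" URLs); System and Not_competent are the names from Pxp_types and Pxp_reader.

let channel_of_id =
  function
    | System filename -> (open_in filename, None)  (* None: auto-recognize encoding *)
    | _               -> raise Not_competent       (* let another resolver try it *)

let r = new resolve_read_any_channel ~channel_of_id:channel_of_id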
class resolve_read_url_channel :
  ?base_url:Neturl.url ->
  ?auto_close:bool ->
  url_of_id:(ext_id -> Neturl.url) ->
  channel_of_url:(Neturl.url -> (in_channel * encoding option)) ->
    resolver

When this resolver gets an ID to read from, it calls the function ~url_of_id to get the corresponding URL. This URL may be a relative URL; however, a URL scheme must be used which contains a path. The resolver converts the URL to an absolute URL if necessary. The second function, ~channel_of_url, is fed with the absolute URL as input. This function opens the resource to read from, and returns the channel and the encoding of the resource.

Both functions, ~url_of_id and ~channel_of_url, can raise Not_competent to indicate that the object is not able to read from the specified resource. However, there is a difference: a Not_competent from ~url_of_id is left as it is, but a Not_competent from ~channel_of_url is converted to Not_resolvable. So only ~url_of_id decides which URLs are accepted by the resolver and which are not.

The function ~channel_of_url must return None as encoding if the default mechanism to recognize the encoding should be used; it must return Some e if it is already known that the encoding of the channel is e.

If ~auto_close = true (which is the default), the channel is closed after use; if ~auto_close = false, the channel is left open.

Objects of this class contain a base URL relative to which relative URLs are interpreted. When creating a new object, you can specify the base URL by passing it as the ~base_url argument. When an existing object is cloned, the base URL of the clone is the URL of the original object. Note that the term "base URL" has a strict definition in RFC 1808.

class resolve_read_this_string :
  ?id:ext_id ->
  ?fixenc:encoding ->
  string ->
    resolver

Reads from the passed string. If the ~id argument is passed to the object, the created resolver accepts only this ID; otherwise all IDs are accepted. Once the resolver has been cloned, it does not accept any ID; this means that this resolver cannot handle inner references to external entities. Note that you can combine this resolver with another resolver that can handle inner references (such as resolve_as_file); see the class 'combine' below. If you pass the ~fixenc argument, the encoding of the string is set to the passed value, regardless of any auto-recognition or any XML declaration.

class resolve_read_any_string :
  string_of_id:(ext_id -> (string * encoding option)) ->
    resolver

This resolver calls the function ~string_of_id to get the string for the passed ext_id. This function must either return the string and the encoding, or it must fail with Not_competent. The function must return None as encoding if the default mechanism to recognize the encoding should be used; it must return Some e if it is already known that the encoding of the string is e.
class resolve_as_file :
  ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
  ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
  ?system_encoding:encoding ->
  ?url_of_id:(ext_id -> Neturl.url) ->
  ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
  unit ->
    resolver

Reads from the local file system. Every file name is interpreted as a file name of the local file system, and the referred file is read.

The full form of a file URL is file://host/path, where 'host' specifies the host system on which the file identified by 'path' resides. host = "" and host = "localhost" are accepted; other values will raise Not_competent. The standard for file URLs is defined in RFC 1738.

Option ~file_prefix: specifies how the "file:" prefix of file names is handled:

• `Not_recognized: The prefix is not recognized.
• `Allowed: The prefix is allowed but not required (the default).
• `Required: The prefix is required.

Option ~host_prefix: specifies how the "//host" phrase of file names is handled:

• `Not_recognized: The prefix is not recognized.
• `Allowed: The prefix is allowed but not required (the default).
• `Required: The prefix is required.

Option ~system_encoding: specifies the encoding of file names of the local file system. Default: UTF-8.

Options ~url_of_id, ~channel_of_url: not for the casual user!

class combine :
  ?prefer:resolver ->
  resolver list ->
    resolver

Combines several resolver objects. If a concrete entity with an ext_id is to be opened, the combined resolver tries the contained resolvers in turn until a resolver accepts opening the entity (i.e. it does not raise Not_competent on open_in).

Clones: If the 'clone' method is invoked before 'open_in', all contained resolvers are cloned separately and combined again. If the 'clone' method is invoked after 'open_in' (i.e. while the resolver is open), the clone of the active resolver is additionally flagged as being preferred, i.e. it is tried first.
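The typical combination mentioned above (an in-memory document plus the file-system resolver for inner references) might look as follows. This is a sketch only: the ID, the document content, and the file names are placeholders, and it relies on the documented behaviour of resolve_read_this_string, resolve_as_file, and combine.

(* The document itself comes from a string that pretends to live at doc_id;
 * inner references such as the SYSTEM DTD are handled by resolve_as_file. *)
let doc_id = System "file:///project/main.xml"

let in_memory =
  new resolve_read_this_string ~id:doc_id
    "<?xml version='1.0'?><!DOCTYPE root SYSTEM 'common.dtd'><root/>"

let file_system = new resolve_as_file ()

let r   = new combine [ in_memory; file_system ]
let src = ExtID (doc_id, r)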
4.3. The DTD classes

Sorry, not yet written. Perhaps the interface definition of Pxp_dtd expresses the same:

(**********************************************************************)
(*                                                                    *)
(* Pxp_dtd:                                                           *)
(*   Object model of document type declarations                      *)
(*                                                                    *)
(**********************************************************************)

(* ====================================================================
 * OVERVIEW
 *
 * class dtd ............... represents the whole DTD, including element
 *                           declarations, entity declarations, notation
 *                           declarations, and processing instructions
 * class dtd_element ....... represents an element declaration consisting
 *                           of a content model and an attribute list
 *                           declaration
 * class dtd_notation ...... represents a notation declaration
 * class proc_instruction .. represents a processing instruction
 * ====================================================================
 *)

class dtd :
  (* Creation:
   *   new dtd
   * creates a new, empty DTD object without any declaration, without a
   * root element, without an ID.
   *)
  Pxp_types.collect_warnings ->
  Pxp_types.rep_encoding ->
  object
    method root : string option
      (* get the name of the root element if present *)
    method set_root : string -> unit
      (* set the name of the root element; can be invoked only once *)
    method id : Pxp_types.dtd_id option
      (* get the identifier for this DTD *)
    method set_id : Pxp_types.dtd_id -> unit
      (* set the identifier; can be invoked only once *)
    method encoding : Pxp_types.rep_encoding
      (* returns the encoding used for character representation *)

    method allow_arbitrary : unit
      (* After this method has been invoked, the object changes its
       * behaviour: elements and notations that have not been added may be
       * used in an arbitrary way; the methods "element" and "notation"
       * indicate this by raising Undeclared instead of Validation_error. *)
    method disallow_arbitrary : unit
    method arbitrary_allowed : bool
      (* Returns whether arbitrary contents are allowed or not. *)

    method standalone_declaration : bool
      (* Whether there is a 'standalone' declaration or not. Strictly
       * speaking, this declaration is not part of the DTD, but it is
       * included here for practical reasons. Defaults to 'false'. *)
    method set_standalone_declaration : bool -> unit
      (* Sets the 'standalone' declaration. *)

    method add_element : dtd_element -> unit
      (* add the given element declaration to this DTD. Raises Not_found
       * if there is already an element declaration with the same name. *)
    method add_gen_entity : Pxp_entity.entity -> bool -> unit
      (* add_gen_entity e extdecl:
       * add the entity 'e' as general entity to this DTD (general
       * entities are those represented by &name;). If there is already a
       * declaration with the same name, the second definition is ignored;
       * as an exception from this rule, entities with names "lt", "gt",
       * "amp", "quot", and "apos" may only be redeclared with a definition
       * that is equivalent to the standard definition; otherwise a
       * Validation_error is raised.
       * 'extdecl': 'true' indicates that the entity declaration occurs in
       * an external entity. (Used for the standalone check.) *)
    method add_par_entity : Pxp_entity.entity -> unit
      (* add the given entity as parameter entity to this DTD (parameter
       * entities are those represented by %name;). If there is already a
       * declaration with the same name, the second definition is ignored. *)
    method add_notation : dtd_notation -> unit
      (* add the given notation to this DTD. If there is already a
       * declaration with the same name, a Validation_error is raised. *)
    method add_pinstr : proc_instruction -> unit
      (* add the given processing instruction to this DTD *)

    method element : string -> dtd_element
      (* looks up the element declaration with the given name. Raises
       * Validation_error if the element cannot be found. (If
       * "allow_arbitrary" has been invoked before, Undeclared is raised
       * instead.) *)
    method element_names : string list
      (* returns the list of the names of all element declarations *)
    method gen_entity : string -> (Pxp_entity.entity * bool)
      (* let e, extdecl = obj # gen_entity n:
       * looks up the general entity 'e' with the name 'n'. Raises
       * WF_error if the entity cannot be found.
       * 'extdecl': indicates whether the entity declaration occurred in
       * an external entity. *)
    method gen_entity_names : string list
      (* returns the list of all general entity names *)
    method par_entity : string -> Pxp_entity.entity
      (* looks up the parameter entity with the given name. Raises
       * WF_error if the entity cannot be found. *)
    method par_entity_names : string list
      (* returns the list of all parameter entity names *)
    method notation : string -> dtd_notation
      (* looks up the notation declaration with the given name. Raises
       * Validation_error if the notation cannot be found. (If
       * "allow_arbitrary" has been invoked before, Undeclared is raised
       * instead.) *)
    method notation_names : string list
      (* Returns the list of the names of all added notations *)
    method pinstr : string -> proc_instruction list
      (* looks up all processing instructions with the given target.
       * The "target" is the identifier following "<?".
       * Note: It is not possible to find out the exact position of the
       * processing instruction. *)
    method pinstr_names : string list
      (* Returns the list of the names (targets) of all added pinstrs *)

    method validate : unit
      (* ensures that the DTD is valid. This method is optimized such that
       * actual validation is only performed if the DTD has changed.
       * If the DTD is invalid, mostly a Validation_error is raised, but
       * other exceptions are possible, too. *)
    method only_deterministic_models : unit
      (* Succeeds if all regexp content models are deterministic.
       * Otherwise Validation_error. *)

    method write : Pxp_types.output_stream -> Pxp_types.encoding -> bool -> unit
      (* write os enc doctype:
       * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a
       * DTD like <!DOCTYPE root [ ... ]> is written. If 'not doctype',
       * only the declarations are written (the material within the
       * square brackets). *)
    method write_compact_as_latin1 : Pxp_types.output_stream -> bool -> unit
      (* DEPRECATED METHOD; included only to keep compatibility with
       * older versions of the parser *)

    (*---------------------------*)
    method invalidate : unit
      (* INTERNAL METHOD *)
    method warner : Pxp_types.collect_warnings
      (* INTERNAL METHOD *)
  end

(* ---------------------------------------------------- *)

and dtd_element : dtd -> string ->
  (* Creation:
   *   new dtd_element init_dtd init_name:
   * creates a new dtd_element object for init_dtd with init_name.
   * The strings are represented in the same encoding as init_dtd.
   *)
  object
    method name : string
      (* returns the name of the declared element *)
    method externally_declared : bool
      (* returns whether the element declaration occurs in an external
       * entity *)
    method content_model : Pxp_types.content_model_type
      (* get the content model of this element declaration, or Unspecified *)
    method content_dfa : Pxp_dfa.dfa_definition option
      (* return the DFA of the content model if there is a DFA, or None.
       * A DFA exists only for regexp style content models which are
       * deterministic. *)
    method set_cm_and_extdecl : Pxp_types.content_model_type -> bool -> unit
      (* set_cm_and_extdecl cm extdecl:
       * set the content model to 'cm'. Once the content model is not
       * Unspecified, it cannot be set to a different value again.
       * Furthermore, it is set whether the element occurs in an external
       * entity ('extdecl'). *)
    method encoding : Pxp_types.rep_encoding
      (* Return the encoding of the strings *)

    method allow_arbitrary : unit
      (* After this method has been invoked, the object changes its
       * behaviour: attributes that have not been added may be used in an
       * arbitrary way; the method "attribute" indicates this by raising
       * Undeclared instead of Validation_error. *)
    method disallow_arbitrary : unit
    method arbitrary_allowed : bool
      (* Returns whether arbitrary attributes are allowed or not. *)

    method attribute : string ->
                         Pxp_types.att_type * Pxp_types.att_default
      (* get the type and default value of a declared attribute, or raise
       * Validation_error if the attribute does not exist.
       * If 'arbitrary_allowed', the exception Undeclared is raised
       * instead of Validation_error. *)
    method attribute_violates_standalone_declaration :
             string -> string option -> bool
      (* attribute_violates_standalone_declaration name v:
       * Checks whether the attribute 'name' violates the "standalone"
       * declaration if it has value 'v'.
       * The method returns true if:
       * - the attribute declaration occurs in an external entity,
       * and if one of the two conditions holds:
       * - v = None, and there is a default for the attribute value
       * - v = Some s, and the type of the attribute is not CDATA, and s
       *   changes if normalized according to the rules of the attribute
       *   type.
       * The method raises Validation_error if the attribute does not
       * exist. If 'arbitrary_allowed', the exception Undeclared is raised
       * instead of Validation_error. *)
    method attribute_names : string list
      (* get the list of all declared attributes *)
    method names_of_required_attributes : string list
      (* get the list of all attributes that are specified as required
       * attributes *)
    method id_attribute_name : string option
      (* Returns the name of the attribute with type ID, or None. *)
    method idref_attribute_names : string list
      (* Returns the names of the attributes with type IDREF or IDREFS. *)
    method add_attribute : string ->
                             Pxp_types.att_type ->
                             Pxp_types.att_default ->
                             bool ->
                               unit
      (* add_attribute name type default extdecl:
       * add an attribute declaration for an attribute with the given
       * name, type, and default value. If there is more than one
       * declaration for an attribute name, the first declaration counts;
       * the other declarations are ignored.
       * 'extdecl': if true, the attribute declaration occurs in an
       * external entity. This property is used to check the "standalone"
       * attribute. *)

    method validate : unit
      (* checks whether this element declaration (i.e. the content model
       * and all attribute declarations) is valid for the associated DTD.
       * Raises mostly Validation_error if the validation fails. *)

    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
      (* write os enc:
       * Writes the <!ELEMENT ... > declaration to 'os' as 'enc'-encoded
       * string. *)
    method write_compact_as_latin1 : Pxp_types.output_stream -> unit
      (* DEPRECATED METHOD; included only to keep compatibility with
       * older versions of the parser *)
  end

(* ---------------------------------------------------- *)

and dtd_notation : string -> Pxp_types.ext_id -> Pxp_types.rep_encoding ->
  (* Creation:
   *   new dtd_notation a_name an_external_ID init_encoding
   * creates a new dtd_notation object with the given name and the given
   * external ID.
   *)
  object
    method name : string
    method ext_id : Pxp_types.ext_id
    method encoding : Pxp_types.rep_encoding

    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
      (* write os enc:
       * Writes the <!NOTATION ... > declaration to 'os' as 'enc'-encoded
       * string. *)
    method write_compact_as_latin1 : Pxp_types.output_stream -> unit
      (* DEPRECATED METHOD; included only to keep compatibility with
       * older versions of the parser *)
  end

(* ---------------------------------------------------- *)

and proc_instruction : string -> string -> Pxp_types.rep_encoding ->
  (* Creation:
   *   new proc_instruction a_target a_value
   * creates a new proc_instruction object with the given target string
   * and the given value string.
   * Note: A processing instruction is written as <?target value?>.
   *)
  object
    method target : string
    method value : string
    method encoding : Pxp_types.rep_encoding

    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
      (* write os enc:
       * Writes the <?...?> PI to 'os' as 'enc'-encoded string. *)
    method write_compact_as_latin1 : Pxp_types.output_stream -> unit
      (* DEPRECATED METHOD; included only to keep compatibility with
       * older versions of the parser *)

    method parse_pxp_option : (string * string * (string * string) list)
      (* Parses a PI containing a PXP option. Such PIs are formed like:
       *   <?target option-name option-att="value" option-att="value" ... ?>
       * The method returns a triple
       *   (target, option-name, [option-att, value; ...])
       * or raises Error. *)
  end
;;

4.4. Invoking the parser

Here is a description of Pxp_yacc.

4.4.1. Defaults

The following defaults are available:

val default_config : config
val default_extension : ('a node extension) as 'a
val default_spec : ('a node extension as 'a) spec

4.4.2. Parsing functions

In the following, the term "closed document" refers to an XML structure like

<!DOCTYPE ... [ declarations ] >
<root>
...
</root>

The term "fragment" refers to an XML structure like

<root>
...
</root>

i.e. only to one isolated element instance.

val parse_dtd_entity : config -> source -> dtd

Parses the declarations which are contained in the entity, and returns them as a dtd object.

val extract_dtd_from_document_entity : config -> source -> dtd

Extracts the DTD from a closed document. Both the internal and the external subsets are extracted and combined into one dtd object. This function does not parse the whole document, but only the parts that are necessary to extract the DTD.

val parse_document_entity :
  ?transform_dtd:(dtd -> dtd) ->
  ?id_index:('ext index) ->
  config ->
  source ->
  'ext spec ->
    'ext document

Parses a closed document and validates it against the DTD that is contained in the document (internal and external subsets). The option ~transform_dtd can be used to transform the DTD in the document, and to use the transformed DTD for validation. If ~id_index is specified, an index of all ID attributes is created.

val parse_wfdocument_entity :
  config ->
  source ->
  'ext spec ->
    'ext document

Parses a closed document, but checks it only for well-formedness.

val parse_content_entity :
  ?id_index:('ext index) ->
  config ->
  source ->
  dtd ->
  'ext spec ->
    'ext node

Parses a fragment, and validates the element.

val parse_wfcontent_entity :
  config ->
  source ->
  'ext spec ->
    'ext node

Parses a fragment, but checks it only for well-formedness.
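A small usage sketch combining these functions with the defaults from section 4.4.1; the file names are placeholders, and the methods root, sub_nodes, and element_names are those shown earlier in this chapter.

let () =
  let config = default_config in

  (* Parse and validate a complete document. *)
  let doc = parse_document_entity config (from_file "doc.xml") default_spec in
  let root = doc # root in
  Printf.printf "The root element has %d children\n"
    (List.length (root # sub_nodes));

  (* Parse a DTD on its own and inspect it through the Pxp_dtd interface. *)
  let dtd = parse_dtd_entity config (from_file "sample.dtd") in
  List.iter print_endline (dtd # element_names)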
-Fp(4.4.3.)35 b(Con\002guration)f(options)396 4110 y Fq(type)44
-b(config)g(=)576 4207 y({)g(warner)g(:)h(collect_warnings;)665
-4304 y(errors_with_line_numbers)c(:)k(bool;)665 4401
-y(enable_pinstr_nodes)d(:)j(bool;)665 4499 y(enable_super_root_node)c
-(:)k(bool;)665 4596 y(enable_comment_nodes)d(:)i(bool;)665
-4693 y(encoding)g(:)g(rep_encoding;)665 4790 y
-(recognize_standalone_declaration)c(:)k(bool;)p Black
-3800 5278 a Fr(91)p Black eop
-%%Page: 92 92
-92 91 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
-(calling)f(the)h(par)o(ser)p Black 665 579 a Fq
-(store_element_positions)41 b(:)k(bool;)665 676 y(idref_pass)e(:)i
-(bool;)665 773 y(validate_by_dfa)e(:)h(bool;)665 870
-y(accept_only_deterministic_models)c(:)k(bool;)665 967
-y(...)576 1065 y(})p Black 396 1422 a Ft(\225)p Black
-60 w Fq(warner:)p Fv(The)19 b(parser)h(prints)f(w)o(arnings)h(by)f(in)m
-(v)n(oking)f(the)j(method)d Fq(warn)j Fv(for)e(this)i(w)o(arner)e
-(object.)h(\(Def)o(ault:)f(all)479 1530 y(w)o(arnings)h(are)g
-(dropped\))p Black 396 1637 a Ft(\225)p Black 60 w Fq
-(errors_with_line_numbers:)p Fv(If)c(true,)k(errors)f(contain)g(line)i
-(numbers;)d(if)j(f)o(alse,)f(errors)g(contain)f(only)g(byte)479
-1745 y(positions.)h(The)g(latter)g(mode)f(is)i(f)o(aster)-5
-b(.)21 b(\(Def)o(ault:)e(true\))p Black 396 1853 a Ft(\225)p
-Black 60 w Fq(enable_pinstr_nodes:)p Fv(If)e(true,)j(the)g(parser)f
-(creates)i(e)o(xtra)e(nodes)g(for)h(processing)f(instructions.)g(If)h
-(f)o(alse,)479 1961 y(processing)f(instructions)g(are)h(simply)g(added)
-f(to)i(the)f(element)f(or)h(document)f(surrounding)e(the)j
-(instructions.)479 2069 y(\(Def)o(ault:)g(f)o(alse\))p
-Black 396 2177 a Ft(\225)p Black 60 w Fq(enable_super_root_node:)p
-Fv(If)c(true,)k(the)g(parser)g(creates)g(an)g(e)o(xtra)g(node)f(which)g
-(is)j(the)e(parent)f(of)h(the)g(root)479 2285 y(of)g(the)g(document)f
-(tree.)h(This)g(node)f(is)i(called)f(super)g(root;)f(it)i(is)g(an)g
-(element)e(with)i(type)e Fq(T_super_root)p Fv(.)g(-)h(If)479
-2393 y(there)g(are)g(processing)f(instructions)g(outside)h(the)g(root)f
-(element)h(and)g(outside)f(the)i(DTD,)f(the)o(y)f(are)h(added)f(to)i
-(the)479 2501 y(super)f(root)f(instead)h(of)g(the)g(document.)e(-)j(If)
-f(f)o(alse,)g(the)g(super)g(root)g(node)f(is)i(not)f(created.)f(\(Def)o
-(ault:)h(f)o(alse\))p Black 396 2609 a Ft(\225)p Black
-60 w Fq(enable_comment_nodes:)p Fv(If)d(true,)i(the)i(parser)e(creates)
-h(nodes)g(for)f(comments)g(with)i(type)f Fq(T_comment)p
-Fv(;)f(if)479 2717 y(f)o(alse,)i(such)f(nodes)f(are)h(not)g(created.)f
-(\(Def)o(ault:)h(f)o(alse\))p Black 396 2825 a Ft(\225)p
-Black 60 w Fq(encoding:)p Fv(Speci\002es)f(the)i(internal)e(encoding)f
-(of)i(the)g(parser)-5 b(.)20 b(Most)g(strings)h(are)f(then)f
-(represented)g(according)479 2933 y(to)i(this)f(encoding;)f(ho)n(we)n
-(v)o(er)f(there)h(are)i(some)f(e)o(xceptions)e(\(especially)i
-Fq(ext_id)f Fv(v)n(alues)h(which)g(are)g(al)o(w)o(ays)479
-3041 y(UTF-8)g(encoded\).)e(\(Def)o(ault:)h(`Enc_iso88591\))p
-Black 396 3148 a Ft(\225)p Black 60 w Fq
-(recognize_standalone_declaration:)c Fv(If)21 b(true)e(and)h(if)h(the)f
-(parser)f(is)i(v)n(alidating,)e(the)479 3256 y Fq(standalone="yes")f
-Fv(declaration)h(forces)h(that)g(it)h(is)g(check)o(ed)e(whether)g(the)h
-(document)e(is)j(a)g(standalone)479 3364 y(document.)d(-)j(If)f(f)o
-(alse,)g(or)g(if)g(the)h(parser)e(is)i(in)g(well-formedness)d(mode,)h
-(such)h(declarations)f(are)h(ignored.)479 3472 y(\(Def)o(ault:)g
-(true\))p Black 396 3580 a Ft(\225)p Black 60 w Fq
-(store_element_positions:)d Fv(If)j(true,)g(for)f(e)n(v)o(ery)g
-(non-data)f(node)h(the)i(source)e(position)g(is)j(stored.)d(If)h(f)o
-(alse,)479 3688 y(the)g(position)g(information)e(is)j(lost.)f(If)g(a)n
-(v)n(ailable,)g(you)f(can)h(get)g(the)g(positions)g(of)g(nodes)f(by)h
-(in)m(v)n(oking)e(the)479 3796 y Fq(position)i Fv(method.)e(\(Def)o
-(ault:)i(true\))p Black 396 3904 a Ft(\225)p Black 60
-w Fq(idref_pass:)p Fv(If)e(true)i(and)g(if)g(there)g(is)h(an)f(ID)h
-(inde)o(x,)e(the)h(parser)f(checks)h(whether)f(e)n(v)o(ery)g(IDREF)i
-(or)e(IDREFS)479 4012 y(attrib)n(ute)h(refer)g(to)g(an)g(e)o(xisting)f
-(node;)h(this)g(requires)g(that)g(the)g(parser)g(tra)n(v)o(erses)g(the)
-g(whole)f(doument)g(tree.)h(If)479 4120 y(f)o(alse,)h(this)f(check)g
-(is)h(left)f(out.)g(\(Def)o(ault:)g(f)o(alse\))p Black
-396 4228 a Ft(\225)p Black 60 w Fq(validate_by_dfa:)p
-Fv(If)e(true)h(and)h(if)h(the)f(content)f(model)g(for)h(an)g(element)g
-(type)f(is)i(deterministic,)e(a)479 4336 y(deterministic)h(\002nite)g
-(automaton)e(is)j(used)f(to)h(v)n(alidate)e(whether)g(the)i(element)e
-(contents)h(match)f(the)i(content)479 4444 y(model)e(of)h(the)g(type.)g
-(If)g(f)o(alse,)g(or)g(if)g(a)g(DF)-6 b(A)21 b(is)g(not)f(a)n(v)n
-(ailable,)f(a)h(backtracking)e(algorithm)g(is)j(used)f(for)f(v)n
-(alidation.)479 4552 y(\(Def)o(ault:)h(true\))p Black
-396 4659 a Ft(\225)p Black 60 w Fq(accept_only_deterministic_models:)15
-b Fv(If)21 b(true,)e(only)h(deterministic)f(content)g(models)h(are)g
-(accepted;)f(if)479 4767 y(f)o(alse,)i(an)o(y)e(syntactically)h
-(correct)f(content)g(models)h(can)g(be)g(processed.)f(\(Def)o(ault:)g
-(true\))p Black 3800 5278 a Fr(92)p Black eop
-%%Page: 93 93
-93 92 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
-(calling)f(the)h(par)o(ser)p Black -2 583 a Fp(4.4.4.)35
-b(Whic)o(h)f(con\002guration)g(should)g(I)f(use?)396
-751 y Fv(First,)21 b(I)f(recommend)e(to)i(v)n(ary)g(the)g(def)o(ault)f
-(con\002guration)f(instead)i(of)g(creating)f(a)i(ne)n(w)f
-(con\002guration)d(record.)i(F)o(or)396 859 y(instance,)h(to)g(set)h
-Fq(idref_pass)e Fv(to)i Fq(true)p Fv(,)e(change)g(the)i(def)o(ault)e
-(as)i(in:)396 1039 y Fq(let)45 b(config)e(=)i({)g(default_config)d
-(with)i(idref_pass)g(=)g(true)g(})396 1230 y Fv(The)20
-b(background)d(is)k(that)f(I)h(can)f(add)f(more)h(options)f(to)h(the)g
-(record)f(in)i(future)e(v)o(ersions)g(of)h(the)g(parser)f(without)396
-1338 y(breaking)g(your)f(programs.)396 1487 y Fu(Do)i(I)i(need)e(extra)
-f(nodes)i(f)n(or)f(pr)o(ocessing)g(instructions?)g Fv(By)g(def)o(ault,)
-g(such)g(nodes)f(are)h(not)g(created.)f(This)i(does)396
-1595 y(not)f(mean)g(that)g(the)g(processing)f(instructions)g(are)h
-(lost;)h(ho)n(we)n(v)o(er)m(,)d(you)h(cannot)g(\002nd)h(out)g(the)g(e)o
-(xact)g(location)f(where)396 1703 y(the)o(y)h(occur)-5
-b(.)19 b(F)o(or)h(e)o(xample,)e(the)j(follo)n(wing)d(XML)i(te)o(xt)396
-1883 y Fq(<x><?pi1?><y/><?pi2?></x>)396 2074 y Fv(will)h(normally)e
-(create)h(one)f(element)h(node)f(for)h Fq(x)g Fv(containing)e
-Fr(one)i Fv(subnode)f(for)g Fq(y)p Fv(.)h(The)g(processing)f
-(instructions)396 2182 y(are)h(attached)g(to)g Fq(x)h
-Fv(in)f(a)h(separate)e(hash)h(table;)h(you)e(can)h(access)h(them)e
-(using)h Fq(x)45 b(#)f(pinstr)g("pi1")20 b Fv(and)g Fq(x)44
-b(#)396 2290 y(pinstr)g("pi2")p Fv(,)20 b(respecti)n(v)o(ely)-5
-b(.)18 b(The)i(information)d(is)k(lost)g(where)f(the)g(instructions)f
-(occur)g(within)h Fq(x)p Fv(.)396 2439 y(If)g(the)h(option)d
-Fq(enable_pinstr_nodes)g Fv(is)j(turned)e(on,)h(the)g(parser)f(creates)
-i(e)o(xtra)e(nodes)g Fq(pi1)i Fv(and)e Fq(pi2)i Fv(such)f(that)396
-2547 y(the)g(subnodes)f(of)h Fq(x)h Fv(are)f(no)n(w:)396
-2728 y Fq(x)45 b(#)g(sub_nodes)e(=)i([)f(pi1;)g(y;)h(pi2)f(])396
-2919 y Fv(The)20 b(e)o(xtra)g(nodes)f(contain)g(the)h(processing)f
-(instructions)g(in)i(the)f(usual)g(w)o(ay)-5 b(,)20 b(i.e.)g(you)f(can)
-h(access)h(them)f(using)f Fq(pi1)396 3026 y(#)45 b(pinstr)f("pi1")20
-b Fv(and)f Fq(pi2)45 b(#)f(pinstr)g("pi2")p Fv(,)20 b(respecti)n(v)o
-(ely)-5 b(.)396 3176 y(Note)20 b(that)h(you)e(will)i(need)e(an)i(e)o(x)
-o(emplar)d(for)h(the)i(PI)f(nodes)g(\(see)g Fq(make_spec_from_alist)p
-
-Do I need a super root node? By default, there is no super root node. The
-document object refers directly to the node representing the root element of
-the document, i.e.
-
-    doc # root = r
-
-if r is the root node. This is sometimes inconvenient: (1) Some algorithms
-become simpler if every node has a parent, even the root node. (2) Some
-standards such as XPath call the "root node" the node whose child represents
-the root of the document. (3) The super root node can serve as a container
-for processing instructions outside the root element. For these reasons, it
-is possible to create an extra super root node whose child is the root node:
-
-    doc # root = sr         &&
-    sr # sub_nodes = [ r ]
-
-When extra nodes are also created for processing instructions, these nodes
-can be added to the super root node if they occur outside the root element
-(reason (3)), and their order reflects the order in the source text.
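-
-The option is again switched on via the configuration record. A minimal
-sketch (the field name enable_super_root_node is an assumption here; check
-pxp_yacc.mli for the exact name):
-
-    (* sketch: ask the parser to create a super root node above the root *)
-    let config = { default_config with enable_super_root_node = true }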
-
-Note that you will need an exemplar for the super root node (see
-make_spec_from_alist).
-
-What is the effect of the UTF-8 encoding? By default, the parser represents
-strings (with few exceptions) as ISO-8859-1 strings. These are well-known,
-and there are tools and fonts for this encoding. However,
-internationalization may require that you switch over to the UTF-8 encoding.
-In most environments, the immediate effect will be that you can no longer
-read strings with character codes >= 160; your terminal will only show funny
-glyph combinations. It is strongly recommended to install Unicode fonts (GNU
-Unifont (http://czyborra.com/unifont/), Markus Kuhn's fonts
-(http://www.cl.cam.ac.uk/~mgk25/download/ucs-fonts.tar.gz)) and terminal
-emulators that can handle UTF-8 byte sequences
-(http://myweb.clark.net/pub/dickey/xterm/xterm.html). Furthermore, a Unicode
-editor may be helpful, such as Yudit
-(ftp://metalab.unc.edu/pub/Linux/apps/editors/X/). There is also a FAQ
-(http://www.cl.cam.ac.uk/~mgk25/unicode.html) by Markus Kuhn.
-
-By setting encoding to `Enc_utf8, all strings originating from the parsed XML
-document are represented as UTF-8 strings. This includes not only character
-data and attribute values but also element names, attribute names and so on,
-as it is possible to use any Unicode letter to form such names. Strictly
-speaking, PXP is only XML-compliant if the UTF-8 mode is used; otherwise it
-will have difficulties when validating documents containing
-non-ISO-8859-1 names.
-
-This mode does not have any impact on the external representation of
-documents. The character set assumed when reading a document is set in the
-XML declaration, and the character set used when writing a document must be
-passed to the write method.
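-
-A minimal sketch of switching the internal representation to UTF-8, using the
-encoding field and the `Enc_utf8 value mentioned above:
-
-    (* sketch: represent all strings of the parsed document as UTF-8 *)
-    let config = { default_config with encoding = `Enc_utf8 }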
-
-How do I check that the nodes referred to by IDREF attributes exist? First,
-you must create an index of all occurring ID attributes:
-
-    let index = new hash_index
-
-This index must be passed to the parsing function:
-
-    parse_document_entity
-      ~id_index:(index :> index)
-      config source spec
-
-Next, you must turn on the idref_pass mode:
-
-    let config = { default_config with idref_pass = true }
-
-Note that now the whole document tree will be traversed, and every node will
-be checked for IDREF and IDREFS attributes. If the tree is big, this may take
-some time.
-
-What are deterministic content models? This type of model can speed up the
-validation checks; furthermore, deterministic models ensure
-SGML-compatibility. In particular, a content model is deterministic if the
-parser can determine the actually used alternative by inspecting only the
-current token. For example, this element has non-deterministic contents:
-
-    <!ELEMENT x ((u,v) | (u,y+) | v)>
-
-If the first element in x is u, the parser does not know which of the
-alternatives (u,v) or (u,y+) will work; the parser must also inspect the
-second element to be able to distinguish between the alternatives. Because
-such look-ahead (or "guessing") is required, this example is
-non-deterministic.
-
-The XML standard demands that content models must be deterministic, so it is
-recommended to turn the option accept_only_deterministic_models on; however,
-PXP can also process non-deterministic models using a backtracking algorithm.
-
-Deterministic models ensure that validation can be performed in linear time.
-In order to get the maximum benefit, PXP also implements a special validator
-that profits from deterministic models: the deterministic finite automaton
-(DFA). This validator is enabled per element type if the element type has a
-deterministic model and if the option validate_by_dfa is turned on.
-
-In general, I expect the DFA method to be faster than the backtracking
-method; in particular, even in the worst case the DFA takes only linear time.
-However, if the content model has only few alternatives and the alternatives
-do not nest, the backtracking algorithm may be better.
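-
-A minimal sketch making both choices explicit (both fields are quoted above,
-and both already default to true):
-
-    (* sketch: insist on deterministic models and validate them by DFA *)
-    let config =
-      { default_config with
-          accept_only_deterministic_models = true;
-          validate_by_dfa = true
-      }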
-
-4.5. Updates
-
-Some (often later added) features that are otherwise not explained in the
-manual but are worth mentioning:
-
-* Methods node_position, node_path, nth_node, previous_node, next_node for
-  nodes: see pxp_document.mli
-
-* Functions to determine the document order of nodes: compare,
-  create_ord_index, ord_number, ord_compare: see pxp_document.mli
+++ /dev/null
-<!ENTITY markup-dtd1.mli '
-
-(**********************************************************************)
-(* *)
-(* Pxp_dtd: *)
-(* Object model of document type declarations *)
-(* *)
-(**********************************************************************)
-
-(* ======================================================================
- * OVERVIEW
- *
- * class dtd ............... represents the whole DTD, including element
- * declarations, entity declarations, notation
- * declarations, and processing instructions
- * class dtd_element ....... represents an element declaration consisting
- * of a content model and an attribute list
- * declaration
- * class dtd_notation ...... represents a notation declaration
- * class proc_instruction .. represents a processing instruction
- * ======================================================================
- *
- *)
-
-
-class dtd :
- (* Creation:
- * new dtd
- * creates a new, empty DTD object without any declaration, without a root
- * element, without an ID.
- *)
- Pxp_types.collect_warnings ->
- Pxp_types.rep_encoding ->
- object
- method root : string option
- (* get the name of the root element if present *)
-
- method set_root : string -> unit
- (* set the name of the root element. This method can be invoked
- * only once
- *)
-
- method id : Pxp_types.dtd_id option
- (* get the identifier for this DTD *)
-
- method set_id : Pxp_types.dtd_id -> unit
- (* set the identifier. This method can be invoked only once *)
-
- method encoding : Pxp_types.rep_encoding
- (* returns the encoding used for character representation *)
-
-
- method allow_arbitrary : unit
- (* After this method has been invoked, the object changes its behaviour:
- * - elements and notations that have not been added may be used in an
- * arbitrary way; the methods "element" and "notation" indicate this
- * by raising Undeclared instead of Validation_error.
- *)
-
- method disallow_arbitrary : unit
-
- method arbitrary_allowed : bool
- (* Returns whether arbitrary contents are allowed or not. *)
-
- method standalone_declaration : bool
- (* Whether there is a 'standalone' declaration or not. Strictly
- * speaking, this declaration is not part of the DTD, but it is
- * included here because of practical reasons.
- * If not set, this property defaults to 'false'.
- *)
-
- method set_standalone_declaration : bool -> unit
- (* Sets the 'standalone' declaration. *)
-
-
- method add_element : dtd_element -> unit
- (* add the given element declaration to this DTD. Raises Not_found
- * if there is already an element declaration with the same name.
- *)
-
- method add_gen_entity : Pxp_entity.entity -> bool -> unit
- (* add_gen_entity e extdecl:
- * add the entity 'e' as general entity to this DTD (general entities
- * are those represented by &name;). If there is already a declaration
-     * with the same name, the second definition is ignored; as an exception to
- * this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
- * may only be redeclared with a definition that is equivalent to the
- * standard definition; otherwise a Validation_error is raised.
- *
- * 'extdecl': 'true' indicates that the entity declaration occurs in
- * an external entity. (Used for the standalone check.)
- *)
-
- method add_par_entity : Pxp_entity.entity -> unit
- (* add the given entity as parameter entity to this DTD (parameter
- * entities are those represented by &percent;name;). If there is already a
- * declaration with the same name, the second definition is ignored.
- *)
-
- method add_notation : dtd_notation -> unit
- (* add the given notation to this DTD. If there is already a declaration
- * with the same name, a Validation_error is raised.
- *)
-
- method add_pinstr : proc_instruction -> unit
- (* add the given processing instruction to this DTD. *)
-
- method element : string -> dtd_element
- (* looks up the element declaration with the given name. Raises
- * Validation_error if the element cannot be found. (If "allow_arbitrary"
-       * has been invoked before, Undeclared is raised instead.)
- *)
-
- method element_names : string list
- (* returns the list of the names of all element declarations. *)
-
- method gen_entity : string -> (Pxp_entity.entity * bool)
- (* let e, extdecl = obj # gen_entity n:
- * looks up the general entity 'e' with the name 'n'. Raises
- * WF_error if the entity cannot be found.
-       * 'extdecl': indicates whether the entity declaration occurred in an
- * external entity.
- *)
-
- method gen_entity_names : string list
- (* returns the list of all general entity names *)
-
- method par_entity : string -> Pxp_entity.entity
- (* looks up the parameter entity with the given name. Raises
- * WF_error if the entity cannot be found.
- *)
-
- method par_entity_names : string list
- (* returns the list of all parameter entity names *)
-
- method notation : string -> dtd_notation
- (* looks up the notation declaration with the given name. Raises
- * Validation_error if the notation cannot be found. (If "allow_arbitrary"
-       * has been invoked before, Undeclared is raised instead.)
- *)
-
- method notation_names : string list
- (* Returns the list of the names of all added notations *)
-
- method pinstr : string -> proc_instruction list
- (* looks up all processing instructions with the given target.
- * The "target" is the identifier following "<?".
- * Note: It is not possible to find out the exact position of the
- * processing instruction.
- *)
-
- method pinstr_names : string list
- (* Returns the list of the names (targets) of all added pinstrs *)
-
- method validate : unit
- (* ensures that the DTD is valid. This method is optimized such that
- * actual validation is only performed if DTD has changed.
- * If the DTD is invalid, mostly a Validation_error is raised,
- * but other exceptions are possible, too.
- *)
-
- method only_deterministic_models : unit
- (* Succeeds if all regexp content models are deterministic.
- * Otherwise Validation_error.
- *)
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> bool -> unit
-      (* write os enc doctype:
- * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a
- * DTD like <!DOCTYPE root [ ... ]> is written. If 'not doctype',
- * only the declarations are written (the material within the
- * square brackets).
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> bool -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
-
-
- (*----------------------------------------*)
- method invalidate : unit
- (* INTERNAL METHOD *)
- method warner : Pxp_types.collect_warnings
- (* INTERNAL METHOD *)
- end
-
-'>
-<!ENTITY markup-dtd2.mli '
-
-(* ---------------------------------------------------------------------- *)
-
-and dtd_element : dtd -> string ->
- (* Creation:
- * new dtd_element init_dtd init_name:
- * creates a new dtd_element object for init_dtd with init_name.
- * The strings are represented in the same encoding as init_dtd.
- *)
- object
-
- method name : string
- (* returns the name of the declared element *)
-
- method externally_declared : bool
- (* returns whether the element declaration occurs in an external
- * entity.
- *)
-
- method content_model : Pxp_types.content_model_type
- (* get the content model of this element declaration, or Unspecified *)
-
- method content_dfa : Pxp_dfa.dfa_definition option
- (* return the DFA of the content model if there is a DFA, or None.
- * A DFA exists only for regexp style content models which are
- * deterministic.
- *)
-
- method set_cm_and_extdecl : Pxp_types.content_model_type -> bool -> unit
- (* set_cm_and_extdecl cm extdecl:
- * set the content model to 'cm'. Once the content model is not
- * Unspecified, it cannot be set to a different value again.
- * Furthermore, it is set whether the element occurs in an external
- * entity ('extdecl').
- *)
-
- method encoding : Pxp_types.rep_encoding
- (* Return the encoding of the strings *)
-
- method allow_arbitrary : unit
- (* After this method has been invoked, the object changes its behaviour:
- * - attributes that have not been added may be used in an
- * arbitrary way; the method "attribute" indicates this
- * by raising Undeclared instead of Validation_error.
- *)
-
- method disallow_arbitrary : unit
-
- method arbitrary_allowed : bool
- (* Returns whether arbitrary attributes are allowed or not. *)
-
- method attribute : string ->
- Pxp_types.att_type * Pxp_types.att_default
- (* get the type and default value of a declared attribute, or raise
- * Validation_error if the attribute does not exist.
- * If 'arbitrary_allowed', the exception Undeclared is raised instead
- * of Validation_error.
- *)
-
- method attribute_violates_standalone_declaration :
- string -> string option -> bool
- (* attribute_violates_standalone_declaration name v:
- * Checks whether the attribute 'name' violates the "standalone"
- * declaration if it has value 'v'.
- * The method returns true if:
- * - The attribute declaration occurs in an external entity,
- * and if one of the two conditions holds:
- * - v = None, and there is a default for the attribute value
- * - v = Some s, and the type of the attribute is not CDATA,
- * and s changes if normalized according to the rules of the
- * attribute type.
- *
- * The method raises Validation_error if the attribute does not exist.
- * If 'arbitrary_allowed', the exception Undeclared is raised instead
- * of Validation_error.
- *)
-
- method attribute_names : string list
- (* get the list of all declared attributes *)
-
- method names_of_required_attributes : string list
- (* get the list of all attributes that are specified as required
- * attributes
- *)
-
- method id_attribute_name : string option
- (* Returns the name of the attribute with type ID, or None. *)
-
- method idref_attribute_names : string list
- (* Returns the names of the attributes with type IDREF or IDREFS. *)
-
- method add_attribute : string ->
- Pxp_types.att_type ->
- Pxp_types.att_default ->
- bool ->
- unit
- (* add_attribute name type default extdecl:
- * add an attribute declaration for an attribute with the given name,
- * type, and default value. If there is more than one declaration for
- * an attribute name, the first declaration counts; the other declarations
- * are ignored.
- * 'extdecl': if true, the attribute declaration occurs in an external
- * entity. This property is used to check the "standalone" attribute.
- *)
-
- method validate : unit
- (* checks whether this element declaration (i.e. the content model and
- * all attribute declarations) is valid for the associated DTD.
- * Raises mostly Validation_error if the validation fails.
- *)
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
-      (* write os enc:
- * Writes the <!ELEMENT ... > declaration to 'os' as 'enc'-encoded string.
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
- end
-
-(* ---------------------------------------------------------------------- *)
-
-and dtd_notation : string -> Pxp_types.ext_id -> Pxp_types.rep_encoding ->
- (* Creation:
- * new dtd_notation a_name an_external_ID init_encoding
- * creates a new dtd_notation object with the given name and the given
- * external ID.
- *)
- object
- method name : string
- method ext_id : Pxp_types.ext_id
- method encoding : Pxp_types.rep_encoding
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
-      (* write os enc:
- * Writes the <!NOTATION ... > declaration to 'os' as 'enc'-encoded
- * string.
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
-
- end
-
-(* ---------------------------------------------------------------------- *)
-
-and proc_instruction : string -> string -> Pxp_types.rep_encoding ->
- (* Creation:
- * new proc_instruction a_target a_value
- * creates a new proc_instruction object with the given target string and
- * the given value string.
- * Note: A processing instruction is written as <?target value?>.
- *)
- object
- method target : string
- method value : string
- method encoding : Pxp_types.rep_encoding
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
- (* write os enc:
- * Writes the <?...?> PI to 'os' as 'enc'-encoded string.
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
-
- method parse_pxp_option : (string * string * (string * string) list)
- (* Parses a PI containing a PXP option. Such PIs are formed like:
- * <?target option-name option-att="value" option-att="value" ... ?>
- * The method returns a triple
- * (target, option-name, [option-att, value; ...])
- * or raises Error.
- *)
-
- end
-
-;;
-
-'>
+++ /dev/null
-#! /bin/sh
-# (*
-exec ocamlfattop "$0"
-*) directory ".";;
-
-open Str;;
-
-let name_re = regexp "(\\*\\$[ \t]*\\([a-zA-Z0-9.-]*\\)[ \t]*\\*)";;
-let subst_re = regexp "[<>&'%]";;
-
-let begin_entity name =
- "<!ENTITY " ^ name ^ " '";;
-
-let end_entity () =
- "'>\n"
-;;
-
-
-let text = ref "" in
-let within_entity = ref false in
-try
- while true do
- let line = read_line() in
- if string_match name_re line 0 then begin
- let name = matched_group 1 line in
- if !within_entity then
- text := !text ^ "\n" ^ end_entity();
- within_entity := false;
- if name <> "-" then begin
- text := !text ^ begin_entity name;
- within_entity := true
- end
- end
- else
- if !within_entity then begin
- let line' =
- global_substitute subst_re
- (fun s ->
- let s' = matched_group 0 s in
- match s' with
- "<" -> "<"
- | ">" -> ">"
- | "&" -> "&"
- | "'" -> "'"
- | "%" -> "&percent;"
- | _ -> assert false)
- line
- in
- text := !text ^ "\n" ^ line'
- end
- done;
-with End_of_file ->
- if !within_entity then
- text := !text ^ "\n" ^ end_entity();
- print_string !text
-;;
+++ /dev/null
-.acronym {
- font-weight: bold;
- color: #c71585
-}
+++ /dev/null
-<!DOCTYPE style-sheet PUBLIC "-//James Clark//DTD DSSSL Style Sheet//EN" [
-
-<!-- The default is the print stylesheet. Call 'jade' with option '-ihtml'
- to select the HTML stylesheet.
- -->
-
-<!ENTITY % html "IGNORE">
-<![%html;[
-<!ENTITY % print "IGNORE">
-<!ENTITY docbook.dsl SYSTEM "docbook.dsl" CDATA dsssl>
-]]>
-<!ENTITY % print "INCLUDE">
-<![%print;[
-<!ENTITY docbook.dsl SYSTEM "docbook.dsl" CDATA dsssl>
-]]>
-]>
-<style-sheet>
-<style-specification use="docbook">
-<style-specification-body>
-
-;; HTML:
-
-<![%html;[
-
-(define %footnotes-at-end%
- ;; Should footnotes appear at the end of HTML pages?
- #t)
-
-(define %html-ext%
- ;; Default extension for HTML output files
- ".html")
-
-(define %root-filename%
- ;; Name for the root HTML document
- "index")
-
-(define %css-decoration%
- ;; Enable CSS decoration of elements
- #t)
-
-(define %stylesheet%
- ;; Name of the stylesheet to use
- "markup.css")
-
-(define %graphic-default-extension%
- ;; Default extension for graphic FILEREFs
- "gif")
-
-]]>
-
-;; printing:
-
-<![%print;[
-
-(define bop-footnotes
- ;; Make "bottom-of-page" footnotes?
- #t)
-
-(define %graphic-default-extension%
- ;; Default extension for graphic FILEREFs
- "ps")
-
-]]>
-
-;; both:
-
-(define %section-autolabel%
- ;; Are sections enumerated?
- #t)
-
-</style-specification-body>
-</style-specification>
-<external-specification id="docbook" document="docbook.dsl">
-</style-sheet>
+++ /dev/null
-<!DOCTYPE book PUBLIC "-//Davenport//DTD DocBook V3.0//EN" [
-<!ENTITY markup "<acronym>PXP</acronym>">
-<!ENTITY pxp "<acronym>PXP</acronym>">
-<!ENTITY % readme.code.to-html SYSTEM "readme.ent">
-<!ENTITY apos "'">
-<!ENTITY percent "%">
-<!ENTITY % get.markup-yacc.mli SYSTEM "yacc.mli.ent">
-<!ENTITY % get.markup-dtd.mli SYSTEM "dtd.mli.ent">
-%readme.code.to-html;
-%get.markup-yacc.mli;
-%get.markup-dtd.mli;
-
-<!ENTITY fun "->"> <!-- function type operator -->
-
-]>
-
-
-<book>
-
- <title>The PXP user's guide</title>
- <bookinfo>
- <!-- <bookbiblio> -->
- <authorgroup>
- <author>
- <firstname>Gerd</firstname>
- <surname>Stolpmann</surname>
- <authorblurb>
- <para>
- <address>
- <email>gerd@gerd-stolpmann.de</email>
- </address>
- </para>
- </authorblurb>
- </author>
- </authorgroup>
-
- <copyright>
- <year>1999, 2000</year><holder>Gerd Stolpmann</holder>
- </copyright>
- <!-- </bookbiblio> -->
-
- <abstract>
- <para>
-&markup; is a validating parser for XML-1.0 which has been
-written entirely in Objective Caml.
-</para>
- <formalpara>
- <title>Download &markup;: </title>
- <para>
-The free &markup; library can be downloaded at
-<ulink URL="http://www.ocaml-programming.de/packages/">
-http://www.ocaml-programming.de/packages/
-</ulink>. This user's guide is included.
-Newest releases of &markup; will be announced in
-<ulink URL="http://www.npc.de/ocaml/linkdb/">The OCaml Link
-Database</ulink>.
-</para>
- </formalpara>
- </abstract>
-
- <legalnotice>
- <title>License</title>
- <para>
-This document, and the described software, "&markup;", are copyright by
-Gerd Stolpmann.
-</para>
-
-<para>
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this document and the "&markup;" software (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-</para>
- <para>
-The above copyright notice and this permission notice shall be included
-in all copies or substantial portions of the Software.
-</para>
- <para>
-The Software is provided ``as is'', without warranty of any kind, express
-or implied, including but not limited to the warranties of
-merchantability, fitness for a particular purpose and noninfringement.
-In no event shall Gerd Stolpmann be liable for any claim, damages or
-other liability, whether in an action of contract, tort or otherwise,
-arising from, out of or in connection with the Software or the use or
-other dealings in the software.
-</para>
- </legalnotice>
-
- </bookinfo>
-
-
-<!-- ********************************************************************** -->
-
- <part>
- <title>User's guide</title>
-
- <chapter>
- <title>What is XML?</title>
-
- <sect1>
- <title>Introduction</title>
-
- <para>XML (short for <emphasis>Extensible Markup Language</emphasis>)
-generalizes the idea that text documents are typically structured in sections,
-sub-sections, paragraphs, and so on. The format of the document is not fixed
-(as, for example, in HTML), but can be declared by a so-called DTD (document
-type definition). The DTD describes only the rules how the document can be
-structured, but not how the document can be processed. For example, if you want
-to publish a book that uses XML markup, you will need a processor that converts
-the XML file into a printable format such as Postscript. On the one hand, the
-structure of XML documents is configurable; on the other hand, there is no
-longer a canonical interpretation of the elements of the document; for example
-one XML DTD might require that paragraphs are delimited by
-<literal>para</literal> tags, while another DTD expects <literal>p</literal> tags
-for the same purpose. As a result, a new processor is required for every DTD.
-</para>
-
- <para>
-Although XML can be used to express structured text documents it is not limited
-to this kind of application. For example, XML can also be used to exchange
-structured data over a network, or to simply store structured data in
-files. Note that XML documents cannot contain arbitrary binary data because
-some characters are forbidden; for some applications you need to encode binary
-data as text (e.g. the base 64 encoding).
-</para>
-
-
- <sect2>
- <title>The "hello world" example</title>
- <para>
-The following example shows a very simple DTD, and a corresponding document
-instance. The document is structured such that it consists of sections, and
-that sections consist of paragraphs, and that paragraphs contain plain text:
-</para>
-
- <programlisting>
-<![CDATA[<!ELEMENT document (section)+>
-<!ELEMENT section (paragraph)+>
-<!ELEMENT paragraph (#PCDATA)>
-]]>
-</programlisting>
-
- <para>The following document is an instance of this DTD:</para>
-
- <programlisting>
-<![CDATA[<?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE document SYSTEM "simple.dtd">
-<document>
- <section>
- <paragraph>This is a paragraph of the first section.</paragraph>
- <paragraph>This is another paragraph of the first section.</paragraph>
- </section>
- <section>
- <paragraph>This is the only paragraph of the second section.</paragraph>
- </section>
-</document>
-]]>
-</programlisting>
-
- <para>As in HTML (and, of course, in grand-father SGML), the "pieces" of
-the document are delimited by element braces, i.e. such a piece begins with
-<literal><name-of-the-type-of-the-piece></literal> and ends with
-<literal></name-of-the-type-of-the-piece></literal>, and the pieces are
-called <emphasis>elements</emphasis>. Unlike HTML and SGML, both start tags and
-end tags (i.e. the delimiters written in angle brackets) can never be left
-out. For example, HTML calls the paragraphs simply <literal>p</literal>, and
-because paragraphs never contain paragraphs, a sequence of several paragraphs
-can be written as:
-
-<programlisting><![CDATA[<p>First paragraph
-<p>Second paragraph]]></programlisting>
-
-This is not possible in XML; continuing our example above we must always write
-
-<programlisting><![CDATA[<paragraph>First paragraph</paragraph>
-<paragraph>Second paragraph</paragraph>]]></programlisting>
-
-The rationale behind that is to (1) simplify the development of XML parsers
-(you need not convert the DTD into a deterministic finite automaton which is
-required to detect omitted tags), and to (2) make it possible to parse the
-document independent of whether the DTD is known or not.
-</para>
-
-<para>
-The first line of our sample document,
-
-<programlisting>
-<![CDATA[<?xml version="1.0" encoding="ISO-8859-1"?>]]>
-</programlisting>
-
-is the so-called <emphasis>XML declaration</emphasis>. It expresses that the
-document follows the conventions of XML version 1.0, and that the document is
-encoded using characters from the ISO-8859-1 character set (often known as
-"Latin 1", mostly used in Western Europe). Although the XML declaration is not
-mandatory, it is good style to include it; everybody sees at the first glance
-that the document uses XML markup and not the similar-looking HTML and SGML
-markup languages. If you omit the XML declaration, the parser will assume
-that the document is encoded as UTF-8 or UTF-16 (there is a rule that makes
-it possible to distinguish between UTF-8 and UTF-16 automatically); these
-are encodings of Unicode's universal character set. (Note that &pxp;, unlike its
-predecessor "Markup", fully supports Unicode.)
-</para>
-
-<para>
-The second line,
-
-<programlisting>
-<![CDATA[<!DOCTYPE document SYSTEM "simple.dtd">]]>
-</programlisting>
-
-names the DTD that is going to be used for the rest of the document. In
-general, it is possible that the DTD consists of two parts, the so-called
-external and the internal subset. "External" means that the DTD exists as a
-second file; "internal" means that the DTD is included in the same file. In
-this example, there is only an external subset, and the system identifier
-"simple.dtd" specifies where the DTD file can be found. System identifiers are
-interpreted as URLs; for instance this would be legal:
-
-<programlisting>
-<![CDATA[<!DOCTYPE document SYSTEM "http://host/location/simple.dtd">]]>
-</programlisting>
-
-Please note that &pxp; cannot interpret HTTP identifiers by default, but it is
-possible to change the interpretation of system identifiers.
-</para>
-
- <para>
-The word immediately following <literal>DOCTYPE</literal> determines which of
-the declared element types (here "document", "section", and "paragraph") is
-used for the outermost element, the <emphasis>root element</emphasis>. In this
-example it is <literal>document</literal> because the outermost element is
-delimited by <literal><document></literal> and
-<literal></document></literal>.
-</para>
-
- <para>
-The DTD consists of three declarations for element types:
-<literal>document</literal>, <literal>section</literal>, and
-<literal>paragraph</literal>. Such a declaration has two parts:
-
-<programlisting>
-<!ELEMENT <replaceable>name</replaceable> <replaceable>content-model</replaceable>>
-</programlisting>
-
-The content model is a regular expression which describes the possible inner
-structure of the element. Here, <literal>document</literal> contains one or
-more sections, and a <literal>section</literal> contains one or more
-paragraphs. Note that these two element types are not allowed to contain
-arbitrary text. Only the <literal>paragraph</literal> element type is declared
-such that parsed character data (indicated by the symbol
-<literal>#PCDATA</literal>) is permitted.
-</para>
-
- <para>
-See below for a detailed discussion of content models.
-</para>
- </sect2>
-
- <sect2>
- <title>XML parsers and processors</title>
- <para>
-XML documents are human-readable, but this is not the main purpose of this
-language. XML has been designed such that documents can be read by a program
-called an <emphasis>XML parser</emphasis>. The parser checks that the document
-is correct, and it represents the document as objects of the programming
-language. There are two aspects of checking the document: first, the document
-must follow some basic syntactic rules, such as that tags are written in angle
-brackets, that for every start tag there must be a corresponding end tag and so
-on. A document respecting these rules is
-<emphasis>well-formed</emphasis>. Second, the document must match the DTD in
-which case the document is <emphasis>valid</emphasis>. Many parsers check only
-well-formedness and ignore the DTD; &pxp; is designed such that it can
-even validate the document.
-</para>
-
- <para>
-A parser alone does not make a sensible application; it only reads XML
-documents. The whole application working with XML-formatted data is called an
-<emphasis>XML processor</emphasis>. Often XML processors convert documents into
-another format, such as HTML or Postscript. Sometimes processors extract data
-from the documents and output the processed data as XML again. The parser
-can help the application process the document; for example, it can provide
-means to access the document in a specific manner. &pxp; in particular provides
-an object-oriented access layer.
-</para>
- </sect2>
-
- <sect2>
- <title>Discussion</title>
- <para>
-As we have seen, there are two levels of description: On the one hand, XML can
-define rules about the format of a document (the DTD), on the other hand, XML
-expresses structured documents. There are a number of possible applications:
-</para>
-
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-XML can be used to express structured texts. Unlike HTML, there is no canonical
-interpretation; one would have to write a backend for the DTD that translates
-the structured texts into a format that existing browsers, printers
-etc. understand. The advantage of a self-defined document format is that it is
-possible to design the format in a more problem-oriented way. For example, if
-the task is to extract reports from a database, one can use a DTD that reflects
-the structure of the report or the database. A possible approach would be to
-have an element type for every database table and for every column. Once the
-DTD has been designed, the report procedure can be split up into a part that
-selects the database rows and outputs them as an XML document according to the
-DTD, and into a part that translates the document into other formats. Of course,
-the latter part can be solved in a generic way, e.g. there may be configurable
-backends for all DTDs that follow the approach and have element types for
-tables and columns.
-</para>
-
- <para>
-XML plays the role of a configurable intermediate format. The database
-extraction function can be written without having to know the details of
-typesetting; the backends can be written without having to know the details of
-the database.
-</para>
-
- <para>
-Of course, there are traditional solutions. One can define an ad hoc
-intermediate text file format. The disadvantage is that there are no names for
-the pieces of the format, and that such formats usually lack documentation
-for this reason. Another solution would be to use a binary representation,
-either as a language-dependent or a language-independent structure (examples of
-the latter can be found in RPC implementations). The disadvantage is that it is
-harder to view such representations; one has to write pretty printers for this
-purpose. It is also more difficult to enter test data; XML is plain text that
-can be written using an arbitrary editor (Emacs has even a good XML mode,
-PSGML). All these alternatives suffer from a missing structure checker,
-i.e. the programs processing these formats usually do not check the input file
-or input object in detail; XML parsers check the syntax of the input (the
-so-called well-formedness check), and the advanced parsers like &markup; even
-verify that the structure matches the DTD (the so-called validation).
-</para>
-
- </listitem>
-
- <listitem>
- <para>
-XML can be used as a configurable communication language. A fundamental problem
-of every communication is that sender and receiver must follow the same
-conventions about the language. For data exchange, the question is usually
-which data records and fields are available, how they are syntactically
-composed, and which values are possible for the various fields. Similar
-questions arise for text document exchange. XML does not answer these problems
-completely, but it reduces the number of ambiguities for such conventions: The
-outlines of the syntax are specified by the DTD (but not necessarily the
-details), and XML introduces canonical names for the components of documents
-such that it is simpler to describe the rest of the syntax and the semantics
-informally.
-</para>
- </listitem>
-
- <listitem>
- <para>
-XML is a data storage format. Currently, every software product tends to use
-its own way to store data; commercial software often does not describe such
-formats, and it is a pain to integrate such software into a bigger project.
-XML can help to improve this situation when several applications share the same
-syntax of data files. DTDs are then neutral instances that check the format of
-data files independent of applications.
-</para>
- </listitem>
-
- </itemizedlist>
- </sect2>
- </sect1>
-
-
- <!-- ================================================== -->
-
-
- <sect1>
- <title>Highlights of XML</title>
-
- <para>
-This section explains many of the features of XML, but not all, and some
-features not in detail. For a complete description, see the <ulink
-url="http://www.w3.org/TR/1998/REC-xml-19980210.html">XML
-specification</ulink>.
-</para>
-
- <sect2>
- <title>The DTD and the instance</title>
- <para>
-The DTD contains various declarations; in general you can only use a feature if
-you have previously declared it. The document instance file may contain the
-full DTD, but it is also possible to split the DTD into an internal and an
-external subset. A document must begin as follows if the full DTD is included:
-
-<programlisting>
-<?xml version="1.0" encoding="<replaceable>Your encoding</replaceable>"?>
-<!DOCTYPE <replaceable>root</replaceable> [
- <replaceable>Declarations</replaceable>
-]>
-</programlisting>
-
-These declarations are called the <emphasis>internal subset</emphasis>. Note
-that the usage of entities and conditional sections is restricted within the
-internal subset.
-</para>
- <para>
-If the declarations are located in a different file, you can refer to this file
-as follows:
-
-<programlisting>
-<?xml version="1.0" encoding="<replaceable>Your encoding</replaceable>"?>
-<!DOCTYPE <replaceable>root</replaceable> SYSTEM "<replaceable>file name</replaceable>">
-</programlisting>
-
-The declarations in the file are called the <emphasis>external
-subset</emphasis>. The file name is called the <emphasis>system
-identifier</emphasis>.
-It is also possible to refer to the file by a so-called
-<emphasis>public identifier</emphasis>, but most XML applications won't use
-this feature.
-</para>
- <para>
-You can also specify both internal and external subsets. In this case, the
-declarations of both subsets are mixed, and if there are conflicts, the
-declaration of the internal subset overrides those of the external subset with
-the same name. This looks as follows:
-
-<programlisting>
-<?xml version="1.0" encoding="<replaceable>Your encoding</replaceable>"?>
-<!DOCTYPE <replaceable>root</replaceable> SYSTEM "<replaceable>file name</replaceable>" [
- <replaceable>Declarations</replaceable>
-]>
-</programlisting>
-</para>
-
- <para>
-The XML declaration (the string beginning with <literal><?xml</literal> and
-ending at <literal>?></literal>) should specify the encoding of the
-file. Common values are UTF-8, and the ISO-8859 series of character sets. Note
-that every file parsed by the XML processor can begin with an XML declaration
-and that every file may have its own encoding.
-</para>
-
- <para>
-The name of the root element must be mentioned directly after the
-<literal>DOCTYPE</literal> string. This means that a full document instance
-looks like
-
-<programlisting>
-<?xml version="1.0" encoding="<replaceable>Your encoding</replaceable>"?>
-<!DOCTYPE <replaceable>root</replaceable> SYSTEM "<replaceable>file name</replaceable>" [
- <replaceable>Declarations</replaceable>
-]>
-
-<<replaceable>root</replaceable>>
- <replaceable>inner contents</replaceable>
-</<replaceable>root</replaceable>>
-</programlisting>
-</para>
- </sect2>
-
- <!-- ======================================== -->
-
- <sect2>
- <title>Reserved characters</title>
- <para>
-Some characters are generally reserved to indicate markup such that they cannot
-be used for character data. These characters are <, >, and
-&. Furthermore, single and double quotes are sometimes reserved. If you
-want to include such a character as character, write it as follows:
-
-<itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<literal>&lt;</literal> instead of <
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>&gt;</literal> instead of >
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>&amp;</literal> instead of &
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>&apos;</literal> instead of '
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>&quot;</literal> instead of "
-</para>
- </listitem>
- </itemizedlist>
-
-All other characters are free in the document instance. It is possible to
-include a character by its position in the Unicode alphabet:
-
-<programlisting>
-&#<replaceable>n</replaceable>;
-</programlisting>
-
-where <replaceable>n</replaceable> is the decimal number of the
-character. Alternatively, you can specify the character by its hexadecimal
-number:
-
-<programlisting>
-&#x<replaceable>n</replaceable>;
-</programlisting>
-
-In the scope of declarations, the character % is no longer free. To include it
-as character, you must use the notations <literal>&#37;</literal> or
-<literal>&#x25;</literal>.
-</para>
-
- <para>Note that besides &lt;, &gt;, &amp;,
-&apos;, and &quot; there are no predefined character entities. This is
-different from HTML which defines a list of characters that can be referenced
-by name (e.g. &auml; for ä); however, if you prefer named characters, you
-can declare such entities yourself (see below).</para>
- </sect2>
-
-
- <!-- ======================================== -->
-
- <sect2>
- <title>Elements and ELEMENT declarations</title>
-
- <para>
-Elements structure the document instance in a hierarchical way. There is a
-top-level element, the <emphasis>root element</emphasis>, which contains a
-sequence of inner elements and character sections. The inner elements are
-structured in the same way. Every element has an <emphasis>element
-type</emphasis>. The beginning of the element is indicated by a <emphasis>start
-tag</emphasis>, written
-
-<programlisting>
-<<replaceable>element-type</replaceable>>
-</programlisting>
-
-and the element continues until the corresponding <emphasis>end tag</emphasis>
-is reached:
-
-<programlisting>
-</<replaceable>element-type</replaceable>>
-</programlisting>
-
-In XML, it is not allowed to omit start or end tags, even if the DTD would
-permit this. Note that there are no special rules how to interpret spaces or
-newlines near start or end tags; all spaces and newlines count.
-</para>
-
- <para>
-Every element type must be declared before it can be used. The declaration
-consists of two parts: the ELEMENT declaration describes the content model,
-i.e. which inner elements are allowed; the ATTLIST declaration describes the
-attributes of the element.
-</para>
-
- <para>
-An element can simply allow everything as content. This is written:
-
-<programlisting>
-<!ELEMENT <replaceable>name</replaceable> ANY>
-</programlisting>
-
-Conversely, an element can be forced to be empty, which is declared by:
-
-<programlisting>
-<!ELEMENT <replaceable>name</replaceable> EMPTY>
-</programlisting>
-
-Note that there is an abbreviated notation for empty element instances:
-<literal><<replaceable>name</replaceable>/></literal>.
-</para>
-
- <para>
-There are two more sophisticated forms of declarations: so-called
-<emphasis>mixed declarations</emphasis>, and <emphasis>regular
-expressions</emphasis>. An element with mixed content contains character data
-interspersed with inner elements, and the set of allowed inner elements can be
-specified. In contrast to this, a regular expression declaration does not allow
-character data, but the inner elements can be described by the more powerful
-means of regular expressions.
-</para>
-
- <para>
-A declaration for mixed content looks as follows:
-
-<programlisting>
-<!ELEMENT <replaceable>name</replaceable> (#PCDATA | <replaceable>element<subscript>1</subscript></replaceable> | ... | <replaceable>element<subscript>n</subscript></replaceable> )*>
-</programlisting>
-
-or if you do not want to allow any inner element, simply
-
-<programlisting>
-<!ELEMENT <replaceable>name</replaceable> (#PCDATA)>
-</programlisting>
-</para>
-
-
-<blockquote>
- <title>Example</title>
- <para>
-If element type <literal>q</literal> is declared as
-
-<programlisting>
-<![CDATA[<!ELEMENT q (#PCDATA | r | s)*>]]>
-</programlisting>
-
-this is a legal instance:
-
-<programlisting>
-<![CDATA[<q>This is character data<r></r>with <s></s>inner elements</q>]]>
-</programlisting>
-
-But this is illegal because <literal>t</literal> has not been enumerated in the
-declaration:
-
-<programlisting>
-<![CDATA[<q>This is character data<r></r>with <t></t>inner elements</q>]]>
-</programlisting>
-</para>
- </blockquote>
-
- <para>
-The other form uses a regular expression to describe the possible contents:
-
-<programlisting>
-<!ELEMENT <replaceable>name</replaceable> <replaceable>regexp</replaceable>>
-</programlisting>
-
-The following well-known regexp operators are allowed:
-
-<itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<literal><replaceable>element-name</replaceable></literal>
-</para>
- </listitem>
-
- <listitem>
- <para>
-<literal>(<replaceable>subexpr<subscript>1</subscript></replaceable> ,</literal> ... <literal>, <replaceable>subexpr<subscript>n</subscript></replaceable> )</literal>
-</para>
- </listitem>
-
- <listitem>
- <para>
-<literal>(<replaceable>subexpr<subscript>1</subscript></replaceable> |</literal> ... <literal>| <replaceable>subexpr<subscript>n</subscript></replaceable> )</literal>
-</para>
- </listitem>
-
- <listitem>
- <para>
-<literal><replaceable>subexpr</replaceable>*</literal>
-</para>
- </listitem>
-
- <listitem>
- <para>
-<literal><replaceable>subexpr</replaceable>+</literal>
-</para>
- </listitem>
-
- <listitem>
- <para>
-<literal><replaceable>subexpr</replaceable>?</literal>
-</para>
- </listitem>
- </itemizedlist>
-
-The <literal>,</literal> operator indicates a sequence of sub-models, the
-<literal>|</literal> operator describes alternative sub-models. The
-<literal>*</literal> indicates zero or more repetitions, and
-<literal>+</literal> one or more repetitions. Finally, <literal>?</literal> can
-be used for optional sub-models. As atoms the regexp can contain names of
-elements; note that it is not allowed to include <literal>#PCDATA</literal>.
-</para>
-
- <para>
-The exact syntax of the regular expressions is rather strange. This can be
-explained best by a list of constraints:
-
-<itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-The outermost expression must not be
-<literal><replaceable>element-name</replaceable></literal>.
-</para>
- <para><emphasis>Illegal:</emphasis>
-<literal><![CDATA[<!ELEMENT x y>]]></literal>; this must be written as
-<literal><![CDATA[<!ELEMENT x (y)>]]></literal>.</para>
- </listitem>
- <listitem>
- <para>
-For the unary operators <literal><replaceable>subexpr</replaceable>*</literal>,
-<literal><replaceable>subexpr</replaceable>+</literal>, and
-<literal><replaceable>subexpr</replaceable>?</literal>, the
-<literal><replaceable>subexpr</replaceable></literal> must not itself be an
-expression formed with a unary operator.
-</para>
- <para><emphasis>Illegal:</emphasis>
-<literal><![CDATA[<!ELEMENT x y**>]]></literal>; this must be written as
-<literal><![CDATA[<!ELEMENT x (y*)*>]]></literal>.</para>
- </listitem>
- <listitem>
- <para>
-Between <literal>)</literal> and one of the unary operators
-<literal>*</literal>, <literal>+</literal>, or <literal>?</literal>, there must
-not be whitespace.</para>
- <para><emphasis>Illegal:</emphasis>
-<literal><![CDATA[<!ELEMENT x (y|z) *>]]></literal>; this must be written as
-<literal><![CDATA[<!ELEMENT x (y|z)*>]]></literal>.</para>
- </listitem>
- <listitem><para>There is the additional constraint that the
-right parenthesis must be contained in the same entity as the left parenthesis;
-see the section about parsed entities below.</para>
- </listitem>
- </itemizedlist>
-
-</para>
-
-<para>
-Note that there is another restriction on regular expressions: they must be
-deterministic. This means that the parser must be able to see by looking at the
-next token which alternative is actually used, or whether the repetition
-stops. The reason for this is simply compatibility with SGML (there is no
-intrinsic reason for this rule; XML can live without this restriction).
-</para>
-
- <blockquote>
- <title>Example</title>
- <para>
-The elements are declared as follows:
-
-<programlisting>
-<![CDATA[<!ELEMENT q (r?, (s | t)+)>
-<!ELEMENT r (#PCDATA)>
-<!ELEMENT s EMPTY>
-<!ELEMENT t (q | r)>
-]]></programlisting>
-
-This is a legal instance:
-
-<programlisting>
-<![CDATA[<q><r>Some characters</r><s/></q>]]>
-</programlisting>
-
-(Note: <literal><s/></literal> is an abbreviation for
-<literal><s></s></literal>.)
-
-It would be illegal to leave <literal><![CDATA[<s/>]]></literal> out because at
-least one instance of <literal>s</literal> or <literal>t</literal> must be
-present. It would be illegal, too, if characters existed outside the
-<literal>r</literal> element; the only exception is white space. -- This is
-legal, too:
-
-<programlisting>
-<![CDATA[<q><s/><t><q><s/></q></t></q>]]>
-</programlisting>
-</para>
- </blockquote>
-
- </sect2>
-
- <!-- ======================================== -->
-
- <sect2>
- <title>Attribute lists and ATTLIST declarations</title>
- <para>
-Elements may have attributes. These are put into the start tag of an element as
-follows:
-
-<programlisting>
-<<replaceable>element-name</replaceable> <replaceable>attribute<subscript>1</subscript></replaceable>="<replaceable>value<subscript>1</subscript></replaceable>" ... <replaceable>attribute<subscript>n</subscript></replaceable>="<replaceable>value<subscript>n</subscript></replaceable>">
-</programlisting>
-
-Instead of
-<literal>"<replaceable>value<subscript>k</subscript></replaceable>"</literal>
-it is also possible to use single quotes as in
-<literal>'<replaceable>value<subscript>k</subscript></replaceable>'</literal>.
-Note that you cannot use double quotes literally within the value of the
-attribute if double quotes are the delimiters; the same applies to single
-quotes. You can generally not use < and & as characters in attribute
-values. It is possible to include the paraphrases &lt;, &gt;,
-&amp;, &apos;, and &quot; (and any other reference to a general
-entity as long as the entity is not defined by an external file) as well as
-&#<replaceable>n</replaceable>;.
-</para>
-
- <para>
-Before you can use an attribute you must declare it. An ATTLIST declaration
-looks as follows:
-
-<programlisting>
-<!ATTLIST <replaceable>element-name</replaceable>
- <replaceable>attribute-name</replaceable> <replaceable>attribute-type</replaceable> <replaceable>attribute-default</replaceable>
- ...
- <replaceable>attribute-name</replaceable> <replaceable>attribute-type</replaceable> <replaceable>attribute-default</replaceable>
->
-</programlisting>
-
-There are a lot of types, but most important are:
-
-<itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<literal>CDATA</literal>: Every string is allowed as attribute value.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>NMTOKEN</literal>: Every nametoken is allowed as attribute
-value. Nametokens consist (mainly) of letters, digits, ., :, -, _ in arbitrary
-order.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>NMTOKENS</literal>: A space-separated list of nametokens is allowed as
-attribute value.
-</para>
- </listitem>
- </itemizedlist>
-
-The most interesting default declarations are:
-
-<itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<literal>#REQUIRED</literal>: The attribute must be specified.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>#IMPLIED</literal>: The attribute can be specified but also can be
-left out. The application can find out whether the attribute was present or
-not.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>"<replaceable>value</replaceable>"</literal> or
-<literal>'<replaceable>value</replaceable>'</literal>: This particular value is
-used as default if the attribute is omitted in the element.
-</para>
- </listitem>
- </itemizedlist>
-</para>
-
- <blockquote>
- <title>Example</title>
- <para>
-This is a valid attribute declaration for element type <literal>r</literal>:
-
-<programlisting>
-<![CDATA[<!ATTLIST r
- x CDATA #REQUIRED
- y NMTOKEN #IMPLIED
- z NMTOKENS "one two three">
-]]></programlisting>
-
-This means that <literal>x</literal> is a required attribute that cannot be
-left out, while <literal>y</literal> and <literal>z</literal> are optional. The
-XML parser tells the application whether <literal>y</literal> is present or
-not, but if <literal>z</literal> is missing, the default value
-"one two three" is returned automatically.
-</para>
-
- <para>
-This is a valid example of these attributes:
-
-<programlisting>
-<![CDATA[<r x="He said: "I don't like quotes!"" y='1'>]]>
-</programlisting>
-</para>
- </blockquote>
-
- </sect2>
-
- <sect2>
- <title>Parsed entities</title>
- <para>
-Elements describe the logical structure of the document, while
-<emphasis>entities</emphasis> determine the physical structure. Entities are
-the pieces of text the parser operates on, mostly files and macros. Entities
-may be <emphasis>parsed</emphasis> in which case the parser reads the text and
-interprets it as XML markup, or <emphasis>unparsed</emphasis> which simply
-means that the data of the entity has a foreign format (e.g. a GIF icon).
-</para>
-
- <para>If the parsed entity is going to be used as part of the DTD, it
-is called a <emphasis>parameter entity</emphasis>. You can declare a parameter
-entity with a fixed text as content by:
-
-<programlisting>
-<!ENTITY % <replaceable>name</replaceable> "<replaceable>value</replaceable>">
-</programlisting>
-
-Within the DTD, you can <emphasis>refer to</emphasis> this entity, i.e. read
-the text of the entity, by:
-
-<programlisting>
-%<replaceable>name</replaceable>;
-</programlisting>
-
-Such entities behave like macros, i.e. when they are referred to, the
-macro text is inserted and read in place of the reference.
-
-<blockquote>
- <title>Example</title>
- <para>
-For example, you can declare two elements with the same content model by:
-
-<programlisting>
-<![CDATA[
-<!ENTITY % model "a | b | c">
-<!ELEMENT x (%model;)>
-<!ELEMENT y (%model;)>
-]]>
-</programlisting>
-
-</para>
- </blockquote>
-
-If the contents of the entity are given as a string constant, the entity is
-called an <emphasis>internal</emphasis> entity. It is also possible to name a
-file to be used as content (an <emphasis>external</emphasis> entity):
-
-<programlisting>
-<!ENTITY % <replaceable>name</replaceable> SYSTEM "<replaceable>file name</replaceable>">
-</programlisting>
-
-There are some restrictions for parameter entities:
-
-<itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-If the internal parameter entity contains the first token of a declaration
-(i.e. <literal><!</literal>), it must also contain the last token of the
-declaration, i.e. the <literal>></literal>. This means that the entity
-either contains a whole number of complete declarations, or some text from the
-middle of one declaration.
-</para>
-<para><emphasis>Illegal:</emphasis>
-<programlisting>
-<![CDATA[
-<!ENTITY % e "(a | b | c)>">
-<!ELEMENT x %e;
-]]></programlisting> Because <literal><!</literal> is contained in the main
-entity, and the corresponding <literal>></literal> is contained in the
-entity <literal>e</literal>.</para>
- </listitem>
- <listitem>
- <para>
-If the internal parameter entity contains a left parenthesis, it must also
-contain the corresponding right parenthesis.
-</para>
-<para><emphasis>Illegal:</emphasis>
-<programlisting>
-<![CDATA[
-<!ENTITY % e "(a | b | c">
-<!ELEMENT x %e;)>
-]]></programlisting> Because <literal>(</literal> is contained in the entity
-<literal>e</literal>, and the corresponding <literal>)</literal> is
-contained in the main entity.</para>
- </listitem>
- <listitem>
- <para>
-When reading text from an entity, the parser automatically inserts one space
-character before the entity text and one space character after the entity
-text. However, this rule is not applied within the definition of another
-entity.</para>
-<para><emphasis>Legal:</emphasis>
-<programlisting>
-<![CDATA[
-<!ENTITY % suffix "gif">
-<!ENTITY iconfile 'icon.%suffix;'>
-]]></programlisting> Because <literal>%suffix;</literal> is referenced within
-the definition text for <literal>iconfile</literal>, no additional spaces are
-added.
-</para>
-<para><emphasis>Illegal:</emphasis>
-<programlisting>
-<![CDATA[
-<!ENTITY % suffix "test">
-<!ELEMENT x.%suffix; ANY>
-]]></programlisting>
-Because <literal>%suffix;</literal> is referenced outside the definition
-text of another entity, the parser replaces <literal>%suffix;</literal> by
-<literal><replaceable>space</replaceable>test<replaceable>space</replaceable></literal>. </para>
-<para><emphasis>Illegal:</emphasis>
-<programlisting>
-<![CDATA[
-<!ENTITY % e "(a | b | c)">
-<!ELEMENT x %e;*>
-]]></programlisting> Because there is whitespace between <literal>)</literal>
-and <literal>*</literal>, which is illegal.</para>
- </listitem>
- <listitem>
- <para>
-An external parameter entity must always consist of a whole number of complete
-declarations.
-</para>
- </listitem>
- <listitem>
- <para>
-In the internal subset of the DTD, a reference to a parameter entity (internal
-or external) is only allowed at positions where a new declaration can start.
-</para>
- </listitem>
- </itemizedlist>
-</para>
-
- <para>
-If the parsed entity is going to be used in the document instance, it is called
-a <emphasis>general entity</emphasis>. Such entities can be used as
-abbreviations for frequent phrases, or to include external files. Internal
-general entities are declared as follows:
-
-<programlisting>
-<!ENTITY <replaceable>name</replaceable> "<replaceable>value</replaceable>">
-</programlisting>
-
-External general entities are declared this way:
-
-<programlisting>
-<!ENTITY <replaceable>name</replaceable> SYSTEM "<replaceable>file name</replaceable>">
-</programlisting>
-
-References to general entities are written as:
-
-<programlisting>
-&<replaceable>name</replaceable>;
-</programlisting>
-
-The main difference between parameter and general entities is that the former
-are only recognized in the DTD and that the latter are only recognized in the
-document instance. As the DTD is parsed before the document, the parameter
-entities are expanded first; for example it is possible to use the content of a
-parameter entity as the name of a general entity:
-<literal>&#38;%name;;</literal><footnote><para>This construct is only
-allowed within the definition of another entity; otherwise extra spaces would
-be added (as explained above). Such indirection is not recommended.
-</para>
-<para>Complete example:
-<programlisting>
-<![CDATA[
-<!ENTITY % variant "a"> <!-- or "b" -->
-<!ENTITY text-a "This is text A.">
-<!ENTITY text-b "This is text B.">
-<!ENTITY text "&text-%variant;;">
-]]></programlisting>
-You can now write <literal>&text;</literal> in the document instance, and
-depending on the value of <literal>variant</literal> either
-<literal>text-a</literal> or <literal>text-b</literal> is inserted.</para>
-</footnote>.
-</para>
- <para>
-General entities must respect the element hierarchy. This means that there must
-be an end tag for every start tag in the entity value, and that end tags
-without corresponding start tags are not allowed.
-</para>
-
- <blockquote>
- <title>Example</title>
- <para>
-If the author of a document changes from time to time, it is worthwhile to set
-up a general entity containing the names of the authors. If the author changes,
-you only need to change the definition of the entity, and do not need to check
-all occurrences of authors' names:
-
-<programlisting>
-<![CDATA[
-<!ENTITY authors "Gerd Stolpmann">
-]]>
-</programlisting>
-
-In the document text, you can now refer to the author names by writing
-<literal>&authors;</literal>.
-</para>
-
- <para>
-<emphasis>Illegal:</emphasis>
-The following two entities are illegal because the elements in the definition
-do not nest properly:
-
-<programlisting>
-<![CDATA[
-<!ENTITY lengthy-tag "<section textcolor='white' background='graphic'>">
-<!ENTITY nonsense "<a></b>">
-]]></programlisting>
-</para>
- </blockquote>
-
- <para>
-Earlier in this introduction we explained that there are substitutes for
-reserved characters: &lt;, &gt;, &amp;, &apos;, and
-&quot;. These are simply predefined general entities; note that they are
-the only predefined entities. It is allowed to define these entities again
-as long as the meaning is unchanged.
-</para>
- </sect2>
-
- <sect2>
- <title>Notations and unparsed entities</title>
- <para>
-Unparsed entities have a foreign format and can thus not be read by the XML
-parser. Unparsed entities are always external. The format of an unparsed entity
-must have been declared; such a format is called a
-<emphasis>notation</emphasis>. The entity can then be declared by referring to
-this notation. As unparsed entities do not contain XML text, it is not possible
-to include them directly into the document; you can only declare attributes
-such that names of unparsed entities are acceptable values.
-</para>
-
- <para>
-As you can see, unparsed entities are too complicated to be of much practical
-use. It is almost always better to simply pass the name of the data file as a
-normal attribute value, and let the application recognize and process the
-foreign format.
-</para>
- </sect2>
-
- </sect1>
-
-
- <!-- ================================================== -->
-
-
- <sect1 id="sect.readme.dtd">
- <title>A complete example: The <emphasis>readme</emphasis> DTD</title>
- <para>
-The reason for <emphasis>readme</emphasis> was that I often wrote two versions
-of files such as README and INSTALL which explain aspects of a distributed
-software archive; one version was ASCII-formatted, the other was written in
-HTML. Maintaining both versions means twice the amount of work, and changes
-to one version may be forgotten in the other version. To improve this situation
-I invented the <emphasis>readme</emphasis> DTD which allows me to maintain only
-one source written as an XML document, and to generate the ASCII and the HTML
-version from it.
-</para>
-
- <para>
-In this section, I explain only the DTD. The <emphasis>readme</emphasis> DTD is
-contained in the &markup; distribution together with the two converters to
-produce ASCII and HTML. Another <link
-linkend="sect.readme.to-html">section</link> of this manual describes the HTML
-converter.
-</para>
-
- <para>
-The documents have a simple structure: There are up to three levels of nested
-sections, paragraphs, item lists, footnotes, hyperlinks, and text emphasis. The
-outermost element usually has the type <literal>readme</literal>; it is
-declared by
-
-<programlisting>
-<![CDATA[<!ELEMENT readme (sect1+)>
-<!ATTLIST readme
- title CDATA #REQUIRED>
-]]></programlisting>
-
-This means that this element contains one or more sections of the first level
-(element type <literal>sect1</literal>), and that the element has a required
-attribute <literal>title</literal> containing character data (CDATA). Note that
-<literal>readme</literal> elements must not contain text data.
-</para>
-
- <para>
-The three levels of sections are declared as follows:
-
-<programlisting>
-<![CDATA[<!ELEMENT sect1 (title,(sect2|p|ul)+)>
-
-<!ELEMENT sect2 (title,(sect3|p|ul)+)>
-
-<!ELEMENT sect3 (title,(p|ul)+)>
-]]></programlisting>
-
-Every section has a <literal>title</literal> element as first subelement. After
-the title an arbitrary but non-empty sequence of inner sections, paragraphs and
-item lists follows. Note that the inner sections must belong to the next deeper
-section level; <literal>sect3</literal> elements must not contain inner
-sections because there is no fourth level.
-</para>
-
- <para>
-Obviously, all three declarations allow paragraphs (<literal>p</literal>) and
-item lists (<literal>ul</literal>). The definition can be simplified at this
-point by using a parameter entity:
-
-<programlisting>
-<![CDATA[<!ENTITY % p.like "p|ul">
-
-<!ELEMENT sect1 (title,(sect2|%p.like;)+)>
-
-<!ELEMENT sect2 (title,(sect3|%p.like;)+)>
-
-<!ELEMENT sect3 (title,(%p.like;)+)>
-]]></programlisting>
-
-Here, the entity <literal>p.like</literal> is nothing but a macro abbreviating
-the common part of the content models; if new elements on the same level as
-<literal>p</literal> and <literal>ul</literal> are later added, it is
-sufficient to change only the entity definition. Note that there are some
-restrictions on the use of entities in this context; most importantly, entities
-containing a left parenthesis must also contain the corresponding right
-parenthesis.
-</para>
-
- <para>
-Note that the entity <literal>p.like</literal> is a
-<emphasis>parameter</emphasis> entity, i.e. the ENTITY declaration contains a
-percent sign, and the entity is referred to by
-<literal>%p.like;</literal>. This kind of entity must be used to abbreviate
-parts of the DTD; the <emphasis>general</emphasis> entities declared without
-percent sign and referred to as <literal>&name;</literal> are not allowed
-in this context.
-</para>
-
- <para>
-The <literal>title</literal> element specifies the title of the section in
-which it occurs. The title is given as character data, optionally interspersed
-with line breaks (<literal>br</literal>):
-
-<programlisting>
-<![CDATA[<!ELEMENT title (#PCDATA|br)*>
-]]></programlisting>
-
-Compared with the <literal>title</literal> <emphasis>attribute</emphasis> of
-the <literal>readme</literal> element, this element allows inner markup
-(i.e. <literal>br</literal>) while attribute values do not: it is an error if
-an attribute value literally contains the left angle bracket <, so it
-is impossible to include inner elements.
-</para>
-
- <para>
-The paragraph element <literal>p</literal> has a structure similar to
-<literal>title</literal>, but it allows more inner elements:
-
-<programlisting>
-<![CDATA[<!ENTITY % text "br|code|em|footnote|a">
-
-<!ELEMENT p (#PCDATA|%text;)*>
-]]></programlisting>
-
-Line breaks do not have inner structure, so they are declared as being empty:
-
-<programlisting>
-<![CDATA[<!ELEMENT br EMPTY>
-]]></programlisting>
-
-This means that really nothing is allowed within <literal>br</literal>; you
-must always write <literal><![CDATA[<br></br>]]></literal> or abbreviated
-<literal><![CDATA[<br/>]]></literal>.
-</para>
-
- <para>
-Code samples should be marked up by the <literal>code</literal> tag; emphasized
-text can be indicated by <literal>em</literal>:
-
-<programlisting>
-<![CDATA[<!ELEMENT code (#PCDATA)>
-
-<!ELEMENT em (#PCDATA|%text;)*>
-]]></programlisting>
-
-It is a design decision by the author of the DTD that <literal>code</literal>
-elements may not contain further markup while <literal>em</literal> elements
-may.
-</para>
-
- <para>
-Unordered lists simply consist of one or more list items, and a list item may
-contain paragraph-level material:
-
-<programlisting>
-<![CDATA[<!ELEMENT ul (li+)>
-
-<!ELEMENT li (%p.like;)*>
-]]></programlisting>
-
-Footnotes are described by the text of the note; this text may contain
-text-level markup. There is no mechanism to describe the numbering scheme of
-footnotes, or to specify how footnote references are printed.
-
-<programlisting>
-<![CDATA[<!ELEMENT footnote (#PCDATA|%text;)*>
-]]></programlisting>
-
-Hyperlinks are written as in HTML. The anchor tag contains the text describing
-where the link points to, and the <literal>href</literal> attribute is the
-pointer (as URL). There is no way to describe locations of "hash marks". If the
-link refers to another <emphasis>readme</emphasis> document, the attribute
-<literal>readmeref</literal> should be used instead of <literal>href</literal>.
-The reason is that the converted document usually has a different system
-identifier (file name), and the link to a converted document must be
-converted, too.
-
-<programlisting>
-<![CDATA[<!ELEMENT a (#PCDATA)*>
-<!ATTLIST a
- href CDATA #IMPLIED
- readmeref CDATA #IMPLIED
->
-]]></programlisting>
-
-Note that although it is only sensible to specify one of the two attributes,
-the DTD has no means to express this restriction.
-</para>
-
-<para>
-That completes the DTD. Finally, here is a document for it:
-
-<programlisting>
-<![CDATA[
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE readme SYSTEM "readme.dtd">
-<readme title="How to use the readme converters">
-<sect1>
- <title>Usage</title>
- <p>
- The <em>readme</em> converter is invoked on the command line by:
- </p>
- <p>
- <code>readme [ -text | -html ] input.xml</code>
- </p>
- <p>
- Here a list of options:
- </p>
- <ul>
- <li>
- <p><code>-text</code>: specifies that ASCII output should be produced</p>
- </li>
- <li>
- <p><code>-html</code>: specifies that HTML output should be produced</p>
- </li>
- </ul>
- <p>
- The input file must be given on the command line. The converted output is
- printed to <em>stdout</em>.
- </p>
-</sect1>
-<sect1>
- <title>Author</title>
- <p>
- The program has been written by
- <a href="mailto:Gerd.Stolpmann@darmstadt.netsurf.de">Gerd Stolpmann</a>.
- </p>
-</sect1>
-</readme>
-]]></programlisting>
-
-</para>
-
-
- </sect1>
- </chapter>
-
-<!-- ********************************************************************** -->
-
- <chapter>
- <title>Using &markup;</title>
-
- <sect1>
- <title>Validation</title>
- <para>
-The parser can be used to <emphasis>validate</emphasis> a document. This means
-that all the constraints that must hold for a valid document are actually
-checked. Validation is the default mode of &markup;, i.e. every document is
-validated while it is being parsed.
-</para>
-
- <para>
-In the <literal>examples</literal> directory of the distribution you find the
-<literal>pxpvalidate</literal> application. It is invoked in the following way:
-
-<programlisting>
-pxpvalidate [ -wf ] <replaceable>file</replaceable>...
-</programlisting>
-
-The files mentioned on the command line are validated, and all warning and
-error messages are printed to stderr.
-</para>
-
- <para>
-The -wf switch modifies the behaviour such that a well-formedness parser is
-simulated. In this mode, the ELEMENT, ATTLIST, and NOTATION declarations of the
-DTD are ignored, and only the ENTITY declarations will take effect. This mode
-is intended for documents lacking a DTD. Please note that the parser still
-scans the DTD fully and will report all errors in the DTD; such checks are not
-required by a well-formedness parser.
-</para>
-
- <para>
-The <literal>pxpvalidate</literal> application is the simplest sensible program
-using &markup;; you may consider it the "hello world" program.
-</para>
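-
-    <para>
-The core of such a program is small. The following is a minimal sketch (not
-the actual <literal>pxpvalidate</literal> source) that validates a single file
-given on the command line and prints a readable message on failure, using the
-functions described in the next section:
-
-<programlisting>
-<![CDATA[open Pxp_yacc;;
-
-let () =
-  try
-    (* Parsing in the default configuration validates the document. *)
-    let _doc =
-      parse_document_entity default_config (from_file Sys.argv.(1)) default_spec
-    in
-    print_endline "Document is valid."
-  with
-    e ->
-      (* Turn the PXP exception into a readable message. *)
-      print_endline (Pxp_types.string_of_exn e)
-]]></programlisting>
-</para>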
- </sect1>
-
-
- <!-- ================================================== -->
-
-
- <sect1>
- <title>How to parse a document from an application</title>
- <para>
-Let me first give a rough overview of the object model of the parser. The
-following items are represented by objects:
-
-<itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<emphasis>Documents:</emphasis> The document representation is more or less the
-anchor for the application; all accesses to the parsed entities start here. It
-is described by the class <literal>document</literal> contained in the module
-<literal>Pxp_document</literal>. You can get some global information, such
-as the XML declaration the document begins with, the DTD of the document,
-global processing instructions, and, most importantly, the document tree.
-</para>
- </listitem>
-
- <listitem>
- <para>
-<emphasis>The contents of documents:</emphasis> The contents have the structure
-of a tree: Elements contain other elements and text<footnote><para>Elements may
-also contain processing instructions. Unlike other document models, &markup;
-separates processing instructions from the rest of the text and provides a
-second interface to access them (method <literal>pinstr</literal>). However,
-there is a parser option (<literal>enable_pinstr_nodes</literal>) which changes
-the behaviour of the parser such that extra nodes for processing instructions
-are included into the tree.</para>
-<para>Furthermore, the tree does normally not contain nodes for XML comments;
-they are ignored by default. Again, there is an option
-(<literal>enable_comment_nodes</literal>) changing this.</para>
-</footnote>.
-
-The common type to represent both kinds of content is <literal>node</literal>
-which is a class type that unifies the properties of elements and character
-data. Every node has a list of children (which is empty if the element is empty
-or the node represents text); nodes may have attributes; nodes always have text
-contents. There are two implementations of <literal>node</literal>, the class
-<literal>element_impl</literal> for elements, and the class
-<literal>data_impl</literal> for text data. You find these classes and class
-types in the module <literal>Pxp_document</literal>, too.
-</para>
-
- <para>
-Note that attribute lists are represented by non-class values.
-</para>
- </listitem>
-
- <listitem>
- <para>
-<emphasis>The node extension:</emphasis> For advanced usage, every node of the
-document may have an associated <emphasis>extension</emphasis> which is simply
-a second object. This object must have the three methods
-<literal>clone</literal>, <literal>node</literal>, and
-<literal>set_node</literal> as a bare minimum, but you are free to add methods as
-you want. This is the preferred way to add functionality to the document
-tree<footnote><para>Due to the typing system it is more or less impossible to
-derive recursive classes in O'Caml. To get around this, it is common practice
-to put the modifiable or extensible part of recursive objects into parallel
-objects.</para> </footnote>. The class type <literal>extension</literal> is
-defined in <literal>Pxp_document</literal>, too.
-</para>
- </listitem>
-
- <listitem>
- <para>
-<emphasis>The DTD:</emphasis> Sometimes it is necessary to access the DTD of a
-document; the average application does not need this feature. The class
-<literal>dtd</literal> describes DTDs, and makes it possible to get
-representations of element, entity, and notation declarations as well as
-processing instructions contained in the DTD. This class, and
-<literal>dtd_element</literal>, <literal>dtd_notation</literal>, and
-<literal>proc_instruction</literal> can be found in the module
-<literal>Pxp_dtd</literal>. There are a couple of classes representing
-different kinds of entities; these can be found in the module
-<literal>Pxp_entity</literal>.
-</para>
- </listitem>
- </itemizedlist>
-
-Additionally, the following modules play a role:
-
-<itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<emphasis>Pxp_yacc:</emphasis> Here the main parsing functions such as
-<literal>parse_document_entity</literal> are located. Some additional types and
-functions allow the parser to be configured in a non-standard way.
-</para>
- </listitem>
-
- <listitem>
- <para>
-<emphasis>Pxp_types:</emphasis> This is a collection of basic types and
-exceptions.
-</para>
- </listitem>
- </itemizedlist>
-
-There are some further modules that are needed internally but are not part of
-the API.
-</para>
-
- <para>
-Let the document to be parsed be stored in a file called
-<literal>doc.xml</literal>. The parsing process is started by calling the
-function
-
-<programlisting>
-val parse_document_entity : config -> source -> 'ext spec -> 'ext document
-</programlisting>
-
-defined in the module <literal>Pxp_yacc</literal>. The first argument
-specifies some global properties of the parser; it is recommended to start with
-the <literal>default_config</literal>. The second argument determines where the
-document to be parsed comes from; this may be a file, a channel, or an entity
-ID. To parse <literal>doc.xml</literal>, it is sufficient to pass
-<literal>from_file "doc.xml"</literal>.
-</para>
-
- <para>
-The third argument passes the object specification to use. Roughly
-speaking, it determines which classes implement the node objects of which
-element types, and which extensions are to be used. The <literal>'ext</literal>
-type variable is the type of the extension. For the moment, let us
-simply pass <literal>default_spec</literal> as this argument, and ignore it.
-</para>
-
- <para>
-So the following expression parses <literal>doc.xml</literal>:
-
-<programlisting>
-open Pxp_yacc
-let d = parse_document_entity default_config (from_file "doc.xml") default_spec
-</programlisting>
-
-Note that <literal>default_config</literal> implies that warnings are collected
-but not printed. Errors raise one of the exceptions defined in
-<literal>Pxp_types</literal>; to get readable errors and warnings catch the
-exceptions as follows:
-
-<programlisting>
-<![CDATA[class warner =
- object
- method warn w =
- print_endline ("WARNING: " ^ w)
- end
-;;
-
-try
- let config = { default_config with warner = new warner } in
- let d = parse_document_entity config (from_file "doc.xml") default_spec
- in
- ...
-with
- e ->
- print_endline (Pxp_types.string_of_exn e)
-]]></programlisting>
-
-Now <literal>d</literal> is an object of the <literal>document</literal>
-class. If you want the node tree, you can get the root element by
-
-<programlisting>
-let root = d # root
-</programlisting>
-
-and if you would rather access the DTD, get it by
-
-<programlisting>
-let dtd = d # dtd
-</programlisting>
-
-As it is more interesting, let us investigate the node tree now. Given the root
-element, it is possible to recursively traverse the whole tree. The children of
-a node <literal>n</literal> are returned by the method
-<literal>sub_nodes</literal>, and the type of a node is returned by
-<literal>node_type</literal>. This function traverses the tree, and prints the
-type of each node:
-
-<programlisting>
-<![CDATA[let rec print_structure n =
- let ntype = n # node_type in
- match ntype with
- T_element name ->
- print_endline ("Element of type " ^ name);
- let children = n # sub_nodes in
- List.iter print_structure children
- | T_data ->
- print_endline "Data"
- | _ ->
- (* Other node types are not possible unless the parser is configured
- differently.
- *)
- assert false
-]]></programlisting>
-
-You can call this function by
-
-<programlisting>
-print_structure root
-</programlisting>
-
-The type returned by <literal>node_type</literal> is either <literal>T_element
-name</literal> or <literal>T_data</literal>. The <literal>name</literal> of the
-element type is the string included in the angle brackets. Note that only
-elements have children; data nodes are always leaves of the tree.
-</para>
-
- <para>
-There are some more methods in order to access a parsed node tree:
-
-<itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<literal>n # parent</literal>: Returns the parent node, or raises
-<literal>Not_found</literal> if the node is already the root.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>n # root</literal>: Returns the root of the node tree.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>n # attribute a</literal>: Returns the value of the attribute with
-name <literal>a</literal>. The method returns a value for every
-<emphasis>declared</emphasis> attribute, regardless of whether the attribute
-is present in the instance or not. If the attribute is not declared,
-<literal>Not_found</literal> will be raised. (In well-formedness mode, every
-attribute is considered as being implicitly declared with type
-<literal>CDATA</literal>.)
-</para>
-
-<para>
-The following return values are possible: <literal>Value s</literal>,
-<literal>Valuelist sl</literal>, and <literal>Implied_value</literal>.
-The first two value types indicate that the attribute value is available,
-either because there is a definition
-<literal><replaceable>a</replaceable>="<replaceable>value</replaceable>"</literal>
-in the XML text, or because there is a default value (declared in the
-DTD). Only if both the instance definition and the default declaration are
-missing, the latter value <literal>Implied_value</literal> will be returned.
-</para>
-
-<para>
-In the DTD, every attribute is typed. There are single-value types (CDATA, ID,
-IDREF, ENTITY, NMTOKEN, enumerations), in which case the method passes
-<literal>Value s</literal> back, where <literal>s</literal> is the normalized
-string value of the attribute. The other types (IDREFS, ENTITIES, NMTOKENS)
-represent list values, and the parser splits the XML literal into several
-tokens and returns these tokens as <literal>Valuelist sl</literal>.
-</para>
-
-<para>
-Normalization means that entity references (the
-<literal>&<replaceable>name</replaceable>;</literal> tokens) and
-character references
-(<literal>&#<replaceable>number</replaceable>;</literal>) are replaced
-by the text they represent, and that white space characters are converted into
-plain spaces.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>n # data</literal>: Returns the character data contained in the
-node. For data nodes, the meaning is obvious as this is the main content of
-data nodes. For element nodes, this method returns the concatenated contents of
-all inner data nodes.
-</para>
- <para>
-Note that entity references included in the text are resolved while they are
-being parsed; for example the text <![CDATA["a &lt;&gt; b"]]> will be returned
-as <![CDATA["a <> b"]]> by this method. Spaces of data nodes are always
-preserved. Newlines are preserved, but always converted to \n characters even
-if newlines are encoded as \r\n or \r. Normally you will never see two adjacent
-data nodes because the parser collapses all data material at one location into
-one node. (However, if you create your own tree or transform the parsed tree,
-it is possible to have adjacent data nodes.)
-</para>
- <para>
-Note that elements that do <emphasis>not</emphasis> allow #PCDATA as content
-will not have data nodes as children. This means that spaces and newlines, the
-only character material allowed for such elements, are silently dropped.
-</para>
- </listitem>
- </itemizedlist>
-
-For example, if the task is to print all contents of elements with type
-"valuable" whose attribute "priority" is "1", this function can help:
-
-<programlisting>
-<![CDATA[let rec print_valuable_prio1 n =
- let ntype = n # node_type in
- match ntype with
- T_element "valuable" when n # attribute "priority" = Value "1" ->
-      print_endline "Valuable node with priority 1 found:";
- print_endline (n # data)
- | (T_element _ | T_data) ->
- let children = n # sub_nodes in
- List.iter print_valuable_prio1 children
- | _ ->
- assert false
-]]></programlisting>
-
-You can call this function by:
-
-<programlisting>
-print_valuable_prio1 root
-</programlisting>
-
-If you like a DSSSL-like style, you can make the function
-<literal>process_children</literal> explicit:
-
-<programlisting>
-<![CDATA[let rec print_valuable_prio1 n =
-
- let process_children n =
- let children = n # sub_nodes in
- List.iter print_valuable_prio1 children
- in
-
- let ntype = n # node_type in
- match ntype with
- T_element "valuable" when n # attribute "priority" = Value "1" ->
- print_endline "Valuable node with priority 1 found:";
- print_endline (n # data)
- | (T_element _ | T_data) ->
- process_children n
- | _ ->
- assert false
-]]></programlisting>
-
-Used in this way, O'Caml serves as a simple "style-sheet language": you can form a big
-"match" expression to distinguish between all significant cases, and provide
-different reactions on different conditions. But this technique has
-limitations; the "match" expression tends to get larger and larger, and it is
-difficult to store intermediate values as there is only one big
-recursion. Alternatively, it is also possible to represent the various cases as
-classes, and to use dynamic method lookup to find the appropriate class. The
-next section explains this technique in detail.
-
-</para>
- </sect1>
-
-
- <!-- ================================================== -->
-
-
- <sect1>
- <title>Class-based processing of the node tree</title>
- <para>
-By default, the parsed node tree consists of objects of the same class; this is
-a good design as long as you only want to access selected parts of the
-document. For complex transformations, it may be better to use different
-classes for objects describing different element types.
-</para>
-
- <para>
-For example, if the DTD declares the element types <literal>a</literal>,
-<literal>b</literal>, and <literal>c</literal>, and if the task is to convert
-an arbitrary document into a printable format, the idea is to define for every
-element type a separate class that has a method <literal>print</literal>. The
-classes are <literal>eltype_a</literal>, <literal>eltype_b</literal>, and
-<literal>eltype_c</literal>, and every class implements
-<literal>print</literal> such that elements of the type corresponding to the
-class are converted to the output format.
-</para>
-
- <para>
-The parser supports such a design directly. As it is impossible to derive
-recursive classes in O'Caml<footnote><para>The problem is that the subclass is
-usually not a subtype in this case because O'Caml has a contravariant subtyping
-rule. </para> </footnote>, the specialized element classes cannot be formed by
-simply inheriting from the built-in classes of the parser and adding methods
-for customized functionality. To get around this limitation, every node of the
-document tree is represented by <emphasis>two</emphasis> objects, one called
-"the node" and containing the recursive definition of the tree, one called "the
-extension". Every node object has a reference to the extension, and the
-extension has a reference to the node. The advantage of this model is that it
-is now possible to customize the extension without affecting the typing
-constraints of the recursive node definition.
-</para>
-
- <para>
-Every extension must have the three methods <literal>clone</literal>,
-<literal>node</literal>, and <literal>set_node</literal>. The method
-<literal>clone</literal> creates a deep copy of the extension object and
-returns it; <literal>node</literal> returns the node object for this extension
-object; and <literal>set_node</literal> is used to tell the extension object
-which node is associated with it; this method is automatically called when the
-node tree is initialized. The following definition is a good starting point
-for these methods; usually <literal>clone</literal> must be further refined
-when instance variables are added to the class:
-
-<programlisting>
-<![CDATA[class custom_extension =
- object (self)
-
- val mutable node = (None : custom_extension node option)
-
- method clone = {< >}
- method node =
- match node with
- None ->
- assert false
- | Some n -> n
- method set_node n =
- node <- Some n
-
- end
-]]>
-</programlisting>
-
-This part of the extension is usually the same for all classes, so it is a good
-idea to consider <literal>custom_extension</literal> as the super-class of the
-further class definitions. Continuing the example above, we can define the
-element type classes as follows:
-
-<programlisting>
-<![CDATA[class virtual custom_extension =
- object (self)
- ... clone, node, set_node defined as above ...
-
- method virtual print : out_channel -> unit
- end
-
-class eltype_a =
- object (self)
- inherit custom_extension
- method print ch = ...
- end
-
-class eltype_b =
- object (self)
- inherit custom_extension
- method print ch = ...
- end
-
-class eltype_c =
- object (self)
- inherit custom_extension
- method print ch = ...
- end
-]]></programlisting>
-
-The method <literal>print</literal> can now be implemented for every element
-type separately. Note that you get the associated node by invoking
-
-<programlisting>
-self # node
-</programlisting>
-
-and you get the extension object of a node <literal>n</literal> by writing
-
-<programlisting>
-n # extension
-</programlisting>
-
-It is guaranteed that
-
-<programlisting>
-self # node # extension == self
-</programlisting>
-
-always holds.
-</para>
-
- <para>Here are sample definitions of the <literal>print</literal>
-methods:
-
-<programlisting><![CDATA[
-class eltype_a =
- object (self)
- inherit custom_extension
- method print ch =
- (* Nodes <a>...</a> are only containers: *)
- output_string ch "(";
- List.iter
- (fun n -> n # extension # print ch)
- (self # node # sub_nodes);
- output_string ch ")";
- end
-
-class eltype_b =
- object (self)
- inherit custom_extension
- method print ch =
- (* Print the value of the CDATA attribute "print": *)
- match self # node # attribute "print" with
- Value s -> output_string ch s
- | Implied_value -> output_string ch "<missing>"
- | Valuelist l -> assert false
- (* not possible because the att is CDATA *)
- end
-
-class eltype_c =
- object (self)
- inherit custom_extension
- method print ch =
- (* Print the contents of this element: *)
- output_string ch (self # node # data)
- end
-
-class null_extension =
- object (self)
- inherit custom_extension
- method print ch = assert false
- end
-]]></programlisting>
-</para>
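-
-    <para>
-As mentioned above, <literal>clone</literal> usually needs to be refined once
-instance variables are added to an extension class. The following hypothetical
-sketch (it is not part of the distribution) shows a variant of
-<literal>eltype_c</literal> that caches the character data of its node; the
-functional update <literal>{&lt; ... &gt;}</literal> creates the copy, and the
-cache is deliberately reset in the copy:
-
-<programlisting><![CDATA[
-class eltype_c_cached =
-  object (self)
-    inherit custom_extension
-    val mutable cached = (None : string option)
-    method print ch =
-      let s =
-        match cached with
-          Some s -> s
-        | None ->
-            (* Compute and remember the contents of the element. *)
-            let s = self # node # data in
-            cached <- Some s;
-            s
-      in
-      output_string ch s
-    (* Refined clone: the copy starts with an empty cache. *)
-    method clone = {< cached = None >}
-  end
-]]></programlisting>
-</para>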
-
-
- <para>
-The remaining task is to configure the parser such that these extension classes
-are actually used. Here another problem arises: It is not possible to
-dynamically select the class of an object to be created. As a workaround,
-&markup; allows the user to specify <emphasis>exemplar objects</emphasis> for
-the various element types; instead of creating the nodes of the tree by
-applying the <literal>new</literal> operator, the nodes are produced by
-duplicating the exemplars. As object duplication preserves the class of the
-object, one can create fresh objects of every class for which previously an
-exemplar has been registered.
-</para>
-
- <para>
-Exemplars are meant as objects without contents; the only interesting thing
-about them is that they are instances of a certain class. The creation of an exemplar
-for an element node can be done by:
-
-<programlisting>
-let element_exemplar = new element_impl extension_exemplar
-</programlisting>
-
-And a data node exemplar is created by:
-
-<programlisting>
-let data_exemplar = new data_impl extension_exemplar
-</programlisting>
-
-The classes <literal>element_impl</literal> and <literal>data_impl</literal>
-are defined in the module <literal>Pxp_document</literal>. The constructors
-initialize the fresh objects as empty objects, i.e. without children, without
-data contents, and so on. The <literal>extension_exemplar</literal> is the
-initial extension object the exemplars are associated with.
-</para>
-
- <para>
-Once the exemplars are created and stored somewhere (e.g. in a hash table), you
-can take an exemplar and create a concrete instance (with contents) by
-duplicating it. As a user of the parser you are normally not concerned with this
-as this is part of the internal logic of the parser, but as background knowledge
-it is worthwhile to mention that the two methods
-<literal>create_element</literal> and <literal>create_data</literal> actually
-perform the duplication of the exemplar for which they are invoked,
-additionally apply modifications to the clone, and finally return the new
-object. Moreover, the extension object is copied, too, and the new node object
-is associated with the fresh extension object. Note that this is the reason why
-every extension object must have a <literal>clone</literal> method.
-</para>
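-
-    <para>
-The same methods can also be invoked directly if a tree is to be built
-manually. The following hypothetical sketch (assuming a <literal>dtd</literal>
-object is at hand, e.g. taken from a previously parsed document, and that it
-declares an element <literal>a</literal> with a CDATA attribute
-<literal>x</literal>) creates a small element with one data child from the
-exemplars defined above; the method <literal>add_node</literal> attaches a
-child to an element:
-
-<programlisting><![CDATA[
-let make_small_tree dtd =
-  (* Duplicate the exemplars; the copies become the real nodes. *)
-  let a = element_exemplar # create_element dtd (T_element "a") ["x", "1"] in
-  let txt = data_exemplar # create_data dtd "Hello world" in
-  (* Attach the data node as child of the element node. *)
-  a # add_node txt;
-  a
-]]></programlisting>
-</para>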
-
- <para>
-The configuration of the set of exemplars is passed to the
-<literal>parse_document_entity</literal> function as third argument. In our
-example, this argument can be set up as follows:
-
-<programlisting>
-<![CDATA[let spec =
- make_spec_from_alist
- ~data_exemplar: (new data_impl (new null_extension))
- ~default_element_exemplar: (new element_impl (new null_extension))
- ~element_alist:
- [ "a", new element_impl (new eltype_a);
- "b", new element_impl (new eltype_b);
- "c", new element_impl (new eltype_c);
- ]
- ()
-]]></programlisting>
-
-The <literal>~element_alist</literal> function argument defines the mapping
-from element types to exemplars as an associative list. The argument
-<literal>~data_exemplar</literal> specifies the exemplar for data nodes, and
-the <literal>~default_element_exemplar</literal> is used whenever the parser
-finds an element type for which the associative list does not define an
-exemplar.
-</para>
-
- <para>
-The configuration is now complete. You can still use the same parsing
-functions; only the initialization is a bit different. For example, call the
-parser by:
-
-<programlisting>
-let d = parse_document_entity default_config (from_file "doc.xml") spec
-</programlisting>
-
-Note that the resulting document <literal>d</literal> has a usable type;
-in particular, the <literal>print</literal> method we added is visible. So you can
-print your document by
-
-<programlisting>
-d # root # extension # print stdout
-</programlisting>
-</para>
-
- <para>
-This object-oriented approach looks rather complicated; this is mostly caused
-by working around some problems of the strict typing system of O'Caml. Some
-auxiliary concepts such as extensions were needed, but in practice the extra
-work is small. In the next section, one of the examples of the
-distribution is explained, a converter from <emphasis>readme</emphasis>
-documents to HTML.
-</para>
-
- </sect1>
-
-
- <!-- ================================================== -->
-
-
- <sect1 id="sect.readme.to-html">
- <title>Example: An HTML backend for the <emphasis>readme</emphasis>
-DTD</title>
-
- <para>The converter from <emphasis>readme</emphasis> documents to HTML
-documents strictly follows the approach of defining one class per element
-type. The HTML code is similar to the <emphasis>readme</emphasis> source;
-because of this, most elements can be converted in the following way: Given the
-input element
-
-<programlisting>
-<![CDATA[<e>content</e>]]>
-</programlisting>
-
-the conversion text is the concatenation of a computed prefix, the recursively
-converted content, and a computed suffix.
-</para>
-
- <para>
-Only one element type cannot be handled by this scheme:
-<literal>footnote</literal>. Footnotes are collected as they are found in
-the input text, and they are printed after the main text has been converted and
-printed.
-</para>
-
- <sect2>
- <title>Header</title>
- <para>
-<programlisting>&readme.code.header;</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>Type declarations</title>
- <para>
-<programlisting>&readme.code.footnote-printer;</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>Class <literal>store</literal></title>
- <para>
-The <literal>store</literal> is a container for footnotes. You can add a
-footnote by invoking <literal>alloc_footnote</literal>; the argument is an
-object of the class <literal>footnote_printer</literal>, the method returns the
-number of the footnote. The interesting property of a footnote is that it can
-be converted to HTML, so a <literal>footnote_printer</literal> is an object
-with a method <literal>footnote_to_html</literal>. The class
-<literal>footnote</literal> which is defined below has a compatible method
-<literal>footnote_to_html</literal> such that objects created from it can be
-used as <literal>footnote_printer</literal>s.
-</para>
- <para>
-The other method, <literal>print_footnotes</literal>, prints the footnotes as a
-definition list, and is typically invoked after the main material of the page
-has already been printed. Every item of the list is printed by
-<literal>footnote_to_html</literal>.
-</para>
-
- <para>
-<programlisting>&readme.code.store;</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>Function <literal>escape_html</literal></title>
- <para>
-This function converts the characters <, >, &, and " to their HTML
-representation. For example,
-<literal>escape_html "<>" = "&lt;&gt;"</literal>. Other
-characters are left unchanged.
-
-<programlisting>&readme.code.escape-html;</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>Virtual class <literal>shared</literal></title>
- <para>
-This virtual class is the abstract superclass of the extension classes shown
-below. It defines the standard methods <literal>clone</literal>,
-<literal>node</literal>, and <literal>set_node</literal>, and declares the type
-of the virtual method <literal>to_html</literal>. This method recursively
-traverses the whole element tree, and prints the converted HTML code to the
-output channel passed as second argument. The first argument is the reference
-to the global <literal>store</literal> object which collects the footnotes.
-
-<programlisting>&readme.code.shared;</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>Class <literal>only_data</literal></title>
- <para>
-This class defines <literal>to_html</literal> such that the character data of
-the current node is converted to HTML. Note that <literal>self</literal> is an
-extension object, <literal>self # node</literal> is the node object, and
-<literal>self # node # data</literal> returns the character data of the node.
-
-<programlisting>&readme.code.only-data;</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>Class <literal>readme</literal></title>
- <para>
-This class converts elements of type <literal>readme</literal> to HTML. Such an
-element is (by definition) always the root element of the document. First, the
-HTML header is printed; the <literal>title</literal> attribute of the element
-determines the title of the HTML page. Some aspects of the HTML page can be
-configured by setting certain parameter entities, for example the background
-color, the text color, and link colors. After the header, the
-<literal>body</literal> tag, and the headline have been printed, the contents
-of the page are converted by invoking <literal>to_html</literal> on all
-children of the current node (which is the root node). Then, the footnotes are
-appended to this by telling the global <literal>store</literal> object to print
-the footnotes. Finally, the end tags of the HTML pages are printed.
-</para>
-
- <para>
-This class is an example of how to access the value of an attribute: The value is
-determined by invoking <literal>self # node # attribute "title"</literal>. As
-this attribute has been declared as CDATA and as being required, the value
-always has the form <literal>Value s</literal> where <literal>s</literal> is the
-string value of the attribute.
-</para>
-
- <para>
-You can also see how entity contents can be accessed. A parameter entity object
-can be looked up by <literal>self # node # dtd # par_entity "name"</literal>,
-and by invoking <literal>replacement_text</literal> the value of the entity
-is returned after inner parameter and character entities have been
-processed. Note that you must use <literal>gen_entity</literal> instead of
-<literal>par_entity</literal> to access general entities.
-</para>
-
- <para>
-<programlisting>&readme.code.readme;</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>Classes <literal>section</literal>, <literal>sect1</literal>,
-<literal>sect2</literal>, and <literal>sect3</literal></title>
- <para>
-As the conversion process is very similar, the conversion classes of the three
-section levels are derived from the more general <literal>section</literal>
-class. The HTML code of the section levels only differs in the type of the
-headline, and because of this the classes describing the section levels can be
-computed by replacing the class argument <literal>the_tag</literal> of
-<literal>section</literal> by the HTML name of the headline tag.
-</para>
-
- <para>
-Section elements are converted to HTML by printing a headline and then
-converting the contents of the element recursively. More precisely, the first
-sub-element is always a <literal>title</literal> element, and the other
-elements are the contents of the section. This structure is declared in the
-DTD, and it is guaranteed that the document matches the DTD. Because of this
-the title node can be separated from the rest without any checks.
-</para>
-
- <para>
-Both the title node, and the body nodes are then converted to HTML by calling
-<literal>to_html</literal> on them.
-</para>
-
- <para>
-<programlisting>&readme.code.section;</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>Classes <literal>map_tag</literal>, <literal>p</literal>,
-<literal>em</literal>, <literal>ul</literal>, <literal>li</literal></title>
- <para>
-Several element types are converted to HTML by simply mapping them to
-corresponding HTML element types. The class <literal>map_tag</literal>
-implements this, and the class argument <literal>the_target_tag</literal>
-determines the tag name to map to. The output consists of the start tag, the
-recursively converted inner elements, and the end tag.
-
-<programlisting>&readme.code.map-tag;</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>Class <literal>br</literal></title>
- <para>
-Elements of type <literal>br</literal> are mapped to the same HTML type. Note
-that HTML forbids the end tag of <literal>br</literal>.
-
-<programlisting>&readme.code.br;</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>Class <literal>code</literal></title>
- <para>
-The <literal>code</literal> type is converted to a <literal>pre</literal>
-section (preformatted text). As the meaning of tabs is unspecified in HTML,
-tabs are expanded to spaces.
-
-<programlisting>&readme.code.code;</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>Class <literal>a</literal></title>
- <para>
-Hyperlinks, expressed by the <literal>a</literal> element type, are converted
-to the HTML <literal>a</literal> type. If the target of the hyperlink is given
-by <literal>href</literal>, the URL of this attribute can be used
-directly. Alternatively, the target can be given by
-<literal>readmeref</literal> in which case the ".html" suffix must be added to
-the file name.
-</para>
-
- <para>
-Note that within <literal>a</literal> only #PCDATA is allowed, so the contents
-can be converted directly by applying <literal>escape_html</literal> to the
-character data contents.
-
-<programlisting>&readme.code.a;</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>Class <literal>footnote</literal></title>
- <para>
-The <literal>footnote</literal> class has two methods:
-<literal>to_html</literal> to convert the footnote reference to HTML, and
-<literal>footnote_to_html</literal> to convert the footnote text itself.
-</para>
-
- <para>
-The footnote reference is converted to a local hyperlink; more precisely, to
-two anchor tags which are connected with each other. The text anchor points to
-the footnote anchor, and the footnote anchor points to the text anchor.
-</para>
-
- <para>
-The footnote must be allocated in the <literal>store</literal> object. By
-allocating the footnote, you get the number of the footnote, and the text of
-the footnote is stored until the end of the HTML page is reached, when the
-footnotes can be printed. The <literal>to_html</literal> method simply stores
-the object itself, so that the <literal>footnote_to_html</literal> method is
-invoked on the same object that encountered the footnote.
-</para>
-
- <para>
-The <literal>to_html</literal> method only allocates the footnote and prints the
-reference anchor; it neither prints nor converts the contents of the
-note. This is deferred until the footnotes actually get printed, i.e. the
-recursive call of <literal>to_html</literal> on the sub nodes is done by
-<literal>footnote_to_html</literal>.
-</para>
-
- <para>
-Note that this technique does not work if you make another footnote within a
-footnote; the second footnote gets allocated but not printed.
-</para>
-
- <para>
-<programlisting>&readme.code.footnote;</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>The specification of the document model</title>
- <para>
-This code sets up the hash table that connects element types with the exemplars
-of the extension classes that convert the elements to HTML.
-
-<programlisting>&readme.code.tag-map;</programlisting>
-</para>
- </sect2>
-
-<!-- <![RCDATA[&readme.code.to-html;]]> -->
- </sect1>
-
- </chapter>
-
-<!-- ********************************************************************** -->
-
- <chapter>
- <title>The objects representing the document</title>
-
- <para>
-<emphasis>This description might be out-of-date. See the module interface files
-for updated information.</emphasis></para>
-
- <sect1>
- <title>The <literal>document</literal> class</title>
- <para>
-<programlisting>
-<![CDATA[
-class [ 'ext ] document :
- Pxp_types.collect_warnings ->
- object
- method init_xml_version : string -> unit
- method init_root : 'ext node -> unit
-
- method xml_version : string
- method xml_standalone : bool
- method dtd : dtd
- method root : 'ext node
-
- method encoding : Pxp_types.rep_encoding
-
- method add_pinstr : proc_instruction -> unit
- method pinstr : string -> proc_instruction list
- method pinstr_names : string list
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
-
- end
-;;
-]]>
-</programlisting>
-
-The methods beginning with <literal>init_</literal> are only for internal use
-of the parser.
-</para>
-
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<literal>xml_version</literal>: returns the version string at the beginning of
-the document. For example, "1.0" is returned if the document begins with
-<literal><?xml version="1.0"?></literal>.</para>
- </listitem>
- <listitem>
- <para>
-<literal>xml_standalone</literal>: returns the boolean value of the
-<literal>standalone</literal> declaration in the XML declaration. If the
-<literal>standalone</literal> attribute is missing, <literal>false</literal> is
-returned. </para>
- </listitem>
- <listitem>
- <para>
-<literal>dtd</literal>: returns a reference to the global DTD object.</para>
- </listitem>
- <listitem>
- <para>
-<literal>root</literal>: returns a reference to the root element.</para>
- </listitem>
- <listitem>
- <para>
-<literal>encoding</literal>: returns the internal encoding of the
-document. This means that all strings of which the document consists are
-encoded in this character set.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>pinstr</literal>: returns the processing instructions outside the DTD
-and outside the root element. The argument passed to the method names a
-<emphasis>target</emphasis>, and the method returns all instructions with this
-target. The target is the first word inside <literal><?</literal> and
-<literal>?></literal>.</para>
- </listitem>
- <listitem>
- <para>
-<literal>pinstr_names</literal>: returns the names of the processing instructions.</para>
- </listitem>
- <listitem>
- <para>
-<literal>add_pinstr</literal>: adds another processing instruction. This method
-is used by the parser itself to enter the instructions returned by
-<literal>pinstr</literal>, but you can also enter additional instructions.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>write</literal>: writes the document to the passed stream as XML
-text using the passed (external) encoding. The generated text is always valid
-XML and can be parsed by PXP; however, the text is badly formatted (this is not
-a pretty printer).</para>
- </listitem>
- </itemizedlist>
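-
-    <para>
-A short, hypothetical sketch of how these methods are typically used on a
-parsed document (the file name <literal>doc.xml</literal> and the processing
-instruction target <literal>app-config</literal> are only examples):
-
-<programlisting>
-<![CDATA[let d =
-  Pxp_yacc.parse_document_entity
-    Pxp_yacc.default_config (Pxp_yacc.from_file "doc.xml") Pxp_yacc.default_spec
-;;
-
-print_endline ("XML version: " ^ d # xml_version);;
-print_endline ("Standalone:  " ^ string_of_bool (d # xml_standalone));;
-
-(* The root element of the document tree: *)
-let root = d # root;;
-
-(* Processing instructions outside the DTD and the root element,
-   selected by their target: *)
-let pis = d # pinstr "app-config";;
-]]></programlisting>
-</para>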
- </sect1>
-
-<!-- ********************************************************************** -->
-
- <sect1>
- <title>The class type <literal>node</literal></title>
- <para>
-
-From <literal>Pxp_document</literal>:
-
-<programlisting>
-type node_type =
- T_data
-| T_element of string
-| T_super_root
-| T_pinstr of string
-| T_comment
-<replaceable>and some other, reserved types</replaceable>
-;;
-
-class type [ 'ext ] node =
- object ('self)
- constraint 'ext = 'ext node #extension
-
- <anchor id="type-node-general.sig"
- >(* <link linkend="type-node-general" endterm="type-node-general.title"
- ></link> *)
-
- method extension : 'ext
- method dtd : dtd
- method parent : 'ext node
- method root : 'ext node
- method sub_nodes : 'ext node list
- method iter_nodes : ('ext node &fun; unit) &fun; unit
- method iter_nodes_sibl :
- ('ext node option &fun; 'ext node &fun; 'ext node option &fun; unit) &fun; unit
- method node_type : node_type
- method encoding : Pxp_types.rep_encoding
- method data : string
- method position : (string * int * int)
- method comment : string option
- method pinstr : string &fun; proc_instruction list
- method pinstr_names : string list
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
-
- <anchor id="type-node-atts.sig"
- >(* <link linkend="type-node-atts" endterm="type-node-atts.title"
- ></link> *)
-
- method attribute : string &fun; Pxp_types.att_value
- method required_string_attribute : string &fun; string
- method optional_string_attribute : string &fun; string option
- method required_list_attribute : string &fun; string list
- method optional_list_attribute : string &fun; string list
- method attribute_names : string list
- method attribute_type : string &fun; Pxp_types.att_type
- method attributes : (string * Pxp_types.att_value) list
- method id_attribute_name : string
- method id_attribute_value : string
- method idref_attribute_names : string
-
- <anchor id="type-node-mods.sig"
- >(* <link linkend="type-node-mods" endterm="type-node-mods.title"
- ></link> *)
-
- method add_node : ?force:bool &fun; 'ext node &fun; unit
- method add_pinstr : proc_instruction &fun; unit
- method delete : unit
- method set_nodes : 'ext node list &fun; unit
- method quick_set_attributes : (string * Pxp_types.att_value) list &fun; unit
- method set_comment : string option &fun; unit
-
- <anchor id="type-node-cloning.sig"
- >(* <link linkend="type-node-cloning" endterm="type-node-cloning.title"
- ></link> *)
-
- method orphaned_clone : 'self
- method orphaned_flat_clone : 'self
- method create_element :
- ?position:(string * int * int) &fun;
- dtd &fun; node_type &fun; (string * string) list &fun;
- 'ext node
- method create_data : dtd &fun; string &fun; 'ext node
- method keep_always_whitespace_mode : unit
-
- <anchor id="type-node-weird.sig"
- >(* <link linkend="type-node-weird" endterm="type-node-weird.title"
- ></link> *)
-
- method local_validate : ?use_dfa:bool -> unit -> unit
-
- (* ... Internal methods are undocumented. *)
-
- end
-;;
-</programlisting>
-
-In the module <literal>Pxp_types</literal> you can find another type
-definition that is important in this context:
-
-<programlisting>
-type Pxp_types.att_value =
- Value of string
- | Valuelist of string list
- | Implied_value
-;;
-</programlisting>
-</para>
-
- <sect2>
- <title>The structure of document trees</title>
-
-<para>
-A node represents either an element or a character data section. There are two
-classes implementing the two aspects of nodes: <literal>element_impl</literal>
-and <literal>data_impl</literal>. The latter class does not implement all
-methods because some methods do not make sense for data nodes.
-</para>
-
-<para>
-(Note: PXP also supports a mode which forces that processing instructions and
-comments are represented as nodes of the document tree. However, these nodes
-are instances of <literal>element_impl</literal> with node types
-<literal>T_pinstr</literal> and <literal>T_comment</literal>,
-respectively. This mode must be explicitly configured; the basic representation
-knows only element and data nodes.)
-</para>
-
- <para>The following figure
-(<link linkend="node-term" endterm="node-term"></link>) shows an example of how
-a tree is constructed from element and data nodes. The circular areas
-represent element nodes whereas the ovals denote data nodes. Only elements
-may have subnodes; data nodes are always leaves of the tree. The subnodes
-of an element can be either element or data nodes; in both cases the O'Caml
-objects storing the nodes have the class type <literal>node</literal>.</para>
-
- <para>Attributes (the clouds in the picture) are not directly
-integrated into the tree; there is always an extra link to the attribute
-list. This is also true for processing instructions (not shown in the
-picture). This means that there are separate access methods for attributes and
-processing instructions.</para>
-
-<figure id="node-term" float="1">
-<title>A tree with element nodes, data nodes, and attributes</title>
-<graphic fileref="pic/node_term" format="GIF"></graphic>
-</figure>
-
- <para>Only elements, data sections, attributes and processing
-instructions (and comments, if configured) can, directly or indirectly, occur
-in the document tree. It is impossible to add entity references to the tree; if
-the parser finds such a reference, not the reference as such but the referenced
-text (i.e. the tree representing the structured text) is included in the
-tree.</para>
-
- <para>Note that the parser collapses as much data material into one
-data node as possible such that there are normally never two adjacent data
-nodes. This invariant is enforced even if data material is included by entity
-references or CDATA sections, or if a data sequence is interrupted by
-comments. So <literal>a &amp; b &lt;!-- comment --&gt; c &lt;![CDATA[
-&lt;&gt; d]]&gt;</literal> is represented by only one data node, for
-instance. However, you can manually create document trees which break this
-invariant; it only describes the way the parser forms the tree.
-</para>
-
-<figure id="node-general" float="1">
-<title>Nodes are doubly linked trees</title>
-<graphic fileref="pic/node_general" format="GIF"></graphic>
-</figure>
-
- <para>
-The node tree has links in both directions: Every node has a link to its parent
-(if any), and it has links to the subnodes (see
-figure <link linkend="node-general" endterm="node-general"></link>). Obviously,
-this doubly-linked structure simplifies navigation in the tree; but it also
-has some consequences for the possible operations on trees.</para>
-
- <para>
-Because every node can have at most <emphasis>one</emphasis> parent node,
-operations that would violate this condition are illegal. The following figure
-(<link linkend="node-add" endterm="node-add"></link>) shows on the left side
-that node <literal>y</literal> is added to <literal>x</literal> as new subnode
-which is allowed because <literal>y</literal> does not have a parent yet. The
-right side of the picture illustrates what would happen if <literal>y</literal>
-had a parent node; this is illegal because <literal>y</literal> would have two
-parents after the operation.</para>
-
-<figure id="node-add" float="1">
-<title>A node can only be added if it is a root</title>
-<graphic fileref="pic/node_add" format="GIF">
-</graphic>
-</figure>
-
- <para>
-The "delete" operation simply removes the links between two nodes. In the
-picture (<link linkend="node-delete" endterm="node-delete"></link>) the node
-<literal>x</literal> is deleted from the list of subnodes of
-<literal>y</literal>. After that, <literal>x</literal> becomes the root of the
-subtree starting at this node.</para>
-
-<figure id="node-delete" float="1">
-<title>A deleted node becomes the root of the subtree</title>
-<graphic fileref="pic/node_delete" format="GIF"></graphic>
-</figure>
-
- <para>
-It is also possible to make a clone of a subtree; illustrated in
-<link linkend="node-clone" endterm="node-clone"></link>. In this case, the
-clone is a copy of the original subtree except that it is no longer a
-subnode. Because cloning never keeps the connection to the parent, the clones
-are called <emphasis>orphaned</emphasis>.
-</para>
-
-<figure id="node-clone" float="1">
-<title>The clone of a subtree</title>
-<graphic fileref="pic/node_clone" format="GIF"></graphic>
-</figure>
- </sect2>
-
- <sect2>
- <title>The methods of the class type <literal>node</literal></title>
-
- <anchor id="type-node-general">
- <formalpara>
- <title id="type-node-general.title">
- <link linkend="type-node-general.sig">General observers</link>
- </title>
-
- <para>
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<literal>extension</literal>: The reference to the extension object which
-belongs to this node (see ...).</para>
- </listitem>
- <listitem>
- <para>
-<literal>dtd</literal>: Returns a reference to the global DTD. All nodes
-of a tree must share the same DTD.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>parent</literal>: Get the father node. Raises
-<literal>Not_found</literal> if the node does not have a
-parent, i.e. if the node is the root.</para>
- </listitem>
- <listitem>
- <para>
-<literal>root</literal>: Gets the reference to the root node of the tree.
-Every node is contained in a tree with a root, so this method always
-succeeds. Note that this method <emphasis>searches</emphasis> the root,
-which costs time proportional to the length of the path to the root.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>sub_nodes</literal>: Returns references to the children. The returned
-list reflects the order of the children. For data nodes, this method returns
-the empty list.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>iter_nodes f</literal>: Iterates over the children, and calls
-<literal>f</literal> for every child in turn.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>iter_nodes_sibl f</literal>: Iterates over the children, and calls
-<literal>f</literal> for every child in turn. <literal>f</literal> gets as
-arguments the previous node, the current node, and the next node.</para>
- </listitem>
- <listitem>
- <para>
-<literal>node_type</literal>: Returns either <literal>T_data</literal> which
-means that the node is a data node, or <literal>T_element n</literal>
-which means that the node is an element of type <literal>n</literal>.
-If configured, possible node types are also <literal>T_pinstr t</literal>
-indicating that the node represents a processing instruction with target
-<literal>t</literal>, and <literal>T_comment</literal> in which case the node
-is a comment.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>encoding</literal>: Returns the encoding of the strings.</para>
- </listitem>
- <listitem>
- <para>
-<literal>data</literal>: Returns the character data of this node and all
-children, concatenated as one string. The encoding of the string is what
-the method <literal>encoding</literal> returns.
-- For data nodes, this method simply returns the represented characters.
-For elements, the meaning of the method has been extended such that it
-returns something useful, i.e. the effectively contained characters, without
-markup. (For <literal>T_pinstr</literal> and <literal>T_comment</literal>
-nodes, the method returns the empty string.)
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>position</literal>: If configured, this method returns the position of
-the element as triple (entity, line, byteposition). For data nodes, the
-position is not stored. If the position is not available the triple
-<literal>"?", 0, 0</literal> is returned.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>comment</literal>: Returns <literal>Some text</literal> for comment
-nodes, and <literal>None</literal> for other nodes. The <literal>text</literal>
-is everything between the comment delimiters <literal>&lt;!--</literal> and
-<literal>--&gt;</literal>.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>pinstr n</literal>: Returns all processing instructions that are
-directly contained in this element and that have a <emphasis>target</emphasis>
-specification of <literal>n</literal>. The target is the first word after
-the <literal>&lt;?</literal>.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>pinstr_names</literal>: Returns the list of all targets of processing
-instructions directly contained in this element.</para>
- </listitem>
- <listitem>
- <para>
-<literal>write s enc</literal>: Prints the node and all subnodes to the passed
-output stream as valid XML text, using the passed external encoding.
-</para>
- </listitem>
- </itemizedlist>
- </para>
- </formalpara>
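-
- <para>
-As a brief illustration (not part of the original interface documentation),
-the following sketch combines some of these observer methods: it prints, for
-every child of a node, the node type and - for data children - the contained
-character data. It relies only on the methods listed above.
-
-<programlisting>
-(* Sketch: print the node type of every child of [n]; for data children,
- * also print the contained character data.
- *)
-let print_children n =
-  n # iter_nodes
-    (fun child ->
-       match child # node_type with
-           T_data      -> print_endline ("Data:    " ^ child # data)
-         | T_element t -> print_endline ("Element: " ^ t)
-         | _           -> ())
-;;
-</programlisting>
-</para>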
-
- <anchor id="type-node-atts">
- <formalpara>
- <title id="type-node-atts.title">
- <link linkend="type-node-atts.sig">Attribute observers</link>
- </title>
- <para>
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<literal>attribute n</literal>: Returns the value of the attribute with name
-<literal>n</literal>. This method returns a value for every declared
-attribute, and it raises <literal>Not_found</literal> for any undeclared
-attribute. Note that it even returns a value if the attribute is actually
-missing but is declared as <literal>#IMPLIED</literal> or has a default
-value. - Possible values are:
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<literal>Implied_value</literal>: The attribute has been declared with the
-keyword <literal>#IMPLIED</literal>, and the attribute is missing in the
-attribute list of this element.</para>
- </listitem>
- <listitem>
- <para>
-<literal>Value s</literal>: The attribute has been declared as type
-<literal>CDATA</literal>, as <literal>ID</literal>, as
-<literal>IDREF</literal>, as <literal>ENTITY</literal>, as
-<literal>NMTOKEN</literal>, or as an enumeration or notation, and one of the two
-conditions holds: (1) The attribute value is present in the attribute list in
-which case the value is returned in the string <literal>s</literal>. (2) The
-attribute has been omitted, and the DTD declared the attribute with a default
-value. The default value is returned in <literal>s</literal>.
-- Summarized, <literal>Value s</literal> is returned for non-implied, non-list
-attribute values.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>Valuelist l</literal>: The attribute has been declared as type
-<literal>IDREFS</literal>, as <literal>ENTITIES</literal>, or
-as <literal>NMTOKENS</literal>, and one of the two conditions holds: (1) The
-attribute value is present in the attribute list in which case the
-space-separated tokens of the value are returned in the string list
-<literal>l</literal>. (2) The attribute has been omitted, and the DTD declared
-the attribute with a default value. The default value is returned in
-<literal>l</literal>.
-- Summarized, <literal>Valuelist l</literal> is returned for all list-type
-attribute values.
-</para>
- </listitem>
- </itemizedlist>
-
-Note that before the attribute value is returned, the value is normalized. This
-means that newlines are converted to spaces, and that references to character
-entities (i.e. <literal>&amp;#<replaceable>n</replaceable>;</literal>) and
-general entities
-(i.e. <literal>&amp;<replaceable>name</replaceable>;</literal>) are expanded;
-if necessary, expansion is performed recursively.
-</para>
-
-<para>
-In well-formedness mode, there is no DTD which could declare an
-attribute. Because of this, every occurring attribute is treated as a CDATA
-attribute.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>required_string_attribute n</literal>: returns the Value attribute
-called n, or the Valuelist attribute as a string where the list elements
-are separated by spaces. If the attribute value is implied, or if the
-attribute does not exist, the method will fail. - This method is convenient
-if you expect a non-implied and non-list attribute value.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>optional_string_attribute n</literal>: returns the Value attribute
-called n, or the Valuelist attribute as a string where the list elements
-are separated by spaces. If the attribute value is implied, or if the
-attribute does not exist, the method returns None. - This method is
-convenient if you expect a non-list attribute value including the implied
-value.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>required_list_attribute n</literal>: returns the Valuelist attribute
-called n, or the Value attribute as a list with a single element.
-If the attribute value is implied, or if the
-attribute does not exist, the method will fail. - This method is
-convenient if you expect a list attribute value.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>optional_list_attribute n</literal>: returns the Valuelist attribute
-called n, or the Value attribute as a list with a single element.
-If the attribute value is implied, or if the
-attribute does not exist, an empty list will be returned. - This method
-is convenient if you expect a list attribute value or the implied value.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>attribute_names</literal>: returns the list of all attribute names of
-this element. As this is a validating parser, this list is equal to the
-list of declared attributes.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>attribute_type n</literal>: returns the type of the attribute called
-<literal>n</literal>. See the module <literal>Pxp_types</literal> for a
-description of the encoding of the types.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>attributes</literal>: returns the list of pairs of names and values
-for all attributes of
-this element.</para>
- </listitem>
- <listitem>
- <para>
-<literal>id_attribute_name</literal>: returns the name of the attribute that is
-declared with type ID. There is at most one such attribute. The method raises
-<literal>Not_found</literal> if there is no declared ID attribute for the
-element type.</para>
- </listitem>
- <listitem>
- <para>
-<literal>id_attribute_value</literal>: returns the value of the attribute that
-is declared with type ID. There is at most one such attribute. The method raises
-<literal>Not_found</literal> if there is no declared ID attribute for the
-element type.</para>
- </listitem>
- <listitem>
- <para>
-<literal>idref_attribute_names</literal>: returns the list of attribute names
-that are declared as IDREF or IDREFS.</para>
- </listitem>
- </itemizedlist>
- </para>
- </formalpara>
-
- <anchor id="type-node-mods">
- <formalpara>
- <title id="type-node-mods.title">
- <link linkend="type-node-mods.sig">Modifying methods</link>
- </title>
-
- <para>
-The following methods are only defined for element nodes (more exactly:
-the methods are defined for data nodes, too, but they always fail).
-
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<literal>add_node sn</literal>: Adds sub node <literal>sn</literal> to the list
-of children. This operation is illustrated in the picture
-<link linkend="node-add" endterm="node-add"></link>. This method expects that
-<literal>sn</literal> is a root, and it requires that <literal>sn</literal> and
-the current object share the same DTD.
-</para>
-
-<para>Because <literal>add_node</literal> is the method the parser itself uses
-to add new nodes to the tree, it performs by default some simple validation
-checks: If the content model is a regular expression, it is not allowed to add
-data nodes to this node unless the new nodes consist only of whitespace. In
-this case, the new data nodes are silently dropped (you can change this by
-invoking <literal>keep_always_whitespace_mode</literal>).
-</para>
-
-<para>If the document is flagged as stand-alone, such whitespace-only data
-nodes are even forbidden if the element declaration is contained in an
-external entity. This case is detected and rejected.</para>
-
-<para>If the content model is <literal>EMPTY</literal>, it is not allowed to
-add any data node unless the data node is empty. In this case, the new data
-node is silently dropped.
-</para>
-
-<para>These checks only apply if there is a DTD. In well-formedness mode, it is
-assumed that every element is declared with content model
-<literal>ANY</literal>, which effectively disables these checks. Furthermore, you
-can turn the checks off explicitly by passing <literal>~force:true</literal> as
-the first argument.</para>
- </listitem>
- <listitem>
- <para>
-<literal>add_pinstr pi</literal>: Adds the processing instruction
-<literal>pi</literal> to the list of processing instructions.
-</para>
- </listitem>
-
- <listitem>
- <para>
-<literal>delete</literal>: Deletes this node from the tree. After this
-operation, this node is no longer the child of the former father node; and the
-node loses the connection to the father as well. This operation is illustrated
-by the figure <link linkend="node-delete" endterm="node-delete"></link>.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>set_nodes nl</literal>: Sets the list of children to
-<literal>nl</literal>. It is required that every member of <literal>nl</literal>
-is a root, and that all members and the current object share the same DTD.
-Unlike <literal>add_node</literal>, no validation checks are performed.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>quick_set_attributes atts</literal>: sets the attributes of this
-element to <literal>atts</literal>. It is <emphasis>not</emphasis> checked
-whether <literal>atts</literal> matches the DTD or not; it is up to the
-caller of this method to ensure this. (This method may be useful to transform
-the attribute values, i.e. to apply a mapping to every attribute; see the
-sketch after this list.)
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>set_comment text</literal>: This method is only applicable to
-<literal>T_comment</literal> nodes; it sets the comment text contained by such
-nodes. </para>
- </listitem>
- </itemizedlist>
-</para>
- </formalpara>
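-
- <para>
-For instance, the attribute-mapping use of
-<literal>quick_set_attributes</literal> mentioned in the list above could be
-sketched as follows; the transformation (uppercasing every
-<literal>Value</literal>) is only an example, and no check against the DTD is
-performed:
-
-<programlisting>
-(* Sketch: rewrite the attribute list of element [n] in place, uppercasing
- * every plain attribute value and leaving list values and implied values
- * unchanged.
- *)
-let uppercase_attributes n =
-  let atts' =
-    List.map
-      (fun (name, v) ->
-         match v with
-             Value s -> (name, Value (String.uppercase s))
-           | other   -> (name, other))
-      (n # attributes)
-  in
-  n # quick_set_attributes atts'
-;;
-</programlisting>
-</para>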
-
- <anchor id="type-node-cloning">
- <formalpara>
- <title id="type-node-cloning.title">
- <link linkend="type-node-cloning.sig">Cloning methods</link>
- </title>
-
- <para>
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<literal>orphaned_clone</literal>: Returns a clone of the node and the complete
-tree below this node (deep clone). The clone does not have a parent (i.e. the
-reference to the parent node is <emphasis>not</emphasis> cloned). While
-copying the subtree, strings are not duplicated; the original tree and the
-copy are likely to share strings. Extension objects are cloned by invoking
-the <literal>clone</literal> method on the original objects; how much of
-the extension objects is cloned depends on the implementation of this method.
-</para>
- <para>This operation is illustrated by the figure
-<link linkend="node-clone" endterm="node-clone"></link>.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>orphaned_flat_clone</literal>: Returns a clone of the node,
-but sets the list of sub nodes to [], i.e. the sub nodes are not cloned.
-</para>
- </listitem>
- <listitem>
- <para>
-<anchor id="type-node-meth-create-element">
-<literal>create_element dtd nt al</literal>: Returns a flat copy of this node
-(which must be an element) with the following modifications: The DTD is set to
-<literal>dtd</literal>; the node type is set to <literal>nt</literal>, and the
-new attribute list is set to <literal>al</literal> (given as list of
-(name,value) pairs). The copy has neither children nor a parent. It does not
-contain processing instructions. See
-<link linkend="type-node-ex-create-element">the example below</link>.
-</para>
-
- <para>Note that you can specify the position of the new node
-by the optional argument <literal>~position</literal>.</para>
- </listitem>
- <listitem>
- <para>
-<anchor id="type-node-meth-create-data">
-<literal>create_data dtd cdata</literal>: Returns a flat copy of this node
-(which must be a data node) with the following modifications: The DTD is set to
-<literal>dtd</literal>; the node type is set to <literal>T_data</literal>; the
-attribute list is empty (data nodes never have attributes); the list of
-children and PIs is empty, too (same reason). The new node does not have a
-parent. The value <literal>cdata</literal> is the new character content of the
-node. See
-<link linkend="type-node-ex-create-data">the example below</link>.
-</para>
- </listitem>
- <listitem>
- <para>
-<literal>keep_always_whitespace_mode</literal>: Even data nodes which are
-normally dropped because they only contain ignorable whitespace, can be added to
-this node once this mode is turned on. (This mode is useful to produce
-canonical XML.)
-</para>
- </listitem>
- </itemizedlist>
-</para>
- </formalpara>
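-
- <para>
-A typical use of <literal>orphaned_clone</literal> is to copy a subtree into
-another place of the same tree, or into another tree that shares the same
-DTD. A minimal sketch (the names are only examples):
-
-<programlisting>
-(* Sketch: make an orphaned copy of the subtree below [n] and attach the
- * copy to [target]. The original subtree is left untouched.
- *)
-let copy_below target n =
-  let copy = n # orphaned_clone in
-  target # add_node copy
-;;
-</programlisting>
-</para>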
-
- <anchor id="type-node-weird">
- <formalpara>
- <title id="type-node-weird.title">
- <link linkend="type-node-weird.sig">Validating methods</link>
- </title>
- <para>
-There is one method which locally validates the node, i.e. checks whether the
-subnodes match the content model of this node.
-
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>
-<literal>local_validate</literal>: Checks that this node conforms to the
-DTD by comparing the type of the subnodes with the content model for this
-node. (Applications need not call this method unless they add new nodes
-themselves to the tree.)
-</para>
- </listitem>
- </itemizedlist>
-</para>
- </formalpara>
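-
- <para>
-A minimal sketch of how <literal>local_validate</literal> might be called
-after a manual modification of the tree (assuming that
-<literal>parent</literal> and <literal>child</literal> share the same DTD):
-
-<programlisting>
-(* Sketch: add a node manually and then check the modified element against
- * its content model; validation errors are reported by exception.
- *)
-let add_and_validate parent child =
-  parent # add_node child;
-  parent # local_validate ()
-;;
-</programlisting>
-</para>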
- </sect2>
-
- <sect2>
- <title>The class <literal>element_impl</literal></title>
- <para>
-This class is an implementation of <literal>node</literal> which
-realizes element nodes:
-
-<programlisting>
-<![CDATA[
-class [ 'ext ] element_impl : 'ext -> [ 'ext ] node
-]]>
-</programlisting>
-
-</para>
- <formalpara>
- <title>Constructor</title>
- <para>
-You can create a new instance by
-
-<programlisting>
-new element_impl <replaceable>extension_object</replaceable>
-</programlisting>
-
-which creates a special element that already contains a
-reference to the <replaceable>extension_object</replaceable>, but is
-otherwise empty. This special form is called an
-<emphasis>exemplar</emphasis>. The purpose of exemplars is that they serve as
-patterns that can be duplicated and filled with data. The method
-<link linkend="type-node-meth-create-element">
-<literal>create_element</literal></link> is designed to perform this action.
-</para>
- </formalpara>
-
- <anchor id="type-node-ex-create-element">
- <formalpara>
- <title>Example</title>
-
- <para>First, create an exemplar by
-
-<programlisting>
-let exemplar_ext = ... in
-let exemplar = new element_impl exemplar_ext in
-</programlisting>
-
-The <literal>exemplar</literal> is not used in node trees, but only as
-a pattern when the element nodes are created:
-
-<programlisting>
-let element = exemplar # <link linkend="type-node-meth-create-element">create_element</link> dtd (T_element name) attlist
-</programlisting>
-
-The <literal>element</literal> is a copy of <literal>exemplar</literal>
-(even the extension <literal>exemplar_ext</literal> has been copied)
-which ensures that <literal>element</literal> and its extension are objects
-of the same class as the exemplars; note that you need not pass a
-class name or other meta information. The copy is initially connected
-with the <literal>dtd</literal>, it gets a node type, and the attribute list
-is filled. The <literal>element</literal> is now fully functional; it can
-be added to another element as child, and it can contain references to
-subnodes.
-</para>
- </formalpara>
-
- </sect2>
-
- <sect2>
- <title>The class <literal>data_impl</literal></title>
- <para>
-This class is an implementation of <literal>node</literal> which
-should be used for all character data nodes:
-
-<programlisting>
-<![CDATA[
-class [ 'ext ] data_impl : 'ext -> [ 'ext ] node
-]]>
-</programlisting>
-
-</para>
-
- <formalpara>
- <title>Constructor</title>
- <para>
-You can create a new instance by
-
-<programlisting>
-new data_impl <replaceable>extension_object</replaceable>
-</programlisting>
-
-which creates an empty exemplar node which is connected to
-<replaceable>extension_object</replaceable>. The node does not contain a
-reference to any DTD, and because of this it cannot be added to node trees.
-</para>
- </formalpara>
-
- <para>To get a fully working data node, apply the method
-<link linkend="type-node-meth-create-data"><literal>create_data</literal>
-</link> to the exemplar (see example).
-</para>
-
- <anchor id="type-node-ex-create-data">
- <formalpara>
- <title>Example</title>
-
- <para>First, create an exemplar by
-
-<programlisting>
-let exemplar_ext = ... in
-let exemplar = new data_impl exemplar_ext in
-</programlisting>
-
-The <literal>exemplar</literal> is not used in node trees, but only as
-a pattern when the data nodes are created:
-
-<programlisting>
-let data_node = exemplar # <link
- linkend="type-node-meth-create-data">create_data</link> dtd "The characters contained in the data node"
-</programlisting>
-
-The <literal>data_node</literal> is a copy of <literal>exemplar</literal>.
-The copy is initially connected
-with the <literal>dtd</literal>, and it is filled with character material.
-The <literal>data_node</literal> is now fully functional; it can
-be added to an element as child.
-</para>
- </formalpara>
- </sect2>
-
- <sect2>
- <title>The type <literal>spec</literal></title>
- <para>
-The type <literal>spec</literal> defines a way to handle the details of
-creating nodes from exemplars.
-
-<programlisting><![CDATA[
-type 'ext spec
-constraint 'ext = 'ext node #extension
-
-val make_spec_from_mapping :
- ?super_root_exemplar : 'ext node ->
- ?comment_exemplar : 'ext node ->
- ?default_pinstr_exemplar : 'ext node ->
- ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
- data_exemplar: 'ext node ->
- default_element_exemplar: 'ext node ->
- element_mapping: (string, 'ext node) Hashtbl.t ->
- unit ->
- 'ext spec
-
-val make_spec_from_alist :
- ?super_root_exemplar : 'ext node ->
- ?comment_exemplar : 'ext node ->
- ?default_pinstr_exemplar : 'ext node ->
- ?pinstr_alist : (string * 'ext node) list ->
- data_exemplar: 'ext node ->
- default_element_exemplar: 'ext node ->
- element_alist: (string * 'ext node) list ->
- unit ->
- 'ext spec
-]]></programlisting>
-
-The two functions <literal>make_spec_from_mapping</literal> and
-<literal>make_spec_from_alist</literal> create <literal>spec</literal>
-values. Both functions are functionally equivalent; the only difference is
-that the former uses hashtables and the latter associative lists to
-describe the mappings from names to exemplars.
-</para>
-
-<para>
-You can specify exemplars for the various kinds of nodes that need to be
-generated when an XML document is parsed:
-
-<itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para><literal>~super_root_exemplar</literal>: This exemplar
-is used to create the super root. This special node is only created if the
-corresponding configuration option has been selected; it is the parent node of
-the root node which may be convenient if every working node must have a parent.</para>
- </listitem>
- <listitem>
- <para><literal>~comment_exemplar</literal>: This exemplar is
-used when a comment node must be created. Note that such nodes are only created
-if the corresponding configuration option is "on".
-</para>
- </listitem>
- <listitem>
- <para><literal>~default_pinstr_exemplar</literal>: If a node
-for a processing instruction must be created, and the instruction is not listed
-in the table passed by <literal>~pinstr_mapping</literal> or
-<literal>~pinstr_alist</literal>, this exemplar is used.
-Again the configuration option must be "on" in order to create such nodes at
-all.
-</para>
- </listitem>
- <listitem>
- <para><literal>~pinstr_mapping</literal> or
-<literal>~pinstr_alist</literal>: Map the target names of processing
-instructions to exemplars. These mappings are only used when nodes for
-processing instructions are created.</para>
- </listitem>
- <listitem>
- <para><literal>~data_exemplar</literal>: The exemplar for
-ordinary data nodes.</para>
- </listitem>
- <listitem>
- <para><literal>~default_element_exemplar</literal>: This
-exemplar is used if an element node must be created, but the element type
-cannot be found in the tables <literal>element_mapping</literal> or
-<literal>element_alist</literal>.</para>
- </listitem>
- <listitem>
- <para><literal>~element_mapping</literal> or
-<literal>~element_alist</literal>: Map the element types to exemplars. These
-mappings are used to create element nodes.</para>
- </listitem>
- </itemizedlist>
-
-In most cases, you only want to create <literal>spec</literal> values to pass
-them to the parser functions found in <literal>Pxp_yacc</literal>. However, it
-might be useful to apply <literal>spec</literal> values directly.
-</para>
-
-<para>The following functions create various types of nodes by selecting the
-corresponding exemplar from the passed <literal>spec</literal> value, and by
-calling <literal>create_element</literal> or <literal>create_data</literal> on
-the exemplar.
-
-<programlisting><![CDATA[
-val create_data_node :
- 'ext spec ->
- dtd ->
- (* data material: *) string ->
- 'ext node
-
-val create_element_node :
- ?position:(string * int * int) ->
- 'ext spec ->
- dtd ->
- (* element type: *) string ->
- (* attributes: *) (string * string) list ->
- 'ext node
-
-val create_super_root_node :
- ?position:(string * int * int) ->
- 'ext spec ->
- dtd ->
- 'ext node
-
-val create_comment_node :
- ?position:(string * int * int) ->
- 'ext spec ->
- dtd ->
- (* comment text: *) string ->
- 'ext node
-
-val create_pinstr_node :
- ?position:(string * int * int) ->
- 'ext spec ->
- dtd ->
- proc_instruction ->
- 'ext node
-]]></programlisting>
-</para>
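-
-<para>
-For example, a small helper that builds an element with character content
-directly from a <literal>spec</literal> might look like the following sketch;
-the element type "para" is only an example, and <literal>spec</literal> and
-<literal>dtd</literal> are assumed to be available:
-
-<programlisting>
-(* Sketch: create a "para" element containing one data node, using the
- * exemplars recorded in [spec].
- *)
-let make_paragraph spec dtd text =
-  let p = create_element_node spec dtd "para" [] in
-  let d = create_data_node spec dtd text in
-  p # add_node d;
-  p
-;;
-</programlisting>
-</para>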
- </sect2>
-
- <sect2>
- <title>Examples</title>
-
- <formalpara>
- <title>Building trees.</title>
-
- <para>Here is the piece of code that creates the tree of
-the figure <link linkend="node-term" endterm="node-term"></link>. The extension
-object and the DTD are beyond the scope of this example.
-
-<programlisting>
-let exemplar_ext = ... (* some extension *) in
-let dtd = ... (* some DTD *) in
-
-let element_exemplar = new element_impl exemplar_ext in
-let data_exemplar = new data_impl exemplar_ext in
-
-let a1 = element_exemplar # create_element dtd (T_element "a") ["att", "apple"]
-and b1 = element_exemplar # create_element dtd (T_element "b") []
-and c1 = element_exemplar # create_element dtd (T_element "c") []
-and a2 = element_exemplar # create_element dtd (T_element "a") ["att", "orange"]
-in
-
-let cherries = data_exemplar # create_data dtd "Cherries" in
-let orange = data_exemplar # create_data dtd "An orange" in
-
-a1 # add_node b1;
-a1 # add_node c1;
-b1 # add_node a2;
-b1 # add_node cherries;
-a2 # add_node orange;
-</programlisting>
-
-Alternatively, the last block of statements could also be written as:
-
-<programlisting>
-a1 # set_nodes [b1; c1];
-b1 # set_nodes [a2; cherries];
-a2 # set_nodes [orange];
-</programlisting>
-
-The root of the tree is <literal>a1</literal>, i.e. it is true that
-
-<programlisting>
-x # root == a1
-</programlisting>
-
-for every x from { <literal>a1</literal>, <literal>a2</literal>,
-<literal>b1</literal>, <literal>c1</literal>, <literal>cherries</literal>,
-<literal>orange</literal> }.
-</para>
- </formalpara>
- <para>
-Furthermore, the following properties hold:
-
-<programlisting>
- a1 # attribute "att" = Value "apple"
-& a2 # attribute "att" = Value "orange"
-
-& cherries # data = "Cherries"
-& orange # data = "An orange"
-& a1 # data = "An orangeCherries"
-
-& a1 # node_type = T_element "a"
-& a2 # node_type = T_element "a"
-& b1 # node_type = T_element "b"
-& c1 # node_type = T_element "c"
-& cherries # node_type = T_data
-& orange # node_type = T_data
-
-& a1 # sub_nodes = [ b1; c1 ]
-& a2 # sub_nodes = [ orange ]
-& b1 # sub_nodes = [ a2; cherries ]
-& c1 # sub_nodes = []
-& cherries # sub_nodes = []
-& orange # sub_nodes = []
-
-& a2 # parent == a1
-& b1 # parent == a1
-& c1 # parent == a1
-& cherries # parent == b1
-& orange # parent == a2
-</programlisting>
-</para>
- <formalpara>
- <title>Searching nodes.</title>
-
- <para>The following function searches all nodes of a tree
-for which a certain condition holds:
-
-<programlisting>
-let rec search p t =
- if p t then
- t :: search_list p (t # sub_nodes)
- else
- search_list p (t # sub_nodes)
-
-and search_list p l =
- match l with
- [] -> []
- | t :: l' -> (search p t) @ (search_list p l')
-;;
-</programlisting>
-</para>
- </formalpara>
-
- <para>For example, if you want to search all elements of a certain
-type <literal>et</literal>, the function <literal>search</literal> can be
-applied as follows:
-
-<programlisting>
-let search_element_type et t =
- search (fun x -> x # node_type = T_element et) t
-;;
-</programlisting>
-</para>
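-
- <para>For example, to collect all elements of type "p" below a node
-<literal>root</literal> (both names are only examples):
-
-<programlisting>
-let all_p_elements = search_element_type "p" root
-</programlisting>
-</para>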
-
- <formalpara>
- <title>Getting attribute values.</title>
-
- <para>Suppose we have the declaration:
-
-<programlisting><![CDATA[
-<!ATTLIST e a CDATA #REQUIRED
- b CDATA #IMPLIED
- c CDATA "12345">]]>
-</programlisting>
-
-In this case, every element <literal>e</literal> must have an attribute
-<literal>a</literal>, otherwise the parser would indicate an error. If
-the O'Caml variable <literal>n</literal> holds the node of the tree
-corresponding to the element, you can get the value of the attribute
-<literal>a</literal> by
-
-<programlisting>
-let value_of_a = n # required_string_attribute "a"
-</programlisting>
-
-which is more or less an abbreviation for
-
-<programlisting><![CDATA[
-let value_of_a =
- match n # attribute "a" with
- Value s -> s
- | _ -> assert false]]>
-</programlisting>
-
-- as the attribute is required, the <literal>attribute</literal> method always
-returns a <literal>Value</literal>.
-</para>
- </formalpara>
-
- <para>In contrast to this, the attribute <literal>b</literal> can be
-omitted. In this case, the method <literal>required_string_attribute</literal>
-works only if the attribute is there, and the method will fail if the attribute
-is missing. To get the value, you can apply the method
-<literal>optional_string_attribute</literal>:
-
-<programlisting>
-let value_of_b = n # optional_string_attribute "b"
-</programlisting>
-
-Now, <literal>value_of_b</literal> is of type <literal>string option</literal>,
-and <literal>None</literal> represents the omitted attribute. Alternatively,
-you could also use <literal>attribute</literal>:
-
-<programlisting><![CDATA[
-let value_of_b =
- match n # attribute "b" with
- Value s -> Some s
- | Implied_value -> None
- | _ -> assert false]]>
-</programlisting>
-</para>
-
-    <para>The attribute <literal>c</literal> behaves much like
-<literal>a</literal>, because it always has a value. If the attribute is
-omitted, the default, here "12345", will be returned instead. Because of this,
-you can again use <literal>required_string_attribute</literal> to get the
-value.
-</para>
-
- <para>The type <literal>CDATA</literal> is the most general string
-type. The types <literal>NMTOKEN</literal>, <literal>ID</literal>,
-<literal>IDREF</literal>, <literal>ENTITY</literal>, and all enumerators and
-notations are special forms of string types that restrict the possible
-values. From O'Caml, they behave like <literal>CDATA</literal>, i.e. you can
-use the methods <literal>required_string_attribute</literal> and
-<literal>optional_string_attribute</literal>, too.
-</para>
-
- <para>In contrast to this, the types <literal>NMTOKENS</literal>,
-<literal>IDREFS</literal>, and <literal>ENTITIES</literal> mean lists of
-strings. Suppose we have the declaration:
-
-<programlisting><![CDATA[
-<!ATTLIST f d NMTOKENS #REQUIRED
- e NMTOKENS #IMPLIED>]]>
-</programlisting>
-
-The type <literal>NMTOKENS</literal> stands for lists of space-separated
-tokens; for example the value <literal>"1 abc 23ef"</literal> means the list
-<literal>["1"; "abc"; "23ef"]</literal>. (Again, <literal>IDREFS</literal>
-and <literal>ENTITIES</literal> have more restricted values.) To get the
-value of attribute <literal>d</literal>, one can use
-
-<programlisting>
-let value_of_d = n # required_list_attribute "d"
-</programlisting>
-
-or
-
-<programlisting><![CDATA[
-let value_of_d =
- match n # attribute "d" with
- Valuelist l -> l
- | _ -> assert false]]>
-</programlisting>
-
-As <literal>d</literal> is required, the attribute cannot be omitted, and
-the <literal>attribute</literal> method always returns a
-<literal>Valuelist</literal>.
-</para>
-
- <para>For optional attributes like <literal>e</literal>, apply
-
-<programlisting>
-let value_of_e = n # optional_list_attribute "e"
-</programlisting>
-
-or
-
-<programlisting><![CDATA[
-let value_of_e =
- match n # attribute "e" with
- Valuelist l -> l
- | Implied_value -> []
- | _ -> assert false]]>
-</programlisting>
-
-Here, a missing attribute counts as the empty list.
-</para>
-
- </sect2>
-
-
- <sect2>
- <title>Iterators</title>
-
- <para>There are also several iterators in Pxp_document; please see
-the mli file for details. You can find examples for them in the
-"simple_transformation" directory.
-
-<programlisting><![CDATA[
-val find : ?deeply:bool ->
- f:('ext node -> bool) -> 'ext node -> 'ext node
-
-val find_all : ?deeply:bool ->
- f:('ext node -> bool) -> 'ext node -> 'ext node list
-
-val find_element : ?deeply:bool ->
- string -> 'ext node -> 'ext node
-
-val find_all_elements : ?deeply:bool ->
- string -> 'ext node -> 'ext node list
-
-exception Skip
-val map_tree : pre:('exta node -> 'extb node) ->
- ?post:('extb node -> 'extb node) ->
- 'exta node ->
- 'extb node
-
-
-val map_tree_sibl :
- pre: ('exta node option -> 'exta node -> 'exta node option ->
- 'extb node) ->
- ?post:('extb node option -> 'extb node -> 'extb node option ->
- 'extb node) ->
- 'exta node ->
- 'extb node
-
-val iter_tree : ?pre:('ext node -> unit) ->
- ?post:('ext node -> unit) ->
- 'ext node ->
- unit
-
-val iter_tree_sibl :
- ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) ->
- ?post:('ext node option -> 'ext node -> 'ext node option -> unit) ->
- 'ext node ->
- unit
-]]></programlisting>
-</para>
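-
-<para>
-As a sketch of how these iterators can be used (the element type "title" is
-only an example):
-
-<programlisting>
-(* Sketch: collect the character data of all "title" elements below [root]. *)
-let titles root =
-  List.map (fun n -> n # data) (find_all_elements ~deeply:true "title" root)
-;;
-
-(* Sketch: print the element type of every element in the tree. *)
-let print_element_types root =
-  iter_tree
-    ~pre:(fun n ->
-            match n # node_type with
-                T_element name -> print_endline name
-              | _              -> ())
-    root
-;;
-</programlisting>
-</para>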
- </sect2>
-
- </sect1>
-
-<!-- ********************************************************************** -->
-
- <sect1>
- <title>The class type <literal>extension</literal></title>
- <para>
-
-<programlisting>
-<![CDATA[
-class type [ 'node ] extension =
- object ('self)
- method clone : 'self
- (* "clone" should return an exact deep copy of the object. *)
- method node : 'node
- (* "node" returns the corresponding node of this extension. This method
-   * is intended to return exactly what has previously been set by "set_node".
- *)
- method set_node : 'node -> unit
- (* "set_node" is invoked once the extension is associated to a new
- * node object.
- *)
- end
-]]>
-</programlisting>
-
-This is the type of classes used for node extensions. For every node of the
-document tree, there is not only the <literal>node</literal> object, but also
-an <literal>extension</literal> object. The latter has minimal
-functionality; it has only the methods necessary to attach it to the node
-object that contains the details of the node instance. The extension object is
-called extension because its purpose is extensibility.</para>
-
-    <para>For several reasons, it is impossible to derive the
-<literal>node</literal> classes (i.e. <literal>element_impl</literal> and
-<literal>data_impl</literal>) such that the subclasses can be extended by
-new methods. But
-subclassing nodes would be a great feature, because it would allow the user to
-provide different classes for different types of nodes. The extension objects
-are a workaround that is as powerful as direct subclassing; the cost is
-some notational overhead.
-</para>
-
-<figure id="extension-general" float="1">
-<title>The structure of nodes and extensions</title>
-<graphic fileref="pic/extension_general" format="GIF">
-</graphic>
-</figure>
-
- <para>The picture shows how the nodes and extensions are linked
-together. Every node has a reference to its extension, and every extension has
-a reference to its node. The methods <literal>extension</literal> and
-<literal>node</literal> follow these references; a typical phrase is
-
-<programlisting>
-self # node # attribute "xy"
-</programlisting>
-
-to get the value of an attribute from a method defined in the extension object;
-or
-
-<programlisting>
-self # node # iter_nodes
- (fun n -> n # extension # my_method ...)
-</programlisting>
-
-to iterate over the subnodes and to call <literal>my_method</literal> of the
-corresponding extension objects.
-</para>
-
- <para>Note that extension objects do not have references to subnodes
-(or "subextensions") themselves; in order to get one of the children of an
-extension you must first go to the node object, then get the child node, and
-finally reach the extension that is logically the child of the extension you
-started with.</para>
-
- <sect2>
- <title>How to define an extension class</title>
-
- <para>At minimum, you must define the methods
-<literal>clone</literal>, <literal>node</literal>, and
-<literal>set_node</literal> such that your class is compatible with the type
-<literal>extension</literal>. The method <literal>set_node</literal> is called
-during the initialization of the node, or after a node has been cloned; the
-node object invokes <literal>set_node</literal> on the extension object to tell
-it that this node is now the object the extension is linked to. The extension
-must return the node object passed as argument of <literal>set_node</literal>
-when the <literal>node</literal> method is called.</para>
-
- <para>The <literal>clone</literal> method must return a copy of the
-extension object; at least the object itself must be duplicated, but if
-required, the copy should deeply duplicate all objects and values that are
-referred by the extension, too. Whether this is required, depends on the
-application; <literal>clone</literal> is invoked by the node object when one of
-its cloning methods is called.</para>
-
- <para>A good starting point for an extension class:
-
-<programlisting>
-<![CDATA[class custom_extension =
- object (self)
-
- val mutable node = (None : custom_extension node option)
-
- method clone = {< >}
-
- method node =
- match node with
- None ->
- assert false
- | Some n -> n
-
- method set_node n =
- node <- Some n
-
- end
-]]>
-</programlisting>
-
-This class is compatible with <literal>extension</literal>. The purpose of
-defining such a class is, of course, adding further methods; and you can do it
-without restriction.
-</para>
-
-    <para>Often, you will want more than one extension class. In this case,
-the simplest approach is to give all your classes (for one kind of document)
-the same type with respect to the interface; i.e. it does not matter if your
-classes differ in their private methods and instance variables, but the
-public methods count. This approach avoids lots of coercions and problems with
-type incompatibilities. It is simple to implement:
-
-<programlisting>
-<![CDATA[class virtual custom_extension =
- object (self)
- val mutable node = (None : custom_extension node option)
-
- method clone = ... (* see above *)
- method node = ... (* see above *)
- method set_node n = ... (* see above *)
-
- method virtual my_method1 : ...
- method virtual my_method2 : ...
- ... (* etc. *)
- end
-
-class custom_extension_kind_A =
- object (self)
- inherit custom_extension
-
- method my_method1 = ...
- method my_method2 = ...
- end
-
-class custom_extension_kind_B =
- object (self)
- inherit custom_extension
-
- method my_method1 = ...
- method my_method2 = ...
- end
-]]>
-</programlisting>
-
-If a class does not need a method (e.g. because it does not make sense, or it
-would violate some important condition), it is possible to define the method
-and to always raise an exception when the method is invoked
-(e.g. <literal>assert false</literal>).
-</para>
-
- <para>The latter is a strong recommendation: do not try to further
-specialize the types of extension objects. It is difficult, sometimes even
-impossible, and almost never worthwhile.</para>
- </sect2>
-
- <sect2>
- <title>How to bind extension classes to element types</title>
-
- <para>Once you have defined your extension classes, you can bind them
-to element types. The simplest case is that you have only one class and that
-this class is to be always used. The parsing functions in the module
-<literal>Pxp_yacc</literal> take a <literal>spec</literal> argument which
-can be customized. If your single class has the name <literal>c</literal>,
-this argument should be
-
-<programlisting>
-let spec =
- make_spec_from_alist
- ~data_exemplar: (new data_impl c)
- ~default_element_exemplar: (new element_impl c)
- ~element_alist: []
- ()
-</programlisting>
-
-This means that data nodes will be created from the exemplar passed by
-~data_exemplar and that all element nodes will be made from the exemplar
-specified by ~default_element_exemplar. In ~element_alist, you can specify
-that different exemplars are to be used for different element types; but
-this is an optional feature. If you do not need it, pass the empty list.
-</para>
-
-<para>
-Remember that an exemplar is a (node, extension) pair that serves as pattern
-when new nodes (and the corresponding extension objects) are added to the
-document tree. In this case, the exemplar contains <literal>c</literal> as
-extension, and when nodes are created, the exemplar is cloned; cloning
-also makes a copy of <literal>c</literal> such that all nodes of the document
-tree will have a copy of <literal>c</literal> as extension.
-</para>
-
- <para>The <literal>~element_alist</literal> argument can bind
-specific element types to specific exemplars; as exemplars may be instances of
-different classes it is effectively possible to bind element types to
-classes. For example, if the element type "p" is implemented by class "c_p",
-and "q" is realized by "c_q", you can pass the following value:
-
-<programlisting>
-let spec =
- make_spec_from_alist
- ~data_exemplar: (new data_impl c)
- ~default_element_exemplar: (new element_impl c)
- ~element_alist:
- [ "p", new element_impl c_p;
- "q", new element_impl c_q;
- ]
- ()
-</programlisting>
-
-The extension object <literal>c</literal> is still used for all data nodes and
-for all other element types.
-</para>
-
- </sect2>
-
- </sect1>
-
-<!-- ********************************************************************** -->
-
- <sect1>
- <title>Details of the mapping from XML text to the tree representation
-</title>
-
- <sect2>
- <title>The representation of character-free elements</title>
-
- <para>If an element declaration does not allow the element to
-contain character data, the following rules apply.</para>
-
- <para>If the element must be empty, i.e. it is declared with the
-keyword <literal>EMPTY</literal>, the element instance must be effectively
-empty (it must not even contain whitespace characters). The parser guarantees
-that a declared <literal>EMPTY</literal> element never contains a data
-node, not even a data node representing the empty string.</para>
-
- <para>If the element declaration only permits other elements to occur
-within that element but not character data, it is still possible to insert
-whitespace characters between the subelements. The parser ignores these
-characters, too, and does not create data nodes for them.</para>
-
- <formalpara>
- <title>Example.</title>
-
- <para>Consider the following element types:
-
-<programlisting><![CDATA[
-<!ELEMENT x ( #PCDATA | z )* >
-<!ELEMENT y ( z )* >
-<!ELEMENT z EMPTY>
-]]></programlisting>
-
-Only <literal>x</literal> may contain character data, the keyword
-<literal>#PCDATA</literal> indicates this. The other types are character-free.
-</para>
- </formalpara>
-
- <para>The XML term
-
-<programlisting><![CDATA[
-<x><z/> <z/></x>
-]]></programlisting>
-
-will be internally represented by an element node for <literal>x</literal>
-with three subnodes: the first <literal>z</literal> element, a data node
-containing the space character, and the second <literal>z</literal> element.
-In contrast to this, the term
-
-<programlisting><![CDATA[
-<y><z/> <z/></y>
-]]></programlisting>
-
-is represented by an element node for <literal>y</literal> with only
-<emphasis>two</emphasis> subnodes, the two <literal>z</literal> elements. There
-is no data node for the space character because spaces are ignored in the
-character-free element <literal>y</literal>.
-</para>
-
- </sect2>
-
- <sect2>
- <title>The representation of character data</title>
-
- <para>The XML specification allows all Unicode characters in XML
-texts. This parser can be configured such that UTF-8 is used to represent the
-characters internally; however, the default character encoding is
-ISO-8859-1. (Currently, no other encodings are possible for the internal string
-representation; the type <literal>Pxp_types.rep_encoding</literal> enumerates
-the possible encodings. In principle, the parser could use any encoding that is
-ASCII-compatible, but there are currently only lexical analyzers for UTF-8 and
-ISO-8859-1. It is currently impossible to use UTF-16 or UCS-4 as internal
-encodings (or other multibyte encodings which are not ASCII-compatible) unless
-major parts of the parser are rewritten - unlikely...)
-</para>
-
-<para>
-The internal encoding may be different from the external encoding (specified
-in the XML declaration <literal>&lt;?xml ... encoding="..."?&gt;</literal>); in
-this case the strings are automatically converted to the internal encoding.
-</para>
-
-<para>
-If the internal encoding is ISO-8859-1, it is possible that there are
-characters that cannot be represented. In this case, the parser ignores such
-characters and prints a warning (to the <literal>collect_warning</literal>
-object that must be passed when the parser is called).
-</para>
-
- <para>The XML specification allows lines to be separated by single LF
-characters, by CR LF character sequences, or by single CR
-characters. Internally, these separators are always converted to single LF
-characters.</para>
-
- <para>The parser guarantees that there are never two adjacent data
-nodes; if necessary, data material that would otherwise be represented by
-several nodes is collapsed into one node. Note that you can still create node
-trees with adjacent data nodes; however, the parser does not return such trees.
-</para>
-
- <para>Note that CDATA sections are not represented specially; such
-sections are added to the data material that is being collected for the
-next data node.</para>
- </sect2>
-
-
- <sect2>
- <title>The representation of entities within documents</title>
-
- <para><emphasis>Entities are not represented within
-documents!</emphasis> If the parser finds an entity reference in the document
-content, the reference is immediately expanded, and the parser reads the
-expansion text instead of the reference.
-</para>
- </sect2>
-
- <sect2>
- <title>The representation of attributes</title> <para>As attribute
-values are composed of Unicode characters, too, the same problems with the
-character encoding arise as for character material. Attribute values are
-converted to the internal encoding, too; and if there are characters that
-cannot be represented, these are dropped, and a warning is printed.</para>
-
- <para>Attribute values are normalized before they are returned by
-methods like <literal>attribute</literal>. First, any remaining entity
-references are expanded; if necessary, expansion is performed recursively.
-Second, newline characters (any of LF, CR LF, or CR characters) are converted
-to single space characters. Note that especially the latter action is
-prescribed by the XML standard (but the character reference
-<literal>&amp;#10;</literal> is not converted, so it is still possible to
-include line feeds in attributes).
-</para>
- </sect2>
-
- <sect2>
- <title>The representation of processing instructions</title>
-<para>Processing instructions are parsed to some extent: The first word of the
-PI is called the target, and it is stored separated from the rest of the PI:
-
-<programlisting><![CDATA[
-<?target rest?>
-]]></programlisting>
-
-The exact location where a PI occurs is not represented (by default). The
-parser puts the PI into the object that represents the embracing construct (an
-element, a DTD, or the whole document); that means you can find out which PIs
-occur in a certain element, in the DTD, or in the whole document, but you
-cannot look up the exact position within the construct.
-</para>
-
- <para>If you require the exact location of PIs, it is possible to
-create extra nodes for them. This mode is controlled by the option
-<literal>enable_pinstr_nodes</literal>. The additional nodes have the node type
-<literal>T_pinstr <replaceable>target</replaceable></literal>, and are created
-from special exemplars contained in the <literal>spec</literal> (see
-pxp_document.mli).</para>
- </sect2>
-
- <sect2>
- <title>The representation of comments</title>
-
-<para>Normally, comments are not represented; they are dropped by
-default. However, if you require them, it is possible to create
-<literal>T_comment</literal> nodes for them. This mode can be specified by the
-option <literal>enable_comment_nodes</literal>. Comment nodes are created from
-special exemplars contained in the <literal>spec</literal> (see
-pxp_document.mli). You can access the contents of comments through the
-method <literal>comment</literal>.</para>
- </sect2>
-
- <sect2>
- <title>The attributes <literal>xml:lang</literal> and
-<literal>xml:space</literal></title>
-
- <para>These attributes are not supported specially; they are handled
-like any other attribute.</para>
- </sect2>
-
-
- <sect2>
- <title>And what about namespaces?</title>
- <para>Currently, there is no special support for namespaces.
-However, the parser allows the colon character to occur in names, so it is
-possible to implement namespaces on top of the current API.</para>
-
-    <para>Some future release of PXP will support namespaces as a built-in
-feature...</para>
- </sect2>
-
- </sect1>
-
- </chapter>
-
-<!-- ********************************************************************** -->
-
- <chapter>
- <title>Configuring and calling the parser</title>
-
-<!--
- <para>
-<emphasis>
-Sorry, this chapter has not yet been written. For an introduction into parser
-configuration, see the previous chapters. As a first approximation, the
-interface definition of Markup_yacc outlines what could go here.
-</emphasis>
-</para>
--->
-
-<!--
- <para>
-<programlisting>&markup-yacc.mli;</programlisting>
-</para>
--->
-
- <sect1>
- <title>Overview</title>
- <para>
-There are the following main functions invoking the parser (in Pxp_yacc):
-
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para><emphasis>parse_document_entity:</emphasis> You want to
-parse a complete and closed document consisting of a DTD and the document body;
-the body is validated against the DTD. This mode is interesting if you have a
-file
-
-<programlisting><![CDATA[
-<!DOCTYPE root ... [ ... ] > <root> ... </root>
-]]></programlisting>
-
-and you can accept any DTD that is included in the file (e.g. because the file
-is under your control).
-</para>
- </listitem>
- <listitem>
- <para><emphasis>parse_wfdocument_entity:</emphasis> You want to
-parse a complete and closed document consisting of a DTD and the document body;
-but the body is not validated, only checked for well-formedness. This mode is
-preferred if validation costs too much time or if the DTD is missing.
-</para>
- </listitem>
- <listitem>
- <para><emphasis>parse_dtd_entity:</emphasis> You want only to
-parse an entity (file) containing the external subset of a DTD. Sometimes it is
-interesting to read such a DTD, for example to compare it with the DTD included
-in a document, or to apply the next mode:
-</para>
- </listitem>
- <listitem>
- <para><emphasis>parse_content_entity:</emphasis> You want only to
-parse an entity (file) containing a fragment of a document body; this fragment
-is validated against the DTD you pass to the function. In particular, the
-fragment must not have a <literal>&lt;!DOCTYPE&gt;</literal> clause, and must
-begin directly with an element. The element is validated against the DTD. This mode is
-interesting if you want to check documents against a fixed, immutable DTD.
-</para>
- </listitem>
- <listitem>
- <para><emphasis>parse_wfcontent_entity:</emphasis> This function
-also parses a single element without DTD, but does not validate it.</para>
- </listitem>
- <listitem>
- <para><emphasis>extract_dtd_from_document_entity:</emphasis> This
-function extracts the DTD from a closed document consisting of a DTD and a
-document body. Both the internal and the external subsets are extracted.</para>
- </listitem>
- </itemizedlist>
-</para>
-
-<para>
-In many cases, <literal>parse_document_entity</literal> is the preferred mode
-to parse a document in a validating way, and
-<literal>parse_wfdocument_entity</literal> is the mode of choice to parse a
-file while only checking for well-formedness.
-</para>
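-
-<para>
-As a first orientation, a minimal validating parser run could look as sketched
-below. This snippet is not part of the original examples; it assumes that the
-file name is only a placeholder and uses the defaults
-<literal>default_config</literal> and <literal>default_spec</literal> that are
-described later in this chapter:
-
-<programlisting><![CDATA[
-open Pxp_yacc
-
-(* Parse and validate the closed document stored in "doc.xml". *)
-let doc =
-  parse_document_entity default_config (from_file "doc.xml") default_spec
-]]></programlisting>
-</para>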
-
-<para>
-There are a number of variations of these modes. One important application of a
-parser is to check documents of an untrusted source against a fixed DTD. One
-solution is to disallow the <literal>&lt;!DOCTYPE&gt;</literal> clause in
-these documents, and treat the document like a fragment (using mode
-<emphasis>parse_content_entity</emphasis>). This is very simple, but
-inflexible; users of such a system cannot even define additional entities to
-abbreviate frequent phrases of their text.
-</para>
-
-<para>
-It may be necessary to have a more intelligent checker. For example, it is also
-possible to parse the document completely, i.e. including its DTD, and to
-compare this DTD with the prescribed one. To fully parse the document, mode
-<emphasis>parse_document_entity</emphasis> is applied; to obtain the DTD to
-compare against, mode <emphasis>parse_dtd_entity</emphasis> can be used.
-</para>
-
-<para>
-There is another very important configurable aspect of the parser: the
-so-called resolver. The task of the resolver is to locate the contents of an
-(external) entity for a given entity name, and to make the contents accessible
-as a character stream. (Furthermore, it also normalizes the character set;
-but this is a detail we can ignore here.) Suppose you have a file called
-<literal>"main.xml"</literal> containing
-
-<programlisting><![CDATA[
-<!ENTITY % sub SYSTEM "sub/sub.xml">
-%sub;
-]]></programlisting>
-
-and a file stored in the subdirectory <literal>"sub"</literal> with name
-<literal>"sub.xml"</literal> containing
-
-<programlisting><![CDATA[
-<!ENTITY % subsub SYSTEM "subsub/subsub.xml">
-%subsub;
-]]></programlisting>
-
-and a file stored in the subdirectory <literal>"subsub"</literal> of
-<literal>"sub"</literal> with name <literal>"subsub.xml"</literal> (the
-contents of this file do not matter). Here, the resolver must track that
-the second entity <literal>subsub</literal> is located in the directory
-<literal>"sub/subsub"</literal>, i.e. the difficulty is to interpret the
-system (file) names of entities relative to the entities containing them,
-even if the entities are deeply nested.
-</para>
-
-<para>
-There is no single resolver that already does everything right - resolving entity
-names is a task that highly depends on the environment. The XML specification
-only demands that <literal>SYSTEM</literal> entities are interpreted like URLs
-(which is not very precise, as there are lots of URL schemes in use), hoping
-that this helps overcoming the local peculiarities of the environment; the idea
-is that if you do not know your environment you can refer to other entities by
-denoting URLs for them. I think that this interpretation of
-<literal>SYSTEM</literal> names may have some applications in the internet, but
-it is not the first choice in general. Because of this, the resolver is a
-separate module of the parser that can be replaced by another one if
-necessary; more precisely, the parser already defines several resolvers.
-</para>
-
-<para>
-The following resolvers already exist:
-
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>Resolvers reading from arbitrary input channels. These
-can be configured such that a certain ID is associated with the channel; in
-this case inner references to external entities can be resolved. There is also
-a special resolver that interprets SYSTEM IDs as URLs; this resolver can
-process relative SYSTEM names and determine the corresponding absolute URL.
-</para>
- </listitem>
- <listitem>
-            <para>A resolver that always reads from a given O'Caml
-string. This resolver is not able to resolve further names, i.e. if the
-document contained in the string refers to an external entity, this reference
-cannot be followed in this case.</para>
- </listitem>
- <listitem>
- <para>A resolver for file names. The <literal>SYSTEM</literal>
-name is interpreted as a file URL with the slash "/" as separator for
-directories. - This resolver is derived from the generic URL resolver.</para>
- </listitem>
- </itemizedlist>
-
-The interface a resolver must have is documented, so it is possible to write
-your own resolver. For example, you could connect the parser with an HTTP
-client, and resolve URLs of the HTTP namespace. The resolver classes support
-combining several independent resolvers into one more powerful resolver;
-thus it is possible to combine a self-written resolver with the already
-existing resolvers.
-</para>
-
-<para>
-Note that the existing resolvers only interpret <literal>SYSTEM</literal>
-names, not <literal>PUBLIC</literal> names. If it helps you, it is possible to
-define resolvers for <literal>PUBLIC</literal> names, too; for example, such a
-resolver could look up the public name in a hash table, and map it to a system
-name which is passed over to the existing resolver for system names. It is
-relatively simple to provide such a resolver.
-</para>
-
-
- </sect1>
-
- <sect1>
- <title>Resolvers and sources</title>
-
- <sect2>
- <title>Using the built-in resolvers (called sources)</title>
-
- <para>The type <literal>source</literal> enumerates the two
-possibilities where the document to parse comes from.
-
-<programlisting>
-type source =
- Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
- | ExtID of (ext_id * Pxp_reader.resolver)
-</programlisting>
-
-You normally need not worry about this type as there are convenience
-functions that create <literal>source</literal> values:
-
-
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para><literal>from_file s</literal>: The document is read from
-file <literal>s</literal>; you may specify absolute or relative path names.
-The file name must be encoded as a UTF-8 string.
-</para>
-
-<para>There is an optional argument <literal>~system_encoding</literal>
-specifying the character encoding used for file names by the file
-system. For example, if this encoding is ISO-8859-1 and <literal>s</literal> is
-also an ISO-8859-1 string, you can form the source:
-
-<programlisting><![CDATA[
-let s_utf8 = recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s in
-from_file ~system_encoding:`Enc_iso88591 s_utf8
-]]></programlisting>
-</para>
-
-<para>
-This <literal>source</literal> has the advantage that
-it is able to resolve inner external entities; i.e. if your document includes
-data from another file (using a <literal>SYSTEM</literal> identifier), this
-mode will find that file. However, this mode can resolve neither
-<literal>PUBLIC</literal> identifiers nor <literal>SYSTEM</literal> identifiers
-other than "file:" URLs.
-</para>
- </listitem>
- <listitem>
- <para><literal>from_channel ch</literal>: The document is read
-from the channel <literal>ch</literal>. In general, this source also supports
-file URLs found in the document; however, by default only absolute URLs are
-understood. It is possible to associate an ID with the channel such that the
-resolver knows how to interpret relative URLs:
-
-<programlisting>
-from_channel ~id:(System "file:///dir/dir1/") ch
-</programlisting>
-
-There is also the ~system_encoding argument specifying how file names are
-encoded. - The example above can also be written as follows (but it is no
-longer possible to interpret relative URLs because there is no ~id argument,
-and computing this argument is relatively complicated because it must
-be a valid URL):
-
-<programlisting>
-let ch = open_in s in
-let src = from_channel ~system_encoding:`Enc_iso88591 ch in
-...;
-close_in ch
-</programlisting>
-</para>
- </listitem>
- <listitem>
- <para><literal>from_string s</literal>: The string
-<literal>s</literal> is the document to parse. This mode is not able to
-interpret file names of <literal>SYSTEM</literal> clauses, nor can it look up
-<literal>PUBLIC</literal> identifiers.</para>
-
- <para>Normally, the encoding of the string is detected as usual
-by analyzing the XML declaration, if any. However, it is also possible to
-specify the encoding directly:
-
-<programlisting>
-let src = from_string ~fixenc:`Enc_iso88592 s
-</programlisting>
-</para>
- </listitem>
- <listitem>
- <para><literal>ExtID (id, r)</literal>: The document to parse
-is denoted by the identifier <literal>id</literal> (either a
-<literal>SYSTEM</literal> or <literal>PUBLIC</literal> clause), and this
-identifier is interpreted by the resolver <literal>r</literal>. Use this mode
-if you have written your own resolver.</para>
- <para>Which character sets are possible depends on the passed
-resolver <literal>r</literal>.</para>
- </listitem>
- <listitem>
- <para><literal>Entity (get_entity, r)</literal>: The document
-to parse is returned by the function invocation <literal>get_entity
-dtd</literal>, where <literal>dtd</literal> is the DTD object to use (it may be
-empty). Inner external references occurring in this entity are resolved using
-the resolver <literal>r</literal>.</para>
- <para>Which character sets are possible depends on the passed
-resolver <literal>r</literal>.</para>
- </listitem>
- </itemizedlist></para>
- </sect2>
-
-
- <sect2>
- <title>The resolver API</title>
-
-      <para>A resolver is an object that can be opened like a file, but instead
-of a file name you pass the XML identifier of the entity
-to read from (either a <literal>SYSTEM</literal> or <literal>PUBLIC</literal>
-clause). When opened, the resolver must return the
-<literal>Lexing.lexbuf</literal> that reads the characters. The resolver can
-be closed, and it can be cloned. Furthermore, it is possible to tell the
-resolver which character set it should assume. - The following is from Pxp_reader:
-
-<programlisting><![CDATA[
-exception Not_competent
-exception Not_resolvable of exn
-
-class type resolver =
- object
- method init_rep_encoding : rep_encoding -> unit
- method init_warner : collect_warnings -> unit
- method rep_encoding : rep_encoding
- method open_in : ext_id -> Lexing.lexbuf
- method close_in : unit
- method change_encoding : string -> unit
- method clone : resolver
- method close_all : unit
- end
-]]></programlisting>
-
-The resolver object must work as follows:</para>
-
-<para>
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para>When the parser is called, it tells the resolver the
-warner object and the internal encoding by invoking
-<literal>init_warner</literal> and <literal>init_rep_encoding</literal>. The
-resolver should store these values. The method <literal>rep_encoding</literal>
-should return the internal encoding.
-</para>
- </listitem>
- <listitem>
- <para>If the parser wants to read from the resolver, it invokes
-the method <literal>open_in</literal>. Either the resolver succeeds, in which
-case the <literal>Lexing.lexbuf</literal> reading from the file or stream must
-be returned, or opening fails. In the latter case the method implementation
-should raise an exception (see below).</para>
- </listitem>
- <listitem>
- <para>If the parser finishes reading, it calls the
-<literal>close_in</literal> method.</para>
- </listitem>
- <listitem>
- <para>If the parser finds a reference to another external
-entity in the input stream, it calls <literal>clone</literal> to get a second
-resolver which must be initially closed (not yet connected with an input
-stream). The parser then invokes <literal>open_in</literal> and the other
-methods as described.</para>
- </listitem>
- <listitem>
- <para>If you already know the character set of the input
-stream, you should recode it to the internal encoding, and define the method
-<literal>change_encoding</literal> as an empty method.</para>
- </listitem>
- <listitem>
- <para>If you want to support multiple external character sets,
-the object must follow a much more complicated protocol. Directly after
-<literal>open_in</literal> has been called, the resolver must return a lexical
-buffer that only reads one byte at a time. This is only possible if you create
-the lexical buffer with <literal>Lexing.from_function</literal>; the function
-must then always return 1 if the EOF is not yet reached, and 0 if EOF is
-reached. If the parser has read the first line of the document, it will invoke
-<literal>change_encoding</literal> to tell the resolver which character set to
-assume. From this moment, the object can return more than one byte at once. The
-argument of <literal>change_encoding</literal> is either the parameter of the
-"encoding" attribute of the XML declaration, or the empty string if there is
-no XML declaration or if the declaration does not contain an encoding
-attribute. </para>
-
- <para>At the beginning the resolver must only return one
-character every time something is read from the lexical buffer. The reason for
-this is that you otherwise would not exactly know at which position in the
-input stream the character set changes.</para>
-
- <para>If you want automatic recognition of the character set,
-it is up to the resolver object to implement this.</para>
- </listitem>
-
- <listitem><para>If an error occurs, the parser calls the method
-<literal>close_all</literal> for the top-level resolver; this method should
-close itself (if not already done) and all clones.</para>
- </listitem>
- </itemizedlist>
-</para>
- <formalpara><title>Exceptions</title>
- <para>
-It is possible to chain resolvers such that when the first resolver is not able
-to open the entity, the other resolvers of the chain are tried in turn. The
-method <literal>open_in</literal> should raise the exception
-<literal>Not_competent</literal> to indicate that the next resolver should try
-to open the entity. If the resolver is able to handle the ID, but some other
-error occurs, the exception <literal>Not_resolvable</literal> should be raised
-to abort the chain.
- </para>
- </formalpara>
-
-          <para>Example: a resolver that is roughly equivalent to
-<literal>from_string</literal> is sketched below.</para>
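-
-<para>
-The following sketch shows one possible way to implement such a resolver. It
-is only an illustration, not the implementation used by the parser: it assumes
-that Pxp_types and Pxp_reader are opened, that the string is already encoded in
-the internal encoding (so <literal>change_encoding</literal> can be empty, as
-explained above), and that clones refuse to open anything because the string is
-not associated with any ID.
-
-<programlisting><![CDATA[
-class resolve_this_string the_string =
-  object
-    val mutable internal_encoding = (`Enc_iso88591 : rep_encoding)
-    val is_clone = false
-
-    method init_rep_encoding e = internal_encoding <- e
-    method init_warner (_ : collect_warnings) = ()
-    method rep_encoding = internal_encoding
-
-    method open_in (_ : ext_id) =
-      (* A clone has no string to read from: *)
-      if is_clone then raise Not_competent;
-      Lexing.from_string the_string
-
-    method close_in = ()
-    method close_all = ()
-
-    (* The string is assumed to be recoded already: *)
-    method change_encoding (_ : string) = ()
-
-    method clone = ({< is_clone = true >} :> resolver)
-  end
-;;
-]]></programlisting>
-</para>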
-
- </sect2>
-
- <sect2>
- <title>Predefined resolver components</title>
- <para>
-There are some classes in Pxp_reader that define common resolver behaviour.
-
-<programlisting><![CDATA[
-class resolve_read_this_channel :
- ?id:ext_id ->
- ?fixenc:encoding ->
- ?auto_close:bool ->
- in_channel ->
- resolver
-]]></programlisting>
-
-Reads from the passed channel (it may even be a pipe). If the
-<literal>~id</literal> argument is passed to the object, the created resolver
-accepts only this ID. Otherwise all IDs are accepted. - Once the resolver has
-been cloned, it does not accept any ID. This means that this resolver cannot
-handle inner references to external entities. Note that you can combine this
-resolver with another resolver that can handle inner references (such as
-resolve_as_file); see class 'combine' below. - If you pass the
-<literal>~fixenc</literal> argument, the encoding of the channel is set to the
-passed value, regardless of any auto-recognition or any XML declaration. - If
-<literal>~auto_close = true</literal> (which is the default), the channel is
-closed after use. If <literal>~auto_close = false</literal>, the channel is
-left open.
- </para>
-
- <para>
-<programlisting><![CDATA[
-class resolve_read_any_channel :
- ?auto_close:bool ->
- channel_of_id:(ext_id -> (in_channel * encoding option)) ->
- resolver
-]]></programlisting>
-
-This resolver calls the function <literal>~channel_of_id</literal> to open a
-new channel for the passed <literal>ext_id</literal>. This function must either
-return the channel and the encoding, or it must fail with Not_competent. The
-function must return <literal>None</literal> as encoding if the default
-mechanism to recognize the encoding should be used. It must return
-<literal>Some e</literal> if it is already known that the encoding of the
-channel is <literal>e</literal>. If <literal>~auto_close = true</literal>
-(which is the default), the channel is closed after use. If
-<literal>~auto_close = false</literal>, the channel is left open.
-</para>
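-
-<para>
-For instance, such a function can be used to realize the lookup of
-<literal>PUBLIC</literal> identifiers mentioned in the overview. The following
-is only a sketch (it assumes that Pxp_types and Pxp_reader are opened; the
-catalog contents are placeholders, and the constructor names of
-<literal>ext_id</literal> should be checked against Pxp_types):
-
-<programlisting><![CDATA[
-(* Map a few known PUBLIC identifiers to local files; anything else
- * is left to the next resolver in a 'combine' chain.
- *)
-let catalog =
-  [ "-//EXAMPLE//DTD Example 1.0//EN", "/usr/local/share/dtds/example.dtd" ]
-
-let public_resolver =
-  new resolve_read_any_channel
-    ~channel_of_id:(fun id ->
-       match id with
-         Public (pubid, _) ->
-           (try (open_in (List.assoc pubid catalog), None)
-            with Not_found -> raise Not_competent)
-       | _ ->
-           raise Not_competent)
-]]></programlisting>
-</para>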
-
- <para>
-<programlisting><![CDATA[
-class resolve_read_url_channel :
- ?base_url:Neturl.url ->
- ?auto_close:bool ->
- url_of_id:(ext_id -> Neturl.url) ->
- channel_of_url:(Neturl.url -> (in_channel * encoding option)) ->
- resolver
-]]></programlisting>
-
-When this resolver gets an ID to read from, it calls the function
-<literal>~url_of_id</literal> to get the corresponding URL. This URL may be a
-relative URL; however, a URL scheme must be used which contains a path. The
-resolver converts the URL to an absolute URL if necessary. The second
-function, <literal>~channel_of_url</literal>, is fed with the absolute URL as
-input. This function opens the resource to read from, and returns the channel
-and the encoding of the resource.
-</para>
-<para>
-Both functions, <literal>~url_of_id</literal> and
-<literal>~channel_of_url</literal>, can raise Not_competent to indicate that
-the object is not able to read from the specified resource. However, there is a
-difference: A Not_competent from <literal>~url_of_id</literal> is left as it
-is, but a Not_competent from <literal>~channel_of_url</literal> is converted to
-Not_resolvable. So only <literal>~url_of_id</literal> decides which URLs are
-accepted by the resolver and which are not.
-</para>
-<para>
-The function <literal>~channel_of_url</literal> must return
-<literal>None</literal> as encoding if the default mechanism to recognize the
-encoding should be used. It must return <literal>Some e</literal> if it is
-already known that the encoding of the channel is <literal>e</literal>.
-</para>
-<para>
-If <literal>~auto_close = true</literal> (which is the default), the channel is
-closed after use. If <literal>~auto_close = false</literal>, the channel is
-left open.
-</para>
-<para>
-Objects of this class contain a base URL relative to which relative URLs are
-interpreted. When creating a new object, you can specify the base URL by
-passing it as <literal>~base_url</literal> argument. When an existing object is
-cloned, the base URL of the clone is the URL of the original object. - Note
-that the term "base URL" has a strict definition in RFC 1808.
-</para>
-
- <para>
-<programlisting><![CDATA[
-class resolve_read_this_string :
- ?id:ext_id ->
- ?fixenc:encoding ->
- string ->
- resolver
-]]></programlisting>
-
-Reads from the passed string. If the <literal>~id</literal> argument is passed
-to the object, the created resolver accepts only this ID. Otherwise all IDs are
-accepted. - Once the resolver has been cloned, it does not accept any ID. This
-means that this resolver cannot handle inner references to external
-entities. Note that you can combine this resolver with another resolver that
-can handle inner references (such as resolve_as_file); see class 'combine'
-below. - If you pass the <literal>~fixenc</literal> argument, the encoding of
-the string is set to the passed value, regardless of any auto-recognition or
-any XML declaration.
-</para>
-
- <para>
-<programlisting><![CDATA[
-class resolve_read_any_string :
- string_of_id:(ext_id -> (string * encoding option)) ->
- resolver
-]]></programlisting>
-
-This resolver calls the function <literal>~string_of_id</literal> to get the
-string for the passed <literal>ext_id</literal>. This function must either
-return the string and the encoding, or it must fail with Not_competent. The
-function must return <literal>None</literal> as encoding if the default
-mechanism to recognize the encoding should be used. It must return
-<literal>Some e</literal> if it is already known that the encoding of the
-string is <literal>e</literal>.
-</para>
-
- <para>
-<programlisting><![CDATA[
-class resolve_as_file :
- ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
- ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
- ?system_encoding:encoding ->
- ?url_of_id:(ext_id -> Neturl.url) ->
- ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
- unit ->
- resolver
-]]></programlisting>
-Reads from the local file system. Every file name is interpreted as
-a file name of the local file system, and the referred file is read.
-</para>
-<para>
-The full form of a file URL is: file://host/path, where
-'host' specifies the host system where the file identified by 'path'
-resides. host = "" or host = "localhost" are accepted; other values
-will raise Not_competent. The standard for file URLs is
-defined in RFC 1738.
-</para>
-<para>
-Option <literal>~file_prefix</literal>: Specifies how the "file:" prefix of
-file names is handled:
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para><literal>`Not_recognized:</literal>The prefix is not
-recognized.</para>
- </listitem>
- <listitem>
- <para><literal>`Allowed:</literal> The prefix is allowed but
-not required (the default).</para>
- </listitem>
- <listitem>
- <para><literal>`Required:</literal> The prefix is
-required.</para>
- </listitem>
- </itemizedlist>
-</para>
-<para>
-Option <literal>~host_prefix:</literal> Specifies how the "//host" phrase of
-file names is handled:
- <itemizedlist mark="bullet" spacing="compact">
- <listitem>
- <para><literal>`Not_recognized:</literal>The prefix is not
-recognized.</para>
- </listitem>
- <listitem>
- <para><literal>`Allowed:</literal> The prefix is allowed but
-not required (the default).</para>
- </listitem>
- <listitem>
- <para><literal>`Required:</literal> The prefix is
-required.</para>
- </listitem>
- </itemizedlist>
-</para>
-<para>
-Option <literal>~system_encoding:</literal> Specifies the encoding of file
-names of the local file system. Default: UTF-8.
-</para>
-<para>
-Options <literal>~url_of_id</literal>, <literal>~channel_of_url</literal>: Not
-for the casual user!
-</para>
-
- <para>
-<programlisting><![CDATA[
-class combine :
- ?prefer:resolver ->
- resolver list ->
- resolver
-]]></programlisting>
-
-Combines several resolver objects. If a concrete entity with an
-<literal>ext_id</literal> is to be opened, the combined resolver tries the
-contained resolvers in turn until a resolver accepts opening the entity
-(i.e. it does not raise Not_competent on open_in).
-</para>
-<para>
-Clones: If the 'clone' method is invoked before 'open_in', all contained
-resolvers are cloned separately and again combined. If the 'clone' method is
-invoked after 'open_in' (i.e. while the resolver is open), additionally the
-clone of the active resolver is flagged as being preferred, i.e. it is tried
-first.
-</para>
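-
-<para>
-As an illustration (not taken from the original manual), a channel resolver can
-be combined with <literal>resolve_as_file</literal> so that the toplevel
-document is read from an already opened <literal>in_channel</literal> while
-inner "file:" references are resolved from the file system. It assumes that
-Pxp_types and Pxp_reader are opened; the ID passed as <literal>~id</literal> is
-only an example and must match the location the channel actually corresponds to:
-
-<programlisting><![CDATA[
-let make_resolver ch =
-  new combine
-    [ new resolve_read_this_channel
-        ~id:(System "file:///dir/dir1/main.xml") ch;
-      new resolve_as_file () ]
-
-(* The resolver can then be used together with ExtID as source:
- *   ExtID (System "file:///dir/dir1/main.xml", make_resolver ch)
- *)
-]]></programlisting>
-</para>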
-
- </sect2>
- </sect1>
-
- <sect1>
- <title>The DTD classes</title> <para><emphasis>Sorry, not yet
-written. Perhaps the interface definition of Pxp_dtd expresses the same:
-</emphasis></para>
- <para>
-<programlisting>&markup-dtd1.mli;&markup-dtd2.mli;</programlisting>
-</para>
- </sect1>
-
- <sect1>
- <title>Invoking the parser</title>
-
-      <para>Here is a description of Pxp_yacc.</para>
-
- <sect2>
- <title>Defaults</title>
- <para>The following defaults are available:
-
-<programlisting>
-val default_config : config
-val default_extension : ('a node extension) as 'a
-val default_spec : ('a node extension as 'a) spec
-</programlisting>
-</para>
- </sect2>
-
- <sect2>
- <title>Parsing functions</title>
- <para>In the following, the term "closed document" refers to
-an XML structure like
-
-<programlisting>
-&lt;!DOCTYPE ... [ <replaceable>declarations</replaceable> ] >
-&lt;<replaceable>root</replaceable>>
-...
-&lt;/<replaceable>root</replaceable>>
-</programlisting>
-
-The term "fragment" refers to an XML structure like
-
-<programlisting>
-&lt;<replaceable>root</replaceable>>
-...
-&lt;/<replaceable>root</replaceable>>
-</programlisting>
-
-i.e. only to one isolated element instance.
-</para>
-
- <para>
-<programlisting><![CDATA[
-val parse_dtd_entity : config -> source -> dtd
-]]></programlisting>
-
-Parses the declarations which are contained in the entity, and returns them as
-<literal>dtd</literal> object.
-</para>
-
- <para>
-<programlisting><![CDATA[
-val extract_dtd_from_document_entity : config -> source -> dtd
-]]></programlisting>
-
-Extracts the DTD from a closed document. Both the internal and the external
-subsets are extracted and combined to one <literal>dtd</literal> object. This
-function does not parse the whole document, but only the parts that are
-necessary to extract the DTD.
-</para>
-
- <para>
-<programlisting><![CDATA[
-val parse_document_entity :
- ?transform_dtd:(dtd -> dtd) ->
- ?id_index:('ext index) ->
- config ->
- source ->
- 'ext spec ->
- 'ext document
-]]></programlisting>
-
-Parses a closed document and validates it against the DTD that is contained in
-the document (internal and external subsets). The option
-<literal>~transform_dtd</literal> can be used to transform the DTD in the
-document, and to use the transformed DTD for validation. If
-<literal>~id_index</literal> is specified, an index of all ID attributes is
-created.
-</para>
-
- <para>
-<programlisting><![CDATA[
-val parse_wfdocument_entity :
- config ->
- source ->
- 'ext spec ->
- 'ext document
-]]></programlisting>
-
-Parses a closed document, but only checks it for well-formedness.
-</para>
-
- <para>
-<programlisting><![CDATA[
-val parse_content_entity :
- ?id_index:('ext index) ->
- config ->
- source ->
- dtd ->
- 'ext spec ->
- 'ext node
-]]></programlisting>
-
-Parses a fragment, and validates the element.
-</para>
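-
-<para>
-For example, checking a fragment against a fixed DTD (as discussed in the
-overview) could be sketched as follows; the file names are only placeholders,
-and the defaults from above are used:
-
-<programlisting><![CDATA[
-open Pxp_yacc
-
-let validate_fragment () =
-  (* Read the prescribed DTD once, then validate the fragment against it. *)
-  let dtd = parse_dtd_entity default_config (from_file "fixed.dtd") in
-  parse_content_entity default_config (from_file "fragment.xml")
-                       dtd default_spec
-]]></programlisting>
-</para>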
-
- <para>
-<programlisting><![CDATA[
-val parse_wfcontent_entity :
- config ->
- source ->
- 'ext spec ->
- 'ext node
-]]></programlisting>
-
-Parses a fragment, but only checks it for well-formedness.
-</para>
- </sect2>
-
- <sect2>
- <title>Configuration options</title>
- <para>
-
-<programlisting><![CDATA[
-type config =
- { warner : collect_warnings;
- errors_with_line_numbers : bool;
- enable_pinstr_nodes : bool;
- enable_super_root_node : bool;
- enable_comment_nodes : bool;
- encoding : rep_encoding;
- recognize_standalone_declaration : bool;
- store_element_positions : bool;
- idref_pass : bool;
- validate_by_dfa : bool;
- accept_only_deterministic_models : bool;
- ...
- }
-]]></programlisting>
-
-<itemizedlist mark="bullet" spacing="compact">
- <listitem><para><literal>warner:</literal>The parser prints
-warnings by invoking the method <literal>warn</literal> for this warner
-object. (Default: all warnings are dropped)</para>
- </listitem>
- <listitem><para><literal>errors_with_line_numbers:</literal>If
-true, errors contain line numbers; if false, errors contain only byte
-positions. The latter mode is faster. (Default: true)</para>
- </listitem>
- <listitem><para><literal>enable_pinstr_nodes:</literal>If true,
-the parser creates extra nodes for processing instructions. If false,
-processing instructions are simply added to the element or document surrounding
-the instructions. (Default: false)</para>
- </listitem>
- <listitem><para><literal>enable_super_root_node:</literal>If
-true, the parser creates an extra node which is the parent of the root of the
-document tree. This node is called super root; it is an element with type
-<literal>T_super_root</literal>. - If there are processing instructions outside
-the root element and outside the DTD, they are added to the super root instead
-of the document. - If false, the super root node is not created. (Default:
-false)</para>
- </listitem>
- <listitem><para><literal>enable_comment_nodes:</literal>If true,
-the parser creates nodes for comments with type <literal>T_comment</literal>;
-if false, such nodes are not created. (Default: false)</para>
- </listitem>
- <listitem><para><literal>encoding:</literal>Specifies the
-internal encoding of the parser. Most strings are then represented according to
-this encoding; however there are some exceptions (especially
-<literal>ext_id</literal> values which are always UTF-8 encoded).
-(Default: `Enc_iso88591)</para>
- </listitem>
- <listitem><para><literal>
-recognize_standalone_declaration:</literal> If true and if the parser is
-validating, the <literal>standalone="yes"</literal> declaration forces a check
-whether the document really is a standalone document. - If false, or if the
-parser is in well-formedness mode, such declarations are ignored.
-(Default: true)
-</para>
- </listitem>
- <listitem><para><literal>store_element_positions:</literal> If
-true, for every non-data node the source position is stored. If false, the
-position information is lost. If available, you can get the positions of nodes
-by invoking the <literal>position</literal> method.
-(Default: true)</para>
- </listitem>
-      <listitem><para><literal>idref_pass:</literal> If true and if
-there is an ID index, the parser checks whether every IDREF or IDREFS attribute
-refers to an existing node; this requires that the parser traverse the whole
-document tree. If false, this check is left out. (Default: false)</para>
- </listitem>
- <listitem><para><literal>validate_by_dfa:</literal>If true and if
-the content model for an element type is deterministic, a deterministic finite
-automaton is used to validate whether the element contents match the content
-model of the type. If false, or if a DFA is not available, a backtracking
-algorithm is used for validation. (Default: true)
-</para>
- </listitem>
- <listitem><para><literal>
-accept_only_deterministic_models:</literal> If true, only deterministic content
-models are accepted; if false, any syntactically correct content models can be
-processed. (Default: true)</para>
- </listitem>
- </itemizedlist></para>
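-
-<para>
-For example, a configuration that additionally creates nodes for processing
-instructions and comments can be derived from the default configuration (see
-also the next section):
-
-<programlisting><![CDATA[
-let config =
-  { default_config with
-      enable_pinstr_nodes = true;
-      enable_comment_nodes = true
-  }
-]]></programlisting>
-</para>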
- </sect2>
-
- <sect2>
- <title>Which configuration should I use?</title>
-      <para>First, I recommend varying the default configuration instead of
-creating a new configuration record from scratch. For instance, to set
-<literal>idref_pass</literal> to <literal>true</literal>, change the default
-as in:
-<programlisting>
-let config = { default_config with idref_pass = true }
-</programlisting>
-The background is that I can add more options to the record in future versions
-of the parser without breaking your programs.</para>
-
- <formalpara>
- <title>Do I need extra nodes for processing instructions?</title>
-<para>By default, such nodes are not created. This does not mean that the
-processing instructions are lost; however, you cannot find out the exact
-location where they occur. For example, the following XML text
-
-<programlisting><![CDATA[
-<x><?pi1?><y/><?pi2?></x>
-]]></programlisting>
-
-will normally create one element node for <literal>x</literal> containing
-<emphasis>one</emphasis> subnode for <literal>y</literal>. The processing
-instructions are attached to <literal>x</literal> in a separate hash table; you
-can access them using <literal>x # pinstr "pi1"</literal> and <literal>x #
-pinstr "pi2"</literal>, respectively. The information about where the
-instructions occur within <literal>x</literal> is lost.
-</para>
- </formalpara>
-
- <para>If the option <literal>enable_pinstr_nodes</literal> is
-turned on, the parser creates extra nodes <literal>pi1</literal> and
-<literal>pi2</literal> such that the subnodes of <literal>x</literal> are now:
-
-<programlisting><![CDATA[
-x # sub_nodes = [ pi1; y; pi2 ]
-]]></programlisting>
-
-The extra nodes contain the processing instructions in the usual way, i.e. you
-can access them using <literal>pi1 # pinstr "pi1"</literal> and <literal>pi2 #
-pinstr "pi2"</literal>, respectively.
-</para>
-
- <para>Note that you will need an exemplar for the PI nodes (see
-<literal>make_spec_from_alist</literal>).</para>
-
- <formalpara>
- <title>Do I need a super root node?</title>
- <para>By default, there is no super root node. The
-<literal>document</literal> object refers directly to the node representing the
-root element of the document, i.e.
-
-<programlisting><![CDATA[
-doc # root = r
-]]></programlisting>
-
-if <literal>r</literal> is the root node. This is sometimes inconvenient: (1)
-Some algorithms become simpler if every node has a parent, even the root
-node. (2) Some standards such as XPath use the term "root node" for the node
-whose child is the root element of the document. (3) The super root node can serve
-as a container for processing instructions outside the root element. Because of
-these reasons, it is possible to create an extra super root node, whose child
-is the root node:
-
-<programlisting><![CDATA[
-doc # root = sr &&
-sr # sub_nodes = [ r ]
-]]></programlisting>
-
-When extra nodes are also created for processing instructions, these nodes can
-be added to the super root node if they occur outside the root element (reason
-(3)), and the order reflects the order in the source text.</para>
- </formalpara>
-
- <para>Note that you will need an exemplar for the super root node
-(see <literal>make_spec_from_alist</literal>).</para>
-
- <formalpara>
- <title>What is the effect of the UTF-8 encoding?</title>
- <para>By default, the parser represents strings (with few
-exceptions) as ISO-8859-1 strings. These are well-known, and there are tools
-and fonts for this encoding.</para>
- </formalpara>
- <para>However, internationalization may require that you switch over
-to UTF-8 encoding. In most environments, the immediate effect will be that you
-cannot read strings with character codes >= 160 any longer; your terminal will
-only show funny glyph combinations. It is strongly recommended to install
-Unicode fonts (<ulink URL="http://czyborra.com/unifont/">GNU Unifont</ulink>,
-<ulink URL="http://www.cl.cam.ac.uk/~mgk25/download/ucs-fonts.tar.gz">
-Markus Kuhn's fonts</ulink>) and <ulink
-URL="http://myweb.clark.net/pub/dickey/xterm/xterm.html">terminal emulators
-that can handle UTF-8 byte sequences</ulink>. Furthermore, a Unicode editor may
-be helpful (such as <ulink
-URL="ftp://metalab.unc.edu/pub/Linux/apps/editors/X/">Yudit</ulink>). There are
-also <ulink URL="http://www.cl.cam.ac.uk/~mgk25/unicode.html">FAQ</ulink> by
-Markus Kuhn.
-</para>
- <para>By setting <literal>encoding</literal> to
-<literal>`Enc_utf8</literal>, all strings originating from the parsed XML
-document are represented as UTF-8 strings. This includes not only character
-data and attribute values but also element names, attribute names and so on, as
-it is possible to use any Unicode letter to form such names. Strictly
-speaking, PXP is only XML-compliant if the UTF-8 mode is used; otherwise it
-will have difficulties when validating documents containing
-non-ISO-8859-1-names.
-</para>
-
- <para>This mode does not have any impact on the external
-representation of documents. The character set assumed when reading a document
-is set in the XML declaration, and character set when writing a document must
-be passed to the <literal>write</literal> method.
-</para>
-
- <formalpara>
- <title>How do I check that nodes exist which are referred by IDREF attributes?</title>
- <para>First, you must create an index of all occurring ID
-attributes:
-
-<programlisting><![CDATA[
-let index = new hash_index
-]]></programlisting>
-
-This index must be passed to the parsing function:
-
-<programlisting><![CDATA[
-parse_document_entity
- ~id_index:(index :> index)
- config source spec
-]]></programlisting>
-
-Next, you must turn on the <literal>idref_pass</literal> mode:
-
-<programlisting><![CDATA[
-let config = { default_config with idref_pass = true }
-]]></programlisting>
-
-Note that now the whole document tree will be traversed, and every node will be
-checked for IDREF and IDREFS attributes. If the tree is big, this may take some
-time.
-</para>
- </formalpara>
-
- <formalpara>
- <title>What are deterministic content models?</title>
-          <para>This type of model can speed up the validation checks;
-furthermore it ensures SGML compatibility. In particular, a content model is
-deterministic if the parser can determine the alternative that is actually used
-by inspecting only the current token. For example, this element has
-non-deterministic contents:
-
-<programlisting><![CDATA[
-<!ELEMENT x ((u,v) | (u,y+) | v)>
-]]></programlisting>
-
-If the first element in <literal>x</literal> is <literal>u</literal>, the
-parser does not know which of the alternatives <literal>(u,v)</literal> or
-<literal>(u,y+)</literal> will work; the parser must also inspect the second
-element to be able to distinguish between the alternatives. Because such
-look-ahead (or "guessing") is required, this example is
-non-deterministic.</para>
- </formalpara>
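-
-          <para>In this particular case the model can be rewritten into an
-equivalent deterministic one by factoring out the common prefix
-<literal>u</literal>:
-
-<programlisting><![CDATA[
-<!ELEMENT x ((u, (v | y+)) | v)>
-]]></programlisting>
-
-Now the first token already selects the alternative: <literal>u</literal>
-enters the first branch, <literal>v</literal> the second one.</para>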
-
- <para>The XML standard demands that content models must be
-deterministic. So it is recommended to turn the option
-<literal>accept_only_deterministic_models</literal> on; however, PXP can also
-process non-deterministic models using a backtracking algorithm.</para>
-
- <para>Deterministic models ensure that validation can be performed in
-linear time. In order to get the maximum benefits, PXP also implements a
-special validator that profits from deterministic models; this is the
-deterministic finite automaton (DFA). This validator is enabled per element
-type if the element type has a deterministic model and if the option
-<literal>validate_by_dfa</literal> is turned on.</para>
-
- <para>In general, I expect that the DFA method is faster than the
-backtracking method; especially in the worst case the DFA takes only linear
-time. However, if the content model has only few alternatives and the
-alternatives do not nest, the backtracking algorithm may be better.</para>
-
- </sect2>
-
-
- </sect1>
-
-
- <sect1>
- <title>Updates</title>
-
-    <para><emphasis>Some (often later added) features that are not otherwise
-explained in the manual but are worth mentioning.</emphasis></para>
-
- <itemizedlist mark="bullet" spacing="compact">
- <listitem><para>Methods node_position, node_path, nth_node,
-previous_node, next_node for nodes: See pxp_document.mli</para>
- </listitem>
- <listitem><para>Functions to determine the document order of nodes:
-compare, create_ord_index, ord_number, ord_compare: See pxp_document.mli</para>
- </listitem>
- </itemizedlist>
- </sect1>
-
- </chapter>
-
- </part>
-</book>
-
+++ /dev/null
-#FIG 3.2
-Portrait
-Center
-Metric
-A4
-100.00
-Single
--2
-1200 2
-1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 2250 229 229 1575 2250 1800 2295
-1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 3375 225 225 1575 3375 1800 3375
-1 3 0 1 0 7 100 0 15 0.000 1 0.0000 675 3375 229 229 675 3375 900 3420
-1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2475 3375 229 229 2475 3375 2700 3420
-1 3 0 1 0 7 100 0 10 0.000 1 0.0000 3600 2475 180 180 3600 2475 3780 2475
-1 3 0 1 0 7 100 0 10 0.000 1 0.0000 2880 2475 180 180 2880 2475 3060 2475
-1 3 0 1 0 7 100 0 10 0.000 1 0.0000 4320 2475 186 186 4320 2475 4500 2520
-1 3 0 1 0 7 100 0 10 0.000 1 0.0000 3600 1485 186 186 3600 1485 3780 1530
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
- 675 3150 1395 2385
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
- 1575 2475 1575 3150
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
- 1755 2385 2475 3150
-2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 1537 2010 3412 1462
-2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
- 0 0 1.00 60.00 120.00
- 3412 1537 1672 2047
-2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2
- 0 0 1.00 60.00 120.00
- 0 0 1.00 60.00 120.00
- 810 3195 2707 2512
-2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2
- 0 0 1.00 60.00 120.00
- 0 0 1.00 60.00 120.00
- 1740 3217 3442 2580
-2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2
- 0 0 1.00 60.00 120.00
- 0 0 1.00 60.00 120.00
- 2640 3210 4177 2610
-4 0 0 80 0 14 12 0.0000 4 75 105 3555 1530 x\001
-4 0 0 80 0 14 12 0.0000 4 75 105 1530 2295 n\001
-4 0 0 80 0 12 12 0.2967 4 135 1365 1658 1950 n # extension\001
-4 0 0 80 0 12 12 0.2967 4 135 840 2475 1950 x # node\001
-4 0 0 80 0 16 12 0.0000 4 135 1140 1020 4050 The node tree\001
-4 0 0 80 0 16 12 0.0000 4 135 1245 3225 3285 The extensions\001
+++ /dev/null
-#FIG 3.2
-Portrait
-Center
-Metric
-A4
-100.00
-Single
--2
-1200 2
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6141 1350 242 229 6141 1350 6379 1395
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6141 2250 242 229 6141 2250 6379 2295
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 5426 2250 242 229 5426 2250 5665 2295
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6856 2250 242 229 6856 2250 7094 2295
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 7571 2925 242 229 7571 2925 7809 2970
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8524 2925 242 229 8524 2925 8762 2970
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8047 2250 242 229 8047 2250 8285 2295
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1866 1350 242 229 1866 1350 2104 1395
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1866 2250 242 229 1866 2250 2104 2295
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1151 2250 242 229 1151 2250 1390 2295
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 2581 2250 242 229 2581 2250 2819 2295
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 3296 2925 242 229 3296 2925 3534 2970
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 4249 2925 242 229 4249 2925 4487 2970
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 3772 2250 242 229 3772 2250 4010 2295
-1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8325 1350 242 229 8325 1350 8563 1395
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 1 1.00 61.76 123.53
- 5910 1440 5402 2017
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 1 1.00 61.76 123.53
- 6109 1590 6101 2025
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 1 1.00 61.76 123.53
- 6307 1537 6697 2070
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 1 1.00 61.76 123.53
- 7832 2347 7602 2692
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 1 1.00 61.76 123.53
- 8150 2452 8349 2752
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 0 1.00 61.76 123.53
- 5490 2017 5958 1492
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 0 1.00 61.76 123.53
- 6164 2010 6173 1575
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 0 1.00 61.76 123.53
- 6768 2025 6355 1470
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 0 1.00 61.76 123.53
- 7673 2715 7880 2415
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 0 1.00 61.76 123.53
- 8412 2707 8222 2415
-2 1 1 1 0 7 95 0 15 4.000 0 0 -1 0 0 2
- 6387 1372 8023 2017
-2 2 0 1 0 7 95 0 -1 0.000 0 0 -1 0 0 5
- 4950 900 9000 900 9000 3375 4950 3375 4950 900
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 1 1.00 61.75 123.51
- 1635 1440 1127 2017
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 1 1.00 61.75 123.51
- 1834 1590 1826 2025
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 1 1.00 61.75 123.51
- 2032 1537 2422 2070
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 1 1.00 61.75 123.51
- 3557 2347 3327 2692
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 1 1.00 61.75 123.51
- 3875 2452 4074 2752
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 0 1.00 61.75 123.51
- 1215 2017 1683 1492
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 0 1.00 61.75 123.51
- 1889 2010 1898 1575
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 0 1.00 61.75 123.51
- 2493 2025 2080 1470
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 0 1.00 61.75 123.51
- 3398 2715 3605 2415
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 0 1.00 61.75 123.51
- 4137 2707 3947 2415
-2 1 1 1 0 7 95 0 15 4.000 0 0 -1 0 0 2
- 2112 1372 3748 2017
-2 2 0 1 0 7 95 0 -1 0.000 0 0 -1 0 0 5
- 675 900 4725 900 4725 3375 675 3375 675 900
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 8197 1545 8055 2010
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 8137 2025 8280 1590
-2 1 0 3 0 7 95 0 -1 0.000 0 0 -1 1 0 4
- 2 1 2.00 120.00 180.00
- 7875 1500 7620 1965 7845 1920 7485 2355
-4 0 0 95 0 14 13 0.0000 4 79 111 6094 1379 x\001
-4 0 0 95 0 14 13 0.0000 4 111 111 7991 2265 y\001
-4 0 0 95 0 14 13 0.0000 4 79 111 1819 1379 x\001
-4 0 0 95 0 14 13 0.0000 4 111 111 3716 2265 y\001
-4 0 0 95 0 12 12 0.0000 4 150 1470 6459 1335 x # add_node y\001
-4 0 0 95 0 12 12 0.0000 4 150 1470 2214 1365 x # add_node y\001
+++ /dev/null
-#FIG 3.2
-Portrait
-Center
-Metric
-A4
-100.00
-Single
--2
-1200 2
-1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2700 1800 229 229 2700 1800 2925 1845
-1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2025 2700 229 229 2025 2700 2250 2745
-1 3 0 1 0 7 95 0 15 4.000 1 0.0000 3375 2700 229 229 3375 2700 3600 2745
-1 3 0 1 0 7 95 0 15 4.000 1 0.0000 6345 1800 229 229 6345 1800 6570 1845
-1 3 0 1 0 7 95 0 15 4.000 1 0.0000 5670 2700 229 229 5670 2700 5895 2745
-1 3 0 1 0 7 95 0 15 4.000 1 0.0000 7020 2700 229 229 7020 2700 7245 2745
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8325 1800 229 229 8325 1800 8550 1845
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 7875 2700 229 229 7875 2700 8100 2745
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8775 2700 229 229 8775 2700 9000 2745
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 6345 2700 229 229 6345 2700 6570 2745
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 5895 3600 229 229 5895 3600 6120 3645
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 6795 3600 229 229 6795 3600 7020 3645
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2700 2700 229 229 2700 2700 2925 2745
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2250 3600 229 229 2250 3600 2475 3645
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 3150 3600 229 229 3150 3600 3375 3645
-2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
- 4050 2610 4725 2610
-2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
- 4050 2745 4725 2745
-2 1 0 5 0 7 95 0 -1 12.000 1 1 -1 0 0 3
- 4500 2385 4950 2655 4500 2970
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 2490 1905 2025 2467
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 2827 2002 3202 2542
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 2115 2475 2535 1965
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 3255 2505 2872 1957
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 6135 1905 5670 2467
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 6472 2002 6847 2542
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 5760 2475 6180 1965
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 6900 2505 6517 1957
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 8160 1957 7860 2460
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 8407 2032 8625 2520
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 7942 2467 8212 2010
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 8685 2475 8467 1987
-2 2 0 1 0 7 80 0 -1 4.000 0 0 -1 0 0 5
- 1575 1350 9225 1350 9225 4050 1575 4050 1575 1350
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 6382 2460 6382 2032
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 6307 2032 6307 2467
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 6180 2857 5880 3360
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 6427 2932 6645 3420
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 5962 3367 6232 2910
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 6705 3375 6487 2887
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 2737 2460 2737 2032
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 2662 2032 2662 2467
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 2535 2857 2235 3360
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 2782 2932 3000 3420
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 2317 3367 2587 2910
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 3060 3375 2842 2887
-4 0 0 80 0 14 12 0.0000 4 105 105 2655 1845 y\001
-4 0 0 80 0 14 12 0.0000 4 105 105 6300 1845 y\001
-4 0 0 80 0 14 12 0.0000 4 75 105 6285 2752 x\001
-4 0 0 80 0 14 12 0.0000 4 75 105 2640 2752 x\001
-4 0 0 80 0 12 12 0.0000 4 105 840 3690 2025 let x' =\001
-4 0 0 80 0 12 12 0.0000 4 150 1890 3690 2205 x # orphaned_clone\001
-4 0 0 80 0 14 12 0.0000 4 105 210 8235 1845 x'\001
+++ /dev/null
-#FIG 3.2
-Portrait
-Center
-Metric
-A4
-100.00
-Single
--2
-1200 2
-6 2550 2092 2865 2407
-2 1 0 4 0 7 80 0 -1 0.000 1 1 -1 0 0 2
- 2595 2362 2820 2137
-2 1 0 4 0 7 80 0 -1 0.000 1 1 -1 0 0 2
- 2595 2137 2820 2362
--6
-6 1980 2430 3420 3870
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2700 2700 229 229 2700 2700 2925 2745
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2250 3600 229 229 2250 3600 2475 3645
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 3150 3600 229 229 3150 3600 3375 3645
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 2535 2857 2235 3360
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 2782 2932 3000 3420
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 2317 3367 2587 2910
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 3060 3375 2842 2887
--6
-1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2700 1800 229 229 2700 1800 2925 1845
-1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2025 2700 229 229 2025 2700 2250 2745
-1 3 0 1 0 7 95 0 15 4.000 1 0.0000 3375 2700 229 229 3375 2700 3600 2745
-1 3 0 1 0 7 95 0 15 4.000 1 0.0000 6345 1800 229 229 6345 1800 6570 1845
-1 3 0 1 0 7 95 0 15 4.000 1 0.0000 5670 2700 229 229 5670 2700 5895 2745
-1 3 0 1 0 7 95 0 15 4.000 1 0.0000 7020 2700 229 229 7020 2700 7245 2745
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8325 1800 229 229 8325 1800 8550 1845
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 7875 2700 229 229 7875 2700 8100 2745
-1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8775 2700 229 229 8775 2700 9000 2745
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 2737 2460 2737 2032
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 2662 2032 2662 2467
-2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
- 4050 2610 4725 2610
-2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
- 4050 2745 4725 2745
-2 1 0 5 0 7 95 0 -1 12.000 1 1 -1 0 0 3
- 4500 2385 4950 2655 4500 2970
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 2490 1905 2025 2467
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 2827 2002 3202 2542
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 2115 2475 2535 1965
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 3255 2505 2872 1957
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 6135 1905 5670 2467
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 6472 2002 6847 2542
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 5760 2475 6180 1965
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 6900 2505 6517 1957
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 8160 1957 7860 2460
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 8407 2032 8625 2520
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 7942 2467 8212 2010
-2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 8685 2475 8467 1987
-2 2 0 1 0 7 80 0 -1 4.000 0 0 -1 0 0 5
- 1575 1350 9225 1350 9225 4050 1575 4050 1575 1350
-4 0 0 80 0 14 12 0.0000 4 75 105 2640 2752 x\001
-4 0 0 95 0 12 12 0.0000 4 135 1050 3960 2250 x # delete\001
-4 0 0 80 0 14 12 0.0000 4 75 105 8280 1845 x\001
-4 0 0 80 0 14 12 0.0000 4 105 105 2655 1845 y\001
-4 0 0 80 0 14 12 0.0000 4 105 105 6300 1845 y\001
+++ /dev/null
-#FIG 3.2
-Portrait
-Center
-Metric
-A4
-100.00
-Single
--2
-1200 2
-1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2025 2025 229 229 2025 2025 2250 2070
-1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1350 2025 225 225 1350 2025 1575 2025
-1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2700 2025 225 225 2700 2025 2925 2025
-1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2025 1125 225 225 2025 1125 2250 1125
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 1380 1800 1845 1275
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 1815 1207 1282 1815
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 2055 1792 2055 1350
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 1980 1350 1980 1807
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 1 1.00 60.00 120.00
- 2190 1297 2550 1867
-2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
- 1 0 1.00 60.00 120.00
- 2602 1807 2220 1237
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 450 675 3150 675 3150 2475 450 2475 450 675
-4 0 0 100 0 12 10 0.0000 4 120 540 2377 1342 parent\001
-4 0 0 100 0 12 10 0.0000 4 105 810 645 1628 sub_nodes\001
+++ /dev/null
-#FIG 3.2
-Portrait
-Center
-Metric
-A4
-100.00
-Single
--2
-1200 2
-6 1665 2700 2835 3150
-2 4 0 1 0 7 100 0 15 0.000 0 0 7 0 0 5
- 2835 3150 2835 2700 1665 2700 1665 3150 2835 3150
-4 0 0 80 0 18 12 0.0000 4 135 930 1815 3015 "Cherries"\001
--6
-1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2250 1125 225 225 2250 1125 2475 1125
-1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 2025 225 225 1575 2025 1800 2025
-1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2925 2025 225 225 2925 2025 3150 2025
-1 3 0 1 0 7 100 0 15 0.000 1 0.0000 900 2925 242 242 900 2925 1125 3015
-2 4 0 1 0 7 100 0 15 0.000 0 0 7 0 0 5
- 1485 4275 1485 3825 315 3825 315 4275 1485 4275
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
- 2085 1275 1582 1807
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
- 2407 1297 2940 1800
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
- 1417 2190 900 2692
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
- 1740 2190 2257 2700
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
- 892 3180 892 3825
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
- 45 675 6525 675 6525 4950 45 4950 45 675
-3 3 0 1 0 7 100 0 -1 0.000 0 0 0 22
- 2115 3645 2250 3600 2520 3555 2745 3510 2925 3555 3150 3690
- 3375 3735 3600 3735 3825 3735 4140 3825 4140 4005 4005 4185
- 3735 4230 3420 4185 3150 4230 2835 4275 2520 4230 2340 4140
- 2115 4095 1980 4005 1980 3825 2025 3735
- -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
- -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
- -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
-3 3 0 1 0 7 100 0 -1 0.000 0 0 0 17
- 3465 1170 3645 1080 4050 1035 4320 1035 4545 1080 4770 1170
- 5130 1215 5355 1350 5400 1530 5265 1665 4860 1710 4455 1710
- 4095 1665 3780 1620 3555 1575 3420 1485 3420 1305
- -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
- -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
- -1.000
-3 2 0 1 0 7 100 0 -1 0.000 0 0 0 5
- 2475 1215 2655 1350 2970 1440 3240 1395 3420 1260
- 0.000 -1.000 -1.000 -1.000 0.000
-3 2 0 1 0 7 100 0 -1 0.000 0 0 0 5
- 1125 3060 1215 3397 1410 3607 1687 3727 2025 3720
- 0.000 -1.000 -1.000 -1.000 0.000
-4 0 0 80 0 18 12 0.0000 4 180 1065 375 4125 "An orange"\001
-4 0 0 80 0 18 12 0.0000 4 90 315 750 2985 <a>\001
-4 0 0 80 0 18 12 0.0000 4 135 315 1410 2085 <b>\001
-4 0 0 80 0 18 12 0.0000 4 90 315 2790 2070 <c>\001
-4 0 0 80 0 18 12 0.0000 4 90 315 2100 1200 <a>\001
-4 0 0 100 0 16 12 0.0000 4 135 795 3600 1260 attributes:\001
-4 0 0 100 0 16 12 0.0000 4 180 1680 3600 1485 "att" -> Value "apple"\001
-4 0 0 100 0 16 12 0.0000 4 135 795 2250 3780 attributes:\001
-4 0 0 100 0 17 12 0.0000 4 180 5910 390 4725 <a att="apple"><b><a att="orange">An orange</a>Cherries</b><c/></a>\001
-4 0 0 100 0 16 12 0.0000 4 180 1800 2250 4005 "att" -> Value "orange"\001
+++ /dev/null
-<!ENTITY readme.code.header '
-open Pxp_types
-open Pxp_document
-'>
-<!ENTITY readme.code.footnote-printer '
-class type footnote_printer =
- object
- method footnote_to_html : store_type -> out_channel -> unit
- end
-
-and store_type =
- object
- method alloc_footnote : footnote_printer -> int
- method print_footnotes : out_channel -> unit
- end
-;;
-'>
-<!ENTITY readme.code.store '
-class store =
- object (self)
-
- val mutable footnotes = ( [] : (int * footnote_printer) list )
- val mutable next_footnote_number = 1
-
- method alloc_footnote n =
- let number = next_footnote_number in
- next_footnote_number <- number+1;
- footnotes <- footnotes @ [ number, n ];
- number
-
- method print_footnotes ch =
- if footnotes <> [] then begin
- output_string ch "<hr align=left noshade=noshade width=\"30&percent;\">\n";
- output_string ch "<dl>\n";
- List.iter
- (fun (_,n) ->
- n # footnote_to_html (self : #store_type :> store_type) ch)
- footnotes;
- output_string ch "</dl>\n";
- end
-
- end
-;;
-'>
-<!ENTITY readme.code.escape-html '
-let escape_html s =
- Str.global_substitute
- (Str.regexp "<\\|>\\|&\\|\"")
- (fun s ->
- match Str.matched_string s with
- "<" -> "&lt;"
- | ">" -> "&gt;"
- | "&" -> "&amp;"
- | "\"" -> "&quot;"
- | _ -> assert false)
- s
-;;
-'>
-<!ENTITY readme.code.shared '
-class virtual shared =
- object (self)
-
- (* --- default_ext --- *)
-
- val mutable node = (None : shared node option)
-
- method clone = {< >}
- method node =
- match node with
- None ->
- assert false
- | Some n -> n
- method set_node n =
- node <- Some n
-
- (* --- virtual --- *)
-
- method virtual to_html : store -> out_channel -> unit
-
- end
-;;
-'>
-<!ENTITY readme.code.only-data '
-class only_data =
- object (self)
- inherit shared
-
- method to_html store ch =
- output_string ch (escape_html (self # node # data))
- end
-;;
-'>
-<!ENTITY readme.code.no-markup '
-class no_markup =
- object (self)
- inherit shared
-
- method to_html store ch =
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes)
- end
-;;
-'>
-<!ENTITY readme.code.readme '
-class readme =
- object (self)
- inherit shared
-
- method to_html store ch =
- (* output header *)
- output_string
- ch "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">";
- output_string
- ch "<!-- WARNING! This is a generated file, do not edit! -->\n";
- let title =
- match self # node # attribute "title" with
- Value s -> s
- | _ -> assert false
- in
- let html_header, _ =
- try (self # node # dtd # par_entity "readme:html:header")
- # replacement_text
- with WF_error _ -> "", false in
- let html_trailer, _ =
- try (self # node # dtd # par_entity "readme:html:trailer")
- # replacement_text
- with WF_error _ -> "", false in
- let html_bgcolor, _ =
- try (self # node # dtd # par_entity "readme:html:bgcolor")
- # replacement_text
- with WF_error _ -> "white", false in
- let html_textcolor, _ =
- try (self # node # dtd # par_entity "readme:html:textcolor")
- # replacement_text
- with WF_error _ -> "", false in
- let html_alinkcolor, _ =
- try (self # node # dtd # par_entity "readme:html:alinkcolor")
- # replacement_text
- with WF_error _ -> "", false in
- let html_vlinkcolor, _ =
- try (self # node # dtd # par_entity "readme:html:vlinkcolor")
- # replacement_text
- with WF_error _ -> "", false in
- let html_linkcolor, _ =
- try (self # node # dtd # par_entity "readme:html:linkcolor")
- # replacement_text
- with WF_error _ -> "", false in
- let html_background, _ =
- try (self # node # dtd # par_entity "readme:html:background")
- # replacement_text
- with WF_error _ -> "", false in
-
- output_string ch "<html><header><title>\n";
- output_string ch (escape_html title);
- output_string ch "</title></header>\n";
- output_string ch "<body ";
- List.iter
- (fun (name,value) ->
- if value <> "" then
- output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
- [ "bgcolor", html_bgcolor;
- "text", html_textcolor;
- "link", html_linkcolor;
- "alink", html_alinkcolor;
- "vlink", html_vlinkcolor;
- ];
- output_string ch ">\n";
- output_string ch html_header;
- output_string ch "<h1>";
- output_string ch (escape_html title);
- output_string ch "</h1>\n";
- (* process main content: *)
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes);
- (* now process footnotes *)
- store # print_footnotes ch;
- (* trailer *)
- output_string ch html_trailer;
- output_string ch "</html>\n";
-
- end
-;;
-'>
-<!ENTITY readme.code.section '
-class section the_tag =
- object (self)
- inherit shared
-
- val tag = the_tag
-
- method to_html store ch =
- let sub_nodes = self # node # sub_nodes in
- match sub_nodes with
- title_node :: rest ->
- output_string ch ("<" ^ tag ^ ">\n");
- title_node # extension # to_html store ch;
- output_string ch ("\n</" ^ tag ^ ">");
- List.iter
- (fun n -> n # extension # to_html store ch)
- rest
- | _ ->
- assert false
- end
-;;
-
-class sect1 = section "h1";;
-class sect2 = section "h3";;
-class sect3 = section "h4";;
-'>
-<!ENTITY readme.code.map-tag '
-class map_tag the_target_tag =
- object (self)
- inherit shared
-
- val target_tag = the_target_tag
-
- method to_html store ch =
- output_string ch ("<" ^ target_tag ^ ">\n");
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes);
- output_string ch ("\n</" ^ target_tag ^ ">");
- end
-;;
-
-class p = map_tag "p";;
-class em = map_tag "b";;
-class ul = map_tag "ul";;
-class li = map_tag "li";;
-'>
-<!ENTITY readme.code.br '
-class br =
- object (self)
- inherit shared
-
- method to_html store ch =
- output_string ch "<br>\n";
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes);
- end
-;;
-'>
-<!ENTITY readme.code.code '
-class code =
- object (self)
- inherit shared
-
- method to_html store ch =
- let data = self # node # data in
- (* convert tabs *)
- let l = String.length data in
- let rec preprocess i column =
-      (* this is very inefficient, but easy to understand: *)
- if i < l then
- match data.[i] with
- '\t' ->
- let n = 8 - (column mod 8) in
- String.make n ' ' ^ preprocess (i+1) (column + n)
- | '\n' ->
- "\n" ^ preprocess (i+1) 0
- | c ->
- String.make 1 c ^ preprocess (i+1) (column + 1)
- else
- ""
- in
- output_string ch "<p><pre>";
- output_string ch (escape_html (preprocess 0 0));
- output_string ch "</pre></p>";
-
- end
-;;
-'>
-<!ENTITY readme.code.a '
-class a =
- object (self)
- inherit shared
-
- method to_html store ch =
- output_string ch "<a ";
- let href =
- match self # node # attribute "href" with
- Value v -> escape_html v
- | Valuelist _ -> assert false
- | Implied_value ->
- begin match self # node # attribute "readmeref" with
- Value v -> escape_html v ^ ".html"
- | Valuelist _ -> assert false
- | Implied_value ->
- ""
- end
- in
- if href <> "" then
- output_string ch ("href=\"" ^ href ^ "\"");
- output_string ch ">";
- output_string ch (escape_html (self # node # data));
- output_string ch "</a>";
-
- end
-;;
-'>
-<!ENTITY readme.code.footnote '
-class footnote =
- object (self)
- inherit shared
-
- val mutable footnote_number = 0
-
- method to_html store ch =
- let number =
- store # alloc_footnote (self : #shared :> footnote_printer) in
- let foot_anchor =
- "footnote" ^ string_of_int number in
- let text_anchor =
- "textnote" ^ string_of_int number in
- footnote_number <- number;
- output_string ch ( "<a name=\"" ^ text_anchor ^ "\" href=\"#" ^
- foot_anchor ^ "\">[" ^ string_of_int number ^
- "]</a>" )
-
- method footnote_to_html store ch =
- (* prerequisite: we are in a definition list <dl>...</dl> *)
- let foot_anchor =
- "footnote" ^ string_of_int footnote_number in
- let text_anchor =
- "textnote" ^ string_of_int footnote_number in
- output_string ch ("<dt><a name=\"" ^ foot_anchor ^ "\" href=\"#" ^
- text_anchor ^ "\">[" ^ string_of_int footnote_number ^
- "]</a></dt>\n<dd>");
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes);
- output_string ch ("\n</dd>")
-
- end
-;;
-'>
-<!ENTITY readme.code.tag-map '
-open Pxp_yacc
-
-let tag_map =
- make_spec_from_alist
- ~data_exemplar:(new data_impl (new only_data))
- ~default_element_exemplar:(new element_impl (new no_markup))
- ~element_alist:
- [ "readme", (new element_impl (new readme));
- "sect1", (new element_impl (new sect1));
- "sect2", (new element_impl (new sect2));
- "sect3", (new element_impl (new sect3));
- "title", (new element_impl (new no_markup));
- "p", (new element_impl (new p));
- "br", (new element_impl (new br));
- "code", (new element_impl (new code));
- "em", (new element_impl (new em));
- "ul", (new element_impl (new ul));
- "li", (new element_impl (new li));
- "footnote", (new element_impl (new footnote : #shared :> shared));
- "a", (new element_impl (new a));
- ]
- ()
-;;
-'>
+++ /dev/null
-<!ENTITY markup-yacc.mli '
-
-open Pxp_types
-open Pxp_dtd
-open Pxp_document
-
-exception ID_not_unique
-
-class type [ 'ext ] index =
-object
- (* The type of indexes over the ID attributes of the elements. This type
- * is the minimum requirement needed by the parser to create such an index.
- *)
- constraint 'ext = 'ext node #extension
- method add : string -> 'ext node -> unit
- (* Add the passed node to the index. If there is already an ID with
- * the passed string value, the exception ID_not_unique should be
- * raised. (But the index is free also to accept several identical IDs.)
- *)
- method find : string -> 'ext node
- (* Finds the node with the passed ID value, or raises Not_found *)
-end
-;;
-
-
-class [ 'ext ] hash_index :
-object
- (* This is a simple implementation of 'index' using a hash table. *)
- constraint 'ext = 'ext node #extension
- method add : string -> 'ext node -> unit
- (* See above. *)
- method find : string -> 'ext node
- (* See above. *)
- method index : (string, 'ext node) Hashtbl.t
- (* Returns the hash table. *)
-end
-;;
-
-
-type config =
- { warner : collect_warnings;
- (* An object that collects warnings. *)
-
- errors_with_line_numbers : bool;
- (* Whether error messages contain line numbers or not. The parser
- * is 10 to 20 per cent faster if line numbers are turned off;
- * you get only byte positions in this case.
- *)
-
- enable_pinstr_nodes : bool;
- (* true: turns a special mode for processing instructions on. Normally,
- * you cannot determine the exact location of a PI; you only know
- * in which element the PI occurs. This mode makes it possible
- * to find out the exact location: Every PI is artificially wrapped
- * by a special node with type T_pinstr. For example, if the XML text
- * is <a><?x?><?y?></a>, the parser normally produces only an element
- * object for "a", and puts the PIs "x" and "y" into it (in no
- * particular order). In this mode, the object "a" will contain two objects
- * with type T_pinstr, and the first object will contain "x", and the
- * second "y": the object tree looks like
- * - Node with type = T_element "a"
- * - Node with type = T_pinstr "x"
- * + contains processing instruction "x"
- * - Node with type = T_pinstr "y"
- * + contains processing instruction "y"
- *
- * Notes:
- * (1) In past versions of PXP this mode was called
- * processing_instructions_inline, and it produced nodes of
- * type T_element "-pi" instead of T_pinstr.
- * (2) The T_pinstr nodes are created from the pinstr exemplars
- * in your spec
- *)
-
- enable_super_root_node : bool;
- (* true: the topmost element of the XML tree is not the root element,
- * but the so-called super root. The root element is a child of the
- * super root. The super root is a node with type T_super_root.
- * The following behaviour changes, too:
- * - PIs occurring outside the root element and outside the DTD are
- * added to the super root instead of the document object
- * - If enable_pinstr_nodes is also turned on, the PI wrappers
- * are added to the super root
- *
- * For example, the document
- * <?x?><a>y</a><?y?>
- * is normally represented by:
- * - document object
- * + contains PIs x and y
- * - reference to root node with type = T_element "a"
- * - node with type = T_data: contains "y"
- * With enabled super root node:
- * - document object
- * - reference to super root node with type = T_super_root
- * + contains PIs x and y
- * - root node with type = T_element "a"
- * - node with type = T_data: contains "y"
- * If also enable_pinstr_nodes:
- * - document object
- * - reference to super root node with type = T_super_root
- * - node with type = T_pinstr "x"
- * + contains PI "x"
- * - root node with type = T_element "a"
- * - node with type = T_data: contains "y"
- * - node with type = T_pinstr "y"
- * + contains PI "y"
- * Notes:
- * (1) In previous versions of PXP this mode was called
- * virtual_root, and it produced an additional node of type
- * T_element "-vr" instead of T_super_root.
- * (2) The T_super_root node is created from the super root exemplar
- * in your spec.
- *)
-
- enable_comment_nodes : bool;
- (* When enabled, comments are represented as nodes with type =
- * T_comment.
- * To access the contents of comments, use the method "comment"
- * for the comment nodes.
- * These nodes behave like elements; however, they are normally
- * empty and do not have attributes. Note that it is possible to
- * add children to comment nodes and to set attributes, but it is
- * strongly recommended not to do so. There are no checks on
- * such abnormal use, because they would cost too
- * much time, even when no comment nodes are generated at all.
- *
- * Comment nodes should be disabled unless you must parse a
- * third-party XML text which uses comments as another data
- * container.
- *
- * The nodes of type T_comment are created from the comment exemplars
- * in your spec.
- *)
-
- encoding : rep_encoding;
- (* Specifies the encoding used for the *internal* representation
- * of any character data.
- * Note that the default is still Enc_iso88591.
- *)
-
- recognize_standalone_declaration : bool;
- (* Whether the "standalone" declaration is recognized or not.
- * This option does not have an effect on well-formedness parsing:
- * in this case such declarations are never recognized.
- *
- * Recognizing the "standalone" declaration means that the
- * value of the declaration is scanned and passed to the DTD,
- * and that the "standalone-check" is performed.
- *
- * Standalone-check: If a document is flagged standalone='yes'
- * some additional constraints apply. The idea is that a parser
- * without access to any external document subsets can still parse
- * the document, and will still return the same values as the parser
- * with such access. For example, if the DTD is external and if
- * there are attributes with default values, it is checked that there
- * is no element instance where these attributes are omitted - the
- * parser would return the default value but this requires access to
- * the external DTD subset.
- *)
-
- store_element_positions : bool;
- (* Whether the file name, the line and the column of the
- * beginning of elements are stored in the element nodes.
- * This option may be useful to generate error messages.
- *
- * Positions are only stored for:
- * - Elements
- * - Wrapped processing instructions (see enable_pinstr_nodes)
- * For all other node types, no position is stored.
- *
- * You can access positions by the method "position" of nodes.
- *)
-
- idref_pass : bool;
- (* Whether the parser does a second pass and checks that all
- * IDREF and IDREFS attributes contain valid references.
- * This option works only if an ID index is available. To create
- * an ID index, pass an index object as id_index argument to the
- * parsing functions (such as parse_document_entity; see below).
- *
- * "Second pass" does not mean that the XML text is again parsed;
- * only the existing document tree is traversed, and the check
- * on bad IDREF/IDREFS attributes is performed for every node.
- *)
-
- validate_by_dfa : bool;
- (* If true, and if DFAs are available for validation, the DFAs will
- * actually be used for validation.
- * If false, or if no DFAs are available, the standard backtracking
- * algorithm will be used.
- * DFA = deterministic finite automaton.
- *
- * DFAs are only available if accept_only_deterministic_models is
- * "true" (because in this case, it is relatively cheap to construct
- * the DFAs). DFAs are a data structure which ensures that validation
- * can always be performed in linear time.
- *
- * I strongly recommend using DFAs; however, there are examples
- * for which validation by backtracking is faster.
- *)
-
- accept_only_deterministic_models : bool;
- (* Whether only deterministic content models are accepted in DTDs. *)
-
- (* The following options are not implemented, or only for internal
- * use.
- *)
-
- debugging_mode : bool;
- }
-
-
-type source =
- Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
- | ExtID of (ext_id * Pxp_reader.resolver)
-
-val from_channel :
- ?system_encoding:encoding -> ?id:ext_id -> ?fixenc:encoding ->
- in_channel -> source
-
-val from_string :
- ?fixenc:encoding -> string -> source
-
-val from_file :
- ?system_encoding:encoding -> string -> source
-
-(* Notes on sources (version 2):
- *
- * Sources specify where the XML text to parse comes from. Sources not only
- * represent character streams, but also external IDs (i.e. SYSTEM or PUBLIC
- * names), and they are interpreted as a specific encoding of characters.
- * A source should be associated with an external ID, because otherwise
- * it is not known how to handle relative names.
- *
- * There are two primary sources, Entity and ExtID, and several functions
- * for derived sources. First explanations for the functions:
- *
- * from_channel: The XML text is read from an in_channel. By default, the
- * channel is not associated with an external ID, and it is impossible
- * to resolve relative SYSTEM IDs found in the document.
- * If the ?id argument is passed, it is assumed that the channel has this
- * external ID. If relative SYSTEM IDs occur in the document, they can
- * be interpreted; however, it is only possible to read from "file:"
- * IDs.
- * By default, the channel automatically detects the encoding. You can
- * set a fixed encoding by passing the ?fixenc argument.
- *
- * from_string: The XML text is read from a string.
- * It is impossible to read from any external entity whose reference is found
- * in the string.
- * By default, the encoding of the string is detected automatically. You can
- * set a fixed encoding by passing the ?fixenc argument.
- *
- * from_file: The XML text is read from the file whose file name is
- * passed to the function (as UTF-8 string).
- * Relative system IDs can be interpreted by this function.
- * The ?system_encoding argument specifies the character encoding used
- * for file names (sic!). By default, UTF-8 is assumed.
- *
- * Examples:
- *
- * from_file "/tmp/file.xml":
- * reads from this file, which is assumed to have the ID
- * SYSTEM "file://localhost/tmp/file.xml".
- *
- * let ch = open_in "/tmp/file.xml" in
- * from_channel ~id:(System "file://localhost/tmp/file.xml") ch
- * This does the same, but uses a channel.
- *
- * from_channel ~id:(System "http://host/file.xml")
- * ch
- * reads from the channel ch, and it is assumed that the ID is
- * SYSTEM "http://host/file.xml". If there is any relative SYSTEM ID,
- * it will be interpreted relative to this location; however, there is
- * no way to read via HTTP.
- * If there is any "file:" SYSTEM ID, it is possible to read the file.
- *
- * The primary sources:
- *
- * - ExtID(x,r): The identifier x (either the SYSTEM or the PUBLIC name) of the
- * entity to read from is passed to the resolver, and the resolver finds
- * the entity and opens it.
- * The intention of this option is to allow customized
- * resolvers to interpret external identifiers without any restriction.
- * The Pxp_reader module contains several classes allowing the user to
- * compose such a customized resolver from predefined components.
- *
- * ExtID is the interface of choice for your own resolver extensions.
- *
- * - Entity(m,r): You can implement every behaviour by using a customized
- *   entity class. Once the DTD object d that will be used during parsing
- *   is known, the entity e = m d is determined and used together with the
- *   resolver r.
- * This is only for hackers.
- *)
-
-
-
-val default_config : config
- (* - Warnings are thrown away
- * - Error messages will contain line numbers
- * - Neither T_super_root nor T_pinstr nor T_comment nodes are generated
- * - The internal encoding is ISO-8859-1
- * - The standalone declaration is checked
- * - Element positions are stored
- * - The IDREF pass is left out
- * - If available, DFAs are used for validation
- * - Only deterministic content models are accepted
- *)
-
-val default_extension : ('a node extension) as 'a
- (* A "null" extension; an extension that does not extend the functionality *)
-
-val default_spec : ('a node extension as 'a) spec
- (* Specifies that you do not want to use extensions. *)
-
-val parse_dtd_entity : config -> source -> dtd
- (* Parse an entity containing a DTD (external subset), and return this DTD. *)
-
-val extract_dtd_from_document_entity : config -> source -> dtd
- (* Parses a closed document, i.e. a document beginning with <!DOCTYPE...>,
- * and returns the DTD contained in the document.
- * The parts of the document outside the DTD are actually not parsed,
- * i.e. parsing stops when all declarations of the DTD have been read.
- *)
-
-val parse_document_entity :
- ?transform_dtd:(dtd -> dtd) ->
- ?id_index:('ext index) ->
- config -> source -> 'ext spec -> 'ext document
- (* Parse a closed document, i.e. a document beginning with <!DOCTYPE...>,
- * and validate the contents of the document against the DTD contained
- * and/or referenced in the document.
- *
- * If the optional argument ~transform_dtd is passed, the following
- * modification applies: After the DTD (both the internal and external
- * subsets) has been parsed, the function ~transform_dtd is called,
- * and the resulting DTD is actually used to validate the document.
- *
- * If the optional argument ~transform_dtd is missing, the parser
- * behaves in the same way as if the identity were passed as ~transform_dtd.
- *
- * If the optional argument ~id_index is present, the parser adds
- * any ID attribute to the passed index. An index is required to detect
- * violations of the uniqueness of IDs.
- *)
-
-val parse_wfdocument_entity :
- config -> source -> 'ext spec -> 'ext document
- (* Parse a closed document (see parse_document_entity), but do not
- * validate it. Only checks on well-formedness are performed.
- *)
-
-val parse_content_entity :
- ?id_index:('ext index) ->
- config -> source -> dtd -> 'ext spec -> 'ext node
- (* Parse a file representing a well-formed fragment of a document. The
- * fragment must be a single element (i.e. something like <a>...</a>;
- * not a sequence like <a>...</a><b>...</b>). The element is validated
- * against the passed DTD, but it is not checked whether the element is
- * the root element specified in the DTD.
- *
- * If the optional argument ~id_index is present, the parser adds
- * any ID attribute to the passed index. An index is required to detect
- * violations of the uniqueness of IDs.
- *)
-
-val parse_wfcontent_entity :
- config -> source -> 'ext spec -> 'ext node
- (* Parse a file representing a well-formed fragment of a document
- * (see parse_content_entity). The fragment is not validated, only
- * checked for well-formedness.
- *)
-
-
-'>
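The interface above (the config record, the source constructors, and the
parse_* entry points) is easiest to see in combination. The following sketch
is not part of the distribution; the file name "doc.xml" and the chosen
option values are only an illustration. It parses and validates a document
with a slightly customized configuration, using a hash_index so that the
idref_pass check can actually run:

    open Pxp_types
    open Pxp_document
    open Pxp_yacc

    let () =
      let config =
        { default_config with
            encoding = `Enc_iso88591;   (* internal representation *)
            idref_pass = true           (* needs the id_index below *)
        } in
      let index = new hash_index in
      try
        let doc =
          parse_document_entity
            ~id_index:(index :> 'ext index)
            config
            (from_file "doc.xml")
            default_spec
        in
        let root = doc # root in
        print_endline ("Character data of the root element: " ^ root # data)
      with
        e ->
          prerr_endline (string_of_exn e);
          exit 1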
+++ /dev/null
-.PHONY: all
-all:
-
-.PHONY: clean
-clean:
-
-.PHONY: CLEAN
-CLEAN: clean
- $(MAKE) -C xmlforms CLEAN
- $(MAKE) -C validate CLEAN
- $(MAKE) -C readme CLEAN
- $(MAKE) -C simple_transformation CLEAN
-
-.PHONY: distclean
-distclean: clean
- rm -f *~
- $(MAKE) -C xmlforms distclean
- $(MAKE) -C validate distclean
- $(MAKE) -C readme distclean
- $(MAKE) -C simple_transformation distclean
-
-
+++ /dev/null
-*.cmi
-*.cmo
-*.cma
-*.cmx
-*.o
-*.a
-*.cmxa
-depend
-depend.pkg
-
+++ /dev/null
-# make readme: make bytecode executable
-# make readme.opt: make native executable
-# make clean: remove intermediate files
-# make CLEAN: remove intermediate files (recursively)
-# make distclean: remove any superfluous files
-# make install
-#----------------------------------------------------------------------
-
-BIN = /usr/local/bin
-
-.PHONY: readme
-readme:
- $(MAKE) -f Makefile.code readme
-
-.PHONY: readme.opt
-readme.opt:
- $(MAKE) -f Makefile.code readme.opt
-
-
-.PHONY: clean
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
-
-.PHONY: CLEAN
-CLEAN: clean
-
-.PHONY: distclean
-distclean: clean
- rm -f *~ depend depend.pkg
- rm -f readme readme.opt
-
-.PHONY: install
-install:
- cp readme $(BIN)
+++ /dev/null
-#----------------------------------------------------------------------
-# specific rules for this package:
-
-OBJECTS = to_html.cmo to_text.cmo
-XOBJECTS = $(OBJECTS:.cmo=.cmx)
-ARCHIVE = readme.cma
-XARCHIVE = readme.cmxa
-NAME = readme
-REQUIRES = str pxp
-
-readme: $(ARCHIVE) main.cmo
- ocamlfind ocamlc -o readme -custom -package "$(REQUIRES)" \
- -linkpkg $(ARCHIVE) main.cmo
-
-readme.opt: $(XARCHIVE) main.cmx
-	ocamlfind ocamlopt -o readme.opt -package "$(REQUIRES)" \
-	          -linkpkg $(XARCHIVE) main.cmx
-
-$(ARCHIVE): $(OBJECTS)
- $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS)
-
-$(XARCHIVE): $(XOBJECTS)
- $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS)
-
-#----------------------------------------------------------------------
-# general rules:
-
-OPTIONS =
-OCAMLC = ocamlc -g $(OPTIONS) $(ROPTIONS)
-OCAMLOPT = ocamlopt -p $(OPTIONS) $(ROPTIONS)
-OCAMLDEP = ocamldep $(OPTIONS)
-OCAMLFIND = ocamlfind
-
-depend: *.ml *.mli
- $(OCAMLDEP) *.ml *.mli >depend
-
-depend.pkg: Makefile
- $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg
-
-.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly
-
-.ml.cmx:
- $(OCAMLOPT) -c $<
-
-.ml.cmo:
- $(OCAMLC) -c $<
-
-.mli.cmi:
- $(OCAMLC) -c $<
-
-.mll.ml:
- ocamllex $<
-
-*.mli:
-
-include depend
-include depend.pkg
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Pxp_types
-open Pxp_document
-open Pxp_yacc
-
-
-let rec print_error e =
- prerr_endline(string_of_exn e)
-;;
-
-
-let run f a =
- try f a with
- e -> print_error e
-;;
-
-
-let convert_to_html filename =
-  (* parse the input document *)
- let document =
- parse_document_entity
- { default_config with encoding = `Enc_iso88591 }
- (from_file filename)
- To_html.tag_map
- in
- let root = document # root in
- let store = new To_html.store in
- root # extension # to_html store stdout
-;;
-
-
-let convert_to_text filename =
-  (* parse the input document *)
- let document =
- parse_document_entity
- default_config
- (from_file filename)
- To_text.tag_map
- in
- let root = document # root in
- let store = new To_text.store in
- let box = new To_text.box 79 79 in
- root # extension # to_box store box;
- box # output 0 0 stdout
-;;
-
-
-let main() =
- let want_html = ref false in
- let want_text = ref false in
- let filename = ref None in
- Arg.parse
- [ "-html", Arg.Set want_html,
- " convert file to html";
- "-text", Arg.Set want_text,
- " convert file to text";
- ]
- (fun s ->
- match !filename with
- None -> filename := Some s
- | Some _ ->
- raise (Arg.Bad "Multiple arguments not allowed."))
- "usage: readme [ -text | -html ] input.xml >output";
- let fn =
- match !filename with
- None ->
- prerr_endline "readme: no input";
- exit 1
- | Some s -> s
- in
- match !want_html, !want_text with
- true, false ->
- run convert_to_html fn
- | false, true ->
- run convert_to_text fn
- | _ ->
- prerr_endline ("readme: Please select exactly one output format")
-;;
-
-main();;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:31 lpadovan
- * Initial revision
- *
- * Revision 1.5 2000/07/08 17:58:17 gerd
- * Updated because of PXP API changes.
- *
- * Revision 1.4 2000/06/04 20:25:38 gerd
- * Updates because of renamed PXP modules.
- *
- * Revision 1.3 2000/05/01 16:46:40 gerd
- * Using the new error formatter.
- *
- * Revision 1.2 1999/08/23 16:54:19 gerd
- * Minor changes.
- *
- * Revision 1.1 1999/08/22 22:29:32 gerd
- * Initial revision.
- *
- *)
+++ /dev/null
-<!-- $Id$ -->
-
-<!ENTITY % p.like "p|ul">
-<!ENTITY % text "br|code|em|footnote|a">
-
-<!ELEMENT readme (sect1+)>
-<!ATTLIST readme
- title CDATA #REQUIRED>
-
-<!ELEMENT sect1 (title,(sect2|%p.like;)+)>
-
-<!ELEMENT sect2 (title,(sect3|%p.like;)+)>
-
-<!ELEMENT sect3 (title,(%p.like;)+)>
-
-<!ELEMENT title (#PCDATA|br)*>
-
-<!ELEMENT p (#PCDATA|%text;)*>
-
-<!ELEMENT br EMPTY>
-
-<!ELEMENT code (#PCDATA)>
-
-<!ELEMENT em (#PCDATA|%text;)*>
-
-<!ELEMENT ul (li+)>
-
-<!ELEMENT li (%p.like;)*>
-
-<!ELEMENT footnote (#PCDATA|%text;)*>
-
-<!ELEMENT a (#PCDATA)*>
-<!ATTLIST a
- href CDATA #IMPLIED
- readmeref CDATA #IMPLIED
->
-
-
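This DTD is what the readme converters in this directory are written
against. As a concrete illustration (this sample is made up and not shipped
with the package), a minimal document instance that validates against it
looks like:

    <?xml version="1.0" encoding="ISO-8859-1"?>
    <!DOCTYPE readme SYSTEM "readme.dtd">
    <readme title="Sample README">
      <sect1>
        <title>Overview</title>
        <p>Some text with <em>emphasis</em>, a line break<br/>, a
           <a href="http://example.net/">link</a>, and a
           footnote.<footnote>Footnotes contain text as well.</footnote></p>
        <ul>
          <li><p>List items hold p-like content.</p></li>
        </ul>
      </sect1>
    </readme>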
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-(*$ readme.code.header *)
-open Pxp_types
-open Pxp_document
-(*$-*)
-
-
-(*$ readme.code.footnote-printer *)
-class type footnote_printer =
- object
- method footnote_to_html : store_type -> out_channel -> unit
- end
-
-and store_type =
- object
- method alloc_footnote : footnote_printer -> int
- method print_footnotes : out_channel -> unit
- end
-;;
-(*$-*)
-
-
-(*$ readme.code.store *)
-class store =
- object (self)
-
- val mutable footnotes = ( [] : (int * footnote_printer) list )
- val mutable next_footnote_number = 1
-
- method alloc_footnote n =
- let number = next_footnote_number in
- next_footnote_number <- number+1;
- footnotes <- footnotes @ [ number, n ];
- number
-
- method print_footnotes ch =
- if footnotes <> [] then begin
- output_string ch "<hr align=left noshade=noshade width=\"30%\">\n";
- output_string ch "<dl>\n";
- List.iter
- (fun (_,n) ->
- n # footnote_to_html (self : #store_type :> store_type) ch)
- footnotes;
- output_string ch "</dl>\n";
- end
-
- end
-;;
-(*$-*)
-
-
-
-(*$ readme.code.escape-html *)
-let escape_html s =
- Str.global_substitute
- (Str.regexp "<\\|>\\|&\\|\"")
- (fun s ->
- match Str.matched_string s with
-         "<" -> "&lt;"
-       | ">" -> "&gt;"
-       | "&" -> "&amp;"
-       | "\"" -> "&quot;"
- | _ -> assert false)
- s
-;;
-(*$-*)
-
-
-(*$ readme.code.shared *)
-class virtual shared =
- object (self)
-
- (* --- default_ext --- *)
-
- val mutable node = (None : shared node option)
-
- method clone = {< >}
- method node =
- match node with
- None ->
- assert false
- | Some n -> n
- method set_node n =
- node <- Some n
-
- (* --- virtual --- *)
-
- method virtual to_html : store -> out_channel -> unit
-
- end
-;;
-(*$-*)
-
-
-(*$ readme.code.only-data *)
-class only_data =
- object (self)
- inherit shared
-
- method to_html store ch =
- output_string ch (escape_html (self # node # data))
- end
-;;
-(*$-*)
-
-
-(*$ readme.code.no-markup *)
-class no_markup =
- object (self)
- inherit shared
-
- method to_html store ch =
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes)
- end
-;;
-(*$-*)
-
-
-(*$ readme.code.readme *)
-class readme =
- object (self)
- inherit shared
-
- method to_html store ch =
- (* output header *)
- output_string
- ch "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">";
- output_string
- ch "<!-- WARNING! This is a generated file, do not edit! -->\n";
- let title =
- match self # node # attribute "title" with
- Value s -> s
- | _ -> assert false
- in
- let html_header, _ =
- try (self # node # dtd # par_entity "readme:html:header")
- # replacement_text
- with WF_error _ -> "", false in
- let html_trailer, _ =
- try (self # node # dtd # par_entity "readme:html:trailer")
- # replacement_text
- with WF_error _ -> "", false in
- let html_bgcolor, _ =
- try (self # node # dtd # par_entity "readme:html:bgcolor")
- # replacement_text
- with WF_error _ -> "white", false in
- let html_textcolor, _ =
- try (self # node # dtd # par_entity "readme:html:textcolor")
- # replacement_text
- with WF_error _ -> "", false in
- let html_alinkcolor, _ =
- try (self # node # dtd # par_entity "readme:html:alinkcolor")
- # replacement_text
- with WF_error _ -> "", false in
- let html_vlinkcolor, _ =
- try (self # node # dtd # par_entity "readme:html:vlinkcolor")
- # replacement_text
- with WF_error _ -> "", false in
- let html_linkcolor, _ =
- try (self # node # dtd # par_entity "readme:html:linkcolor")
- # replacement_text
- with WF_error _ -> "", false in
- let html_background, _ =
- try (self # node # dtd # par_entity "readme:html:background")
- # replacement_text
- with WF_error _ -> "", false in
-
- output_string ch "<html><header><title>\n";
- output_string ch (escape_html title);
- output_string ch "</title></header>\n";
- output_string ch "<body ";
- List.iter
- (fun (name,value) ->
- if value <> "" then
- output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
- [ "bgcolor", html_bgcolor;
- "text", html_textcolor;
- "link", html_linkcolor;
- "alink", html_alinkcolor;
- "vlink", html_vlinkcolor;
- ];
- output_string ch ">\n";
- output_string ch html_header;
- output_string ch "<h1>";
- output_string ch (escape_html title);
- output_string ch "</h1>\n";
- (* process main content: *)
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes);
- (* now process footnotes *)
- store # print_footnotes ch;
- (* trailer *)
- output_string ch html_trailer;
- output_string ch "</html>\n";
-
- end
-;;
-(*$-*)
-
-
-(*$ readme.code.section *)
-class section the_tag =
- object (self)
- inherit shared
-
- val tag = the_tag
-
- method to_html store ch =
- let sub_nodes = self # node # sub_nodes in
- match sub_nodes with
- title_node :: rest ->
- output_string ch ("<" ^ tag ^ ">\n");
- title_node # extension # to_html store ch;
- output_string ch ("\n</" ^ tag ^ ">");
- List.iter
- (fun n -> n # extension # to_html store ch)
- rest
- | _ ->
- assert false
- end
-;;
-
-class sect1 = section "h1";;
-class sect2 = section "h3";;
-class sect3 = section "h4";;
-(*$-*)
-
-
-(*$ readme.code.map-tag *)
-class map_tag the_target_tag =
- object (self)
- inherit shared
-
- val target_tag = the_target_tag
-
- method to_html store ch =
- output_string ch ("<" ^ target_tag ^ ">\n");
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes);
- output_string ch ("\n</" ^ target_tag ^ ">");
- end
-;;
-
-class p = map_tag "p";;
-class em = map_tag "b";;
-class ul = map_tag "ul";;
-class li = map_tag "li";;
-(*$-*)
-
-
-(*$ readme.code.br *)
-class br =
- object (self)
- inherit shared
-
- method to_html store ch =
- output_string ch "<br>\n";
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes);
- end
-;;
-(*$-*)
-
-
-(*$ readme.code.code *)
-class code =
- object (self)
- inherit shared
-
- method to_html store ch =
- let data = self # node # data in
- (* convert tabs *)
- let l = String.length data in
- let rec preprocess i column =
-      (* this is very inefficient, but easy to understand: *)
- if i < l then
- match data.[i] with
- '\t' ->
- let n = 8 - (column mod 8) in
- String.make n ' ' ^ preprocess (i+1) (column + n)
- | '\n' ->
- "\n" ^ preprocess (i+1) 0
- | c ->
- String.make 1 c ^ preprocess (i+1) (column + 1)
- else
- ""
- in
- output_string ch "<p><pre>";
- output_string ch (escape_html (preprocess 0 0));
- output_string ch "</pre></p>";
-
- end
-;;
-(*$-*)
-
-
-(*$ readme.code.a *)
-class a =
- object (self)
- inherit shared
-
- method to_html store ch =
- output_string ch "<a ";
- let href =
- match self # node # attribute "href" with
- Value v -> escape_html v
- | Valuelist _ -> assert false
- | Implied_value ->
- begin match self # node # attribute "readmeref" with
- Value v -> escape_html v ^ ".html"
- | Valuelist _ -> assert false
- | Implied_value ->
- ""
- end
- in
- if href <> "" then
- output_string ch ("href=\"" ^ href ^ "\"");
- output_string ch ">";
- output_string ch (escape_html (self # node # data));
- output_string ch "</a>";
-
- end
-;;
-(*$-*)
-
-
-(*$ readme.code.footnote *)
-class footnote =
- object (self)
- inherit shared
-
- val mutable footnote_number = 0
-
- method to_html store ch =
- let number =
- store # alloc_footnote (self : #shared :> footnote_printer) in
- let foot_anchor =
- "footnote" ^ string_of_int number in
- let text_anchor =
- "textnote" ^ string_of_int number in
- footnote_number <- number;
- output_string ch ( "<a name=\"" ^ text_anchor ^ "\" href=\"#" ^
- foot_anchor ^ "\">[" ^ string_of_int number ^
- "]</a>" )
-
- method footnote_to_html store ch =
- (* prerequisite: we are in a definition list <dl>...</dl> *)
- let foot_anchor =
- "footnote" ^ string_of_int footnote_number in
- let text_anchor =
- "textnote" ^ string_of_int footnote_number in
- output_string ch ("<dt><a name=\"" ^ foot_anchor ^ "\" href=\"#" ^
- text_anchor ^ "\">[" ^ string_of_int footnote_number ^
- "]</a></dt>\n<dd>");
- List.iter
- (fun n -> n # extension # to_html store ch)
- (self # node # sub_nodes);
- output_string ch ("\n</dd>")
-
- end
-;;
-(*$-*)
-
-
-(**********************************************************************)
-
-(*$ readme.code.tag-map *)
-open Pxp_yacc
-
-let tag_map =
- make_spec_from_alist
- ~data_exemplar:(new data_impl (new only_data))
- ~default_element_exemplar:(new element_impl (new no_markup))
- ~element_alist:
- [ "readme", (new element_impl (new readme));
- "sect1", (new element_impl (new sect1));
- "sect2", (new element_impl (new sect2));
- "sect3", (new element_impl (new sect3));
- "title", (new element_impl (new no_markup));
- "p", (new element_impl (new p));
- "br", (new element_impl (new br));
- "code", (new element_impl (new code));
- "em", (new element_impl (new em));
- "ul", (new element_impl (new ul));
- "li", (new element_impl (new li));
- "footnote", (new element_impl (new footnote : #shared :> shared));
- "a", (new element_impl (new a));
- ]
- ()
-;;
-(*$-*)
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:31 lpadovan
- * Initial revision
- *
- * Revision 1.6 2000/08/22 14:34:25 gerd
- * Using make_spec_from_alist instead of make_spec_from_mapping.
- *
- * Revision 1.5 2000/08/18 21:15:14 gerd
- * Update because of PXP API change: par_entity raises WF_error
- * instead of Validation error if the entity is not defined.
- * Further minor updates.
- *
- * Revision 1.4 2000/07/08 17:58:17 gerd
- * Updated because of PXP API changes.
- *
- * Revision 1.3 2000/06/04 20:25:38 gerd
- * Updates because of renamed PXP modules.
- *
- * Revision 1.2 1999/09/12 20:09:32 gerd
- * Added section marks.
- *
- * Revision 1.1 1999/08/22 22:29:32 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Pxp_types
-open Pxp_document
-
-
-(**********************************************************************)
-(* The box class represents formatted text *)
-(**********************************************************************)
-
-class type formatted_text =
- object
- method output : int -> int -> out_channel -> unit
- (* output initial_indent indent ch:
- * 'initial_indent' is how far the first line should be indented;
- * 'indent' how far the rest. 'ch' is the channel on which the lines
- * are to be printed.
- *)
-
- method multiline : bool
- (* whether the box occupies multiple lines *)
-
- method width_of_last_line : int
- (* returns the width of the last line *)
- end
-;;
-
-
-type text =
- Text of string
- | Box of formatted_text
-;;
-
-
-let textwidth tl =
- let rec compute tl r =
- match tl with
- [] -> r
- | t :: tl' ->
- begin match t with
- Text s ->
- compute tl' (r + String.length s)
- | Box b ->
- if b # multiline then
- compute tl' (b # width_of_last_line)
- else
- compute tl' (r + b # width_of_last_line)
- end
- in
- compute (List.rev tl) 0
-;;
-
-
-class box the_initial_width the_width =
- object (self)
-
- (* The 'initial_width' is the width that is available on the first
- * line of output; the 'width' is the width that is available in the
- * rest.
- *)
-
- val initial_width = the_initial_width
- val width = the_width
-
- (* state: *)
-
- val mutable space_added = false
- val mutable linefeed_added = false
- val mutable is_first_line = true
- val mutable lines = []
- (* lines in reverse order (first line = last element) *)
- val mutable current_line = []
- (* not member of 'lines'; again reverse order *)
- val mutable current_indent = 0
-
- method add_space =
- if not space_added then begin
- space_added <- true;
- linefeed_added <- true;
- current_line <- Text " " :: current_line
- end
-
- method ignore_space =
- space_added <- true;
- linefeed_added <- true
-
- method add_linefeed =
- if not linefeed_added then begin
- linefeed_added <- true;
- if not space_added then
- current_line <- Text " " :: current_line
- end
-
- method ignore_linefeed =
- linefeed_added <- true
-
- method add_newline =
- lines <- current_line :: lines;
- current_line <- [];
- space_added <- true;
- linefeed_added <- true;
- is_first_line <- false;
- current_indent <- 0;
-
- method add_word s =
- (* first try to add 's' to 'current_line' *)
- let current_line' = Text s :: current_line in
- let current_width =
- if is_first_line then initial_width else width in
- if textwidth current_line' + current_indent <= current_width then begin
- (* ok, the line does not become too long *)
- current_line <- current_line';
- space_added <- false;
- linefeed_added <- false
- end
- else begin
- (* The line would be too long. *)
- lines <- current_line :: lines;
- current_line <- [Text s];
- space_added <- false;
- linefeed_added <- false;
- is_first_line <- false;
- current_indent <- 0;
- end
-
- method add_box b =
- current_line <- Box b :: current_line;
- space_added <- false;
- linefeed_added <- false;
-
-
- method width_of_last_line =
- textwidth current_line + current_indent
-
-
- method available_width =
- let current_width =
- if is_first_line then initial_width else width in
- current_width - textwidth current_line - current_indent
-
-
- method multiline =
- lines <> [] or
- (List.exists
- (function
- Text _ -> false
- | Box b -> b # multiline)
- current_line)
-
- method output initial_indent indent ch =
- let eff_lines =
- List.rev
- (current_line :: lines) in
- let rec out_lines cur_indent ll =
- match ll with
- [] -> ()
- | l :: ll' ->
- output_string ch (String.make cur_indent ' ');
- List.iter
- (function
- Text s ->
- output_string ch s
- | Box b ->
- b # output 0 indent ch
- )
- (List.rev l);
- if ll' <> [] then
- output_string ch "\n";
- out_lines indent ll'
- in
- out_lines initial_indent eff_lines
- end
-;;
-
-
-class listitem_box listmark indent totalwidth =
- let initial_newline = String.length listmark >= indent in
- object (self)
- inherit box totalwidth (totalwidth - indent) as super
-
- val extra_indent = indent
-
- initializer
- self # add_word listmark;
- if initial_newline then
- self # add_newline
- else begin
- current_line <- Text (String.make (indent - String.length listmark) ' ')
- :: current_line;
- space_added <- true;
- linefeed_added <- true;
- end
-
-
- method output initial_indent indent ch =
- super # output initial_indent (indent + extra_indent) ch
- end
-;;
-
-
-(**********************************************************************)
-(* Footnotes etc. *)
-(**********************************************************************)
-
-
-class type footnote_printer =
- object
- method footnote_to_box : store_type -> box -> unit
- end
-
-and store_type =
- object
- method alloc_footnote : footnote_printer -> int
- method print_footnotes : box -> unit
- end
-;;
-
-
-class store =
- object (self)
-
- val mutable footnotes = ( [] : (int * footnote_printer) list )
- val mutable next_footnote_number = 1
-
- method alloc_footnote n =
- let number = next_footnote_number in
- next_footnote_number <- number+1;
- footnotes <- footnotes @ [ number, n ];
- number
-
- method print_footnotes (b : box) =
- if footnotes <> [] then begin
- b # add_newline;
- b # add_newline;
- let w = b # available_width in
- b # add_word (String.make (w/3) '-');
- b # add_newline;
- b # add_newline;
- List.iter
- (fun (_,n) ->
- n # footnote_to_box (self : #store_type :> store_type) b)
- footnotes;
- b # add_newline;
- end
- end
-;;
-
-
-
-(**********************************************************************)
-(* The extension objects *)
-(**********************************************************************)
-
-
-class virtual shared =
- object (self)
-
- (* --- default_ext --- *)
-
- val mutable node = (None : shared node option)
-
- method clone = {< >}
- method node =
- match node with
- None ->
- assert false
- | Some n -> n
- method set_node n =
- node <- Some n
-
- (* --- virtual --- *)
-
- method virtual to_box : store -> box -> unit
- (* to_box store b:
- * formats the element using box 'b'
- *)
- end
-;;
-
-
-class only_data =
- object (self)
- inherit shared
-
- val white_space_re = Str.regexp "[ \t]+\\|\n"
-
- method to_box store b =
- let s = self # node # data in
- let splitted = Str.full_split white_space_re s in
- List.iter
- (function
- Str.Delim "\n" ->
- b # add_linefeed
- | Str.Delim _ ->
- b # add_space
- | Str.Text s ->
- b # add_word s)
- splitted
- end
-;;
-
-
-class no_markup =
- object (self)
- inherit shared
-
- method to_box store b =
- List.iter
- (fun n -> n # extension # to_box store b)
- (self # node # sub_nodes)
- end
-;;
-
-
-class readme =
- object (self)
- inherit shared
-
- method to_box store b =
- let title =
- match self # node # attribute "title" with
- Value s -> s
- | _ -> assert false
- in
- let w = b # available_width in
- let line = String.make (w-1) '*' in
- b # add_word line;
- b # add_newline;
- b # add_word title;
- b # add_newline;
- b # add_word line;
- b # add_newline;
- b # add_newline;
- (* process main content: *)
- List.iter
- (fun n -> n # extension # to_box store b)
- (self # node # sub_nodes);
- (* now process footnotes *)
- store # print_footnotes b;
- (* trailer *)
- b # add_newline;
- end
-;;
-
-
-class section the_tag =
- object (self)
- inherit shared
-
- val tag = the_tag
-
- method to_box store b =
- let sub_nodes = self # node # sub_nodes in
- match sub_nodes with
- title_node :: rest ->
- b # add_newline;
- let w = b # available_width in
- let line = String.make (w-1) tag in
- b # add_word line;
- b # add_newline;
- b # add_word (title_node # data);
- b # add_newline;
- b # add_word line;
- b # add_newline;
- List.iter
- (fun n ->
- n # extension # to_box store b)
- rest;
- | _ ->
- assert false
- end
-;;
-
-class sect1 = section '=';;
-class sect2 = section '-';;
-class sect3 = section ':';;
-
-
-class p =
- object (self)
- inherit shared
-
- method to_box store b =
- let within_list =
- match self # node # parent # node_type with
- T_element "li" -> true
- | T_element _ -> false
- | _ -> assert false
- in
- if not within_list then
- b # add_newline;
- let w = b # available_width in
- let b' = new box w w in
- b' # ignore_space;
- List.iter
- (fun n -> n # extension # to_box store b')
- (self # node # sub_nodes);
- b # add_box (b' :> formatted_text);
- b # add_newline;
- end
-;;
-
-
-class li =
- object (self)
- inherit shared
-
- method to_box store b =
- b # add_newline;
- let w = b # available_width in
- let b' = new listitem_box "-" 3 w in
- b' # ignore_space;
- List.iter
- (fun n -> n # extension # to_box store b')
- (self # node # sub_nodes);
- b # add_box (b' :> formatted_text);
- end
-;;
-
-
-class code =
- object (self)
- inherit shared
-
- method to_box store b =
- b # add_newline;
- let w = b # available_width in
- let b' = new box w w in
- b' # ignore_space;
- let data = self # node # data in
- (* convert tabs *)
- let l = String.length data in
- let rec add s i column =
-        (* this is very inefficient, but easy to understand: *)
- if i < l then
- match data.[i] with
- '\t' ->
- let n = 8 - (column mod 8) in
- add (s ^ String.make n ' ') (i+1) (column + n)
- | '\n' ->
- b' # add_word s;
- b' # add_newline;
- add "" (i+1) 0
- | c ->
- add (s ^ String.make 1 c) (i+1) (column + 1)
- else
- if s <> "" then begin
- b' # add_word s;
- b' # add_newline;
- end
- in
- add "" 0 0;
- b # add_box (b' :> formatted_text);
- b # add_newline;
- end
-;;
-
-
-class br =
- object (self)
- inherit shared
-
- method to_box store b =
- b # add_newline;
- end
-;;
-
-
-class footnote =
- object (self)
- inherit shared
-
- val mutable footnote_number = 0
-
- method to_box store b =
- let number =
- store # alloc_footnote (self : #shared :> footnote_printer) in
- footnote_number <- number;
- b # add_space;
- b # add_word ("[" ^ string_of_int number ^ "]");
-
- method footnote_to_box store b =
- let w = b # available_width in
- let n = "[" ^ string_of_int footnote_number ^ "]" in
- let b' = new listitem_box n 6 w in
- b' # ignore_space;
- List.iter
- (fun n -> n # extension # to_box store b')
- (self # node # sub_nodes);
- b # add_box (b' :> formatted_text);
- b # add_newline;
- b # add_newline;
-
- end
-;;
-
-
-class a =
- object (self)
- inherit shared
-
- val mutable footnote_number = 0
- val mutable a_href = ""
-
- method to_box store b =
- let href =
- match self # node # attribute "href" with
- Value v -> "see " ^ v
- | Valuelist _ -> assert false
- | Implied_value ->
- begin match self # node # attribute "readmeref" with
- Value v -> "see file " ^ v
- | Valuelist _ -> assert false
- | Implied_value ->
- ""
- end
- in
- a_href <- href;
- List.iter
- (fun n -> n # extension # to_box store b)
- (self # node # sub_nodes);
- if href <> "" then begin
- let number =
- store # alloc_footnote (self : #shared :> footnote_printer) in
- footnote_number <- number;
- b # add_space;
- b # add_word ("[" ^ string_of_int number ^ "]");
- end
-
- method footnote_to_box store b =
- if a_href <> "" then begin
- let w = b # available_width in
- let n = "[" ^ string_of_int footnote_number ^ "]" in
- let b' = new listitem_box n 6 w in
- b' # ignore_space;
- b' # add_word a_href;
- b # add_box (b' :> formatted_text);
- b # add_newline;
- b # add_newline;
- end
- end
-;;
-
-(**********************************************************************)
-
-open Pxp_yacc
-
-let tag_map =
- make_spec_from_alist
- ~data_exemplar:(new data_impl (new only_data))
- ~default_element_exemplar:(new element_impl (new no_markup))
- ~element_alist:
- [ "readme", (new element_impl (new readme));
- "sect1", (new element_impl (new sect1));
- "sect2", (new element_impl (new sect2));
- "sect3", (new element_impl (new sect3));
- "title", (new element_impl (new no_markup));
- "p", (new element_impl (new p));
- "br", (new element_impl (new br));
- "code", (new element_impl (new code));
- "em", (new element_impl (new no_markup));
- "ul", (new element_impl (new no_markup));
- "li", (new element_impl (new li));
- "footnote", (new element_impl (new footnote : #shared :> shared));
- "a", (new element_impl (new a : #shared :> shared));
- ]
- ()
-;;
-
-
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:31 lpadovan
- * Initial revision
- *
- * Revision 1.5 2000/08/22 14:34:25 gerd
- * Using make_spec_from_alist instead of make_spec_from_mapping.
- *
- * Revision 1.4 2000/08/18 21:15:25 gerd
- * Minor updates because of PXP API changes.
- *
- * Revision 1.3 2000/07/08 17:58:17 gerd
- * Updated because of PXP API changes.
- *
- * Revision 1.2 2000/06/04 20:25:38 gerd
- * Updates because of renamed PXP modules.
- *
- * Revision 1.1 1999/08/22 22:29:32 gerd
- * Initial revision.
- *
- *
- *)
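The central building block of the text backend above is the box class:
words are added one at a time, the box starts a new line whenever the
permitted width would be exceeded, and output takes one indentation for
the first line and one for all following lines. A small sketch of driving
it directly (not part of the package; it assumes the module is compiled
under the name To_text, as in the accompanying main.ml):

    let () =
      let b = new To_text.box 24 24 in
      List.iter
        (fun w -> b # add_word w; b # add_space)
        [ "The"; "box"; "class"; "wraps"; "words"; "into"; "lines";
          "of"; "at"; "most"; "24"; "characters." ];
      b # output 0 2 stdout;
      print_newline ()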
+++ /dev/null
-all: print sort delcol
-
-print: print.ml
- ocamlfind ocamlc -o print -package pxp -linkpkg -custom \
- -predicates pxp_without_utf8 print.ml
-
-sort: sort.ml
- ocamlfind ocamlc -o sort -package pxp -linkpkg -custom \
- -predicates pxp_without_utf8 sort.ml
-
-delcol: delcol.ml
- ocamlfind ocamlc -o delcol -package pxp -linkpkg -custom \
- -predicates pxp_without_utf8 delcol.ml
-
-clean:
- rm -f *.cmo *.cma *.cmi *.cmxa *.a *.o
-
-distclean: clean
- rm -f *~ print sort delcol
-
-CLEAN: clean
+++ /dev/null
-Usage (once sort and print have been compiled):
-  sort -by phone <sample.xml | print
-
-
-These examples illustrate iter_tree, map_tree and find_element.
-
-
-sort: reads an XML file from stdin, sorts the records, and prints the
- result as XML.
-delcol: reads an XML file from stdin, deletes a column from all records,
- and prints the result as XML.
-print: reads an XML file from stdin and prints the records in a readable form.
-
-The XML file must not contain a DTD. The programs assume the fixed DTD
-record.dtd.
-
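delcol is invoked in the same style; for example (again after compiling,
and with record.dtd in the current directory):

  delcol -col phone <sample.xml | print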
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(* Read a record-list, delete a column, and print it as XML *)
-open Pxp_types;;
-open Pxp_document;;
-open Pxp_yacc;;
-
-let delcol col tree =
- map_tree
- ~pre:
- (fun n ->
- match n # node_type with
- T_element name when name = col ->
- raise Skip
- | _ -> n # orphaned_flat_clone)
- tree
-;;
-
-
-let main() =
- let column = ref "" in
- Arg.parse
- [ "-col", Arg.String (fun s -> column := s),
- " (last-name|first-name|phone)";
- ]
- (fun _ -> raise (Arg.Bad "Bad usage"))
- "usage: sort [ options ]";
- if !column = "" then (
- prerr_endline "Column not specified!";
- exit 1;
- );
- if not(List.mem !column ["last-name"; "first-name"; "phone"]) then (
- prerr_endline ("Unknown column: " ^ !column);
- exit 1
- );
- try
- let dtd = parse_dtd_entity default_config (from_file "record.dtd") in
- let tree =
- parse_content_entity default_config (from_channel stdin) dtd default_spec
- in
- print_endline "<?xml encoding='ISO-8859-1'?>";
- (delcol !column tree) # write (Out_channel stdout) `Enc_iso88591
- with
- x ->
- prerr_endline(string_of_exn x);
- exit 1
-;;
-
-
-main();;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/08/24 09:42:52 gerd
- * Updated a comment.
- *
- * Revision 1.1 2000/08/24 09:39:59 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(* Read a record-list structure and print it *)
-open Pxp_types;;
-open Pxp_document;;
-open Pxp_yacc;;
-
-let print tree =
- iter_tree
- ~pre:
- (fun n ->
- match n # node_type with
- T_element "last-name" ->
- print_endline ("Last name: " ^ n # data)
- | T_element "first-name" ->
- print_endline ("First name: " ^ n # data)
- | T_element "phone" ->
- print_endline ("Telephone number: " ^ n # data)
- | _ ->
- ())
- ~post:
- (fun n ->
- match n # node_type with
- T_element "record" ->
- print_newline()
- | _ ->
- ())
- tree
-;;
-
-let main() =
- try
- let dtd = parse_dtd_entity default_config (from_file "record.dtd") in
- let tree =
- parse_content_entity default_config (from_channel stdin) dtd default_spec in
- print tree
- with
- x ->
- prerr_endline(string_of_exn x);
- exit 1
-;;
-
-
-main();;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/08/22 21:57:43 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-<!ELEMENT record-list (record*)>
-<!ELEMENT record (last-name?, first-name?, phone?)>
-<!ELEMENT last-name (#PCDATA)>
-<!ELEMENT first-name (#PCDATA)>
-<!ELEMENT phone (#PCDATA)>
+++ /dev/null
-<?xml encoding="ISO-8859-1"?>
-<record-list>
- <record>
- <last-name>Stolpmann</last-name>
- <first-name>Gerd</first-name>
- <phone>997705</phone>
- </record>
- <record>
- <last-name>Smith</last-name>
- <first-name>Jack</first-name>
- <phone>12345</phone>
- </record>
- <record>
- <last-name>Ützgür</last-name>
- <first-name>xxx</first-name>
- <phone>7654</phone>
- </record>
-</record-list>
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(* Read a record-list, sort it, and print it as XML *)
-open Pxp_types;;
-open Pxp_document;;
-open Pxp_yacc;;
-
-let sort by tree =
- map_tree
- ~pre:
- (fun n -> n # orphaned_flat_clone)
- ~post:
- (fun n ->
- match n # node_type with
- T_element "record-list" ->
- let l = n # sub_nodes in
- let l' = List.sort
- (fun a b ->
- let a_string =
- try (find_element by a) # data
- with Not_found -> "" in
- let b_string =
- try (find_element by b) # data
- with Not_found -> "" in
- Pervasives.compare a_string b_string)
- l in
- n # set_nodes l';
- n
- | _ ->
- n)
- tree
-;;
-
-
-let main() =
- let criterion = ref "last-name" in
- Arg.parse
- [ "-by", Arg.String (fun s -> criterion := s),
- " (last-name|first-name|phone)";
- ]
- (fun _ -> raise (Arg.Bad "Bad usage"))
- "usage: sort [ options ]";
- if not(List.mem !criterion ["last-name"; "first-name"; "phone"]) then (
- prerr_endline ("Unknown criterion: " ^ !criterion);
- exit 1
- );
- try
- let dtd = parse_dtd_entity default_config (from_file "record.dtd") in
- let tree =
- parse_content_entity default_config (from_channel stdin) dtd default_spec
- in
- print_endline "<?xml encoding='ISO-8859-1'?>";
- (sort !criterion tree) # write (Out_channel stdout) `Enc_iso88591
- with
- x ->
- prerr_endline(string_of_exn x);
- exit 1
-;;
-
-
-main();;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/08/30 16:05:44 gerd
- * Minor update
- *
- * Revision 1.2 2000/08/24 09:40:11 gerd
- * Allow that columns are missing.
- *
- * Revision 1.1 2000/08/22 21:57:44 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-*.cmi
-*.cmo
-*.cma
-*.cmx
-*.o
-*.a
-*.cmxa
-*.new
-*.mlf
-*.ml0
-depend
-depend.pkg
-
+++ /dev/null
-# make validate: make bytecode executable
-# make validate.opt: make native executable
-# make clean: remove intermediate files (in this directory)
-# make CLEAN: remove intermediate files (recursively)
-# make distclean: remove any superfluous files (recursively)
-#----------------------------------------------------------------------
-
-pxpvalidate: validate.ml
- ocamlfind ocamlc -o pxpvalidate -package "pxp" -linkpkg validate.ml
-
-pxpvalidate.opt: validate.ml
- ocamlfind ocamlopt -o pxpvalidate.opt -package "pxp" -linkpkg validate.ml
-
-#----------------------------------------------------------------------
-.PHONY: all
-all:
-
-.PHONY: clean
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
-
-.PHONY: CLEAN
-CLEAN: clean
-
-.PHONY: distclean
-distclean: clean
- rm -f *~
- rm -f pxpvalidate pxpvalidate.opt
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-open Pxp_document;;
-open Pxp_yacc;;
-open Pxp_types;;
-
-let error_happened = ref false;;
-
-let print_error e =
- print_endline (string_of_exn e)
-;;
-
-class warner =
- object
- method warn w =
- print_endline ("WARNING: " ^ w)
- end
-;;
-
-let parse debug wf iso88591 filename =
- try
- (* Parse the document: *)
- let parse_fn =
- if wf then parse_wfdocument_entity
- else
- let index = new hash_index in
- parse_document_entity
- ?transform_dtd:None
- ~id_index:(index :> 'ext index)
- in
- let doc =
- parse_fn
- { default_config with
- debugging_mode = debug;
- encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
- idref_pass = true;
- warner = new warner
- }
- (from_file filename)
- default_spec
- in
- ()
- with
- e ->
- (* Print error; remember that there was an error *)
- error_happened := true;
- print_error e
-;;
-
-
-let main() =
- let debug = ref false in
- let wf = ref false in
- let iso88591 = ref false in
- let files = ref [] in
- Arg.parse
- [ "-d", Arg.Set debug,
- " turn debugging mode on";
- "-wf", Arg.Set wf,
- " check only on well-formedness";
- "-iso-8859-1", Arg.Set iso88591,
- " use ISO-8859-1 as internal encoding instead of UTF-8";
- ]
- (fun x -> files := x :: !files)
- "
-usage: pxpvalidate [options] file ...
-
-- checks the validity of XML documents. See below for list of options.
-
-List of options:";
- files := List.rev !files;
- List.iter (parse !debug !wf !iso88591) !files;
-;;
-
-
-main();
-if !error_happened then exit(1);;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:31 lpadovan
- * Initial revision
- *
- * Revision 1.10 2000/08/30 15:58:41 gerd
- * Updated.
- *
- * Revision 1.9 2000/07/14 14:57:30 gerd
- * Updated: warner
- *
- * Revision 1.8 2000/07/14 14:13:15 gerd
- * Cosmetic changes.
- *
- * Revision 1.7 2000/07/14 14:11:06 gerd
- * Updated because of changes of the PXP API.
- *
- * Revision 1.6 2000/07/08 21:53:00 gerd
- * Updated because of PXP interface changes.
- *
- * Revision 1.5 2000/06/04 20:21:55 gerd
- * Updated to new module names.
- *
- * Revision 1.4 2000/05/01 16:44:57 gerd
- * Added check for ID uniqueness.
- * Using new error formatter.
- *
- * Revision 1.3 1999/11/09 22:27:30 gerd
- * The programs returns now an exit code of 1 if one of the
- * XML files produces an error.
- *
- * Revision 1.2 1999/09/01 23:09:56 gerd
- * Added the option -wf that switches to well-formedness checking
- * instead of validation.
- *
- * Revision 1.1 1999/08/14 22:20:53 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-*.cmi
-*.cmo
-*.cma
-*.cmx
-*.o
-*.a
-*.cmxa
-*.new
-*.mlf
-*.ml0
-depend
-depend.pkg
-
+++ /dev/null
-# make xmlforms: make bytecode executable
-# make xmlforms.opt: make native executable
-# make clean: remove intermediate files
-# make CLEAN: remove intermediate files (recursively)
-# make distclean: remove any superfluous files
-# make release: cleanup, create archive, tag CVS module
-# (for developers)
-#----------------------------------------------------------------------
-
-.PHONY: xmlforms
-xmlforms:
- $(MAKE) -f Makefile.code xmlforms
-
-.PHONY: xmlforms.opt
-xmlforms.opt:
- $(MAKE) -f Makefile.code xmlforms.opt
-
-
-.PHONY: clean
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
-
-.PHONY: CLEAN
-CLEAN: clean
- $(MAKE) -C styles CLEAN
-
-.PHONY: distclean
-distclean: clean
- rm -f *~ depend depend.pkg
- rm -f xmlforms xmlforms.opt
- $(MAKE) -C styles distclean
-
-
+++ /dev/null
-#----------------------------------------------------------------------
-# specific rules for this package:
-
-OBJECTS = ds_context.cmo ds_style.cmo
-XOBJECTS = $(OBJECTS:.cmo=.cmx)
-ARCHIVE = xmlforms.cma
-XARCHIVE = xmlforms.cmxa
-NAME = xmlforms
-REQUIRES = camltk str pxp
-
-xmlforms: $(ARCHIVE) ds_app.cmo
- ocamlfind ocamlc -g -o xmlforms -custom -package "$(REQUIRES)" \
- -linkpkg $(ARCHIVE) ds_app.cmo
-
-xmlforms.opt: $(XARCHIVE) ds_app.cmx
-	ocamlfind ocamlopt -o xmlforms.opt -package "$(REQUIRES)" \
-	            -linkpkg $(XARCHIVE) ds_app.cmx
-
-$(ARCHIVE): $(OBJECTS)
- $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS)
-
-$(XARCHIVE): $(XOBJECTS)
- $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS)
-
-#----------------------------------------------------------------------
-# general rules:
-
-OPTIONS =
-OCAMLC = ocamlc -g $(OPTIONS) $(ROPTIONS)
-OCAMLOPT = ocamlopt -p $(OPTIONS) $(ROPTIONS)
-OCAMLDEP = ocamldep $(OPTIONS)
-OCAMLFIND = ocamlfind
-
-depend: *.ml *.mli
- $(OCAMLDEP) *.ml *.mli >depend
-
-depend.pkg: Makefile
- $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg
-
-.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly
-
-.ml.cmx:
- $(OCAMLOPT) -c $<
-
-.ml.cmo:
- $(OCAMLC) -c $<
-
-.mli.cmi:
- $(OCAMLC) -c $<
-
-.mll.ml:
- ocamllex $<
-
-*.mli:
-
-include depend
-include depend.pkg
+++ /dev/null
------------------------------------------------------------------------------
-xmlforms
------------------------------------------------------------------------------
-
-THE IDEA:
-
-This example uses XML for two purposes:
-
-- The "story" and layout of the application is specified in XML
-- The data records are stored in XML
-
-An "application" is a set of "masks" or sequences of masks, and every mask
-is thought as a visible page of the application, containing layout
-elements and functional elements. Layout is specified in TeX-style using
-hboxes, vboxes, hspaces, vspaces. Functional elements are "entries" (input
-box for a string with one line), "textboxes" (input boxes with several
-lines), and buttons.
-
-See styles/ds-style.dtd for the DTD of an application specification, and
-the other xml files in this directory for examples.
-
-The entries and textboxes are bound to "slots", i.e. string variables. When
-the application is started, the slots are read from a file, and when the
-user presses a special "save" button, the slots are written back to this
-file. The format of this data file is again XML; the simplistic DTD can be
-found in styles/ds-object.dtd.
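-
-For example, a data file might look like this (an illustrative record only;
-the element and attribute names follow styles/ds-object.dtd, the slot names
-are taken from the example style files, and the values are made up):
-
-  <?xml version="1.0" encoding="ISO-8859-1"?>
-  <record>
-    <string name='person.name'>Gerd Stolpmann</string>
-    <string name='person.phone-number'>997705</string>
-  </record>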
-
-
-THE IMPLEMENTATION:
-
-There is currently a mapping of the specifications to ocamltk, done by a
-program called "xmlforms".
-
-
-HOW TO COMPILE:
-
-It is assumed that "findlib" is present on your system; see ABOUT-FINDLIB
-in the toplevel directory.
-The "markup" module must have been installed.
-
-- "make xmlforms" produces a bytecode executable "xmlforms"
-- "make xmlforms.opt" produces a native executable "xmlforms.opt"
-
-Note that you cannot start the executables directly; see the next section
-for how to bind them to a concrete application.
-
-
-HOW TO START AN APPLICATION:
-
-As "xmlforms" is a generic executable, there is a simple mechanism to bind
-it to a specific instance of an application. For example, in the "styles"
-subdirectory there is the application specification "crazy-style.xml". To
-start it, make a symlink called "crazy" referring to the "xmlforms"
-binary, set the environment variable DATASHEETS to the directory where the
-DTDs and XML files can be found, and start "crazy":
-
- ln -s ../xmlforms crazy
- DATASHEETS=. crazy my-record.xml
-
-(If you do not set DATASHEETS, a default directory, normally
-"/opt/xmlforms/lib", is used.)
-
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Tk
-open Pxp_types
-open Pxp_document
-open Pxp_yacc
-open Ds_context
-open Ds_style
-
-
-let installdir =
- try Sys.getenv "DATASHEETS" with
- Not_found -> "/opt/xmlforms/lib"
-let style_sysid = ref ""
-let object_dtd_sysid = Filename.concat installdir "ds-object.dtd"
-let object_dtd_root = "record"
-
-
-let rec print_error e =
- print_endline (string_of_exn e)
-;;
-
-
-let run f arg1 arg2 =
- try f arg1 arg2 with
- e -> print_error e
-;;
-
-
-let edit filename cmd =
- (* read in style definition *)
- let index = new hash_index in
- let style =
- parse_document_entity
- ~id_index:(index :> 'ext index)
- default_config
- (from_file !style_sysid)
- tag_map
- in
- let root = style # root in
- root # extension # prepare (index :> 'ext index);
-
- let obj_dtd =
- parse_dtd_entity
- default_config
- (from_file object_dtd_sysid)
- in
- obj_dtd # set_root object_dtd_root;
-
- let topframe = openTk() in
- let context = new context filename obj_dtd index root topframe in
-
- Toplevel.configure topframe [ Width (Centimeters 20.0);
- Height (Centimeters 12.0);
- ];
- Pack.propagate_set topframe false;
- Wm.title_set topframe cmd;
- context # goto (root # extension # start_node_name);
- mainLoop()
-;;
-
-
-let main() =
- let cmd = Filename.basename Sys.argv.(0) in
- match Sys.argv with
- [| _; filename |] ->
- style_sysid := Filename.concat installdir (cmd ^ "-style.xml");
- run edit filename cmd
- | _ ->
- prerr_endline ("usage: " ^ cmd ^ " filename");
- exit(1)
-;;
-
-main();;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.6 2000/07/16 19:36:03 gerd
- * Updated.
- *
- * Revision 1.5 2000/07/08 22:03:11 gerd
- * Updates because of PXP interface changes.
- *
- * Revision 1.4 2000/06/04 20:29:19 gerd
- * Updates because of renamed PXP modules.
- *
- * Revision 1.3 2000/05/01 16:48:45 gerd
- * Using the new error formatter.
- *
- * Revision 1.2 1999/12/17 21:34:29 gerd
- * The name of the root element is set to "record" in the
- * object_dtd; otherwise the parser would not check that the root
- * element is the right element.
- *
- * Revision 1.1 1999/08/21 19:11:05 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Pxp_types
-open Pxp_document
-open Pxp_yacc
-
-let empty_record = new element_impl (Pxp_yacc.default_extension);;
-let empty_dnode = new data_impl Pxp_yacc.default_extension;;
-
-class context the_filename the_obj_dtd the_index the_root the_topframe =
- object (self)
- val filename = the_filename
- val obj_dtd = the_obj_dtd
- val node_index = the_index
- val mutable obj = empty_record # create_element
- the_obj_dtd (T_element "record") []
- val root = the_root
- val topframe = the_topframe
- val mutable wdg = None
-
- val mutable history = ( [| |] : string array )
- val mutable index = 0
-
- initializer
- self # load_obj
-
- method obj = obj
-
- (* history *)
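-    (* The visited masks are recorded in the "history" array of mask names;
-     * "index" points at the mask that is currently displayed.  "goto"
-     * truncates any forward history and appends the new mask name, while
-     * "previous" and "next" only move "index".  All three methods destroy
-     * the widget of the old mask (leave_node) and create the widget of the
-     * newly selected mask (enter_node).
-     *)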
-
- method private leave_node =
- begin match wdg with
- None -> ()
- | Some w -> Tk.destroy w
- end;
- wdg <- None
-
- method private enter_node =
- let where = history.(index) in
- let n =
- try node_index # find where with
- Not_found -> failwith ("Mask not found: " ^ where) in
- let w = n # extension # create_widget topframe self in
- Tk.pack [w] (n # extension # pack_opts @ [ Tk.Expand true] );
- wdg <- Some w
-
-
-
- method previous =
- if index > 0 then
- index <- index - 1
- else
- raise Not_found;
- self # leave_node;
- self # enter_node;
-
-
- method next =
- if index < Array.length history - 1 then
- index <- index + 1
- else
- raise Not_found;
- self # leave_node;
- self # enter_node;
-
-
- method goto where =
- assert (index <= Array.length history);
- self # leave_node;
- let persisting_history =
- if index < Array.length history then
- Array.sub history 0 (index+1)
- else
- history
- in
- history <- Array.concat [ persisting_history; [| where |] ];
- index <- Array.length history - 1;
- self # enter_node;
-
-
- method current =
- if index < Array.length history then
- history.(index)
- else
- raise Not_found
-
-
- (* read, write the slots of object *)
-
- method search_slot name =
- let rec search n =
- match n # node_type with
- T_element "string" ->
- if n # required_string_attribute "name" = name then
- n
- else raise Not_found
- | T_element _ ->
- search_list (n # sub_nodes)
- | T_data ->
- raise Not_found
- | _ ->
- assert false
-
- and search_list l =
- match l with
- x :: l' ->
- (try search x with Not_found -> search_list l')
- | [] ->
- raise Not_found
- in
- search obj
-
- method get_slot name =
- let d = (self # search_slot name) # data in
- d
-
- method set_slot name value =
- let dtd = obj # dtd in
- begin try
- let n = self # search_slot name in
- n # delete
- with
- Not_found -> ()
- end;
- let e_string = empty_record # create_element dtd (T_element "string")
- [ "name", name ] in
- let dnode = empty_dnode # create_data dtd value in
- e_string # add_node dnode;
- e_string # local_validate();
- obj # add_node e_string;
- assert(self # get_slot name = value)
-
- (* load, save object *)
-
-
- method load_obj =
- if Sys.file_exists filename then begin
- obj <- parse_content_entity
- default_config
- (from_file filename)
- obj_dtd
- default_spec
- end
- else begin
- print_string "New file!\n";
- flush stdout
- end
-
-
- method save_obj =
- let fd = open_out filename in
- try
-
- let re1 = Str.regexp "&" in
- let re2 = Str.regexp "<" in
- let re3 = Str.regexp "'" in
- let re4 = Str.regexp ">" in
- let protect s =
-        let s1 = Str.global_replace re1 "&amp;" s in
-        let s2 = Str.global_replace re2 "&lt;" s1 in
-        let s3 = Str.global_replace re3 "&apos;" s2 in
-        let s4 = Str.global_replace re4 "&gt;" s3 in
-        s4
- in
-
- let rec iterate (n : 'node extension node as 'node) =
- match n # node_type with
- T_data ->
- output_string fd (protect (n # data))
- | T_element name ->
- output_string fd ("<" ^ name ^ "\n");
- let anames = n # attribute_names in
- List.iter
- (fun aname ->
- let aval = n # attribute aname in
- let v =
- match aval with
- Value s ->
- aname ^ "='" ^ protect s ^ "'\n"
- | Valuelist l ->
- aname ^ "='" ^ String.concat " " (List.map protect l) ^ "'\n"
- | Implied_value ->
- ""
- in
- output_string fd v)
- anames;
- output_string fd ">";
- List.iter iterate (n # sub_nodes);
- output_string fd ("</" ^ name ^ "\n>");
- | _ ->
- assert false
- in
-
- output_string fd "<?xml version='1.0' encoding='ISO-8859-1'?>\n";
- iterate obj;
- close_out fd
- with
- e ->
- close_out fd;
- raise e
-
- end
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:31 lpadovan
- * Initial revision
- *
- * Revision 1.7 2000/08/30 15:58:49 gerd
- * Updated.
- *
- * Revision 1.6 2000/07/23 20:25:05 gerd
- * Update because of API change: local_validate.
- *
- * Revision 1.5 2000/07/16 19:36:03 gerd
- * Updated.
- *
- * Revision 1.4 2000/07/08 22:03:11 gerd
- * Updates because of PXP interface changes.
- *
- * Revision 1.3 2000/06/04 20:29:19 gerd
- * Updates because of renamed PXP modules.
- *
- * Revision 1.2 2000/05/30 00:09:08 gerd
- * Minor fix.
- *
- * Revision 1.1 1999/08/21 19:11:05 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Pxp_types
-open Pxp_document
-open Ds_context
-
-
-let get_dimension s =
-  let re = Str.regexp "\\([0-9]*\\(\\.[0-9]+\\)?\\)[ \t\n]*\\(px\\|cm\\|in\\|mm\\|pt\\)" in
- if Str.string_match re s 0 then begin
- let number = Str.matched_group 1 s in
- let dim = Str.matched_group 3 s in
- match dim with
- "px" -> Tk.Pixels (int_of_float (float_of_string number))
- | "cm" -> Tk.Centimeters (float_of_string number)
- | "in" -> Tk.Inches (float_of_string number)
- | "mm" -> Tk.Millimeters (float_of_string number)
- | "pt" -> Tk.PrinterPoint (float_of_string number)
- | _ -> assert false
- end
- else
- failwith ("Bad dimension: " ^ s)
-;;
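-
-(* Illustration (not part of the original source): with the rule above,
- *   get_dimension "10.5 cm"   evaluates to   Tk.Centimeters 10.5
- *   get_dimension "4px"       evaluates to   Tk.Pixels 4
- * while a string without one of the recognized unit suffixes raises
- * Failure "Bad dimension: ...".
- *)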
-
-
-class virtual shared =
- object(self)
-
- (* --- default_ext --- *)
-
- val mutable node = (None : shared node option)
-
- method clone = {< >}
- method node =
- match node with
- None ->
- assert false
- | Some n -> n
- method set_node n =
- node <- Some n
-
- (* --- shared attributes: color & font settings --- *)
-
- val mutable fgcolor = (None : string option)
- val mutable bgcolor = (None : string option)
- val mutable font = (None : string option)
-
- method fgcolor =
- (* Get the foreground color: If there is a local value, return it;
- * otherwise ask parent node
- *)
- match fgcolor with
- Some c -> c
- | None -> try self # node # parent # extension # fgcolor with
- Not_found -> failwith "#fgcolor"
-
- method bgcolor =
- (* Get the background color: If there is a local value, return it;
- * otherwise ask parent node
- *)
- match bgcolor with
- Some c -> c
- | None -> try self # node # parent # extension # bgcolor with
- Not_found -> failwith "#bgcolor"
-
- method font =
- (* Get the current font: If there is a local value, return it;
- * otherwise ask parent node
- *)
- match font with
- Some c -> c
- | None -> try self # node # parent # extension # font with
- Not_found -> failwith "#font"
-
- method private init_color_and_font =
- let get_color n =
- try
- match self # node # attribute n with
- Value v -> Some v
- | Implied_value -> None
- | _ -> assert false
- with Not_found -> None in
- fgcolor <- get_color "fgcolor";
- bgcolor <- get_color "bgcolor";
- font <- get_color "font"; (* sic! *)
-
-
- method private bg_color_opt =
- [ Tk.Background (Tk.NamedColor (self # bgcolor)) ]
-
- method private fg_color_opt =
- [ Tk.Foreground (Tk.NamedColor (self # fgcolor)) ]
-
- method private font_opt =
- [ Tk.Font (self # font) ]
-
- (* --- virtual --- *)
-
- method virtual prepare : shared Pxp_yacc.index -> unit
- method virtual create_widget : Widget.widget -> context -> Widget.widget
-
- method pack_opts = ( [] : Tk.options list )
- method xstretchable = false
- method ystretchable = false
-
- method accept (c:context) = ()
-
- method private get_mask =
- (* find parent which is a mask *)
- let rec search n =
- match n # node_type with
- T_element "mask" ->
- n # extension
- | T_element _ ->
- search (n # parent)
- | _ ->
- assert false
- in
- search (self # node)
-
-
- method private accept_mask (c:context) =
- let rec iterate n =
- n # extension # accept c;
- List.iter iterate (n # sub_nodes)
- in
- iterate (self # get_mask # node)
-
-
- method start_node_name =
- (failwith "#start_node_name" : string)
-
- (* --- debug --- *)
-
- method private name =
- let nt = self # node # node_type in
- match nt with
- T_element n -> n
- | T_data -> "#PCDATA"
- | _ -> assert false
-
- end
-;;
-
-
-class default =
- object (self)
- inherit shared
-
- method prepare idx =
- self # init_color_and_font
-
- method create_widget w c =
- failwith "default # create_widget"
- end
-;;
-
-
-let dummy_node = new element_impl (new default);;
-
-class application =
- object (self)
- inherit shared
-
- val mutable start_node = dummy_node
-
- method prepare idx =
- (* prepare this node *)
- self # init_color_and_font;
- if fgcolor = None then fgcolor <- Some "black";
- if bgcolor = None then bgcolor <- Some "white";
- if font = None then font <- Some "fixed";
- let start =
- match self # node # attribute "start" with
- Value v -> v
- | _ -> assert false in
- start_node <- (try idx # find start with
- Not_found -> failwith "Start node not found");
- (* iterate over the subtree *)
- let rec iterate n =
- n # extension # prepare idx;
- List.iter iterate (n # sub_nodes)
- in
- List.iter iterate (self # node # sub_nodes)
-
-
- method start_node_name =
- match self # node # attribute "start" with
- Value v -> v
- | _ -> assert false
-
- method create_widget w c =
- start_node # extension # create_widget w c
-
- method pack_opts =
- start_node # extension # pack_opts
- end
-;;
-
-
-class sequence =
- object (self)
- inherit shared
-
- method prepare idx =
- self # init_color_and_font;
-
- method create_widget w c =
- let node = List.hd (self # node # sub_nodes) in
- node # extension # create_widget w c
-
- method pack_opts =
- let node = List.hd (self # node # sub_nodes) in
- node # extension # pack_opts
- end
-;;
-
-
-class vbox =
- object (self)
- inherit shared
-
- val mutable att_halign = "left"
-
- method prepare idx =
- self # init_color_and_font;
- match self # node # attribute "halign" with
- Value v -> att_halign <- v
- | _ -> assert false
-
- method create_widget w c =
- let f = Frame.create w (self # bg_color_opt) in
- let nodes = self # node # sub_nodes in
- let options =
- match att_halign with
- "left" -> [ Tk.Anchor Tk.W ]
- | "right" -> [ Tk.Anchor Tk.E ]
- | "center" -> [ Tk.Anchor Tk.Center ]
- | _ -> assert false
- in
- List.iter
- (fun n ->
- let opts = n # extension # pack_opts in
- let wdg = n # extension # create_widget f c in
- Tk.pack [wdg] (options @ opts);
- )
- nodes;
- f
-
- method pack_opts =
- match self # xstretchable, self # ystretchable with
- true, false -> [ Tk.Fill Tk.Fill_X; (* Tk.Expand true *) ]
- | false, true -> [ Tk.Fill Tk.Fill_Y; (* Tk.Expand true *) ]
- | true, true -> [ Tk.Fill Tk.Fill_Both; (* Tk.Expand true *) ]
- | false, false -> []
-
- method xstretchable =
- let nodes = self # node # sub_nodes in
- List.exists (fun n -> n # extension # xstretchable) nodes
-
- method ystretchable =
- let nodes = self # node # sub_nodes in
- List.exists (fun n -> n # extension # ystretchable) nodes
-
- end
-
-;;
-
-
-class mask =
- object (self)
-
- inherit vbox
-
- method prepare idx =
- self # init_color_and_font;
- att_halign <- "left"
- end
-;;
-
-
-class hbox =
- object (self)
- inherit shared
-
- val mutable att_width = None
- val mutable att_halign = "left"
- val mutable att_valign = "top"
-
- method prepare idx =
- self # init_color_and_font;
- begin match self # node # attribute "halign" with
- Value v -> att_halign <- v
- | _ -> assert false
- end;
- begin match self # node # attribute "valign" with
- Value v -> att_valign <- v
- | _ -> assert false
- end;
- begin match self # node # attribute "width" with
- Value v -> att_width <- Some (get_dimension v)
- | Implied_value -> att_width <- None
- | _ -> assert false
- end
-
- method create_widget w c =
- let f1 = Frame.create w (self # bg_color_opt) in
- let f_extra =
- match att_width with
- None -> []
- | Some wd ->
- [ Canvas.create f1
- ( [ Tk.Width wd; Tk.Height (Tk.Pixels 0);
- Tk.Relief Tk.Flat;
- Tk.HighlightThickness (Tk.Pixels 0);
- ] @
- self # bg_color_opt ) ]
- in
- let f2 = Frame.create f1 (self # bg_color_opt) in
- let nodes = self # node # sub_nodes in
-
- let outer_pack_opts =
- match att_halign with
- "left" -> [ Tk.Anchor Tk.W ]
- | "right" -> [ Tk.Anchor Tk.E ]
- | "center" -> [ Tk.Anchor Tk.Center ]
- | _ -> assert false
- in
- let inner_pack_opts =
- match att_valign with
- "top" -> [ Tk.Anchor Tk.N ]
- | "bottom" -> [ Tk.Anchor Tk.S ]
- | "center" -> [ Tk.Anchor Tk.Center ]
- | _ -> assert false
- in
- List.iter
- (fun n ->
- let opts = n # extension # pack_opts in
- let wdg = n # extension # create_widget f2 c in
- Tk.pack [wdg] (inner_pack_opts @ [ Tk.Side Tk.Side_Left ] @ opts);
- )
- nodes;
- let extra_opts = self # pack_opts in
- Tk.pack (f_extra @ [f2]) (outer_pack_opts @ extra_opts);
- f1
-
- method pack_opts =
- match self # xstretchable, self # ystretchable with
- true, false -> [ Tk.Fill Tk.Fill_X; (* Tk.Expand true *) ]
- | false, true -> [ Tk.Fill Tk.Fill_Y; (* Tk.Expand true *) ]
- | true, true -> [ Tk.Fill Tk.Fill_Both; (* Tk.Expand true *) ]
- | false, false -> []
-
- method xstretchable =
- let nodes = self # node # sub_nodes in
- List.exists (fun n -> n # extension # xstretchable) nodes
-
- method ystretchable =
- let nodes = self # node # sub_nodes in
- List.exists (fun n -> n # extension # ystretchable) nodes
-
- end
-;;
-
-class vspace =
- object (self)
- inherit shared
-
- val mutable att_height = Tk.Pixels 0
- val mutable att_fill = false
-
- method prepare idx =
- self # init_color_and_font;
- begin match self # node # attribute "height" with
- Value v -> att_height <- get_dimension v
- | _ -> assert false
- end;
- begin match self # node # attribute "fill" with
- Value "yes" -> att_fill <- true
- | Value "no" -> att_fill <- false
- | _ -> assert false
- end
-
-
- method create_widget w c =
- let f = Frame.create w ( self # bg_color_opt ) in
- let strut =
- Canvas.create f
- ( [ Tk.Height att_height; Tk.Width (Tk.Pixels 0);
- Tk.Relief Tk.Flat;
- Tk.HighlightThickness (Tk.Pixels 0);
- ] @
- self # bg_color_opt ) in
- if att_fill then
- Tk.pack [strut] [Tk.Fill Tk.Fill_Y; Tk.Expand true]
- else
- Tk.pack [strut] [];
- f
-
- method pack_opts =
- if att_fill then [ Tk.Fill Tk.Fill_Y; Tk.Expand true ] else []
-
- method ystretchable = att_fill
- end
-;;
-
-class hspace =
- object (self)
- inherit shared
-
-
- val mutable att_width = Tk.Pixels 0
- val mutable att_fill = false
-
- method prepare idx =
- self # init_color_and_font;
- begin match self # node # attribute "width" with
- Value v -> att_width <- get_dimension v
- | _ -> assert false
- end;
- begin match self # node # attribute "fill" with
- Value "yes" -> att_fill <- true
- | Value "no" -> att_fill <- false
- | _ -> assert false
- end
-
-
- method create_widget w c =
- let f = Frame.create w ( self # bg_color_opt ) in
- let strut =
- Canvas.create f
- ( [ Tk.Width att_width; Tk.Height (Tk.Pixels 0);
- Tk.Relief Tk.Flat;
- Tk.HighlightThickness (Tk.Pixels 0);
- ] @
- self # bg_color_opt ) in
- if att_fill then
- Tk.pack [strut] [Tk.Fill Tk.Fill_X; Tk.Expand true]
- else
- Tk.pack [strut] [];
- f
-
- method pack_opts =
- if att_fill then [ Tk.Fill Tk.Fill_X; Tk.Expand true ] else []
-
- method xstretchable = att_fill
- end
-;;
-
-class label =
- object (self)
- inherit shared
-
- val mutable att_textwidth = (-1)
- val mutable att_halign = "left"
-
- method prepare idx =
- self # init_color_and_font;
- att_textwidth <- (match self # node # attribute "textwidth" with
- Value v ->
- let w = try int_of_string v
- with _ -> failwith ("Not an integer: " ^ v) in
- w
- | Implied_value ->
- (-1)
- | _ -> assert false);
- att_halign <- (match self # node # attribute "halign" with
- Value v -> v
- | _ -> assert false);
-
-
- method create_widget w c =
- let opts_textwidth = if att_textwidth < 0 then [] else
- [ Tk.TextWidth att_textwidth ] in
- let opts_halign =
- match att_halign with
- "left" -> [ Tk.Anchor Tk.W ]
- | "right" -> [ Tk.Anchor Tk.E ]
- | "center" -> [ Tk.Anchor Tk.Center ]
- | _ -> assert false
- in
- let opts_content =
- [ Tk.Text (self # node # data) ] in
- let label = Label.create w (opts_textwidth @ opts_halign @
- opts_content @ self # bg_color_opt @
- self # fg_color_opt @ self # font_opt) in
- label
-
- end
-;;
-
-class entry =
- object (self)
- inherit shared
-
- val mutable tv = lazy (Textvariable.create())
- val mutable att_textwidth = (-1)
- val mutable att_slot = ""
-
- method prepare idx =
- self # init_color_and_font;
- tv <- lazy (Textvariable.create());
- att_textwidth <- (match self # node # attribute "textwidth" with
- Value v ->
- let w = try int_of_string v
- with _ -> failwith ("Not an integer: " ^ v) in
- w
- | Implied_value ->
- (-1)
- | _ -> assert false);
- att_slot <- (match self # node # attribute "slot" with
- Value v -> v
- | _ -> assert false);
-
- method create_widget w c =
- let opts_textwidth = if att_textwidth < 0 then [] else
- [ Tk.TextWidth att_textwidth ] in
- let e = Entry.create w ( [ Tk.TextVariable (Lazy.force tv) ] @
- self # fg_color_opt @
- self # bg_color_opt @
- self # font_opt @
- opts_textwidth
- ) in
- let s =
- try c # get_slot att_slot with
- Not_found -> self # node # data in
- Textvariable.set (Lazy.force tv) s;
- e
-
- method accept c =
- c # set_slot att_slot (Textvariable.get (Lazy.force tv))
-
- end
-;;
-
-class textbox =
- object (self)
- inherit shared
-
- val mutable att_textwidth = (-1)
- val mutable att_textheight = (-1)
- val mutable att_slot = ""
- val mutable last_widget = None
-
- method prepare idx =
- self # init_color_and_font;
- att_textwidth <- (match self # node # attribute "textwidth" with
- Value v ->
- let w = try int_of_string v
- with _ -> failwith ("Not an integer: " ^ v) in
- w
- | Implied_value ->
- (-1)
- | _ -> assert false);
- att_textheight <- (match self # node # attribute "textheight" with
- Value v ->
- let w = try int_of_string v
- with _ -> failwith ("Not an integer: " ^ v) in
- w
- | Implied_value ->
- (-1)
- | _ -> assert false);
- att_slot <- (match self # node # attribute "slot" with
- Value v -> v
- | Implied_value -> ""
- | _ -> assert false);
-
-
- method create_widget w c =
- let opts_textwidth = if att_textwidth < 0 then [] else
- [ Tk.TextWidth att_textwidth ] in
- let opts_textheight = if att_textheight < 0 then [] else
- [ Tk.TextHeight att_textheight ] in
- let f = Frame.create w (self # bg_color_opt) in
- let vscrbar = Scrollbar.create f [ Tk.Orient Tk.Vertical ] in
- let e = Text.create f ( [ ] @
- self # fg_color_opt @
- self # bg_color_opt @
- self # font_opt @
- opts_textwidth @ opts_textheight
- ) in
- last_widget <- Some e;
- Scrollbar.configure vscrbar [ Tk.ScrollCommand
- (fun s -> Text.yview e s);
- Tk.Width (Tk.Pixels 9) ];
- Text.configure e [ Tk.YScrollCommand
- (fun a b -> Scrollbar.set vscrbar a b) ];
- let s =
- if att_slot <> "" then
- try c # get_slot att_slot with
- Not_found -> self # node # data
- else
- self # node # data
- in
-      (* Text.insert always appends a newline to the last line, so strip
-       * an existing newline first
-       *)
-      let s' =
-        if s <> "" && s.[String.length s - 1] = '\n' then
- String.sub s 0 (String.length s - 1)
- else
- s in
- Text.insert e (Tk.TextIndex(Tk.End,[])) s' [];
- if att_slot = "" then
- Text.configure e [ Tk.State Tk.Disabled ];
- Tk.pack [e] [ Tk.Side Tk.Side_Left ];
- Tk.pack [vscrbar] [ Tk.Side Tk.Side_Left; Tk.Fill Tk.Fill_Y ];
- f
-
- method accept c =
- if att_slot <> "" then
- match last_widget with
- None -> ()
- | Some w ->
- let s =
- Text.get
- w
- (Tk.TextIndex(Tk.LineChar(1,0),[]))
- (Tk.TextIndex(Tk.End,[])) in
- c # set_slot att_slot s
-
- end
-;;
-
-class button =
- object (self)
- inherit shared
-
- val mutable att_label = ""
- val mutable att_action = ""
- val mutable att_goto = ""
-
- method prepare idx =
- self # init_color_and_font;
- att_label <- (match self # node # attribute "label" with
- Value v -> v
- | _ -> assert false);
- att_action <- (match self # node # attribute "action" with
- Value v -> v
- | _ -> assert false);
- att_goto <- (match self # node # attribute "goto" with
- Value v -> v
- | Implied_value -> ""
- | _ -> assert false);
- if att_action = "goto" then begin
- try let _ = idx # find att_goto in () with
- Not_found -> failwith ("Target `" ^ att_goto ^ "' not found")
- end;
- if att_action = "list-prev" or att_action = "list-next" then begin
- let m = self # get_mask in
- if m # node # parent # node_type <> T_element "sequence" then
- failwith ("action " ^ att_action ^ " must not be used out of <sequence>");
- end;
-
-
- method create_widget w c =
- let cmd () =
- self # accept_mask c;
- match att_action with
- "goto" ->
- c # goto att_goto
- | "save" ->
- c # save_obj
- | "exit" ->
- Protocol.closeTk()
- | "save-exit" ->
- c # save_obj;
- Protocol.closeTk()
- | "list-prev" ->
- let m = self # get_mask # node in
- let s = m # parent in
- let rec search l =
- match l with
- x :: y :: l' ->
- if y == m then
- match x # attribute "name" with
- Value s -> c # goto s
- | _ -> assert false
- else
- search (y :: l')
- | _ -> ()
- in
- search (s # sub_nodes)
- | "list-next" ->
- let m = self # get_mask # node in
- let s = m # parent in
- let rec search l =
- match l with
- x :: y :: l' ->
- if x == m then
- match y # attribute "name" with
- Value s -> c # goto s
- | _ -> assert false
- else
- search (y :: l')
- | _ -> ()
- in
- search (s # sub_nodes)
- | "hist-prev" ->
- (try c # previous with Not_found -> ())
- | "hist-next" ->
- (try c # next with Not_found -> ())
- | _ -> ()
- in
- let b = Button.create w ( [ Tk.Text att_label; Tk.Command cmd ] @
- self # fg_color_opt @
- self # bg_color_opt @
- self # font_opt ) in
- b
-
-
- end
-;;
-
-
-(**********************************************************************)
-
-open Pxp_yacc
-
-let tag_map =
- make_spec_from_mapping
- ~data_exemplar:(new data_impl (new default))
- ~default_element_exemplar:(new element_impl (new default))
- ~element_mapping:
- (let m = Hashtbl.create 50 in
- Hashtbl.add m "application"
- (new element_impl (new application));
- Hashtbl.add m "sequence"
- (new element_impl (new sequence));
- Hashtbl.add m "mask"
- (new element_impl (new mask));
- Hashtbl.add m "vbox"
- (new element_impl (new vbox));
- Hashtbl.add m "hbox"
- (new element_impl (new hbox));
- Hashtbl.add m "vspace"
- (new element_impl (new vspace));
- Hashtbl.add m "hspace"
- (new element_impl (new hspace));
- Hashtbl.add m "label"
- (new element_impl (new label));
- Hashtbl.add m "entry"
- (new element_impl (new entry));
- Hashtbl.add m "textbox"
- (new element_impl (new textbox));
- Hashtbl.add m "button"
- (new element_impl (new button));
- m)
- ()
-;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:31 lpadovan
- * Initial revision
- *
- * Revision 1.5 2000/08/30 15:58:49 gerd
- * Updated.
- *
- * Revision 1.4 2000/07/16 19:36:03 gerd
- * Updated.
- *
- * Revision 1.3 2000/07/08 22:03:11 gerd
- * Updates because of PXP interface changes.
- *
- * Revision 1.2 2000/06/04 20:29:19 gerd
- * Updates because of renamed PXP modules.
- *
- * Revision 1.1 1999/08/21 19:11:05 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-.PHONY: all
-all:
-
-.PHONY: clean
-clean:
-
-.PHONY: CLEAN
-CLEAN: clean
-
-.PHONY: distclean
-distclean: clean
- rm -f *~
-
-.PHONY: symlinks
-symlinks:
- for x in *-style.xml; do ln -s ../xmlforms $${x%-style.xml} || true; done
+++ /dev/null
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!-- $Id$ -->
-
-<!DOCTYPE application SYSTEM "ds-style.dtd" [
-<!ENTITY h1.font '-*-helvetica-bold-r-*-*-18-*-*-*-*-*-*-*'>
-<!ENTITY h2.font '-*-helvetica-bold-r-*-*-14-*-*-*-*-*-*-*'>
-<!ENTITY h3.font '-*-helvetica-bold-r-*-*-12-*-*-*-*-*-*-*'>
-<!ENTITY dfl.font '-*-helvetica-medium-r-*-*-12-*-*-*-*-*-*-*'>
-<!ENTITY dfl.bold.font '-*-helvetica-bold-r-*-*-12-*-*-*-*-*-*-*'>
-<!ENTITY in.font '-*-lucidatypewriter-medium-r-*-*-12-*-*-*-*-*-*-*'>
-
-<!ENTITY bg.button 'lightblue'>
-<!ENTITY bg.hilfe '#E0E0E0'>
-<!ENTITY fg.hilfe 'black'>
-
-<!ENTITY headline
- '<vbox>
- <label font="&h2.font;" fgcolor="darkgreen">More about person...</label>
- <vspace height="2mm"/>
- <hbox>
- <hbox bgcolor="black">
- <hspace width="18cm"/>
- <vbox><vspace height="2px"/></vbox>
- </hbox>
- <hspace width="5mm"/>
- </hbox>
- <vspace height="3mm"/>
- </vbox>'>
-
-<!ENTITY help.headline
- '<vbox>
- <label font="&h2.font;" fgcolor="darkgreen">Help</label>
- <vspace height="2mm"/>
- <hbox>
- <hbox bgcolor="black">
- <hspace width="18cm"/>
- <vbox><vspace height="2px"/></vbox>
- </hbox>
- <hspace width="5mm"/>
- </hbox>
- <vspace height="3mm"/>
- </vbox>'>
-
-<!ENTITY info.headline
- '<vbox>
- <label font="&h2.font;" fgcolor="darkgreen">About xmlforms</label>
- <vspace height="2mm"/>
- <hbox>
- <hbox bgcolor="black">
- <hspace width="18cm"/>
- <vbox><vspace height="2px"/></vbox>
- </hbox>
- <hspace width="5mm"/>
- </hbox>
- <vspace height="3mm"/>
- </vbox>'>
-
-<!ENTITY footline
- '<vbox>
- <hbox>
- <hbox bgcolor="black">
- <hspace width="18cm"/>
- <vbox><vspace height="2px"/></vbox>
- </hbox>
- <hspace width="5mm"/>
- </hbox>
- <vspace height="2mm"/>
- <hbox>
- <button bgcolor="&bg.button;" label="Previous" action="list-prev"/>
- <button bgcolor="&bg.button;" label="Next" action="list-next"/>
- <hspace width="0pt" fill="yes"/>
- <button bgcolor="&bg.button;" label="Home" goto="start-page"/>
- <hspace width="5mm"/>
- </hbox>
- </vbox>'>
-
-<!ENTITY help.footline
- '<vbox>
- <hbox>
- <hbox bgcolor="black">
- <hspace width="18cm"/>
- <vbox><vspace height="2px"/></vbox>
- </hbox>
- <hspace width="5mm"/>
- </hbox>
- <vspace height="2mm"/>
- <hbox>
- <button bgcolor="&bg.button;" label="Back" action="hist-prev"/>
- <hspace width="0pt" fill="yes"/>
- </hbox>
- </vbox>'>
-
-<!ENTITY info.footline '&help.footline;'>
-
-]>
-
-<!-- ***************************************************************** -->
-<!-- ************************ ************************** -->
-<!-- ************************ Starting page ************************** -->
-<!-- ************************ ************************** -->
-<!-- ***************************************************************** -->
-
-<application start="start-page"
- font="&dfl.font;"
->
-
- <mask name="start-page">
- <vspace height="5mm"/>
- <hbox>
- <hspace width="5mm"/>
- <vbox>
- <vbox font="&h1.font;">
- <label>A sample xmlforms application:</label>
- <label>Address editor</label>
- </vbox>
- <vspace height="1cm"/>
- <vbox>
- <hbox>
- <hbox width="6cm" halign="right">
- <label>Name:</label>
- </hbox>
- <entry font="&in.font;" textwidth="40" slot="person.name"/>
- </hbox>
- <hbox>
- <hbox width="6cm" halign="right">
- <label>Postal address:</label>
- </hbox>
- <textbox font="&in.font;"
- textwidth="40"
- textheight="5"
- slot="person.address"/>
- </hbox>
- <hbox>
- <hbox width="6cm" halign="right">
- <label>Email:</label>
- </hbox>
- <entry font="&in.font;" textwidth="40" slot="person.email"/>
- </hbox>
- <hbox>
- <hbox width="6cm" halign="right">
- <label>Telephone number:</label>
- </hbox>
- <entry font="&in.font;" textwidth="20" slot="person.phone-number"/>
- </hbox>
- </vbox>
- <vspace height="1cm"/>
- <hbox>
- <hspace width="3cm"/>
- <hbox width="8cm">
- <vbox>
- <button bgcolor="&bg.button;"
- label="More about this person..."
- goto="person-list"/>
- <button bgcolor="&bg.button;"
- label="Save"
- action="save"/>
- </vbox>
- </hbox>
- <hbox>
- <vbox>
- <button bgcolor="&bg.button;"
- label="Info..."
- goto="info"/>
- <button bgcolor="&bg.button;"
- label="Exit (without saving)"
- action="exit"/>
- </vbox>
- </hbox>
- </hbox>
- <vspace height="0px" fill="yes"/>
- <hbox>
- <hspace width="0px" fill="yes"/>
- </hbox>
- </vbox>
- </hbox>
- </mask>
-
- <!-- ***************************************************************** -->
- <!-- ********************** **************************** -->
- <!-- ********************** More about... **************************** -->
- <!-- ********************** **************************** -->
- <!-- ***************************************************************** -->
-
- <sequence name="person-list">
- <mask name="Department">
- <!-- ************************** HEADER ************************** -->
- <vspace height="5mm"/>
- <hbox>
- <hspace width="5mm"/>
- <vbox>
- &headline;
- <!-- ************************** CONTENT ************************* -->
- <label font="&h1.font;">Department</label>
- <vspace height="3mm"/>
- <label>The person is working in this department:</label>
- <hbox>
- <hspace width="1cm"/>
- <entry font="&in.font;"
- textwidth="70"
- slot="person.department"/>
- </hbox>
- <vspace height="3mm"/>
- <label>The project he/she is working for:</label>
- <hbox>
- <hspace width="1cm"/>
- <textbox font="&in.font;"
- textwidth="70"
- textheight="5"
- slot="person.project"/>
- </hbox>
- <vspace height="3mm"/>
- <button bgcolor="&bg.button;"
- label="Help"
- goto="help.department"/>
- <!-- ************************************************************ -->
- </vbox>
- </hbox>
- <!-- ************************** FOOTER ************************** -->
- <vspace height="0px" fill="yes"/>
- <hbox>
- <hspace width="5mm"/>
- &footline;
- </hbox>
- </mask>
-
-
- <mask name="business-contacts">
- <!-- ************************** HEADER ************************** -->
- <vspace height="5mm"/>
- <hbox>
- <hspace width="5mm"/>
- <vbox>
- &headline;
- <!-- ************************** CONTENT ************************* -->
- <label font="&h1.font;">Business Contacts</label>
- <vspace height="3mm"/>
- <label>Notes about contacts:</label>
- <hbox>
- <hspace width="1cm"/>
- <textbox font="&in.font;"
- textwidth="70"
- textheight="10"
- slot="person.contacts"/>
- </hbox>
- <vspace height="3mm"/>
- <button bgcolor="&bg.button;"
- label="Help"
- goto="help.business-contacts"/>
- <!-- ************************************************************ -->
- </vbox>
- </hbox>
- <!-- ************************** FOOTER ************************** -->
- <vspace height="0px" fill="yes"/>
- <hbox>
- <hspace width="5mm"/>
- &footline;
- </hbox>
- </mask>
-
- </sequence>
-
- <!-- ***************************************************************** -->
- <!-- ***************************** ***************************** -->
- <!-- ***************************** Help ***************************** -->
- <!-- ***************************** ***************************** -->
- <!-- ***************************************************************** -->
-
- <mask name="help.department">
- <!-- ************************** HEADER ************************** -->
- <vspace height="5mm"/>
- <hbox>
- <hspace width="5mm"/>
- <vbox>
- &help.headline;
- <!-- ************************** CONTENT ************************* -->
- <label font="&h1.font;">Department</label>
- <vspace height="3mm"/>
- <textbox fgcolor="&fg.hilfe;"
- bgcolor="&bg.hilfe;"
- textheight="15"
- textwidth="70"
->The help system should be designed to help you fill out your form, but
-writing help texts is so stupid...
-</textbox>
- <!-- ************************************************************ -->
- </vbox>
- </hbox>
- <!-- ************************** FOOTER ************************** -->
- <vspace height="0px" fill="yes"/>
- <hbox>
- <hspace width="5mm"/>
- &help.footline;
- </hbox>
- </mask>
-
- <mask name="help.business-contacts">
- <!-- ************************** HEADER ************************** -->
- <vspace height="5mm"/>
- <hbox>
- <hspace width="5mm"/>
- <vbox>
- &help.headline;
- <!-- ************************** CONTENT ************************* -->
- <label font="&h1.font;">Business Contacts</label>
- <vspace height="3mm"/>
- <textbox fgcolor="&fg.hilfe;"
- bgcolor="&bg.hilfe;"
- textheight="15"
- textwidth="70"
->It is often helpful to remember the last telephone and/or email contacts
-quickly.
-</textbox>
- <!-- ************************************************************ -->
- </vbox>
- </hbox>
- <!-- ************************** FOOTER ************************** -->
- <vspace height="0px" fill="yes"/>
- <hbox>
- <hspace width="5mm"/>
- &help.footline;
- </hbox>
- </mask>
-
- <!-- ***************************************************************** -->
- <!-- ***************************************************************** -->
- <!-- ****************************** Info ***************************** -->
- <!-- ***************************************************************** -->
- <!-- ***************************************************************** -->
-
- <mask name="info">
- <!-- ************************** HEADER ************************** -->
- <vspace height="5mm"/>
- <hbox>
- <hspace width="5mm"/>
- <vbox>
- &info.headline;
- <!-- ************************** CONTENT ************************* -->
- <vspace height="3mm"/>
- <textbox fgcolor="&fg.hilfe;"
- bgcolor="&bg.hilfe;"
- textheight="15"
- textwidth="70"
-><![CDATA[About "xmlforms":
-Version <unknown>,
-written by Gerd Stolpmann
-
-Contact: Gerd.Stolpmann@darmstadt.netsurf.de
-]]></textbox>
- <!-- ************************************************************ -->
- </vbox>
- </hbox>
- <!-- ************************** FOOTER ************************** -->
- <vspace height="0px" fill="yes"/>
- <hbox>
- <hspace width="5mm"/>
- &info.footline;
- </hbox>
- </mask>
-
-
-</application>
+++ /dev/null
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE application SYSTEM "./ds-style.dtd" [
-  <!ENTITY vz '<button label="&lt;" action="list-prev"/>
-               <button label="&gt;" action="list-next"/>
-               <button label="exit" goto="first"/>'>
-]
->
-
-<application start="first">
-<mask name="first" font="-*-lucidatypewriter-medium-r-*-*-12-*-*-*-*-*-*-*">
-<vbox halign="right">
-<label>one</label>
-<label bgcolor="green">Number two</label>
-<hbox width="4cm" halign="center" valign="bottom" bgcolor="red" fgcolor="blue">
-<vbox>
-<label>a1</label>
-<vspace height="1cm"/>
-<label>a2</label>
-</vbox>
-<label>b
-c</label>
-</hbox>
-</vbox>
-<textbox slot="q" textheight="5" textwidth="60">A Text</textbox>
-<button label="sequence" goto="seq"/>
-<label bgcolor="blue">A very long label, bigger than the box</label>
-<vspace height="2cm" fill="yes"/>
-<hbox><button label="left" bgcolor="yellow" goto="second"/><hspace width="0px" fill="yes"/>
-<entry slot="a" textwidth="10" fgcolor="red">right</entry>
-</hbox>
-</mask>
-
-<mask name="second">
-<button label="main" bgcolor="yellow" goto="first"/>
-<button label="previous" action="hist-prev"/>
-<button label="save" action="save"/>
-</mask>
-
-<sequence name="seq">
-<mask name="n1">
-<label>n1</label>
-&vz;
-</mask>
-<mask name="n2">
-<label>n2</label>
-&vz;
-</mask>
-<mask name="n3">
-<label>n3</label>
-&vz;
-</mask>
-<mask name="n4">
-<label>n4</label>
-&vz;
-</mask>
-<mask name="n5">
-<label>n5</label>
-&vz;
-</mask>
-</sequence>
-
-</application>
+++ /dev/null
-<?xml encoding="ISO-8859-1"?>
-<!-- $Id$ -->
-
-<!ELEMENT record (string)*>
-
-<!ELEMENT string (#PCDATA)>
-<!ATTLIST string
- name ID #REQUIRED>
+++ /dev/null
-<?xml encoding="ISO-8859-1"?>
-<!-- $Id$ -->
-
-<!-- entities describing content models -->
-
-<!ENTITY % vertical.only "vspace">
-<!ENTITY % horizontal.only "hspace">
-<!ENTITY % mixed "vbox|hbox|label|entry|textbox|button">
-
-
-<!-- entities describing attribute type -->
-
-<!ENTITY % att.valign "(top|bottom|center)">
-<!ENTITY % att.halign "(left|right|center)">
-
-
-<!ENTITY % default.atts "bgcolor CDATA #IMPLIED
- fgcolor CDATA #IMPLIED
- font CDATA #IMPLIED">
-
-<!-- "bgcolor", "fgcolor", and "font" are attribute applicable to every
- element. They set the background color, foreground color, resp. the
- font of the element and all sub elements that do not specifiy another
- value.
- Colors: all X windows names are allowed, e.g. "black", "white",
- "lavenderblush", or "#A0B1C2".
- Font: again X windows font names
- -->
-
-
-<!ELEMENT application (mask|sequence)+>
-<!ATTLIST application
- start IDREF #REQUIRED
- %default.atts;
->
-
-<!-- An "application" is the top-level element. The "start" attribute must
- contain the name of the mask or mask sequence to start with.
- -->
-
-
-<!ELEMENT sequence (mask)+>
-<!ATTLIST sequence
- name ID #REQUIRED
- %default.atts;
->
-
-<!-- A "sequence" of masks. In a sequence, you can use the special button
- actions "list-prev" and "list-next" that go to the previous mask resp.
- the next mask of the sequence.
- -->
-
-
-<!ELEMENT mask (%vertical.only;|%horizontal.only;|%mixed;)*>
-<!ATTLIST mask
- name ID #REQUIRED
- %default.atts;
->
-
-<!-- A "mask" contains layout and functional elements of a visible page. -->
-
-
-<!ELEMENT vbox (%vertical.only;|%mixed;)*>
-<!ATTLIST vbox
- halign %att.halign; "left"
- %default.atts;
->
-
-<!-- A "vbox" (vertical box) renders the inner material in vertical direction.
- The "halign" attribute specifies whether the inner material should be
- left-aligned, right-aligned, or centered.
- -->
-
-<!ELEMENT hbox (%horizontal.only;|%mixed;)*>
-<!ATTLIST hbox
- width CDATA #IMPLIED
- halign %att.halign; "left"
- valign %att.valign; "top"
- %default.atts;
->
-
-<!-- An "hbox" (horizontal box) renders the inner material in horizontal
- direction. The "valign" attribute specifies whether the inner material
- should be top-aligned, bottom-aligned, or centered.
-     Normally, the width of an hbox is the sum of the widths of its members,
-     but you can also widen a box by specifying the "width" attribute. This
-     is a number with a dimension, e.g. "10.5 cm", "105 mm", "4.13 in". Other
-     dimensions are "pt" (points) and "px" (pixels).
- If "width" is given, you may also set "halign" (see vbox for possible
- values).
- -->
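-
-<!-- An illustrative hbox (compare the example style files in this
-     directory; not part of the DTD itself):
-
-       <hbox width="6cm" halign="right">
-         <label>Name:</label>
-       </hbox>
-
-     This right-aligns the label within a box that is 6 cm wide.
-  -->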
-
-<!ELEMENT vspace EMPTY>
-<!ATTLIST vspace
- height CDATA #REQUIRED
- fill (yes|no) "no"
- %default.atts;
->
-
-<!-- "vspace" is a vertical space of given "height" (again a number with a
- dimension, see hbox).
- If "fill" is "yes", the space is extended as much as possible.
- -->
-
-<!ELEMENT hspace EMPTY>
-<!ATTLIST hspace
- width CDATA #REQUIRED
- fill (yes|no) "no"
- %default.atts;
->
-
-<!-- "hspace" is a horizontal space of given "width" (again a number with a
- dimension, see hbox).
- If "fill" is "yes", the space is extended as much as possible.
- -->
-
-<!ELEMENT label (#PCDATA)>
-<!ATTLIST label
- textwidth CDATA #IMPLIED
- halign %att.halign; "left"
- %default.atts;
->
-
-<!-- A "label" is a piece of constant text. The text is included as #PCDATA
- in the element.
- You may set "textwidth" to a (dimensionless) number to specify a fixed
- width. In this case, "halign" determines the horizontal alignment.
- -->
-
-<!ELEMENT entry (#PCDATA)>
-<!ATTLIST entry
- textwidth CDATA #REQUIRED
- slot NMTOKEN #REQUIRED
- %default.atts;
->
-
-<!-- An "entry" is an editable text line. "textwidth" specifies the width of
- the visible line (but the contents can be longer). "slot" is the name of
- a slot that is associated with the element.
- If the element contains #PCDATA, this is used as default value if
- the slot has not yet been filled.
- -->
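-
-<!-- An illustrative entry (the slot name follows the example style files;
-     the default text "N.N." is made up):
-
-       <entry textwidth="40" slot="person.name">N.N.</entry>
-
-     The #PCDATA content "N.N." is shown until the slot "person.name"
-     has been filled and saved.
-  -->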
-
-<!ELEMENT textbox (#PCDATA)>
-<!ATTLIST textbox
- textwidth CDATA #REQUIRED
- textheight CDATA #REQUIRED
- slot NMTOKEN #IMPLIED
- %default.atts;
->
-
-<!-- A "textbox" is a text box with dimensions "textwidth" and "textheight"
-     (both dimensionless numbers).
- "slot" is the name of a slot that is associated with the element.
- If the element contains #PCDATA, this is used as default value if
- the slot has not yet been filled.
- If you omit "slot", the #PCDATA is displayed read-only.
- -->
-
-<!ELEMENT button EMPTY>
-<!ATTLIST button
- label CDATA #REQUIRED
- action (goto|save|exit|save-exit|list-prev|list-next|
- hist-prev|hist-next) "goto"
- goto IDREF #IMPLIED
- %default.atts;
->
-
-<!-- A "button" is specified as follows:
- - "label" is what is written on the button
- - "action" specifies what to if the button is pressed:
- - "goto": jump to another mask or mask sequence whose name is given
- in the attribute "goto"
- - "save": save the record
- - "exit": exit the application
- - "save-exit": save, then exit
- - "list-prev": jump to the previous mask in the sequence
- - "list-next": jump to the next mask in the sequence
- - "hist-prev": jump to the mask that has actually been the predecessor
- - "hist-next": jump to the mask that has actually been the successor
- -->
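-
-<!-- Two illustrative buttons (compare the example style files in this
-     directory): the first jumps to the mask or sequence named
-     "person-list", the second saves the record and leaves the
-     application.
-
-       <button label="More..." goto="person-list"/>
-       <button label="Save and exit" action="save-exit"/>
-  -->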
-
-
+++ /dev/null
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!DOCTYPE application SYSTEM "./ds-style.dtd" [
-]>
-
-<application start="first">
-<mask name="first" font="-*-lucidatypewriter-medium-r-*-*-12-*-*-*-*-*-*-*">
-<label>This is a label</label>
-</mask>
-</application>
+++ /dev/null
-all_iso88591: generate_iso88591
- $(MAKE) -f Makefile.code all_iso88591
-
-opt_iso88591: generate_iso88591
- $(MAKE) -f Makefile.code opt_iso88591
-
-all_utf8: generate_utf8
- $(MAKE) -f Makefile.code all_utf8
-
-opt_utf8: generate_utf8
- $(MAKE) -f Makefile.code opt_utf8
-
-
-
-generate_iso88591:
- $(MAKE) -f Makefile.generate all_iso88591
- rm -f objects_iso88591 objects_utf8
- $(MAKE) -f Makefile.generate objects_iso88591
- touch objects_utf8
- $(MAKE) -f Makefile.generate depend
-
-generate_utf8:
- $(MAKE) -f Makefile.generate all_utf8
- rm -f objects_iso88591 objects_utf8
- $(MAKE) -f Makefile.generate objects_utf8
- touch objects_iso88591
- $(MAKE) -f Makefile.generate depend
-
-
-
-clean:
-	touch depend objects_iso88591 objects_utf8
- $(MAKE) -f Makefile.code clean
- $(MAKE) -f Makefile.generate clean
+++ /dev/null
-
-LARCHIVE_iso88591 = pxp_lex_iso88591.cma
-LARCHIVE_utf8 = pxp_lex_utf8.cma
-XLARCHIVE_iso88591 = $(LARCHIVE_iso88591:.cma=.cmxa)
-XLARCHIVE_utf8 = $(LARCHIVE_utf8:.cma=.cmxa)
-
-# LOBJECTS_* and XLOBJECTS_* are included from "objects_*":
-include objects_iso88591
-include objects_utf8
-
-#----------------------------------------------------------------------
-
-all_iso88591: $(LARCHIVE_iso88591)
-opt_iso88591: $(XLARCHIVE_iso88591)
-all_utf8: $(LARCHIVE_utf8)
-opt_utf8: $(XLARCHIVE_utf8)
-
-$(LARCHIVE_iso88591): $(LOBJECTS_iso88591)
- $(OCAMLC) -a -o $(LARCHIVE_iso88591) $(LOBJECTS_iso88591)
-
-$(XLARCHIVE_iso88591): $(XLOBJECTS_iso88591)
- $(OCAMLOPT) -a -o $(XLARCHIVE_iso88591) $(XLOBJECTS_iso88591)
-
-$(LARCHIVE_utf8): $(LOBJECTS_utf8)
- $(OCAMLC) -a -o $(LARCHIVE_utf8) $(LOBJECTS_utf8)
-
-$(XLARCHIVE_utf8): $(XLOBJECTS_utf8)
- $(OCAMLOPT) -a -o $(XLARCHIVE_utf8) $(XLOBJECTS_utf8)
-
-#----------------------------------------------------------------------
-# general rules:
-
-OPTIONS =
-OCAMLC = ocamlfind ocamlc -g -I .. -package netstring $(OPTIONS)
-OCAMLOPT = ocamlfind ocamlopt -p -I .. -package netstring $(OPTIONS)
-
-.SUFFIXES: .cmo .cmi .cmx .ml .mli
-
-.ml.cmx:
- $(OCAMLOPT) -c $<
-
-.ml.cmo:
- $(OCAMLC) -c $<
-
-.mli.cmi:
- $(OCAMLC) -c $<
-
-
-*.mli:
-
-clean:
- rm -f *.cmo *.cmx *.cma *.cmxa *.cmi *.o *.a
-
-include depend
+++ /dev/null
-LEXERSRC = pxp_lex_misc.src \
- pxp_lex_document.src \
- pxp_lex_content.src \
- pxp_lex_within_tag.src \
- pxp_lex_document_type.src \
- pxp_lex_declaration.src \
- pxp_lex_dtd_string.src \
- pxp_lex_content_string.src \
- pxp_lex_name_string.src
-
-OTHERSRC = open_pxp_lex_aux_iso88591.src \
- pxp_lex_aux.src \
- pxp_lex_defs_iso88591.def
-
-LEXERMLL_iso88591 = $(LEXERSRC:.src=_iso88591.mll)
-LEXERMLL_utf8 = $(LEXERSRC:.src=_utf8.mll)
-
-LEXERML_iso88591 = $(LEXERSRC:.src=_iso88591.ml)
-LEXERML_utf8 = $(LEXERSRC:.src=_utf8.ml)
-
-LEXERCMO_iso88591 = pxp_lex_aux_iso88591.cmo $(LEXERSRC:.src=_iso88591.cmo)
-LEXERCMO_utf8 = pxp_lex_aux_utf8.cmo $(LEXERSRC:.src=_utf8.cmo)
-
-LEXERCMX_iso88591 = $(LEXERCMO_iso88591:.cmo=.cmx)
-LEXERCMX_utf8 = $(LEXERCMO_utf8:.cmo=.cmx)
-
-.PHONY: all_iso88591
-all_iso88591: iso88591_done
-
-.PHONY: all_utf8
-all_utf8: utf8_done
-
-iso88591_done: $(LEXERSRC) $(OTHERSRC)
- ../tools/insert_variant -variant iso88591 $(LEXERSRC)
- for file in $(LEXERMLL_iso88591); do ocamllex $$file; done
- touch iso88591_done
-
-utf8_done: $(LEXERSRC) $(OTHERSRC) pxp_lex_defs_utf8.def
- ../tools/insert_variant -variant utf8 $(LEXERSRC)
- for file in $(LEXERMLL_utf8); do ocamllex $$file; done
- touch utf8_done
-
-pxp_lex_defs_utf8.def: pxp_lex_defs_generic.def pxp_lex_defs_drv_utf8.def
- ../tools/ucs2_to_utf8/ucs2_to_utf8 <pxp_lex_defs_generic.def \
- >pxp_lex_defs_utf8.def || \
- rm -f pxp_lex_defs_utf8.def
- cat pxp_lex_defs_drv_utf8.def >>pxp_lex_defs_utf8.def
-
-objects_iso88591:
- echo LOBJECTS_iso88591 = $(LEXERCMO_iso88591) >objects_iso88591
- echo XLOBJECTS_iso88591 = $(LEXERCMX_iso88591) >>objects_iso88591
-
-objects_utf8:
- echo LOBJECTS_utf8 = $(LEXERCMO_utf8) >objects_utf8
- echo XLOBJECTS_utf8 = $(LEXERCMX_utf8) >>objects_utf8
-
-depend: *.ml *.mli
- ocamldep *.ml *.mli >depend
-
-.PHONY: clean
-clean:
- rm -f $(LEXERMLL_iso88591) $(LEXERML_iso88591) iso88591_done \
- $(LEXERMLL_utf8) $(LEXERML_utf8) utf8_done \
- pxp_lex_defs_utf8.def \
- objects_iso88591 objects_utf8 depend
-
-*.mli:
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Pxp_lex_aux_iso88591
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Pxp_lex_aux_utf8
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Pxp_lex_misc_iso88591
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Pxp_lex_misc_utf8
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
- class dummy_entity = object end
-
- let dummy_entity = ( new dummy_entity : entity_id )
-
- (* The following tokens are pre-allocated to reduce the load on the
- * GC.
- *)
-
- let tok_Doctype__Document_type = Doctype dummy_entity, Document_type
- let tok_Ignore__Document = Ignore, Document
- let tok_Ignore__Within_tag = Ignore, Within_tag
- let tok_Ignore__Document_type = Ignore, Document_type
- let tok_Ignore__Declaration = Ignore, Declaration
- let tok_Ignore__Ignored = Ignore, Ignored_section
- let tok_Eof__Document = Eof, Document
- let tok_Eof__Content = Eof, Content
- let tok_Eof__Within_tag = Eof, Within_tag
- let tok_Eof__Document_type = Eof, Document_type
- let tok_Eof__Declaration = Eof, Declaration
- let tok_Eof__Ignored = Eof, Ignored_section
- let tok_LineEndCRLF__Content = LineEnd "\r\n", Content
- let tok_LineEndCR__Content = LineEnd "\r", Content
- let tok_LineEndLF__Content = LineEnd "\n", Content
- let tok_CharDataRBRACKET__Content = CharData "]", Content
- let tok_Eq__Within_tag = Eq, Within_tag
- let tok_Rangle__Content = Rangle, Content
- let tok_Rangle_empty__Content = Rangle_empty, Content
- let tok_Dtd_begin__Declaration = Dtd_begin dummy_entity, Declaration
- let tok_Doctype_rangle__Document = Doctype_rangle dummy_entity, Document
- let tok_Percent__Declaration = Percent, Declaration
- let tok_Plus__Declaration = Plus, Declaration
- let tok_Star__Declaration = Star, Declaration
- let tok_Bar__Declaration = Bar, Declaration
- let tok_Comma__Declaration = Comma, Declaration
- let tok_Qmark__Declaration = Qmark, Declaration
- let tok_Lparen__Declaration = Lparen dummy_entity, Declaration
- let tok_RparenPlus__Declaration = RparenPlus dummy_entity, Declaration
- let tok_RparenStar__Declaration = RparenStar dummy_entity, Declaration
- let tok_RparenQmark__Declaration = RparenQmark dummy_entity, Declaration
- let tok_Rparen__Declaration = Rparen dummy_entity, Declaration
- let tok_Required__Declaration = Required, Declaration
- let tok_Implied__Declaration = Implied, Declaration
- let tok_Fixed__Declaration = Fixed, Declaration
- let tok_Pcdata__Declaration = Pcdata, Declaration
- let tok_Decl_element__Declaration = Decl_element dummy_entity, Declaration
- let tok_Decl_attlist__Declaration = Decl_attlist dummy_entity, Declaration
- let tok_Decl_entity__Declaration = Decl_entity dummy_entity, Declaration
- let tok_Decl_notation__Declaration = Decl_notation dummy_entity, Declaration
- let tok_Conditional_begin__Declaration = Conditional_begin dummy_entity,
- Declaration
- let tok_Conditional_begin__Ignored = Conditional_begin dummy_entity,
- Ignored_section
- let tok_Conditional_end__Declaration = Conditional_end dummy_entity,
- Declaration
- let tok_Conditional_end__Ignored = Conditional_end dummy_entity,
- Ignored_section
- let tok_Conditional_body__Declaration = Conditional_body dummy_entity,
- Declaration
- let tok_Decl_rangle__Declaration = Decl_rangle dummy_entity, Declaration
- let tok_Dtd_end__Document_type = Dtd_end dummy_entity, Document_type
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/08/18 20:19:59 gerd
- * Comments return different comment tokens.
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(* NOTE: Currently, this module is *identical* to Pxp_lex_aux_utf8 *)
-
- open Pxp_types
- open Pxp_lexer_types
-
- let get_name_end s k =
- (* Get the index of the end+1 of the name beginning at position k *)
- let l = String.length s in
- let rec find j =
- if j < l then
- match s.[j] with
- | ('\009'|'\010'|'\013'|'\032') -> j
- |_ -> find (j+1)
- else
- l
- in
- find k
-
- let get_ws_end s k =
- let l = String.length s in
- let rec find j =
- if j < l then
- match s.[j] with
- (' '|'\t'|'\r'|'\n') -> find (j+1)
- | _ -> j
- else
- l
- in
- find k
-
- let scan_pi pi xml_scanner =
- let s = String.sub pi 2 (String.length pi - 4) in
- (* the PI without the leading "<?" and the trailing "?>" *)
- let xml_lexbuf = Lexing.from_string (s ^ " ") in
- (* Add space because the lexer expects whitespace after every
- * clause; by adding a space there is always whitespace at the
- * end of the string.
- *)
-
- (* The first word of a PI must be a name: Extract it. *)
-
- let s_name, s_len =
- match xml_scanner xml_lexbuf with
- Pro_name n ->
- let ltok = String.length (Lexing.lexeme xml_lexbuf) in
- if String.length n = ltok then
- (* No whitespace after the name *)
- raise (WF_error ("Bad processing instruction"));
- n, ltok
- | _ -> raise (WF_error ("Bad processing instruction"))
- in
-
- (* Note: s_len is the length of s_name + the whitespace following s_name *)
-
- match s_name with
- "xml" -> begin
- (* It is a <?xml ...?> PI: Get the other tokens *)
- let rec collect () =
- let t = xml_scanner xml_lexbuf in
- (* prerr_endline (string_of_int (Lexing.lexeme_end xml_lexbuf)); *)
- if t = Pro_eof then
- []
- else
- t :: collect()
- in
- PI_xml (collect())
- end
- | _ ->
- let len_param = String.length s - s_len in
- (* It is possible that len_param = -1 *)
- if len_param >= 1 then
- PI(s_name, String.sub s s_len len_param)
- else
- PI(s_name, "")
-
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/05/29 23:53:12 gerd
- * Updated because Markup_* modules have been renamed to Pxp_*.
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(* NOTE: Currently, this module is *identical* to Pxp_lex_aux_iso88591 *)
-
- open Pxp_types
- open Pxp_lexer_types
-
- let get_name_end s k =
- (* Get the index of the end+1 of the name beginning at position k *)
- let l = String.length s in
- let rec find j =
- if j < l then
- match s.[j] with
- | ('\009'|'\010'|'\013'|'\032') -> j
- |_ -> find (j+1)
- else
- l
- in
- find k
-
- let get_ws_end s k =
- let l = String.length s in
- let rec find j =
- if j < l then
- match s.[j] with
- (' '|'\t'|'\r'|'\n') -> find (j+1)
- | _ -> j
- else
- l
- in
- find k
-
- let scan_pi pi xml_scanner =
- let s = String.sub pi 2 (String.length pi - 4) in
- (* the PI without the leading "<?" and the trailing "?>" *)
- let xml_lexbuf = Lexing.from_string (s ^ " ") in
- (* Add space because the lexer expects whitespace after every
- * clause; by adding a space there is always whitespace at the
- * end of the string.
- *)
-
- (* The first word of a PI must be a name: Extract it. *)
-
- let s_name, s_len =
- match xml_scanner xml_lexbuf with
- Pro_name n ->
- let ltok = String.length (Lexing.lexeme xml_lexbuf) in
- if String.length n = ltok then
- (* No whitespace after the name *)
- raise (WF_error ("Bad processing instruction"));
- n, ltok
- | _ -> raise (WF_error ("Bad processing instruction"))
- in
-
- (* Note: s_len is the length of s_name + the whitespace following s_name *)
-
- match s_name with
- "xml" -> begin
- (* It is a <?xml ...?> PI: Get the other tokens *)
- let rec collect () =
- let t = xml_scanner xml_lexbuf in
- (* prerr_endline (string_of_int (Lexing.lexeme_end xml_lexbuf)); *)
- if t = Pro_eof then
- []
- else
- t :: collect()
- in
- PI_xml (collect())
- end
- | _ ->
- let len_param = String.length s - s_len in
- (* It is possible that len_param = -1 *)
- if len_param >= 1 then
- PI(s_name, String.sub s s_len len_param)
- else
- PI(s_name, "")
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/05/29 23:53:12 gerd
- * Updated because Markup_* modules have been renamed to Pxp_*.
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-{
- open Pxp_types
- open Pxp_lexer_types
-
-#insert pxp_lex_aux.src
-
-#insert open_pxp_lex_aux_*.src
-#insert open_pxp_lex_misc_*.src
-
-}
-
-#insert pxp_lex_defs_*.def
-
-rule scan_content = parse
- "<?" pi_string "?>"
- { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi, Content }
- | "<?"
- { raise (WF_error ("Illegal processing instruction")) }
- | "<!--"
- { Comment_begin, Content_comment }
- | '<' '/'? name
- (* One rule for Tag_beg and Tag_end saves transitions. *)
- { let s = Lexing.lexeme lexbuf in
- if s.[1] = '/' then
- Tag_end (String.sub s 2 (String.length s - 2), dummy_entity),
- Within_tag
- else
- Tag_beg (String.sub s 1 (String.length s - 1), dummy_entity),
- Within_tag
- }
- | "<![CDATA[" cdata_string "]]>"
- { let s = Lexing.lexeme lexbuf in
- Cdata (String.sub s 9 (String.length s - 12)), Content }
- | "<!"
- { raise (WF_error "Declaration either malformed or not allowed in this context")
- }
- | "<"
- { raise (WF_error ("The left angle bracket '<' must be written as '<'"))
- }
- | "&#" ascii_digit+ ";"
- { let s = Lexing.lexeme lexbuf in
- CRef (int_of_string (String.sub s 2 (String.length s - 3))), Content }
- | "&#x" ascii_hexdigit+ ";"
- { let s = Lexing.lexeme lexbuf in
- CRef (int_of_string ("0x" ^ String.sub s 3 (String.length s - 4))), Content }
- | "&" name ";"
- { let s = Lexing.lexeme lexbuf in
- ERef (String.sub s 1 (String.length s - 2)), Content }
- | "&"
- { raise (WF_error ("The ampersand '&' must be written as '&'"))
- }
-
- (* LineEnd: Depending on whether we are reading from a primary source
- * (file) or from the replacement text of an internal entity, line endings
- * must be normalized (converted to \n) or not.
- * The entity classes do that. The yacc parser will never see LineEnd;
- * this token is always converted to the appropriate CharData token.
- *)
-
- | '\013' '\010'
- { tok_LineEndCRLF__Content }
- | '\013'
- { tok_LineEndCR__Content }
- | '\010'
- { tok_LineEndLF__Content }
- | eof
- { tok_Eof__Content }
- | "]]>"
- { raise (WF_error ("The sequence ']]>' must be written as ']]>'"))
- }
- | "]"
- { tok_CharDataRBRACKET__Content }
- | normal_character+
- { let s = Lexing.lexeme lexbuf in
- CharData s, Content
- }
- | _
- { raise Netconversion.Malformed_code }
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.4 2000/08/18 20:19:59 gerd
- * Comments return different comment tokens.
- *
- * Revision 1.3 2000/08/14 22:18:34 gerd
- * Bad_character_stream -> Netconversion.Malformed_code
- *
- * Revision 1.2 2000/05/29 23:53:12 gerd
- * Updated because Markup_* modules have been renamed to Pxp_*.
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-{
- open Pxp_types
- open Pxp_lexer_types
-
-#insert open_pxp_lex_aux_*.src
-#insert pxp_lex_aux.src
-
-}
-
-#insert pxp_lex_defs_*.def
-
-(* This lexer is used to expand and normalize attribute values: *)
-
-rule scan_content_string = parse
- '&' name ';'
- { let s = Lexing.lexeme lexbuf in
- ERef (String.sub s 1 (String.length s - 2)) }
- | "&#" ascii_digit+ ";"
- { let s = Lexing.lexeme lexbuf in
- CRef (int_of_string (String.sub s 2 (String.length s - 3))) }
- | "&#x" ascii_hexdigit+ ";"
- { let s = Lexing.lexeme lexbuf in
- CRef (int_of_string ("0x" ^ String.sub s 3 (String.length s - 4))) }
- | '&'
- { raise(WF_error("The character '&' must be written as '&'")) }
- | printable_character_except_amp_lt+
- { CharData (Lexing.lexeme lexbuf) }
- | '\009'
- { CRef 32 }
- | '\013' '\010'
- { CRef(-1) (* A special case *)
- }
- | '\013'
- { CRef 32 }
- | '\010'
- { CRef 32 }
- | '<'
- {
- (* Depending on the situation, '<' may be legal or not: *)
- CharData "<"
- }
- | eof
- { Eof }
- | _
- { raise Netconversion.Malformed_code }
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/08/14 22:18:34 gerd
- * Bad_character_stream -> Netconversion.Malformed_code
- *
- * Revision 1.2 2000/05/29 23:53:12 gerd
- * Updated because Markup_* modules have been renamed to Pxp_*.
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-{
- open Pxp_types
- open Pxp_lexer_types
-
-#insert pxp_lex_aux.src
-
-#insert open_pxp_lex_aux_*.src
-#insert open_pxp_lex_misc_*.src
-
-}
-
-#insert pxp_lex_defs_*.def
-
-(* scan_declaration: after "[" in DTD until matching "]" *)
-
-rule scan_declaration = parse
- ws+
- { tok_Ignore__Declaration }
- | '%' name ';'
- { let s = Lexing.lexeme lexbuf in
- (PERef (String.sub s 1 (String.length s - 2))), Declaration }
- | '%'
- { tok_Percent__Declaration }
- | '&'
- { raise(WF_error("References to general entities not allowed in DTDs")) }
- | name
- { Name (Lexing.lexeme lexbuf), Declaration }
- | nmtoken
- { Nametoken (Lexing.lexeme lexbuf), Declaration }
- | '+'
- { tok_Plus__Declaration }
- | '*'
- { tok_Star__Declaration }
- | '|'
- { tok_Bar__Declaration }
- | ','
- { tok_Comma__Declaration }
- | '?'
- { tok_Qmark__Declaration }
- | '('
- { tok_Lparen__Declaration }
- | ")+"
- { tok_RparenPlus__Declaration }
- | ")*"
- { tok_RparenStar__Declaration }
- | ")?"
- { tok_RparenQmark__Declaration }
- | ')'
- { tok_Rparen__Declaration }
- | "#REQUIRED"
- { tok_Required__Declaration }
- | "#IMPLIED"
- { tok_Implied__Declaration }
- | "#FIXED"
- { tok_Fixed__Declaration }
- | "#PCDATA"
- { tok_Pcdata__Declaration }
- | "<!ELEMENT"
- { tok_Decl_element__Declaration }
- | "<!ATTLIST"
- { tok_Decl_attlist__Declaration }
- | "<!ENTITY"
- { tok_Decl_entity__Declaration }
- | "<!NOTATION"
- { tok_Decl_notation__Declaration }
- | "<!--"
- { Comment_begin, Decl_comment }
- | "<!["
- { tok_Conditional_begin__Declaration }
- | "]]>"
- { tok_Conditional_end__Declaration }
- | "["
- { tok_Conditional_body__Declaration }
-
- (* TODO: PIs modified *)
-
- | "<?" pi_string "?>"
- { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi, Declaration }
- | "<?"
- { raise (WF_error ("Illegal processing instruction")) }
- | '"' [^ '"']* '"'
- { let s = Lexing.lexeme lexbuf in
- (* Check that characters are well-formed: *)
- ignore(scan_characters (Lexing.from_string s));
- (Unparsed_string (String.sub s 1 (String.length s - 2))), Declaration }
- | '"'
- { raise (WF_error ("Cannot find the second quotation mark"))
- }
- | "'" [^ '\'']* "'"
- { let s = Lexing.lexeme lexbuf in
- (* Check that characters are well-formed: *)
- ignore(scan_characters (Lexing.from_string s));
- (Unparsed_string (String.sub s 1 (String.length s - 2))), Declaration }
- | "'"
- { raise (WF_error ("Cannot find the second quotation mark"))
- }
- | '>'
- { tok_Decl_rangle__Declaration }
- | ']'
- { tok_Dtd_end__Document_type }
- | eof
- { tok_Eof__Declaration }
- | "<!"
- { raise (WF_error "Declaration either malformed or not allowed in this context")
- }
- | character
- { raise (WF_error("Illegal token or character")) }
- | _
- { raise Netconversion.Malformed_code }
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.4 2000/08/18 20:19:59 gerd
- * Comments return different comment tokens.
- *
- * Revision 1.3 2000/08/14 22:18:34 gerd
- * Bad_character_stream -> Netconversion.Malformed_code
- *
- * Revision 1.2 2000/05/29 23:53:12 gerd
- * Updated because Markup_* modules have been renamed to Pxp_*.
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-let ws = [ ' ' '\t' '\r' '\n' ]
-
-let ascii_digit = ['0'-'9']
-
-let ascii_hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
-
-let namechar = letter | digit | '.' | ':' | '-' | '_' | combiningChar | extender
-
-let name = ( letter | '_' | ':' ) namechar*
-
-let nmtoken = namechar+
-
-(* Valid characters are:
- * #9, #10, #13, #32-#xD7FF, #xE000-#xFFFD, #x10000-#x10FFFF
- *
- * #xD7FF as UTF-8 sequence:
- * 1110xxxx 10xxxxxx 10xxxxxx
- * 1110...D 10...7.. 10.F...F = ED 9F BF
- *
- * #xE000 as UTF-8 sequence:
- * 1110xxxx 10xxxxxx 10xxxxxx
- * 1110...E 10...0.. 10.0...0 = EE 80 80
- *
- * UTF-8 sequence EF BE BF as character:
- * 1110xxxx 10xxxxxx 10xxxxxx
- * 1110...F 10111110 10111111 = #FFBF
- *
- * #xFFFD as UTF-8 sequence:
- * 1110xxxx 10xxxxxx 10xxxxxx
- * 1110...F 10...F.. 10.F...D = EF BF BD
- *
- * #x010000 as UTF-8 sequence:
- * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- * 111100.. 10.1...0 10...0.. 10.0...0 = F0 90 80 80
- *
- * #x10FFFF as UTF-8 sequence:
- * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- * 111101.. 10.0...F 10...F.. 10.F...F = F4 8F BF BF
- *)
-
-
-let non_ascii_character =
- ['\192'-'\223'] ['\128'-'\191'] (* #x80-#x7FF *)
-| ['\224'-'\236'] ['\128'-'\191'] ['\128'-'\191'] (* #x800-#xCFFF *)
-| '\237' ['\128'-'\159'] ['\128'-'\191'] (* #xD000-#xD7FF *)
-| '\238' ['\128'-'\191'] ['\128'-'\191'] (* #xE000-#xEFFF *)
-| '\239' ['\128'-'\190'] ['\128'-'\191'] (* #xF000-#xFFBF *)
-| '\239' '\191' ['\128'-'\189'] (* #xFFC0-#xFFFD *)
-| '\240' ['\144'-'\191'] ['\128'-'\191'] ['\128'-'\191']
- (* #x010000-#x03FFFF *)
-| ['\241'-'\243'] ['\128'-'\191'] ['\128'-'\191'] ['\128'-'\191']
- (* #x040000-#x0FFFFF *)
-| '\244' ['\128'-'\143'] ['\128'-'\191'] ['\128'-'\191']
- (* #x100000-#x10FFFF *)
-
-let character =
- [ '\009' '\010' '\013' '\032'-'\127' ]
-| non_ascii_character
-
-
-let character_except_question_mark = (* '?' = '\063' *)
- [ '\009' '\010' '\013' '\032'-'\062' '\064'-'\127' ]
-| non_ascii_character
-
-
-let character_except_right_angle_bracket = (* '>' = '\062' *)
- [ '\009' '\010' '\013' '\032'-'\061' '\063'-'\127' ]
-| non_ascii_character
-
-
-let character_except_minus = (* '-' = '\045' *)
- [ '\009' '\010' '\013' '\032'-'\044' '\046'-'\127' ]
-| non_ascii_character
-
-
-let character_except_quot = (* '"' = '\034' *)
- [ '\009' '\010' '\013' '\032'-'\033' '\035'-'\255' ]
-| non_ascii_character
-
-
-let character_except_apos = (* '\'' = '\039' *)
- [ '\009' '\010' '\013' '\032'-'\038' '\040'-'\255' ]
-| non_ascii_character
-
-
-let pi_string = character_except_question_mark*
- ( '?' character_except_right_angle_bracket
- character_except_question_mark* )*
- '?'?
-
-
-let comment_string = character_except_minus*
- ('-' character_except_minus+ )*
-
-
-let normal_character =
- (* Character except '&' = '\038', '<' = '\060', ']' = '\093', and CR LF *)
- [ '\009' '\032'-'\037' '\039'-'\059' '\061'-'\092' '\094'-'\127' ]
-| non_ascii_character
-
-
-let character_except_rbracket = (* ']' = '\093' *)
- [ '\009' '\010' '\013' '\032'-'\092' '\094'-'\127' ]
-| non_ascii_character
-
-
-let character_except_rbracket_rangle = (* ']' = '\093', '>' = '\062' *)
- [ '\009' '\010' '\013' '\032'-'\061' '\063'-'\092' '\094'-'\127' ]
-| non_ascii_character
-
-
-let cdata_string =
- character_except_rbracket*
- ( "]" character_except_rbracket+ |
- "]]" ']'* character_except_rbracket_rangle character_except_rbracket*
- )*
- ']'*
-
-
-let printable_character_except_amp_lt =
- (* '&' = '\038', '<' = '\060' *)
- [ '\032'-'\037' '\039'-'\059' '\061'-'\127']
-| non_ascii_character
-
-
-let printable_character_except_amp_percent =
- (* '%' = '\037', '&' = '\038' *)
- [ '\032'-'\036' '\039'-'\127']
-| non_ascii_character
-
-
-let character_except_special =
- (* '<'=060, ']'=093, '"'=034, '\''=039 *)
- [ '\009' '\010' '\013' '\032'-'\033' '\035'-'\038' '\040'-'\059'
- '\061'-'\092' '\094'-'\127' ]
-| non_ascii_character
-
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/08/26 19:58:08 gerd
- * Bugfix in character_except_apos. The bug caused that attribute
- * values delimited by ' could not be scanned at all.
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(*****************************************************************)
-(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
-(* 14/05/2000 *)
-(* *)
-(* These are taken from the appendix B of the XML recommendation *)
-(* *)
-(*****************************************************************)
-
-(* 85 *)
-let baseChar =
- [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6]
- | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148]
- | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5]
- | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386
- | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE]
- | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3]
- | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481]
- | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB]
- | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559
- | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A]
- | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE]
- | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D
- | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8]
- | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD]
- | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10]
- | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36]
- | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74]
- | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8]
- | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0
- | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30]
- | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D]
- | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95]
- | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4]
- | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C]
- | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39]
- | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8]
- | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1]
- | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39]
- | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33]
- | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A
- | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5
- | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3]
- | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69]
- | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103]
- | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C
- | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159
- | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E]
- | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF]
- | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9
- | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D]
- | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B
- | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE
- | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB]
- | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126
- | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094]
- | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]
-;;
-
-(* 86 *)
-let ideographic = [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] ;;
-
-(* 84 *)
-let letter = baseChar | ideographic ;;
-
-(* 87 *)
-let combiningChar =
- [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1]
- | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4
- | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF]
- | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903]
- | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963]
- | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4]
- | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02
- | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48]
- | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC
- | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03]
- | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D]
- | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8]
- | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44]
- | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83]
- | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6]
- | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D]
- | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1
- | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19]
- | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84]
- | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD]
- | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F]
- | #x3099 | #x309A
-;;
-
-(* 88 *)
-let digit =
- [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F]
- | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F]
- | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F]
- | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]
-;;
-
-(* 89 *)
-let extender =
- #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005
- | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-let ws = [ ' ' '\t' '\r' '\n' ]
-
-(* Note: ISO-8859-1 charset does not have 'combining characters' *)
-
-let letter = ['A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255']
-let extender = '\183'
-let digit = ['0'-'9']
-let ascii_digit = ['0'-'9']
-let ascii_hexdigit = ['0'-'9' 'A'-'F' 'a'-'f']
-let namechar = letter | digit | '.' | ':' | '-' | '_' | extender
-let name = ( letter | '_' | ':' ) namechar*
-let nmtoken = namechar+
-
-let character = ['\009' '\010' '\013' '\032'-'\255']
-
-let character_except_question_mark = (* '?' = '\063' *)
- [ '\009' '\010' '\013' '\032'-'\062' '\064'-'\255' ]
-
-let character_except_right_angle_bracket = (* '>' = '\062' *)
- [ '\009' '\010' '\013' '\032'-'\061' '\063'-'\255' ]
-
-let character_except_minus = (* '-' = '\045' *)
- [ '\009' '\010' '\013' '\032'-'\044' '\046'-'\255' ]
-
-let character_except_quot = (* '"' = '\034' *)
- [ '\009' '\010' '\013' '\032'-'\033' '\035'-'\255' ]
-
-let character_except_apos = (* '\'' = '\039' *)
- [ '\009' '\010' '\013' '\032'-'\038' '\040'-'\255' ]
-
-let pi_string = character_except_question_mark*
- ( '?' character_except_right_angle_bracket
- character_except_question_mark* )*
- '?'?
-
-let comment_string = character_except_minus*
- ('-' character_except_minus+ )*
-
-let normal_character =
- [^ '&' '<' ']' '\000'-'\008' '\010'-'\031']
-
-let character_except_rbracket = (* ']' = '\093' *)
- [ '\009' '\010' '\013' '\032'-'\092' '\094'-'\255' ]
-
-let character_except_rbracket_rangle = (* ']' = '\093', '>' = '\062' *)
- [ '\009' '\010' '\013' '\032'-'\061' '\063'-'\092' '\094'-'\255' ]
-
-let cdata_string =
- character_except_rbracket*
- ( "]" character_except_rbracket+ |
- "]]" ']'* character_except_rbracket_rangle character_except_rbracket*
- )*
- ']'*
-(* cdata_string = char* - ( char* ']]>' char* ) *)
-
-let printable_character_except_amp_lt =
- (* '&' = '\038', '<' = '\060' *)
- [ '\032'-'\037' '\039'-'\059' '\061'-'\255']
-
-let printable_character_except_amp_percent =
- (* '%' = '\037', '&' = '\038' *)
- [ '\032'-'\036' '\039'-'\255']
-
-let character_except_special =
- (* '<'=060, ']'=093, '"'=034, '\''=039 *)
- [ '\009' '\010' '\013' '\032'-'\033' '\035'-'\038' '\040'-'\059'
- '\061'-'\092' '\094'-'\255' ]
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-{
- open Pxp_types
- open Pxp_lexer_types
-
-#insert pxp_lex_aux.src
-
-#insert open_pxp_lex_aux_*.src
-#insert open_pxp_lex_misc_*.src
-
-}
-
-#insert pxp_lex_defs_*.def
-
-
-(* scan_document: Lexer for the outermost structures *)
-
-rule scan_document = parse
- "<?" pi_string "?>"
- { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi, Document }
- | "<?"
- { raise (WF_error ("Illegal processing instruction")) }
- | "<!DOCTYPE"
- { tok_Doctype__Document_type }
- | "<!--"
- { Comment_begin, Document_comment }
- | "<!"
- { raise (WF_error "Declaration either malformed or not allowed in this context")
- }
- | "<" name
- { let s = Lexing.lexeme lexbuf in
- Tag_beg (String.sub s 1 (String.length s - 1), dummy_entity), Within_tag
- }
- | '<'
- { raise (WF_error ("Illegal token")) }
- | ws+
- { tok_Ignore__Document }
- | eof
- { tok_Eof__Document }
- | character
- { raise (WF_error ("Content not allowed here")) }
- | _
- { raise Netconversion.Malformed_code }
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.4 2000/08/18 20:19:59 gerd
- * Comments return different comment tokens.
- *
- * Revision 1.3 2000/08/14 22:18:34 gerd
- * Bad_character_stream -> Netconversion.Malformed_code
- *
- * Revision 1.2 2000/05/29 23:53:12 gerd
- * Updated because Markup_* modules have been renamed to Pxp_*.
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-{
- open Pxp_types
- open Pxp_lexer_types
-
-#insert open_pxp_lex_aux_*.src
-#insert pxp_lex_aux.src
-
-}
-
-#insert pxp_lex_defs_*.def
-
-
-(* scan_document_type: after "<!DOCTYPE" until matching ">" *)
-
-rule scan_document_type = parse
- name
- { let s = Lexing.lexeme lexbuf in
- Name s, Document_type }
- | ws+
- { tok_Ignore__Document_type }
- | '"' character_except_quot* '"'
- { let s = Lexing.lexeme lexbuf in
- (Unparsed_string (String.sub s 1 (String.length s - 2))), Document_type }
- | '"'
- { raise (WF_error ("Cannot find the second quotation mark"))
- }
- | "'" character_except_apos* "'"
- { let s = Lexing.lexeme lexbuf in
- (Unparsed_string (String.sub s 1 (String.length s - 2))), Document_type }
- | "'"
- { raise (WF_error ("Cannot find the second quotation mark"))
- }
- | '['
- { tok_Dtd_begin__Declaration }
- | '>'
- { tok_Doctype_rangle__Document }
- | eof
- { tok_Eof__Document_type }
- | '&'
- { raise (WF_error("References to general entities not allowed here")) }
- | '%'
- { raise (WF_error("References to parameter entities not allowed here")) }
- | character
- { raise (WF_error("Content not allowed here")) }
- | _
- { raise Netconversion.Malformed_code }
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/08/14 22:18:34 gerd
- * Bad_character_stream -> Netconversion.Malformed_code
- *
- * Revision 1.2 2000/05/29 23:53:12 gerd
- * Updated because Markup_* modules have been renamed to Pxp_*.
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-{
- open Pxp_types
- open Pxp_lexer_types
-
-#insert open_pxp_lex_aux_*.src
-#insert pxp_lex_aux.src
-
-}
-
-#insert pxp_lex_defs_*.def
-(* The following scanner is used to determine the replacement text of
- * internal entities:
- *)
-
-rule scan_dtd_string = parse
- '%' name ';'
- { let s = Lexing.lexeme lexbuf in
- PERef (String.sub s 1 (String.length s - 2)) }
- | '%'
- { raise(WF_error("The character '%' must be written as '%'")) }
- | '&' name ';'
- { let s = Lexing.lexeme lexbuf in
- ERef (String.sub s 1 (String.length s - 2)) }
- | "&#" ascii_digit+ ";"
- { let s = Lexing.lexeme lexbuf in
- CRef (int_of_string (String.sub s 2 (String.length s - 3))) }
- | "&#x" ascii_hexdigit+ ";"
- { let s = Lexing.lexeme lexbuf in
- CRef (int_of_string ("0x" ^ String.sub s 3 (String.length s - 4))) }
- | '&'
- { raise(WF_error("The character '&' must be written as '&'")) }
- | '\013' '\010'
- { CRef(-1) }
- | '\013'
- { CRef(-2) }
- | '\010'
- { CRef(-3) }
- | '\009'
- { CharData "\009" }
- | printable_character_except_amp_percent+
- { CharData (Lexing.lexeme lexbuf) }
- | eof
- { Eof }
- | _
- { raise Netconversion.Malformed_code }
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/08/14 22:18:34 gerd
- * Bad_character_stream -> Netconversion.Malformed_code
- *
- * Revision 1.2 2000/05/29 23:53:12 gerd
- * Updated because Markup_* modules have been renamed to Pxp_*.
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-{
- open Pxp_types
- open Pxp_lexer_types
-
-#insert open_pxp_lex_aux_*.src
-#insert pxp_lex_aux.src
-
-}
-
-#insert pxp_lex_defs_*.def
-
-(* The remaining, smaller lexers *)
-
-rule scan_characters = parse
- character*
- { () }
-| eof
- { () }
-| _
- { raise Netconversion.Malformed_code }
-
-
-and scan_xml_pi = parse
- name ws*
- { let s = Lexing.lexeme lexbuf in
- let j = get_name_end s 0 in
- Pro_name (String.sub s 0 j)
- }
- | "=" ws*
- { Pro_eq }
- | "'" character_except_apos* "'" ws+
- { let s = Lexing.lexeme lexbuf in
- let j = String.index_from s 1 '\'' in
- Pro_string (String.sub s 1 (j-1))
- }
- | "'"
- { raise (WF_error ("Cannot find the second quotation mark"))
- }
- | '"' character_except_quot* '"' ws+
- { let s = Lexing.lexeme lexbuf in
- let j = String.index_from s 1 '"' in
- Pro_string (String.sub s 1 (j-1))
- }
- | '"'
- { raise (WF_error ("Cannot find the second quotation mark"))
- }
- | eof
- { Pro_eof }
- | character
- { (* prerr_endline (Lexing.lexeme lexbuf); *)
- raise (WF_error("Illegal token or character"))
- }
- | _
- { raise Netconversion.Malformed_code }
-
-and scan_only_xml_decl = parse
- "<?xml" ws+ pi_string "?>"
- { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi }
- | ""
- { Eof }
-
-and scan_for_crlf = parse
- | '\013' '\010'
- { CharData "\n" }
- | '\013'
- { CharData "\n" }
- | '\010'
- { CharData "\n" }
- | [^ '\010' '\013' ]+
- { CharData (Lexing.lexeme lexbuf) }
- | eof
- { Eof }
-
-and scan_content_comment = parse
- "-->"
- { Comment_end, Content }
- | "--"
- { raise (WF_error "Double hyphens are illegal inside comments") }
- | "-"
- { Comment_material "-", Content_comment }
- | character_except_minus+
- { Comment_material(Lexing.lexeme lexbuf), Content_comment }
- | eof
- { Eof, Content_comment }
- | _
- { raise Netconversion.Malformed_code }
-
-
-(* In declarations, comments are always thrown away. *)
-
-and scan_decl_comment = parse
- "-->"
- { Comment_end, Declaration }
- | "--"
- { raise (WF_error "Double hyphens are illegal inside comments") }
- | "-"
- { Comment_material "", Decl_comment }
- | character_except_minus+
- { Comment_material "", Decl_comment }
- | eof
- { Eof, Decl_comment }
- | _
- { raise Netconversion.Malformed_code }
-
-
-and scan_document_comment = parse
- "-->"
- { Comment_end, Document }
- | "--"
- { raise (WF_error "Double hyphens are illegal inside comments") }
- | "-"
- { Comment_material "-", Document_comment }
- | character_except_minus+
- { Comment_material(Lexing.lexeme lexbuf), Document_comment }
- | eof
- { Eof, Document_comment }
- | _
- { raise Netconversion.Malformed_code }
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.4 2000/08/18 20:19:59 gerd
- * Comments return different comment tokens.
- *
- * Revision 1.3 2000/08/14 22:18:34 gerd
- * Bad_character_stream -> Netconversion.Malformed_code
- *
- * Revision 1.2 2000/05/29 23:53:12 gerd
- * Updated because Markup_* modules have been renamed to Pxp_*.
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-{
- open Pxp_types
- open Pxp_lexer_types
-
-#insert open_pxp_lex_aux_*.src
-#insert pxp_lex_aux.src
-
-}
-
-#insert pxp_lex_defs_*.def
-
-rule scan_name_string = parse
- name
- { Name (Lexing.lexeme lexbuf) }
- | ws+
- { Ignore }
- | nmtoken
- { Nametoken (Lexing.lexeme lexbuf) }
- | eof
- { Eof }
- | character
- { CharData (Lexing.lexeme lexbuf) }
- | _
- { raise Netconversion.Malformed_code }
-
-
-and scan_ignored_section = parse
- | "<!["
- { tok_Conditional_begin__Ignored }
- | "]]>"
- { tok_Conditional_end__Ignored }
- | "<!--" comment_string "-->"
- { tok_Ignore__Ignored }
- | '"' character_except_quot* '"'
- { tok_Ignore__Ignored }
- | "'" character_except_apos* "'"
- { tok_Ignore__Ignored }
- | eof
- { tok_Eof__Ignored }
- | character_except_special+
- { tok_Ignore__Ignored }
- | "<"
- { tok_Ignore__Ignored }
- | "]"
- { tok_Ignore__Ignored }
- | "'"
- { tok_Ignore__Ignored }
- | "\""
- { tok_Ignore__Ignored }
- | _
- { raise Netconversion.Malformed_code }
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/08/14 22:18:34 gerd
- * Bad_character_stream -> Netconversion.Malformed_code
- *
- * Revision 1.2 2000/05/29 23:53:12 gerd
- * Updated because Markup_* modules have been renamed to Pxp_*.
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-{
- open Pxp_types
- open Pxp_lexer_types
-
-#insert open_pxp_lex_aux_*.src
-#insert pxp_lex_aux.src
-
-}
-
-#insert pxp_lex_defs_*.def
-
-
-rule scan_within_tag = parse
- ws+
- { tok_Ignore__Within_tag }
- | name
- { Name (Lexing.lexeme lexbuf ), Within_tag }
- | '='
- { tok_Eq__Within_tag }
- | '"' character_except_quot* '"'
- { let s = Lexing.lexeme lexbuf in
- let v = String.sub s 1 (String.length s - 2) in
- Attval v, Within_tag }
- | '"'
- { raise (WF_error ("Cannot find the second quotation mark"))
- }
- | "'" character_except_apos* "'"
- { let s = Lexing.lexeme lexbuf in
- let v = String.sub s 1 (String.length s - 2) in
- Attval v, Within_tag }
- | "'"
- { raise (WF_error ("Cannot find the second quotation mark"))
- }
- | '>'
- { tok_Rangle__Content }
- | "/>"
- { tok_Rangle_empty__Content }
- | eof
- { tok_Eof__Within_tag }
- | character
- { raise (WF_error ("Illegal inside tags")) }
- | _
- { raise Netconversion.Malformed_code }
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/08/14 22:18:34 gerd
- * Bad_character_stream -> Netconversion.Malformed_code
- *
- * Revision 1.2 2000/05/29 23:53:12 gerd
- * Updated because Markup_* modules have been renamed to Pxp_*.
- *
- * Revision 1.1 2000/05/20 20:33:25 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-# make all: make bytecode executable
-# make clean: remove intermediate files (in this directory)
-# make CLEAN: remove intermediate files (recursively)
-
-#----------------------------------------------------------------------
-
-SRC = ast.ml lexer.ml parser.ml generator.ml
-OBJ = $(SRC:.ml=.cmo)
-
-#----------------------------------------------------------------------
-
-
-.PHONY: all
-all: m2parsergen
-
-.PHONY: clean
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa lexer.ml parser.ml \
- parser.mli
-
-.PHONY: CLEAN
-CLEAN: clean
-
-.PHONY: distclean
-distclean: clean
- rm -f *~ depend depend.pkg m2parsergen a.out x.ml
-
-#----------------------------------------------------------------------
-# general rules:
-
-OPTIONS =
-OCAMLC = ocamlc -g $(OPTIONS) $(ROPTIONS)
-OCAMLOPT = ocamlopt -p $(OPTIONS) $(ROPTIONS)
-OCAMLDEP = ocamldep $(OPTIONS)
-OCAMLFIND = ocamlfind
-
-#----------------------------------------------------------------------
-
-depend: $(SRC)
- $(OCAMLDEP) $(SRC) >depend
-
-m2parsergen: $(OBJ)
- $(OCAMLC) -o m2parsergen $(OBJ)
-
-.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly
-
-.ml.cmx:
- $(OCAMLOPT) -c $<
-
-.ml.cmo:
- $(OCAMLC) -c $<
-
-.mli.cmi:
- $(OCAMLC) -c $<
-
-.mll.ml:
- ocamllex $<
-
-.mly.ml:
- ocamlyacc $<
-
-include depend
+++ /dev/null
-----------------------------------------------------------------------
-m2parsergen
-----------------------------------------------------------------------
-
-This is a parser generator for top-down (recursive descent) parsers.
-The input file must be structured as follows:
-
----------------------------------------- Begin of file
-
-<OCAML TEXT ("preamble")>
-
-%%
-
-<DECLARATIONS>
-
-%%
-
-<RULES>
-
-%%
-
-<OCAML TEXT ("postamble")>
-
----------------------------------------- End of file
-
-The two-character combination %% separates the various sections. The
-text before the first %% and after the last %% will be copied verbatim
-to the output file.
-
-Within the declarations and rules sections you must use /* ... */ as
-comment braces.
-
-There are two types of declarations:
-
-%token Name
-
-declares that Name is a token without associated value, and
-
-%token <> Name
-
-declares that Name is a token with associated value (i.e. Name x).
-
-In contrast to ocamlyacc, you need not specify a type. This is a
-fundamental difference, because m2parsergen will not generate a type
-declaration for a "token" type; you must do this yourself.
-
-You need not declare start symbols; every grammar rule may be used
-as a start symbol.
-
-The rules look like:
-
-name_of_rule(arg1, arg2, ...):
- label1:symbol1 label2:symbol2 ... {{ CODE }}
-| label1:symbol1 label2:symbol2 ... {{ CODE }}
-...
-| label1:symbol1 label2:symbol2 ... {{ CODE }}
-
-The rules may have arguments (note that you must write the
-parentheses, even if the rule does not have arguments). Here, arg1,
-arg2, ... are the formal names of the arguments; you may refer to them
-in OCaml code.
-
-Furthermore, the symbols may have labels (you can leave the labels
-out). You can refer to the value associated with a symbol by its
-label, i.e. there is an OCaml variable with the same name as the
-label, and this variable contains the value.
-
-The OCaml code must be embraced by {{ and }}, and these separators
-must not occur within the code.
-
-EXAMPLE:
-
-prefix_term():
- Plus_symbol Left_paren v1:prefix_term() Comma v2:prefix_term() Right_paren
- {{ v1 + v2 }}
-| Times_symbol Left_paren v1:prefix_term() Comma v2:prefix_term() Right_paren
- {{ v1 * v2 }}
-| n:Number
- {{ n }}
-
-As you can see in the example, you must pass values for the arguments
-if you call non-terminal symbols (here, the argument list is empty: ()).
-
-The generated parsers behave as follows:
-
-- A rule is applicable to a token sequence if the first token is
- matched by the rule.
-
- In the example: prefix_term is applicable if the first token of a
- sequence is either Plus_symbol, Times_symbol, or Number.
-
-- One branch of the applicable rule is selected: it is the first
- branch that matches the first token. THE OTHER TOKENS DO NOT HAVE
- ANY EFFECT ON BRANCH SELECTION!
-
- For instance, in the following rule the second branch is never
- selected, because only the A is used to select the branch:
-
- a():
- A B {{ ... }}
- | A C {{ ... }}
-
-- Once a branch is selected, it is checked whether the branch matches
- the token sequence. If this check succeeds, the code section of the
- branch is executed, and the resulting value is returned to the
- caller.
- If the check fails, the exception Parsing.Parse_error is raised.
- Normally, this exception is not caught, and will force the parser
- to stop.
-
- The check in detail:
-
- If the rule demands a terminal, there must be exactly this
- terminal at the corresponding location in the token sequence.
-
- If the rule demands a non-terminal, it is checked whether the rule
- for this non-terminal is applicable. If so, the branch
- is selected, and recursively checked. If the rule is not applicable,
- the check fails immediately.
-
-- THERE IS NO BACKTRACKING!
-
- Note that the following works (but the construction is resolved at
- generation time):
-
- rule1():
- rule2() A B ... {{ ... }}
-
- rule2():
- C {{ ... }}
- | D {{ ... }}
-
- In this case, the (only) branch of rule1 is selected if the next
- token is C or D.
-
----
-
-
-
-*** Options and repetitions ***
-
-Symbols can be tagged as being optional, or as occurring repeatedly:
-
-rule():
- Name whitespace()* Question_mark?
-
-- "*": The symbol matches zero or more occurrences.
-
-- "?": The symbol matches zero or one occurrence.
-
-This is done as follows:
-
-- terminal*: As many consecutive <terminal> tokens as possible are
- matched.
-- non-terminal*: As many consecutive subsequences matching
- <non-terminal> as possible are matched. Before another
- subsequence is matched, it is checked whether the
- rule for <non-terminal> is applicable. If so, the
- rule is invoked and must succeed (otherwise Parsing.
- Parse_error). If not, the loop is exited.
-
-- terminal?: If the next token is <terminal>, it is matched. If not,
- no token is matched.
-
-- non-terminal?: It is checked whether the rule for <non-terminal>
- is applicable. If so, the rule is invoked, and
- matches a sequence of tokens. If not, no token is
- matched.
-
-You may refer to repeated or optional symbols by labels. In this case,
-the label is associated with lists of values, or optional values,
-respectively:
-
-rule():
- A lab:other()* lab':unlikely()?
- {{ let n = List.length lab in ...
- match lab' with
- None -> ...
- | Some v -> ...
- }}
-
-A different scheme is applied if the symbol is a token without
-associated value (%token Name, and NOT %token <> Name):
-
-rule():
- A lab:B* lab':C?
-
-Here, "lab" becomes an integer variable counting the number of Bs, and
-"lab'" becomes a boolean variable denoting whether there is a C or not.
-
-
-*** Early let-binding ***
-
-You may put some OCaml code directly after the first symbol of a
-branch:
-
-rule():
- A $ {{ let-binding }} C D ... {{ ... }}
-
-The code brace {{ let-binding }} must be preceded by a dollar
-sign. You can put "let ... = ... in" statements into this brace:
-
-rule1():
- n:A $ {{ let twice = 2 * n in }} rule2(twice) {{ ... }}
-
-This code is executed once the branch is selected.
-
-
-*** Very early let-binding ***
-
-This is also possible:
-
-rule():
- $ {{ CODE }}
- A
- ...
-
-The CODE is executed right when the branch is selected, before anything
-else happens. (Only for hacks!)
-
-
-
-*** Computed rules ***
-
-rule():
- A $ {{ let followup = ... some function ... in }} [ followup ]()
- {{ ... }}
-
-Between [ and ], you can refer to the OCaml name of *any* function.
-Here, the function "followup" is bound in the let-binding.
-
-
-*** Error handling ***
-
-If a branch has already been selected, but the check whether the other
-symbols of the branch match fails, it is possible to catch the resulting
-exception and to find out at which position the failure occurred.
-
-rule():
- x:A y:B z:C {{ ... }} ? {{ ERROR-CODE }}
-
-After a question mark, you may append another code
-brace. This code is executed if the branch check fails (but not if the
-branch is not selected, nor if no branch is selected at all). The string
-variable !yy_position contains the label of the symbol that caused the
-failure (or the empty string if the symbol does not have a
-label).
-
-Example:
-
-rule():
- x:A y:B z:C {{ print_endline "SUCCESS" }} ? {{ print_endline !yy_position }}
-
-If the token sequence is A B C, "SUCCESS" will be printed. If the
-sequence is A C, the second symbol fails, and "y" will be printed. If
-the sequence is A B D, the third symbol fails, and "z" will be
-printed. If the sequence is B, the rule will never be selected because
-it is not applicable.
-
-
-
-*** Error recovery ***
-
-You may call the functions yy_current, yy_get_next, or one of the
-parse_* functions in the error brace to recover from the error
-(e.g. to move ahead until a certain token is reached). See below.
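-
-For example (a sketch only; the token Semicolon and the returned strings
-are made up for this illustration and are not part of m2parsergen), an
-error brace could skip tokens until a synchronization point is reached:
-
-rule():
-  x:A y:B z:C {{ "ok" }}
-              ? {{ (* recovery sketch: skip ahead to the next Semicolon;
-                    * assumes one eventually appears in the stream *)
-                   while yy_current() <> Semicolon do
-                     ignore (yy_get_next())
-                   done;
-                   "recovered at " ^ !yy_position }}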
-
-
-
-*** How to call the parser ***
-
-The rules are rewritten into an OCaml let-binding:
-
-let rec parse_<rule1> ... = ...
- and parse_<rule2> ... = ...
- ...
- and parse_<ruleN> ... = ...
-in
-
-i.e. there are lots of functions, and the names of the functions are
-"parse_" plus the names of the rules. You can call any of these functions.
-
-The first two arguments of the functions have a special meaning; the
-other arguments are the arguments coming from the rule description:
-
-rule(a,b):
- ...
-
-===>
-
-let rec parse_rule yy_current yy_get_next a b = ...
-
-The first argument, yy_current, is a function that returns the current
-token. The second argument, yy_get_next, is a function that switches
-to the next token, and returns it.
-
-If the tokens are stored in a list, this may be a definition:
-
-let input = ref [ Token1; Token2; ... ] in
-let yy_current() = List.hd !input in
-let yy_get_next () =
- input := List.tl !input;
- List.hd !input
-
-When you call one of the parser functions, the current token must
-already be loaded, i.e. yy_current returns the first token to be matched
-by the function.
-
-After the function has returned, the current token is the token
-following the sequence of tokens that have been matched by the
-function.
-
-The function returns the value computed by the OCaml code brace of the
-rule (or the value of the error brace).
-
-If the rule is not applicable, the exception Not_found is raised.
-
-If the rule is applicable, but it does not match, the exception
-Parsing.Parse_error is raised.
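-
-Putting it all together (a sketch only: it reuses the prefix_term
-example from above, assumes the generated "let rec parse_... in" binding
-is in scope at this point, and adds a hypothetical Eof token so that the
-token stream does not run dry after the last matched token):
-
-let input = ref [ Plus_symbol; Left_paren; Number 1; Comma;
-                  Number 2; Right_paren; Eof ] in
-let yy_current () = List.hd !input in
-let yy_get_next () =
-  input := List.tl !input;
-  List.hd !input
-in
-(* prefix_term() has an empty argument list, so only the two standard
- * arguments are passed: *)
-let v = parse_prefix_term yy_current yy_get_next in
-(* v = 3 for the token sequence "+(1,2)"; the current token is now Eof *)
-print_int v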
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-type declaration =
- D_token of string (* D_token name *)
- | D_typed_token of string (* D_typed_token name *)
-;;
-
-type symbol =
- U_symbol of (string * string option) (* U_symbol(token, label) *)
- | L_symbol of (string * string list * string option)
- (* L_symbol(token, args, label) *)
- | L_indirect of (string * string list * string option)
-;;
-
-
-type modifier =
- Exact
- | Option
- | Repetition
-;;
-
-
-type pattern =
- { pat_symbol : symbol;
- pat_modifier : modifier;
- }
-
-
-type branch =
- { branch_selector : symbol;
- branch_early_code : (string * int * int);
- branch_binding_code : (string * int * int);
- branch_pattern : pattern list;
- branch_result_code : (string * int * int);
- branch_error_code : (string * int * int) option;
- }
-;;
-
-type rule =
- { rule_name : string;
- rule_arguments : string list; (* List of names *)
- rule_branches : branch list;
- }
-;;
-
-type text =
- { text_decls : declaration list;
- text_rules : rule list;
- }
-;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/05/09 00:03:22 gerd
- * Added [ ml_name ] symbols, where ml_name is an arbitrary
- * OCaml identifier.
- *
- * Revision 1.2 2000/05/08 22:03:01 gerd
- * It is now possible to have a $ {{ }} sequence right BEFORE
- * the first token. This code is executed just after the first token
- * has been recognized.
- *
- * Revision 1.1 2000/05/06 17:36:17 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Parser
-open Ast
-
-(* Overall scheme:
- *
- * The rules are translated to:
- *
- * let rec parse_<rule1> ... = ...
- * and parse_<rule2> ... = ...
- * and ...
- * and parse_<ruleN> ... = ...
- * in
- *
- * Every rule has at least two arguments: 'current' and 'get_next'.
- * 'current()' is the token that should match the first symbol of the
- * rule. 'get_next()' returns the next token.
- *
- * The rules may have further user arguments; these are the next arguments
- * in turn.
- *
- * The rules return the user value. After they have returned to the caller
- * the current token is the token that follows the sequence of tokens
- * matching the rule.
- *
- * The rules will raise:
- * - Not_found if the first token does not match
- * - Parsing.Parse_error if the rest does not match.
- *
- * Rule scheme:
- *
- * rule(arg1,arg2,...):
- * (l1:x1)
- * {{ let-CODE }}
- * (l2:y2(name1,...)) y3 ...
- * {{ CODE }}
- * ? {{ ?-CODE }}
- * | x2 ...
- * | ...
- * | xN
- *
- * let parse_<rule> current get_next arg1 arg2 ... =
- * match current() with
- * S(x1) -> ...
- * | S(x2) -> ...
- * | ...
- * | S(xN) -> ...
- * | _ -> raise Not_found
- *
- * Here, S(xi) denotes the set of tokens matched by xi without all tokens
- * already matched by x1 to x(i-1). (If S(xi) = empty, a warning is printed,
- * and this branch of the rule is omitted.)
- *
- * S(xi) may be a set because xi may be a reference to another rule. In this
- * case, S(xi) bases on the set of tokens that match the first symbol of
- * the other rule. (In general, S(xi) must be computed recursively.)
- *
- * If the "?" clause is present, every branch is embraced by the following:
- *
- * let position = ref "<Label of x1>" in
- * ( try ...
- * with Parsing.Parse_error -> ( <<?-CODE>> )
- * )
- *
- * Next: The "..." is
- *
- * OPTIONAL: let <l1> = parse_<rule(x1)> in
- * <<let-CODE>>
- * M(y1)
- * M(y2)
- * ...
- * M(yN)
- * <<CODE>>
- *
- * If x1 is a rule invocation, it is now parsed, and the result is bound
- * to a variable.
- *
- * Note: After x1 has matched, the Caml variable <l1> must be either
- * bound to the result of the sub parsing, or to the value associated
- * with the token (if any). The latter is already done in the main
- * "match" statement, i.e. "match ... with S(x1) -> ..." is actually
- * "match ... with Token1 <l1> -> ...".
- *
- * Note: After calling parse_<rule(x1)> the exception Not_found is NEVER
- * converted to Parsing.Parse_error. It is simply not possible that this
- * happens.
-
- * For every remaining symbol yi of the rule, a matching statement M(yi)
- * is produced. These statements have the form:
- *
- * OPTIONAL: position := "<Label of yi>";
- * CASE: yi is a token without associated value
- * let yy_i = get_next() OR current() in
- * if yy_i <> Token(yi) then raise Parsing.Parse_error;
- * CASE: yi is a token with value
- * let yy_i = get_next() OR current() in
- * let <li> = match yy_i with Token x -> x | _ -> raise Parsing.Parse_error
- * in
- * CASE: yi is a rule invocation
- * OPTIONAL: let _ = get_next() in
- * let <li> = try parse_<rule(yi)>
- * with Not_found -> raise Parsing.Parse_error in
- *
- * yy_i is get_next() if y(i-1) was a token, and yy_i is current() if
- * y(i-1) was a rule invocation.
- *
- * Repetitions:
- *
- * If yi = (yi')*:
- *
- * CASE no label given:
- *
- * ( try
- * while true do
- * M(yi') with the modification that top-level mismatches raise
- * Not_found instead of Parsing.Parse_error
- * done
- * with Not_found -> ()
- * )
- *
- * CASE a label <li> is given: The list of results must be bound to <li>!
- *
- * let yy_list = ref [] in
- * ( try
- * while true do
- * let yy_first = M(yi') (with some modifications) in
- * yy_list := yy_first :: !yy_list;
- * done
- * with Not_found -> ()
- * );
- * let <li> = List.rev !yy_list in
- *
- * Note that this scheme minimizes stack and heap allocations.
- *
- * Options:
- *
- * If yi = (yi')?:
- *
- * CASE no label given:
- *
- * ( try
- * M(yi') with the modification that top-level mismatches raise
- * Not_found instead of Parsing.Parse_error
- * with Not_found -> ()
- * )
- *
- * CASE a label <li> is given: The optional result must be bound to <li>!
- *
- * let <li> =
- * try
- * Some( M(yi') (with some modifications) )
- * with Not_found -> None
- *     in
- *)
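
As an illustration of the scheme above, here is a minimal sketch of the kind of
function the generator emits. The grammar is hypothetical: token declarations
A, B, a typed token C carrying an int, and the single rule
"pair(): A n:C {{ n }}". The code is hand-written and simplified, but it follows
the translation described in the comment:

type token = A | B | C of int

let parse_pair yy_current yy_get_next =
  match yy_current() with
    A ->
      (* A is the selector token; the typed token C must follow and
       * carries the user value *)
      ( let n =
          match yy_get_next() with
            C x -> x
          | _ -> raise Parsing.Parse_error in
        let result = n in
        ignore(yy_get_next());   (* step past the last matched token *)
        result )
  | _ -> raise Not_found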
-
-
-let lookup_rule tree name =
- try
- List.find (fun r -> r.rule_name = name) tree.text_rules
- with
- Not_found ->
- failwith ("Rule `" ^ name ^ "' not found")
-;;
-
-
-let is_typed tree name =
- (* Find out whether the token 'name' is typed or not *)
- let decl =
- try
- List.find (fun d -> match d with
- D_token n -> n = name
- | D_typed_token n -> n = name
- )
- tree.text_decls
- with
- Not_found ->
- failwith ("Token `" ^ name ^ "' not found")
- in
- match decl with
- D_token _ -> false
- | D_typed_token _ -> true
-;;
-
-
-let label_of_symbol tree sym =
- match sym with
- U_symbol (tok, lab) ->
- (* if is_typed tree tok then lab else None *)
- lab
- | L_symbol (_, _, lab) -> lab
- | L_indirect (_, _, lab) -> lab
-;;
-
-
-let is_untyped_U_symbol tree sym =
- match sym with
- U_symbol (tok, _) ->
- not(is_typed tree tok)
- | L_symbol (_, _, _) -> false
- | L_indirect (_, _, _) -> false
-;;
-
-
-
-let rec set_of_list l =
- (* Removes duplicate members of l *)
- match l with
- [] -> []
- | x :: l' -> if List.mem x l' then set_of_list l' else x :: (set_of_list l')
-;;
-
-
-let selector_set_of_rule tree name =
- (* Determines the set of tokens that match the first symbol of a rule *)
-
- let rec collect visited_rules name =
- if List.mem name visited_rules then
- []
- else
- let r = lookup_rule tree name in
- List.flatten
- (List.map
- (fun branch ->
- match branch.branch_selector with
- U_symbol (tok_name,_) ->
- [ tok_name ]
- | L_symbol (rule_name, _, _) ->
- collect (name :: visited_rules) rule_name
- | L_indirect (_, _, _) ->
- failwith("The first symbol in rule `" ^ name ^
- "' is an indirect call; this is not allowed")
- )
- r.rule_branches
- )
- in
- set_of_list (collect [] name)
-;;
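
For illustration, with a hypothetical grammar fragment

  s(): A {{ ... }} | B {{ ... }} | n:C {{ ... }}
  r(): x:s() ... {{ ... }}

selector_set_of_rule tree "r" follows the leading call to s() and yields the
token names ["A"; "B"; "C"]; visited_rules cuts off (left-)recursive cycles,
and set_of_list removes duplicate entries.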
-
-
-let output_code_location b file_name (_, line, column) =
- Buffer.add_string b "\n";
- Buffer.add_string b ("# " ^ string_of_int line ^ " \"" ^
- file_name ^ "\"\n");
- Buffer.add_string b (String.make column ' ')
-;;
-
-
-let phantasy_line = ref 100000;;
-
-let output_code b file_name ((code, line, column) as triple) =
- if code <> "" then begin
- output_code_location b file_name triple;
- Buffer.add_string b code;
- Buffer.add_string b ("\n# " ^ string_of_int !phantasy_line ^ " \"<Generated Code>\"\n");
- phantasy_line := !phantasy_line + 10000;
- end
-;;
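
The strings of the form # <line> "<file>" emitted here are OCaml line-number
directives: they make the compiler report errors inside {{ ... }} code at the
position in the original grammar source, while the large "phantasy" numbers
mark the surrounding generated code. For example (hypothetical file name and
code fragment), a fragment recorded at line 42, column 3 is emitted roughly as:

# 42 "sample.m2y"
   prerr_endline "hello"
# 100000 "<Generated Code>"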
-
-
-let process_branch b file_name tree branch =
-
- let make_rule_invocation called_rule args lab allow_not_found =
- (* Produces: let <label> = parse_<called_rule> ... args in
- * If not allow_not_found, the exception Not_found is caught and
- * changed into Parsing.Parse_error.
- *)
- let r = lookup_rule tree called_rule in
- if List.length r.rule_arguments <> List.length args then
- failwith("Calling rule `" ^ called_rule ^ "' with the wrong number of arguments!");
-
- Buffer.add_string b "let ";
- begin match lab with
- None -> Buffer.add_string b "_"
- | Some l -> Buffer.add_string b l
- end;
- Buffer.add_string b " = ";
- if not allow_not_found then
- Buffer.add_string b "try ";
- Buffer.add_string b "parse_";
- Buffer.add_string b called_rule;
- Buffer.add_string b " yy_current yy_get_next";
- List.iter
- (fun a -> Buffer.add_string b " ";
- Buffer.add_string b a;
- )
- args;
- if not allow_not_found then
- Buffer.add_string b " with Not_found -> raise Parsing.Parse_error";
- Buffer.add_string b " in\n"
- in
-
- let make_indirect_rule_invocation ml_name args lab allow_not_found =
- (* Produces: let <label> = ml_name ... args in
- * If not allow_not_found, the exception Not_found is caught and
- * changed into Parsing.Parse_error.
- *)
- Buffer.add_string b "let ";
- begin match lab with
- None -> Buffer.add_string b "_"
- | Some l -> Buffer.add_string b l
- end;
- Buffer.add_string b " = ";
- if not allow_not_found then
- Buffer.add_string b "try ";
- Buffer.add_string b ml_name;
- Buffer.add_string b " yy_current yy_get_next";
- List.iter
- (fun a -> Buffer.add_string b " ";
- Buffer.add_string b a;
- )
- args;
- if not allow_not_found then
- Buffer.add_string b " with Not_found -> raise Parsing.Parse_error";
- Buffer.add_string b " in\n"
- in
-
- let process_symbol sym previous_was_token allow_not_found =
- match sym with
- U_symbol(tok, lab) ->
- (* Distinguish between simple tokens and typed tokens *)
- if is_typed tree tok then begin
- (* Typed token *)
- Buffer.add_string b "let ";
- begin match lab with
- None -> Buffer.add_string b "_"
- | Some l -> Buffer.add_string b l
- end;
- Buffer.add_string b " = match ";
- if previous_was_token then
- Buffer.add_string b "yy_get_next()"
- else
- Buffer.add_string b "yy_current()";
- Buffer.add_string b " with ";
- Buffer.add_string b tok;
- Buffer.add_string b " x -> x | _ -> raise ";
- if allow_not_found then
- Buffer.add_string b "Not_found"
- else
- Buffer.add_string b "Parsing.Parse_error";
- Buffer.add_string b " in\n";
- end
- else begin
- (* Simple token *)
- Buffer.add_string b "if (";
- if previous_was_token then
- Buffer.add_string b "yy_get_next()"
- else
- Buffer.add_string b "yy_current()";
- Buffer.add_string b ") <> ";
- Buffer.add_string b tok;
- Buffer.add_string b " then raise ";
- if allow_not_found then
- Buffer.add_string b "Not_found;\n"
- else
- Buffer.add_string b "Parsing.Parse_error;\n"
- end
- | L_symbol(called_rule, args, lab) ->
- if previous_was_token then
- Buffer.add_string b "ignore(yy_get_next());\n";
- make_rule_invocation called_rule args lab allow_not_found
- | L_indirect(ml_name, args, lab) ->
- if previous_was_token then
- Buffer.add_string b "ignore(yy_get_next());\n";
- make_indirect_rule_invocation ml_name args lab allow_not_found
- in
-
- let process_pattern (current_position, previous_was_token) pat =
- (* Assign "position" if necessary. *)
- let new_position =
- if branch.branch_error_code <> None then begin
- match pat.pat_symbol with
- U_symbol(_,Some l) -> l
- | L_symbol(_,_,Some l) -> l
- | L_indirect(_,_,Some l) -> l
- | _ -> ""
- end
- else ""
- in
- if new_position <> current_position then begin
- Buffer.add_string b "yy_position := \"";
- Buffer.add_string b new_position;
- Buffer.add_string b "\";\n";
- end;
-
- let this_is_token =
- match pat.pat_symbol with
- U_symbol(_,_) -> pat.pat_modifier = Exact
- | L_symbol(_,_,_) -> false
- | L_indirect(_,_,_) -> false
- in
-
- (* First distinguish between Exact, Option, and Repetition: *)
- begin match pat.pat_modifier with
- Exact ->
- process_symbol pat.pat_symbol previous_was_token false
- | Option ->
- begin match label_of_symbol tree pat.pat_symbol with
- None ->
- (* CASE: optional symbol without label *)
- (* OPTIMIZATION: If the symbol is
-		  * a token, the check becomes very simple.
- *)
- if (match pat.pat_symbol with
- U_symbol(t,_) -> not (is_typed tree t) | _ -> false)
- then begin
- let tok = match pat.pat_symbol with
- U_symbol(t,_) -> t | _ -> assert false in
- (* Optimized case *)
- Buffer.add_string b "if ";
- if previous_was_token then
- Buffer.add_string b "yy_get_next()"
- else
- Buffer.add_string b "yy_current()";
- Buffer.add_string b " = ";
- Buffer.add_string b tok;
- Buffer.add_string b " then ignore(yy_get_next());\n";
- end
- else begin
- (* General, non-optimized case: *)
- Buffer.add_string b "( try (";
- process_symbol pat.pat_symbol previous_was_token true;
- Buffer.add_string b "ignore(yy_get_next());\n";
- Buffer.add_string b ") with Not_found -> ());\n";
- end
- | Some l ->
- (* CASE: optional symbol with label *)
- if is_untyped_U_symbol tree pat.pat_symbol then begin
- (* SUBCASE: The label becomes a boolean variable *)
- Buffer.add_string b "let ";
- Buffer.add_string b l;
- Buffer.add_string b " = try (";
- process_symbol pat.pat_symbol previous_was_token true;
- Buffer.add_string b ");\n";
- Buffer.add_string b "ignore(yy_get_next());\n";
- Buffer.add_string b "true with Not_found -> false in\n";
- end
- else begin
- (* SUBCASE: the symbol has a value *)
- Buffer.add_string b "let ";
- Buffer.add_string b l;
- Buffer.add_string b " = try let yy_tok = Some(";
- process_symbol pat.pat_symbol previous_was_token true;
- Buffer.add_string b l;
- Buffer.add_string b ") in\n";
-
- if (match pat.pat_symbol with
- U_symbol(_,_) -> true | _ -> false) then
- Buffer.add_string b "ignore(yy_get_next());\n";
-
- Buffer.add_string b "yy_tok with Not_found -> None in\n";
- end
- end
- | Repetition ->
- begin match label_of_symbol tree pat.pat_symbol with
- None ->
- (* CASE: repeated symbol without label *)
- (* OPTIMIZATION: If the symbol is
- * a token, the loop becomes very simple.
- *)
- if (match pat.pat_symbol with
- U_symbol(t,_) -> not (is_typed tree t) | _ -> false)
- then begin
- let tok = match pat.pat_symbol with
- U_symbol(t,_) -> t | _ -> assert false in
- if previous_was_token then begin
- (* Optimized case I *)
- Buffer.add_string b "while yy_get_next() = ";
- Buffer.add_string b tok;
- Buffer.add_string b " do () done;\n";
- end
- else begin
- (* Optimized case II *)
- Buffer.add_string b "if yy_current() = ";
- Buffer.add_string b tok;
- Buffer.add_string b " then (";
- Buffer.add_string b "while yy_get_next() = ";
- Buffer.add_string b tok;
- Buffer.add_string b " do () done);\n";
- end
- end
- else begin
- (* General, non-optimized case: *)
- if previous_was_token then
- Buffer.add_string b "ignore(yy_get_next());\n";
- Buffer.add_string b "( try while true do (";
- process_symbol pat.pat_symbol false true;
-
- if (match pat.pat_symbol with
- U_symbol(_,_) -> true | _ -> false) then
- Buffer.add_string b "ignore(yy_get_next());\n"
- else
- Buffer.add_string b "();\n";
-
- Buffer.add_string b ") done with Not_found -> ());\n";
- end
- | Some l ->
- (* CASE: repeated symbol with label *)
- if is_untyped_U_symbol tree pat.pat_symbol then begin
- (* SUBCASE: The label becomes an integer variable *)
- if previous_was_token then
- Buffer.add_string b "ignore(yy_get_next());\n";
- Buffer.add_string b "let yy_counter = ref 0 in\n";
- Buffer.add_string b "( try while true do \n";
- process_symbol pat.pat_symbol false true;
- Buffer.add_string b "incr yy_counter;\n";
-
- if (match pat.pat_symbol with
- U_symbol(_,_) -> true | _ -> false) then
- Buffer.add_string b "ignore(yy_get_next());\n";
-
- Buffer.add_string b "done with Not_found -> ());\n";
- Buffer.add_string b "let ";
- Buffer.add_string b l;
- Buffer.add_string b " = !yy_counter in\n";
- end
- else begin
- (* SUBCASE: the symbol has a value *)
- if previous_was_token then
- Buffer.add_string b "ignore(yy_get_next());\n";
- Buffer.add_string b "let yy_list = ref [] in\n";
- Buffer.add_string b "( try while true do \n";
- process_symbol pat.pat_symbol false true;
- Buffer.add_string b "yy_list := ";
- Buffer.add_string b l;
- Buffer.add_string b " :: !yy_list;\n";
-
- if (match pat.pat_symbol with
- U_symbol(_,_) -> true | _ -> false) then
- Buffer.add_string b "ignore(yy_get_next());\n";
-
- Buffer.add_string b "done with Not_found -> ());\n";
- Buffer.add_string b "let ";
- Buffer.add_string b l;
- Buffer.add_string b " = List.rev !yy_list in\n";
- end
- end
- end;
-
- (* Continue: *)
- (new_position, this_is_token)
- in
-
-
- let process_inner_branch current_position =
- (* If there is "early code", run this now: *)
- output_code b file_name branch.branch_early_code;
- Buffer.add_string b "\n";
-
- (* If the first symbol is a rule invocation, call the corresponding
- * parser function now.
- *)
- let previous_was_token =
- begin match branch.branch_selector with
- U_symbol(_,_) ->
- true
- | L_symbol(called_rule, args, lab) ->
- make_rule_invocation called_rule args lab true;
- false
- | L_indirect(_,_,_) ->
- failwith("The first symbol in some rule is an indirect call; this is not allowed")
- end
- in
-
- (* Now output the "let-CODE". *)
- output_code b file_name branch.branch_binding_code;
- Buffer.add_string b "\n";
-
- (* Process the other symbols in turn: *)
- let (_, previous_was_token') =
- (List.fold_left
- process_pattern
- (current_position, previous_was_token)
- branch.branch_pattern
- )
- in
-
- (* Special case:
- *
- * If previous_was_token', we must invoke yy_get_next one more time.
- * This is deferred until "CODE" is executed to give this code
- * the chance to make the next token available (in XML, the next token
- * might come from a different entity, and "CODE" must switch to this
- * entity).
- *)
-
- (* Now output "CODE": *)
- Buffer.add_string b "let result = \n";
- output_code b file_name branch.branch_result_code;
- Buffer.add_string b "\nin\n";
-
- if previous_was_token' then
- Buffer.add_string b "ignore(yy_get_next());\nresult\n"
- else
- Buffer.add_string b "result\n"
- in
-
- (* If we have a ? clause, generate now the "try" statement *)
- match branch.branch_error_code with
- None ->
- Buffer.add_string b "( ";
- process_inner_branch "";
- Buffer.add_string b " )";
- | Some code ->
-
- (* let position = ref "<label>" in *)
-
- Buffer.add_string b "let yy_position = ref \"";
- let current_position =
- match branch.branch_selector with
- U_symbol(_,_) -> ""
- | L_symbol(_,_,None) -> ""
- | L_symbol(_,_,Some l) -> l
- | L_indirect(_,_,None) -> ""
- | L_indirect(_,_,Some l) -> l
- in
- Buffer.add_string b current_position;
- Buffer.add_string b "\" in\n";
-
- (* The "try" statement: *)
-
- Buffer.add_string b "( try (\n";
-
- process_inner_branch current_position;
-
- Buffer.add_string b "\n) with Parsing.Parse_error -> (\n";
- output_code b file_name code;
- Buffer.add_string b "\n))\n"
-;;
-
-
-let process b file_name tree =
- (* Iterate over the rules and output the parser functions: *)
- let is_first = ref true in
- List.iter
- (fun r ->
-
- (* Generate the function header: *)
-
- if !is_first then
- Buffer.add_string b "let rec "
- else
- Buffer.add_string b "and ";
- is_first := false;
- Buffer.add_string b "parse_";
- Buffer.add_string b r.rule_name;
- Buffer.add_string b " yy_current yy_get_next";
- List.iter
- (fun arg -> Buffer.add_string b " ";
- Buffer.add_string b arg)
- r.rule_arguments;
- Buffer.add_string b " =\n";
-
- (* Generate the "match" statement: *)
-
- Buffer.add_string b "match yy_current() with\n";
- let s_done = ref [] in
- (* s_done: The set of already matched tokens *)
-
- List.iter
- (fun branch ->
- match branch.branch_selector with
- U_symbol(tok, lab) ->
- (* A simple token *)
- if List.mem tok !s_done then begin
- prerr_endline("WARNING: In rule `" ^ r.rule_name ^
- "': Match for token `" ^
- tok ^ "' hidden by previous match");
- end
- else
- if is_typed tree tok then begin
- match lab with
- None ->
- Buffer.add_string b "| ";
- Buffer.add_string b tok;
- Buffer.add_string b " _ -> ";
- process_branch b file_name tree branch;
- Buffer.add_string b "\n";
- s_done := tok :: !s_done;
- | Some l ->
- Buffer.add_string b "| ";
- Buffer.add_string b tok;
- Buffer.add_string b " ";
- Buffer.add_string b l;
- Buffer.add_string b " -> ";
- process_branch b file_name tree branch;
- Buffer.add_string b "\n";
- s_done := tok :: !s_done;
- end
- else begin
- Buffer.add_string b "| ";
- Buffer.add_string b tok;
- Buffer.add_string b " -> ";
- process_branch b file_name tree branch;
- Buffer.add_string b "\n";
- s_done := tok :: !s_done;
- end
- | L_symbol(called_rule, args, lab) ->
- (* An invocation of a rule *)
- let s_rule = selector_set_of_rule tree called_rule in
- let s_rule' =
- List.filter
- (fun tok ->
- if List.mem tok !s_done then begin
- prerr_endline("WARNING: In rule `" ^ r.rule_name ^
- "': Match for token `" ^
- tok ^ "' hidden by previous match");
- false
- end
- else true)
- s_rule in
- if s_rule' <> [] then begin
- Buffer.add_string b "| ( ";
- let is_first = ref true in
- List.iter
- (fun tok ->
- if not !is_first then
- Buffer.add_string b " | ";
- is_first := false;
- Buffer.add_string b tok;
- if is_typed tree tok then
- Buffer.add_string b " _";
- )
- s_rule';
- Buffer.add_string b ") -> ";
- process_branch b file_name tree branch;
- Buffer.add_string b "\n";
- s_done := s_rule' @ !s_done;
- end
- | L_indirect(ml_name, args, lab) ->
- (* An invocation of an indirect rule *)
- failwith("The first symbol in rule `" ^ r.rule_name ^
- "' is an indirect call; this is not allowed")
- )
- r.rule_branches;
-
- Buffer.add_string b "\n| _ -> raise Not_found\n";
- )
- tree.text_rules;
-
- Buffer.add_string b " in\n"
-;;
-
-
-let count_lines s =
- (* returns number of lines in s, number of columns of the last line *)
- let l = String.length s in
-
- let rec count n k no_cr no_lf =
- let next_cr =
- if no_cr then
- (-1)
- else
- try String.index_from s k '\013' with Not_found -> (-1) in
- let next_lf =
- if no_lf then
- (-1)
- else
- try String.index_from s k '\010' with Not_found -> (-1) in
- if next_cr >= 0 & (next_lf < 0 or next_cr < next_lf) then begin
- if next_cr+1 < l & s.[next_cr+1] = '\010' then
- count (n+1) (next_cr+2) false (next_lf < 0)
- else
- count (n+1) (next_cr+1) false (next_lf < 0)
- end
- else if next_lf >= 0 then begin
- count (n+1) (next_lf+1) (next_cr < 0) false
- end
- else
- n, (l - k)
-
- in
- count 0 0 false false
-;;
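
A few worked examples of the line/column accounting above (a sketch; the
string literals are arbitrary):

(* count_lines "ab\r\ncd" = (1, 2)   -- one CR LF, the last line is "cd"
 * count_lines "x\ny\nz"  = (2, 1)   -- two LFs, the last line is "z"
 * count_lines "plain"    = (0, 5)   -- no line separator at all *)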
-
-
-type scan_context =
- { mutable old_line : int;
- mutable old_column : int;
- mutable line : int;
- mutable column : int;
- }
-;;
-
-
-let rec next_token context lexbuf =
- let t = Lexer.scan_file lexbuf in
- let line = context.line in
- let column = context.column in
- context.old_line <- line;
- context.old_column <- column;
- let n_lines, n_columns = count_lines (Lexing.lexeme lexbuf) in
- if n_lines > 0 then begin
- context.line <- line + n_lines;
- context.column <- n_columns;
- end
- else
- context.column <- column + n_columns;
- match t with
- Space -> next_token context lexbuf
- | Code(s,_,_) -> Code(s,line,column + 2)
- | Eof -> failwith "Unexpected end of file"
- | _ -> t
-;;
-
-
-let parse_and_generate ch =
- let b = Buffer.create 20000 in
-
- let rec find_sep context lexbuf =
- let t = Lexer.scan_header lexbuf in
- let line = context.line in
- let column = context.column in
- context.old_line <- line;
- context.old_column <- column;
- let n_lines, n_columns = count_lines (Lexing.lexeme lexbuf) in
- if n_lines > 0 then begin
- context.line <- line + n_lines;
- context.column <- n_columns;
- end
- else
- context.column <- column + n_columns;
- match t with
- Code(s,_,_) ->
- Buffer.add_string b s;
- find_sep context lexbuf
- | Eof -> failwith "Unexpected end of file"
- | Separator -> ()
- | _ -> assert false
- in
-
- let rec find_rest context lexbuf =
- let t = Lexer.scan_header lexbuf in
- let line = context.line in
- let column = context.column in
- context.old_line <- line;
- context.old_column <- column;
- let n_lines, n_columns = count_lines (Lexing.lexeme lexbuf) in
- if n_lines > 0 then begin
- context.line <- line + n_lines;
- context.column <- n_columns;
- end
- else
- context.column <- column + n_columns;
- match t with
- Code(s,_,_) ->
- Buffer.add_string b s;
- find_rest context lexbuf
- | Eof -> ()
- | _ -> assert false
- in
-
- (* First read until '%%' *)
- let lexbuf = Lexing.from_channel ch in
- let context = { old_line = 0; old_column = 0; line = 1; column = 0 } in
- let file_name = "stdin" in
- try
- output_code_location b file_name ("", 1, 0);
- find_sep context lexbuf;
- (* Parse the following text *)
- let text = (Parser.text (next_token context) lexbuf : Ast.text) in
- (* Process it: *)
- process b file_name text;
- (* Read rest *)
- output_code_location b file_name ("", context.line, context.column);
- find_rest context lexbuf;
- (* Output everything: *)
- print_string (Buffer.contents b)
- with
- any ->
- Printf.eprintf
- "Error at line %d column %d: %s\n"
- context.old_line
- context.old_column
- (Printexc.to_string any);
- exit 1
-;;
-
-
-parse_and_generate stdin;;
-exit 0;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.7 2000/08/17 00:33:02 gerd
- * Bugfix: tok* and tok? work now if tok is an untyped token
- * without label.
- *
- * Revision 1.6 2000/05/14 20:59:24 gerd
- * Added "phantasy line numbers" to help find erroneous locations.
- *
- * Revision 1.5 2000/05/14 20:41:58 gerd
- * x: Token? means: if Token is detected x=true else x=false.
- * x: Token* means: x becomes the number of occurrences of Token.
- *
- * Revision 1.4 2000/05/09 00:03:22 gerd
- * Added [ ml_name ] symbols, where ml_name is an arbitrary
- * OCaml identifier.
- *
- * Revision 1.3 2000/05/08 22:03:01 gerd
- * It is now possible to have a $ {{ }} sequence right BEFORE
- * the first token. This code is executed just after the first token
- * has been recognized.
- *
- * Revision 1.2 2000/05/06 21:51:08 gerd
- * Numerous bugfixes.
- *
- * Revision 1.1 2000/05/06 17:36:17 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-{
- open Parser
-}
-
-rule scan_file = parse
- "/*" [^ '*']* ('*'+ [^ '/' '*'] [^ '*']* )* '*'* "*/"
- { Space }
- | "%token"
- { Token }
- | "<" [' ' '\t' '\r' '\n']* ">"
- { Type
- }
- | [ 'a'-'z' ] [ 'a'-'z' 'A'-'Z' '0'-'9' '_' ]*
- { let s = Lexing.lexeme lexbuf in
- Lname s
- }
- | [ 'A'-'Z' ] [ 'a'-'z' 'A'-'Z' '0'-'9' '_' ]*
- { let s = Lexing.lexeme lexbuf in
- Uname s
- }
- | "%%"
- { Separator }
- | "("
- { Lparen }
- | ","
- { Comma }
- | ")"
- { Rparen }
- | "["
- { Lbracket }
- | "]"
- { Rbracket }
- | ":"
- { Colon }
- | "{{" [^ '}']* ( '}' [^ '}']+ )* "}}"
- { let s = Lexing.lexeme lexbuf in
- Code (String.sub s 2 (String.length s - 4), 0, 0)
- }
- | "?"
- { Error }
- | "|"
- { Alt }
- | "+"
- { Loop_plus }
- | "*"
- { Loop_star }
- | [' ' '\t' '\r' '\n']+
- { Space }
- | "$"
- { Dollar }
- | eof
- { Eof }
-
-and scan_header = parse
- "%%"
- { Separator }
- | "%"
- { Code("%", 0, 0) }
- | [^ '%']*
- { Code(Lexing.lexeme lexbuf, 0, 0) }
- | eof
- { Eof }
-
-and scan_rest = parse
- _*
- { Code(Lexing.lexeme lexbuf, 0, 0) }
- | eof
- { Eof }
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/05/09 00:03:22 gerd
- * Added [ ml_name ] symbols, where ml_name is an arbitrary
- * OCaml identifier.
- *
- * Revision 1.2 2000/05/06 21:51:24 gerd
- * New symbol Dollar.
- *
- * Revision 1.1 2000/05/06 17:36:17 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-/* $Id$
- * ----------------------------------------------------------------------
- *
- */
-
-%{
- open Ast
-
-%}
-
-%token Space
-%token Token
-%token Type
-%token <string> Lname
-%token <string> Uname
-%token Separator
-%token Lparen
-%token Rparen
-%token Comma
-%token Colon
-%token <string * int * int> Code
-%token Error
-%token Alt
-%token Loop_plus
-%token Loop_star
-%token Dollar
-%token Lbracket
-%token Rbracket
-%token Eof
-
-%start text
-%type <Ast.text> text
-
-%%
-
-text:
- declarations rules
- { { text_decls = $1; text_rules = $2; } }
-
-declarations:
- declaration declarations
- { $1 :: $2 }
-| Separator
- { [] }
-
-declaration:
- Token Uname
- { D_token $2 }
-| Token Type Uname
- { D_typed_token $3 }
-
-rules:
- rule rules
- { $1 :: $2 }
-| Separator
- { [] }
-
-rule:
- Lname Lparen formal_arguments Colon branches
- { { rule_name = $1;
- rule_arguments = $3;
- rule_branches = $5;
- }
- }
-
-formal_arguments:
- Rparen
- { [] }
-| Lname comma_formal_arguments
- { $1 :: $2 }
-
-comma_formal_arguments:
- Comma Lname comma_formal_arguments
- { $2 :: $3 }
-| Rparen
- { [] }
-
-branches:
- branch alt_branches
- { $1 :: $2 }
-
-alt_branches:
- Alt branch alt_branches
- { $2 :: $3 }
-|
- { [] }
-
-branch:
- simple_branch
- { $1 }
-| Dollar Code simple_branch
- { { $3 with branch_early_code = $2 } }
-
-simple_branch:
- symbol Dollar Code patterns Code opt_error_handler
- { { branch_selector = $1;
- branch_early_code = ("",0,0);
- branch_binding_code = $3;
- branch_pattern = $4;
- branch_result_code = $5;
- branch_error_code = $6;
- }
- }
-| symbol patterns Code opt_error_handler
- { { branch_selector = $1;
- branch_early_code = ("",0,0);
- branch_binding_code = ("", 0, 0);
- branch_pattern = $2;
- branch_result_code = $3;
- branch_error_code = $4;
- }
- }
-
-patterns:
- pattern patterns
- { $1 :: $2 }
-|
- { [] }
-
-pattern:
- symbol Loop_star
- { { pat_symbol = $1;
- pat_modifier = Repetition;
- }
- }
-| symbol Error
- { { pat_symbol = $1;
- pat_modifier = Option;
- }
- }
-| symbol
- { { pat_symbol = $1;
- pat_modifier = Exact;
- }
- }
-
-symbol:
- Lname Colon Uname
- { U_symbol($3, Some $1) }
-| Lname Colon Lname Lparen actual_arguments
- { L_symbol($3, $5, Some $1) }
-| Lname Colon Lbracket Lname Rbracket Lparen actual_arguments
- { L_indirect($4, $7, Some $1) }
-| Uname
- { U_symbol($1, None) }
-| Lname Lparen actual_arguments
- { L_symbol($1, $3, None) }
-| Lbracket Lname Rbracket Lparen actual_arguments
- { L_indirect($2, $5, None) }
-
-
-actual_arguments:
- Rparen
- { [] }
-| Lname comma_actual_arguments
- { $1 :: $2 }
-
-comma_actual_arguments:
- Rparen
- { [] }
-| Comma Lname comma_actual_arguments
- { $2 :: $3 }
-
-opt_error_handler:
- Error Code
- { Some $2 }
-|
- { None }
-
-%%
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.4 2000/05/09 00:03:22 gerd
- * Added [ ml_name ] symbols, where ml_name is an arbitrary
- * OCaml identifier.
- *
- * Revision 1.3 2000/05/08 22:03:01 gerd
- * It is now possible to have a $ {{ }} sequence right BEFORE
- * the first token. This code is executed just after the first token
- * has been recognized.
- *
- * Revision 1.2 2000/05/06 21:51:46 gerd
- * New Dollar tag.
- *
- * Revision 1.1 2000/05/06 17:36:17 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-
-type token =
- A | B | C of int | EOF
-;;
-
-%%
-
-%token A
-%token B
-%token <> C
-%token EOF
-
-%%
-
-r():
- one:s()
- {{ }}
- b:B
- two:B?
- three:s()
- {{ prerr_endline ("Result: " ^ string_of_int three) }}
-? {{ prerr_endline ("ERROR: " ^ !yy_position) }}
-
-s():
- A
- {{ }}
- {{ prerr_endline "A"; 0 }}
-| B
- {{ }}
- {{ prerr_endline "B"; 0 }}
-| n:C
- {{ }}
- {{ prerr_endline ("C: " ^ string_of_int n); n }}
-%%
-
-let input = ref [ A; B; B; B; C 5; EOF ] in
-let current() = List.hd !input in
-let next_token () =
- prerr_endline "get_next";
- input := List.tl !input;
- List.hd !input
-in
-parse_r current next_token
-;;
-
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- * Some auxiliary functions
- *)
-
-(**********************************************************************)
-(* Lexing *)
-
-
-open Pxp_types
-open Pxp_lexer_types
-open Pxp_lexers
-open Netconversion
-
-let character enc warner k =
- assert (k>=0);
- if (k >= 0xd800 & k < 0xe000) or (k >= 0xfffe & k <= 0xffff) or k > 0x10ffff
- or (k < 8) or (k = 11) or (k = 12) or (k >= 14 & k <= 31)
- then
- raise (WF_error("Code point " ^ string_of_int k ^
- " outside the accepted range of code points"));
-
- try
- makechar (enc : rep_encoding :> encoding) k
- with
- Not_found ->
- warner # warn ("Code point cannot be represented in internal encoding: "
- ^ string_of_int k);
- ""
-;;
-
-
-let check_name warner name =
- (* produces a warning for names beginning with "xml". *)
- if String.length name >= 3 then begin
- match String.sub name 0 3 with
- ("xml" | "xmL" | "xMl" | "xML" | "Xml" | "XmL" | "XMl" | "XML") ->
- warner # warn ("Name is reserved for future extensions: " ^ name)
- | _ ->
- ()
- end
-;;
-
-
-let tokens_of_content_string lexerset s =
- (* tokenizes general entities and character entities *)
- let lexbuf = Lexing.from_string s in
- let rec next_token () =
- match lexerset.scan_content_string lexbuf with
- Eof -> []
- | tok -> tok :: next_token()
- in
- next_token()
-;;
-
-
-let rec expand_attvalue_with_rec_check lexerset dtd s warner entities norm_crlf =
- (* recursively expands general entities and character entities;
- * checks "standalone" document declaration;
- * normalizes whitespace
- *)
- let toklist = tokens_of_content_string lexerset s in
- let rec expand tl =
- match tl with
- [] -> ""
- | ERef n :: tl' ->
- if List.mem n entities then
- raise(WF_error("Recursive reference to general entity `" ^ n ^ "'"));
- let en, extdecl = dtd # gen_entity n in
- if dtd # standalone_declaration && extdecl then
- raise(Validation_error("Reference to entity `" ^ n ^
- "' violates standalone declaration"));
- let rtext, rtext_contains_ext_refs = en # replacement_text in
- if rtext_contains_ext_refs then
- raise(Validation_error("Found reference to external entity in attribute value"));
- expand_attvalue_with_rec_check
- lexerset dtd rtext warner (n :: entities) false ^ expand tl'
- | CRef(-1) :: tl' ->
- if norm_crlf then
- " " ^ expand tl'
- else
-	    "  " ^ expand tl'
- | CRef n :: tl' ->
- character lexerset.lex_encoding warner n ^ expand tl'
- | CharData "<" :: tl' ->
- raise
- (WF_error
- ("Attribute value contains character '<' literally"))
- | CharData x :: tl' ->
- x ^ expand tl'
- | _ -> assert false
- in
- expand toklist
-;;
-
-
-let expand_attvalue lexerset dtd s warner norm_crlf =
-  (* norm_crlf: whether the sequence CR LF counts as a single character
-   * (true) or as two characters (false)
-   *)
- expand_attvalue_with_rec_check lexerset dtd s warner [] norm_crlf
-;;
-
-
-let count_lines s =
- (* returns number of lines in s, number of columns of the last line *)
- let l = String.length s in
-
- let rec count n k no_cr no_lf =
- let next_cr =
- if no_cr then
- (-1)
- else
- try String.index_from s k '\013' with Not_found -> (-1) in
- let next_lf =
- if no_lf then
- (-1)
- else
- try String.index_from s k '\010' with Not_found -> (-1) in
- if next_cr >= 0 & (next_lf < 0 or next_cr < next_lf) then begin
- if next_cr+1 < l & s.[next_cr+1] = '\010' then
- count (n+1) (next_cr+2) false (next_lf < 0)
- else
- count (n+1) (next_cr+1) false (next_lf < 0)
- end
- else if next_lf >= 0 then begin
- count (n+1) (next_lf+1) (next_cr < 0) false
- end
- else
- n, (l - k)
-
- in
- count 0 0 false false
-;;
-
-
-let tokens_of_xml_pi lexers s =
- let lexbuf = Lexing.from_string (s ^ " ") in
- let rec collect () =
- let t = lexers.scan_xml_pi lexbuf in
- match t with
- Pro_eof -> []
- | _ -> t :: collect()
- in
- collect()
-;;
-
-
-let decode_xml_pi pl =
- (* 'pl' must consist of name="value" or name='value' pairs which are returned
-   * as a list of pairs.
- * The "value" is returned as it is; no substitution of &entities; happens.
- *)
- let rec decode pl =
- match pl with
- Pro_name name :: Pro_eq :: Pro_string value :: pl' ->
- (name, value) :: decode pl'
- | [] ->
- []
- | _ ->
- raise (WF_error("Bad XML processing instruction"))
- in
- decode pl
-;;
-
-
-let decode_doc_xml_pi pl =
- match pl with
- [ "version", v ] -> (v, None, None)
- | [ "version", v; "encoding", e ] -> (v, Some e, None)
- | [ "version", v; "standalone", s ] -> (v, None, Some s)
- | [ "version", v; "encoding", e; "standalone", s ] -> (v, Some e, Some s)
- | _ ->
- raise(WF_error("Bad XML declaration"))
-;;
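
A small worked example of the mapping above (a sketch; the pair list stands
for the output of decode_xml_pi applied to an XML declaration):

let (_version, _encoding, _standalone) =
  decode_doc_xml_pi [ "version", "1.0"; "encoding", "ISO-8859-1" ]
  (* = ("1.0", Some "ISO-8859-1", None); combinations other than
   * version [encoding] [standalone], in this order, raise
   * WF_error("Bad XML declaration") *)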
-
-
-let check_text_xml_pi pl =
- match pl with
- | [ "version", v; "encoding", e ] -> ()
- | [ "encoding", e ] -> ()
- | _ ->
- raise(WF_error("Bad XML declaration"))
-;;
-
-
-let check_version_num s =
- let l = String.length s in
- for i = 0 to l - 1 do
- match s.[i] with
- ('a'..'z'|'A'..'Z'|'0'..'9'|
- '-'|'_'|'.'|':') -> ()
- | _ ->
- raise(WF_error("Bad XML version string"))
- done
-;;
-
-
-let check_public_id s =
- let l = String.length s in
- for i = 0 to l - 1 do
- match s.[i] with
- (' '|'\013'|'\010'|'a'..'z'|'A'..'Z'|'0'..'9'|
- '-'|'\''|'('|')'|'+'|','|'.'|'/'|':'|'='|'?'|
- ';'|'!'|'*'|'#'|'@'|'$'|'_'|'%') -> ()
- | _ ->
- raise(WF_error("Illegal character in PUBLIC identifier"))
- done
-;;
-
-
-(**********************************************************************)
-(* list functions *)
-
-
-let rec check_dups l =
- match l with
- [] -> false
- | c :: l' ->
- if List.mem c l' then true else check_dups l'
-;;
-
-
-let rec count pred l =
- match l with
- [] -> 0
- | x :: l' ->
- if pred x then 1 + (count pred l') else count pred l'
-;;
-
-
-(**********************************************************************)
-(* attributes *)
-
-let check_attribute_value_lexically lexerset x t v =
- (* raises x if the attribute value v does not match the lexical rules
- * for attribute type t:
- * - t = A_id: v must be a <name>
- * - t = A_idref: v must match <name>
- * - t = A_idrefs: v must match <names>
- * - t = A_entity: v must match <name>
- * - t = A_entities: v must match <names>
- * - t = A_nmtoken: v must match <nmtoken>
- * - t = A_nmtokens: v must match <nmtokens>
- * - t = A_notation _: v must match <name>
- * - t = A_enum _: v must match <nmtoken>
- * - t = A_cdata: not checked
- *)
- let lexbuf = Lexing.from_string v in
- let rec get_name_list() =
- match lexerset.scan_name_string lexbuf with
- Eof -> []
- | Ignore -> get_name_list()
- | tok -> tok :: get_name_list()
- in
- let l = get_name_list() in
- match t with
- (A_id | A_idref | A_entity | A_notation _) ->
- begin match l with
- [ Name n ] -> ()
- | _ -> raise (Lazy.force x)
- end
- | (A_idrefs | A_entities) ->
- if List.exists (fun tok ->
- match tok with
- Name _ -> false
- | _ -> true) l then
- raise (Lazy.force x)
- | (A_nmtoken | A_enum _) ->
- begin match l with
- [ Name n ] -> ()
- | [ Nametoken n ] -> ()
- | _ -> raise (Lazy.force x)
- end
- | A_nmtokens ->
- if List.exists (fun tok ->
- match tok with
- Name _ -> false
- | Nametoken _ -> false
- | _ -> true
- ) l then
- raise (Lazy.force x)
- | _ -> ()
-;;
-
-
-let split_attribute_value lexerset v =
- (* splits 'v' into a list of names or nmtokens. The white space separating
- * the names/nmtokens in 'v' is suppressed and not returned.
- *)
- let lexbuf = Lexing.from_string v in
- let rec get_name_list() =
- match lexerset.scan_name_string lexbuf with
- Eof -> []
- | Ignore -> get_name_list()
- | Name s -> s :: get_name_list()
- | Nametoken s -> s :: get_name_list()
- | _ -> raise(Validation_error("Illegal attribute value"))
- in
- get_name_list()
-;;
-
-
-let normalize_line_separators lexerset s =
- let lexbuf = Lexing.from_string s in
- let rec get_string() =
- match lexerset.scan_for_crlf lexbuf with
- Eof -> ""
- | CharData s -> s ^ get_string()
- | _ -> assert false
- in
- get_string()
-;;
-
-
-let value_of_attribute lexerset dtd n atype v =
- (* The attribute with name 'n', type 'atype' and string value 'v' is
- * decomposed, and the att_value is returned:
- * - It is checked whether 'v' conforms to the lexical rules for attributes
- * of type 'atype'
- * - If 'atype <> A_cdata', leading and trailing spaces are removed from 'v'.
- * - If 'atype = A_notation d', it is checked if 'v' matches one of the
- * notation names contained in d.
- * - If 'atype = A_enum d', it is checked whether 'v' matches one of the
- * tokens from d
- * - If 'atype' refers to a "single-value" type, the value is returned as
- *   Value u, where u is the normalized value. If 'atype' refers to a
- *   "list" type, the value is returned as Valuelist l, where l contains
- * the tokens.
- *
- * Note that this function does not implement all normalization rules.
- * It is expected that the string passed as 'v' is already preprocessed;
- * i.e. character and entity references are resolved, and the substitution
- * of white space characters by space characters has already been performed.
- * If these requirements are met, the value returned by this function
- * will be perfectly normalized.
- *
- * Further checks:
- * - ENTITY and ENTITIES values: It is checked whether there is an
- * unparsed general entity
- * [ Other checks planned: ID, IDREF, IDREFS but not yet implemented ]
- *)
-
- let lexical_error() =
- lazy (raise(Validation_error("Attribute `" ^ n ^ "' is lexically malformed"))) in
-
- let remove_leading_and_trailing_spaces u =
- (* Precondition: 'u' matches <name> or <nmtoken> *)
- match split_attribute_value lexerset u with
- [ u' ] -> u'
- | _ -> assert false
- in
-
- let check_ndata_entity u =
- let en, extdecl = dtd # gen_entity u in (* or Validation_error *)
- if not (en # is_ndata) then
- raise(Validation_error("Reference to entity `" ^ u ^
- "': NDATA entity expected"));
- if dtd # standalone_declaration && extdecl then
- raise(Validation_error("Reference to entity `" ^ u ^
- "' violates standalone declaration"));
- in
-
- match atype with
- A_cdata ->
- Value v
-
- | (A_id | A_idref | A_nmtoken) ->
- check_attribute_value_lexically lexerset (lexical_error()) atype v;
- Value (remove_leading_and_trailing_spaces v)
- | A_entity ->
- check_attribute_value_lexically lexerset (lexical_error()) atype v;
- let v' = remove_leading_and_trailing_spaces v in
- check_ndata_entity v';
- Value v'
-
- | (A_idrefs | A_nmtokens) ->
- check_attribute_value_lexically lexerset (lexical_error()) atype v;
- Valuelist (split_attribute_value lexerset v)
-
- | A_entities ->
- check_attribute_value_lexically lexerset (lexical_error()) atype v;
- let l = split_attribute_value lexerset v in
- List.iter check_ndata_entity l;
- Valuelist l
-
- | A_notation nl ->
- check_attribute_value_lexically lexerset (lexical_error()) atype v;
- let v' = remove_leading_and_trailing_spaces v in
- if not (List.mem v' nl) then
- raise(Validation_error
- ("Attribute `" ^ n ^
- "' does not match one of the declared notation names"));
- Value v'
-
- | A_enum enuml ->
- check_attribute_value_lexically lexerset (lexical_error()) atype v;
- let v' = remove_leading_and_trailing_spaces v in
- if not (List.mem v' enuml) then
- raise(Validation_error
- ("Attribute `" ^ n ^
- "' does not match one of the declared enumerator tokens"));
- Value v'
-;;
-
-
-let normalization_changes_value lexerset atype v =
- (* Returns true if:
- * - 'atype' is a "single-value" type, and the normalization of the string
- * value 'v' of this type discards leading and/or trailing spaces
- * - 'atype' is a "list" type, and the normalization of the string value
- * 'v' of this type discards leading and/or trailing spaces, or spaces
- * separating the tokens of the list (i.e. the normal form is that
- * the tokens are separated by exactly one space character).
- *
- * Note: It is assumed that TABs, CRs, and LFs in 'v' are already converted
- * to spaces.
- *)
-
- match atype with
- A_cdata ->
- false
-
- | (A_id | A_idref | A_entity | A_nmtoken | A_notation _ | A_enum _) ->
- (* Return 'true' if the first or last character is a space.
- * The following check works for both ISO-8859-1 and UTF-8.
- *)
- v <> "" && (v.[0] = ' ' || v.[String.length v - 1] = ' ')
-
- | (A_idrefs | A_entities | A_nmtokens) ->
- (* Split the list, and concatenate the tokens as required by
- * the normal form. Return 'true' if this operation results in
- * a different string than 'v'.
- * This check works for both ISO-8859-1 and UTF-8.
- *)
- let l = split_attribute_value lexerset v in
- let v' = String.concat " " l in
- v <> v'
-;;
-
-
-(**********************************************************************)
-
-let write_markup_string ~(from_enc:rep_encoding) ~to_enc os s =
- (* Write the 'from_enc'-encoded string 's' as 'to_enc'-encoded string to
- * 'os'. All characters are written as they are.
- *)
- let s' =
- if to_enc = (from_enc :> encoding)
- then s
- else recode_string
- ~in_enc:(from_enc :> encoding)
- ~out_enc:to_enc
- ~subst:(fun n ->
- failwith
- ("Pxp_aux.write_markup_string: Cannot represent " ^
- "code point " ^ string_of_int n))
- s
- in
- write os s' 0 (String.length s')
-;;
-
-
-let write_data_string ~(from_enc:rep_encoding) ~to_enc os content =
-  (* Write the 'from_enc'-encoded string 'content' as a 'to_enc'-encoded string to
- * 'os'. The characters '&', '<', '>', '"', '%' and every character that
- * cannot be represented in 'to_enc' are paraphrased as entity reference
- * "&...;".
- *)
- let convert_ascii s =
- (* Convert the ASCII-encoded string 's'. Note that 'from_enc' is
- * always ASCII-compatible
- *)
- if to_enc = (from_enc :> encoding)
- then s
- else
- recode_string
- ~in_enc:(from_enc :> encoding)
- ~out_enc:to_enc
- ~subst:(fun n -> assert false)
- s
- in
-
- let write_ascii s =
- (* Write the ASCII-encoded string 's' *)
- let s' = convert_ascii s in
- write os s' 0 (String.length s')
- in
-
- let write_part j l =
- (* Writes the substring of 'content' beginning at pos 'j' with length 'l'
- *)
- if to_enc = (from_enc :> encoding) then
- write os content j l
- else begin
- let s' = recode_string
- ~in_enc:(from_enc :> encoding)
- ~out_enc:to_enc
- ~subst:(fun n ->
- convert_ascii ("&#" ^ string_of_int n ^ ";"))
- (String.sub content j l)
- in
- write os s' 0 (String.length s')
- end
- in
-
- let i = ref 0 in
- for k = 0 to String.length content - 1 do
- match content.[k] with
- ('&' | '<' | '>' | '"' | '%') as c ->
- if !i < k then
- write_part !i (k - !i);
- begin match c with
-	      '&' -> write_ascii "&amp;"
-	    | '<' -> write_ascii "&lt;"
-	    | '>' -> write_ascii "&gt;"
-	    | '"' -> write_ascii "&quot;"
-	    | '%' -> write_ascii "&#37;"   (* reserved in DTDs *)
- | _ -> assert false
- end;
- i := k+1
- | _ -> ()
- done;
- if !i < String.length content then
- write_part !i (String.length content - !i)
-;;
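
A behavioural sketch of the escaping above (assuming to_enc equals from_enc,
so no recoding takes place):

(* writing the content      a < b & "c" % d
 * produces on the stream   a &lt; b &amp; &quot;c&quot; &#37; d
 * and '>' becomes &gt;; characters that cannot be represented in to_enc
 * are written as numeric character references &#n;. *)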
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.6 2000/08/14 22:24:55 gerd
- * Moved the module Pxp_encoding to the netstring package under
- * the new name Netconversion.
- *
- * Revision 1.5 2000/07/25 00:30:01 gerd
- * Added support for pxp:dtd PI options.
- *
- * Revision 1.4 2000/07/16 18:31:09 gerd
- * The exception Illegal_character has been dropped.
- *
- * Revision 1.3 2000/07/16 16:33:57 gerd
- * New function write_markup_string: Handles the encoding
- * of the string.
- *
- * Revision 1.2 2000/07/08 22:15:45 gerd
- * [Merging 0.2.10:] write_data_string: The character '%' is special, too.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_aux.ml:
- *
- * Revision 1.12 2000/05/27 19:08:30 gerd
- * Added functionality to check standalone declaration:
- *
- * expand_attvalue: Checks whether included entities violate the
- * stand-alone declaration.
- *
- * value_of_attribute: Checks whether ENTITY/ENTITIES values violate
- * this declaration. (Furthermore, it is checked whether the NDATA
- * entity exists - this has been forgotten in previous versions.)
- *
- * value_of_attribute/check_attribute_value_lexically: improved.
- *
- * New function normalization_changes_value: helps detecting
- * one case which violates the standalone declaration.
- *
- * Revision 1.11 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.10 2000/05/01 20:41:56 gerd
- * New function write_data_string.
- *
- * Revision 1.9 2000/04/30 18:11:31 gerd
- * New function normalize_line_separators.
- * In function expand_attvalue: New argument norm_crlf. If the attvalue
- * is read directly from a file, the sequence CR LF must be converted to a
- * single space. If the attvalue is read from a replacement text, CR LF has
- * single space. If the attvalue is read from a replacement text, CR LF has
- * already been converted to a single LF, and CR LF, if still occurring, must be
- * true/false as norm_crlf.
- *
- * Revision 1.8 1999/09/01 22:51:07 gerd
- * Added functions.
- * 'character' raises Illegal_character if characters are found that
- * do not match the production Char.
- *
- * Revision 1.7 1999/09/01 16:17:37 gerd
- * Added function 'check_name'.
- *
- * Revision 1.6 1999/08/15 20:33:19 gerd
- * Added: a function that checks public identifiers. Only certain
- * characters may occur in these identifiers.
- * Control characters are rejected by the "character" function.
- * Bugfix: recursive entity references are detected in attribute
- * expansion
- *
- * Revision 1.5 1999/08/15 02:18:02 gerd
- * That '<' is not allowed in attribute values, is a violation
- * of well-formedness, not of the validity; so WF_error is raised.
- *
- * Revision 1.4 1999/08/15 00:20:37 gerd
- * When expanding attribute values, references to parameter
- * entities are now resolved by the method "replacement_text" which
- * has an additional return value, and no longer by "attlist_replacement_text".
- * The new return value indicates whether references to external entities
- * have been resolved (directly or indirectly); this is allowed at some
- * locations but not in attribute values.
- *
- * Revision 1.3 1999/08/14 22:05:53 gerd
- * Several functions have now a "warner" as argument which is
- * an object with a "warn" method. This is used to warn about characters
- * that cannot be represented in the Latin 1 alphabet.
- *
- * Revision 1.2 1999/08/10 21:35:06 gerd
- * The XML/encoding declaration at the beginning of entities is
- * evaluated. In particular, entities have now a method "xml_declaration"
- * which returns the name/value pairs of such a declaration. The "encoding"
- * setting is interpreted by the entity itself; "version", and "standalone"
- * are interpreted by Markup_yacc.parse_document_entity. Other settings
- * are ignored (this does not conform to the standard; the standard prescribes
- * that "version" MUST be given in the declaration of document; "standalone"
- * and "encoding" CAN be declared; no other settings are allowed).
- * TODO: The user should be warned if the standard is not exactly
- * fulfilled. -- The "standalone" property is not checked yet.
- *
- * Revision 1.1 1999/08/10 00:35:50 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-open Pxp_document
-open Pxp_yacc
-open Pxp_dtd
-open Pxp_types
-
-let write_expr_ext_id out extid =
- match extid with
- System s ->
- output_string out ("(Pxp_types.System\"" ^ String.escaped s ^ "\")")
- | Public(s,t) ->
- output_string out ("(Pxp_types.Public(\"" ^ String.escaped s ^
- "\",\"" ^
- String.escaped t ^ "\"))")
- | Anonymous ->
- output_string out "Pxp_types.Anonymous"
-;;
-
-
-let rec write_expr_content_model out cm =
- match cm with
- Unspecified -> output_string out "Pxp_types.Unspecified"
- | Empty -> output_string out "Pxp_types.Empty"
- | Any -> output_string out "Pxp_types.Any"
- | Mixed msl -> output_string out "(Pxp_types.Mixed [";
- List.iter
- (fun ms ->
- write_expr_mixed_spec out ms;
- output_string out "; ";
- )
- msl;
- output_string out "])";
- | Regexp re -> output_string out "(Pxp_types.Regexp ";
- write_expr_regexp_spec out re;
- output_string out ")";
-
-and write_expr_mixed_spec out ms =
- match ms with
- MPCDATA -> output_string out "Pxp_types.MPCDATA"
- | MChild s -> output_string out ("(Pxp_types.MChild \"" ^
- String.escaped s ^ "\")")
-
-and write_expr_regexp_spec out re =
- match re with
- Optional re' -> output_string out "(Pxp_types.Optional ";
- write_expr_regexp_spec out re';
- output_string out ")";
- | Repeated re' -> output_string out "(Pxp_types.Repeated ";
- write_expr_regexp_spec out re';
- output_string out ")";
- | Repeated1 re' -> output_string out "(Pxp_types.Repeated1 ";
- write_expr_regexp_spec out re';
- output_string out ")";
- | Alt rel -> output_string out "(Pxp_types.Alt [";
- List.iter
- (fun re' ->
- write_expr_regexp_spec out re';
- output_string out "; ";
- )
- rel;
- output_string out "])";
- | Seq rel -> output_string out "(Pxp_types.Seq [";
- List.iter
- (fun re' ->
- write_expr_regexp_spec out re';
- output_string out "; ";
- )
- rel;
- output_string out "])";
- | Child s -> output_string out ("(Pxp_types.Child \"" ^
- String.escaped s ^ "\")")
-;;
-
-
-let write_expr_att_type out at =
- match at with
- A_cdata -> output_string out "Pxp_types.A_cdata"
- | A_id -> output_string out "Pxp_types.A_id"
- | A_idref -> output_string out "Pxp_types.A_idref"
- | A_idrefs -> output_string out "Pxp_types.A_idrefs"
- | A_entity -> output_string out "Pxp_types.A_entity"
- | A_entities -> output_string out "Pxp_types.A_entities"
- | A_nmtoken -> output_string out "Pxp_types.A_nmtoken"
- | A_nmtokens -> output_string out "Pxp_types.A_nmtokens"
- | A_notation sl -> output_string out "(Pxp_types.A_notation [";
- List.iter
- (fun s ->
- output_string out ("\"" ^
- String.escaped s ^ "\"; "))
- sl;
- output_string out "])";
- | A_enum sl -> output_string out "(Pxp_types.A_enum [";
- List.iter
- (fun s ->
- output_string out ("\"" ^
- String.escaped s ^ "\"; "))
- sl;
- output_string out "])";
-;;
-
-
-let write_expr_att_default out ad =
- match ad with
- D_required -> output_string out "Pxp_types.D_required"
- | D_implied -> output_string out "Pxp_types.D_implied"
- | D_default s -> output_string out ("(Pxp_types.D_default \"" ^
- String.escaped s ^ "\")")
- | D_fixed s -> output_string out ("(Pxp_types.D_fixed \"" ^
- String.escaped s ^ "\")")
-;;
-
-
-let write_expr_att_value out av =
- match av with
- Value s -> output_string out ("(Pxp_types.Value \"" ^
- String.escaped s ^ "\")")
- | Valuelist sl -> output_string out ("(Pxp_types.Valuelist [");
- List.iter
- (fun s ->
- output_string out ("\"" ^ String.escaped s ^
- "\"; ")
- )
- sl;
- output_string out "])";
- | Implied_value -> output_string out "Pxp_types.Implied_value"
-;;
-
-
-let ocaml_encoding enc =
- match enc with
- `Enc_utf8 -> "`Enc_utf8"
- | `Enc_utf16 -> "`Enc_utf16"
- | `Enc_utf16_le -> "`Enc_utf16_le"
- | `Enc_utf16_be -> "`Enc_utf16_be"
- | `Enc_iso88591 -> "`Enc_iso88591"
-;;
-
-
-let write_expr_new_pi out pi =
- output_string out ("(new Pxp_dtd.proc_instruction \"" ^
- String.escaped(pi # target) ^ "\" \"" ^
- String.escaped(pi # value) ^ "\" " ^
- ocaml_encoding(pi # encoding) ^ ")")
-;;
-
-
-let write_expr_node_type out nt =
- match nt with
- T_data -> output_string out "Pxp_document.T_data"
- | T_element s -> output_string out ("(Pxp_document.T_element \"" ^
- String.escaped s ^ "\")")
- | T_super_root -> output_string out "Pxp_document.T_super_root"
- | T_pinstr s -> output_string out ("(Pxp_document.T_pinstr \"" ^
- String.escaped s ^ "\")")
- | T_comment -> output_string out "Pxp_document.T_comment"
- | _ -> assert false
-;;
-
-
-let write_local_dtd out (dtd : dtd) =
- (* Outputs "let mkdtd warner = ... in" to 'out' *)
- output_string out "let mkdtd warner =\n";
- output_string out ("let encoding = " ^ ocaml_encoding (dtd # encoding) ^
- " in\n");
- output_string out "let dtdobj = new Pxp_dtd.dtd warner encoding in\n";
-
- (* Set the ID: *)
- output_string out "dtdobj # set_id ";
- begin match dtd # id with
- None -> ()
- | Some(External x) ->
- output_string out "(Pxp_types.External ";
- write_expr_ext_id out x;
- output_string out ");\n"
- | Some(Derived x) ->
- output_string out "(Pxp_types.Derived ";
- write_expr_ext_id out x;
- output_string out ");\n"
- | Some Internal ->
- output_string out "Pxp_types.Internal;\n";
- end;
-
- (* Set standalone declaration: *)
- output_string out ("dtdobj # set_standalone_declaration " ^
- string_of_bool (dtd # standalone_declaration) ^ ";\n");
-
- (* Add notations: *)
- List.iter
- (fun noname ->
- let no = dtd # notation noname in
- output_string out ("let no = new Pxp_dtd.dtd_notation \"" ^
- String.escaped noname ^ "\" ");
- write_expr_ext_id out (no # ext_id);
- output_string out " encoding in\n";
- output_string out "dtdobj # add_notation no;\n";
- )
- (List.sort Pervasives.compare (dtd # notation_names));
-
- (* Add unparsed entities: *)
- List.iter
- (fun enname ->
- let en, _ = dtd # gen_entity enname in
- if en # is_ndata then begin
- let ext_id = en # ext_id in
- let notation = en # notation in
- let encoding = en # encoding in
- output_string out ("let ndata = new Pxp_entity.ndata_entity \"" ^
- String.escaped enname ^ "\" ");
- write_expr_ext_id out ext_id;
- output_string out ("\"" ^ String.escaped notation ^ "\" " ^
- ocaml_encoding encoding ^ " in \n");
- output_string out "dtdobj # add_gen_entity (ndata :> Pxp_entity.entity) false;\n";
- end;
- )
- (List.sort Pervasives.compare (dtd # gen_entity_names));
-
-
- (* Add elements: *)
- List.iter
- (fun elname ->
- (* Create the element 'el': *)
- let el = dtd # element elname in
- output_string out ("let el = new Pxp_dtd.dtd_element dtdobj \"" ^
- String.escaped elname ^ "\" in\n");
- output_string out "let cm = ";
- write_expr_content_model out (el # content_model);
- output_string out " in\n";
- output_string out "el # set_cm_and_extdecl cm false;\n";
- (* Add attributes: *)
- List.iter
- (fun attname ->
- let atttype, attdefault = el # attribute attname in
- output_string out ("el # add_attribute \"" ^
- String.escaped attname ^ "\" ");
- write_expr_att_type out atttype;
- output_string out " ";
- write_expr_att_default out attdefault;
- output_string out " false;\n";
- )
- (List.sort Pervasives.compare (el # attribute_names));
-
- (* Allow arbitrary? *)
- if el # arbitrary_allowed then
- output_string out "el # allow_arbitrary;\n"
- else
- output_string out "el # disallow_arbitrary;\n";
-
- (* Validate: *)
- output_string out "el # validate;\n";
-
- (* Add the element 'el' to 'dtdobj': *)
- output_string out "dtdobj # add_element el;\n";
- )
- (List.sort Pervasives.compare (dtd # element_names));
-
- (* Add processing instructions: *)
- List.iter
- (fun target ->
- let pilist = dtd # pinstr target in
- List.iter
- (fun pi ->
- output_string out "let pi = ";
- write_expr_new_pi out pi;
- output_string out " in\n";
- output_string out "dtdobj # add_pinstr pi;\n";
- )
- pilist;
- )
- (List.sort Pervasives.compare (dtd # pinstr_names));
-
- (* Set the name of the root element: *)
- begin match dtd # root with
- None -> ()
- | Some rootname ->
- output_string out ("dtdobj # set_root \"" ^
- String.escaped rootname ^ "\";\n")
- end;
-
- (* Special options: *)
- if dtd # arbitrary_allowed then
- output_string out "dtdobj # allow_arbitrary;\n"
- else
- output_string out "dtdobj # disallow_arbitrary;\n";
-
- (* Return dtdobj: *)
- output_string out "dtdobj in\n"
-;;
-
-
-let rec write_local_subtree out n =
- (* Outputs the term generating the subtree *)
-
- output_string out "let nt = ";
- write_expr_node_type out (n # node_type);
- output_string out " in\n";
-
- begin match n # node_type with
- T_data ->
- output_string out ("let t = Pxp_document.create_data_node spec dtd \"" ^
- String.escaped (n # data) ^ "\" in\n")
- | T_element elname ->
- let loc, line, col = n # position in
- output_string out
- ("let pos = \"" ^ String.escaped loc ^ "\", " ^
- string_of_int line ^ ", " ^
- string_of_int col ^ " in\n");
- output_string out
- ("let t = Pxp_document.create_element_node ~position:pos spec dtd \"" ^
- String.escaped elname ^ "\" [ ");
- List.iter
- (fun (name,value) ->
- begin match value with
- Value s ->
- output_string out ("\"" ^ String.escaped name ^ "\", ");
- output_string out ("\"" ^ String.escaped s ^ "\"; ")
- | Valuelist sl ->
- output_string out ("\"" ^ String.escaped name ^ "\", ");
- output_string out ("\"" ^
- String.escaped (String.concat " " sl) ^
- "\"; ")
- | Implied_value ->
- ()
- end
- )
- (n # attributes);
- output_string out " ] in\n";
- | T_super_root ->
- let loc, line, col = n # position in
- output_string out
- ("let pos = \"" ^ String.escaped loc ^ "\", " ^
- string_of_int line ^ ", " ^
- string_of_int col ^ " in\n");
- output_string out
- ("let t = Pxp_document.create_super_root_node ~position:pos spec dtd in\n")
- | T_pinstr piname ->
- let loc, line, col = n # position in
- output_string out
- ("let pos = \"" ^ String.escaped loc ^ "\", " ^
- string_of_int line ^ ", " ^
- string_of_int col ^ " in\n");
- output_string out "let pi = ";
- write_expr_new_pi out (List.hd (n # pinstr piname));
- output_string out " in\n";
- output_string out
- ("let t = Pxp_document.create_pinstr_node ~position:pos spec dtd pi in\n")
- | T_comment ->
- let loc, line, col = n # position in
- output_string out
- ("let pos = \"" ^ String.escaped loc ^ "\", " ^
- string_of_int line ^ ", " ^
- string_of_int col ^ " in\n");
- output_string out "let comment = ";
- ( match n # comment with
- None -> assert false
- | Some c -> output_string out ("\"" ^ String.escaped c ^ "\"")
- );
- output_string out " in\n";
- output_string out
- ("let t = Pxp_document.create_comment_node ~position:pos spec dtd comment in\n")
- | _ ->
- assert false
- end;
-
- (* Add processing instructions: *)
- begin match n # node_type with
- T_pinstr _ ->
- ()
- | _ ->
- List.iter
- (fun target ->
- let pilist = n # pinstr target in
- List.iter
- (fun pi ->
- output_string out "let pi = ";
- write_expr_new_pi out pi;
- output_string out " in\n";
- output_string out "add_pinstr t pi;\n";
- )
- pilist;
- )
- (List.sort Pervasives.compare (n # pinstr_names));
- end;
-
- (* Add the sub nodes: *)
- n # iter_nodes
- (fun n' ->
- output_string out "add_node t (\n";
- write_local_subtree out n';
- output_string out ");\n";
- );
-
- (* Validate: *)
- output_string out "local_validate t;\n";
-
- (* Return: *)
- output_string out "t\n"
-;;
-
-
-let write_local_document out (d : 'ext document) =
- (* Outputs "let mkdoc warner spec = ... in" *)
-
- output_string out "let mkdoc warner spec =\n";
- output_string out "let doc = new Pxp_document.document warner in\n";
- output_string out ("doc # init_xml_version \"" ^
- String.escaped (d # xml_version) ^ "\";\n");
- write_local_dtd out (d # dtd);
- output_string out "let dtd = mkdtd warner in\n";
- output_string out "let root = ";
- write_local_subtree out (d # root);
- output_string out " in\n";
- output_string out "doc # init_root root;\n";
-
- (* Add processing instructions: *)
- List.iter
- (fun target ->
- let pilist = d # pinstr target in
- List.iter
- (fun pi ->
- output_string out "let pi = ";
- write_expr_new_pi out pi;
- output_string out " in\n";
- output_string out "doc # add_pinstr pi;\n";
- )
- pilist;
- )
- (List.sort Pervasives.compare (d # pinstr_names));
-
- (* Return the result: *)
- output_string out "doc in\n"
-;;
-
-
-let write_helpers out =
- output_string out "let add_node t n = (t : 'ext Pxp_document.node) # add_node (n : 'ext Pxp_document.node) in\n";
- output_string out "let add_pinstr t pi = (t : 'ext Pxp_document.node) # add_pinstr (pi : Pxp_dtd.proc_instruction) in\n";
- output_string out "let local_validate t = (t : 'ext Pxp_document.node) # local_validate () in\n"
-;;
-
-
-let write_document out d =
- output_string out "let create_document warner spec =\n";
- write_helpers out;
- write_local_document out d;
- output_string out "mkdoc warner spec;;\n"
-;;
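-
-(* Example (an illustrative sketch, not part of the original interface): write
- * the generator code for a document into a file chosen by the caller. Only
- * write_document above is assumed; the function name is made up.
- *)
-let _example_dump_document_code filename (d : 'ext document) =
- let out = open_out filename in
- write_document out d;
- close_out out
-;;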
-
-
-let write_dtd out dtd =
- output_string out "let create_dtd warner =\n";
- write_local_dtd out dtd;
- output_string out "mkdtd warner;;\n"
-;;
-
-
-let write_subtree out t =
- output_string out "let create_subtree dtd spec =\n";
- write_helpers out;
- (* write_local_subtree emits the tree-building expression itself (its value
- * is the node "t"), so the generated definition simply ends here: *)
- write_local_subtree out t;
- output_string out ";;\n"
-;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.7 2000/08/30 15:48:07 gerd
- * Minor update.
- *
- * Revision 1.6 2000/08/18 20:16:59 gerd
- * Updates because of new node types T_comment, T_pinstr, T_super_root.
- *
- * Revision 1.5 2000/07/23 02:16:51 gerd
- * Changed signature of local_validate.
- *
- * Revision 1.4 2000/07/09 17:59:35 gerd
- * Updated: The position of element nodes is also written.
- *
- * Revision 1.3 2000/07/09 00:30:00 gerd
- * Notations are written before they are used.
- * Unparsed entities are included.
- * Further changes.
- *
- * Revision 1.2 2000/07/08 22:59:14 gerd
- * [Merging 0.2.10:] Improved: The resulting code can be compiled
- * faster, and the compiler is less hungry on memory.
- * Updated because of PXP interface changes.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_codewriter.ml:
- *
- * Revision 1.1 2000/03/11 22:57:28 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-open Pxp_document
-open Pxp_yacc
-open Pxp_dtd
-
-val write_document : out_channel -> 'ext document -> unit
- (* Writes O'Caml code to the out_channel that is a top-level function
- * creating a fresh document which is equal to the passed document:
- *
- * "let create_document warner spec = ...;;"
- *
- * If you compile the code and call "create_document warner spec" the
- * function creates a document tree which is (almost) equal to the
- * passed document.
- *
- * The following properties may not be equal:
- * - Parsed entities
- * - Whether a declaration occurs in an external entity or not
- *
- * 'warner': a collect_warnings object
- * 'spec': a Pxp_document.spec
- *)
-
-
-val write_dtd : out_channel -> dtd -> unit
- (* Writes O'Caml code to the out_channel that is a top-level function
- * creating a fresh DTD which is equal to the passed DTD:
- *
- * "let create_dtd warner = ...;;"
- *
- * If you compile the code and call "create_dtd warner" the
- * function creates a DTD object which is (almost) equal to the
- * passed object.
- *
- * The following properties may not be equal:
- * - Parsed entities
- * - Whether a declaration occurs in an external entity or not
- *
- * 'warner': a collect_warnings object
- *)
-
-val write_subtree : out_channel -> 'ext node -> unit
- (* Writes O'Caml code to the out_channel that is a top-level function
- * creating a fresh node tree which is equal to the passed tree:
- *
- * "let create_subtree dtd spec = ...;;"
- *
- * If you compile the code and call "create_subtree dtd spec" the
- * function creates a node tree which is equal to the passed tree.
- *
- * 'dtd': a DTD object
- * 'spec': a Pxp_document.spec
- *)
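-
-(* Usage sketch (illustrative only; the file name "doc_code.ml" is made up):
- * once the generated code has been written to a file such as "doc_code.ml"
- * and compiled against PXP, the caller obtains the tree by applying the
- * generated entry point, e.g.
- *
- * let doc = create_document warner spec
- *
- * with 'warner' and 'spec' exactly as described above; create_dtd and
- * create_subtree are applied analogously.
- *)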
-
-
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/07/09 00:30:14 gerd
- * Updated.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_codewriter.mli:
- *
- * Revision 1.1 2000/03/11 22:57:28 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-module StringOrd = struct
- type t = string
- let compare = (compare : string -> string -> int)
-end;;
-
-module StringMap = Map.Make(StringOrd);;
- (* 'a StringMap.t: the type of maps (dictionaries) from string to 'a *)
-
-module Graph = struct
- type vertex =
- { mutable edges_out : (string * vertex) list;
- mutable edges_out_map : vertex StringMap.t;
- mutable edges_in : (vertex * string) list;
- mutable graph : graph;
- mutable id : int;
- }
- and graph =
- { mutable vertexes : vertex list;
- mutable mid : int; (* maximum id + 1 *)
- }
-
- exception Edge_not_unique
-
- let create () =
- { vertexes = [];
- mid = 0;
- }
-
- let new_vertex g =
- let v =
- { edges_out = [];
- edges_out_map = StringMap.empty;
- edges_in = [];
- graph = g;
- id = g.mid;
- } in
- g.vertexes <- v :: g.vertexes;
- g.mid <- g.mid + 1;
- v
-
- let new_edge v_from e v_to =
- if v_from.graph != v_to.graph then
- invalid_arg "Pxp_dfa.Graph.new_edge";
- try
- let v = StringMap.find e v_from.edges_out_map in
- if v != v_to then
- raise Edge_not_unique;
- with
- Not_found ->
- v_from.edges_out <- (e, v_to) :: v_from.edges_out;
- v_from.edges_out_map <- StringMap.add e v_to v_from.edges_out_map;
- v_to.edges_in <- (v_from, e) :: v_to.edges_in;
- ()
-
- let graph_of_vertex v = v.graph
-
- let union g1 g2 =
- List.iter
- (fun v ->
- v.graph <- g1;
- v.id <- v.id + g1.mid;
- )
- g2.vertexes;
- g1.vertexes <- g2.vertexes @ g1.vertexes;
- g1.mid <- g1.mid + g2.mid;
- g2.vertexes <- [];
- g2.mid <- 0
-
- let outgoing_edges v =
- v.edges_out
-
- let ingoing_edges v =
- v.edges_in
-
- let follow_edge v e =
- StringMap.find e v.edges_out_map (* or raise Not_found *)
-end
-;;
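-
-(* Example (an illustrative sketch, unused by the rest of this module): build
- * a two-vertex graph with one labelled edge and follow it again. Adding the
- * same edge twice is a no-op, while an edge "item" from 'a' to a third
- * vertex would raise Edge_not_unique.
- *)
-let _example_graph () =
- let g = Graph.create () in
- let a = Graph.new_vertex g in
- let b = Graph.new_vertex g in
- Graph.new_edge a "item" b;
- Graph.follow_edge a "item" == b
-;;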
-
-
-module VertexOrd = struct
- type t = Graph.vertex
- let compare v1 v2 =
- if v1.Graph.graph != v2.Graph.graph then
- invalid_arg "Pxp_dfa.VertexOrd.compare";
- compare v1.Graph.id v2.Graph.id
-end
-;;
-
-module VertexSet = Set.Make(VertexOrd);;
-
-
-type dfa_definition =
- { dfa_graph : Graph.graph;
- dfa_start : Graph.vertex;
- dfa_stops : VertexSet.t;
- dfa_null : bool;
- }
-;;
-
-(**********************************************************************)
-
-(* Now that we have all the auxiliary data types, it is time for the
- * algorithm that transforms regexps to DFAs.
- *)
-
-open Pxp_types
-
-let dfa_of_regexp_content_model re =
- let rec get_dfa re =
- match re with
- Child e ->
- let g = Graph.create() in
- let v1 = Graph.new_vertex g in
- let v2 = Graph.new_vertex g in
- Graph.new_edge v1 e v2;
- { dfa_graph = g;
- dfa_start = v1;
- dfa_stops = VertexSet.singleton v2;
- dfa_null = false;
- }
-
- | Seq [] ->
- invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
- | Seq [re'] ->
- get_dfa re'
- | Seq (re1 :: seq2) ->
- let dfa1 = get_dfa re1 in
- let dfa2 = get_dfa (Seq seq2) in
- (* Merge the two graphs. The result is in dfa1.dfa_graph: *)
- Graph.union dfa1.dfa_graph dfa2.dfa_graph;
- (* Concatenation I: Add additional edges to the graph such
- * that if w1 matches dfa1, and w2 matches dfa2, and w2 is not
- * empty, w1w2 will match the merged DFAs.
- *)
- List.iter
- (fun (e,v') ->
- VertexSet.iter
- (fun v ->
- Graph.new_edge v e v')
- dfa1.dfa_stops
- )
- (Graph.outgoing_edges dfa2.dfa_start);
- (* Concatenation II: If the empty string matches dfa2, the stop
- * nodes of dfa1 remain stop nodes.
- *)
- let stops =
- if dfa2.dfa_null then
- VertexSet.union dfa1.dfa_stops dfa2.dfa_stops
- else
- dfa2.dfa_stops
- in
- (* The resulting DFA: *)
- { dfa_graph = dfa1.dfa_graph;
- dfa_start = dfa1.dfa_start;
- dfa_stops = stops;
- dfa_null = dfa1.dfa_null && dfa2.dfa_null;
- }
-
- | Alt [] ->
- invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
- | Alt [re'] ->
- get_dfa re'
- | Alt alt ->
- let dfa_alt = List.map get_dfa alt in
- (* Merge the graphs. The result is in g: *)
- let g = (List.hd dfa_alt).dfa_graph in
- List.iter
- (fun dfa ->
- Graph.union g dfa.dfa_graph
- )
- (List.tl dfa_alt);
- (* Get the new start node: *)
- let start = Graph.new_vertex g in
- (* Add the new edges starting at 'start': *)
- List.iter
- (fun dfa ->
- List.iter
- (fun (e, v) ->
- Graph.new_edge start e v)
- (Graph.outgoing_edges dfa.dfa_start)
- )
- dfa_alt;
- (* If one of the old start nodes was a stop node, the new start
- * node will be a stop node, too.
- *)
- let null = List.exists (fun dfa -> dfa.dfa_null) dfa_alt in
- let stops =
- List.fold_left
- (fun s dfa -> VertexSet.union s dfa.dfa_stops)
- VertexSet.empty
- dfa_alt in
- let stops' =
- if null then
- VertexSet.union stops (VertexSet.singleton start)
- else
- stops in
- (* The resulting DFA: *)
- { dfa_graph = g;
- dfa_start = start;
- dfa_stops = stops';
- dfa_null = null;
- }
-
- | Optional re' ->
- let dfa' = get_dfa re' in
- if dfa'.dfa_null then
- (* simple case *)
- dfa'
- else begin
- (* Optimization possible: case ingoing_edges dfa_start = [] *)
- let start = Graph.new_vertex dfa'.dfa_graph in
- List.iter
- (fun (e, v) ->
- Graph.new_edge start e v)
- (Graph.outgoing_edges dfa'.dfa_start);
-
- (* The resulting DFA: *)
- { dfa_graph = dfa'.dfa_graph;
- dfa_start = start;
- dfa_stops = VertexSet.union dfa'.dfa_stops
- (VertexSet.singleton start);
- dfa_null = true;
- }
- end
-
- | Repeated1 re' ->
- let dfa' = get_dfa re' in
- List.iter
- (fun (e, v') ->
- VertexSet.iter
- (fun v ->
- Graph.new_edge v e v')
- dfa'.dfa_stops
- )
- (Graph.outgoing_edges dfa'.dfa_start);
-
- (* The resulting DFA: *)
- { dfa_graph = dfa'.dfa_graph;
- dfa_start = dfa'.dfa_start;
- dfa_stops = dfa'.dfa_stops;
- dfa_null = dfa'.dfa_null;
- }
-
- | Repeated re' ->
- get_dfa (Optional (Repeated1 re'))
-
- in
- try
- get_dfa re
- with
- Graph.Edge_not_unique -> raise Not_found
-;;
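-
-(* Worked example (an illustrative sketch): the content model "(a, b*)",
- * i.e. Seq [Child "a"; Repeated (Child "b")], yields a DFA that accepts
- * "a", "ab", "abb", ... but not the empty sequence.
- *)
-let _example_dfa () =
- let dfa =
- dfa_of_regexp_content_model (Seq [ Child "a"; Repeated (Child "b") ]) in
- let v1 = Graph.follow_edge dfa.dfa_start "a" in
- let v2 = Graph.follow_edge v1 "b" in
- VertexSet.mem v1 dfa.dfa_stops &&
- VertexSet.mem v2 dfa.dfa_stops &&
- not dfa.dfa_null
-;;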
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/07/23 02:16:08 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-module Graph : sig
- type graph
- type vertex
-
- (* A directed graph whose edges are marked with strings (= element types)
- * and with the constraint that for a given vertex and a given element
- * type the edge must be unique.
- *)
-
- exception Edge_not_unique
-
- val create : unit -> graph
- (* Creates an empty graph *)
-
- val new_vertex : graph -> vertex
- (* Adds a new vertex to the graph, and returns the vertex *)
-
- val new_edge : vertex -> string -> vertex -> unit
- (* new_edge v_from etype v_to:
- * Adds a new edge from vertex v_from to vertex v_to, marked with
- * etype.
- * Raises Edge_not_unique if there is already an edge labelled etype
- * starting at v_from that ends at a different vertex than v_to.
- *)
-
- val graph_of_vertex : vertex -> graph
- (* Returns the graph the passed vertex is contained in. *)
-
- val union : graph -> graph -> unit
- (* union g1 g2:
- * Moves the vertexes and edges found in g2 to g1.
- * After that, g2 is empty again.
- *)
-
- val outgoing_edges : vertex -> (string * vertex) list
- (* Returns the list of outgoing edges starting in the passed vertex *)
-
- val follow_edge : vertex -> string -> vertex
- (* Follows the edge starting in the passed vertex which is marked
- * with the passed element type.
- * Raises Not_found if there is no such edge.
- *)
-
- val ingoing_edges : vertex -> (vertex * string) list
- (* Returns the list of ingoing edges ending in the passed vertex *)
-end
-
-module VertexSet : Set.S with type elt = Graph.vertex
-
-
-type dfa_definition =
- { dfa_graph : Graph.graph;
- dfa_start : Graph.vertex; (* Where the automaton starts *)
- dfa_stops : VertexSet.t; (* Where the automaton may stop *)
- dfa_null : bool; (* Whether dfa_start is a member of dfa_stops, i.e. the empty sequence is accepted *)
- }
-
-val dfa_of_regexp_content_model : Pxp_types.regexp_spec -> dfa_definition
- (* Computes the DFA or raises Not_found if it does not exist *)
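-
-(* Usage sketch (illustrative only): given 'dfa' obtained from the function
- * above, a validator runs the automaton roughly like
- *
- * let rec accept v = function
- * [] -> VertexSet.mem v dfa.dfa_stops
- * | ename :: rest -> accept (Graph.follow_edge v ename) rest
- * in
- * accept dfa.dfa_start [ "head"; "item"; "item" ]
- *
- * where Not_found from follow_edge means the element sequence is rejected,
- * and dfa_null alone decides the empty sequence.
- *)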
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/07/23 02:16:08 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-open Pxp_types
-open Pxp_lexer_types
-open Pxp_dtd
-open Pxp_aux
-open Pxp_dfa
-
-
-exception Skip
-
-type node_type =
- T_element of string
- | T_data
- | T_super_root
- | T_pinstr of string
- | T_comment
- | T_none
- | T_attribute of string
- | T_namespace of string
-;;
-
-
-class type ['node] extension =
- object ('self)
- method clone : 'self
- method node : 'node
- method set_node : 'node -> unit
- end
-;;
-
-
-class type [ 'ext ] node =
- object ('self)
- constraint 'ext = 'ext node #extension
- method extension : 'ext
- method delete : unit
- method parent : 'ext node
- method root : 'ext node
- method orphaned_clone : 'self
- method orphaned_flat_clone : 'self
- method add_node : ?force:bool -> 'ext node -> unit
- method add_pinstr : proc_instruction -> unit
- method pinstr : string -> proc_instruction list
- method pinstr_names : string list
- method node_position : int
- method node_path : int list
- method sub_nodes : 'ext node list
- method iter_nodes : ('ext node -> unit) -> unit
- method iter_nodes_sibl :
- ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
- method nth_node : int -> 'ext node
- method previous_node : 'ext node
- method next_node : 'ext node
- method set_nodes : 'ext node list -> unit
- method data : string
- method node_type : node_type
- method position : (string * int * int)
- method attribute : string -> att_value
- method attribute_names : string list
- method attribute_type : string -> att_type
- method attributes : (string * Pxp_types.att_value) list
- method required_string_attribute : string -> string
- method required_list_attribute : string -> string list
- method optional_string_attribute : string -> string option
- method optional_list_attribute : string -> string list
- method id_attribute_name : string
- method id_attribute_value : string
- method idref_attribute_names : string list
- method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
- method attributes_as_nodes : 'ext node list
- method set_comment : string option -> unit
- method comment : string option
- method dtd : dtd
- method encoding : rep_encoding
- method create_element :
- ?position:(string * int * int) ->
- dtd -> node_type -> (string * string) list -> 'ext node
- method create_data : dtd -> string -> 'ext node
- method local_validate : ?use_dfa:bool -> unit -> unit
- method keep_always_whitespace_mode : unit
- method write : output_stream -> encoding -> unit
- method write_compact_as_latin1 : output_stream -> unit
- method internal_adopt : 'ext node option -> int -> unit
- method internal_set_pos : int -> unit
- method internal_delete : 'ext node -> unit
- method internal_init : (string * int * int) ->
- dtd -> string -> (string * string) list -> unit
- method internal_init_other : (string * int * int) ->
- dtd -> node_type -> unit
- end
-;;
-
-type 'ext spec_table =
- { mapping : (string, 'ext node) Hashtbl.t;
- data_node : 'ext node;
- default_element : 'ext node;
- super_root_node : 'ext node option;
- pinstr_mapping : (string, 'ext node) Hashtbl.t;
- default_pinstr_node : 'ext node option;
- comment_node : 'ext node option;
- }
-;;
-
-type 'ext spec =
- Spec_table of 'ext spec_table
-;;
-
-
-let make_spec_from_mapping
- ?super_root_exemplar
- ?comment_exemplar
- ?default_pinstr_exemplar
- ?pinstr_mapping
- ~data_exemplar ~default_element_exemplar ~element_mapping () =
- Spec_table
- { mapping = element_mapping;
- data_node = data_exemplar;
- default_element = default_element_exemplar;
- super_root_node = super_root_exemplar;
- comment_node = comment_exemplar;
- default_pinstr_node = default_pinstr_exemplar;
- pinstr_mapping =
- (match pinstr_mapping with
- None -> Hashtbl.create 1
- | Some m -> m
- )
- }
-;;
-
-
-let make_spec_from_alist
- ?super_root_exemplar
- ?comment_exemplar
- ?default_pinstr_exemplar
- ?(pinstr_alist = [])
- ~data_exemplar ~default_element_exemplar ~element_alist () =
- let m = List.length pinstr_alist in
- let pinstr_mapping = Hashtbl.create m in
- List.iter
- (fun (name,ex) -> Hashtbl.add pinstr_mapping name ex)
- pinstr_alist;
- let n = List.length element_alist in
- let element_mapping = Hashtbl.create n in
- List.iter
- (fun (name,ex) -> Hashtbl.add element_mapping name ex)
- element_alist;
- make_spec_from_mapping
- ?super_root_exemplar: super_root_exemplar
- ?comment_exemplar: comment_exemplar
- ?default_pinstr_exemplar: default_pinstr_exemplar
- ~pinstr_mapping: pinstr_mapping
- ~data_exemplar: data_exemplar
- ~default_element_exemplar: default_element_exemplar
- ~element_mapping: element_mapping
- ()
-;;
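-
-(* Construction sketch (illustrative only; 'data_ex', 'elem_ex', 'para_ex'
- * and 'list_ex' stand for exemplar objects built from the data_impl and
- * element_impl classes defined further below, each carrying an extension
- * object):
- *
- * let spec =
- * make_spec_from_alist
- * ~data_exemplar:data_ex
- * ~default_element_exemplar:elem_ex
- * ~element_alist:[ "p", para_ex; "ul", list_ex ]
- * ()
- *
- * Elements named "p" and "ul" are then created by cloning para_ex and
- * list_ex, and every other element by cloning elem_ex.
- *)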
-
-(**********************************************************************)
-
-exception Found;;
-
-let validate_content ?(use_dfa=None) model (el : 'a node) =
- (* Checks that the sub nodes of 'el' match the content model. Returns 'true'
- * on success and 'false' on failure.
- *)
-
- let rec is_empty cl =
- (* Whether the node list counts as empty or not. *)
- match cl with
- [] -> true
- | n :: cl' ->
- ( match n # node_type with
- | T_element _ -> false
- | _ -> is_empty cl' (* ignore other nodes *)
- )
- in
-
- let rec run_regexp cl ml =
- (* Validates regexp content models ml against instances cl. This
- * function works for deterministic and non-deterministic models.
- * The implementation uses backtracking and may sometimes be slow.
- *
- * cl: the list of children that will have to be matched
- * ml: the list of regexps that will have to match (to be read as
- * sequence)
- * returns () meaning that no match has been found, or raises Found.
- *)
- match ml with
- [] ->
- if cl = [] then raise Found; (* Frequent case *)
- if is_empty cl then raise Found; (* General condition *)
- | Seq seq :: ml' ->
- assert (seq <> []); (* necessary to ensure termination *)
- run_regexp cl (seq @ ml')
- | Alt alts :: ml' ->
- let rec find alts =
- match alts with
- [] -> ()
- | alt :: alts' ->
- run_regexp cl (alt :: ml');
- find alts'
- in
- assert (alts <> []); (* Alt [] matches nothing *)
- find alts
- | Repeated re :: ml' ->
- let rec norm re = (* to avoid infinite loops *)
- match re with
- Repeated subre -> norm subre (* necessary *)
- | Optional subre -> norm subre (* necessary *)
- | Repeated1 subre -> norm subre (* an optimization *)
- | _ -> re
- in
- let re' = norm re in
- run_regexp cl (re' :: Repeated re' :: ml');
- run_regexp cl ml'
- | Repeated1 re :: ml' ->
- run_regexp cl (re :: Repeated re :: ml')
- | Optional re :: ml' ->
- run_regexp cl (re :: ml');
- run_regexp cl ml';
- | Child chld :: ml' ->
- match cl with
- [] ->
- ()
- | sub_el :: cl' ->
- begin match sub_el # node_type with
- T_data -> (* Ignore data *)
- run_regexp cl' ml
- (* Note: It can happen that we find a data node here
- * if the 'keep_always_whitespace' mode is turned on.
- *)
- | T_element nt ->
- if nt = chld then run_regexp cl' ml'
- | _ -> (* Ignore this element *)
- run_regexp cl' ml
- end
- in
-
- let run_dfa cl dfa =
- (* Validates regexp content models ml against instances cl. This
- * function works ONLY for deterministic models.
- * The implementation executes the automaton.
- *)
- let current_vertex = ref dfa.dfa_start in
- let rec next_step cl =
- match cl with
- el :: cl' ->
- begin match el # node_type with
- T_data -> (* Ignore data *)
- next_step cl'
- (* Note: It can happen that we find a data node here
- * if the 'keep_always_whitespace' mode is turned on.
- *)
- | T_element nt ->
- begin try
- current_vertex := Graph.follow_edge !current_vertex nt;
- next_step cl'
- with
- Not_found -> false
- end
- | _ -> (* Ignore this node *)
- next_step cl'
- end
- | [] ->
- VertexSet.mem !current_vertex dfa.dfa_stops
- in
- next_step cl
- in
-
- match model with
- Unspecified -> true
- | Any -> true
- | Empty ->
- let cl = el # sub_nodes in
- is_empty cl
- | Mixed (MPCDATA :: mix) ->
- let mix' = List.map (function
- MPCDATA -> assert false
- | MChild x -> x)
- mix in
- begin try
- el # iter_nodes
- (fun sub_el ->
- let nt = sub_el # node_type in
- match nt with
- | T_element name ->
- if not (List.mem name mix') then raise Not_found;
- | _ -> ()
- );
- true
- with
- Not_found ->
- false
- end
- | Regexp re ->
- let cl = el # sub_nodes in
- begin match use_dfa with
- None ->
- (* General backtracking implementation: *)
- begin try
- run_regexp cl [re];
- false
- with
- Found -> true
- end
- | Some dfa ->
- run_dfa cl dfa
- end
-
- | _ -> assert false
-;;
-
-(**********************************************************************)
-
-
-class virtual ['ext] node_impl an_ext =
- object (self)
- constraint 'ext = 'ext node #extension
-
- val mutable parent = (None : 'ext node option)
- val mutable node_position = -1
- val mutable dtd = (None : dtd option)
- val mutable extension = an_ext
-
- initializer
- extension # set_node (self : 'ext #node :> 'ext node)
-
-
- method extension = (extension : 'ext)
-
- method delete =
- match parent with
- None -> ()
- | Some p -> p # internal_delete (self : 'ext #node :> 'ext node)
-
- method parent =
- match parent with
- None -> raise Not_found
- | Some p -> p
-
- method root =
- match parent with
- None -> (self : 'ext #node :> 'ext node)
- | Some p -> p # root
-
- method node_position =
- if node_position >= 0 then node_position else
- raise Not_found
-
- method node_path =
- let rec collect n path =
- try
- let p = n # node_position in
- collect (n # parent) (p :: path)
- with
- Not_found ->
- (* n is the root *)
- path
- in
- collect (self : 'ext #node :> 'ext node) []
-
- method previous_node =
- self # parent # nth_node (self # node_position - 1)
-
- method next_node =
- self # parent # nth_node (self # node_position + 1)
-
- method orphaned_clone =
- let x = extension # clone in
- let n =
- {< parent = None;
- node_position = -1;
- extension = x;
- >} in
- x # set_node (n : 'ext #node :> 'ext node);
- n
-
- method orphaned_flat_clone =
- let x = extension # clone in
- let n =
- {< parent = None;
- node_position = -1;
- extension = x;
- >} in
- x # set_node (n : 'ext #node :> 'ext node);
- n
-
- method dtd =
- match dtd with
- None -> failwith "Pxp_document.node_impl#dtd: No DTD available"
- | Some d -> d
-
- method encoding =
- match dtd with
- None -> failwith "Pxp_document.node_impl#encoding: No DTD available"
- | Some d -> d # encoding
-
- method internal_adopt (new_parent : 'ext node option) pos =
- begin match parent with
- None -> ()
- | Some p ->
- if new_parent <> None then
- failwith "Pxp_document.node_impl#internal_adopt: Tried to add a bound element"
- end;
- parent <- new_parent;
- node_position <- pos
-
- method internal_set_pos pos =
- node_position <- pos
-
- method virtual add_node : ?force:bool -> 'ext node -> unit
- method virtual add_pinstr : proc_instruction -> unit
- method virtual sub_nodes : 'ext node list
- method virtual pinstr : string -> proc_instruction list
- method virtual pinstr_names : string list
- method virtual iter_nodes : ('ext node -> unit) -> unit
- method virtual iter_nodes_sibl : ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
- method virtual nth_node : int -> 'ext node
- method virtual set_nodes : 'ext node list -> unit
- method virtual data : string
- method virtual node_type : node_type
- method virtual position : (string * int * int)
- method virtual attribute : string -> att_value
- method virtual attribute_names : string list
- method virtual attribute_type : string -> att_type
- method virtual attributes : (string * Pxp_types.att_value) list
- method virtual required_string_attribute : string -> string
- method virtual required_list_attribute : string -> string list
- method virtual optional_string_attribute : string -> string option
- method virtual optional_list_attribute : string -> string list
- method virtual quick_set_attributes : (string * Pxp_types.att_value) list -> unit
- method virtual attributes_as_nodes : 'ext node list
- method virtual set_comment : string option -> unit
- method virtual comment : string option
- method virtual create_element :
- ?position:(string * int * int) ->
- dtd -> node_type -> (string * string) list -> 'ext node
- method virtual create_data : dtd -> string -> 'ext node
- method virtual keep_always_whitespace_mode : unit
- method virtual write : output_stream -> encoding -> unit
- method virtual write_compact_as_latin1 : output_stream -> unit
- method virtual local_validate : ?use_dfa:bool -> unit -> unit
- method virtual internal_delete : 'ext node -> unit
- method virtual internal_init : (string * int * int) ->
- dtd -> string -> (string * string) list -> unit
- method virtual internal_init_other : (string * int * int) ->
- dtd -> node_type -> unit
- end
-;;
-
-
-(**********************************************************************)
-
-let no_position = ("?", 0, 0) ;;
-
-
-class ['ext] data_impl an_ext : ['ext] node =
- object (self)
- inherit ['ext] node_impl an_ext
- val mutable content = ("" : string)
-
- method position = no_position
-
- method add_node ?(force=false) _ =
- failwith "method 'add_node' not applicable to data node"
- method add_pinstr _ =
- failwith "method 'add_pinstr' not applicable to data node"
- method pinstr _ = []
- method pinstr_names = []
- method sub_nodes = []
- method iter_nodes _ = ()
- method iter_nodes_sibl _ = ()
- method nth_node _ = raise Not_found
- method set_nodes _ =
- failwith "method 'set_nodes' not applicable to data node"
- method data = content
- method node_type = T_data
- method attribute _ = raise Not_found
- method attribute_names = []
- method attribute_type _ = raise Not_found
- method attributes = []
- method required_string_attribute _ =
- failwith "Markup.document, method required_string_attribute: not found"
- method required_list_attribute _ =
- failwith "Markup.document, method required_list_attribute: not found"
- method optional_string_attribute _ = None
- method optional_list_attribute _ = []
- method id_attribute_name = raise Not_found
- method id_attribute_value = raise Not_found
- method idref_attribute_names = []
- method quick_set_attributes _ =
- failwith "method 'quick_set_attributes' not applicable to data node"
- method attributes_as_nodes = []
- method comment = None
- method set_comment c =
- match c with
- None -> ()
- | Some _ -> failwith "method 'set_comment' not applicable to data node"
- method create_element ?position _ _ _ =
- failwith "method 'create_element' not applicable to data node"
- method create_data new_dtd new_str =
- let x = extension # clone in
- let n =
- ( {< parent = None;
- extension = x;
- dtd = Some new_dtd;
- content = new_str;
- >}
- : 'ext #node :> 'ext node) in
- x # set_node n;
- n
- method local_validate ?use_dfa () = ()
- method keep_always_whitespace_mode = ()
-
-
- method write os enc =
- let encoding = self # encoding in
- write_data_string ~from_enc:encoding ~to_enc:enc os content
-
-
- method write_compact_as_latin1 os =
- self # write os `Enc_iso88591
-
- method internal_delete _ =
- assert false
- method internal_init _ _ _ _ =
- assert false
- method internal_init_other _ _ _ =
- assert false
- end
-;;
-
-
-(**********************************************************************)
-
-class ['ext] attribute_impl ~element ~name value dtd =
- (object (self)
- val mutable parent = (None : 'ext node option)
- val mutable dtd = dtd
- val mutable element_name = element
- val mutable att_name = name
- val mutable att_value = value
-
- method parent =
- match parent with
- None -> raise Not_found
- | Some p -> p
-
- method root =
- match parent with
- None -> (self : 'ext #node :> 'ext node)
- | Some p -> p # root
-
- method internal_adopt new_parent _ =
- parent <- new_parent
-
- method orphaned_clone =
- {< parent = None >}
-
- method orphaned_flat_clone =
- {< parent = None >}
-
- method dtd = dtd
-
- method encoding = dtd # encoding
-
- method node_type = T_attribute att_name
-
- method attribute n =
- if n = att_name then att_value else raise Not_found
-
- method attribute_names = [ att_name ]
-
- method attribute_type n =
- let eltype = dtd # element element_name in
- ( try
- let atype, adefault = eltype # attribute n in
- atype
- with
- Undeclared ->
- A_cdata
- )
-
- method attributes = [ att_name, att_value ]
-
- method required_string_attribute n =
- if n = att_name then
- match att_value with
- Value s -> s
- | Valuelist l -> String.concat " " l
- | Implied_value -> raise Not_found
- else
- failwith "Pxp_document.attribute_impl#required_string_attribute: not found"
-
-
- method required_list_attribute n =
- if n = att_name then
- match att_value with
- Value s -> [ s ]
- | Valuelist l -> l
- | Implied_value -> raise Not_found
- else
- failwith "Pxp_document.attribute_impl#required_list_attribute: not found"
-
- method optional_string_attribute n =
- if n = att_name then
- match att_value with
- Value s -> Some s
- | Valuelist l -> Some(String.concat " " l)
- | Implied_value -> None
- else
- None
-
- method optional_list_attribute n =
- if n = att_name then
- match att_value with
- Value s -> [ s ]
- | Valuelist l -> l
- | Implied_value -> []
- else
- []
-
- (* Senseless methods: *)
-
- method sub_nodes = []
- method pinstr _ = []
- method pinstr_names = []
- method iter_nodes _ = ()
- method iter_nodes_sibl _ = ()
- method nth_node _ = raise Not_found
- method data = ""
- method position = ("?",0,0)
- method comment = None
- method local_validate ?use_dfa () = ()
-
- (* Non-applicable methods: *)
-
- method extension =
- failwith "Pxp_document.attribute_impl#extension: not applicable"
- method delete =
- failwith "Pxp_document.attribute_impl#delete: not applicable"
- method node_position =
- failwith "Pxp_document.attribute_impl#node_position: not applicable"
- method node_path =
- failwith "Pxp_document.attribute_impl#node_path: not applicable"
- method previous_node =
- failwith "Pxp_document.attribute_impl#previous_node: not applicable"
- method next_node =
- failwith "Pxp_document.attribute_impl#next_node: not applicable"
- method internal_set_pos _ =
- failwith "Pxp_document.attribute_impl#internal_set_pos: not applicable"
- method internal_delete _ =
- failwith "Pxp_document.attribute_impl#internal_delete: not applicable"
- method internal_init _ _ _ _ =
- failwith "Pxp_document.attribute_impl#internal_init: not applicable"
- method internal_init_other _ _ _ =
- failwith "Pxp_document.attribute_impl#internal_init_other: not applicable"
- method add_node ?force _ =
- failwith "Pxp_document.attribute_impl#add_node: not applicable"
- method add_pinstr _ =
- failwith "Pxp_document.attribute_impl#add_pinstr: not applicable"
- method set_nodes _ =
- failwith "Pxp_document.attribute_impl#set_nodes: not applicable"
- method quick_set_attributes _ =
- failwith "Pxp_document.attribute_impl#quick_set_attributes: not applicable"
- method attributes_as_nodes =
- failwith "Pxp_document.attribute_impl#attributes_as_nodes: not applicable"
- method set_comment c =
- if c <> None then
- failwith "Pxp_document.attribute_impl#set_comment: not applicable"
- method create_element ?position _ _ _ =
- failwith "Pxp_document.attribute_impl#create_element: not applicable"
- method create_data _ _ =
- failwith "Pxp_document.attribute_impl#create_data: not applicable"
- method keep_always_whitespace_mode =
- failwith "Pxp_document.attribute_impl#keep_always_whitespace_mode: not applicable"
- method write _ _ =
- failwith "Pxp_document.attribute_impl#write: not applicable"
- method write_compact_as_latin1 _ =
- failwith "Pxp_document.attribute_impl#write_compact_as_latin1: not applicable"
- method id_attribute_name =
- failwith "Pxp_document.attribute_impl#id_attribute_name: not applicable"
- method id_attribute_value =
- failwith "Pxp_document.attribute_impl#id_attribute_value: not applicable"
- method idref_attribute_names =
- failwith "Pxp_document.attribute_impl#idref_attribute_names: not applicable"
- end
- : ['ext] node)
-;;
-
-(**********************************************************************)
-
-class ['ext] element_impl an_ext : ['ext] node =
- object (self:'self)
- inherit ['ext] node_impl an_ext as super
-
- val mutable content_model = Any
- val mutable content_dfa = lazy None
- val mutable ext_decl = false
- val mutable ntype = T_none
- val mutable id_att_name = None
- val mutable idref_att_names = []
- val mutable rev_nodes = ([] : 'c list)
- val mutable nodes = (None : 'c list option)
- val mutable array = (None : 'c array option)
- val mutable size = 0
- val mutable attributes = []
- val mutable att_nodes = []
- val mutable comment = None
- val pinstr = lazy (Hashtbl.create 10 : (string,proc_instruction) Hashtbl.t)
- val mutable keep_always_whitespace = false
-
- val mutable position = no_position
-
- method comment = comment
-
- method set_comment c =
- if ntype = T_comment then
- comment <- c
- else
- failwith "set_comment: not applicable to node types other than T_comment"
-
- method attributes = attributes
-
- method position = position
-
- method private error_name =
- match ntype with
- T_element n -> "Element `" ^ n ^ "'"
- | T_super_root -> "Super root"
- | T_pinstr n -> "Wrapper element for processing instruction `" ^ n ^
- "'"
- | T_comment -> "Wrapper element for comment"
- | T_none -> "NO element"
- | T_attribute _ -> assert false
- | T_namespace _ -> assert false
- | T_data -> assert false
-
- method add_node ?(force = false) n =
- let only_whitespace s =
- (* Checks that the string "s" contains only whitespace. On failure,
- * Validation_error is raised.
- *)
- let l = String.length s in
- if l < 100 then begin
- for i=0 to l - 1 do (* for loop is faster for small 'l' *)
- match s.[i] with
- ('\009'|'\010'|'\013'|'\032') -> ()
- | _ ->
- raise(Validation_error(self # error_name ^
- " must not have character contents"));
- done
- end
- else begin
- let lexbuf = Lexing.from_string s in
- let lexerset = Pxp_lexers.get_lexer_set (self # dtd # encoding) in
- let t = lexerset.scan_name_string lexbuf in
- if t <> Ignore or
- (lexerset.scan_name_string lexbuf <> Eof)
- then
- raise(Validation_error(self # error_name ^
- " must not have character contents"));
- ()
- end
- in
- (* general DTD check: *)
- begin match dtd with
- None -> ()
- | Some d -> if n # dtd != d then
- failwith "Pxp_document.element_impl # add_node: the sub node has a different DTD";
- end;
- (* specific checks: *)
- try
- begin match n # node_type with
- T_data ->
- begin match content_model with
- Any -> ()
- | Unspecified -> ()
- | Empty ->
- if not force then begin
- if n # data <> "" then
- raise(Validation_error(self # error_name ^
- " must be empty"));
- raise Skip
- end
- | Mixed _ -> ()
- | Regexp _ ->
- if not force then begin
- only_whitespace (n # data);
- (* TODO: make the following check faster *)
- if n # dtd # standalone_declaration &&
- n # data <> ""
- then begin
- (* The standalone declaration is violated if the
- * element declaration is contained in an external
- * entity.
- *)
- if ext_decl then
- raise
- (Validation_error
- (self # error_name ^
- " violates standalone declaration" ^
- " because extra white space separates" ^
- " the sub elements"));
- end;
- if not keep_always_whitespace then raise Skip
- end
- end
- | _ ->
- ()
- end;
- (* all OK, so add this node: *)
- n # internal_adopt (Some (self : 'ext #node :> 'ext node)) size;
- rev_nodes <- n :: rev_nodes;
- nodes <- None;
- array <- None;
- size <- size + 1
- with Skip ->
- ()
-
- method add_pinstr pi =
- begin match dtd with
- None -> ()
- | Some d ->
- if pi # encoding <> d # encoding then
- failwith "Pxp_document.element_impl # add_pinstr: Inconsistent encodings";
- end;
- let name = pi # target in
- Hashtbl.add (Lazy.force pinstr) name pi
-
- method pinstr name =
- Hashtbl.find_all (Lazy.force pinstr) name
-
- method pinstr_names =
- let l = ref [] in
- Hashtbl.iter
- (fun n _ -> l := n :: !l)
- (Lazy.force pinstr);
- !l
-
- method sub_nodes =
- match nodes with
- None ->
- let cl = List.rev rev_nodes in
- nodes <- Some cl;
- cl
- | Some cl ->
- cl
-
- method iter_nodes f =
- let cl = self # sub_nodes in
- List.iter f cl
-
- method iter_nodes_sibl f =
- let cl = self # sub_nodes in
- let rec next last_node l =
- match l with
- [] -> ()
- | [x] ->
- f last_node x None
- | x :: y :: l' ->
- f last_node x (Some y);
- next (Some x) (y :: l')
- in
- next None cl
-
- method nth_node p =
- if p < 0 or p >= size then raise Not_found;
- if array = None then
- array <- Some (Array.of_list (self # sub_nodes));
- match array with
- None -> assert false
- | Some a ->
- a.(p)
-
- method set_nodes nl =
- let old_size = size in
- List.iter
- (fun n -> n # internal_adopt None (-1))
- rev_nodes;
- begin try
- size <- 0;
- List.iter
- (fun n -> n # internal_adopt
- (Some (self : 'ext #node :> 'ext node))
- size;
- size <- size + 1)
- nl
- with
- e ->
- (* revert action as much as possible *)
- List.iter
- (fun n -> n # internal_adopt None (-1))
- rev_nodes;
- size <- old_size;
- let pos = ref (size-1) in
- List.iter
- (fun n -> n # internal_adopt
- (Some (self : 'ext #node :> 'ext node))
- !pos;
- decr pos
- )
- rev_nodes;
- (* [TODO] Note: there may be bad members in nl *)
- raise e
- end;
- rev_nodes <- List.rev nl;
- array <- None;
- nodes <- None
-
-
- method orphaned_clone : 'self =
- let sub_clones =
- List.map
- (fun m ->
- m # orphaned_clone)
- rev_nodes
- in
-
- let x = extension # clone in
- let n =
- {< parent = None;
- node_position = -1;
- extension = x;
- rev_nodes = sub_clones;
- nodes = None;
- array = None;
- >} in
-
- let pos = ref (size - 1) in
- List.iter
- (fun m -> m # internal_adopt
- (Some (n : 'ext #node :> 'ext node))
- !pos;
- decr pos
- )
- sub_clones;
-
- x # set_node (n : 'ext #node :> 'ext node);
- n
-
- method orphaned_flat_clone : 'self =
- let x = extension # clone in
- let n =
- {< parent = None;
- node_position = -1;
- extension = x;
- rev_nodes = [];
- nodes = None;
- size = 0;
- array = None;
- >} in
-
- x # set_node (n : 'ext #node :> 'ext node);
- n
-
-
- method internal_delete n =
- rev_nodes <- List.filter (fun n' -> n' != n) rev_nodes;
- size <- size - 1;
- let p = ref (size-1) in
- List.iter
- (fun n' -> n' # internal_set_pos !p; decr p)
- rev_nodes;
- nodes <- None;
- n # internal_adopt None (-1);
-
-
- method data =
- let cl = self # sub_nodes in
- String.concat "" (List.map (fun n -> n # data) cl)
-
- method node_type = ntype
-
-
- method attribute n =
- List.assoc n attributes
-
- method attribute_names =
- List.map fst attributes
-
- method attribute_type n =
- match ntype with
- T_element name ->
- let d =
- match dtd with
- None -> assert false
- | Some d -> d in
- let eltype = d # element name in
- ( try
- let atype, adefault = eltype # attribute n in
- atype
- with
- Undeclared ->
- A_cdata
- )
- | _ ->
- failwith "attribute_type: not available for non-element nodes"
-
-
- method required_string_attribute n =
- try
- match List.assoc n attributes with
- Value s -> s
- | Valuelist l -> String.concat " " l
- | Implied_value -> raise Not_found
- with
- Not_found ->
- failwith "Pxp_document, method required_string_attribute: not found"
-
- method optional_string_attribute n =
- try
- match List.assoc n attributes with
- Value s -> Some s
- | Valuelist l -> Some (String.concat " " l)
- | Implied_value -> None
- with
- Not_found ->
- None
-
- method required_list_attribute n =
- try
- match List.assoc n attributes with
- Value s -> [ s ]
- | Valuelist l -> l
- | Implied_value -> raise Not_found
- with
- Not_found ->
- failwith "Markup.document, method required_list_attribute: not found"
-
- method optional_list_attribute n =
- try
- match List.assoc n attributes with
- Value s -> [ s ]
- | Valuelist l -> l
- | Implied_value -> []
- with
- Not_found ->
- []
-
- method id_attribute_name =
- match id_att_name with
- None -> raise Not_found
- | Some name -> name
-
- method id_attribute_value =
- match id_att_name with
- None -> raise Not_found
- | Some name ->
- begin match List.assoc name attributes (* may raise Not_found *)
- with
- Value s -> s
- | _ -> raise Not_found
- end
-
-
- method idref_attribute_names = idref_att_names
-
-
- method quick_set_attributes atts =
- match ntype with
- T_element _ ->
- attributes <- atts;
- att_nodes <- []
- | _ ->
- failwith "quick_set_attributes: not applicable for non-element node"
-
-
- method attributes_as_nodes =
- match att_nodes with
- [] when attributes = [] ->
- []
- | [] ->
- let dtd = self # dtd in
- let element_name =
- match ntype with
- T_element n -> n
- | _ ->
- assert false in
- let l =
- List.map
- (fun (n,v) ->
- new attribute_impl
- ~element:element_name
- ~name:n
- v
- dtd)
- attributes in
- att_nodes <- l;
- l
- | _ ->
- att_nodes
-
-
- method create_element
- ?(position = no_position) new_dtd new_type new_attlist =
- let x = extension # clone in
- let obj = ( {< parent = None;
- extension = x;
- pinstr = lazy (Hashtbl.create 10)
- >}
- : 'ext #node :> 'ext node
- ) in
- x # set_node obj;
- match new_type with
- T_data ->
- failwith "create_element: Cannot create T_data node"
- | T_element name ->
- obj # internal_init position new_dtd name new_attlist;
- obj
- | (T_comment | T_pinstr _ | T_super_root | T_none) ->
- obj # internal_init_other position new_dtd new_type;
- obj
- | _ ->
- failwith "create_element: Cannot create such node"
-
-
- method internal_init_other new_pos new_dtd new_ntype =
- (* resets the contents of the object *)
- parent <- None;
- rev_nodes <- [];
- nodes <- None;
- ntype <- new_ntype;
- position <- new_pos;
- content_model <- Any;
- content_dfa <- lazy None;
- attributes <- [];
- att_nodes <- [];
- dtd <- Some new_dtd;
- ext_decl <- false;
- id_att_name <- None;
- idref_att_names <- [];
- comment <- None;
-
-
- method internal_init new_pos new_dtd new_name new_attlist =
- (* ONLY FOR T_Element NODES!!! *)
- (* resets the contents of the object *)
- parent <- None;
- rev_nodes <- [];
- nodes <- None;
- ntype <- T_element new_name;
- position <- new_pos;
- comment <- None;
- att_nodes <- [];
-
- let lexerset = Pxp_lexers.get_lexer_set (new_dtd # encoding) in
- let sadecl = new_dtd # standalone_declaration in
-
- (* First validate the element name and the attributes: *)
- (* Well-Formedness Constraint: Unique Att Spec *)
- let rec check_uniqueness al =
- match al with
- [] -> ()
- | (n, av) :: al' ->
- if List.mem_assoc n al' then
- raise (WF_error("Attribute `" ^ n ^ "' occurs twice in element `" ^ new_name ^ "'"));
- check_uniqueness al'
- in
- check_uniqueness new_attlist;
- (* Validity Constraint: Element Valid [element has been declared] *)
- try
- let eltype = new_dtd # element new_name in
- content_model <- eltype # content_model;
- content_dfa <- lazy(eltype # content_dfa);
- ext_decl <- eltype # externally_declared;
- id_att_name <- eltype # id_attribute_name;
- idref_att_names <- eltype # idref_attribute_names;
- (* Validity Constraint: Attribute Value Type *)
- (* Validity Constraint: Fixed Attribute Default *)
- (* Validity Constraint: Standalone Document Declaration (partly) *)
- let undeclared_attlist = ref [] in
- let new_attlist' =
- List.map
- (fun (n,v) ->
- try
- (* Get type, default, and the normalized attribute
- * value 'av':
- *)
- let atype, adefault = eltype # attribute n in
- let av = value_of_attribute lexerset new_dtd n atype v in
- (* If necessary, check whether normalization violates
- * the standalone declaration.
- *)
- if sadecl &&
- eltype #
- attribute_violates_standalone_declaration n (Some v)
- then
- raise
- (Validation_error
- ("Attribute `" ^ n ^ "' of element type `" ^
- new_name ^ "' violates standalone declaration"));
- (* If the default is "fixed", check that. *)
- begin match adefault with
- (D_required | D_implied) -> ()
- | D_default _ -> ()
- | D_fixed u ->
- let uv = value_of_attribute
- lexerset new_dtd "[default]" atype u in
- if av <> uv then
- raise
- (Validation_error
- ("Attribute `" ^ n ^
- "' is fixed, but has here a different value"));
- end;
- n,av
- with
- Undeclared ->
- (* raised by method "# attribute" *)
- undeclared_attlist :=
- (n, value_of_attribute lexerset new_dtd n A_cdata v) ::
- !undeclared_attlist;
- n, Implied_value (* does not matter *)
- )
- new_attlist in
- (* Validity Constraint: Required Attribute *)
- (* Validity Constraint: Standalone Document Declaration (partly) *)
- (* Add attributes with default values *)
- let new_attlist'' =
- List.map
- (fun n ->
- try
- n, List.assoc n new_attlist'
- with
- Not_found ->
- (* Check standalone declaration: *)
- if sadecl &&
- eltype #
- attribute_violates_standalone_declaration
- n None then
- raise
- (Validation_error
- ("Attribute `" ^ n ^ "' of element type `" ^
- new_name ^ "' violates standalone declaration"));
- (* add default value or Implied *)
- let atype, adefault = eltype # attribute n in
- match adefault with
- D_required ->
- raise(Validation_error("Required attribute `" ^ n ^ "' is missing"))
- | D_implied ->
- n, Implied_value
- | D_default v ->
- n, value_of_attribute lexerset new_dtd n atype v
- | D_fixed v ->
- n, value_of_attribute lexerset new_dtd n atype v
- )
- (eltype # attribute_names)
- in
- dtd <- Some new_dtd;
- attributes <- new_attlist'' @ !undeclared_attlist;
- with
- Undeclared ->
- (* The DTD allows arbitrary attributes/contents for this
- * element
- *)
- dtd <- Some new_dtd;
- attributes <- List.map (fun (n,v) -> n, Value v) new_attlist;
- content_model <- Any;
- content_dfa <- lazy None;
-
- method local_validate ?(use_dfa=false) () =
- (* validates that the content of this element matches the model *)
- let dfa = if use_dfa then Lazy.force content_dfa else None in
- if not (validate_content
- ~use_dfa:dfa
- content_model
- (self : 'ext #node :> 'ext node)) then
- raise(Validation_error(self # error_name ^
- " does not match its content model"))
-
-
- method create_data _ _ =
- failwith "method 'create_data' not applicable to element node"
-
- method keep_always_whitespace_mode =
- keep_always_whitespace <- true
-
- method write os enc =
- let encoding = self # encoding in
- let wms =
- write_markup_string ~from_enc:encoding ~to_enc:enc os in
-
- begin match ntype with
- T_element name ->
- wms ("<" ^ name);
- List.iter
- (fun (aname, avalue) ->
- match avalue with
- Implied_value -> ()
- | Value v ->
- wms ("\n" ^ aname ^ "=\"");
- write_data_string ~from_enc:encoding ~to_enc:enc os v;
- wms "\"";
- | Valuelist l ->
- let v = String.concat " " l in
- wms ("\n" ^ aname ^ "=\"");
- write_data_string ~from_enc:encoding ~to_enc:enc os v;
- wms "\"";
- )
- attributes;
- wms "\n>";
- | _ ->
- ()
- end;
-
- Hashtbl.iter
- (fun n pi ->
- pi # write os enc
- )
- (Lazy.force pinstr);
- List.iter
- (fun n -> n # write os enc)
- (self # sub_nodes);
-
- begin match ntype with
- T_element name ->
- wms ("</" ^ name ^ "\n>");
- | _ ->
- ()
- end
-
- (* TODO: How to write comments? The comment string may contain
- * illegal characters or "--".
- *)
-
-
- method write_compact_as_latin1 os =
- self # write os `Enc_iso88591
-
- end
-;;
-
-
-let spec_table_find_exemplar tab eltype =
- try
- Hashtbl.find tab.mapping eltype
- with
- Not_found -> tab.default_element
-;;
-
-
-let create_data_node spec dtd str =
- match spec with
- Spec_table tab ->
- let exemplar = tab.data_node in
- exemplar # create_data dtd str
-;;
-
-
-let create_element_node ?position spec dtd eltype atts =
- match spec with
- Spec_table tab ->
- let exemplar = spec_table_find_exemplar tab eltype in
- exemplar # create_element ?position:position dtd (T_element eltype) atts
-;;
-
-
-let create_super_root_node ?position spec dtd =
- match spec with
- Spec_table tab ->
- ( match tab.super_root_node with
- None ->
- failwith "Pxp_document.create_super_root_node: No exemplar"
- | Some x ->
- x # create_element ?position:position dtd T_super_root []
- )
-;;
-
-let create_no_node ?position spec dtd =
- match spec with
- Spec_table tab ->
- let x = tab.default_element in
- x # create_element ?position:position dtd T_none []
-;;
-
-
-let create_comment_node ?position spec dtd text =
- match spec with
- Spec_table tab ->
- ( match tab.comment_node with
- None ->
- failwith "Pxp_document.create_comment_node: No exemplar"
- | Some x ->
- let e = x # create_element ?position:position dtd T_comment []
- in
- e # set_comment (Some text);
- e
- )
-;;
-
-
-let create_pinstr_node ?position spec dtd pi =
- let target = pi # target in
- let exemplar =
- match spec with
- Spec_table tab ->
- ( try
- Hashtbl.find tab.pinstr_mapping target
- with
- Not_found ->
- ( match tab.default_pinstr_node with
- None ->
- failwith
- "Pxp_document.create_pinstr_node: No exemplar"
- | Some x -> x
- )
- )
- in
- let el =
- exemplar # create_element ?position:position dtd (T_pinstr target) [] in
- el # add_pinstr pi;
- el
-;;
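-
-(* Example (an illustrative sketch; the element type "p" and its content are
- * made up, and the DTD must permit them): build a small fragment from the
- * factory functions above. add_node may raise Validation_error if the DTD
- * forbids character content in "p".
- *)
-let _example_fragment spec dtd =
- let p = create_element_node spec dtd "p" [] in
- let txt = create_data_node spec dtd "Hello world" in
- p # add_node txt;
- p
-;;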
-
-
-let find ?(deeply=false) f base =
- let rec search_flat children =
- match children with
- [] -> raise Not_found
- | n :: children' ->
- if f n then n else search_flat children'
- in
- let rec search_deep children =
- match children with
- [] -> raise Not_found
- | n :: children' ->
- if f n then
- n
- else
- try search_deep (n # sub_nodes)
- with Not_found -> search_deep children'
- in
- (if deeply then search_deep else search_flat)
- (base # sub_nodes)
-;;
-
-
-let find_all ?(deeply=false) f base =
- let rec search_flat children =
- match children with
- [] -> []
- | n :: children' ->
- if f n then n :: search_flat children' else search_flat children'
- in
- let rec search_deep children =
- match children with
- [] -> []
- | n :: children' ->
- let rest =
- search_deep (n # sub_nodes) @ search_deep children' in
- if f n then
- n :: rest
- else
- rest
- in
- (if deeply then search_deep else search_flat)
- (base # sub_nodes)
-;;
-
-
-let find_element ?deeply eltype base =
- find
- ?deeply:deeply
- (fun n ->
- match n # node_type with
- T_element name -> name = eltype
- | _ -> false)
- base
-;;
-
-
-let find_all_elements ?deeply eltype base =
- find_all
- ?deeply:deeply
- (fun n ->
- match n # node_type with
- T_element name -> name = eltype
- | _ -> false)
- base
-;;
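-
-(* Example (an illustrative sketch; the element type "item" is made up): the
- * first "item" element anywhere below 'root' (find_element raises Not_found
- * if there is none) together with the list of all of them.
- *)
-let _example_items root =
- let first = find_element ~deeply:true "item" root in
- let all = find_all_elements ~deeply:true "item" root in
- (first, all)
-;;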
-
-
-exception Skip;;
-
-let map_tree ~pre ?(post=(fun x -> x)) base =
- let rec map_rec n =
- (try
- let n' = pre n in
- if n' # node_type <> T_data then begin
- let children = n # sub_nodes in
- let children' = map_children children in
- n' # set_nodes children';
- end;
- post n'
- with
- Skip -> raise Not_found
- )
- and map_children l =
- match l with
- [] -> []
- | child :: l' ->
- (try
- let child' = map_rec child in
- child' :: map_children l'
- with
- Not_found ->
- map_children l'
- )
- in
- map_rec base
-;;
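-
-(* Example (an illustrative sketch): copy a tree while dropping every comment
- * node. 'pre' flat-clones each remaining node; raising Skip removes a node
- * together with its subtree.
- *)
-let _example_strip_comments root =
- map_tree
- ~pre:(fun n ->
- match n # node_type with
- T_comment -> raise Skip
- | _ -> n # orphaned_flat_clone)
- root
-;;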
-
-
-let map_tree_sibl ~pre ?(post=(fun _ x _ -> x)) base =
- let rec map_rec l n r =
- (try
- let n' = pre l n r in
- if n' # node_type <> T_data then begin
- let children = n # sub_nodes in
- let children' = map_children None children in
- let children'' = postprocess_children None children' in
- n' # set_nodes children'';
- end;
- n'
- with
- Skip -> raise Not_found
- )
- and map_children predecessor l =
- (match l with
- [] -> []
- | child :: l' ->
- let successor =
- match l' with
- [] -> None
- | x :: _ -> Some x in
- (try
- let child' = map_rec predecessor child successor in
- child' :: map_children (Some child) l'
- with
- Not_found ->
- map_children (Some child) l'
- )
- )
- and postprocess_children predecessor l =
- (match l with
- [] -> []
- | child :: l' ->
- let successor =
- match l' with
- [] -> None
- | x :: _ -> Some x in
- (try
- let child' = post predecessor child successor in
- child' :: postprocess_children (Some child) l'
- with
- Skip ->
- postprocess_children (Some child) l'
- )
- )
- in
- let base' = map_rec None base None in
- try post None base' None with Skip -> raise Not_found
-;;
-
-
-let iter_tree ?(pre=(fun x -> ())) ?(post=(fun x -> ())) base =
- let rec iter_rec n =
- (try
- pre n;
- let children = n # sub_nodes in
- iter_children children;
- post n
- with
- Skip -> raise Not_found
- )
- and iter_children l =
- match l with
- [] -> ()
- | child :: l' ->
- (try
- iter_rec child;
- iter_children l'
- with
- Not_found ->
- iter_children l'
- )
- in
- iter_rec base
-;;
-
-
-let iter_tree_sibl ?(pre=(fun _ _ _ -> ())) ?(post=(fun _ _ _ -> ())) base =
- let rec iter_rec l n r =
- (try
- pre l n r;
- let children = n # sub_nodes in
- iter_children None children;
- post l n r
- with
- Skip -> raise Not_found
- )
- and iter_children predecessor l =
- (match l with
- [] -> ()
- | child :: l' ->
- let successor =
- match l' with
- [] -> None
- | x :: _ -> Some x in
- (try
- iter_rec predecessor child successor;
- iter_children (Some child) l'
- with
- Not_found ->
- iter_children (Some child) l'
- )
- )
- in
- iter_rec None base None
-;;
-
-
-let compare a b =
- let rec cmp p1 p2 =
- match p1, p2 with
- [], [] -> 0
- | [], _ -> -1
- | _, [] -> 1
- | x::p1', y::p2' -> if x = y then cmp p1' p2' else x - y
- in
-
- let a_path = a # node_path in
- let b_path = b # node_path in
-
- cmp a_path b_path
-;;
-
-
-type 'ext ord_index = ('ext node, int) Hashtbl.t;;
-
-let create_ord_index base =
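-  (* Two passes: first count the nodes to size the hash table, then number
-   * them in document (pre-) order. *)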
- let n = ref 0 in
- iter_tree ~pre:(fun _ -> incr n) base;
- let idx = Hashtbl.create !n in
- let k = ref 0 in
- iter_tree ~pre:(fun node -> Hashtbl.add idx node !k; incr k) base;
- idx
-;;
-
-
-let ord_number idx node =
- Hashtbl.find idx node
-;;
-
-let ord_compare idx a b =
- let ord_a = Hashtbl.find idx a in
- let ord_b = Hashtbl.find idx b in
- ord_a - ord_b
-;;
-
-class ['ext] document the_warner =
- object (self)
- val mutable xml_version = "1.0"
- val mutable dtd = (None : dtd option)
- val mutable root = (None : 'ext node option)
-
- val pinstr = lazy (Hashtbl.create 10 : (string,proc_instruction) Hashtbl.t)
- val warner = (the_warner : collect_warnings)
-
- method init_xml_version s =
- if s <> "1.0" then
- warner # warn ("XML version '" ^ s ^ "' not supported");
- xml_version <- s
-
- method init_root r =
- let dtd_r = r # dtd in
- match r # node_type with
-
- (**************** CASE: We have a super root element ***************)
-
- | T_super_root ->
- if not (dtd_r # arbitrary_allowed) then begin
- match dtd_r # root with
- Some declared_root_element_name ->
- let real_root_element =
- try
- List.find
- (fun r' ->
- match r' # node_type with
- | T_element _ -> true
- | _ -> false)
- (r # sub_nodes)
- with
- Not_found ->
- failwith "Pxp_document.document#init_root: Super root does not contain root element"
- (* TODO: Check also that there is at most one
- * element in the super root node
- *)
-
- in
- let real_root_element_name =
- match real_root_element # node_type with
- T_element name -> name
- | _ -> assert false
- in
- if real_root_element_name <> declared_root_element_name then
- raise
- (Validation_error ("The root element is `" ^
- real_root_element_name ^
- "' but is declared as `" ^
-                                   declared_root_element_name ^ "'"))
- | None -> ()
- end;
- (* All is okay, so store dtd and root node: *)
- dtd <- Some dtd_r;
- root <- Some r
-
- (**************** CASE: No super root element **********************)
-
- | T_element root_element_name ->
- if not (dtd_r # arbitrary_allowed) then begin
- match dtd_r # root with
- Some declared_root_element_name ->
- if root_element_name <> declared_root_element_name then
- raise
- (Validation_error ("The root element is `" ^
- root_element_name ^
- "' but is declared as `" ^
-                                     declared_root_element_name ^ "'"))
- | None ->
- (* This may happen if you initialize your DTD yourself.
- * The value 'None' means that the method 'set_root' was
- * never called for the DTD; we interpret it here as:
- * The root element does not matter.
- *)
- ()
- end;
- (* All is okay, so store dtd and root node: *)
- dtd <- Some dtd_r;
- root <- Some r
-
- | _ ->
- failwith "Pxp_document.document#init_root: the root node must be an element or super-root"
-
- method xml_version = xml_version
-
- method xml_standalone =
- match dtd with
- None -> false
- | Some d -> d # standalone_declaration
-
- method dtd =
- match dtd with
- None -> failwith "Pxp_document.document#dtd: Document has no DTD"
- | Some d -> d
-
- method encoding =
- match dtd with
- None -> failwith "Pxp_document.document#encoding: Document has no DTD"
- | Some d -> d # encoding
-
- method root =
- match root with
- None -> failwith "Pxp_document.document#root: Document has no root element"
- | Some r -> r
-
- method add_pinstr pi =
- begin match dtd with
- None -> ()
- | Some d ->
- if pi # encoding <> d # encoding then
- failwith "Pxp_document.document # add_pinstr: Inconsistent encodings";
- end;
- let name = pi # target in
- Hashtbl.add (Lazy.force pinstr) name pi
-
- method pinstr name =
- Hashtbl.find_all (Lazy.force pinstr) name
-
- method pinstr_names =
- let l = ref [] in
- Hashtbl.iter
- (fun n _ -> l := n :: !l)
- (Lazy.force pinstr);
- !l
-
- method write os enc =
- let encoding = self # encoding in
- let wms =
- write_markup_string ~from_enc:encoding ~to_enc:enc os in
-
- let r = self # root in
- wms ("<?xml version='1.0' encoding='" ^
- Netconversion.string_of_encoding enc ^
- "'?>\n");
- ( match self # dtd # root with
- None ->
- self # dtd # write os enc false
- | Some _ ->
- self # dtd # write os enc true
- );
- Hashtbl.iter
- (fun n pi ->
- pi # write os enc
- )
- (Lazy.force pinstr);
- r # write os enc;
- wms "\n";
-
- method write_compact_as_latin1 os =
- self # write os `Enc_iso88591
-
- end
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.14 2000/08/30 15:47:52 gerd
- * Implementation of pxp_document.mli rev 1.10.
- *
- * Revision 1.13 2000/08/26 23:29:10 gerd
- *      Implementations for the changes in rev 1.9 of pxp_document.mli.
- *
- * Revision 1.12 2000/08/18 20:14:00 gerd
- * New node_types: T_super_root, T_pinstr, T_comment, (T_attribute),
- * (T_none), (T_namespace).
- *
- * Revision 1.11 2000/08/14 22:24:55 gerd
- * Moved the module Pxp_encoding to the netstring package under
- * the new name Netconversion.
- *
- * Revision 1.10 2000/07/23 02:16:34 gerd
- * Support for DFAs.
- *
- * Revision 1.9 2000/07/16 19:37:09 gerd
- * Simplification.
- *
- * Revision 1.8 2000/07/16 17:50:01 gerd
- * Fixes in 'write'
- *
- * Revision 1.7 2000/07/16 16:34:41 gerd
- * New method 'write', the successor of 'write_compact_as_latin1'.
- *
- * Revision 1.6 2000/07/14 13:56:11 gerd
- * Added methods id_attribute_name, id_attribute_value,
- * idref_attribute_names.
- *
- * Revision 1.5 2000/07/09 17:51:14 gerd
- * Element nodes can store positions.
- *
- * Revision 1.4 2000/07/08 23:04:06 gerd
- * [Merging 0.2.10:] Bugfix: allow_undeclared_attribute
- *
- * Revision 1.3 2000/07/04 22:10:06 gerd
- * Implemented rev 1.3 of pxp_document.mli in a straight-
- * forward fashion.
- *
- * Revision 1.2 2000/06/14 22:19:06 gerd
- * Added checks such that it is impossible to mix encodings.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_document.ml:
- *
- * Revision 1.19 2000/05/27 19:14:42 gerd
- * value_of_attribute: this function has been moved to
- * markup_aux.ml.
- *
- * Added the following checks whether there is a violation
- * against the standalone declaration:
- * - Externally declared elements with regexp content model
- * must not contain extra white space
- * - The effect of normalization of externally declared attributes
- * must not depend on the type of the attributes
- * - Declared default values of externally declared attributes
- * must not have an effect on the value of the attributes.
- *
- * Removed the method init_xml_standalone. It is now stored in
- * the DTD whether there is a standalone declaration.
- *
- * Revision 1.18 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.17 2000/05/06 23:12:20 gerd
- * Allow undeclared attributes.
- *
- * Revision 1.16 2000/05/01 20:42:28 gerd
- * New method write_compact_as_latin1.
- *
- * Revision 1.15 2000/04/30 18:15:22 gerd
- * In function validate_content: Special handling of the pseudo
- * nodes "-pi" and "-vr".
- * Method init_root, class document: Recognizes whether the
- * root is virtual or real. The check on the root element name is different
- * in each case.
- * New method keep_always_whitespace_mode: Turns a special mode
- * on in which ignorable whitespace is included into the document.
- *
- * Revision 1.14 2000/03/11 22:58:15 gerd
- * Updated to support Markup_codewriter.
- *
- * Revision 1.13 2000/01/27 21:51:56 gerd
- * Added method 'attributes'.
- *
- * Revision 1.12 2000/01/27 21:19:34 gerd
- * Added methods.
- * Bugfix: 'orphaned_clone' performs now really a clone.
- *
- * Revision 1.11 2000/01/20 21:57:58 gerd
- * Bugfix: method set_nodes does no longer add the new subnodes
- * in the reverse order.
- *
- * Revision 1.10 1999/12/17 21:35:37 gerd
- * Bugfix: If the name of the root element is not specified in
- * the DTD, the document does not check whether the root element is a
- * specific element.
- *
- * Revision 1.9 1999/11/09 22:22:01 gerd
- * The "document" classes now checks that the root element is the
- * same as the declared root element. Thanks to Claudio Sacerdoti Coen
- * for his bug report.
- *
- * Revision 1.8 1999/09/01 22:51:40 gerd
- * Added methods to store processing instructions.
- *
- * Revision 1.7 1999/09/01 16:19:18 gerd
- * Added some warnings.
- * If an element type has the content model EMPTY, it is now strictly
- * checked that the element instance is really empty. Especially, white space
- * is NOT allowed in such instances.
- *
- * Revision 1.6 1999/08/19 21:58:59 gerd
- * Added method "reset_finder". This is not very convincing, but
- * currently the simplest way to update the ID hash table.
- *
- * Revision 1.5 1999/08/19 01:08:15 gerd
- * Added method "find" that searches node by ID in the whole
- * tree.
- * Bugfix: After the extension has been cloned, the "set_node" method
- * is invoked telling the clone to which node it is associated.
- *
- * Revision 1.4 1999/08/15 13:52:52 gerd
- * Bugfix: WF_error "Attribute x occurs twice in element [unnamed]"
- * no longer possible; instead of "[unnamed]" the actual name is printed.
- * Improved some of the error messages.
- *
- * Revision 1.3 1999/08/15 02:19:01 gerd
- * If the DTD allows arbitrary elements, unknown elements are not
- * rejected.
- *
- * Revision 1.2 1999/08/11 14:54:23 gerd
- * Optimizations: The hashtable for the 'pinstr' variable is only
- * created on demand. -- The 'only_whitespace' function uses a simple "for"
- *      loop if the string is small and a lexer if the string is big.
- *
- * Revision 1.1 1999/08/10 00:35:50 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-(**********************************************************************)
-(* *)
-(* Pxp_document: *)
-(* Object model of the document/element instances *)
-(* *)
-(**********************************************************************)
-
-
-(* ======================================================================
- * OVERVIEW
- *
- * class type node ............. The common class type of the nodes of
- * the element tree. Nodes are either
- * elements (inner nodes) or data nodes
- * (leaves)
- * class type extension ........ The minimal properties of the so-called
- * extensions of the nodes: Nodes can be
- * customized by applying a class parameter
- * that adds methods/values to nodes.
- * class data_impl : node ...... Implements data nodes.
- * class element_impl : node ... Implements element nodes
- * class document .............. A document is an element with some additional
- * properties
- *
- * ======================================================================
- *
- * THE STRUCTURE OF NODE TREES:
- *
- * Every node except the root node has a parent node. The parent node is
- * always an element, because data nodes never contain other nodes.
- * In the other direction, element nodes may have children; both elements
- * and data nodes are possible as children.
- * Every node knows its parent (if any) and all its children (if any);
- * the linkage is maintained in both directions. A node without a parent
- * is called a root.
- * It is not possible for a node to be the child of two nodes (neither of two
- * different nodes nor of the same node twice).
- * You can break the connection between a node and its parent; the method
- * "delete" performs this operations and deletes the node from the parent's
- * list of children. The node is now a root, for itself and for all
- * subordinate nodes. In this context, the node is also called an orphan,
- * because it has lost its parent (this is a bit misleading because the
- * parent is not always the creator of a node).
- * In order to simplify complex operations, you can also set the list of
- * children of an element. Nodes that have been children before are unchanged;
- * new nodes are added (and the linkage is set up), and nodes that no longer
- * occur in the list are handled as if they had been deleted.
- * If you try to add a node that is not a root (either by an "add" or by a
- * "set" operation) the operation fails.
- *
- * CREATION OF NODES
- *
- * The class interface supports creation of nodes by cloning a so-called
- * exemplar. The idea is that it is sometimes useful to implement different
- * element types by different classes, and to implement this by looking up
- * exemplars.
- * Imagine you have three element types A, B, and C, and three classes
- * a, b, and c implementing the node interface (for example, by providing
- * different extensions, see below). The XML parser can be configured to
- * have a lookup table
- * { A --> a0, B --> b0, C --> c0 }
- * where a0, b0, c0 are exemplars of the classes a, b, and c, i.e. empty
- * objects belonging to these classes. If the parser finds an instance of
- * A, it looks up the exemplar a0 of A and clones it (actually, the method
- * "create_element" performs this for elements, and "create_data" for data
- * nodes). Clones belong to the same class as the original nodes, so the
- * instances of the elements have the same classes as the configured
- * exemplars.
- * Note: This technique assumes that the interface of all exemplars is the
- * same!
- *
- * THE EXTENSION
- *
- * The class type node and all its implementations have a class parameter
- * 'ext which must at least fulfil the properties of the class type "extension".
- * The idea is that you can add properties, for example:
- *
- * class my_extension =
- * object
- * (* minimal properties required by class type "extension": *)
- * method clone = ...
- * method node = ...
- * method set_node n = ...
- * (* here my own methods: *)
- * method do_this_and_that ...
- * end
- *
- * class my_element_impl = [ my_extension ] element_impl
- * class my_data_impl = [ my_extension ] data_impl
- *
- * The whole XML parser is parameterized with 'ext, so your extension is
- * visible everywhere (this is the reason why extensibility is solved by
- * parametric polymorphism and not by inclusive polymorphism (subtyping)).
- *
- *
- * SOME COMPLICATED TYPE EXPRESSIONS
- *
- * Sometimes the following type expressions turn out to be necessary:
- *
- * 'a node extension as 'a
- * This is the type of an extension that belongs to a node that
- * has an extension that is the same as we started with.
- *
- * 'a extension node as 'a
- * This is the type of a node that has an extension that belongs to a
- * node of the type we started with.
- *
- *
- * DOCUMENTS
- * ...
- *
- * ======================================================================
- *
- * SIMPLE USAGE: ...
- *)
-
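-(* For example, moving a node from one parent to another uses the operations
- * described above (sketch; n and new_parent are assumed to be given):
- *
- *   n # delete;                (* n loses its parent and becomes a root *)
- *   new_parent # add_node n    (* ... and is adopted by new_parent *)
- *)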
-
-open Pxp_dtd
-
-
-type node_type =
- (* The basic and most important node types:
- * - T_element element_type is the type of element nodes
- * - T_data is the type of text data nodes
- * By design of the parser, neither CDATA sections nor entity references
- * are represented in the node tree; so there are no types for them.
- *)
- T_element of string
- | T_data
-
- (* The following types are extensions to my original design. They have mainly
- * been added to simplify the implementation of standards (such as
- * XPath) that require that nodes of these types are included into the
- * main document tree.
- * There are options (see Pxp_yacc) forcing the parser to insert such
- * nodes; in this case, the nodes are actually element nodes serving
- * as wrappers for the additional data structures. The options are:
- * enable_super_root_node, enable_pinstr_nodes, enable_comment_nodes.
- * By default, such nodes are not created.
- *)
- | T_super_root
- | T_pinstr of string (* The string is the target of the PI *)
- | T_comment
-
- (* The following types are fully virtual. This means that it is impossible
- * to make the parser insert such nodes. However, these types might be
- * practical when defining views on the tree.
- * Note that the list of virtual node types will be extended if necessary.
- *)
- | T_none
- | T_attribute of string (* The string is the name of the attribute *)
- | T_namespace of string (* The string is the namespace prefix *)
-;;
-
-
-class type [ 'node ] extension =
- object ('self)
- method clone : 'self
- (* "clone" should return an exact deep copy of the object. *)
- method node : 'node
- (* "node" returns the corresponding node of this extension. This method
-     * is intended to return exactly what has previously been set by "set_node".
- *)
- method set_node : 'node -> unit
- (* "set_node" is invoked once the extension is associated to a new
- * node object.
- *)
- end
-;;
-
-
-class type [ 'ext ] node =
- object ('self)
- constraint 'ext = 'ext node #extension
-
- method extension : 'ext
- (* Return the extension of this node: *)
-
- method delete : unit
- (* Delete this node from the parent's list of sub nodes. This node gets
- * orphaned.
- * 'delete' does nothing if this node does not have a parent.
- *)
-
- method parent : 'ext node
- (* Get the parent, or raise Not_found if this node is an orphan. *)
-
- method root : 'ext node
- (* Get the direct or indirect parent that does not have a parent itself,
- * i.e. the root of the tree.
- *)
-
- method orphaned_clone : 'self
- (* return an exact clone of this element and all sub nodes (deep copy)
- * except string values which are shared by this node and the clone.
- * The other exception is that the clone has no parent (i.e. it is now
- * a root).
- *)
-
- method orphaned_flat_clone : 'self
- (* return a clone of this element where all subnodes are omitted.
- * The type of the node, and the attributes are the same as in the
- * original node.
- * The clone has no parent.
- *)
-
- method add_node : ?force:bool -> 'ext node -> unit
- (* Append new sub nodes -- mainly used by the parser itself, but
- * of course open for everybody. If an element is added, it must be
- * an orphan (i.e. does not have a parent node); and after addition
- * *this* node is the new parent.
- * The method performs some basic validation checks if the current node
- * has a regular expression as content model, or is EMPTY. You can
- * turn these checks off by passing ~force:true to the method.
- *)
-
- method add_pinstr : proc_instruction -> unit
- (* Add a processing instruction to the set of processing instructions of
- * this node. Usually only elements contain processing instructions.
- *)
-
- method pinstr : string -> proc_instruction list
- (* Get all processing instructions with the passed name *)
-
- method pinstr_names : string list
- (* Get a list of all names of processing instructions *)
-
- method node_position : int
- (* Returns the position of this node among all children of the parent
- * node. Positions are counted from 0.
- * Raises Not_found if the node is the root node.
- *)
-
- method node_path : int list
- (* Returns the list of node positions of the ancestors of this node,
- * including this node. The first list element is the node position
- * of this child of the root, and the last list element is the
- * node position of this node.
- * Returns [] if the node is the root node.
- *)
-
- method sub_nodes : 'ext node list
- (* Get the list of sub nodes *)
-
- method iter_nodes : ('ext node -> unit) -> unit
- (* iterate over the sub nodes *)
-
- method iter_nodes_sibl :
- ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
-      (* Here every iteration step can also access the previous and the
- * following node if present.
- *)
-
- method nth_node : int -> 'ext node
- (* Returns the n-th sub node of this node, n >= 0. Raises Not_found
- * if the index is out of the valid range.
- * Note that the first invocation of this method requires additional
- * overhead.
- *)
-
- method previous_node : 'ext node
- method next_node : 'ext node
- (* Return the previous and next nodes, respectively. These methods are
- * equivalent to
- * - parent # nth_node (self # node_position - 1) and
- * - parent # nth_node (self # node_position + 1), respectively.
- *)
-
- method set_nodes : 'ext node list -> unit
-      (* Set the list of sub nodes. Elements that are no longer sub nodes get
- * orphaned, and all new elements that previously were not sub nodes
- * must have been orphaned.
- *)
-
- method data : string
- (* Get the data string of this node. For data nodes, this string is just
- * the content. For elements, this string is the concatenation of all
- * subordinate data nodes.
- *)
-
- method node_type : node_type
-      (* Get the type of this node. For elements, T_element name carries
-       * the element type name.
-       *)
-
- method position : (string * int * int)
- (* Return the name of the entity, the line number, and the column
- * position (byte offset) of the beginning of the element.
- * Only available if the element has been created with position
- * information.
- * Returns "?",0,0 if not available. (Note: Line number 0 is not
- * possible otherwise.)
- *)
-
- method attribute : string -> Pxp_types.att_value
- method attribute_names : string list
- method attribute_type : string -> Pxp_types.att_type
- method attributes : (string * Pxp_types.att_value) list
- (* Get a specific attribute; get the names of all attributes; get the
- * type of a specific attribute; get names and values of all attributes.
- * Only elements have attributes.
-     * Note: If the DTD allows arbitrary attributes for this element, "attribute_type"
- * raises Undeclared.
- *)
-
- method required_string_attribute : string -> string
- method required_list_attribute : string -> string list
- (* Return the attribute or fail if the attribute is not present:
-     * The first version always returns the value as a string;
-     * the second version always as a list.
- *)
-
- method optional_string_attribute : string -> string option
- method optional_list_attribute : string -> string list
- (* Return some attribute value or return None if the attribute is not
- * present:
-     * The first version always returns the value as a string;
-     * the second version always as a list.
- *)
-
- method id_attribute_name : string
- method id_attribute_value : string
- (* Return the name and value of the ID attribute. The methods may
- * raise Not_found if there is no ID attribute in the DTD, or no
- * ID attribute in the element, respectively.
- *)
-
- method idref_attribute_names : string list
- (* Returns the list of attribute names of IDREF or IDREFS type. *)
-
- method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
- (* Sets the attributes but does not check whether they match the DTD.
- *)
-
- method attributes_as_nodes : 'ext node list
- (* Experimental feature: Return the attributes as node list. Every node
- * has type T_attribute n, and contains only the single attribute n.
- * This node list is computed on demand, so the first invocation of this
- * method will create the list, and following invocations will only
- * return the existing list.
- *)
-
- method set_comment : string option -> unit
- (* Sets the comment string; only applicable for T_comment nodes *)
-
- method comment : string option
- (* Get the comment string.
- * Returns always None for nodes with a type other than T_comment.
- *)
-
- method dtd : dtd
- (* Get the DTD. Fails if no DTD is specified (which is impossible if
- * 'create_element' or 'create_data' have been used to create this
- * object)
- *)
-
- method encoding : Pxp_types.rep_encoding
- (* Get the encoding which is always the same as the encoding of the
- * DTD. See also method 'dtd' (Note: This method fails, too, if
- * no DTD is present.)
- *)
-
- method create_element :
- ?position:(string * int * int) ->
- dtd -> node_type -> (string * string) list -> 'ext node
- (* create an "empty copy" of this element:
- * - new DTD
- * - new node type (which must not be T_data)
- * - new attribute list
- * - empty list of nodes
- *)
-
- method create_data : dtd -> string -> 'ext node
- (* create an "empty copy" of this data node: *)
-
- method local_validate :
- ?use_dfa:bool ->
- unit -> unit
- (* Check that this element conforms to the DTD.
- * Option ~use_dfa: If true, the deterministic finite automaton of
- * regexp content models is used for validation, if available.
- * Defaults to false.
- *)
-
- method keep_always_whitespace_mode : unit
- (* Normally, add_node does not accept data nodes when the DTD does not
- * allow data nodes or only whitespace ("ignorable whitespace").
- * Once you have invoked this method, ignorable whitespace is forced
- * to be included into the document.
- *)
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
- (* Write the contents of this node and the subtrees to the passed
- * output stream; the passed encoding is used. The format
- * is compact (the opposite of "pretty printing").
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
-
-
- (* ---------------------------------------- *)
- (* The methods 'find' and 'reset_finder' are no longer supported.
- * The functionality is provided by the configurable index object
- * (see Pxp_yacc).
- *)
-
-
- (* ---------------------------------------- *)
- (* internal methods: *)
- method internal_adopt : 'ext node option -> int -> unit
- method internal_set_pos : int -> unit
- method internal_delete : 'ext node -> unit
- method internal_init : (string * int * int) ->
- dtd -> string -> (string * string) list -> unit
- method internal_init_other : (string * int * int) ->
- dtd -> node_type -> unit
- end
-;;
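-
-(* Example (a sketch, not part of the original interface): a recursive walk
- * over a node tree that uses only the methods node_type, sub_nodes, and
- * data declared above.
- *
- *   let rec print_outline indent n =
- *     match n # node_type with
- *       T_element name ->
- *         print_endline (indent ^ "<" ^ name ^ ">");
- *         List.iter (print_outline (indent ^ "  ")) (n # sub_nodes)
- *     | T_data ->
- *         print_endline (indent ^ n # data)
- *     | _ ->
- *         ()        (* super root, PI, comment, and virtual nodes ignored *)
- *)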
-
-
-class [ 'ext ] data_impl : 'ext -> [ 'ext ] node
- (* Creation:
- * new data_impl an_extension
- * creates a new data node with the given extension and the empty string
- * as content.
- *)
-;;
-
-
-class [ 'ext ] element_impl : 'ext -> [ 'ext ] node
- (* Creation:
- * new element_impl an_extension
- * creates a new empty element node with the given extension.
- *)
-;;
-
-
-(* Attribute and namespace nodes are experimental: *)
-
-class [ 'ext ] attribute_impl :
- element:string -> name:string -> Pxp_types.att_value -> dtd -> [ 'ext ] node
-
- (* Creation:
- * new attribute_impl element_name attribute_name attribute_value dtd
- * Note that attribute nodes do intentionally not have extensions.
- *)
-
-(* Once namespaces get implemented:
-class [ 'ext ] namespace_impl :
- prefix:string -> name:string -> dtd -> [ 'ext ] node
-*)
-
-(********************************** spec *********************************)
-
-type 'ext spec
-constraint 'ext = 'ext node #extension
- (* Contains the exemplars used for the creation of new nodes
- *)
-
-
-val make_spec_from_mapping :
- ?super_root_exemplar : 'ext node ->
- ?comment_exemplar : 'ext node ->
- ?default_pinstr_exemplar : 'ext node ->
- ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
- data_exemplar: 'ext node ->
- default_element_exemplar: 'ext node ->
- element_mapping: (string, 'ext node) Hashtbl.t ->
- unit ->
- 'ext spec
- (* Specifies:
- * - For new data nodes, the ~data_exemplar must be used
- * - For new element nodes: If the element type is mentioned in the
- * ~element_mapping hash table, the exemplar found in this table is
- * used. Otherwise, the ~default_element_exemplar is used.
- * Optionally:
- * - You may also specify exemplars for super root nodes, for comments
- * and for processing instructions
- *)
-
-val make_spec_from_alist :
- ?super_root_exemplar : 'ext node ->
- ?comment_exemplar : 'ext node ->
- ?default_pinstr_exemplar : 'ext node ->
- ?pinstr_alist : (string * 'ext node) list ->
- data_exemplar: 'ext node ->
- default_element_exemplar: 'ext node ->
- element_alist: (string * 'ext node) list ->
- unit ->
- 'ext spec
- (* This is a convenience function: You can pass the mappings from
- * elements and PIs to exemplar by associative lists.
- *)
-
-val create_data_node :
- 'ext spec -> dtd -> string -> 'ext node
-val create_element_node :
- ?position:(string * int * int) ->
- 'ext spec -> dtd -> string -> (string * string) list -> 'ext node
-val create_super_root_node :
- ?position:(string * int * int) ->
- 'ext spec -> dtd -> 'ext node
-val create_comment_node :
- ?position:(string * int * int) ->
- 'ext spec -> dtd -> string -> 'ext node
-val create_pinstr_node :
- ?position:(string * int * int) ->
- 'ext spec -> dtd -> proc_instruction -> 'ext node
- (* These functions use the exemplars contained in a spec and create fresh
- * node objects from them.
- *)
-
-val create_no_node :
- ?position:(string * int * int) -> 'ext spec -> dtd -> 'ext node
- (* Creates a T_none node with limited functionality *)
-
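-(* Example (a sketch): building a spec with make_spec_from_alist and creating
- * nodes from it. The bindings my_ext (an extension object, e.g. an instance
- * of a class following the my_extension outline in the overview above) and
- * dtd (a Pxp_dtd.dtd, e.g. taken from a parsed document) are assumed; the
- * element type "chapter" is only a placeholder and must be declared in that
- * DTD, or the DTD must allow arbitrary contents.
- *
- *   let spec =
- *     make_spec_from_alist
- *       ~data_exemplar:            (new data_impl my_ext)
- *       ~default_element_exemplar: (new element_impl my_ext)
- *       ~element_alist:            [ "chapter", new element_impl my_ext ]
- *       ()
- *
- *   let chapter = create_element_node spec dtd "chapter" []
- *   let text    = create_data_node spec dtd "An introductory paragraph."
- *   let ()      = chapter # add_node text
- *)
-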
-(*********************** Ordering of nodes ******************************)
-
-val compare : 'ext node -> 'ext node -> int
- (* Returns -1 if the first node is before the second node, or +1 if the
- * first node is after the second node, or 0 if both nodes are identical.
- * If the nodes are unrelated (do not have a common ancestor), the result
- * is undefined.
- * This test is rather slow.
- *)
-
-type 'ext ord_index
-constraint 'ext = 'ext node #extension
- (* The type of ordinal indexes *)
-
-val create_ord_index : 'ext node -> 'ext ord_index
- (* Creates an ordinal index for the subtree starting at the passed node.
- * This index assigns to every node an ordinal number (beginning with 0) such
- * that nodes are numbered upon the order of the first character in the XML
- * representation (document order).
- * Note that the index is not automatically updated when the tree is
- * modified.
- *)
-
-val ord_number : 'ext ord_index -> 'ext node -> int
- (* Returns the ordinal number of the node, or raises Not_found *)
-
-val ord_compare : 'ext ord_index -> 'ext node -> 'ext node -> int
- (* Compares two nodes like 'compare':
- * Returns -1 if the first node is before the second node, or +1 if the
- * first node is after the second node, or 0 if both nodes are identical.
- * If one of the nodes does not occur in the ordinal index, Not_found
- * is raised.
- * This test is much faster than 'compare'.
- *)
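-
-(* Example (a sketch): sorting an arbitrary list of nodes of a tree into
- * document order, using only the functions declared above. Note that
- * ord_compare raises Not_found for nodes that are not part of the indexed
- * tree.
- *
- *   let sort_in_document_order root nodes =
- *     let idx = create_ord_index root in
- *     List.sort (ord_compare idx) nodes
- *)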
-
-
-(***************************** Iterators ********************************)
-
-val find : ?deeply:bool ->
- f:('ext node -> bool) -> 'ext node -> 'ext node
- (* Searches the first node for which the predicate f is true, and returns
- * it. Raises Not_found if there is no such node.
- * By default, ~deeply=false. In this case, only the children of the
- * passed node are searched.
- * If passing ~deeply=true, the children are searched recursively
- * (depth-first search).
- *)
-
-val find_all : ?deeply:bool ->
- f:('ext node -> bool) -> 'ext node -> 'ext node list
- (* Searches all nodes for which the predicate f is true, and returns them.
- * By default, ~deeply=false. In this case, only the children of the
- * passed node are searched.
- * If passing ~deeply=true, the children are searched recursively
- * (depth-first search).
- *)
-
-val find_element : ?deeply:bool ->
- string -> 'ext node -> 'ext node
- (* Searches the first element with the passed element type.
- * By default, ~deeply=false. In this case, only the children of the
- * passed node are searched.
- * If passing ~deeply=true, the children are searched recursively
- * (depth-first search).
- *)
-
-val find_all_elements : ?deeply:bool ->
- string -> 'ext node -> 'ext node list
- (* Searches all elements with the passed element type.
- * By default, ~deeply=false. In this case, only the children of the
- * passed node are searched.
- * If passing ~deeply=true, the children are searched recursively
- * (depth-first search).
- *)
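-
-(* Example (a sketch): typical uses of the search functions above. The
- * element type "title" is only a placeholder for this example.
- *
- *   let first_title root =
- *     find_element ~deeply:true "title" root    (* Not_found if absent *)
- *
- *   let all_data_nodes root =
- *     find_all ~deeply:true ~f:(fun n -> n # node_type = T_data) root
- *)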
-
-exception Skip
-val map_tree : pre:('exta node -> 'extb node) ->
- ?post:('extb node -> 'extb node) ->
- 'exta node ->
- 'extb node
- (* Traverses the passed node and all children recursively. After entering
- * a node, the function ~pre is called. The result of this function must
- * be a new node; it must not have children nor a parent (you can simply
- * pass (fun n -> n # orphaned_flat_clone) as ~pre).
- * After that, the children are processed in the same way (from left to
- * right); the results of the transformation will be added to the
- * new node as new children.
- * Now, the ~post function is invoked with this node as argument, and
- * the result is the result of the function (~post should return a root
- * node, too; if not specified, the identity is the ~post function).
-     * Both ~pre and ~post may raise Skip, which causes the node to be
- * left out. If the top node is skipped, the exception Not_found is
- * raised.
- *)
-
-val map_tree_sibl :
- pre: ('exta node option -> 'exta node -> 'exta node option ->
- 'extb node) ->
- ?post:('extb node option -> 'extb node -> 'extb node option ->
- 'extb node) ->
- 'exta node ->
- 'extb node
- (* Works like map_tree, but the function ~pre and ~post have additional
- * arguments:
- * - ~pre l n r: The node n is the node to map, and l is the previous
- * node, and r is the next node (both None if not present). l and r
- * are both nodes before the transformation.
- * - ~post l n r: The node n is the node which is the result of ~pre
- * plus adding children. l and r are again the previous and the next
- * node, respectively, but after being transformed.
- *)
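-
-(* Example (a sketch): a copying transformation with map_tree. Comment nodes
- * are dropped by raising Skip; every other node is copied flat, and
- * map_tree rebuilds the children itself.
- *
- *   let without_comments root =
- *     map_tree
- *       ~pre:(fun n ->
- *               match n # node_type with
- *                 T_comment -> raise Skip
- *               | _         -> n # orphaned_flat_clone)
- *       root
- *)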
-
-val iter_tree : ?pre:('ext node -> unit) ->
- ?post:('ext node -> unit) ->
- 'ext node ->
- unit
-    (* Like map_tree, but only iterates over the nodes instead of mapping them. *)
-
-val iter_tree_sibl :
- ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) ->
- ?post:('ext node option -> 'ext node -> 'ext node option -> unit) ->
- 'ext node ->
- unit
-    (* Like map_tree_sibl, but only iterates over the nodes instead of mapping them. *)
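-
-(* Example (a sketch): counting the element nodes of a tree with iter_tree.
- *
- *   let count_elements root =
- *     let n = ref 0 in
- *     iter_tree
- *       ~pre:(fun node ->
- *               match node # node_type with
- *                 T_element _ -> incr n
- *               | _           -> ())
- *       root;
- *     !n
- *)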
-
-
-(******************************* document ********************************)
-
-
-class [ 'ext ] document :
- Pxp_types.collect_warnings ->
- object
- (* Documents: These are containers for root elements and for DTDs.
- *
- * Important invariant: A document is either empty (no root element,
- * no DTD), or it has both a root element and a DTD.
- *
- * A fresh document created by 'new' is empty.
- *)
-
- method init_xml_version : string -> unit
- (* Set the XML version string of the XML declaration. *)
-
- method init_root : 'ext node -> unit
- (* Set the root element. It is expected that the root element has
- * a DTD.
- * Note that 'init_root' checks whether the passed root element
- * has the type expected by the DTD. The check takes into account
- * that the root element might be a virtual root node.
- *)
-
- method xml_version : string
- (* Returns the XML version from the XML declaration. Returns "1.0"
- * if the declaration is missing.
- *)
-
- method xml_standalone : bool
- (* Returns whether this document is declared as being standalone.
- * This method returns the same value as 'standalone_declaration'
- * of the DTD (if there is a DTD).
- * Returns 'false' if there is no DTD.
- *)
-
- method dtd : dtd
- (* Returns the DTD of the root element.
- * Fails if there is no root element.
- *)
-
- method encoding : Pxp_types.rep_encoding
- (* Returns the string encoding of the document = the encoding of
- * the root element = the encoding of the element tree = the
- * encoding of the DTD.
- * Fails if there is no root element.
- *)
-
- method root : 'ext node
- (* Returns the root element, or fails if there is not any. *)
-
- method add_pinstr : proc_instruction -> unit
- (* Adds a processing instruction to the document container.
- * The parser does this for PIs occurring outside the DTD and outside
- * the root element.
- *)
-
- method pinstr : string -> proc_instruction list
- (* Return all PIs for a passed target string. *)
-
- method pinstr_names : string list
- (* Return all target strings of all PIs. *)
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
- (* Write the document to the passed
-       * output stream; the passed encoding is used. The format
- * is compact (the opposite of "pretty printing").
- * If a DTD is present, the DTD is included into the internal subset.
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
-
- end
-;;
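-
-(* Example (a sketch): inspecting and serializing a document. The value os
- * is assumed to be a Pxp_types.output_stream (its constructors are defined
- * in Pxp_types and not repeated here).
- *
- *   let dump_info doc =
- *     print_endline ("XML version:  " ^ doc # xml_version);
- *     print_endline ("Root element: " ^
- *                      (match doc # root # node_type with
- *                         T_element name -> name
- *                       | _              -> "(virtual root)"))
- *
- *   let write_as_latin1 doc os =
- *     doc # write os `Enc_iso88591
- *)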
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.10 2000/08/30 15:47:37 gerd
- * New method node_path.
- * New function compare.
- * New type ord_index with functions.
- *
- * Revision 1.9 2000/08/26 23:27:53 gerd
- * New function: make_spec_from_alist.
- * New iterators: find, find_all, find_element, find_all_elements,
- * map_tree, map_tree_sibl, iter_tree, iter_tree_sibl.
- * New node methods: node_position, nth_node, previous_node,
- * next_node.
- * Attribute and namespace types have now a string argument:
- * the name/prefix. I hope this simplifies the handling of view nodes.
- * First implementation of view nodes: attribute_impl. The
- * method attributes_as_nodes returns the attributes wrapped into
- * T_attribute nodes which reside outside the document tree.
- *
- * Revision 1.8 2000/08/18 20:14:00 gerd
- * New node_types: T_super_root, T_pinstr, T_comment, (T_attribute),
- * (T_none), (T_namespace).
- *
- * Revision 1.7 2000/07/23 02:16:34 gerd
- * Support for DFAs.
- *
- * Revision 1.6 2000/07/16 16:34:41 gerd
- * New method 'write', the successor of 'write_compact_as_latin1'.
- *
- * Revision 1.5 2000/07/14 13:56:11 gerd
- * Added methods id_attribute_name, id_attribute_value,
- * idref_attribute_names.
- *
- * Revision 1.4 2000/07/09 17:51:14 gerd
- * Element nodes can store positions.
- *
- * Revision 1.3 2000/07/04 22:05:10 gerd
- * New functions make_spec_from_mapping, create_data_node,
- * create_element_node.
- *
- * Revision 1.2 2000/06/14 22:19:06 gerd
- * Added checks such that it is impossible to mix encodings.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_document.mli:
- *
- * Revision 1.13 2000/05/27 19:15:08 gerd
- * Removed the method init_xml_standalone.
- *
- * Revision 1.12 2000/05/01 20:42:34 gerd
- * New method write_compact_as_latin1.
- *
- * Revision 1.11 2000/04/30 18:15:57 gerd
- * Beautifications.
- * New method keep_always_whitespace_mode.
- *
- * Revision 1.10 2000/03/11 22:58:15 gerd
- * Updated to support Markup_codewriter.
- *
- * Revision 1.9 2000/01/27 21:51:56 gerd
- * Added method 'attributes'.
- *
- * Revision 1.8 2000/01/27 21:19:07 gerd
- * Added further methods.
- *
- * Revision 1.7 1999/11/09 22:20:14 gerd
- * Removed method init_dtd from class "document". The DTD is
- * implicitly passed to the document by the root element.
- *
- * Revision 1.6 1999/09/01 22:51:40 gerd
- * Added methods to store processing instructions.
- *
- * Revision 1.5 1999/09/01 16:19:57 gerd
- * The "document" class has now a "warner" as class argument.
- *
- * Revision 1.4 1999/08/19 21:59:13 gerd
- * Added method "reset_finder".
- *
- * Revision 1.3 1999/08/19 01:08:29 gerd
- * Added method "find".
- *
- * Revision 1.2 1999/08/15 02:19:41 gerd
- * Some new explanations: That unknown elements are not rejected
- * if the DTD allows them.
- *
- * Revision 1.1 1999/08/10 00:35:51 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-open Pxp_types
-open Pxp_lexer_types
-open Pxp_lexers
-open Pxp_entity
-open Pxp_aux
-open Pxp_dfa
-
-(**********************************************************************)
-
-class dtd the_warner init_encoding =
- object (self)
- val mutable root = (None : string option)
- val mutable id = (None : dtd_id option)
-
- val warner = (the_warner : collect_warnings)
- val encoding = init_encoding
- val lexerset = Pxp_lexers.get_lexer_set init_encoding
-
- val elements = (Hashtbl.create 100 : (string,dtd_element) Hashtbl.t)
- val gen_entities = (Hashtbl.create 100 : (string,entity * bool) Hashtbl.t)
- val par_entities = (Hashtbl.create 100 : (string,entity) Hashtbl.t)
- val notations = (Hashtbl.create 100 : (string,dtd_notation) Hashtbl.t)
- val pinstr = (Hashtbl.create 100 : (string,proc_instruction) Hashtbl.t)
- val mutable element_names = []
- val mutable gen_entity_names = []
- val mutable par_entity_names = []
- val mutable notation_names = []
- val mutable pinstr_names = []
-
- val mutable allow_arbitrary = false
- val mutable standalone_declaration = false
-
- val mutable validated = false
-
- initializer
- let w = new drop_warnings in
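-      (* The replacement texts of lt and amp are kept in character-reference
-       * form so that the contained < and & characters are not parsed as
-       * markup again when the entity is expanded (cf. XML 1.0, section 4.6).
-       *)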
- self # add_gen_entity
- (new internal_entity self "lt" w "&#60;" false false false encoding)
- false;
- self # add_gen_entity
- (new internal_entity self "gt" w ">" false false false encoding)
- false;
- self # add_gen_entity
- (new internal_entity self "amp" w "&#38;" false false false encoding)
- false;
- self # add_gen_entity
- (new internal_entity self "apos" w "'" false false false encoding)
- false;
- self # add_gen_entity
-        (new internal_entity self "quot" w "\"" false false false encoding)
- false;
-
-
- method encoding = encoding
-
- method warner = warner
-
- method set_root r =
- if root = None then
- root <- Some r
- else
- assert false
-
-
- method set_id j =
- if id = None then
- id <- Some j
- else
- assert false
-
-
- method standalone_declaration = standalone_declaration
-
- method set_standalone_declaration b =
- standalone_declaration <- b
-
- method allow_arbitrary =
- allow_arbitrary <- true
-
- method disallow_arbitrary =
- allow_arbitrary <- false
-
- method arbitrary_allowed = allow_arbitrary
-
- method root = root
- method id = id
-
-
- method add_element el =
- (* raises Not_found if 'el' has already been added *)
- (* Note: 'el' is encoded in the same way as 'self'! *)
- let name = el # name in
- check_name warner name;
- if Hashtbl.mem elements name then
- raise Not_found;
- Hashtbl.add elements name el;
- element_names <- name :: element_names;
- validated <- false
-
-
- method add_gen_entity en extdecl =
- (* The following is commented out; perhaps there should be an option
- * to reactivate it on demand
- *)
-      (* raises Validation_error if the predefined entities 'lt', 'gt', 'amp',
- * 'quot', and 'apos' are redeclared with an improper value.
- *)
- if en # encoding <> encoding then
- failwith "Pxp_dtd.dtd # add_gen_entity: Inconsistent encodings";
- let name = en # name in
- check_name warner name;
- if Hashtbl.mem gen_entities name then begin
- if List.mem name [ "lt"; "gt"; "amp"; "quot"; "apos" ] then begin
- (* These are allowed to be declared several times *)
- let (rt,_) = en # replacement_text in
- let toks = tokens_of_content_string lexerset rt in
- try
- begin match toks with
- [CRef 60] -> if name <> "lt" then raise Not_found
- | [CharData ">"] -> if name <> "gt" then raise Not_found
- | [CRef 62] -> if name <> "gt" then raise Not_found
- | [CRef 38] -> if name <> "amp" then raise Not_found
- | [CharData "'"] -> if name <> "apos" then raise Not_found
- | [CRef 39] -> if name <> "apos" then raise Not_found
- | [CharData "\""] -> if name <> "quot" then raise Not_found
- | [CRef 34] -> if name <> "quot" then raise Not_found
- | _ -> raise Not_found
- end
- with
- Not_found ->
- raise (Validation_error("Predefined entity `" ^ name ^
- "' redeclared"))
- end
- else
- warner # warn ("Entity `" ^ name ^ "' declared twice")
- end
- else begin
- Hashtbl.add gen_entities name (en, extdecl);
- gen_entity_names <- name :: gen_entity_names
- end
-
-
- method add_par_entity en =
- if en # encoding <> encoding then
- failwith "Pxp_dtd.dtd # add_par_entity: Inconsistent encodings";
- let name = en # name in
- check_name warner name;
- if not (Hashtbl.mem par_entities name) then begin
- Hashtbl.add par_entities name en;
- par_entity_names <- name :: par_entity_names
- end
- else
- warner # warn ("Entity `" ^ name ^ "' declared twice")
-
-
- method add_notation no =
- (* raises Validation_error if 'no' already added *)
- if no # encoding <> encoding then
- failwith "Pxp_dtd.dtd # add_notation: Inconsistent encodings";
- let name = no # name in
- check_name warner name;
- if Hashtbl.mem notations name then
- raise (Validation_error("Notation `" ^ name ^ "' declared twice"));
- Hashtbl.add notations name no;
- notation_names <- name :: notation_names
-
-
- method add_pinstr pi =
- if pi # encoding <> encoding then
- failwith "Pxp_dtd.dtd # add_pinstr: Inconsistent encodings";
- let name = pi # target in
- check_name warner name;
-
- if String.length name >= 4 && String.sub name 0 4 = "pxp:" then begin
- match name with
- "pxp:dtd" ->
- let _, optname, atts = pi # parse_pxp_option in
- begin match optname with
- "optional-element-and-notation-declarations" ->
- self # allow_arbitrary
- | "optional-attribute-declarations" ->
- let lexers = Pxp_lexers.get_lexer_set encoding in
- let el_string =
- try List.assoc "elements" atts
- with Not_found ->
- raise(Error("Missing `elements' attribute for pxp:dtd"))
- in
- let el = split_attribute_value lexers el_string in
- List.iter
- (fun e_name ->
- let e =
- try Hashtbl.find elements e_name
- with
- Not_found ->
- raise(Error("Reference to unknown element `" ^
- e_name ^ "'"))
- in
- e # allow_arbitrary
- )
- el
- | _ ->
- raise(Error("Unknown PXP option `" ^
- optname ^ "'"))
- end
- | _ ->
- raise(Error("The processing instruction target `" ^
- name ^ "' is not defined by this PXP version"))
- end
- else begin
- (*----------------------------------------------------------------------
- * SUPPORT FOR DEPRECATED PI OPTIONS:
- * - <?xml:allow_undeclared_elements_and_notations?>
- * is now <?pxp:dtd optional-element-and-notation-declarations?>
- * - <?xml:allow_undeclared_attributes <elementname>?>
- * is now <?pxp:dtd optional-attribute-declarations
- * elements='<elementname> ...'?>
-       * Please update your DTDs! Alternatively, you may uncomment the
- * following piece of code.
- *)
-(* if name = "xml:allow_undeclared_elements_and_notations" then *)
-(* self # allow_arbitrary; *)
-(* if name = "xml:allow_undeclared_attributes" then begin *)
-(* let v = pi # value in *)
-(* let e = *)
-(* try *)
-(* Hashtbl.find elements v *)
-(* with *)
-(* Not_found -> *)
-(* raise(Validation_error("Reference to undeclared element `"*)
-(* ^ v ^ "'")) *)
-(* in *)
-(* e # allow_arbitrary; *)
-(* end; *)
- (*----------------------------------------------------------------------
- *)
- ()
- end;
- Hashtbl.add pinstr name pi;
- pinstr_names <- name :: pinstr_names;
-
-
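-    (* In a DTD, the options handled above are written as processing
-     * instructions, e.g.
-     *
-     *   <?pxp:dtd optional-element-and-notation-declarations?>
-     *   <?pxp:dtd optional-attribute-declarations elements='chap sect'?>
-     *
-     * The first form allows undeclared elements and notations in the whole
-     * DTD; the second only relaxes the attribute checks for the listed
-     * element types ('chap' and 'sect' are placeholder names).
-     *)
-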
- method element name =
- (* returns the element 'name' or raises Validation_error if not found *)
- try
- Hashtbl.find elements name
- with
- Not_found ->
- if allow_arbitrary then
- raise Undeclared
- else
- raise(Validation_error("Reference to undeclared element `" ^ name ^ "'"))
-
- method element_names =
- (* returns the list of all names of element declarations *)
- element_names
-
-
- method gen_entity name =
- (* returns the entity 'name' or raises WF_error if not found *)
- try
- Hashtbl.find gen_entities name
- with
- Not_found ->
- raise(WF_error("Reference to undeclared general entity `" ^ name ^ "'"))
-
-
- method gen_entity_names = gen_entity_names
-
-
- method par_entity name =
- (* returns the entity 'name' or raises WF_error if not found *)
- try
- Hashtbl.find par_entities name
- with
- Not_found ->
- raise(WF_error("Reference to undeclared parameter entity `" ^ name ^ "'"))
-
-
- method par_entity_names = par_entity_names
-
-
- method notation name =
- (* returns the notation 'name' or raises Validation_error if not found *)
- try
- Hashtbl.find notations name
- with
- Not_found ->
- if allow_arbitrary then
- raise Undeclared
- else
- raise(Validation_error("Reference to undeclared notation `" ^ name ^ "'"))
-
-
- method notation_names = notation_names
-
-
- method pinstr name =
- (* returns the list of all processing instructions contained in the DTD
- * with target 'name'
- *)
- Hashtbl.find_all pinstr name
-
-
- method pinstr_names = pinstr_names
-
- method write os enc doctype =
- let wms =
- write_markup_string ~from_enc:encoding ~to_enc:enc os in
-
- let write_sysid s =
- if String.contains s '"' then
- wms ("'" ^ s ^ "'")
- else
- wms ("\"" ^ s ^ "\"");
- in
-
- if doctype then begin
- wms "<!DOCTYPE ";
- ( match root with
- None -> failwith "#write: DTD without root";
- | Some r -> wms r
- );
- wms " [\n";
- end;
-
- (* Notations: *)
- List.iter
- (fun name ->
- let notation =
- try Hashtbl.find notations name with Not_found -> assert false in
- notation # write os enc)
- (List.sort compare notation_names);
-
- (* Unparsed entities: *)
- List.iter
- (fun name ->
- let ent,_ =
- try Hashtbl.find gen_entities name with Not_found -> assert false
- in
- if ent # is_ndata then begin
- let xid = ent # ext_id in
- let notation = ent # notation in
- wms ("<!ENTITY " ^ name ^ " " );
- ( match xid with
- System s ->
- wms "SYSTEM ";
- write_sysid s;
- | Public (p,s) ->
- wms "PUBLIC ";
- write_sysid p;
- if (s <> "") then begin
- wms " ";
- write_sysid s;
- end;
- | Anonymous ->
- failwith "#write: External ID Anonymous cannot be represented"
- );
- wms (" NDATA " ^ notation ^ ">\n");
- end
- )
- (List.sort compare gen_entity_names);
-
- (* Elements: *)
- List.iter
- (fun name ->
- let element =
- try Hashtbl.find elements name with Not_found -> assert false in
- element # write os enc)
- (List.sort compare element_names);
-
- (* Processing instructions: *)
- List.iter
- (fun name ->
- let pi =
- try Hashtbl.find pinstr name with Not_found -> assert false in
- pi # write os enc)
- (List.sort compare pinstr_names);
-
- if doctype then
- wms "]>\n";
-
- method write_compact_as_latin1 os doctype =
- self # write os `Enc_iso88591 doctype
-
-
-
- (************************************************************)
- (* VALIDATION *)
- (************************************************************)
-
- method only_deterministic_models =
- Hashtbl.iter
- (fun n el ->
- let cm = el # content_model in
- match cm with
- Regexp _ ->
- if el # content_dfa = None then
- raise(Validation_error("The content model of element `" ^
- n ^ "' is not deterministic"))
- | _ ->
- ()
- )
- elements;
-
-
- method validate =
-      if validated || allow_arbitrary then
- ()
- else begin
- (* Validity constraint: Notations in NDATA entity declarations must
- * be declared
- *)
- List.iter
- (fun name ->
- let ent,_ =
- try Hashtbl.find gen_entities name with Not_found -> assert false
- in
- if ent # is_ndata then begin
- let xid = ent # ext_id in
- let notation = ent # notation in
- try
- ignore(self # notation notation)
- (* Raises Validation_error if the constraint is violated *)
- with
- Undeclared -> ()
- end
- )
- gen_entity_names;
-
- (* Validate the elements: *)
- Hashtbl.iter
- (fun n el ->
- el # validate)
- elements;
-
- (* Check the root element: *)
- (* TODO: Check if this piece of code is executed at all! *)
- begin match root with
- None -> ()
- | Some r ->
- begin try
- let _ = Hashtbl.find elements r in ()
- with
- Not_found ->
- raise(Validation_error("The root element is not declared"))
- end
- end;
- validated <- true;
- end
-
- method invalidate =
- validated <- false
-
- (************************************************************)
-
- end
-
-
-(**********************************************************************)
-
-and dtd_element the_dtd the_name =
- object (self)
- val dtd = (the_dtd : dtd)
- val name = the_name
- val lexerset = Pxp_lexers.get_lexer_set (the_dtd # encoding)
- val mutable content_model = Unspecified
- val mutable content_model_validated = false
- val mutable content_dfa = lazy None
-
- val mutable externally_declared = false
-
- val mutable attributes =
- ([] : (string * ((att_type * att_default) * bool)) list)
- val mutable attributes_validated = false
-
- val mutable id_att_name = None
- val mutable idref_att_names = []
-
- val mutable allow_arbitrary = false
-
- method name = name
-
- method set_cm_and_extdecl m extdecl =
- if content_model = Unspecified then begin
- content_model <- m;
- content_model_validated <- false;
- content_dfa <- lazy (self # compute_content_dfa);
- externally_declared <- extdecl;
- dtd # invalidate
- end
- else
- raise(Validation_error("Element `" ^ name ^ "' has already a content model"))
-
- method content_model = content_model
-
- method content_dfa = Lazy.force content_dfa
-
- method private compute_content_dfa =
- match content_model with
- Regexp re ->
- ( try Some (dfa_of_regexp_content_model re)
- with Not_found -> None
- )
- | _ ->
- None
-
- method externally_declared = externally_declared
-
- method encoding = dtd # encoding
-
- method allow_arbitrary =
- allow_arbitrary <- true
-
- method disallow_arbitrary =
- allow_arbitrary <- false
-
- method arbitrary_allowed = allow_arbitrary
-
- method add_attribute aname t d extdecl =
-      if aname <> "xml:lang" && aname <> "xml:space" then
- check_name (dtd#warner) aname;
- if List.mem_assoc aname attributes then
- dtd # warner # warn ("More than one declaration for attribute `" ^
- aname ^ "' of element type `" ^ name ^ "'")
- else begin
- begin match aname with
- "xml:space" ->
- begin match t with
- A_enum l ->
- let l' = Sort.list ( <= ) l in
- if l' <> [ "default"; "preserve" ] then
- raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification"))
- | _ ->
- raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification"))
- end
- | _ -> ()
- end;
- begin match t with
- A_id ->
- id_att_name <- Some aname;
- | (A_idref | A_idrefs) ->
- idref_att_names <- aname :: idref_att_names
- | _ ->
- ()
- end;
- attributes <- (aname, ((t,d),extdecl)) :: attributes;
- attributes_validated <- false;
- dtd # invalidate;
- end
-
- method attribute attname =
- try
- fst (List.assoc attname attributes)
- with
- Not_found ->
- if allow_arbitrary then
- raise Undeclared
- else
- raise(Validation_error("Attribute `" ^ attname ^ "' of element `"
- ^ name ^ "' not declared"))
-
- method attribute_violates_standalone_declaration attname v =
- try
- let (atype, adefault), extdecl = List.assoc attname attributes in
- extdecl &&
- ( match v with
- None ->
- adefault <> D_required && adefault <> D_implied
- (* i.e. adefault matches D_default or D_fixed *)
- | Some s ->
- atype <> A_cdata &&
- normalization_changes_value lexerset atype s
- )
- with
- Not_found ->
- if allow_arbitrary then
- raise Undeclared
- else
- raise(Validation_error("Attribute `" ^ attname ^ "' of element `"
- ^ name ^ "' not declared"))
-
-
- method attribute_names =
- List.map fst attributes
-
- method names_of_required_attributes =
- List.flatten
- (List.map
- (fun (n,((t,d),_)) ->
- if d = D_required then
- [n]
- else
- [])
- attributes)
-
- method id_attribute_name = id_att_name
-
- method idref_attribute_names = idref_att_names
-
-
- method write os enc =
- let encoding = self # encoding in
- let wms =
- write_markup_string ~from_enc:encoding ~to_enc:enc os in
-
- let rec write_contentspec cs =
- match cs with
- Unspecified ->
- failwith "#write: Unspecified content model found"
- | Empty ->
- wms "EMPTY"
- | Any ->
- wms "ANY"
- | Mixed ml ->
- wms "(";
- write_mixedspec_list ml;
- wms ")*";
- | Regexp re ->
- write_children re false
-
- and write_mixedspec_list ml =
- match ml with
- MPCDATA :: ml' ->
- wms "#PCDATA";
- if ml' <> [] then wms "|";
- write_mixedspec_list ml';
- | MChild s :: ml' ->
- wms s;
- if ml' <> [] then wms "|";
- write_mixedspec_list ml';
- | [] ->
- ()
-
- and write_children re cp =
- match re with
- Optional re' ->
- let p = needs_parens re' in
- if p then wms "(";
- write_children re' cp;
- if p then wms ")";
- wms "?";
- | Repeated re' ->
- let p = needs_parens re' in
- if p then wms "(";
- write_children re' cp;
- if p then wms ")";
- wms "*";
- | Repeated1 re' ->
- let p = needs_parens re' in
- if p then wms "(";
- write_children re' cp;
- if p then wms ")";
- wms "+";
- | Alt re' ->
- wms "(";
- ( match re' with
- re1' :: rer' ->
- write_children re1' true;
- List.iter
- (fun ren' ->
- wms "|";
- write_children ren' true;
- )
- rer';
- | [] ->
- failwith "#write: Illegal content model"
- );
- wms ")";
- | Seq re' ->
- wms "(";
- ( match re' with
- re1' :: rer' ->
- write_children re1' true;
- List.iter
- (fun ren' ->
- wms ",";
- write_children ren' true;
- )
- rer';
- | [] ->
- failwith "#write: Illegal content model"
- );
- wms ")";
- | Child ch ->
- if not cp then wms "(";
- wms ch;
- if not cp then wms ")";
-
- and needs_parens re =
- match re with
- (Optional _ | Repeated _ | Repeated1 _ ) -> true
- | _ -> false
- in
-
- wms ("<!ELEMENT " ^ name ^ " ");
- write_contentspec content_model;
- wms ">\n";
-
- wms ("<!ATTLIST " ^ name);
- List.iter
- (fun (n,((t,d),_)) ->
- wms ("\n " ^ n);
- ( match t with
- A_cdata -> wms " CDATA";
- | A_id -> wms " ID";
- | A_idref -> wms " IDREF";
- | A_idrefs -> wms " IDREFS";
- | A_entity -> wms " ENTITY";
- | A_entities -> wms " ENTITIES";
- | A_nmtoken -> wms " NMTOKEN";
- | A_nmtokens -> wms " NMTOKENS";
- | A_notation nl ->
- wms " NOTATION (";
- ( match nl with
- nl1:: nl' ->
- wms nl1;
- List.iter
- (fun n ->
- wms ("|" ^ n);
- )
- nl'
- | [] ->
- failwith "#write: Illegal content model";
- );
- wms ")";
- | A_enum el ->
- wms " (";
- ( match el with
- el1:: el' ->
- wms el1;
- List.iter
- (fun e ->
- wms ("|" ^ e);
- )
- el'
- | [] ->
- failwith "#write: Illegal content model";
- );
- wms ")";
- );
- ( match d with
- D_required -> wms " #REQUIRED"
- | D_implied -> wms " #IMPLIED"
- | D_default s ->
- wms " \"";
- write_data_string ~from_enc:encoding ~to_enc:enc os s;
- wms "\"";
- | D_fixed s ->
- wms " FIXED \"";
- write_data_string ~from_enc:encoding ~to_enc:enc os s;
- wms "\"";
- );
- )
- attributes;
-
- wms ">\n";
-
- method write_compact_as_latin1 os =
- self # write os `Enc_iso88591
-
- (************************************************************)
- (* VALIDATION *)
- (************************************************************)
-
- method validate =
- self # validate_attributes();
- self # validate_content_model()
-
- method private validate_attributes() =
- if attributes_validated then
- ()
- else begin
- (* Validity Constraint: One ID per Element Type *)
- let n = count (fun (n,((t,d),_)) -> t = A_id) attributes in
- if n > 1 then
- raise(Validation_error("More than one ID attribute for element `" ^ name ^ "'"));
- (* Validity Constraint: ID Attribute Default *)
- if List.exists
- (fun (n,((t,d),_)) ->
- t = A_id & (d <> D_required & d <> D_implied))
- attributes
- then
- raise(Validation_error("ID attribute must be #IMPLIED or #REQUIRED; element `" ^ name ^ "'"));
- (* Validity Constraint: One Notation per Element Type *)
- let n = count (fun (n,((t,d),_)) ->
- match t with A_notation _ -> true | _ -> false)
- attributes in
- if n > 1 then
- raise(Validation_error("More than one NOTATION attribute for element `" ^ name ^ "'"));
- (* Validity Constraint: Notation Attributes [second part] *)
- List.iter
- (fun (n,((t,d),_)) ->
- match t with
- A_notation l ->
- List.iter
- (fun nname ->
- let _ = dtd # notation nname in ())
- l
- | _ -> ())
- attributes;
- (* Validity Constraint: Attribute Default Legal *)
- List.iter
- (fun (n,((t,d),_)) ->
-
- let check v =
- let lexical_error() =
- lazy (raise(Validation_error("Default value for attribute `" ^ n ^ "' is lexically malformed"))) in
- check_attribute_value_lexically lexerset (lexical_error()) t v;
- begin match t with
- (A_entity|A_entities) ->
- List.iter
- (fun nd ->
- let en, extdecl = dtd # gen_entity nd in
- if not (en # is_ndata) then
- raise(Validation_error("Attribute default value must be the name of an NDATA entity; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
-(* if dtd # standalone_declaration && extdecl then
- raise(Validation_error("Attribute default value violates the standalone declaration; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
--- This is checked anyway when the attribute value is normalized
-*)
- )
- (split_attribute_value lexerset v)
- | A_notation nl ->
- if not (List.mem v nl) then
- raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
- | A_enum nl ->
- if not (List.mem v nl) then
- raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
- | _ -> ()
- end
- in
-
- match d with
- D_required -> ()
- | D_implied -> ()
- | D_default v -> check v
- | D_fixed v -> check v
- )
- attributes;
-
- (* Ok: This element declaration is valid *)
- attributes_validated <- true;
-
- end
-
- method private validate_content_model () =
- (* checks:
- * - Validity Constraint: No Duplicate Types
- * It is not an error if there is a child in the declaration for which
- * no element declaration is provided.
- *)
- match content_model with
- Unspecified ->
- dtd # warner # warn ("Element type `" ^ name ^ "' mentioned but not declared");
- ()
- | Empty -> ()
- | Any -> ()
- | Mixed (pcdata :: l) ->
- (* MPCDATA is always the first element by construction *)
- assert (pcdata = MPCDATA);
- if check_dups l then
- raise (Validation_error("Double children in declaration for element `" ^ name ^ "'"))
- | Regexp _ -> ()
- | _ -> assert false
-
-
-
- (************************************************************)
-
- end
-
-and dtd_notation the_name the_xid init_encoding =
-object (self)
- val name = the_name
- val xid = (the_xid : ext_id)
- val encoding = (init_encoding : Pxp_types.rep_encoding)
- method name = name
- method ext_id = xid
- method encoding = encoding
-
- method write os enc =
- let wms =
- write_markup_string ~from_enc:encoding ~to_enc:enc os in
-
- let write_sysid s =
- if String.contains s '"' then
- wms ("'" ^ s ^ "'")
- else
- wms ("\"" ^ s ^ "\"");
- in
-
- wms ("<!NOTATION " ^ name ^ " ");
- ( match xid with
- System s ->
- wms "SYSTEM ";
- write_sysid s;
- | Public (p,s) ->
- wms "PUBLIC ";
- write_sysid p;
- if (s <> "") then begin
- wms " ";
- write_sysid s;
- end;
- | Anonymous ->
- failwith "#write: External ID Anonymous cannot be represented"
- );
- wms ">\n";
-
- method write_compact_as_latin1 os =
- self # write os `Enc_iso88591
-
- end
-
-and proc_instruction the_target the_value init_encoding =
-object (self)
- val target = the_target
- val value = (the_value : string)
- val encoding = (init_encoding : Pxp_types.rep_encoding)
-
- initializer
- match target with
- ("xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML") ->
-        (* This is an error, not a warning, because I do not have a
-         * "warner" object at hand.
-         *)
- raise(WF_error("Reserved processing instruction"))
- | _ -> ()
-
- method target = target
- method value = value
- method encoding = encoding
-
- method write os enc =
- let wms =
- write_markup_string ~from_enc:encoding ~to_enc:enc os in
-
- wms "<?";
- wms target;
- wms " ";
- wms value;
- wms "?>";
-
- method write_compact_as_latin1 os =
- self # write os `Enc_iso88591
-
- method parse_pxp_option =
- let lexers = get_lexer_set encoding in
- try
- let toks = tokens_of_xml_pi lexers value in (* may raise WF_error *)
- begin match toks with
- (Pro_name option_name) :: toks' ->
- let atts = decode_xml_pi toks' in (* may raise WF_error *)
- (target, option_name, atts)
- | _ ->
- raise(Error("Bad PXP processing instruction"))
- end
- with
- WF_error _ ->
- raise(Error("Bad PXP processing instruction"))
-
- end
-;;
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.10 2000/08/18 21:18:45 gerd
- * Updated wrong comments for methods par_entity and gen_entity.
- * These can raise WF_error and not Validation_error, and this is the
- * correct behaviour.
- *
- * Revision 1.9 2000/07/25 00:30:01 gerd
- * Added support for pxp:dtd PI options.
- *
- * Revision 1.8 2000/07/23 02:16:34 gerd
- * Support for DFAs.
- *
- * Revision 1.7 2000/07/16 17:50:01 gerd
- * Fixes in 'write'
- *
- * Revision 1.6 2000/07/16 16:34:41 gerd
- * New method 'write', the successor of 'write_compact_as_latin1'.
- *
- * Revision 1.5 2000/07/14 13:56:48 gerd
- * Added methods id_attribute_name and idref_attribute_names.
- *
- * Revision 1.4 2000/07/09 00:13:37 gerd
- * Added methods gen_entity_names, par_entity_names.
- *
- * Revision 1.3 2000/07/04 22:10:55 gerd
- * Update: collect_warnings -> drop_warnings.
- * Update: Case ext_id = Anonymous.
- *
- * Revision 1.2 2000/06/14 22:19:06 gerd
- * Added checks such that it is impossible to mix encodings.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- *
- * Revision 1.18 2000/05/28 17:24:55 gerd
- * Bugfixes.
- *
- * Revision 1.17 2000/05/27 19:21:25 gerd
- * Implemented the changes of rev. 1.10 of markup_dtd.mli.
- *
- * Revision 1.16 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.15 2000/05/14 21:50:07 gerd
- * Updated: change in internal_entity.
- *
- * Revision 1.14 2000/05/06 23:08:46 gerd
- * It is possible to allow undeclared attributes.
- *
- * Revision 1.13 2000/05/01 20:42:46 gerd
- * New method write_compact_as_latin1.
- *
- * Revision 1.12 2000/05/01 15:16:57 gerd
- * The errors "undeclared parameter/general entities" are
- * well-formedness errors, not validation errors.
- *
- * Revision 1.11 2000/03/11 22:58:15 gerd
- * Updated to support Markup_codewriter.
- *
- * Revision 1.10 2000/01/20 20:53:47 gerd
- * Changed such that it runs with Markup_entity's new interface.
- *
- * Revision 1.9 1999/11/09 22:15:41 gerd
- * Added method "arbitrary_allowed".
- *
- * Revision 1.8 1999/09/01 22:52:22 gerd
- * If 'allow_arbitrary' is in effect, no validation happens anymore.
- *
- * Revision 1.7 1999/09/01 16:21:24 gerd
- * Added several warnings.
- * The attribute type of "xml:space" is now strictly checked.
- *
- * Revision 1.6 1999/08/15 20:34:21 gerd
- * Improved error messages.
- * Bugfix: It is no longer allowed to create processing instructions
- * with target "xml".
- *
- * Revision 1.5 1999/08/15 02:20:16 gerd
- * New feature: a DTD can allow arbitrary elements.
- *
- * Revision 1.4 1999/08/15 00:21:39 gerd
- * Comments have been updated.
- *
- * Revision 1.3 1999/08/14 22:12:52 gerd
- * Several functions have now a "warner" as argument which is
- * an object with a "warn" method. This is used to warn about characters
- * that cannot be represented in the Latin 1 alphabet.
- * 	Bugfix: if two general entities with the same name are defined,
- * the first counts, not the second.
- *
- * Revision 1.2 1999/08/11 14:56:35 gerd
- * 	Declaration of the predefined entities {lt,gt,amp,quot,apos}
- * 	is no longer forbidden; but the original definition cannot be overridden.
- * TODO: If these entities are redeclared with problematic values,
- * the user should be warned.
- *
- * Revision 1.1 1999/08/10 00:35:51 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-(*$ markup-dtd1.mli *)
-
-(**********************************************************************)
-(* *)
-(* Pxp_dtd: *)
-(* Object model of document type declarations *)
-(* *)
-(**********************************************************************)
-
-(* ======================================================================
- * OVERVIEW
- *
- * class dtd ............... represents the whole DTD, including element
- * declarations, entity declarations, notation
- * declarations, and processing instructions
- * class dtd_element ....... represents an element declaration consisting
- * of a content model and an attribute list
- * declaration
- * class dtd_notation ...... represents a notation declaration
- * class proc_instruction .. represents a processing instruction
- * ======================================================================
- *
- *)
-
-
-class dtd :
- (* Creation:
- * new dtd
- * creates a new, empty DTD object without any declaration, without a root
- * element, without an ID.
- *)
- Pxp_types.collect_warnings ->
- Pxp_types.rep_encoding ->
- object
- method root : string option
- (* get the name of the root element if present *)
-
- method set_root : string -> unit
- (* set the name of the root element. This method can be invoked
- * only once
- *)
-
- method id : Pxp_types.dtd_id option
- (* get the identifier for this DTD *)
-
- method set_id : Pxp_types.dtd_id -> unit
- (* set the identifier. This method can be invoked only once *)
-
- method encoding : Pxp_types.rep_encoding
- (* returns the encoding used for character representation *)
-
-
- method allow_arbitrary : unit
- (* After this method has been invoked, the object changes its behaviour:
- * - elements and notations that have not been added may be used in an
- * arbitrary way; the methods "element" and "notation" indicate this
- * by raising Undeclared instead of Validation_error.
- *)
-
- method disallow_arbitrary : unit
-
- method arbitrary_allowed : bool
- (* Returns whether arbitrary contents are allowed or not. *)
-
- method standalone_declaration : bool
- (* Whether there is a 'standalone' declaration or not. Strictly
- * speaking, this declaration is not part of the DTD, but it is
-       * included here for practical reasons.
- * If not set, this property defaults to 'false'.
- *)
-
- method set_standalone_declaration : bool -> unit
- (* Sets the 'standalone' declaration. *)
-
-
- method add_element : dtd_element -> unit
- (* add the given element declaration to this DTD. Raises Not_found
- * if there is already an element declaration with the same name.
- *)
-
- method add_gen_entity : Pxp_entity.entity -> bool -> unit
- (* add_gen_entity e extdecl:
- * add the entity 'e' as general entity to this DTD (general entities
- * are those represented by &name;). If there is already a declaration
-       * with the same name, the second definition is ignored; as an exception to
- * this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
- * may only be redeclared with a definition that is equivalent to the
- * standard definition; otherwise a Validation_error is raised.
- *
- * 'extdecl': 'true' indicates that the entity declaration occurs in
- * an external entity. (Used for the standalone check.)
- *)
-
- method add_par_entity : Pxp_entity.entity -> unit
- (* add the given entity as parameter entity to this DTD (parameter
- * entities are those represented by %name;). If there is already a
- * declaration with the same name, the second definition is ignored.
- *)
-
- method add_notation : dtd_notation -> unit
- (* add the given notation to this DTD. If there is already a declaration
- * with the same name, a Validation_error is raised.
- *)
-
- method add_pinstr : proc_instruction -> unit
- (* add the given processing instruction to this DTD. *)
-
- method element : string -> dtd_element
- (* looks up the element declaration with the given name. Raises
- * Validation_error if the element cannot be found. (If "allow_arbitrary"
-       * has been invoked before, Undeclared is raised instead.)
- *)
-
- method element_names : string list
- (* returns the list of the names of all element declarations. *)
-
- method gen_entity : string -> (Pxp_entity.entity * bool)
- (* let e, extdecl = obj # gen_entity n:
- * looks up the general entity 'e' with the name 'n'. Raises
- * WF_error if the entity cannot be found.
-       * 'extdecl': indicates whether the entity declaration occurred in an
- * external entity.
- *)
-
- method gen_entity_names : string list
- (* returns the list of all general entity names *)
-
- method par_entity : string -> Pxp_entity.entity
- (* looks up the parameter entity with the given name. Raises
- * WF_error if the entity cannot be found.
- *)
-
- method par_entity_names : string list
- (* returns the list of all parameter entity names *)
-
- method notation : string -> dtd_notation
- (* looks up the notation declaration with the given name. Raises
- * Validation_error if the notation cannot be found. (If "allow_arbitrary"
-       * has been invoked before, Undeclared is raised instead.)
- *)
-
- method notation_names : string list
- (* Returns the list of the names of all added notations *)
-
- method pinstr : string -> proc_instruction list
- (* looks up all processing instructions with the given target.
- * The "target" is the identifier following "<?".
- * Note: It is not possible to find out the exact position of the
- * processing instruction.
- *)
-
- method pinstr_names : string list
- (* Returns the list of the names (targets) of all added pinstrs *)
-
- method validate : unit
- (* ensures that the DTD is valid. This method is optimized such that
-       * actual validation is only performed if the DTD has changed.
- * If the DTD is invalid, mostly a Validation_error is raised,
- * but other exceptions are possible, too.
- *)
-
- method only_deterministic_models : unit
- (* Succeeds if all regexp content models are deterministic.
- * Otherwise Validation_error.
- *)
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> bool -> unit
-      (* write os enc doctype:
- * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a
- * DTD like <!DOCTYPE root [ ... ]> is written. If 'not doctype',
- * only the declarations are written (the material within the
- * square brackets).
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> bool -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
-
-
- (*----------------------------------------*)
- method invalidate : unit
- (* INTERNAL METHOD *)
- method warner : Pxp_types.collect_warnings
- (* INTERNAL METHOD *)
- end
-
-(*$-*)
-
-(*$ markup-dtd2.mli *)
-
-(* ---------------------------------------------------------------------- *)
-
-and dtd_element : dtd -> string ->
- (* Creation:
- * new dtd_element init_dtd init_name:
- * creates a new dtd_element object for init_dtd with init_name.
- * The strings are represented in the same encoding as init_dtd.
- *)
- object
-
- method name : string
- (* returns the name of the declared element *)
-
- method externally_declared : bool
- (* returns whether the element declaration occurs in an external
- * entity.
- *)
-
- method content_model : Pxp_types.content_model_type
- (* get the content model of this element declaration, or Unspecified *)
-
- method content_dfa : Pxp_dfa.dfa_definition option
- (* return the DFA of the content model if there is a DFA, or None.
- * A DFA exists only for regexp style content models which are
- * deterministic.
- *)
-
- method set_cm_and_extdecl : Pxp_types.content_model_type -> bool -> unit
- (* set_cm_and_extdecl cm extdecl:
- * set the content model to 'cm'. Once the content model is not
- * Unspecified, it cannot be set to a different value again.
-       * Furthermore, it also records whether the declaration occurs in an
-       * external entity ('extdecl').
- *)
-
- method encoding : Pxp_types.rep_encoding
- (* Return the encoding of the strings *)
-
- method allow_arbitrary : unit
- (* After this method has been invoked, the object changes its behaviour:
- * - attributes that have not been added may be used in an
- * arbitrary way; the method "attribute" indicates this
- * by raising Undeclared instead of Validation_error.
- *)
-
- method disallow_arbitrary : unit
-
- method arbitrary_allowed : bool
- (* Returns whether arbitrary attributes are allowed or not. *)
-
- method attribute : string ->
- Pxp_types.att_type * Pxp_types.att_default
- (* get the type and default value of a declared attribute, or raise
- * Validation_error if the attribute does not exist.
- * If 'arbitrary_allowed', the exception Undeclared is raised instead
- * of Validation_error.
- *)
-
- method attribute_violates_standalone_declaration :
- string -> string option -> bool
- (* attribute_violates_standalone_declaration name v:
- * Checks whether the attribute 'name' violates the "standalone"
- * declaration if it has value 'v'.
- * The method returns true if:
- * - The attribute declaration occurs in an external entity,
- * and if one of the two conditions holds:
- * - v = None, and there is a default for the attribute value
- * - v = Some s, and the type of the attribute is not CDATA,
- * and s changes if normalized according to the rules of the
- * attribute type.
- *
- * The method raises Validation_error if the attribute does not exist.
- * If 'arbitrary_allowed', the exception Undeclared is raised instead
- * of Validation_error.
- *)
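-      (* Editor's sketch (not part of the original interface): a hedged usage
-       * example of the check described above. [el] stands for an already
-       * declared dtd_element; that the Undeclared exception is defined in
-       * Pxp_types is an assumption.
-       *
-       *   let lang_violates_standalone (el : dtd_element) =
-       *     (* true if omitting attribute "lang" would violate standalone="yes" *)
-       *     try el # attribute_violates_standalone_declaration "lang" None
-       *     with Pxp_types.Undeclared -> false
-       *)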
-
- method attribute_names : string list
- (* get the list of all declared attributes *)
-
- method names_of_required_attributes : string list
- (* get the list of all attributes that are specified as required
- * attributes
- *)
-
- method id_attribute_name : string option
- (* Returns the name of the attribute with type ID, or None. *)
-
- method idref_attribute_names : string list
- (* Returns the names of the attributes with type IDREF or IDREFS. *)
-
- method add_attribute : string ->
- Pxp_types.att_type ->
- Pxp_types.att_default ->
- bool ->
- unit
- (* add_attribute name type default extdecl:
- * add an attribute declaration for an attribute with the given name,
- * type, and default value. If there is more than one declaration for
- * an attribute name, the first declaration counts; the other declarations
- * are ignored.
- * 'extdecl': if true, the attribute declaration occurs in an external
- * entity. This property is used to check the "standalone" attribute.
- *)
-
- method validate : unit
- (* checks whether this element declaration (i.e. the content model and
- * all attribute declarations) is valid for the associated DTD.
- * Raises mostly Validation_error if the validation fails.
- *)
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
-      (* write os enc:
- * Writes the <!ELEMENT ... > declaration to 'os' as 'enc'-encoded string.
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
- end
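-
-(* Editor's sketch (not part of the original interface): building a tiny DTD
- * with the two classes above, using only methods declared in this file. The
- * code would live in an implementation file, not in this .mli; instantiating
- * Pxp_types.collect_warnings with "new" and the `Enc_iso88591 value are
- * assumptions about Pxp_types and may need adjustment.
- *
- *   let build_demo_dtd () =
- *     let d = new dtd (new Pxp_types.collect_warnings) `Enc_iso88591 in
- *     d # set_root "note";
- *     let e = new dtd_element d "note" in
- *     e # set_cm_and_extdecl (Pxp_types.Mixed [ Pxp_types.MPCDATA ]) false;
- *     e # add_attribute "lang" Pxp_types.A_cdata Pxp_types.D_implied false;
- *     d # add_element e;
- *     d # validate;
- *     d
- *)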
-
-(* ---------------------------------------------------------------------- *)
-
-and dtd_notation : string -> Pxp_types.ext_id -> Pxp_types.rep_encoding ->
- (* Creation:
- * new dtd_notation a_name an_external_ID init_encoding
- * creates a new dtd_notation object with the given name and the given
- * external ID.
- *)
- object
- method name : string
- method ext_id : Pxp_types.ext_id
- method encoding : Pxp_types.rep_encoding
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
-      (* write os enc:
- * Writes the <!NOTATION ... > declaration to 'os' as 'enc'-encoded
- * string.
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
-
- end
-
-(* ---------------------------------------------------------------------- *)
-
-and proc_instruction : string -> string -> Pxp_types.rep_encoding ->
- (* Creation:
- * new proc_instruction a_target a_value
- * creates a new proc_instruction object with the given target string and
- * the given value string.
- * Note: A processing instruction is written as <?target value?>.
- *)
- object
- method target : string
- method value : string
- method encoding : Pxp_types.rep_encoding
-
- method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
- (* write os enc:
- * Writes the <?...?> PI to 'os' as 'enc'-encoded string.
- *)
-
- method write_compact_as_latin1 : Pxp_types.output_stream -> unit
- (* DEPRECATED METHOD; included only to keep compatibility with
- * older versions of the parser
- *)
-
- method parse_pxp_option : (string * string * (string * string) list)
- (* Parses a PI containing a PXP option. Such PIs are formed like:
- * <?target option-name option-att="value" option-att="value" ... ?>
- * The method returns a triple
- * (target, option-name, [option-att, value; ...])
- * or raises Error.
- *)
-
- end
-
-;;
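-
-(* Editor's sketch (not part of the original interface): consuming the result
- * of parse_pxp_option as documented above. [pi] stands for an existing
- * proc_instruction; that Error is Pxp_types.Error of string is an assumption.
- *
- *   let dump_pxp_option (pi : proc_instruction) =
- *     try
- *       let (target, option_name, atts) = pi # parse_pxp_option in
- *       Printf.printf "<?%s ...?>: option %s\n" target option_name;
- *       List.iter (fun (n, v) -> Printf.printf "  %s = %s\n" n v) atts
- *     with
- *       Pxp_types.Error msg -> prerr_endline ("Not a PXP option PI: " ^ msg)
- *)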
-
-(*$-*)
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.8 2000/08/18 21:18:45 gerd
- * Updated wrong comments for methods par_entity and gen_entity.
- * These can raise WF_error and not Validation_error, and this is the
- * correct behaviour.
- *
- * Revision 1.7 2000/07/25 00:30:01 gerd
- * Added support for pxp:dtd PI options.
- *
- * Revision 1.6 2000/07/23 02:16:33 gerd
- * Support for DFAs.
- *
- * Revision 1.5 2000/07/16 16:34:41 gerd
- * New method 'write', the successor of 'write_compact_as_latin1'.
- *
- * Revision 1.4 2000/07/14 13:56:49 gerd
- * Added methods id_attribute_name and idref_attribute_names.
- *
- * Revision 1.3 2000/07/09 00:13:37 gerd
- * Added methods gen_entity_names, par_entity_names.
- *
- * Revision 1.2 2000/06/14 22:19:06 gerd
- * Added checks such that it is impossible to mix encodings.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_dtd.ml:
- *
- * Revision 1.11 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.10 2000/05/27 19:20:38 gerd
- * Changed the interfaces for the standalone check: New
- * methods: standalone_declaration, set_standalone_declaration,
- * externally_declared, attribute_violates_standalone_declaration.
- * The method set_content_model has been renamed to
- * set_cm_and_extdecl; it now initializes also whether the element
- * has been declared in an external entity.
- * Methods add_gen_entity and gen_entity pass an additional
- * boolean argument containing whether the declaration of the
- * general entity happened in an external entity.
- * Method add_attribute expects this argument, too, which
- * states whether the declaration of the attribute happened in an
- * external entity.
- *
- * Revision 1.9 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.8 2000/05/06 23:10:26 gerd
- * allow_arbitrary for elements, too.
- *
- * Revision 1.7 2000/05/01 20:42:52 gerd
- * New method write_compact_as_latin1.
- *
- * Revision 1.6 2000/03/11 22:58:15 gerd
- * Updated to support Markup_codewriter.
- *
- * Revision 1.5 2000/02/22 02:32:02 gerd
- * Updated.
- *
- * Revision 1.4 1999/11/09 22:15:41 gerd
- * Added method "arbitrary_allowed".
- *
- * Revision 1.3 1999/09/01 16:21:56 gerd
- * "dtd" classes have now an argument that passes a "warner".
- *
- * Revision 1.2 1999/08/15 02:20:23 gerd
- * New feature: a DTD can allow arbitrary elements.
- *
- * Revision 1.1 1999/08/10 00:35:51 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-
-(* TODO:
- * - How do we prevent an internal entity from accepting an XML declaration
- *   in its replacement text?
- *)
-
-
-open Pxp_types
-open Pxp_lexer_types
-open Pxp_aux
-open Pxp_reader
-
-(* Hierarchy of parsing layers:
- *
- * - Parser: Pxp_yacc
- * + gets input stream from the main entity object
- * + checks most of the grammar
- * + creates the DTD object as side-effect
- * + creates the element tree as side-effect
- * + creates further entity objects that are entered into the DTD
- * - Entity layer: Pxp_entity
- * + gets input stream from the lexers, or another entity object
- * + handles entity references: if a reference is encountered the
- * input stream is redirected such that the tokens come from the
- * referenced entity object
- * + handles conditional sections
- * - Lexer layer: Pxp_lexers
- * + gets input from lexbuffers created by resolvers
- * + different lexers for different lexical contexts
- * + a lexer returns pairs (token,lexid), where token is the scanned
- * token, and lexid is the name of the lexer that must be used for
- * the next token
- * - Resolver layer: Pxp_entity
- * + a resolver creates the lexbuf from some character source
- * + a resolver recodes the input and handles the encoding scheme
- *)
-
-(**********************************************************************)
-
-(* Variables of type 'state' are used to insert Begin_entity and End_entity
- * tokens into the stream.
- * - At_beginning: Nothing has been read so far
- * - Inserted_begin_entity: A Begin_entity token has been returned, and the
- *   first real token has been deferred. (Begin_entity/End_entity must not
- *   be inserted if the entity is empty.)
- * - At_end: Eof has been read, and End_entity has been returned.
- *)
-
-type state =
- At_beginning
- | Inserted_begin_entity
- | At_end
-;;
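-
-(* Editor's sketch (not part of the original source): the bracketing idea
- * behind 'state', expressed as a small self-contained function over a plain
- * demo token type. A non-empty token list is wrapped in begin/end markers;
- * an empty list is left alone, mirroring the rule that Begin_entity and
- * End_entity must not be inserted for an empty entity.
- *)
-type demo_tok = Demo_begin | Demo_end | Demo_data of string;;
-
-let bracket_demo toks =
-  match toks with
-    [] -> []
-  | _  -> Demo_begin :: toks @ [ Demo_end ]
-;;
-(* Example: bracket_demo [ Demo_data "x" ]
- *          = [ Demo_begin; Demo_data "x"; Demo_end ]
- *)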
-
-
-(**********************************************************************)
-
-class virtual entity the_dtd the_name the_warner
- init_errors_with_line_numbers init_encoding =
- object (self)
- (* This class prescribes the type of all entity objects. Furthermore,
- * the default 'next_token' mechanism is implemented.
- *)
-
- (* 'init_errors_with_line_numbers': whether error messages contain line
- * numbers or not.
- * Calculating line numbers is expensive.
- *)
-
- val mutable dtd = the_dtd
- val mutable name = the_name
- val mutable warner = the_warner
-
- val encoding = (init_encoding : rep_encoding)
- val lexerset = Pxp_lexers.get_lexer_set init_encoding
-
- method encoding = encoding
- (* method lexerset = lexerset *)
-
- val mutable manager = None
- (* The current entity_manager, see below *)
-
- method private manager =
- ( match manager with
- None -> assert false
- | Some m -> m
- : < current_entity : entity;
- pop_entity : unit;
- push_entity : entity -> unit >
- )
-
- method set_manager m = manager <- Some m
-
-
- val mutable lexbuf = Lexing.from_string ""
- (* The lexical buffer currently used as character source. *)
-
- val mutable prolog = None
- (* Stores the initial <?xml ...?> token as PI_xml *)
-
- val mutable prolog_pairs = []
- (* If prolog <> None, these are the (name,value) pairs of the
- * processing instruction.
- *)
-
-
- val mutable lex_id = Document
- (* The name of the lexer that should be used for the next token *)
-
-    method set_lex_id id = lex_id <- id
-
-
-
- val mutable force_parameter_entity_parsing = false
-    (* 'true' forces inner entities to always be embraced by
- * Begin_entity and End_entity.
- * 'false': the inner entity itself decides this
- *)
-
- val mutable check_text_declaration = true
- (* 'true': It is checked that the <?xml..?> declaration matches the
- * production TextDecl.
- *)
-
- val mutable normalize_newline = true
- (* Whether this entity converts CRLF or CR to LF, or not *)
-
-
- val mutable line = 1 (* current line *)
- val mutable column = 0 (* current column *)
- val mutable pos = 0 (* current absolute character position *)
- val errors_with_line_numbers = init_errors_with_line_numbers
-
- val mutable p_line = 1
- val mutable p_column = 1
-
- method line = p_line
- method column = p_column
-
-
- val mutable counts_as_external = false
-
- method counts_as_external = counts_as_external
- (* Whether the entity counts as external (for the standalone check). *)
-
- method set_counts_as_external =
- counts_as_external <- true
-
-
- val mutable last_token = Bof
- (* XXX
- * These two variables are used to check that between certain pairs of
- * tokens whitespaces exist. 'last_token' is simply the last token,
- * but not Ignore, and not PERef (which both represent whitespace).
- * 'space_seen' records whether Ignore or PERef was seen between this
- * token and 'last_token'.
- *)
-
- val mutable deferred_token = None
- (* If you set this to Some tl, the next invocations of
- * next_token_from_entity will return the tokens in tl.
- * This makes it possible to insert tokens into the stream.
- *)
-
- val mutable debug = false
-
- method is_ndata = false
- (* Returns if this entity is an NDATA (unparsed) entity *)
-
- method name = name
-
- method virtual open_entity : bool -> lexers -> unit
- (* open_entity force_parsing lexid:
- * opens the entity, and the first token is scanned by the lexer
- * 'lexid'. 'force_parsing' forces that Begin_entity and End_entity
- * tokens embrace the inner tokens of the entity; otherwise this
- * depends on the entity.
- * By opening an entity, reading tokens from it, and finally closing
- * the entity, the inclusion methods "Included",
- * "Included if validating", and "Included as PE" can be carried out.
- * Which method is chosen depends on the 'lexid', i.e. the lexical
- * context: 'lexid = Content' performs "Included (if validating)" (we
- * are always validating); 'lexid = Declaration' performs
- * "Included as PE". The difference is which tokens are recognized,
- * and how spaces are handled.
-     * 'force_parsing' causes a Begin_entity token to be inserted before
-     * and an End_entity token after the entity. The yacc
- * rules allow the Begin_entity ... End_entity brace only at certain
- * positions; this is used to restrict the possible positions where
- * entities may be included, and to guarantee that the entity matches
- * a certain production of the grammar ("parsed entities").
- * 'open_entity' is currently invoked with 'force_parsing = true'
- * for toplevel nodes, for inclusion of internal general entities,
- * and for inclusion of parameter entities into document entities.
- * 'force_parsing = false' is used for all other cases: External
- * entities add the Begin_entity/End_entity tokens anyway; internal
- * entities do not. Especially internal parameter entities referenced
- * from non-document entities do not add these tokens.
- *)
-
- method virtual close_entity : lexers
- (* close_entity:
- * closes the entity and returns the name of the lexer that must
- * be used to scan the next token.
- *)
-
- method virtual replacement_text : (string * bool)
- (* replacement_text:
- * returns the replacement text of the entity, and as second value,
- * whether the replacement text was constructed by referencing
- * external entities (directly or indirectly).
- * This method implements the inclusion method "Included in Literal".
- *)
-
-
- method lexbuf = lexbuf
-
-
- method xml_declaration =
- (* return the (name,value) pairs of the initial <?xml name=value ...?>
- * processing instruction.
- *)
- match prolog with
- None ->
- None
- | Some p ->
- Some prolog_pairs
-
-
- method set_debugging_mode m =
- debug <- m
-
- method private virtual set_encoding : string -> unit
-
-
- method full_name =
- name
-
-
- method next_token =
- (* read next token from this entity *)
-
- match deferred_token with
- Some toklist ->
- ( match toklist with
- [] ->
- deferred_token <- None;
- self # next_token
- | tok :: toklist' ->
- deferred_token <- Some toklist';
- if debug then
- prerr_endline ("- Entity " ^ name ^ ": " ^ string_of_tok tok ^ " (deferred)");
- tok
- )
- | None -> begin
- let this_line = line
- and this_column = column in
- let this_pos = pos in
- p_line <- this_line;
- p_column <- this_column;
- (* Read the next token from the appropriate lexer lex_id, and get the
- * name lex_id' of the next lexer to be used.
- *)
- let tok, lex_id' =
- match lex_id with
- Document -> lexerset.scan_document lexbuf
- | Document_type -> lexerset.scan_document_type lexbuf
- | Content -> lexerset.scan_content lexbuf
- | Within_tag -> lexerset.scan_within_tag lexbuf
- | Declaration -> lexerset.scan_declaration lexbuf
- | Content_comment -> lexerset.scan_content_comment lexbuf
- | Decl_comment -> lexerset.scan_decl_comment lexbuf
- | Document_comment -> lexerset.scan_document_comment lexbuf
- | Ignored_section -> assert false
- (* Ignored_section: only used by method next_ignored_token *)
- in
- if debug then
- prerr_endline ("- Entity " ^ name ^ ": " ^ string_of_tok tok);
- (* Find out the number of lines and characters of the last line: *)
- let n_lines, n_columns =
- if errors_with_line_numbers then
- count_lines (Lexing.lexeme lexbuf)
- else
- 0, (Lexing.lexeme_end lexbuf - Lexing.lexeme_start lexbuf)
- in
- line <- this_line + n_lines;
- column <- if n_lines = 0 then this_column + n_columns else n_columns;
- pos <- Lexing.lexeme_end lexbuf;
- lex_id <- lex_id';
- (* Throw Ignore and Comment away; Interpret entity references: *)
- (* NOTE: Of course, references to general entities are not allowed
- * everywhere; parameter references, too. This is already done by the
- * lexers, i.e. &name; and %name; are recognized only where they
- * are allowed.
- *)
-
- (* TODO: last_token is only used to detect Bof. Can be simplified *)
-
- let at_bof = (last_token = Bof) in
- last_token <- tok;
-
- let tok' =
- match tok with
-
- (* Entity references: *)
-
- | ERef n ->
- let en, extdecl = dtd # gen_entity n in
- if dtd # standalone_declaration && extdecl then
- raise
- (Validation_error
- ("Reference to entity `" ^ n ^
- "' violates standalone declaration"));
- en # set_debugging_mode debug;
- en # open_entity true lex_id;
- self # manager # push_entity en;
- en # next_token;
- | PERef n ->
- let en = dtd # par_entity n in
- en # set_debugging_mode debug;
- en # open_entity force_parameter_entity_parsing lex_id;
- self # manager # push_entity en;
- en # next_token;
-
- (* Convert LineEnd to CharData *)
- | LineEnd s ->
- if normalize_newline then
- CharData "\n"
- else
- CharData s
-
- (* Also normalize CDATA sections *)
- | Cdata value as cd ->
- if normalize_newline then
- Cdata(normalize_line_separators lexerset value)
- else
- cd
-
- (* If there are CRLF sequences in a PI value, normalize them, too *)
- | PI(name,value) as pi ->
- if normalize_newline then
- PI(name, normalize_line_separators lexerset value)
- else
- pi
-
- (* Attribute values: If they are already normalized, they are turned
- * into Attval_nl_normalized. This is detected by other code.
- *)
- | Attval value as av ->
- if normalize_newline then
- av
- else
- Attval_nl_normalized value
-
- (* Another CRLF normalization case: Unparsed_string *)
- | Unparsed_string value as ustr ->
- if normalize_newline then
- Unparsed_string(normalize_line_separators lexerset value)
- else
- ustr
-
- (* These tokens require that the entity_id parameter is set: *)
- | Doctype _ -> Doctype (self :> entity_id)
- | Doctype_rangle _ ->Doctype_rangle(self :> entity_id)
- | Dtd_begin _ -> Dtd_begin (self :> entity_id)
- | Dtd_end _ -> Dtd_end (self :> entity_id)
- | Decl_element _ -> Decl_element (self :> entity_id)
- | Decl_attlist _ -> Decl_attlist (self :> entity_id)
- | Decl_entity _ -> Decl_entity (self :> entity_id)
- | Decl_notation _ ->Decl_notation (self :> entity_id)
- | Decl_rangle _ -> Decl_rangle (self :> entity_id)
- | Lparen _ -> Lparen (self :> entity_id)
- | Rparen _ -> Rparen (self :> entity_id)
- | RparenPlus _ -> RparenPlus (self :> entity_id)
- | RparenStar _ -> RparenStar (self :> entity_id)
- | RparenQmark _ -> RparenQmark (self :> entity_id)
- | Conditional_begin _ -> Conditional_begin (self :> entity_id)
- | Conditional_body _ -> Conditional_body (self :> entity_id)
- | Conditional_end _ -> Conditional_end (self :> entity_id)
- | Tag_beg (n,_) -> Tag_beg (n, (self :> entity_id))
- | Tag_end (n,_) -> Tag_end (n, (self :> entity_id))
-
- (* End of file: *)
-
- | Eof ->
- if debug then begin
- prerr_endline ("- Entity " ^ name ^ " # handle_eof");
- let tok = self # handle_eof in
- prerr_endline ("- Entity " ^ name ^ " # handle_eof: returns " ^ string_of_tok tok);
- tok
- end
- else
- self # handle_eof;
-
- (* The default case. *)
-
- | _ ->
- tok
-
- in
- if at_bof & tok <> Eof
- then begin
- if debug then
- prerr_endline ("- Entity " ^ name ^ " # handle_bof");
- self # handle_bof tok'
- end
- else
- tok'
- end
-
-
- (* 'handle_bof' and 'handle_eof' can be used as hooks. Behaviour:
- *
- * - Normally, the first token t is read in, and 'handle_bof t' is
- * called. The return value of this method is what is returned to
- * the user.
- * - If the EOF has been reached, 'handle_eof' is called.
- * - BUT: If the first token is already EOF, 'handle_eof' is called
- * ONLY, and 'handle_bof' is NOT called.
- *
- * The default implementations:
- * - handle_bof: does nothing
- * - handle_eof: Pops the previous entity from the stack, switches back
- * to this entity, and returns the next token of this entity.
- *)
-
-
- method private handle_bof tok =
- tok
-
-
- method private handle_eof =
- let mng = self # manager in
- begin try
- mng # pop_entity;
- let next_lex_id = self # close_entity in
- let en = mng # current_entity in
- en # set_lex_id next_lex_id;
- en # next_token
- with
- Stack.Empty ->
- (* The outermost entity is at EOF *)
- Eof
- end
-
-
- method next_ignored_token =
- (* used after <![ IGNORE *)
-
- (* TODO: Do we need a test on deferred tokens here? *)
-
- let this_line = line
- and this_column = column in
- let this_pos = pos in
- let tok, lex_id' = lexerset.scan_ignored_section lexbuf in
- if debug then
- prerr_endline ("- Entity " ^ name ^ ": " ^ string_of_tok tok ^ " (Ignored)");
- let n_lines, n_columns = count_lines (Lexing.lexeme lexbuf) in
- line <- this_line + n_lines;
- column <- if n_lines = 0 then this_column + n_columns else n_columns;
- pos <- Lexing.lexeme_end lexbuf;
- match tok with
- | Conditional_begin _ -> Conditional_begin (self :> entity_id)
- | Conditional_end _ -> Conditional_end (self :> entity_id)
- | _ -> tok
-
-
- method process_xmldecl pl =
- (* The parser calls this method just after the XML declaration
- * <?xml ...?> has been detected.
- * 'pl': This is the argument of the PI_xml token.
- *)
- if debug then
- prerr_endline ("- Entity " ^ name ^ " # process_xmldecl");
- prolog <- Some pl;
- prolog_pairs <- decode_xml_pi pl;
- if check_text_declaration then
- check_text_xml_pi prolog_pairs;
- begin
- try
- let e = List.assoc "encoding" prolog_pairs in
- self # set_encoding e
- with
- Not_found ->
- self # set_encoding ""
- end;
-
-
- method process_missing_xmldecl =
- (* The parser calls this method if the XML declaration is missing *)
- if debug then
- prerr_endline ("- Entity " ^ name ^ " # process_missing_xmldecl");
- self # set_encoding ""
-
-
- (* Methods for NDATA entities only: *)
- method ext_id = (assert false : ext_id)
- method notation = (assert false : string)
-
- end
-;;
-
-
-class ndata_entity the_name the_ext_id the_notation init_encoding =
- object (self)
- (* An NDATA entity is very restricted; more or less you can only find out
- * its external ID and its notation.
- *)
-
- val mutable name = the_name
- val mutable ext_id = the_ext_id
- val mutable notation = the_notation
- val encoding = (init_encoding : rep_encoding)
-
- method name = (name : string)
- method ext_id = (ext_id : ext_id)
- method notation = (notation : string)
-
- method is_ndata = true
-
- method encoding = encoding
-
-
- val mutable counts_as_external = false
-
- method counts_as_external = counts_as_external
- (* Whether the entity counts as external (for the standalone check). *)
-
- method set_counts_as_external =
- counts_as_external <- true
-
-
- method set_manager (m : < current_entity : entity;
- pop_entity : unit;
- push_entity : entity -> unit >) =
- ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
- : unit )
-
- method set_lex_id (id : lexers) =
- ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
- : unit )
-
- method line =
- ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
- : int )
-
- method column =
- ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
- : int )
-
- method full_name =
- ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
- : string )
-
- method private set_encoding (_:string) =
- assert false
-
- method xml_declaration = (None : (string*string) list option)
-
- method set_debugging_mode (_:bool) = ()
-
- method open_entity (_:bool) (_:lexers) =
- ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
- : unit )
-
- method close_entity =
- ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
- : lexers )
-
- method replacement_text =
- ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
- : (string * bool) )
-
- method lexbuf =
- ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
- : Lexing.lexbuf )
-
- method next_token =
- ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
- : token )
-
- method next_ignored_token =
- ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
- : token )
-
- method process_xmldecl (pl:prolog_token list) =
- ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
- : unit )
-
- method process_missing_xmldecl =
- ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
- : unit )
-
- end
-;;
-
-
-class external_entity the_resolver the_dtd the_name the_warner the_ext_id
- the_p_special_empty_entities
- init_errors_with_line_numbers
- init_encoding
- =
- object (self)
- inherit entity
- the_dtd the_name the_warner init_errors_with_line_numbers
- init_encoding
- as super
-
- (* An external entity gets the lexbuf that is used as character source
- * from a resolver.
-     * Furthermore, a Begin_entity token is inserted before the first token,
-     * and an End_entity token is inserted before Eof. This is always done
-     * regardless of the argument 'force_parsing' of the method
- * 'open_entity'.
- *
- * 'the_p_internal_subset': see class internal_entity
- * 'the_p_special_empty_entities': if true, a Begin_entity/End_entity
- * brace is left out if the entity is otherwise empty.
- *)
-
- val resolver = (the_resolver : resolver)
- val ext_id = (the_ext_id : ext_id)
-
- val p_special_empty_entities = (the_p_special_empty_entities : bool)
-
- val mutable resolver_is_open = false
- (* Track if the resolver is open. This is also used to find recursive
- * references of entities.
- *)
-
- val mutable state = At_beginning
-
- initializer
- counts_as_external <- true;
-
-
- method private set_encoding e =
- assert resolver_is_open;
- resolver # change_encoding e
-
-
- method full_name =
- name ^
- match ext_id with
- System s -> " = SYSTEM \"" ^ s ^ "\""
- | Public(p,s) -> " = PUBLIC \"" ^ p ^ "\" \"" ^ s ^ "\""
- | Anonymous -> " = ANONYMOUS"
-
-
- method open_entity force_parsing init_lex_id =
- (* Note that external entities are always parsed, i.e. Begin_entity
-       * and End_entity tokens embrace the inner tokens to ensure that
-       * the entity is only referenced where the syntax allows it.
- *)
- if resolver_is_open then
- raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
- let lex =
- try
- resolver # open_in ext_id
- with
- Pxp_reader.Not_competent ->
- raise(Error ("No input method available for this external entity: " ^
- self # full_name))
- | Pxp_reader.Not_resolvable Not_found ->
- raise(Error ("Unable to open the external entity: " ^
- self # full_name))
- | Pxp_reader.Not_resolvable e ->
- raise(Error ("Unable to open the external entity: " ^
- self # full_name ^ "; reason: " ^
- string_of_exn e))
- in
- resolver_is_open <- true;
- lexbuf <- lex;
- prolog <- None;
- lex_id <- init_lex_id;
- state <- At_beginning;
- line <- 1;
- column <- 0;
- pos <- 0;
- last_token <- Bof;
- normalize_newline <- true;
-
-
- method private handle_bof tok =
- (* This hook is only called if the stream is not empty. *)
- deferred_token <- Some [ tok ];
- state <- Inserted_begin_entity;
- Begin_entity
-
-
- method private handle_eof =
- (* This hook is called if the end of the stream is reached *)
- match state with
- At_beginning ->
- (* This is only possible if the stream is empty. *)
- if p_special_empty_entities then begin
- (* Continue immediately with the next token *)
- state <- At_end;
- super # handle_eof
- end
- else begin
- (* Insert Begin_entity / End_entity *)
- deferred_token <- Some [ End_entity ];
- state <- At_end;
- Begin_entity;
-            (* After these two tokens have been processed, the lexer
-             * is called again, and it will return another Eof.
-             *)
- end
- | Inserted_begin_entity ->
- (* Insert End_entity, too. *)
- state <- At_end;
- End_entity;
- | At_end ->
- (* Continue with the next token: *)
- super # handle_eof
-
-
- method close_entity =
- if not resolver_is_open then
- failwith ("External entity " ^ name ^ " not open");
- resolver # close_in;
- resolver_is_open <- false;
- lex_id
-
-
- method replacement_text =
- (* Return the replacement text of the entity. The method used for this
- * is more or less the same as for internal entities; i.e. character
- * and parameter entities are resolved immediately. In addition to that,
- * external entities may begin with an "xml" processing instruction
- * which is considered not to be part of the replacement text.
- *)
- if resolver_is_open then
- raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
- let lex = resolver # open_in ext_id in
- resolver_is_open <- true;
- lexbuf <- lex;
- prolog <- None;
- (* arbitrary: lex_id <- init_lex_id; *)
- state <- At_beginning;
- line <- 1;
- column <- 0;
- pos <- 0;
- last_token <- Bof;
- (* First check if the first token of 'lex' is <?xml...?> *)
- begin match lexerset.scan_only_xml_decl lex with
- PI_xml pl ->
- self # process_xmldecl pl
- | Eof ->
- (* This only means that the first token was not <?xml...?>;
- * the "Eof" token represents the empty string.
- *)
- self # process_missing_xmldecl
- | _ ->
- (* Must not happen. *)
- assert false
- end;
- (* Then create the replacement text. *)
- let rec scan_and_expand () =
- match lexerset.scan_dtd_string lexbuf with
- ERef n -> "&" ^ n ^ ";" ^ scan_and_expand()
- | CRef(-1) -> "\n" ^ scan_and_expand()
- | CRef(-2) -> "\n" ^ scan_and_expand()
- | CRef(-3) -> "\n" ^ scan_and_expand()
- | CRef k -> character encoding warner k ^ scan_and_expand()
- | CharData x -> x ^ scan_and_expand()
- | PERef n ->
- let en = dtd # par_entity n in
- let (x,_) = en # replacement_text in
- x ^ scan_and_expand()
- | Eof ->
- ""
- | _ ->
- assert false
- in
- let rtext = scan_and_expand() in
- resolver # close_in;
- resolver_is_open <- false;
- rtext, true
- (* TODO:
- * - The replaced text is not parsed [VALIDATION WEAKNESS]
- *)
- end
-;;
-
-
-class document_entity the_resolver the_dtd the_name the_warner the_ext_id
- init_errors_with_line_numbers
- init_encoding
- =
- object (self)
- inherit external_entity the_resolver the_dtd the_name the_warner
- the_ext_id false init_errors_with_line_numbers
- init_encoding
-
- (* A document entity is an external entity that does not allow
- * conditional sections, and that forces that internal parameter entities
- * are properly nested.
- *)
-
- initializer
- force_parameter_entity_parsing <- true;
- check_text_declaration <- false;
-
- method counts_as_external = false
-    (* Document entities never count as external! *)
- end
-;;
-
-
-class internal_entity the_dtd the_name the_warner the_literal_value
- the_p_internal_subset init_errors_with_line_numbers
- init_is_parameter_entity
- init_encoding
- =
- (* An internal entity uses a "literal entity value" as character source.
- * This value is first expanded and preprocessed, i.e. character and
- * parameter references are expanded.
- *
- * 'the_p_internal_subset': indicates that the entity is declared in the
- * internal subset. Such entity declarations are not allowed to contain
- * references to parameter entities.
- * 'init_is_parameter_entity': whether this is a parameter entity or not
- *)
-
- object (self)
- inherit entity
- the_dtd the_name the_warner init_errors_with_line_numbers
- init_encoding
- as super
-
- val p_internal_subset = the_p_internal_subset
-
- val mutable replacement_text = ""
- val mutable contains_external_references = false
- val mutable p_parsed_actually = false
- val mutable is_open = false
- val mutable state = At_beginning
- val mutable is_parameter_entity = init_is_parameter_entity
-
-
- initializer
- let lexbuf = Lexing.from_string the_literal_value in
- let rec scan_and_expand () =
- match lexerset.scan_dtd_string lexbuf with
- ERef n -> "&" ^ n ^ ";" ^ scan_and_expand()
- | CRef(-1) -> "\r\n" ^ scan_and_expand()
- | CRef(-2) -> "\r" ^ scan_and_expand()
- | CRef(-3) -> "\n" ^ scan_and_expand()
- | CRef k -> character encoding warner k ^ scan_and_expand()
- | CharData x -> x ^ scan_and_expand()
- | PERef n ->
- if p_internal_subset then
- raise(WF_error("Restriction of the internal subset: parameter entity not allowed here"));
- let en = dtd # par_entity n in
- let (x, extref) = en # replacement_text in
- contains_external_references <-
- contains_external_references or extref;
- x ^ scan_and_expand()
- | Eof ->
- ""
- | _ ->
- assert false
- in
- is_open <- true;
- replacement_text <- scan_and_expand();
- is_open <- false;
- normalize_newline <- false;
- counts_as_external <- false;
-
-
- method process_xmldecl (pl:prolog_token list) =
- raise(Validation_error("The encoding cannot be changed in internal entities"))
-
-
- method process_missing_xmldecl =
- ()
-
-
- method private set_encoding e =
- (* Ignored if e = "" *)
- assert(e = "");
-
-
- method open_entity force_parsing init_lex_id =
- if is_open then
- raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
-
- p_parsed_actually <- force_parsing;
- lexbuf <- Lexing.from_string
- (if is_parameter_entity then
- (" " ^ replacement_text ^ " ")
- else
- replacement_text);
- prolog <- None;
- lex_id <- init_lex_id;
- state <- At_beginning;
- is_open <- true;
- line <- 1;
- column <- 0;
- pos <- 0;
-      last_token <- Bof;
-
-
- method private handle_bof tok =
- (* This hook is only called if the stream is not empty. *)
- if p_parsed_actually then begin
- deferred_token <- Some [ tok ];
- state <- Inserted_begin_entity;
- Begin_entity
- end
- else begin
- state <- At_end;
- tok
- end
-
-
- method private handle_eof =
- (* This hook is called if the end of the stream is reached *)
- match state with
- At_beginning ->
- (* This is only possible if the stream is empty. *)
- if p_parsed_actually then begin
- (* Insert Begin_entity / End_entity *)
- deferred_token <- Some [ End_entity ];
- state <- At_end;
- Begin_entity;
-            (* After these two tokens have been processed, the lexer
-             * is called again, and it will return another Eof.
-             *)
- end
- else begin
- (* Continue immediately with the next token *)
- state <- At_end;
- super # handle_eof
- end
- | Inserted_begin_entity ->
- (* Insert End_entity, too. *)
- state <- At_end;
- End_entity;
- | At_end ->
- (* Continue with the next token: *)
- super # handle_eof
-
-
- method close_entity =
- if not is_open then
- failwith ("Internal entity " ^ name ^ " not open");
- is_open <- false;
- lex_id
-
-
- method replacement_text =
- if is_open then
- raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
- replacement_text, contains_external_references
- end
-;;
-
-(**********************************************************************)
-
-(* An 'entity_manager' is a stack of entities, where the topmost entity
- * is the currently active entity, the second entity is the entity that
- * referred to the active entity, and so on.
- *
- * The entity_manager can communicate with the currently active entity.
- *
- * The entity_manager provides an interface for the parser; the functions
- * returning the current token and the next token are exported.
- *)
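-
-(* Usage sketch (illustrative only; "toplevel_entity" and "referenced_entity"
- * stand for entity objects created elsewhere). The parser drives the manager
- * roughly like this:
- *
- *   let mgr = new entity_manager toplevel_entity in
- *   ...
- *   mgr # push_entity referenced_entity;    (* when a reference is opened *)
- *   ...
- *   mgr # pop_entity;                       (* when End_entity is seen *)
- *   prerr_endline (mgr # position_string)   (* for error messages *)
- *)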
-
-class entity_manager (init_entity : entity) =
- object (self)
- val mutable entity_stack = Stack.create()
- val mutable current_entity = init_entity
- val mutable current_entity's_full_name = lazy (init_entity # full_name)
-
- val mutable yy_get_next_ref = ref (fun () -> assert false)
-
- initializer
- init_entity # set_manager (self :>
- < current_entity : entity;
- pop_entity : unit;
- push_entity : entity -> unit >
- );
- yy_get_next_ref := (fun () -> init_entity # next_token)
-
- method push_entity e =
- e # set_manager (self :>
- < current_entity : entity;
- pop_entity : unit;
- push_entity : entity -> unit >
- );
- Stack.push (current_entity, current_entity's_full_name) entity_stack;
- current_entity <- e;
- current_entity's_full_name <- lazy (e # full_name);
- yy_get_next_ref := (fun () -> e # next_token);
-
- method pop_entity =
- (* May raise Stack.Empty *)
- let e, e_name = Stack.pop entity_stack in
- current_entity <- e;
- current_entity's_full_name <- e_name;
- yy_get_next_ref := (fun () -> e # next_token);
-
-
-
- method position_string =
- (* Gets a string describing the position of the last token;
- * includes an entity backtrace
- *)
- let b = Buffer.create 200 in
- Buffer.add_string b
- ("In entity " ^ current_entity # full_name
- ^ ", at line " ^ string_of_int (current_entity # line)
- ^ ", position " ^ string_of_int (current_entity # column)
- ^ ":\n");
- Stack.iter
- (fun (e, e_name) ->
- Buffer.add_string b
- ("Called from entity " ^ Lazy.force e_name
- ^ ", line " ^ string_of_int (e # line)
- ^ ", position " ^ string_of_int (e # column)
- ^ ":\n");
- )
- entity_stack;
- Buffer.contents b
-
-
- method position =
- (* Returns the triple (full_name, line, column) of the last token *)
- Lazy.force current_entity's_full_name,
- current_entity # line,
- current_entity # column
-
-
- method current_entity_counts_as_external =
- (* Whether the current entity counts as external to the main
- * document for the purpose of stand-alone checks.
- *)
- (* TODO: improve performance *)
- let is_external = ref false in
- let check (e, _) =
- if e # counts_as_external then begin
- is_external := true;
- end;
- in
- check (current_entity,());
- Stack.iter check entity_stack;
- !is_external
-
-
- method current_entity = current_entity
-
- method yy_get_next_ref = yy_get_next_ref
-
- end
-;;
-
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.6 2000/07/14 13:55:00 gerd
- * Cosmetic changes.
- *
- * Revision 1.5 2000/07/09 17:51:50 gerd
- * Entities return now the beginning of a token as its
- * position.
- * New method 'position' for entity_manager.
- *
- * Revision 1.4 2000/07/09 01:05:04 gerd
- * Exported methods 'ext_id' and 'notation' anyway.
- *
- * Revision 1.3 2000/07/08 16:28:05 gerd
- * Updated: Exception 'Not_resolvable' is taken into account.
- *
- * Revision 1.2 2000/07/04 22:12:47 gerd
- * Update: Case ext_id = Anonymous.
- * Update: Handling of the exception Not_competent when reading
- * from a resolver.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_entity.ml:
- *
- * Revision 1.27 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.26 2000/05/28 17:24:55 gerd
- * Bugfixes.
- *
- * Revision 1.25 2000/05/27 19:23:32 gerd
- * The entities store whether they count as external with
- * respect to the standalone check: New methods counts_as_external
- * and set_counts_as_external.
- * The entity manager can find out whether the current
- * entity counts as external: method current_entity_counts_as_external.
- *
- * Revision 1.24 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.23 2000/05/14 21:51:24 gerd
- * Change: Whitespace is handled by the grammar, and no longer
- * by the entity.
- *
- * Revision 1.22 2000/05/14 17:50:54 gerd
- * Updates because of changes in the token type.
- *
- * Revision 1.21 2000/05/09 00:02:44 gerd
- * Conditional sections are now recognized by the parser.
- * There seem some open questions; see the TODO comments!
- *
- * Revision 1.20 2000/05/08 21:58:22 gerd
- * Introduced entity_manager as communication object between
- * the parser and the currently active entity.
- * New hooks handle_bof and handle_eof.
- * Removed "delegated entities". The entity manager contains
- * the stack of open entities.
- * Changed the way Begin_entity and End_entity are inserted.
- * This is now done by handle_bof and handle_eof.
- * The XML declaration is no longer detected by the entity.
- * This is now done by the parser.
- *
- * Revision 1.19 2000/05/01 15:18:44 gerd
- * Improved CRLF handling in the replacement text of entities.
- * Changed one error message.
- *
- * Revision 1.18 2000/04/30 18:18:39 gerd
- * Bugfixes: The conversion of CR and CRLF to LF is now hopefully
- * done right. The new variable "normalize_newline" indicates whether
- * normalization must happen for that type of entity. The normalization
- * is actually carried out separately for every token that needs it.
- *
- * Revision 1.17 2000/03/13 23:42:38 gerd
- * Removed the resolver classes, and put them into their
- * own module (Markup_reader).
- *
- * Revision 1.16 2000/02/22 01:06:58 gerd
- * Bugfix: Resolvers are properly re-initialized. This bug caused
- * that entities could not be referenced twice in the same document.
- *
- * Revision 1.15 2000/01/20 20:54:11 gerd
- * New config.errors_with_line_numbers.
- *
- * Revision 1.14 2000/01/08 18:59:03 gerd
- * Corrected the string resolver.
- *
- * Revision 1.13 1999/09/01 22:58:23 gerd
- * Method warn_not_latin1 raises Illegal_character if the character
- * does not match the Char production.
- * External entities that are not document entities check if the
- * <?xml...?> declaration at the beginning matches the TextDecl production.
- * Method xml_declaration has type ... list option, not ... list.
- * Tag_beg and Tag_end now carry an entity_id with them.
- * The code to check empty entities has changed. That the Begin_entity/
- * End_entity pair is not to be added must be explicitly turned on. See the
- * description of empty entity handling in design.txt.
- * In internal subsets entity declarations are not allowed to refer
- * to parameter entities. The internal_entity class can do this now.
- * The p_parsed parameter of internal_entity has gone. It was simply
- * superfluous.
- *
- * Revision 1.12 1999/09/01 16:24:13 gerd
- * The method replacement_text returns the text as described for
- * "included in literal". The former behaviour has been dropped to include
- * a leading and a trailing space character for parameter entities.
- * Bugfix: When general entities are included, they are always parsed.
- *
- * Revision 1.11 1999/08/31 19:13:31 gerd
- * Added checks on proper PE nesting. The idea is that tokens such
- * as Decl_element and Decl_rangle carry an entity ID with them. This ID
- * is simply an object of type < >, i.e. you can only test on identity.
- * The lexer always produces tokens with a dummy ID because it does not
- * know which entity is the current one. The entity layer replaces the dummy
- * ID with the actual ID. The parser checks that the IDs of pairs such as
- * Decl_element and Decl_rangle are the same; otherwise a Validation_error
- * is produced.
- *
- * Revision 1.10 1999/08/19 01:06:41 gerd
- * Improved error messages: external entities print their
- * ext id, too
- *
- * Revision 1.9 1999/08/15 20:35:48 gerd
- * Improved error messages.
- * Before the tokens Plus, Star, Qmark space is not allowed any longer.
- * Detection of recursive entity references is a bit cleaner.
- *
- * Revision 1.8 1999/08/15 15:33:44 gerd
- * Revised whitespace checking: At certain positions there must be
- * white space. These checks cannot be part of the lexer, as %entity; counts
- * as white space. They cannot be part of the yacc parser because one look-ahead
- * token would not suffice if we did that. So these checks must be done by the
- * entity layer. Luckily, the rules are simple: There are simply a number of
- * token pairs between which white space must occur independently of where
- * these tokens have been found. Two variables, "space_seen" and "last_token",
- * have been added in order to check these rules.
- *
- * Revision 1.7 1999/08/15 00:41:06 gerd
- * The [ token of conditional sections is now allowed to occur
- * in a different entity.
- *
- * Revision 1.6 1999/08/15 00:29:02 gerd
- * The method "attlist_replacement_text" has gone. There is now a
- * more general "replacement_text" method that computes the replacement
- * text for both internal and external entities. Additionally, this method
- * returns whether references to external entities have been resolved;
- * this is checked in the cases where formerly "attlist_replacement_text"
- * was used as it is not allowed everywhere.
- * Entities have a new slot "need_spaces" that indicates that the
- * next token must be white space or a parameter reference. The problem
- * was that "<!ATTLIST%e;" is legal because when including parameter
- * entities white space is added implicitly. Formerly, the white space
- * was expected by the underlying lexer; now the lexer does not check
- * anymore that "<!ATTLIST" is followed by white space because the lexer
- * cannot handle parameter references. Because of this, the check on
- * white space must be done by the entity.
- *
- * Revision 1.5 1999/08/14 22:57:19 gerd
- * It is allowed that external entities are empty because the
- * empty string is well-parsed for both declarations and contents. Empty
- * entities can be referenced anywhere because the references are replaced
- * by nothing. Because of this, the Begin_entity...End_entity brace is only
- * inserted if the entity is non-empty. (Otherwise references to empty
- * entities would not be allowed anywhere.)
- * As a consequence, the grammar has been changed such that a
- * single Eof is equivalent to Begin_entity,End_entity without content.
- *
- * Revision 1.4 1999/08/14 22:11:19 gerd
- * Several objects have now a "warner" as argument which is
- * an object with a "warn" method. This is used to warn about characters
- * that cannot be represented in the Latin 1 alphabet.
- * Previously, the resolvers had features in order to warn about
- * such characters; this has been removed.
- * UTF-8 streams can be read even if they contain characters
- * that cannot be represented by 16 bits.
- * The buffering used in the resolvers is now solved in a
- * cleaner way; the number of characters that are expected to be read
- * from a source can be limited. This removes a bug with UTF-16 streams
- * that previously lead to wrong exceptions; and the buffering is more
- * efficient, too.
- *
- * Revision 1.3 1999/08/11 14:58:53 gerd
- * Some more names for encodings are allowed, such as "utf8" instead
- * of the standard name "UTF-8".
- * 'resolve_as_file' interprets relative file names as relative to
- * the "parent" resolver.
- *
- * Revision 1.2 1999/08/10 21:35:07 gerd
- * The XML/encoding declaration at the beginning of entities is
- * evaluated. In particular, entities have now a method "xml_declaration"
- * which returns the name/value pairs of such a declaration. The "encoding"
- * setting is interpreted by the entity itself; "version", and "standalone"
- * are interpreted by Markup_yacc.parse_document_entity. Other settings
- * are ignored (this does not conform to the standard; the standard prescribes
- * that "version" MUST be given in the declaration of document; "standalone"
- * and "encoding" CAN be declared; no other settings are allowed).
- * TODO: The user should be warned if the standard is not exactly
- * fulfilled. -- The "standalone" property is not checked yet.
- *
- * Revision 1.1 1999/08/10 00:35:51 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-type lexers =
- Document
- | Document_type
- | Content
- | Within_tag
- | Declaration
- | Content_comment
- | Decl_comment
- | Document_comment
- | Ignored_section
-
-
-type prolog_token =
- Pro_name of string
- | Pro_eq (* "=" *)
- | Pro_string of string (* "..." or '...' *)
- | Pro_eof
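-
-(* Illustrative example: for the declaration <?xml version="1.0"?> the
- * prolog scanner yields roughly the token sequence
- *   Pro_name "version"; Pro_eq; Pro_string "1.0"; Pro_eof
- *)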
-
-
-type entity_id = < >
- (* The class without properties; but you can still compare if two objects
- * are the same.
- *)
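-
-(* Illustrative sketch of identity comparison:
- *   let id1 = (object end : entity_id) in
- *   let id2 = (object end : entity_id) in
- *   id1 == id1 && not (id1 == id2)     (* physical identity only *)
- *)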
-
-type token =
- | Begin_entity (* Beginning of entity *)
- | End_entity (* End of entity *)
- | Comment_begin (* <!-- *)
- | Comment_material of string (* within a comment *)
- | Comment_end (* --> *)
- | Ignore (* ignored whitespace *)
- | Eq (* = *)
- | Rangle (* > as tag delimiter *)
- | Rangle_empty (* /> as tag delimiter *)
- | Percent (* % followed by space in declaration *)
- | Plus (* + in declaration *)
- | Star (* * in declaration *)
- | Bar (* | in declaration *)
- | Comma (* , in declaration *)
- | Qmark (* ? in declaration *)
- | Pcdata (* #PCDATA in declaration *)
- | Required (* #REQUIRED in declaration *)
- | Implied (* #IMPLIED in declaration *)
- | Fixed (* #FIXED in declaration *)
- | Bof (* A marker for 'beginning of file' *)
- | Eof (* End of file *)
- | Conditional_begin of entity_id (* <![ in declaration *)
- | Conditional_body of entity_id (* [ in declaration *)
- | Conditional_end of entity_id (* ]]> in declaration *)
- | Doctype of entity_id (* <!DOCTYPE *)
- | Doctype_rangle of entity_id (* > as DOCTYPE delimiter *)
- | Dtd_begin of entity_id (* '[' after DOCTYPE *)
- | Dtd_end of entity_id (* ']' *)
- | Decl_element of entity_id (* <!ELEMENT *)
- | Decl_attlist of entity_id (* <!ATTLIST *)
- | Decl_entity of entity_id (* <!ENTITY *)
- | Decl_notation of entity_id (* <!NOTATION *)
- | Decl_rangle of entity_id (* > *)
- | Lparen of entity_id (* ( in declaration *)
- | Rparen of entity_id (* ) in declaration *)
- | RparenPlus of entity_id (* )+ in declaration *)
- | RparenStar of entity_id (* )* in declaration *)
- | RparenQmark of entity_id (* )? in declaration *)
-
- | Tag_beg of (string*entity_id) (* <name *)
- | Tag_end of (string*entity_id) (* </name *)
-
- | PI of (string*string) (* <?name ... ?> *)
- | PI_xml of (prolog_token list) (* <?xml ...?> *)
- | Cdata of string (* <![CDATA[...]]> *)
- | CRef of int (* &#digits; *)
- | ERef of string (* &name; *)
- | PERef of string (* %name; *)
- | CharData of string (* any characters not otherwise matching *)
- | LineEnd of string
- | Name of string (* name *)
- | Nametoken of string (* nmtoken but not name *)
- | Attval of string (* attribute value; may contain entity refs *)
- | Attval_nl_normalized of string
- | Unparsed_string of string (* "data" or 'data' *)
-
-
-(**********************************************************************)
-(* debugging *)
-
-let string_of_tok tok =
- match tok with
- Begin_entity -> "Begin_entity"
- | End_entity -> "End_entity"
- | Doctype _ -> "Doctype"
- | Doctype_rangle _ -> "Doctype_rangle"
- | Comment_begin -> "Comment_begin"
- | Comment_end -> "Comment_end"
- | Comment_material _ -> "Comment_material"
- | Rangle -> "Rangle"
- | Rangle_empty -> "Rangle_empty"
- | Ignore -> "Ignore"
- | Eq -> "Eq"
- | Dtd_begin _ -> "Dtd_begin"
- | Dtd_end _ -> "Dtd_end"
- | Conditional_begin _ -> "Conditional_begin"
- | Conditional_body _ -> "Conditional_body"
- | Conditional_end _ -> "Conditional_end"
- | Percent -> "Percent"
- | Lparen _ -> "Lparen"
- | Rparen _ -> "Rparen"
- | Plus -> "Plus"
- | Star -> "Star"
- | Bar -> "Bar"
- | Comma -> "Comma"
- | Qmark -> "Qmark"
- | Pcdata -> "Pcdata"
- | Required -> "Required"
- | Implied -> "Implied"
- | Fixed -> "Fixed"
- | Decl_element _ -> "Decl_element"
- | Decl_attlist _ -> "Decl_attlist"
- | Decl_entity _ -> "Decl_entity"
- | Decl_notation _ -> "Decl_notation"
- | Decl_rangle _ -> "Decl_rangle"
- | RparenPlus _ -> "RparenPlus"
- | RparenStar _ -> "RparenStar"
- | RparenQmark _ -> "RparenQmark"
- | Bof -> "Bof"
- | Eof -> "Eof"
- | PI _ -> "PI"
- | PI_xml _ -> "PI_xml"
- | Tag_beg _ -> "Tag_beg"
- | Tag_end _ -> "Tag_end"
- | Cdata _ -> "Cdata"
- | CRef _ -> "CRef"
- | ERef _ -> "ERef"
- | PERef _ -> "PERef"
- | CharData _ -> "CharData"
- | Name _ -> "Name"
- | Nametoken _ -> "Nametoken"
- | Attval _ -> "Attval"
- | Attval_nl_normalized _ -> "Attval_nl_normalized"
- | Unparsed_string _ -> "Unparsed_string"
- | LineEnd _ -> "LineEnd"
-
-
-type lexer_set =
- { lex_encoding : Pxp_types.rep_encoding;
- scan_document : Lexing.lexbuf -> (token * lexers);
- scan_content : Lexing.lexbuf -> (token * lexers);
- scan_within_tag : Lexing.lexbuf -> (token * lexers);
- scan_document_type : Lexing.lexbuf -> (token * lexers);
- scan_declaration : Lexing.lexbuf -> (token * lexers);
- scan_content_comment : Lexing.lexbuf -> (token * lexers);
- scan_decl_comment : Lexing.lexbuf -> (token * lexers);
- scan_document_comment: Lexing.lexbuf -> (token * lexers);
- scan_ignored_section : Lexing.lexbuf -> (token * lexers);
- scan_xml_pi : Lexing.lexbuf -> prolog_token;
- scan_dtd_string : Lexing.lexbuf -> token;
- scan_content_string : Lexing.lexbuf -> token;
- scan_name_string : Lexing.lexbuf -> token;
- scan_only_xml_decl : Lexing.lexbuf -> token;
- scan_for_crlf : Lexing.lexbuf -> token;
- }
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/08/18 20:14:31 gerd
- * Comment -> Comment_begin, Comment_material, Comment_end.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_lexer_types.ml:
- *
- * Revision 1.6 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.5 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.4 2000/05/14 17:45:36 gerd
- * Bugfix.
- *
- * Revision 1.3 2000/05/14 17:35:12 gerd
- * Conditional_begin, _end, and _body have an entity_id.
- *
- * Revision 1.2 2000/05/08 21:59:06 gerd
- * New token Bof (beginning of file).
- *
- * Revision 1.1 2000/05/06 23:21:49 gerd
- * Initial revision.
- *
- *
- * ======================================================================
- *
- * DERIVED FROM REVISION 1.4 of markup_lexer_types_shadow.ml
- *
- * Revision 1.4 2000/04/30 18:19:04 gerd
- * Added new tokens.
- *
- * Revision 1.3 1999/08/31 19:13:31 gerd
- * Added checks on proper PE nesting. The idea is that tokens such
- * as Decl_element and Decl_rangle carry an entity ID with them. This ID
- * is simply an object of type < >, i.e. you can only test on identity.
- * The lexer always produces tokens with a dummy ID because it does not
- * know which entity is the current one. The entity layer replaces the dummy
- * ID with the actual ID. The parser checks that the IDs of pairs such as
- * Decl_element and Decl_rangle are the same; otherwise a Validation_error
- * is produced.
- *
- * Revision 1.2 1999/08/10 21:35:08 gerd
- * The XML/encoding declaration at the beginning of entities is
- * evaluated. In particular, entities have now a method "xml_declaration"
- * which returns the name/value pairs of such a declaration. The "encoding"
- * setting is interpreted by the entity itself; "version", and "standalone"
- * are interpreted by Markup_yacc.parse_document_entity. Other settings
- * are ignored (this does not conform to the standard; the standard prescribes
- * that "version" MUST be given in the declaration of document; "standalone"
- * and "encoding" CAN be declared; no other settings are allowed).
- * TODO: The user should be warned if the standard is not exactly
- * fulfilled. -- The "standalone" property is not checked yet.
- *
- * Revision 1.1 1999/08/10 00:35:51 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-type lexers =
- Document
- | Document_type
- | Content
- | Within_tag
- | Declaration
- | Content_comment
- | Decl_comment
- | Document_comment
- | Ignored_section
-
-
-type prolog_token =
- Pro_name of string
- | Pro_eq (* "=" *)
- | Pro_string of string (* "..." or '...' *)
- | Pro_eof
-
-type entity_id = < >
- (* The class without properties; but you can still compare if two objects
- * are the same.
- *)
-
-type token =
- | Begin_entity (* Beginning of entity *)
- | End_entity (* End of entity *)
- | Comment_begin (* <!-- *)
- | Comment_material of string (* within a comment *)
- | Comment_end (* --> *)
- | Ignore (* ignored whitespace *)
- | Eq (* = *)
- | Rangle (* > as tag delimiter *)
- | Rangle_empty (* /> as tag delimiter *)
- | Percent (* % followed by space in declaration *)
- | Plus (* + in declaration *)
- | Star (* * in declaration *)
- | Bar (* | in declaration *)
- | Comma (* , in declaration *)
- | Qmark (* ? in declaration *)
- | Pcdata (* #PCDATA in declaration *)
- | Required (* #REQUIRED in declaration *)
- | Implied (* #IMPLIED in declaration *)
- | Fixed (* #FIXED in declaration *)
- | Bof (* A marker for 'beginning of file' *)
- | Eof (* End of file *)
- | Conditional_begin of entity_id (* <![ in declaration *)
- | Conditional_body of entity_id (* [ in declaration *)
- | Conditional_end of entity_id (* ]]> in declaration *)
- | Doctype of entity_id (* <!DOCTYPE *)
- | Doctype_rangle of entity_id (* > as DOCTYPE delimiter *)
- | Dtd_begin of entity_id (* '[' after DOCTYPE *)
- | Dtd_end of entity_id (* ']' *)
- | Decl_element of entity_id (* <!ELEMENT *)
- | Decl_attlist of entity_id (* <!ATTLIST *)
- | Decl_entity of entity_id (* <!ENTITY *)
- | Decl_notation of entity_id (* <!NOTATION *)
- | Decl_rangle of entity_id (* > *)
- | Lparen of entity_id (* ( in declaration *)
- | Rparen of entity_id (* ) in declaration *)
- | RparenPlus of entity_id (* )+ in declaration *)
- | RparenStar of entity_id (* )* in declaration *)
- | RparenQmark of entity_id (* )? in declaration *)
-
- | Tag_beg of (string*entity_id) (* <name *)
- | Tag_end of (string*entity_id) (* </name *)
-
- | PI of (string*string) (* <?name ... ?> *)
- | PI_xml of (prolog_token list) (* <?xml ...?> *)
- | Cdata of string (* <![CDATA[...]]> *)
- | CRef of int (* &#digits; *)
- | ERef of string (* &name; *)
- | PERef of string (* %name; *)
- | CharData of string (* any characters not otherwise matching *)
- | LineEnd of string
- | Name of string (* name *)
- | Nametoken of string (* nmtoken but not name *)
- | Attval of string (* attribute value; may contain entity refs *)
- | Attval_nl_normalized of string
- | Unparsed_string of string (* "data" or 'data' *)
-
-
-val string_of_tok : token -> string
-
-
-type lexer_set =
- { lex_encoding : Pxp_types.rep_encoding;
- scan_document : Lexing.lexbuf -> (token * lexers);
- scan_content : Lexing.lexbuf -> (token * lexers);
- scan_within_tag : Lexing.lexbuf -> (token * lexers);
- scan_document_type : Lexing.lexbuf -> (token * lexers);
- scan_declaration : Lexing.lexbuf -> (token * lexers);
- scan_content_comment : Lexing.lexbuf -> (token * lexers);
- scan_decl_comment : Lexing.lexbuf -> (token * lexers);
- scan_document_comment: Lexing.lexbuf -> (token * lexers);
- scan_ignored_section : Lexing.lexbuf -> (token * lexers);
- scan_xml_pi : Lexing.lexbuf -> prolog_token;
- scan_dtd_string : Lexing.lexbuf -> token;
- scan_content_string : Lexing.lexbuf -> token;
- scan_name_string : Lexing.lexbuf -> token;
- scan_only_xml_decl : Lexing.lexbuf -> token;
- scan_for_crlf : Lexing.lexbuf -> token;
- }
-
-(* lexer_set: Every internal encoding has its own set of lexer functions *)
-
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/08/18 20:14:31 gerd
- * Comment -> Comment_begin, Comment_material, Comment_end.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_lexer_types.mli:
- *
- * Revision 1.5 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.4 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.3 2000/05/14 17:35:12 gerd
- * Conditional_begin, _end, and _body have an entity_id.
- *
- * Revision 1.2 2000/05/08 21:59:17 gerd
- * New token Bof (beginning of file).
- *
- * Revision 1.1 2000/05/06 23:21:49 gerd
- * Initial revision.
- *
- *
- * ======================================================================
- *
- * DERIVED FROM REVISION 1.3 of markup_lexer_types_shadow.mli
- *
- * Revision 1.3 1999/08/31 19:13:31 gerd
- * Added checks on proper PE nesting. The idea is that tokens such
- * as Decl_element and Decl_rangle carry an entity ID with them. This ID
- * is simply an object of type < >, i.e. you can only test on identity.
- * The lexer always produces tokens with a dummy ID because it does not
- * know which entity is the current one. The entity layer replaces the dummy
- * ID with the actual ID. The parser checks that the IDs of pairs such as
- * Decl_element and Decl_rangle are the same; otherwise a Validation_error
- * is produced.
- *
- * Revision 1.2 1999/08/10 21:35:09 gerd
- * The XML/encoding declaration at the beginning of entities is
- * evaluated. In particular, entities have now a method "xml_declaration"
- * which returns the name/value pairs of such a declaration. The "encoding"
- * setting is interpreted by the entity itself; "version", and "standalone"
- * are interpreted by Markup_yacc.parse_document_entity. Other settings
- * are ignored (this does not conform to the standard; the standard prescribes
- * that "version" MUST be given in the declaration of document; "standalone"
- * and "encoding" CAN be declared; no other settings are allowed).
- * TODO: The user should be warned if the standard is not exactly
- * fulfilled. -- The "standalone" property is not checked yet.
- *
- * Revision 1.1 1999/08/10 00:35:51 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
- *)
-
-
-open Pxp_types
-open Pxp_lexer_types
-
-let lexer_set_iso88591 =
- { lex_encoding = `Enc_iso88591;
- scan_document = Pxp_lex_document_iso88591.scan_document;
- scan_content = Pxp_lex_content_iso88591.scan_content;
- scan_within_tag = Pxp_lex_within_tag_iso88591.scan_within_tag;
- scan_document_type = Pxp_lex_document_type_iso88591.
- scan_document_type;
- scan_declaration = Pxp_lex_declaration_iso88591.scan_declaration;
- scan_content_comment = Pxp_lex_misc_iso88591.scan_content_comment;
- scan_decl_comment = Pxp_lex_misc_iso88591.scan_decl_comment;
- scan_document_comment = Pxp_lex_misc_iso88591.scan_document_comment;
- scan_ignored_section = Pxp_lex_name_string_iso88591.
- scan_ignored_section;
- scan_xml_pi = Pxp_lex_misc_iso88591.scan_xml_pi;
- scan_dtd_string = Pxp_lex_dtd_string_iso88591.scan_dtd_string;
- scan_content_string = Pxp_lex_content_string_iso88591.
- scan_content_string;
- scan_name_string = Pxp_lex_name_string_iso88591.scan_name_string;
- scan_only_xml_decl = Pxp_lex_misc_iso88591.scan_only_xml_decl;
- scan_for_crlf = Pxp_lex_misc_iso88591.scan_for_crlf;
- }
-;;
-
-
-let lexer_set_utf8 = ref None
-;;
-
-
-let init_utf8 ls =
- lexer_set_utf8 := Some ls
-;;
-
-
-let get_lexer_set enc =
- match enc with
- `Enc_iso88591 -> lexer_set_iso88591
- | `Enc_utf8 ->
- ( match !lexer_set_utf8 with
- None ->
- failwith ("Pxp_lexers: UTF-8 lexers not initialized")
- | Some ls ->
- ls
- )
- | _ ->
- failwith ("Pxp_lexers: This type of internal encoding is not supported")
-;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.4 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * Revision 1.3 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.2 2000/05/23 00:09:44 gerd
- * The UTF-8 lexer set is no longer initialized here. It is done
- * in the new module Pxp_utf8. Reason: You can link without UTF-8 support.
- *
- * Revision 1.1 2000/05/20 20:30:50 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
- *)
-
-
-open Pxp_types
-open Pxp_lexer_types
-
-val get_lexer_set : rep_encoding -> lexer_set
- (* Return the set of lexer functions that is able to handle the passed
- * encoding.
- *)
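-
-  (* Usage sketch (illustrative; the XML text is made up):
-   *   let ls = get_lexer_set `Enc_iso88591 in
-   *   let lexbuf = Lexing.from_string "<doc>text</doc>" in
-   *   let tok, next_lexer = ls.scan_document lexbuf in
-   *   ...
-   *)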
-
-val init_utf8 : lexer_set -> unit
- (* Internally used. *)
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.4 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * Revision 1.3 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.2 2000/05/23 00:09:44 gerd
- * The UTF-8 lexer set is no longer initialized here. It is done
- * in the new module Pxp_utf8. Reason: You can link without UTF-8 support.
- *
- * Revision 1.1 2000/05/20 20:30:50 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-open Pxp_types;;
-exception Not_competent;;
-exception Not_resolvable of exn;;
-
-class type resolver =
- object
- method init_rep_encoding : rep_encoding -> unit
- method init_warner : collect_warnings -> unit
- method rep_encoding : rep_encoding
- method open_in : ext_id -> Lexing.lexbuf
- method close_in : unit
- method close_all : unit
- method change_encoding : string -> unit
- method clone : resolver
- end
-;;
-
-
-class virtual resolve_general
- =
- object (self)
- val mutable internal_encoding = `Enc_utf8
-
- val mutable encoding = `Enc_utf8
- val mutable encoding_requested = false
-
- val mutable warner = new drop_warnings
-
- val mutable enc_initialized = false
- val mutable wrn_initialized = false
-
- val mutable clones = []
-
- method init_rep_encoding e =
- internal_encoding <- e;
- enc_initialized <- true;
-
- method init_warner w =
- warner <- w;
- wrn_initialized <- true;
-
- method rep_encoding = (internal_encoding :> rep_encoding)
-
-(*
- method clone =
- ( {< encoding = `Enc_utf8;
- encoding_requested = false;
- >}
- : # resolver :> resolver )
-*)
-
- method private warn (k:int) =
- (* Called if a character not representable has been found.
- * k is the character code.
- *)
- if k < 0xd800 or (k >= 0xe000 & k <= 0xfffd) or
- (k >= 0x10000 & k <= 0x10ffff) then begin
- warner # warn ("Code point cannot be represented: " ^ string_of_int k);
- end
- else
- raise (WF_error("Code point " ^ string_of_int k ^
- " outside the accepted range of code points"))
-
-
- method private autodetect s =
-      (* If s is at least 4 bytes long, the slot 'encoding' is set to
-       * `Enc_utf16 if a UTF-16/UCS-2 byte order mark (big or little
-       * endian) is found, and to `Enc_utf8 otherwise; the concrete
-       * endianness is detected later by Netconversion.recode.
-       * If s is shorter than 4 bytes, UTF-8 is assumed.
-       *)
- if String.length s < 4 then
- encoding <- `Enc_utf8
- else if String.sub s 0 2 = "\254\255" then
- encoding <- `Enc_utf16
- (* Note: Netconversion.recode will detect the big endianess, too *)
- else if String.sub s 0 2 = "\255\254" then
- encoding <- `Enc_utf16
- (* Note: Netconversion.recode will detect the little endianess, too *)
- else
- encoding <- `Enc_utf8
-
-
- method private virtual next_string : string -> int -> int -> int
- method private virtual init_in : ext_id -> unit
- method virtual close_in : unit
-
- method close_all =
- List.iter (fun r -> r # close_in) clones
-
- method open_in xid =
- assert(enc_initialized && wrn_initialized);
-
- encoding <- `Enc_utf8;
- encoding_requested <- false;
- self # init_in xid; (* may raise Not_competent *)
- (* init_in: may already set 'encoding' *)
-
- let buffer_max = 512 in
- let buffer = String.make buffer_max ' ' in
- let buffer_len = ref 0 in
- let buffer_end = ref false in
- let fillup () =
- if not !buffer_end & !buffer_len < buffer_max then begin
- let l =
- self # next_string buffer !buffer_len (buffer_max - !buffer_len) in
- if l = 0 then
- buffer_end := true
- else begin
- buffer_len := !buffer_len + l
- end
- end
- in
- let consume n =
- let l = !buffer_len - n in
- String.blit buffer n buffer 0 l;
- buffer_len := l
- in
-
- fillup();
- if not encoding_requested then self # autodetect buffer;
-
- Lexing.from_function
- (fun s n ->
- (* TODO: if encoding = internal_encoding, it is possible to
- * avoid copying buffer to s because s can be directly used
- * as buffer.
- *)
-
- fillup();
- if !buffer_len = 0 then
- 0
- else begin
- let m_in = !buffer_len in
- let m_max = if encoding_requested then n else 1 in
- let n_in, n_out, encoding' =
- if encoding = (internal_encoding : rep_encoding :> encoding) &&
- encoding_requested
- then begin
- (* Special case encoding = internal_encoding *)
- String.blit buffer 0 s 0 m_in;
- m_in, m_in, encoding
- end
- else
- Netconversion.recode
- ~in_enc:encoding
- ~in_buf:buffer
- ~in_pos:0
- ~in_len:m_in
- ~out_enc:(internal_encoding : rep_encoding :> encoding)
- ~out_buf:s
- ~out_pos:0
- ~out_len:n
- ~max_chars:m_max
- ~subst:(fun k -> self # warn k; "")
- in
- if n_in = 0 then
- (* An incomplete character at the end of the stream: *)
- raise Netconversion.Malformed_code;
- (* failwith "Badly encoded character"; *)
- encoding <- encoding';
- consume n_in;
- assert(n_out <> 0);
- n_out
- end)
-
- method change_encoding enc =
- if not encoding_requested then begin
- if enc <> "" then begin
- match Netconversion.encoding_of_string enc with
- `Enc_utf16 ->
- (match encoding with
- (`Enc_utf16_le | `Enc_utf16_be) -> ()
- | `Enc_utf16 -> assert false
- | _ ->
- raise(WF_error "Encoding of data stream and encoding declaration mismatch")
- )
- | e ->
- encoding <- e
- end;
- (* else: the autodetected encoding counts *)
- encoding_requested <- true;
- end;
- end
-;;
-
-
-class resolve_read_any_channel ?(auto_close=true) ~channel_of_id =
- object (self)
- inherit resolve_general as super
-
- val f_open = channel_of_id
- val mutable current_channel = None
- val auto_close = auto_close
-
- method private init_in (id:ext_id) =
- if current_channel <> None then
- failwith "Pxp_reader.resolve_read_any_channel # init_in";
- let ch, enc_opt = f_open id in (* may raise Not_competent *)
- begin match enc_opt with
- None -> ()
- | Some enc -> encoding <- enc; encoding_requested <- true
- end;
- current_channel <- Some ch;
-
- method private next_string s ofs len =
- match current_channel with
- None -> failwith "Pxp_reader.resolve_read_any_channel # next_string"
- | Some ch ->
- input ch s ofs len
-
- method close_in =
- match current_channel with
- None -> ()
- | Some ch ->
- if auto_close then close_in ch;
- current_channel <- None
-
- method clone =
- let c = new resolve_read_any_channel
- ?auto_close:(Some auto_close) f_open in
- c # init_rep_encoding internal_encoding;
- c # init_warner warner;
- clones <- c :: clones;
- (c :> resolver)
-
- end
-;;
-
-
-class resolve_read_this_channel1 is_stale ?id ?fixenc ?auto_close ch =
-
- let getchannel = ref (fun xid -> assert false) in
-
- object (self)
- inherit resolve_read_any_channel
- ?auto_close:auto_close
- (fun xid -> !getchannel xid)
- as super
-
- val mutable is_stale = is_stale
-      (* The channel can only be read once. To prevent it from being
-       * opened several times, the flag 'is_stale' is set after the
-       * first use.
- *)
-
- val fixid = id
- val fixenc = fixenc
- val fixch = ch
-
- initializer
- getchannel := self # getchannel
-
- method private getchannel xid =
- begin match fixid with
- None -> ()
- | Some bound_xid ->
- if xid <> bound_xid then raise Not_competent
- end;
- ch, fixenc
-
- method private init_in (id:ext_id) =
- if is_stale then
- raise Not_competent
- else begin
- super # init_in id;
- is_stale <- true
- end
-
- method close_in =
- current_channel <- None
-
- method clone =
- let c = new resolve_read_this_channel1
- is_stale
- ?id:fixid ?fixenc:fixenc ?auto_close:(Some auto_close) fixch
- in
- c # init_rep_encoding internal_encoding;
- c # init_warner warner;
- clones <- c :: clones;
- (c :> resolver)
-
- end
-;;
-
-
-class resolve_read_this_channel =
- resolve_read_this_channel1 false
-;;
-
-
-class resolve_read_any_string ~string_of_id =
- object (self)
- inherit resolve_general as super
-
- val f_open = string_of_id
- val mutable current_string = None
- val mutable current_pos = 0
-
- method private init_in (id:ext_id) =
- if current_string <> None then
- failwith "Pxp_reader.resolve_read_any_string # init_in";
- let s, enc_opt = f_open id in (* may raise Not_competent *)
- begin match enc_opt with
- None -> ()
- | Some enc -> encoding <- enc; encoding_requested <- true
- end;
- current_string <- Some s;
- current_pos <- 0;
-
- method private next_string s ofs len =
- match current_string with
- None -> failwith "Pxp_reader.resolve_read_any_string # next_string"
- | Some str ->
- let l = min len (String.length str - current_pos) in
- String.blit str current_pos s ofs l;
- current_pos <- current_pos + l;
- l
-
- method close_in =
- match current_string with
- None -> ()
- | Some _ ->
- current_string <- None
-
- method clone =
- let c = new resolve_read_any_string f_open in
- c # init_rep_encoding internal_encoding;
- c # init_warner warner;
- clones <- c :: clones;
- (c :> resolver)
- end
-;;
-
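-(* Usage sketch (illustrative; the public ID and the DTD text are made up):
- *   let string_of_id = function
- *       Public ("-//EXAMPLE//DTD example//EN", _) ->
- *         ("<!ELEMENT example EMPTY>", None)
- *     | _ -> raise Not_competent
- *   in
- *   let r = new resolve_read_any_string ~string_of_id in
- *   ...
- *)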
-
-class resolve_read_this_string1 is_stale ?id ?fixenc str =
-
- let getstring = ref (fun xid -> assert false) in
-
- object (self)
- inherit resolve_read_any_string (fun xid -> !getstring xid) as super
-
- val is_stale = is_stale
-      (* A clone must not be opened if the original resolver was already
-       * open (or stale) at the time of cloning; the 'is_stale' flag records this.
- *)
-
- val fixid = id
- val fixenc = fixenc
- val fixstr = str
-
- initializer
- getstring := self # getstring
-
- method private getstring xid =
- begin match fixid with
- None -> ()
- | Some bound_xid ->
- if xid <> bound_xid then raise Not_competent
- end;
- fixstr, fixenc
-
-
- method private init_in (id:ext_id) =
- if is_stale then
- raise Not_competent
- else
- super # init_in id
-
- method clone =
- let c = new resolve_read_this_string1
- (is_stale or current_string <> None)
- ?id:fixid ?fixenc:fixenc fixstr
- in
- c # init_rep_encoding internal_encoding;
- c # init_warner warner;
- clones <- c :: clones;
- (c :> resolver)
- end
-;;
-
-
-class resolve_read_this_string =
- resolve_read_this_string1 false
-;;
-
-
-class resolve_read_url_channel
- ?(base_url = Neturl.null_url)
- ?auto_close
- ~url_of_id
- ~channel_of_url
-
- : resolver
- =
-
- let getchannel = ref (fun xid -> assert false) in
-
- object (self)
- inherit resolve_read_any_channel
- ?auto_close:auto_close
- (fun xid -> !getchannel xid)
- as super
-
- val base_url = base_url
- val mutable own_url = Neturl.null_url
-
- val url_of_id = url_of_id
- val channel_of_url = channel_of_url
-
-
- initializer
- getchannel := self # getchannel
-
- method private getchannel xid =
- let rel_url = url_of_id xid in (* may raise Not_competent *)
-
- try
- (* Now compute the absolute URL: *)
- let abs_url = Neturl.apply_relative_url base_url rel_url in
- (* may raise Malformed_URL *)
-
- (* Simple check whether 'abs_url' is really absolute: *)
- if not(Neturl.url_provides ~scheme:true abs_url)
- then raise Not_competent;
-
- own_url <- abs_url;
- (* FIXME: Copy 'abs_url' ? *)
-
- (* Get and return the channel: *)
- channel_of_url abs_url (* may raise Not_competent *)
- with
- Neturl.Malformed_URL -> raise (Not_resolvable Neturl.Malformed_URL)
- | Not_competent -> raise (Not_resolvable Not_found)
-
- method clone =
- let c =
- new resolve_read_url_channel
- ?base_url:(Some own_url)
- ?auto_close:(Some auto_close)
- ~url_of_id:url_of_id
- ~channel_of_url:channel_of_url
- in
- c # init_rep_encoding internal_encoding;
- c # init_warner warner;
- clones <- c :: clones;
- (c :> resolve_read_url_channel)
- end
-;;
-
-
-type spec = [ `Not_recognized | `Allowed | `Required ]
-
-class resolve_as_file
- ?(file_prefix = (`Allowed :> spec))
- ?(host_prefix = (`Allowed :> spec))
- ?(system_encoding = `Enc_utf8)
- ?url_of_id:passed_url_of_id
- ?channel_of_url:passed_channel_of_url
- ()
- =
-
- let url_syntax =
- let enable_if =
- function
- `Not_recognized -> Neturl.Url_part_not_recognized
- | `Allowed -> Neturl.Url_part_allowed
- | `Required -> Neturl.Url_part_required
- in
- { Neturl.null_url_syntax with
- Neturl.url_enable_scheme = enable_if file_prefix;
- Neturl.url_enable_host = enable_if host_prefix;
- Neturl.url_enable_path = Neturl.Url_part_required;
- Neturl.url_accepts_8bits = true;
- }
- in
-
- let base_url_syntax =
- { Neturl.null_url_syntax with
- Neturl.url_enable_scheme = Neturl.Url_part_required;
- Neturl.url_enable_host = Neturl.Url_part_allowed;
- Neturl.url_enable_path = Neturl.Url_part_required;
- Neturl.url_accepts_8bits = true;
- }
- in
-
- let default_base_url =
- Neturl.make_url
- ~scheme: "file"
- ~host: ""
- ~path: (Neturl.split_path (Sys.getcwd() ^ "/"))
- base_url_syntax
- in
-
- let file_url_of_id xid =
- let file_url_of_sysname sysname =
- (* By convention, we can assume that sysname is a URL conforming
- * to RFC 1738 with the exception that it may contain non-ASCII
- * UTF-8 characters.
- *)
- try
- Neturl.url_of_string url_syntax sysname
- (* may raise Malformed_URL *)
- with
- Neturl.Malformed_URL -> raise Not_competent
- in
- let url =
- match xid with
- Anonymous -> raise Not_competent
- | Public (_,sysname) -> if sysname <> "" then file_url_of_sysname sysname
- else raise Not_competent
- | System sysname -> file_url_of_sysname sysname
- in
- let scheme =
- try Neturl.url_scheme url with Not_found -> "file" in
- let host =
- try Neturl.url_host url with Not_found -> "" in
-
- if scheme <> "file" then raise Not_competent;
- if host <> "" && host <> "localhost" then raise Not_competent;
-
- url
- in
-
- let channel_of_file_url url =
- try
- let path_utf8 =
- try Neturl.join_path (Neturl.url_path ~encoded:false url)
- with Not_found -> raise Not_competent
- in
-
- let path =
- Netconversion.recode_string
- ~in_enc: `Enc_utf8
- ~out_enc: system_encoding
- path_utf8 in
-          (* May raise Netconversion.Malformed_code *)
-
- open_in_bin path, None
- (* May raise Sys_error *)
-
- with
- | Netconversion.Malformed_code -> assert false
- (* should not happen *)
-
- in
-
- let url_of_id id =
- match passed_url_of_id with
- None ->
- file_url_of_id id
- | Some f ->
- begin
- try f id
- with
- Not_competent -> file_url_of_id id
- end
- in
-
- let channel_of_url url =
- match passed_channel_of_url with
- None ->
- channel_of_file_url url
- | Some f ->
- begin
- try f url
- with
- Not_competent -> channel_of_file_url url
- end
- in
-
- resolve_read_url_channel
- ~base_url: default_base_url
- ~auto_close: true
- ~url_of_id: url_of_id
- ~channel_of_url: channel_of_url
-;;
-
-
-class combine ?prefer rl =
- object (self)
- val prefered_resolver = prefer
- val resolvers = (rl : resolver list)
- val mutable internal_encoding = `Enc_utf8
- val mutable warner = new drop_warnings
- val mutable active_resolver = None
- val mutable clones = []
-
- method init_rep_encoding enc =
- List.iter
- (fun r -> r # init_rep_encoding enc)
- rl;
- internal_encoding <- enc
-
- method init_warner w =
- List.iter
- (fun r -> r # init_warner w)
- rl;
- warner <- w;
-
- method rep_encoding = internal_encoding
- (* CAUTION: This may not be the truth! *)
-
- method open_in xid =
- let rec find_competent_resolver rl =
- match rl with
- r :: rl' ->
- begin try
- r, (r # open_in xid)
- with
- Not_competent -> find_competent_resolver rl'
- end;
- | [] ->
- raise Not_competent
- in
-
- if active_resolver <> None then failwith "Pxp_reader.combine # open_in";
- let r, lb =
- match prefered_resolver with
- None -> find_competent_resolver resolvers
- | Some r -> find_competent_resolver (r :: resolvers)
- in
- active_resolver <- Some r;
- lb
-
- method close_in =
- match active_resolver with
- None -> ()
- | Some r -> r # close_in;
- active_resolver <- None
-
- method close_all =
- List.iter (fun r -> r # close_in) clones
-
- method change_encoding (enc:string) =
- match active_resolver with
- None -> failwith "Pxp_reader.combine # change_encoding"
- | Some r -> r # change_encoding enc
-
- method clone =
- let c =
- match active_resolver with
- None ->
- new combine ?prefer:None (List.map (fun q -> q # clone) resolvers)
- | Some r ->
- let r' = r # clone in
- new combine
- ?prefer:(Some r')
- (List.map
- (fun q -> if q == r then r' else q # clone)
- resolvers)
- in
- c # init_rep_encoding internal_encoding;
- c # init_warner warner;
- clones <- c :: clones;
- c
- end
-
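-(* Usage sketch (illustrative): combine a one-shot channel resolver with
- * file lookup, so that external entities referenced from the document can
- * still be loaded from the file system:
- *   let r = new combine [ (new resolve_read_this_channel stdin :> resolver);
- *                         (new resolve_as_file () :> resolver) ]
- *)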
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.9 2000/08/14 22:24:55 gerd
- * Moved the module Pxp_encoding to the netstring package under
- * the new name Netconversion.
- *
- * Revision 1.8 2000/07/16 18:31:09 gerd
- * The exception Illegal_character has been dropped.
- *
- * Revision 1.7 2000/07/09 15:32:01 gerd
- * Fix in resolve_this_channel, resolve_this_string
- *
- * Revision 1.6 2000/07/09 01:05:33 gerd
- * New methode 'close_all' that closes the clones, too.
- *
- * Revision 1.5 2000/07/08 16:24:56 gerd
- * Introduced the exception 'Not_resolvable' to indicate that
- * 'combine' should not try the next resolver of the list.
- *
- * Revision 1.4 2000/07/06 23:04:46 gerd
- * Quick fix for 'combine': The active resolver is "prefered",
- * but the other resolvers are also used.
- *
- * Revision 1.3 2000/07/06 21:43:45 gerd
- * Fix: Public(_,name) is now treated as System(name) if
- * name is non-empty.
- *
- * Revision 1.2 2000/07/04 22:13:30 gerd
- * Implemented the new API rev. 1.2 of pxp_reader.mli.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_reader.ml:
- *
- * Revision 1.3 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.2 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.1 2000/03/13 23:41:44 gerd
- * Initial revision; this code was formerly part of Markup_entity.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-open Pxp_types;;
-
-exception Not_competent;;
- (* Raised by the 'open_in' method if the object does not know how to
- * handle the passed external ID.
- *)
-
-exception Not_resolvable of exn;;
- (* Indicates that one resolver was competent, but there was an error
- * while resolving the external ID. The passed exception explains the
- * reason.
- * Not_resolvable(Not_found) serves as indicator for an unknown reason.
- *)
-
-
-(* The class type 'resolver' is the official type of all "resolvers".
- * Resolvers take file names (or better, external identifiers) and
- * return lexbufs, scanning the file for tokens. Resolvers may be
- * cloned, and clones can interpret relative file names relative to
- * their creator.
- *
- * Example of the latter:
- *
- * Resolver r reads from file:/dir/f1.xml
- *
- * <tag>some XML text
- * &e; -----> Entity e is bound to "subdir/f2.xml"
- * </tag> Step (1): let r' = "clone of r"
- * Step (2): open file "subdir/f2.xml"
- *
- * r' must still know the directory of the file r is reading, otherwise
- * it would not be able to resolve "subdir/f2.xml" = "file:/dir/subdir/f2.xml".
- *
- * Actually, this example can be coded as:
- *
- * let r = new resolve_as_file () in
- * let lbuf = r # open_in (System "file:/dir/f1.xml") in
- * ... read from lbuf ...
- * let r' = r # clone in
- * let lbuf' = r' # open_in (System "subdir/f2.xml") in
- * ... read from lbuf' ...
- * r' # close_in;
- * ... read from lbuf ...
- * r # close_in;
- *)
-
-class type resolver =
- object
- (* A resolver can open an input source, and returns this source as
- * Lexing.lexbuf.
- *
- * After creating a resolver, one must invoke the two methods
- * init_rep_encoding and init_warner to set the internal encoding of
- * strings and the warner object, respectively. This is normally
- * done by the parsing functions in Pxp_yacc.
- * It is not necessary to invoke these two methods for a fresh
- * clone.
- *
- * It is possible that the character encoding of the source and the
- * internal encoding of the parser are different. To cope with this,
- * one of the tasks of the resolver is to recode the characters of
- * the input source into the internal character encoding.
- *
- * Note that there are several ways of determining the encoding of the
- * input: (1) It is possible that the transport protocol (e.g. HTTP)
- * transmits the encoding, and (2) it is possible to inspect the beginning
- * of the file, and to analyze:
- * (2.1) The first two bytes indicate whether UTF-16 is used
- * (2.2) Otherwise, one can assume that an ASCII-compatible character
- * set is used. It is now possible to read the XML declaration
- * <?xml ... encoding="xyz" ...?>. The encoding found here is
- * to be used.
- * (2.3) If the XML declaration is missing, the encoding is UTF-8.
- * The resolver needs only to distinguish between cases (1), (2.1),
- * and the rest.
- * The details of analyzing whether (2.2) or (2.3) applies are programmed
- * elsewhere, and the resolver will be told the result (see below).
- *
- * A resolver is like a file: it must be opened before one can work
- * with it, and it should be closed after all operations on it have been
- * done. The method 'open_in' is called with the external ID as argument
- * and it must return the lexbuf reading from the external resource.
- * The method 'close_in' does not require an argument.
- *
- * It is allowed to re-open a resolver after it has been closed. It is
- * forbidden to open a resolver again while it is open.
- * It is allowed to close a resolver several times: If 'close_in' is
- * invoked while the resolver is already closed, nothing happens.
- *
- * The method 'open_in' may raise Not_competent to indicate that this
- * resolver is not able to open this type of IDs.
- *
- * The method 'change_encoding' is called from the parser after the
- * analysis of case (2) has been done; the argument is either the
- * string name of the encoding, or the empty string to indicate
- * that no XML declaration was found. It is guaranteed that
- * 'change_encoding' is invoked after only a few tokens of the
- * file. The resolver should react as follows:
- * - If case (1) applies: Ignore the encoding passed to 'change_encoding'.
- * - If case (2.1) applies: The encoding passed to 'change_encoding' must
- * be compatible with UTF-16. This should be
- * checked, and violations should be reported.
- * - Else: If the passed encoding is "", assume UTF-8.
- * Otherwise, assume the passed encoding.
- *
- * The following rule helps synchronizing the lexbuf with the encoding:
- * If the resolver has been opened, but 'change_encoding' has not yet
- * been invoked, the lexbuf contains at most one character (which may
- * be represented by multiple bytes); i.e. the lexbuf is created by
- * Lexing.from_function, and the function puts only one character into
- * the buffer at once.
- * After 'change_encoding' has been invoked, there is no longer a limit
- * on the lexbuf size.
- *
- * The reason for this rule is that you know exactly the character where
- * the encoding changes to the encoding passed by 'change_encoding'.
- *
- * The method 'clone' may be invoked for open or closed resolvers.
- * Basically, 'clone' returns a new resolver which is always closed.
- * If the original resolver is closed, the clone is simply a clone.
- * If the original resolver is open at the moment of cloning:
- * If the clone is later opened for a relative system ID (i.e. relative
- * URL), the clone must interpret this ID relative to the ID of the
- * original resolver.
- *)
- method init_rep_encoding : rep_encoding -> unit
- method init_warner : collect_warnings -> unit
-
- method rep_encoding : rep_encoding
-
- method open_in : ext_id -> Lexing.lexbuf
- (* May raise Not_competent if the object does not know how to handle
- * this ext_id.
- *)
- method close_in : unit
- method change_encoding : string -> unit
-
-
- (* Every resolver can be cloned. The clone does not inherit the connection
- * with the external object, i.e. it is initially closed.
- *)
- method clone : resolver
-
- method close_all : unit
- (* Closes this resolver and every clone *)
-
- end
-;;
-
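-(* Protocol sketch (illustrative; normally the functions in Pxp_yacc drive
- * the resolver, and the file name below is made up):
- *   r # init_rep_encoding `Enc_utf8;
- *   r # init_warner (new drop_warnings);
- *   let lexbuf = r # open_in (System "doc.xml") in
- *   ...                            (* the parser reads a few tokens *)
- *   r # change_encoding "";        (* no XML declaration was found *)
- *   ...
- *   r # close_in
- *)
-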
-(* Note: resolve_general is no longer exported. In most cases, the classes
- * resolve_read_any_channel or resolve_read_any_string are applicable, too,
- * and much easier to configure.
- *)
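-
-(* TYPICAL CALL SEQUENCE (an illustrative sketch only, derived from the
- * method descriptions above; 'r' stands for any resolver object, and the
- * system ID is made up):
- *
- * let lexbuf = r # open_in (System "sample.xml") in
- *     - the parser reads the XML declaration from lexbuf
- * r # change_encoding "ISO-8859-1"
- *     - or r # change_encoding "" if no XML declaration was found
- * ... the parser reads the rest of the entity ...
- * r # close_in
- *)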
-
-
-(* The next classes are resolvers for concrete input sources. *)
-
-class resolve_read_this_channel :
- ?id:ext_id -> ?fixenc:encoding -> ?auto_close:bool ->
- in_channel -> resolver;;
-
- (* Reads from the passed channel (it may be even a pipe). If the ~id
- * argument is passed to the object, the created resolver accepts only
- * this ID. Otherwise all IDs are accepted.
- * Once the resolver has been cloned, the clone does not accept any ID. This
- * means that this resolver cannot handle inner references to external
- * entities. Note that you can combine this resolver with another resolver
- * that can handle inner references (such as resolve_as_file); see
- * class 'combine' below.
- * If you pass the ~fixenc argument, the encoding of the channel is
- * set to the passed value, regardless of any auto-recognition or
- * any XML declaration.
- * If ?auto_close = true (which is the default), the channel is
- * closed after use. If ?auto_close = false, the channel is left open.
- *)
-
-
-class resolve_read_any_channel :
- ?auto_close:bool ->
- channel_of_id:(ext_id -> (in_channel * encoding option)) ->
- resolver;;
-
- (* resolve_read_any_channel f_open:
- * This resolver calls the function f_open to open a new channel for
- * the passed ext_id. This function must either return the channel and
- * the encoding, or it must fail with Not_competent.
- * The function must return None as encoding if the default mechanism to
- * recognize the encoding should be used. It must return Some e if it is
- * already known that the encoding of the channel is e.
- * If ?auto_close = true (which is the default), the channel is
- * closed after use. If ?auto_close = false, the channel is left open.
- *)
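-
- (* Example (an illustrative sketch, not part of the interface; the file
- * name "catalog.xml" is made up):
- *
- * let r = new resolve_read_any_channel
- *           ~channel_of_id:(function
- *                             System "catalog.xml" ->
- *                               (open_in "catalog.xml", None)
- *                           | _ -> raise Not_competent)
- * - r accepts only the ID System "catalog.xml"; returning None as encoding
- *   lets the default recognition mechanism run
- *)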
-
-
-class resolve_read_url_channel :
- ?base_url:Neturl.url ->
- ?auto_close:bool ->
- url_of_id:(ext_id -> Neturl.url) ->
- channel_of_url:(Neturl.url -> (in_channel * encoding option)) ->
- resolver;;
-
- (* resolve_read_url_channel url_of_id channel_of_url:
- *
- * When this resolver gets an ID to read from, it calls the function
- * ~url_of_id to get the corresponding URL. This URL may be a relative
- * URL; however, a URL scheme must be used which contains a path.
- * The resolver converts the URL to an absolute URL if necessary.
- * The second function, ~channel_of_url, is fed with the absolute URL
- * as input. This function opens the resource to read from, and returns
- * the channel and the encoding of the resource.
- *
- * Both functions, ~url_of_id and ~channel_of_url, can raise
- * Not_competent to indicate that the object is not able to read from
- * the specified resource. However, there is a difference: A Not_competent
- * from ~url_of_id is left as it is, but a Not_competent from ~channel_of_url
- * is converted to Not_resolvable. So only ~url_of_id decides which URLs
- * are accepted by the resolver and which not.
- *
- * The function ~channel_of_url must return None as encoding if the default
- * mechanism to recognize the encoding should be used. It must return
- * Some e if it is already known that the encoding of the channel is e.
- *
- * If ?auto_close = true (which is the default), the channel is
- * closed after use. If ?auto_close = false, the channel is left open.
- *
- * Objects of this class contain a base URL relative to which relative
- * URLs are interpreted. When creating a new object, you can specify
- * the base URL by passing it as ~base_url argument. When an existing
- * object is cloned, the base URL of the clone is the URL of the original
- * object.
- *
- * Note that the term "base URL" has a strict definition in RFC 1808.
- *)
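-
- (* Example (an illustrative sketch; 'path_of_url' is a hypothetical helper
- * that maps a file URL to a local path, it is not part of this module):
- *
- * let r = new resolve_read_url_channel
- *           ~url_of_id:(function
- *                         System sysid ->
- *                           Neturl.url_of_string Neturl.ip_url_syntax sysid
- *                       | _ -> raise Not_competent)
- *           ~channel_of_url:(fun url ->
- *                              (open_in (path_of_url url), None))
- * - r accepts system IDs that parse as URLs; relative URLs are made
- *   absolute with respect to the base URL as described above
- *)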
-
-
-class resolve_read_this_string :
- ?id:ext_id -> ?fixenc:encoding -> string -> resolver;;
-
- (* Reads from the passed string. If the ~id
- * argument is passed to the object, the created resolver accepts only
- * this ID. Otherwise all IDs are accepted.
- * Once the resolver has been cloned, the clone does not accept any ID. This
- * means that this resolver cannot handle inner references to external
- * entities. Note that you can combine this resolver with another resolver
- * that can handle inner references (such as resolve_as_file); see
- * class 'combine' below.
- * If you pass the ~fixenc argument, the encoding of the string is
- * set to the passed value, regardless of any auto-recognition or
- * any XML declaration.
- *)
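-
- (* Example (an illustrative sketch): a resolver serving one document from
- * memory, forcing ISO-8859-1 regardless of auto-recognition:
- *
- * let r = new resolve_read_this_string
- *           ~fixenc:`Enc_iso88591
- *           "<?xml version='1.0'?><doc>text</doc>"
- *)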
-
-
-class resolve_read_any_string :
- string_of_id:(ext_id -> (string * encoding option)) -> resolver;;
-
- (* resolve_read_any_string f_open:
- * This resolver calls the function f_open to get the string for
- * the passed ext_id. This function must either return the string and
- * the encoding, or it must fail with Not_competent.
- * The function must return None as encoding if the default mechanism to
- * recognize the encoding should be used. It must return Some e if it is
- * already known that the encoding of the string is e.
- *)
-
-
-class resolve_as_file :
- ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
- ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
- ?system_encoding:encoding ->
- ?url_of_id:(ext_id -> Neturl.url) ->
- ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
- unit ->
- resolver;;
-
- (* Reads from the local file system. Every file name is interpreted as
- * file name of the local file system, and the referred file is read.
- *
- * The full form of a file URL is: file://host/path, where
- * 'host' specifies the host system where the file identified by 'path'
- * resides. host = "" or host = "localhost" are accepted; other values
- * will raise Not_competent. The standard for file URLs is
- * defined in RFC 1738.
- *
- * Option ~file_prefix: Specifies how the "file:" prefix of file names
- * is handled:
- * `Not_recognized: The prefix is not recognized.
- * `Allowed: The prefix is allowed but not required (the default).
- * `Required: The prefix is required.
- *
- * Option ~host_prefix: Specifies how the "//host" phrase of file names
- * is handled:
- * `Not_recognized: The phrase is not recognized.
- * `Allowed: The phrase is allowed but not required (the default).
- * `Required: The phrase is required.
- *
- * Option ~system_encoding: Specifies the encoding of file names of
- * the local file system. Default: UTF-8.
- *
- * Options ~url_of_id, ~channel_of_url: Not for the end user!
- *)
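-
- (* Example (an illustrative sketch): a resolver that requires the "file:"
- * prefix and expects ISO-8859-1 file names on the local system:
- *
- * let r = new resolve_as_file
- *           ~file_prefix:`Required
- *           ~system_encoding:`Enc_iso88591
- *           ()
- *)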
-
-
-class combine : ?prefer:resolver -> resolver list -> resolver;;
-
- (* Combines several resolver objects. If a concrete entity with an
- * ext_id is to be opened, the combined resolver tries the contained
- * resolvers in turn until a resolver accepts opening the entity
- * (i.e. it does not raise Not_competent on open_in).
- *
- * Clones: If the 'clone' method is invoked before 'open_in', all contained
- * resolvers are cloned and again combined. If the 'clone' method is
- * invoked after 'open_in' (i.e. while the resolver is open), only the
- * active resolver is cloned.
- *)
-
-(* EXAMPLES OF RESOLVERS:
- *
- * let r1 = new resolve_as_file
- * - r1 can open all local files
- *
- * let r2 = new resolve_read_this_channel
- * ~id:"file:/dir/f.xml"
- * (open_in "/dir/f.xml")
- * - r2 can only read /dir/f.xml of the local file system. If this file
- * contains references to other files, r2 will fail
- *
- * let r3 = new combine [ r2; r1 ]
- * - r3 reads /dir/f.xml of the local file system by calling r2, and all
- * other files by calling r1
- *)
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.5 2000/07/09 01:05:33 gerd
- * New method 'close_all' that closes the clones, too.
- *
- * Revision 1.4 2000/07/08 16:24:56 gerd
- * Introduced the exception 'Not_resolvable' to indicate that
- * 'combine' should not try the next resolver of the list.
- *
- * Revision 1.3 2000/07/06 23:04:46 gerd
- * Quick fix for 'combine': The active resolver is "preferred",
- * but the other resolvers are also used.
- *
- * Revision 1.2 2000/07/04 22:06:49 gerd
- * MAJOR CHANGE: Complete redesign of the reader classes.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_reader.mli:
- *
- * Revision 1.3 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.2 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.1 2000/03/13 23:41:54 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
- *)
-
-type ext_id =
- System of string
- | Public of (string * string)
- | Anonymous
-
-
-type dtd_id =
- External of ext_id
- | Derived of ext_id
- | Internal
-;;
-
-type content_model_type =
- Unspecified
- | Empty
- | Any
- | Mixed of mixed_spec list
- | Regexp of regexp_spec
-
-and mixed_spec =
- MPCDATA
- | MChild of string
-
-and regexp_spec =
- Optional of regexp_spec
- | Repeated of regexp_spec
- | Repeated1 of regexp_spec
- | Alt of regexp_spec list
- | Seq of regexp_spec list
- | Child of string
-;;
-
-
-type att_type =
- A_cdata
- | A_id
- | A_idref
- | A_idrefs
- | A_entity
- | A_entities
- | A_nmtoken
- | A_nmtokens
- | A_notation of string list
- | A_enum of string list
-;;
-
-
-type att_default =
- D_required
- | D_implied
- | D_default of string (* The default value is already expanded *)
- | D_fixed of string (* The default value is already expanded *)
-;;
-
-
-type att_value =
- Value of string
- | Valuelist of string list
- | Implied_value
-;;
-
-
-class type collect_warnings =
- object
- method warn : string -> unit
- end
-;;
-
-
-class drop_warnings =
- object
- method warn (w:string) = ()
- end
-;;
-
-
-type encoding = Netconversion.encoding;;
-
-type rep_encoding =
- (* The subset of 'encoding' that may be used for internal representation
- * of strings.
- *)
- [ `Enc_utf8 (* UTF-8 *)
- | `Enc_iso88591 (* ISO-8859-1 *)
- ]
-;;
-
-
-exception Validation_error of string
-
-exception WF_error of string
-
-exception Error of string
-
-exception Character_not_supported
-
-exception At of (string * exn)
-
-exception Undeclared
-
-
-let rec string_of_exn x0 =
- match x0 with
- At (s, x) ->
- s ^ string_of_exn x
- | Validation_error s ->
- "ERROR (Validity constraint): " ^ s
- | WF_error s ->
- "ERROR (Well-formedness constraint): " ^ s
- | Error s ->
- "ERROR: " ^ s
- | Character_not_supported ->
- "RESTRICTION: Character not supported"
- | Netconversion.Malformed_code ->
- "ERROR: Bad character stream"
- | Undeclared ->
- "INFORMATION: Undeclared"
- | Parsing.Parse_error ->
- "SYNTAX ERROR"
- | _ ->
- "Other exception: " ^ Printexc.to_string x0
-;;
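-
-(* Illustration (not part of the original source): nested At(_,_) wrappers
- * are flattened into a single message, e.g.
- *   string_of_exn (At("In entity [dtd]: ", Validation_error "root mismatch"))
- * evaluates to
- *   "In entity [dtd]: ERROR (Validity constraint): root mismatch"
- *)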
-
-
-type output_stream =
- Out_buffer of Buffer.t
- | Out_channel of out_channel
- | Out_function of (string -> int -> int -> unit)
-;;
-
-
-let write os str pos len =
- match os with
- Out_buffer b -> Buffer.add_substring b str pos len
- | Out_channel ch -> output ch str pos len
- | Out_function f -> f str pos len
-;;
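-
-(* Usage sketch (illustrative only): the same call works for all three
- * output_stream variants.
- *
- *   let buf = Buffer.create 16 in
- *   write (Out_buffer buf) "<root/>" 0 7
- *   (* Buffer.contents buf is now "<root/>" *)
- *)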
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.7 2000/08/14 22:24:55 gerd
- * Moved the module Pxp_encoding to the netstring package under
- * the new name Netconversion.
- *
- * Revision 1.6 2000/07/27 00:41:15 gerd
- * new 8 bit codes
- *
- * Revision 1.5 2000/07/16 18:31:09 gerd
- * The exception Illegal_character has been dropped.
- *
- * Revision 1.4 2000/07/14 21:25:27 gerd
- * Simplified the type 'collect_warnings'.
- *
- * Revision 1.3 2000/07/08 16:23:50 gerd
- * Added the exception 'Error'.
- *
- * Revision 1.2 2000/07/04 22:14:05 gerd
- * Implemented the changes of rev. 1.2 of pxp_types.mli.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_types.ml:
- *
- * Revision 1.7 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.6 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.5 2000/05/01 20:43:19 gerd
- * New type output_stream; new function 'write'.
- *
- * Revision 1.4 1999/09/01 16:25:35 gerd
- * Dropped Illegal_token and Content_not_allowed_here. WF_error can
- * be used instead.
- *
- * Revision 1.3 1999/08/15 02:22:33 gerd
- * Added exception Undeclared.
- *
- * Revision 1.2 1999/08/14 22:14:58 gerd
- * New class "collect_warnings".
- *
- * Revision 1.1 1999/08/10 00:35:52 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
- *)
-
-
-type ext_id =
- System of string
- | Public of (string * string)
- | Anonymous
-
- (* external identifiers are either "system identifiers" (filenames or URLs),
- * or "public identifiers" Public(id,sysid) where "id" is the representation
- * of the public ID, and "sysid" a fallback system ID, or the empty string.
- *
- * New in PXP: Sometimes the external ID is not known. This case can be
- * referred to as Anonymous ID.
- *
- * Encoding: The identifiers are _always_ encoded as UTF8 strings,
- * regardless of whether another encoding is configured for the parser.
- * TODO: implement this
- *)
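-
- (* Examples (illustrative only; the identifiers are made up):
- *   System "http://somewhere/sample.dtd"
- *   Public ("-//ACME//DTD Sample 1.0//EN", "sample.dtd")
- *   Public ("-//ACME//DTD Sample 1.0//EN", "")   - without a fallback
- *)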
-
-
-type dtd_id =
- External of ext_id (* DTD is completely external *)
- | Derived of ext_id (* DTD is derived from an external DTD *)
- | Internal (* DTD is completely internal *)
-;;
-
-type content_model_type =
- Unspecified (* A specification of the model has not yet been
- * found
- *)
- | Empty (* Nothing is allowed as content *)
- | Any (* Everything is allowed as content *)
- | Mixed of mixed_spec list (* The contents consist of elements and PCDATA
- * in arbitrary order. What is allowed in
- * particular is given as mixed_spec.
- *)
- | Regexp of regexp_spec (* The contents are elements following this regular
- * expression
- *)
-
-and mixed_spec =
- MPCDATA (* PCDATA children are allowed *)
- | MChild of string (* This kind of Element is allowed *)
-
-and regexp_spec =
- Optional of regexp_spec (* subexpression? *)
- | Repeated of regexp_spec (* subexpression* *)
- | Repeated1 of regexp_spec (* subexpression+ *)
- | Alt of regexp_spec list (* subexpr1 | subexpr2 | ... | subexprN *)
- | Seq of regexp_spec list (* subexpr1 , subexpr2 , ... , subexprN *)
- | Child of string (* This kind of Element is allowed here *)
-;;
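-
-(* Example (illustrative only): the DTD content model
- *   (title, (par | note)*)
- * corresponds to the value
- *   Regexp (Seq [ Child "title"; Repeated (Alt [ Child "par"; Child "note" ]) ])
- * and the model (#PCDATA | em)* corresponds to
- *   Mixed [ MPCDATA; MChild "em" ]
- *)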
-
-
-type att_type =
- A_cdata (* CDATA *)
- | A_id (* ID *)
- | A_idref (* IDREF *)
- | A_idrefs (* IDREFS *)
- | A_entity (* ENTITY *)
- | A_entities (* ENTITIES *)
- | A_nmtoken (* NMTOKEN *)
- | A_nmtokens (* NMTOKENS *)
- | A_notation of string list (* NOTATION (name1 | name2 | ... | nameN) *)
- | A_enum of string list (* (name1 | name2 | ... | nameN) *)
-;;
-
-
-type att_default =
- D_required (* #REQUIRED *)
- | D_implied (* #IMPLIED *)
- | D_default of string (* <value> -- The value is already expanded *)
- | D_fixed of string (* FIXED <value> -- The value is already expanded *)
-;;
-
-
-type att_value =
- Value of string (* a single value *)
- | Valuelist of string list (* a list of values *)
- | Implied_value (* a value left out *)
-;;
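-
-(* Sketch of the intended mapping (an assumption for illustration, not a
- * normative statement): a CDATA attribute yields Value "...", a list-valued
- * type such as NMTOKENS presumably yields Valuelist ["tok1"; "tok2"], and an
- * attribute that was declared #IMPLIED but not specified yields
- * Implied_value.
- *)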
-
-
-class type collect_warnings =
- object
- method warn : string -> unit
- end
-;;
-
-
-class drop_warnings : collect_warnings;;
-
-
-type encoding = Netconversion.encoding;;
- (* We accept all encodings for character sets which are defined in
- * Netconversion (package netstring).
- *)
-
-type rep_encoding =
- (* The subset of 'encoding' that may be used for internal representation
- * of strings.
- * Note: The following encodings are ASCII-compatible! This is an important
- * property used throughout the whole PXP code.
- *)
- [ `Enc_utf8 (* UTF-8 *)
- | `Enc_iso88591 (* ISO-8859-1 *)
- ]
-;;
-
-
-exception Validation_error of string
- (* Violation of a validity constraint *)
-
-exception WF_error of string
- (* Violation of a well-formedness constraint *)
-
-exception Error of string
- (* Other error *)
-
-exception Character_not_supported
-
-exception At of (string * exn)
- (* The string is a description of where the exn happened. The exn value can
- * again be At(_,_) (for example, when an entity within an entity causes
- * the error).
- *)
-
-exception Undeclared
- (* Indicates that no declaration is available and because of this every kind
- * of usage is allowed.
- *)
-
-val string_of_exn : exn -> string
- (* Converts a Markup exception into a readable string *)
-
-
-type output_stream =
- Out_buffer of Buffer.t
- | Out_channel of out_channel
- | Out_function of (string -> int -> int -> unit)
-
-val write : output_stream -> string -> int -> int -> unit
- (* write os s pos len: Writes the string to the buffer/channel/stream *)
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.8 2000/08/14 22:24:55 gerd
- * Moved the module Pxp_encoding to the netstring package under
- * the new name Netconversion.
- *
- * Revision 1.7 2000/07/27 00:41:15 gerd
- * new 8 bit codes
- *
- * Revision 1.6 2000/07/16 18:31:09 gerd
- * The exception Illegal_character has been dropped.
- *
- * Revision 1.5 2000/07/16 16:34:21 gerd
- * Updated comments.
- *
- * Revision 1.4 2000/07/14 21:25:27 gerd
- * Simplified the type 'collect_warnings'.
- *
- * Revision 1.3 2000/07/08 16:23:50 gerd
- * Added the exception 'Error'.
- *
- * Revision 1.2 2000/07/04 22:08:26 gerd
- * type ext_id: New variant Anonymous. - The System and Public
- * variants are now encoded as UTF-8.
- * collect_warnings is now a class type only. New class
- * drop_warnings.
- * New functions encoding_of_string and string_of_encoding.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from Markup_types.mli:
- *
- * Revision 1.7 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.6 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.5 2000/05/01 20:43:25 gerd
- * New type output_stream; new function 'write'.
- *
- * Revision 1.4 1999/09/01 16:25:35 gerd
- * Dropped Illegal_token and Content_not_allowed_here. WF_error can
- * be used instead.
- *
- * Revision 1.3 1999/08/15 02:22:40 gerd
- * Added exception Undeclared.
- *
- * Revision 1.2 1999/08/14 22:15:17 gerd
- * New class "collect_warnings".
- *
- * Revision 1.1 1999/08/10 00:35:52 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-open Pxp_types;;
-open Pxp_lexer_types;;
-
-Pxp_lexers.init_utf8
- { lex_encoding = `Enc_utf8;
- scan_document = Pxp_lex_document_utf8.scan_document;
- scan_content = Pxp_lex_content_utf8.scan_content;
- scan_within_tag = Pxp_lex_within_tag_utf8.scan_within_tag;
- scan_document_type = Pxp_lex_document_type_utf8.
- scan_document_type;
- scan_declaration = Pxp_lex_declaration_utf8.scan_declaration;
- scan_content_comment = Pxp_lex_misc_utf8.scan_content_comment;
- scan_decl_comment = Pxp_lex_misc_utf8.scan_decl_comment;
- scan_document_comment = Pxp_lex_misc_utf8.scan_document_comment;
- scan_ignored_section = Pxp_lex_name_string_utf8.scan_ignored_section;
- scan_xml_pi = Pxp_lex_misc_utf8.scan_xml_pi;
- scan_dtd_string = Pxp_lex_dtd_string_utf8.scan_dtd_string;
- scan_content_string = Pxp_lex_content_string_utf8.
- scan_content_string;
- scan_name_string = Pxp_lex_name_string_utf8.scan_name_string;
- scan_only_xml_decl = Pxp_lex_misc_utf8.scan_only_xml_decl;
- scan_for_crlf = Pxp_lex_misc_utf8.scan_for_crlf;
- }
-;;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.3 2000/06/04 20:31:44 gerd
- * Updated.
- *
- * Revision 1.2 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.1 2000/05/23 00:08:48 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-(* This is a module without interface. Its initialization part sets up
- * the UTF-8 lexers.
- * Link with this module if you want to use the UTF-8 lexers!
- *)
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.1 2000/05/23 00:08:48 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$ -*- tuareg -*-
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-open Parsing
-open Pxp_types
-open Pxp_lexer_types
-open Pxp_dtd
-open Pxp_entity
-open Pxp_document
-open Pxp_aux
-
-(* Some types from the interface definition: *)
-
-exception ID_not_unique
-
-class type [ 'ext ] index =
-object
- constraint 'ext = 'ext node #extension
- method add : string -> 'ext node -> unit
- method find : string -> 'ext node
-end
-
-
-type config =
- { warner : collect_warnings;
- errors_with_line_numbers : bool;
- enable_pinstr_nodes : bool;
- enable_super_root_node : bool;
- enable_comment_nodes : bool;
- encoding : rep_encoding;
- recognize_standalone_declaration : bool;
- store_element_positions : bool;
- idref_pass : bool;
- validate_by_dfa : bool;
- accept_only_deterministic_models : bool;
- debugging_mode : bool;
- }
-
-type source =
- Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
- | ExtID of (ext_id * Pxp_reader.resolver)
-
-
-type start_symbol =
- Ext_document
- | Ext_declarations
- | Ext_element
-
-
-type context =
- { mutable current : unit -> token; (* get the current token *)
- mutable get_next : unit -> token; (* go on to the next token; return it *)
- mutable current_token : token; (* This is the current token *)
- mutable manager : entity_manager; (* The entity manager *)
- }
-
-
-let make_context entity_manager =
- let c =
- { current = (fun _ -> assert false);
- get_next = (fun _ -> assert false);
- current_token = Eof;
- manager = entity_manager;
- }
- in
- (* Note that the function which is stored in get_next_ref can be changed
- * as a side-effect when an entity is opened or closed. The function in
- * c.get_next must be programmed such that always the current "get_next"
- * function is executed.
- *)
- let get_next_ref = entity_manager # yy_get_next_ref in
- c.current <- (fun () -> c.current_token);
- c.get_next <- (fun () -> let tok = !get_next_ref() in
- c.current_token <- tok;
- tok);
- ignore(c.get_next());
- c
-;;
-
-
-let from_channel ?system_encoding ?id:init_id ?fixenc ch =
-
- (* Reading from a channel works by modifying the algorithm of
- * resolve_as_file.
- *)
-
- let url_syntax = (* A syntax suitable for "file" URLs *)
- { Neturl.null_url_syntax with
- Neturl.url_enable_scheme = Neturl.Url_part_allowed;
- Neturl.url_enable_host = Neturl.Url_part_allowed;
- Neturl.url_enable_path = Neturl.Url_part_required;
- Neturl.url_accepts_8bits = true;
- }
- in
-
- let an_url =
- Neturl.make_url
- ~scheme: "file"
- ~host: ""
- ~path: [ "" ]
- url_syntax
- in
-
- let init_channel_done = ref false in
- (* Whether the first access to this source has already happened. *)
-
- (* The task of url_of_id is:
- * - When it is called the first time, and no init_id is present,
- * the URL file:/// is passed back (an_url). This forces absolute
- * path names /path/dir/... to be interpreted as file path names.
- * (But relative path names will not work.)
- * - If an init_id has been passed, we can assume that the opened URL
- * is exactly this init_id. By raising Not_competent it is indicated
- * that the standard method is to be used for the interpretation of
- * the URL.
- * - Otherwise, the channel is already being read, and thus cannot be
- * opened again. (This case is handled in channel_of_url.)
- *)
-
- let url_of_id xid =
- if !init_channel_done then begin
- (* Use the normal way of determining the URL of the ID: *)
- raise Pxp_reader.Not_competent
- end
- else begin
- match init_id with
- None ->
- an_url
- (* If the channel is not associated with any URL: Simply pass
- * the URL file:/// back.
- *)
- | Some the_init_id ->
- assert (the_init_id = xid);
- raise Pxp_reader.Not_competent
- (* If the channel is associated with a URL, the corresponding
- * ID must be passed when the first invocation happens.
- *)
- end
- in
-
- (* The task of channel_of_url:
- * - If it is called the first time ("else"), the channel is returned
- * - Otherwise, the channel is already being read, and thus cannot be
- * opened again. By raising Not_competent it is signaled that the
- * resolve_as_file object must not continue to open the URL.
- *)
-
- let channel_of_url url =
- if !init_channel_done then
- raise Pxp_reader.Not_competent
- else begin
- init_channel_done := true;
- ch, fixenc
- end
- in
-
- let r =
- new Pxp_reader.resolve_as_file
- ?system_encoding:system_encoding
- ~url_of_id:url_of_id
- ~channel_of_url:channel_of_url
- ()
- in
-
- let init_xid =
- match init_id with
- None -> Anonymous
- | Some id ->
- (* Note: 'id' may be illegal (malformed); in this case, the first
- * invocation of url_of_id will raise Not_competent, and the 'open_in'
- * method will fail.
- *)
- id
- in
-
- ExtID(init_xid, r)
-;;
-
-
-let from_file ?system_encoding utf8_filename =
-
- let r =
- new Pxp_reader.resolve_as_file
- ?system_encoding:system_encoding
- ()
- in
-
- let utf8_abs_filename =
- if utf8_filename <> "" && utf8_filename.[0] = '/' then
- utf8_filename
- else
- Sys.getcwd() ^ "/" ^ utf8_filename
- in
-
- let syntax = { Neturl.ip_url_syntax with Neturl.url_accepts_8bits = true } in
- let url = Neturl.make_url
- ~scheme:"file"
- ~host:"localhost"
- ~path:(Neturl.split_path utf8_abs_filename)
- syntax
- in
-
- let xid = System (Neturl.string_of_url url) in
-
-
- ExtID(xid, r)
-;;
-
-
-let from_string ?fixenc s =
- let r =
- new Pxp_reader.resolve_read_this_string ?fixenc:fixenc s in
- ExtID(Anonymous, r)
-;;
-
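-(* Usage sketches (illustrative only; the file names are made up):
- *
- * let src1 = from_file "/tmp/doc.xml"
- *     - reads the given file; the name must be given in UTF-8
- *
- * let src2 = from_channel
- *              ~id:(System "file:///tmp/doc.xml")
- *              (open_in "/tmp/doc.xml")
- *     - reads from an already opened channel and associates it with an ID
- *
- * let src3 = from_string ~fixenc:`Enc_iso88591 "<doc>Text</doc>"
- *     - parses an in-memory string, forcing ISO-8859-1
- *)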
-
-(**********************************************************************)
-
-class ['ext] parser_object
- init_doc init_dtd init_extend_dtd init_config init_resolver init_spec
- init_process_xmldecl transform_dtd id_index
- =
- object (self)
-
- (* Note that the 'ext parameter has been the motivation to make the
- * parser a class.
- *)
-
- val mutable dtd = init_dtd
- (* The DTD being parsed; or the DTD currently assumed *)
-
- val extend_dtd = init_extend_dtd
- (* Whether the DTD should be extended by ELEMENT, ATTLIST, and
- * NOTATION declarations or not. (True for validating mode,
- * false for well-formedness mode.)
- *)
-
- val transform_dtd = transform_dtd
- (* A function transforming the DTD *)
-
- val id_index = (id_index : 'ext index option)
- (* The ID index or None *)
-
- val process_xmldecl = init_process_xmldecl
- (* Whether the XML declaration is parsed and the found XML version
- * and standalone declaration are passed to 'doc'.
- *)
-
- val lexerset = Pxp_lexers.get_lexer_set (init_config.encoding)
-
- val doc = init_doc
- (* The current document *)
-
- method doc = (doc : 'ext document)
-
- val resolver = init_resolver
- (* The resolver for external IDs *)
-
- val config = init_config
- (* The current configuration *)
-
- val elstack = (Stack.create() : ('ext node * entity_id) Stack.t)
- (* The element stack containing all open elements, i.e. elements that
- * have begun by a start tag but that have not been finished (end tag).
- * If the parser sees a start tag, it creates the element and pushes it
- * on top of this stack. If the parser recognizes an end tag, it pulls
- * one element from the stack and checks if it has the same name as
- * given with the end tag.
- *
- * At initialization time, a special element is pushed on the stack,
- * the so-called super root. It is always the bottommost
- * element of the stack, and serves as a guard.
- * [See "initializer" below.]
- *)
-
- method current =
- (* Get the top element of the element stack *)
- try
- fst(Stack.top elstack)
- with
- Stack.Empty -> assert false
- (* Not possible, because the super root is always the element
- * at the bottom of the stack.
- *)
-
- val mutable n_tags_open = 0
- (* Number of begin tags that have been parsed and whose corresponding
- * end tags have not yet been parsed
- *)
-
- val mutable p_internal_subset = false
- (* true while parsing the internal subset - there are some additional
- * constraints for internal subsets, and because of this it must
- * be known whether the current declaration is contained in the
- * internal or external subset of the DTD.
- *)
-
- val mutable root = None
- (* Contains the root element (topmost element) while it is being parsed
- * and after it has been parsed.
- * This variable is None before the root element is seen.
- *)
-
- method root = root
-
- val spec = init_spec
- (* A hashtable that contains exemplar objects for the various element
- * types. If an element is parsed, the exemplar is looked up and
- * "cloned" (by the "create" method)
- *)
-
- val mutable current_data = []
- (* Collects character data. *)
-
- method collect_data s =
- (* Collects the character material 's' *)
- current_data <- s :: current_data
-
- method save_data =
- (* Puts the material collected in 'current_data' into a new
- * node, and appends this node as a new sub-node to 'current'
- *)
- match current_data with
- [] ->
- ()
- | [ str ] ->
- if str <> "" then
- self # current # add_node (create_data_node spec dtd str);
- current_data <- []
- | _ ->
- let count = List.fold_left
- (fun acc s -> acc + String.length s)
- 0
- current_data in
- let str = String.create count in
- let pos = ref count in
- List.iter
- (fun s ->
- let l = String.length s in
- pos := !pos - l;
- String.blit
- ~src:s
- ~src_pos:0
- ~dst:str
- ~dst_pos:(!pos)
- ~len:l
- )
- current_data;
- assert(!pos = 0);
- if str <> "" then
- self # current # add_node (create_data_node spec dtd str);
- current_data <- []
-
-
- method only_whitespace data =
- (* Checks that the string "data" contains only whitespace. On failure,
- * WF_error is raised.
- *)
- let lexbuf = Lexing.from_string data in
- let t1 = lexerset.scan_name_string lexbuf in
- if t1 <> Ignore then
- raise(WF_error("Data not allowed here"));
- let t2 = lexerset.scan_name_string lexbuf in
- if t2 <> Eof then
- raise(WF_error("Data not allowed here"));
- ()
-
- initializer
- (* CHECKS: *)
- if config.encoding <> dtd # encoding then
- failwith("Encoding mismatch");
-
- (* --- Initialize 'elstack': Push the super-root on the stack. *)
- let super_root =
- if config.enable_super_root_node then
- create_super_root_node spec dtd
- else
- (* because spec may not contain an exemplar for the super root: *)
- create_no_node spec dtd
- in
- (* Push the super root or the emulation onto the stack: *)
- Stack.push (super_root, (self :> entity_id)) elstack;
-
-
-
- (********* Here the method "parse" begins. The grammar below is
- * transformed into a local function of this method
- *)
-
- method parse context start_symbol =
-
- let parse_ignored_section yy_current yy_get_next =
- (* A special parser which should be used after <![IGNORE[.
- * It parses until the corresponding ]]> is found.
- *)
-
- while yy_current() = Ignore do
- ignore(yy_get_next());
- done;
-
- ( match yy_current() with
- Conditional_body _ -> ()
- | _ -> raise Parsing.Parse_error;
- );
-
- let en = context.manager # current_entity in
- let llev = ref 1 in
- while !llev >= 1 do
- let igntok = en # next_ignored_token in
- (* next_ignored_token: uses a special lexer that only
- * recognizes Conditional_begin and Conditional_end;
- * other character combinations are ignored.
- *)
- (* NOTE: next_ignored_token works much like yy_get_next,
- * but it does not set the current token!
- *)
- match igntok with
- Conditional_begin _ ->
- llev := !llev + 1
- | Conditional_end _ ->
- llev := !llev - 1;
- (* Because the loop may be exited now: *)
- context.current_token <- igntok;
- | (End_entity | Eof) ->
- raise Parsing.Parse_error
- | _ ->
- ()
- done;
-
- in
-
-
- let check_and_parse_xmldecl xmldecl =
- if process_xmldecl then begin
- let v, _, s = decode_doc_xml_pi (decode_xml_pi xmldecl) in
- check_version_num v;
- doc # init_xml_version v;
- let v = match s with
- None -> false
- | Some "yes" -> true
- | Some "no" -> false
- | _ -> raise (WF_error("Illegal 'standalone' declaration"))
- in
- if config.recognize_standalone_declaration then
- dtd # set_standalone_declaration v
- end
- in
-
- let recode_utf8 s =
- (* Recode 's' to UTF-8 *)
- if config.encoding = `Enc_utf8 then
- s (* No recoding necessary *)
- else
- Netconversion.recode_string
- ~in_enc:(config.encoding :> encoding) ~out_enc:`Enc_utf8 s
- in
-
-
-%%
-
-/* The following grammar looks similar to ocamlyacc grammars, but
- * ocamlyacc is actually not used to transform the grammar into a parser.
- * Instead, the parser generator m2parsergen is applied.
- *
- * The format of the grammar is different (see m2parsergen/README),
- * but I hope that you can understand most features immediately.
- *
- * The type of the parser is different: m2parsergen creates a top-down
- * parser while ocamlyacc generates a LALR-1 parser.
- *
- * The way the generated code is called is different: ocamlyacc produces
- * lots of top-level definitions whereas m2parsergen generates only
- * a local let-in-phrase. This is explained in the already mentioned
- * README file.
- */
-
-/* See Pxp_types.ml for comments to the various tokens */
-
-%token Begin_entity
-%token End_entity
-%token Comment_begin
-%token Comment_end
-%token Ignore
-%token Eq
-%token Rangle
-%token Rangle_empty
-%token <> Conditional_begin
-%token <> Conditional_body
-%token <> Conditional_end
-%token Percent
-%token Plus
-%token Star
-%token Bar
-%token Comma
-%token Qmark
-%token Pcdata
-%token Required
-%token Implied
-%token Fixed
-%token Eof
-
-%token <> Comment_material
-%token <> Doctype
-%token <> Doctype_rangle
-%token <> Dtd_begin
-%token <> Dtd_end
-%token <> Decl_element
-%token <> Decl_attlist
-%token <> Decl_entity
-%token <> Decl_notation
-%token <> Decl_rangle
-%token <> Lparen
-%token <> Rparen
-%token <> RparenPlus
-%token <> RparenStar
-%token <> RparenQmark
-
-%token <> Tag_beg
-%token <> Tag_end
-
-%token <> PI
-%token <> PI_xml
-%token <> Cdata
-%token <> CRef
-%token <> ERef
-%token <> PERef
-%token <> CharData
-%token <> LineEnd
-%token <> Name
-%token <> Nametoken
-%token <> Attval
-%token <> Attval_nl_normalized
-%token <> Unparsed_string
-
-/* START SYMBOLS:
- *
- * "ext_document": parses a complete XML document (i.e. containing a
- * <!DOCTYPE..> and an element)
- * "ext_declarations": parses an "external DTD subset", i.e. a sequence
- * of declarations
- * "ext_element": parses a single element (no <!DOCTYPE...> allowed);
- * the element need not be the root element of the
- * DTD
- *
- * The functions corresponding to these symbols always return () because
- * they only have side-effects.
- */
-
-/* SOME GENERAL COMMENTS:
- *
- * The parser does not get its tokens from the lexers directly. Instead,
- * there is an entity object between the parser and the lexers. This
- * object already handles:
- *
- * - References to general and parameter entities. The token stream is
- * modified such that tokens automatically come from the referenced entities.
- * External parameter entities and all general entities are enclosed by
- * the two special tokens Begin_entity and End_entity. The parser must
- * check that these braces are correctly nested.
- */
-
-%%
-
-
-ext_document():
- Begin_entity
- doc_xmldecl_then_misc_then_prolog_then_rest() End_entity
- {{
- if n_tags_open <> 0 then
- raise(WF_error("Missing end tag"))
- }}
-
-
-/* In the following rule, we must find out whether there is an XML declaration
- * or not, and directly after that either "process_xmldecl" or
- * "process_missing_xmldecl" of the current entity must be called.
- * AND IT MUST HAPPEN IMMEDIATELY! Because of this, the invocation is carried out
- * in the "$" clause immediately following the first token.
- *
- * TODO: This is not enough. The first token may be a tag, and the tag
- * may already contain non-ASCII characters. (But in this case, the resolvers
- * assume UTF8, and they are right...)
- */
-
-doc_xmldecl_then_misc_then_prolog_then_rest():
- pl:PI_xml
- $ {{ context.manager # current_entity # process_xmldecl pl;
- check_and_parse_xmldecl pl;
- }}
- misc()* doc_prolog_then_rest()
- {{ () }}
-
-| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
- misc() misc()* doc_prolog_then_rest()
- {{ () }}
-
-| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
- doctypedecl() misc()* contents_start()
- {{ () }}
-
-| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
- contents_start()
- {{ () }}
-
-
-doc_prolog_then_rest():
- doctypedecl() misc()* contents_start()
- {{ () }}
-| contents_start()
- {{ () }}
-
-
-ext_element():
- Begin_entity el_xmldecl_then_misc_then_rest() End_entity
- {{
- if n_tags_open <> 0 then
- raise(WF_error("Missing end tag"))
- }}
-
-
-/* See comment for doc_xmldecl_then_misc_then_prolog_then_rest. */
-
-el_xmldecl_then_misc_then_rest():
- pl:PI_xml
- $ {{ context.manager # current_entity # process_xmldecl pl; }}
- misc()* contents_start()
- {{ () }}
-
-| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
- misc() misc()* contents_start()
- {{ () }}
-
-| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
- contents_start()
- {{ () }}
-
-
-ext_declarations():
- /* Parses a sequence of declarations given by an entity. As side-effect,
- * the parsed declarations are put into the dtd object.
- */
- Begin_entity decl_xmldecl_then_rest()
- {{ () }}
-| Eof
- {{ () }}
-
-
-decl_xmldecl_then_rest():
- /* Note: This rule is also called from declaration()! */
- pl:PI_xml
- $ {{ context.manager # current_entity # process_xmldecl pl;
- }}
- declaration()* End_entity
- {{ () }}
-
-| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
- declaration() declaration()* End_entity
- {{ () }}
-
-| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
- End_entity
- {{ () }}
-
-
-misc():
- pi()
- {{ () }}
-| data: CharData
- /* In this context, the lexers sometimes do not recognize white space;
- * instead CharData tokens containing white space are delivered.
- */
- {{ self # only_whitespace data }}
-| Ignore
- {{ () }}
-| comment()
- {{ () }}
-
-
-/********************* DOCUMENT TYPE DECLARATION *************************/
-
-doctypedecl():
- /* parses from <!DOCTYPE to >. As side-effect, first the declarations of
- * the internal DTD subset (if any) are put into the dtd object, then the
- * declarations of the external DTD subset (if any) are put into the same object.
- */
- doctype_entid: Doctype
- ws: Ignore Ignore*
- doctypedecl_material (doctype_entid)
- {{ () }}
- ? {{ match !yy_position with
- "ws" -> raise(WF_error("Whitespace is missing after `DOCTYPE'"))
- | _ -> raise(WF_error("Bad DOCTYPE declaration"))
- }}
-
-
-/* TRICK:
- * ws: Ignore? Ignore*
- * is meant seriously. The effect is that ws becomes a boolean variable
- * which is true if there is an Ignore token and false otherwise.
- * This construct is faster than just
- * ws: Ignore*
- * in which case ws becomes an integer variable containing the number of
- * Ignore tokens. Counting the number of tokens is slower than only checking
- * the existence.
- *
- * We need the information whether there is an Ignore token (representing
- * white space), because white space is only obligatory if an identifier
- * for the external subset is also parsed; this conditional syntax constraint is
- * simply programmed in the body of the grammar rule.
- */
-
-doctypedecl_material(doctype_entid):
- root_name: Name
- ws: Ignore? Ignore*
- external_subset: external_id()?
- Ignore*
- internal_subset: internal_dtd()?
- Ignore*
- doctype_rangle_entid: Doctype_rangle
- {{
- if doctype_entid != doctype_rangle_entid then
- raise (Validation_error("Entities not properly nested with DOCTYPE declaration"));
- dtd # set_root root_name;
- begin match external_subset, internal_subset with
- None, None -> () (* no DTD means no ID *)
- | None, Some _ -> dtd # set_id Internal
- | Some id, None -> dtd # set_id (External id)
- | Some id, Some _ -> dtd # set_id (Derived id)
- end;
- (* Get now the external doctype declaration. Note that the internal
- * subset has precedence and must be read first.
- *)
- begin match external_subset with
- None -> ()
- | Some id ->
- if not ws then
- raise(WF_error("Whitespace is missing after `DOCTYPE " ^
- root_name ^ "'"));
- let r' = resolver # clone in
- let pobj =
- new parser_object
- (new document config.warner)
- dtd
- extend_dtd
- config
- r'
- spec
- process_xmldecl
- (fun x -> x)
- None
- in
- let en = new external_entity r' dtd "[dtd]"
- config.warner id false config.errors_with_line_numbers
- config.encoding
- in
- en # set_debugging_mode (config.debugging_mode);
- let mgr = new entity_manager en in
- en # open_entity true Declaration;
- try
- let context = make_context mgr in
- pobj # parse context Ext_declarations;
- ignore(en # close_entity);
- with
- error ->
- ignore(en # close_entity);
- r' # close_all;
- let pos = mgr # position_string in
- raise (At(pos, error))
- end;
- dtd # validate
- }}
- ? {{
- match !yy_position with
- "doctype_rangle_entid" -> raise(WF_error("`>' expected"))
- | _ -> raise(WF_error("Bad DOCTYPE declaration"))
- }}
-
-/* Note that there are no keywords for SYSTEM or PUBLIC, as these would
- * be difficult to recognize in the lexical contexts. Because of this,
- * SYSTEM/PUBLIC is parsed as a name, and the rule for everything after
- * SYSTEM/PUBLIC is computed dynamically.
- */
-
-external_id():
- tok:Name
- $ {{
- let followup =
- match tok with
- "SYSTEM" -> parse_system_id
- (* Apply the rule system_id (below) to parse the
- * rest of the ID
- *)
- | "PUBLIC" -> parse_public_id
- (* Apply the rule public_id (below) to parse the
- * rest of the ID
- *)
- | _ -> raise(WF_error("SYSTEM or PUBLIC expected"))
- in
- }}
- ws:Ignore Ignore*
- r:[followup]()
- {{ r }}
- ? {{ match !yy_position with
- "ws" -> raise(WF_error("Whitespace is missing after " ^ tok))
- | _ -> raise(WF_error("Bad SYSTEM or PUBLIC identifier"))
- }}
-
-
-system_id():
- str:Unparsed_string
- {{ System (recode_utf8 str) }}
-
-
-public_id():
- str1: Unparsed_string
- ws: Ignore Ignore*
- str2: Unparsed_string
- {{ check_public_id str1;
- Public(recode_utf8 str1, recode_utf8 str2)
- }}
- ? {{ match !yy_position with
- "ws" -> raise(WF_error("Whitespace is missing between the literals of the PUBLIC identifier"))
- | _ -> raise(WF_error("Bad PUBLIC identifier"))
- }}
-
-
-/* The internal subset: "[" declaration* "]". While parsing the declarations
- * the object variable p_internal_subset must be true; however, if there
- * are entity references, this variable must be reset to false during
- * the entity. (See the rule for "declaration" below.)
- */
-
-internal_dtd():
- dtd_begin_entid: internal_dtd_begin()
- declaration()*
- dtd_end_entid: internal_dtd_end()
- {{
- if dtd_begin_entid != dtd_end_entid then
- raise(Validation_error("Entities not properly nested with internal DTD subset"))
- }}
- ? {{ match !yy_position with
- "dtd_end_entid" -> raise(WF_error("`]' expected"))
- | _ -> raise(WF_error("Bad internal DTD subset"))
- }}
-
-
-internal_dtd_begin():
- Dtd_begin
- {{ assert (not p_internal_subset);
- p_internal_subset <- true }}
-
-
-internal_dtd_end():
- Dtd_end
- {{ assert p_internal_subset;
- p_internal_subset <- false }}
-
-
-declaration():
- /* Parses a single declaration (or processing instruction). As side-effect
- * the parsed declaration is stored into the dtd object.
- */
- elementdecl()
- {{ () }}
-| attlistdecl()
- {{ () }}
-| entid:Decl_entity ws:Ignore Ignore* e:entitydecl(entid)
- {{ () }}
- ? {{ match !yy_position with
- "ws" -> raise(WF_error("Whitespace is missing after ENTITY"))
- | "e" -> raise(WF_error("Name or `%' expected"))
- | _ -> raise(WF_error("Bad entity declaration"))
- }}
-| notationdecl()
- {{ () }}
-| pi: PI
- {{ let target, value = pi in
- let pi = new proc_instruction target value config.encoding in
- dtd # add_pinstr pi
- }}
-| Ignore
- {{ () }}
-| Comment_begin Comment_material* ce:Comment_end
- {{ () }}
- ? {{ match !yy_position with
- "ce" -> raise(WF_error("`-->' expected"))
- | _ -> raise(WF_error("Bad comment"))
- }}
-| Begin_entity
- $ {{ (* Set 'p_internal_subset' to 'false' until the matching 'end_entity'
- * rule is parsed. This allows unrestricted usage of parameter entities
- * within declarations of internal entities.
- *)
- let old_p_internal_subset = p_internal_subset in
- p_internal_subset <- false;
- }}
- decl_xmldecl_then_rest()
- {{ (* Restore the old value of 'p_internal_subset'. *)
- p_internal_subset <- old_p_internal_subset;
- ()
- }}
-| begin_entid:Conditional_begin
- $ {{ (* Check whether conditional sections are allowed at this position. *)
- if p_internal_subset then
- raise(WF_error("Restriction of the internal subset: Conditional sections not allowed"));
- }}
- Ignore*
- cond:conditional_section() end_entid:Conditional_end
- {{ (* Check whether Conditional_begin and Conditional_end are in the same
- * entity. (This restriction is explained in the file SPECS.)
- *)
- if begin_entid != end_entid then
- raise(Validation_error("The first and the last token of conditional sections must be in the same entity (additional restriction of this parser)"));
- }}
- ? {{ match !yy_position with
- "end_entid" -> raise(WF_error("`>]>' expected"))
- | "cond" -> raise(WF_error("INCLUDE or IGNORE expected"))
- | _ -> raise(WF_error("Bad conditional section"))
- }}
-
-/* The tokens INCLUDE/IGNORE are scanned as names, and the selection of the
- * right parsing rule is dynamic.
- * Note that parse_ignored_section is not defined by a grammar rule but
- * by a conventional let-binding above.
- */
-
-conditional_section():
- include_or_ignore:Name
- $ {{ let parsing_function =
- match include_or_ignore with
- "INCLUDE" -> parse_included_section
- (* invoke rule "included_section" below *)
- | "IGNORE" -> parse_ignored_section
- (* invoke function "parse_ignored_section" *)
- | _ -> raise(WF_error("INCLUDE or IGNORE expected"))
- in
- }}
- [ parsing_function ] ()
- {{ () }}
- ? {{ raise(WF_error("Bad conditional section")) }}
-
-included_section():
- Conditional_body declaration()*
- {{ () }}
-| Ignore Ignore* Conditional_body declaration()*
- {{ () }}
-
-
-/*************************** ELEMENT DECLARATIONS ********************/
-
-elementdecl():
- /* parses <!ELEMENT ... >. Puts the parsed element type as side-effect into
- * dtd.
- */
- decl_element_entid: Decl_element
- $ {{ let extdecl = context.manager # current_entity_counts_as_external in
- }}
- ws1: Ignore Ignore*
- name: Name
- ws2: Ignore Ignore*
- content_model: contentspec()
- Ignore*
- decl_rangle_entid: Decl_rangle
- {{
- if decl_element_entid != decl_rangle_entid then
- raise (Validation_error "Entities not properly nested with ELEMENT declaration");
- if extend_dtd then begin
- let el = new dtd_element dtd name in
- (* It is allowed that an <!ATTLIST...> precedes the corresponding
- * <!ELEMENT...>. Because of this it is possible that there is already
- * an element called 'name' in the DTD, and we only need to set the content
- * model of this element.
- *)
- try
- dtd # add_element el;
- el # set_cm_and_extdecl content_model extdecl;
- with
- Not_found -> (* means: there is already an element 'name' *)
- let el' = dtd # element name in
- el' # set_cm_and_extdecl content_model extdecl;
- (* raises Validation_error if el' already has a content model *)
- end
- }}
- ? {{ match !yy_position with
- ("ws1"|"ws2") -> raise(WF_error("Whitespace is missing"))
- | "name" -> raise(WF_error("The name of the element is expected here"))
- | "content_model" -> raise(WF_error("Content model expression expected"))
- | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
- | _ -> raise(WF_error("Bad element type declaration"))
- }}
-
-contentspec():
- /* parses a content model and returns it (type content_model_type) */
- name: Name /* EMPTY or ANY */
- {{ match name with
- "EMPTY" -> Empty
- | "ANY" -> Any
- | _ -> raise(WF_error("EMPTY, ANY, or a subexpression expected"))
- }}
-| entid:Lparen Ignore* term:mixed_or_regexp(entid)
- {{ term }}
- ? {{ raise(WF_error("Bad content model expression")) }}
-
-
-/* Many of the following rules have an lparen_entid argument. This is the
- * internal ID of the entity containing the corresponding left parenthesis;
- * by comparing it with the ID of the entity of the right parenthesis, the
- * constraint is enforced that both parentheses must be in the same entity.
- */
-
-mixed_or_regexp(lparen_entid):
- re: choice_or_seq(lparen_entid)
- {{ Regexp re }}
-| m: mixed(lparen_entid)
- {{ m }}
-
-
-multiplier():
- /* returns one of the multiplier symbols (?,*,+) */
- Plus
- {{ Plus }}
-| Star
- {{ Star }}
-| Qmark
- {{ Qmark }}
-
-
-mixed (lparen_entid) :
- Pcdata
- Ignore*
- material: mixed_alternatives_top()
- {{
- let rest, rparen_entid = material in
- if lparen_entid != rparen_entid then
- raise (Validation_error "Entities not properly nested with parentheses");
- Mixed (MPCDATA :: rest)
- }}
- ? {{ raise(WF_error("Bad content model expression")) }}
-
-
-mixed_alternatives_top():
- entid: Rparen
- {{ [], entid }}
-| entid: RparenStar
- {{ [], entid }}
-| Bar Ignore* name:Name Ignore* names:mixed_alternative()* entid:RparenStar
- {{
- (MChild name :: names), entid
- }}
- ? {{ match !yy_position with
- "name" -> raise(WF_error("Name expected"))
- | "entid" -> raise(WF_error("`)*' expected"))
- | _ -> raise(WF_error("Bad content model expression"))
- }}
-
-
-mixed_alternative() :
- Bar Ignore* name:Name Ignore*
- {{ MChild name }}
- ? {{ match !yy_position with
- "name" -> raise(WF_error("Name expected"))
- | _ -> raise(WF_error("Bad content model expression"))
- }}
-
-
-
-choice_or_seq (lparen_entid):
- /* parses either a regular expression, or a mixed expression. Returns
- * Mixed spec or Regexp spec (content_model_type).
- * Which kind of expression (regexp or mixed) is being read is recognized
- * after the first subexpression has been parsed; the other subexpressions
- * must be of the same kind.
- */
- re: cp()
- Ignore*
- factor: choice_or_seq_factor()
- {{
- let (finalmark,subexpr), rparen_entid = factor in
- if lparen_entid != rparen_entid then
- raise (Validation_error "Entities not properly nested with parentheses");
- (* Check that the other subexpressions are "regexp", too, and
- * merge them with the first.
- *)
- let re' =
- match subexpr with
- Alt [] -> re
- | Alt alt -> Alt (re :: alt)
- | Seq seq -> Seq (re :: seq)
- | _ -> assert false
- in
- (* Interpret the finalmark. *)
- match finalmark with
- Ignore -> re'
- | Plus -> Repeated1 re'
- | Star -> Repeated re'
- | Qmark -> Optional re'
- | _ -> assert false
- }}
- ? {{ raise(WF_error("Bad content model expression")) }}
-
-choice_or_seq_factor():
- /* Parses "|<subexpr>|...)" or ",<subexpr>,...)", both forms optionally
- * followed by ?, *, or +.
- * Returns ((finalmark, expr), rparen_entid), where
- * - finalmark is the character after the right parenthesis or Ignore
- * - expr is either
- * Alt [] meaning that only ")" has been found
- * Alt non_empty_list meaning that the subexpressions are separated by '|'
- * Seq non_empty_list meaning that the subexpressions are separated by ','
- */
- entid:Rparen
- {{ (Ignore, Alt []), entid }}
-| entid:RparenPlus
- {{ (Plus, Alt []), entid }}
-| entid:RparenStar
- {{ (Star, Alt []), entid }}
-| entid:RparenQmark
- {{ (Qmark, Alt []), entid }}
-| Bar Ignore* re:cp() Ignore* factor:choice_or_seq_factor()
- {{
- let (finalmark, subexpr), rparen_entid = factor in
- begin match subexpr with
- Alt [] -> (finalmark, (Alt [re])), rparen_entid
- | Alt alt -> (finalmark, (Alt (re :: alt))), rparen_entid
- | _ -> raise(WF_error("It is not allowed to mix alternatives and sequences"))
- end
- }}
- ? {{ raise(WF_error("Bad content model expression")) }}
-| Comma Ignore* re:cp() Ignore* factor:choice_or_seq_factor()
- {{
- let (finalmark, subexpr), rparen_entid = factor in
- begin match subexpr with
- Alt [] -> (finalmark, (Seq [re])), rparen_entid
- | Seq seq -> (finalmark, (Seq (re :: seq))), rparen_entid
- | _ -> raise(WF_error("It is not allowed to mix alternatives and sequences"))
- end
- }}
- ? {{ raise(WF_error("Bad content model expression")) }}
-
-cp():
- /* parse either a name, or a parenthesized subexpression "(...)" */
- name:Name m:multiplier()?
- {{ match m with
- None -> Child name
- | Some Plus -> Repeated1 (Child name)
- | Some Star -> Repeated (Child name)
- | Some Qmark -> Optional (Child name)
- | _ -> assert false
- }}
- ? {{ raise(WF_error("Bad content model expression")) }}
-| entid:Lparen Ignore* m:choice_or_seq(entid)
- {{ m }}
- ? {{ raise(WF_error("Bad content model expression")) }}
-
-
-/********************* ATTRIBUTE LIST DECLARATION ***********************/
-
-attlistdecl():
- /* parses <!ATTLIST ... >. Enters the attribute list in dtd as side-
- * effect.
- */
- decl_attlist_entid: Decl_attlist
- $ {{ let extdecl = context.manager # current_entity_counts_as_external in
- }}
- ws1: Ignore Ignore*
- el_name: Name
- ws: Ignore? Ignore*
- factor: attdef_factor()
- {{
- let at_list, decl_rangle_entid = factor in
-
- if decl_attlist_entid != decl_rangle_entid then
- raise (Validation_error "Entities not properly nested with ATTLIST declaration");
-
- if not ws && at_list <> [] then begin
- match at_list with
- (name,_,_) :: _ ->
- (* This is normally impossible, because the lexer demands
- * some other token between two names.
- *)
- raise(WF_error("Whitespace is missing before `" ^ name ^ "'"));
- | _ -> assert false
- end;
-
- if extend_dtd then begin
- let new_el = new dtd_element dtd el_name in
- (* Note that it is allowed that <!ATTLIST...> precedes the corresponding
- * <!ELEMENT...> declaration. In this case we add the element declaration
- * to the DTD right away but leave the content model unspecified.
- *)
- let el =
- try
- dtd # add_element new_el;
- new_el
- with
- Not_found -> (* already added *)
- let old_el = dtd # element el_name in
- if old_el # attribute_names <> [] then
- config.warner # warn ("More than one ATTLIST declaration for element type `" ^
- el_name ^ "'");
- old_el
- in
- List.iter
- (fun (a_name, a_type, a_default) ->
- el # add_attribute a_name a_type a_default extdecl)
- at_list
- end
- }}
- ? {{ match !yy_position with
- "ws1" -> raise(WF_error("Whitespace is missing after ATTLIST"))
- | "el_name" -> raise(WF_error("The name of the element is expected here"))
- | "factor" -> raise(WF_error("Another attribute name or `>' expected"))
- | _ -> raise(WF_error("Bad attribute declaration"))
- }}
-
-
-attdef_factor():
- /* parses a list of triples <name> <type> <default value> and returns the
- * list as (string * att_type * att_default) list.
- */
- attdef:attdef() ws:Ignore? Ignore* factor:attdef_factor()
- {{
- let attdef_rest, decl_rangle_entid = factor in
- if not ws && attdef_rest <> [] then begin
- match attdef_rest with
- (name,_,_) :: _ ->
- raise(WF_error("Missing whitespace before `" ^ name ^ "'"));
- | _ -> assert false
- end;
- (attdef :: attdef_rest), decl_rangle_entid }}
- ? {{ match !yy_position with
- | "factor" -> raise(WF_error("Another attribute name or `>' expected"))
- | _ -> raise(WF_error("Bad attribute declaration"))
- }}
-| entid:Decl_rangle
- {{ [], entid }}
-
-
-attdef():
- /* Parses a single triple */
- name: Name
- ws1: Ignore Ignore*
- tp: atttype()
- ws2: Ignore Ignore*
- default: defaultdecl()
- {{ (name,tp,default) }}
- ? {{ match !yy_position with
- ("ws1"|"ws2") -> raise(WF_error("Whitespace is missing"))
- | "tp" -> raise(WF_error("Type of attribute or `(' expected"))
- | "default" -> raise(WF_error("#REQUIRED, #IMPLIED, #FIXED or a string literal expected"))
- | _ -> raise(WF_error("Bad attribute declaration"))
- }}
-
-atttype():
- /* Parses an attribute type and returns it as att_type. */
- name: Name
- $ {{ let followup =
- if name = "NOTATION" then
- parse_notation
- else
- parse_never
- in
- }}
- nota: [followup]()?
- {{
- match name with
- "CDATA" -> A_cdata
- | "ID" -> A_id
- | "IDREF" -> A_idref
- | "IDREFS" -> A_idrefs
- | "ENTITY" -> A_entity
- | "ENTITIES" -> A_entities
- | "NMTOKEN" -> A_nmtoken
- | "NMTOKENS" -> A_nmtokens
- | "NOTATION" ->
- (match nota with
- None -> raise(WF_error("Error in NOTATION type (perhaps missing whitespace after NOTATION?)"))
- | Some n -> n
- )
- | _ -> raise(WF_error("One of CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, NOTATION, or a subexpression expected"))
- }}
- ? {{ raise(WF_error("Bad attribute declaration (perhaps missing whitespace after NOTATION)")) }}
-
-| Lparen
- Ignore*
- name: name_or_nametoken()
- Ignore*
- names: nmtoken_factor()*
- rp: Rparen
- /* Enumeration */
- {{ A_enum(name :: names) }}
- ? {{ match !yy_position with
- "name" -> raise(WF_error("Name expected"))
- | "names" -> raise(WF_error("`|' and more names expected, or `)'"))
- | "rp" -> raise(WF_error("`|' and more names expected, or `)'"))
- | _ -> raise(WF_error("Bad enumeration type"))
- }}
-
-
-never():
- /* The always failing rule */
- $ {{ raise Not_found; }}
- Doctype /* questionable */
- {{ A_cdata (* Does not matter *)
- }}
-
-
-notation():
- Ignore Ignore*
- lp: Lparen
- Ignore*
- name: Name
- Ignore*
- names: notation_factor()*
- rp: Rparen
- {{ A_notation(name :: names) }}
- ? {{ match !yy_position with
- "lp" -> raise(WF_error("`(' expected"))
- | "name" -> raise(WF_error("Name expected"))
- | "names" -> raise(WF_error("`|' and more names expected, or `)'"))
- | "rp" -> raise(WF_error("`|' and more names expected, or `)'"))
- | _ -> raise(WF_error("Bad NOTATION type"))
- }}
-
-
-notation_factor():
- /* Parse "|<name>" and return the name */
- Bar Ignore* name:Name Ignore*
- {{ name }}
- ? {{ match !yy_position with
- "name" -> raise(WF_error("Name expected"))
- | _ -> raise(WF_error("Bad NOTATION type"))
- }}
-
-nmtoken_factor():
- /* Parse "|<nmtoken>" and return the nmtoken */
- Bar Ignore* n:name_or_nametoken() Ignore*
- {{ n }}
- ? {{ match !yy_position with
- "n" -> raise(WF_error("Nametoken expected"))
- | _ -> raise(WF_error("Bad enumeration type"))
- }}
-
-
-name_or_nametoken():
- n:Name {{ n }}
-| n:Nametoken {{ n }}
-
-
-/* The default values must be expanded and normalized. This has been implemented
- * by the function expand_attvalue.
- */
-
-
-defaultdecl():
- /* Parse the default value for an attribute and return it as att_default */
- Required
- {{ D_required }}
-| Implied
- {{ D_implied }}
-| Fixed ws:Ignore Ignore* str:Unparsed_string
- {{ D_fixed (expand_attvalue lexerset dtd str config.warner false) }}
- ? {{ match !yy_position with
- "ws" -> raise(WF_error("Whitespace is missing after #FIXED"))
- | "str" -> raise(WF_error("String literal expected"))
- | _ -> raise(WF_error("Bad #FIXED default value"))
- }}
-| str:Unparsed_string
- {{ D_default (expand_attvalue lexerset dtd str config.warner false) }}
-
-
-/**************************** ENTITY DECLARATION ***********************/
-
-entitydecl(decl_entity_entid):
- /* parses everything _after_ <!ENTITY until the matching >. The parsed
- * entity declaration is entered into the dtd object as side-effect.
- */
- name: Name
- $ {{ let extdecl = context.manager # current_entity_counts_as_external in
- }}
- ws: Ignore Ignore*
- material: entitydef()
- Ignore*
- decl_rangle_entid: Decl_rangle
- /* A general entity */
- {{
- if decl_entity_entid != decl_rangle_entid then
- raise (Validation_error "Entities not properly nested with ENTITY declaration");
- let en =
- (* Distinguish between
- * - internal entities
- * - external entities
- * - NDATA (unparsed) entities
- *)
- match material with
- (Some s, None, None) ->
- new internal_entity dtd name config.warner s p_internal_subset
- config.errors_with_line_numbers false config.encoding
- | (None, Some xid, None) ->
- new external_entity (resolver # clone) dtd name config.warner
- xid false config.errors_with_line_numbers
- config.encoding
-
- | (None, Some xid, Some n) ->
- (new ndata_entity name xid n config.encoding :> entity)
- | _ -> assert false
- in
- dtd # add_gen_entity en extdecl
- }}
- ? {{ match !yy_position with
- "ws" -> raise(WF_error("Whitespace is missing"))
- | "material" -> raise(WF_error("String literal or identifier expected"))
- | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
- | _ -> raise(WF_error("Bad entity declaration"))
- }}
-
-| Percent
- $ {{ let extdecl = context.manager # current_entity_counts_as_external in
- }}
- ws1: Ignore Ignore*
- name: Name
- ws2: Ignore Ignore*
- material: pedef()
- Ignore*
- decl_rangle_entid: Decl_rangle
- /* A parameter entity */
- {{
- if decl_entity_entid != decl_rangle_entid then
- raise (Validation_error "Entities not properly nested with ENTITY declaration");
- let en =
- (* Distinguish between internal and external entities *)
- match material with
- (Some s, None) ->
- new internal_entity dtd name config.warner s p_internal_subset
- config.errors_with_line_numbers true config.encoding
- | (None, Some xid) ->
- new external_entity (resolver # clone) dtd name config.warner
- xid true config.errors_with_line_numbers
- config.encoding
- | _ -> assert false
- in
-
- (* The following two lines ensure that even internal entities count
- * as external (for the standalone check) if the declaration of
- * the internal entity occurs in an external entity.
- *)
- if extdecl then
- en # set_counts_as_external;
-
- dtd # add_par_entity en;
- }}
- ? {{ match !yy_position with
- ("ws1"|"ws2") -> raise(WF_error("Whitespace is missing"))
- | "material" -> raise(WF_error("String literal or identifier expected"))
- | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
- | _ -> raise(WF_error("Bad entity declaration"))
- }}
-
-
-entitydef():
- /* parses the definition value of a general entity. Returns either:
- * - (Some s, None, None) meaning the definition of an internal entity
- * with (literal) value s has been found
- * - (None, Some x, None) meaning that an external parsed entity with
- * external ID x has been found
- * - (None, Some x, Some n) meaning that an unparsed entity with
- * external ID x and notation n has been found
- */
- str:Unparsed_string
- {{ Some str, None, None }}
-| id:external_id() ws:Ignore? Ignore* decl:ndatadecl()?
- {{ if not ws && decl <> None then
- raise(WF_error("Whitespace missing before `NDATA'"));
- None, Some id, decl
- }}
-
-
-pedef():
- /* parses the definition value of a parameter entity. Returns either:
- * - (Some s, None) meaning that the definition of an internal entity
- * with (literal) value s has been found
- * - (None, Some x) meaning that an external ID x has been found
- */
- str:Unparsed_string
- {{ Some str, None }}
-| id:external_id()
- {{ None, Some id }}
-
-
-ndatadecl():
- /* Parses either NDATA "string" or the empty string; returns Some "string"
- * in the former, None in the latter case.
- */
- ndata:Name ws:Ignore Ignore* name:Name
- {{ if ndata = "NDATA" then
- name
- else
- raise(WF_error("NDATA expected"))
- }}
- ? {{ match !yy_position with
- "ws" -> raise(WF_error("Whitespace is missing after NDATA"))
- | "name" -> raise(WF_error("Name expected"))
- | _ -> raise(WF_error("Bad NDATA declaration"))
- }}
-
-/**************************** NOTATION DECLARATION *******************/
-
-notationdecl():
- /* parses <!NOTATION ... > and enters the notation declaration into the
- * dtd object as side-effect
- */
- decl_notation_entid: Decl_notation
- ws1: Ignore Ignore*
- name: Name
- ws2: Ignore Ignore*
- sys_or_public: Name /* SYSTEM or PUBLIC */
- ws3: Ignore Ignore*
- str1: Unparsed_string
- ws: Ignore? Ignore*
- str2: Unparsed_string?
- Ignore*
- decl_rangle_entid: Decl_rangle
- {{
- if decl_notation_entid != decl_rangle_entid then
- raise (Validation_error "Entities not properly nested with NOTATION declaration");
- let xid =
- (* Note that PUBLIC is allowed to be followed by only one
- * string literal
- *)
- match sys_or_public with
- "SYSTEM" ->
- if str2 <> None then raise(WF_error("SYSTEM must be followed only by one argument"));
- System (recode_utf8 str1)
- | "PUBLIC" ->
- begin match str2 with
- None ->
- check_public_id str1;
- Public(recode_utf8 str1,"")
- | Some p ->
- if not ws then
- raise(WF_error("Missing whitespace between the string literals of the `PUBLIC' id"));
- check_public_id str1;
- Public(recode_utf8 str1, recode_utf8 p)
- end
- | _ -> raise(WF_error("PUBLIC or SYSTEM expected"))
- in
- if extend_dtd then begin
- let no = new dtd_notation name xid config.encoding in
- dtd # add_notation no
- end
- }}
- ? {{ match !yy_position with
- ("ws1"|"ws2"|"ws3") -> raise(WF_error("Whitespace is missing"))
- | "name" -> raise(WF_error("Name expected"))
- | "sys_or_public" -> raise(WF_error("SYSTEM or PUBLIC expected"))
- | ("str1"|"str2") -> raise(WF_error("String literal expected"))
- | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
- | _ -> raise(WF_error("Bad NOTATION declaration"))
- }}
-
-/****************************** ELEMENTS **************************/
-
-/* In the following rules, the number of error rules is reduced to
- * improve the performance of the parser.
- */
-
-
-contents_start():
- /* parses <element>...</element> misc*, i.e. exactly one element followed
- * optionally by white space or processing instructions.
- * The element is entered into the global variables as follows:
- * - If elstack is non-empty, the parsed element is added as new child to
- * the top element of the stack.
- * - If elstack is empty, the root_exemplar object is modified rather than
- * a new element being created. If additionally the variable root is
- * None, it is assigned Some root_exemplar.
- * Note that the modification of the root_exemplar is done by the method
- * internal_init.
- * The reason why the root element is modified rather than newly created
- * is a typing requirement. It must be possible that the class of the root
- * is derived from the original class element_impl, i.e. the user must be
- * able to add additional methods. If we created a new root object, we
- * would have to denote to which class the new object belongs; the root
- * would always be an 'element_impl' object (and not a derived object).
- * If we instead cloned an exemplar object and modified it by the
- * "create" method, the root object would belong to the same class as the
- * exemplar (good), but the type of the parsing function would always
- * state that an 'element_impl' was created (because we can pass the new
- * object only back via a global variable). The only solution is to
- * modify the object that has been passed to the parsing function directly.
- */
- $ {{ dtd <- transform_dtd dtd; }}
- start_tag() content()*
- {{ () }}
-
-
-content():
- /* parses: start tags, end tags, content, or processing
- * instructions. It is checked dynamically that the tags are properly nested.
- * As a result, recognized elements are added to their parent elements,
- * content is added to the element containing it, and processing instructions
- * are entered into the element enclosing them. (All as side-effects.)
- */
- start_tag()
- {{ () }}
-| end_tag()
- {{ () }}
-| char_data()
- {{ () }}
-| cref()
- {{ () }}
-| pi()
- {{ () }}
-| entity_ref()
- {{ () }}
-| comment()
- {{ () }}
-
-
-entity_ref():
- Begin_entity eref_xmldecl_then_rest()
- {{ if n_tags_open = 0 then
- raise(WF_error("Entity reference not allowed here"))
- }}
-
-
-/* See comment for doc_mldecl_then_misc_then_prolog_then_rest. */
-
-eref_xmldecl_then_rest():
- pl:PI_xml
- $ {{ context.manager # current_entity # process_xmldecl pl;
- }}
- content()* End_entity
- {{ () }}
-
-| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
- content() content()* End_entity
- {{ () }}
-
-| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
- End_entity
- {{ () }}
-
-
-start_tag():
- /* parses <element attribute-values> or <element attribute-values/>.
- *
- * EFFECT: If elstack is non-empty, the element is added to the
- * top element of the stack as new child, and the element
- * is pushed on the stack. If elstack is empty, the root_exemplar is
- * modified and gets the parsed name and attribute list. The root_exemplar
- * is pushed on the stack. If the variable root is also empty, it is
- * initialized as well.
- * If the <element ... /> form has been parsed, no element is pushed
- * on the stack.
- */
- tag: Tag_beg
- $ {{ let position =
- if config.store_element_positions then
- Some(context.manager # position)
- else
- None
- in
- }}
- ws: Ignore? Ignore*
- attlist: attribute()*
- emptiness: start_tag_rangle()
- /* Note: it is guaranteed that there is whitespace between Tag_beg and
- * the name of the first attribute, because there must be some separator.
- * So we do not need to check ws!
- */
- {{
- let rec check_attlist al =
- match al with
- (nv1, num1) :: al' ->
- if not num1 && al' <> [] then begin
- match al with
- ((n1,_),_) :: ((n2,_),_) :: _ ->
- raise(WF_error("Whitespace is missing between attributes `" ^
- n1 ^ "' and `" ^ n2 ^ "'"))
- | _ -> assert false
- end;
- check_attlist al'
- | [] -> ()
- in
- check_attlist attlist;
-
- let name, tag_beg_entid = tag in
- let attlist' = List.map (fun (nv,_) -> nv) attlist in
- let d =
- create_element_node ?position:position spec dtd name attlist' in
-
- begin match id_index with
- None -> ()
- | Some idx ->
- (* Put the ID attribute into the index, if present *)
- begin try
- let v = d # id_attribute_value in (* may raise Not_found *)
- idx # add v d (* may raise ID_not_unique *)
- with
- Not_found ->
- (* No ID attribute *)
- ()
- | ID_not_unique ->
- (* There is already an ID with the same value *)
- raise(Validation_error("ID not unique"))
- end
- end;
-
- if n_tags_open = 0 then begin
- if root = None then begin
- (* We have found the begin tag of the root element. *)
- if config.enable_super_root_node then begin
- (* The user wants the super root instead of the real root.
- * The real root element becomes the child of the super root.
- *)
- (* Assertion: self # current is the super root *)
- assert (self # current # node_type = T_super_root);
- root <- Some (self # current);
- self # current # add_node d;
- doc # init_root (self # current);
- end
- else begin
- (* Normal behaviour: The user wants to get the real root. *)
- root <- Some d;
- doc # init_root d;
- end;
- end
- else
- (* We have found a second topmost element. This is illegal. *)
- raise(WF_error("Document must consist of only one toplevel element"))
- end
- else begin
- (* We have found some inner begin tag. *)
- self # save_data; (* Save outstanding data material first *)
- self # current # add_node d
- end;
-
- if emptiness then
- (* An empty tag like <a/>. *)
- d # local_validate ~use_dfa:config.validate_by_dfa ()
- else begin
- (* A non-empty tag. *)
- Stack.push (d, tag_beg_entid) elstack;
- n_tags_open <- n_tags_open + 1;
- end;
- }}
- ? {{ match !yy_position with
- "attlist" -> raise(WF_error("Bad attribute list"))
- | "emptiness" -> raise(WF_error("`>' or `/>' expected"))
- | _ -> raise(WF_error("Bad start tag"))
- }}
-
-
-attribute():
- /* Parses name="value" */
- n:Name Ignore* Eq Ignore* v:attval() ws:Ignore? Ignore*
- {{ (n,v), ws }}
-
-
-attval():
- v:Attval
- {{ expand_attvalue lexerset dtd v config.warner true }}
-| v:Attval_nl_normalized
- {{ expand_attvalue lexerset dtd v config.warner false }}
-
-
-start_tag_rangle():
- Rangle {{ false }}
-| Rangle_empty {{ true }}
-
-
-end_tag():
- /* parses </element>.
- * Pops the top element from the elstack and checks if it is the same
- * element.
- */
- tag:Tag_end Ignore* Rangle
- {{ let name, tag_end_entid = tag in
- if n_tags_open = 0 then
- raise(WF_error("End-tag without start-tag"));
-
- self # save_data; (* Save outstanding data material first *)
-
- let x, tag_beg_entid = Stack.pop elstack in
- let x_name =
- match x # node_type with
- | T_element n -> n
- | _ -> assert false
- in
- if name <> x_name then
- raise(WF_error("End-tag does not match start-tag"));
- if tag_beg_entid != tag_end_entid then
- raise(WF_error("End-tag not in the same entity as the start-tag"));
- x # local_validate ~use_dfa:config.validate_by_dfa ();
-
- n_tags_open <- n_tags_open - 1;
-
- assert (n_tags_open >= 0);
-
- }}
-
-char_data():
- /* Parses any literal characters not otherwise matching, and adds the
- * characters to the top element of elstack.
- * If elstack is empty, it is assumed that there is no surrounding
- * element, and any non-white space character is forbidden.
- */
- data:CharData
- {{
- if n_tags_open = 0 then
- (* only white space is allowed *)
- self # only_whitespace data
- else
- self # collect_data data
- (* We collect the chardata material until the next end tag is
- * reached. Then the collected material will be concatenated and
- * stored as a single T_data node (see end_tag rule above)
- * using save_data.
- *)
- }}
-| data:Cdata
- {{
- if n_tags_open = 0 then
- raise (WF_error("CDATA section not allowed here"));
- self # collect_data data
- (* Also collect CDATA material *)
- }}
-
-cref():
- /* Parses &#...; and adds the character to the top element of elstack. */
- code:CRef
- {{
- if n_tags_open = 0 then
- (* No surrounding element: character references are not allowed *)
- raise(WF_error("Character reference not allowed here"));
- self # collect_data (character config.encoding config.warner code)
- (* Also collect character references *)
- }}
-
-pi():
- /* Parses <?...?> (but not <?xml white-space ... ?>).
- * If there is a top element in elstack, the processing instruction is added
- * to this element.
- */
- pi: PI
- {{
- let position =
- if config.store_element_positions then
- Some(context.manager # position)
- else
- None
- in
- let target,value = pi in
-
- if n_tags_open = 0 && not config.enable_super_root_node
- then
- doc # add_pinstr (new proc_instruction target value config.encoding)
- else begin
- (* Special case: if processing instructions are processed inline,
- * they are wrapped into T_pinstr nodes.
- *)
- if config.enable_pinstr_nodes then begin
- self # save_data; (* Save outstanding data material first *)
- let pinstr = new proc_instruction target value config.encoding in
- let wrapper = create_pinstr_node
- ?position:position spec dtd pinstr in
- wrapper # local_validate(); (* succeeds always *)
- self # current # add_node wrapper;
- end
- else
- (* Normal behaviour: Add the PI to the parent element. *)
- self # current # add_pinstr
- (new proc_instruction target value config.encoding)
- end
- }}
-
-
-comment():
- /* Parses <!-- ... -->
- */
- Comment_begin
- $ {{
- let position =
- if config.enable_comment_nodes && config.store_element_positions then
- Some(context.manager # position)
- else
- None
- in
- }}
- mat: Comment_material*
- ce: Comment_end
- {{
- if config.enable_comment_nodes then begin
- self # save_data; (* Save outstanding data material first *)
- let comment_text = String.concat "" mat in
- let wrapper = create_comment_node
- ?position:position spec dtd comment_text in
- wrapper # local_validate(); (* succeeds always *)
- self # current # add_node wrapper;
- end
- }}
- ? {{ match !yy_position with
- | "ce" -> raise(WF_error("`-->' expected"))
- | _ -> raise(WF_error("Bad comment"))
- }}
-
-
-%%
- (* The method "parse" continues here... *)
-
- try
- match start_symbol with
- Ext_document ->
- parse_ext_document context.current context.get_next
- | Ext_declarations ->
- parse_ext_declarations context.current context.get_next
- | Ext_element ->
- parse_ext_element context.current context.get_next
- with
- Not_found ->
- raise Parsing.Parse_error
-
- (*********** The method "parse" ends here *************)
-
-
-(**********************************************************************)
-
-(* Here ends the class definition: *)
-end
-;;
-
-(**********************************************************************)
-
-open Pxp_reader;;
-
-
-class default_ext =
- object(self)
- val mutable node = (None : ('a extension node as 'a) option)
- method clone = {< >}
- method node =
- match node with
- None ->
- assert false
- | Some n -> n
- method set_node n =
- node <- Some n
- end
-;;
-
-
-let default_extension = new default_ext;;
-
-let default_spec =
- make_spec_from_mapping
- ~super_root_exemplar: (new element_impl default_extension)
- ~comment_exemplar: (new element_impl default_extension)
- ~default_pinstr_exemplar: (new element_impl default_extension)
- ~data_exemplar: (new data_impl default_extension)
- ~default_element_exemplar: (new element_impl default_extension)
- ~element_mapping: (Hashtbl.create 1)
- ()
-;;
-
-
-let idref_pass id_index root =
- let error t att value =
- let name =
- match t # node_type with
- T_element name -> name
- | _ -> assert false
- in
- let text =
- "Attribute `" ^ att ^ "' of element `" ^ name ^
- "' refers to unknown ID `" ^ value ^ "'" in
- let pos_ent, pos_line, pos_col = t # position in
- if pos_line = 0 then
- raise(Validation_error text)
- else
- raise(At("In entity " ^ pos_ent ^ " at line " ^
- string_of_int pos_line ^ ", position " ^ string_of_int pos_col ^
- ":\n",
- Validation_error text))
- in
-
- let rec check_tree t =
- let idref_atts = t # idref_attribute_names in
- List.iter
- (fun att ->
- match t # attribute att with
- Value s ->
- begin try ignore(id_index # find s) with
- Not_found ->
- error t att s
- end
- | Valuelist l ->
- List.iter
- (fun s ->
- try ignore(id_index # find s) with
- Not_found ->
- error t att s
- )
- l
- | Implied_value -> ()
- )
- idref_atts;
- List.iter check_tree (t # sub_nodes)
- in
- check_tree root
-;;
-
-
-exception Return_DTD of dtd;;
- (* Used by extract_dtd_from_document_entity to jump out of the parser *)
-
-
-let call_parser ~configuration:cfg
- ~source:src
- ~dtd
- ~extensible_dtd
- ~document:doc
- ~specification:spec
- ~process_xmldecl
- ~transform_dtd
- ~(id_index : 'ext #index option)
- ~use_document_entity
- ~entry
- ~init_lexer =
- let e = cfg.errors_with_line_numbers in
- let w = cfg.warner in
- let r, en =
- match src with
- Entity(m,r') -> r', m dtd
- | ExtID(xid,r') -> r',
- if use_document_entity then
- new document_entity
- r' dtd "[toplevel]" w xid e
- cfg.encoding
- else
- new external_entity
- r' dtd "[toplevel]" w xid false e
- cfg.encoding
- in
- r # init_rep_encoding cfg.encoding;
- r # init_warner w;
- en # set_debugging_mode (cfg.debugging_mode);
- let pobj =
- new parser_object
- doc
- dtd
- extensible_dtd
- cfg
- r
- spec
- process_xmldecl
- transform_dtd
- (id_index :> 'ext index option)
- in
- let mgr = new entity_manager en in
- en # open_entity true init_lexer;
- begin try
- let context = make_context mgr in
- pobj # parse context entry;
- ignore(en # close_entity);
- with
- Return_DTD d ->
- ignore(en # close_entity);
- raise(Return_DTD d)
- | error ->
- ignore(en # close_entity);
- r # close_all;
- let pos = mgr # position_string in
- raise (At(pos, error))
- end;
- if cfg.idref_pass then begin
- match id_index with
- None -> ()
- | Some idx ->
- ( match pobj # root with
- None -> ()
- | Some root ->
- idref_pass idx root;
- )
- end;
- pobj
-
-
-let parse_dtd_entity cfg src =
- (* Parse a DTD given as separate entity. *)
- let dtd = new dtd cfg.warner cfg.encoding in
- let doc = new document cfg.warner in
- let pobj =
- call_parser
- ~configuration:cfg
- ~source:src
- ~dtd:dtd
- ~extensible_dtd:true (* Extend the DTD by parsed declarations *)
- ~document:doc
- ~specification:default_spec
- ~process_xmldecl:false (* The XML declaration is ignored
- * (except 'encoding')
- *)
- ~transform_dtd:(fun x -> x) (* Do not transform the DTD *)
- ~id_index: None
- ~use_document_entity:false
- ~entry:Ext_declarations (* Entry point of the grammar *)
- ~init_lexer:Declaration (* The initially used lexer *)
- in
- dtd # validate;
- if cfg.accept_only_deterministic_models then dtd # only_deterministic_models;
- dtd
-;;
-
-
-let parse_content_entity ?id_index cfg src dtd spec =
- (* Parse an element given as separate entity *)
- dtd # validate; (* ensure that the DTD is valid *)
- if cfg.accept_only_deterministic_models then dtd # only_deterministic_models;
- let doc = new document cfg.warner in
- let pobj =
- call_parser
- ~configuration:cfg
- ~source:src
- ~dtd:dtd
- ~extensible_dtd:true (* Extend the DTD by parsed declarations *)
- ~document:doc
- ~specification:spec
- ~process_xmldecl:false (* The XML declaration is ignored
- * (except 'encoding')
- *)
- ~transform_dtd:(fun x -> x) (* Do not transform the DTD *)
- ~id_index:(id_index :> 'ext index option)
- ~use_document_entity:false
- ~entry:Ext_element (* Entry point of the grammar *)
- ~init_lexer:Content (* The initially used lexer *)
- in
- match pobj # root with
- Some r -> r
- | None -> raise(WF_error("No root element"))
-;;
-
-
-let parse_wfcontent_entity cfg src spec =
- let dtd = new dtd cfg.warner cfg.encoding in
- dtd # allow_arbitrary;
- let doc = new document cfg.warner in
- let pobj =
- call_parser
- ~configuration:cfg
- ~source:src
- ~dtd:dtd
- ~extensible_dtd:false (* Do not extend the DTD *)
- ~document:doc
- ~specification:spec
- ~process_xmldecl:false (* The XML declaration is ignored
- * (except 'encoding')
- *)
- ~transform_dtd:(fun x -> x) (* Do not transform the DTD *)
- ~id_index:None
- ~use_document_entity:false
- ~entry:Ext_element (* Entry point of the grammar *)
- ~init_lexer:Content (* The initially used lexer *)
- in
- match pobj # root with
- Some r -> r
- | None -> raise(WF_error("No root element"))
-;;
-
-
-let iparse_document_entity ?(transform_dtd = (fun x -> x))
- ?id_index
- cfg0 src spec p_wf =
- (* Parse an element given as separate entity *)
- (* p_wf: 'true' if in well-formedness mode, 'false' if in validating mode *)
- let cfg = { cfg0 with
- recognize_standalone_declaration =
- cfg0.recognize_standalone_declaration && (not p_wf)
- } in
- let dtd = new dtd cfg.warner cfg.encoding in
- if p_wf then
- dtd # allow_arbitrary;
- let doc = new document cfg.warner in
- let pobj =
- call_parser
- ~configuration:cfg
- ~source:src
- ~dtd:dtd
- ~extensible_dtd:(not p_wf) (* Extend the DTD by parsed declarations
- * only if in validating mode
- *)
- ~document:doc
- ~specification:spec
- ~process_xmldecl:true (* The XML declaration is processed *)
- (* TODO: change to 'not p_wf' ? *)
- ~transform_dtd:(fun dtd ->
- let dtd' = transform_dtd dtd in
- if cfg.accept_only_deterministic_models then
- dtd' # only_deterministic_models;
- dtd')
-
- ~id_index:(id_index :> 'ext index option)
- ~use_document_entity:true
- ~entry:Ext_document (* Entry point of the grammar *)
- ~init_lexer:Document (* The initially used lexer *)
- in
- pobj # doc
-;;
-
-
-let parse_document_entity ?(transform_dtd = (fun x -> x))
- ?id_index
- cfg src spec =
- iparse_document_entity
- ~transform_dtd:transform_dtd
- ?id_index:(id_index : 'ext #index option :> 'ext index option)
- cfg src spec false;;
-
-let parse_wfdocument_entity cfg src spec =
- iparse_document_entity cfg src spec true;;
-
-let extract_dtd_from_document_entity cfg src =
- let transform_dtd dtd = raise (Return_DTD dtd) in
- try
- let doc = parse_document_entity
- ~transform_dtd:transform_dtd
- cfg
- src
- default_spec in
- (* Should not happen: *)
- doc # dtd
- with
- Return_DTD dtd ->
- (* The normal case: *)
- dtd
-;;
-
-
-let default_config =
- let w = new drop_warnings in
- { warner = w;
- errors_with_line_numbers = true;
- enable_pinstr_nodes = false;
- enable_super_root_node = false;
- enable_comment_nodes = false;
- encoding = `Enc_iso88591;
- recognize_standalone_declaration = true;
- store_element_positions = true;
- idref_pass = false;
- validate_by_dfa = true;
- accept_only_deterministic_models = true;
- debugging_mode = false;
- }
-
-
-class [ 'ext ] hash_index =
-object
- constraint 'ext = 'ext node #extension
- val ht = (Hashtbl.create 100 : (string, 'ext node) Hashtbl.t)
- method add s n =
- try
- ignore(Hashtbl.find ht s);
- raise ID_not_unique
- with
- Not_found ->
- Hashtbl.add ht s n
-
- method find s = Hashtbl.find ht s
- method index = ht
-end
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:29 lpadovan
- * Initial revision
- *
- * Revision 1.14 2000/08/26 23:23:14 gerd
- * Bug: from_file must not interpret the file name as URL path.
- * Bug: When PI and comment nodes are generated, the collected data
- * material must be saved first.
- *
- * Revision 1.13 2000/08/19 21:30:03 gerd
- * Improved the error messages of the parser
- *
- * Revision 1.12 2000/08/18 20:16:25 gerd
- * Implemented that Super root nodes, pinstr nodes and comment
- * nodes are included into the document tree.
- *
- * Revision 1.11 2000/08/14 22:24:55 gerd
- * Moved the module Pxp_encoding to the netstring package under
- * the new name Netconversion.
- *
- * Revision 1.10 2000/07/23 02:16:33 gerd
- * Support for DFAs.
- *
- * Revision 1.9 2000/07/14 13:57:29 gerd
- * Added the id_index feature.
- *
- * Revision 1.8 2000/07/09 17:52:45 gerd
- * New implementation for current_data.
- * The position of elements is stored on demand.
- *
- * Revision 1.7 2000/07/09 01:00:35 gerd
- * Improvement: It is now guaranteed that only one data node
- * is added for consecutive character material.
- *
- * Revision 1.6 2000/07/08 16:27:29 gerd
- * Cleaned up the functions calling the parser.
- * New parser argument: transform_dtd.
- * Implementations for 'extract_dtd_from_document_entity' and
- * 'parse_wfcontent_entity'.
- *
- * Revision 1.5 2000/07/06 23:05:18 gerd
- * Initializations of resolvers were missing.
- *
- * Revision 1.4 2000/07/06 22:11:01 gerd
- * Fix: The creation of the non-virtual root element is protected
- * in the same way as the virtual root element.
- *
- * Revision 1.3 2000/07/04 22:15:18 gerd
- * Change: Using the new resolver capabilities.
- * Still incomplete: the new extraction and parsing functions.
- *
- * Revision 1.2 2000/06/14 22:19:06 gerd
- * Added checks such that it is impossible to mix encodings.
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_yacc.m2y:
- *
- * Revision 1.9 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.8 2000/05/27 19:26:19 gerd
- * Change: The XML declaration is interpreted right after
- * it has been parsed (no longer after the document): new function
- * check_and_parse_xmldecl.
- * When elements, attributes, and entities are declared
- * it is stored whether the declaration happens in an external
- * entity (for the standalone check).
- * The option recognize_standalone_declaration is interpreted.
- *
- * Revision 1.7 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.6 2000/05/14 21:51:24 gerd
- * Change: Whitespace is handled by the grammar, and no longer
- * by the entity.
- *
- * Revision 1.5 2000/05/14 17:50:54 gerd
- * Updates because of changes in the token type.
- *
- * Revision 1.4 2000/05/11 22:09:17 gerd
- * Fixed the remaining problems with conditional sections.
- * This seems to be also a weakness of the XML spec!
- *
- * Revision 1.3 2000/05/09 00:02:44 gerd
- * Conditional sections are now recognized by the parser.
- * There seem some open questions; see the TODO comments!
- *
- * Revision 1.2 2000/05/08 22:01:44 gerd
- * Introduced entity managers (see markup_entity.ml).
- * The XML declaration is now recognized by the parser. If such
- * a declaration is found, the method process_xmldecl of the currently
- * active entity is called. If the first token is not an XML declaration,
- * the method process_missing_xmldecl is called instead.
- * Some minor changes.
- *
- * Revision 1.1 2000/05/06 23:21:49 gerd
- * Initial revision.
- *
- *
- * ======================================================================
- *
- * COPIED FROM REVISION 1.19 OF markup_yacc.mly
- *
- * Revision 1.19 2000/05/01 15:20:08 gerd
- * "End tag matches start tag" is checked before "End tag in the
- * same entity as start tag".
- *
- * Revision 1.18 2000/04/30 18:23:08 gerd
- * Bigger change: Introduced the concept of virtual roots. First,
- * this reduces the number of checks. Second, it makes it possible to
- * return the virtual root to the caller instead of the real root (new
- * config options 'virtual_root' and 'processing_instructions_inline').
- * Minor changes because of better CR/CRLF handling.
- *
- * Revision 1.17 2000/03/13 23:47:46 gerd
- * Updated because of interface changes. (See markup_yacc_shadow.mli
- * rev. 1.8)
- *
- * Revision 1.16 2000/01/20 20:54:43 gerd
- * New config.errors_with_line_numbers.
- *
- * Revision 1.15 1999/12/17 22:27:58 gerd
- * Bugfix: The value of 'p_internal_subset' (an instance
- * variable of the parser object) is set to true when the internal subset
- * begins, and is set to false when this subset ends. The error was
- * that references to external entities within this subset did not
- * set 'p_internal_subset' to false; this is now corrected by introducing
- * the 'p_internal_subset_stack'.
- * This is a typical example of how the code gets more and
- * more complicated and that it is very difficult to really understand
- * what is going on.
- *
- * Revision 1.14 1999/11/09 22:23:37 gerd
- * Removed the invocation of "init_dtd" of the root document.
- * This method is no longer available. The DTD is also passed to the
- * document object by the root element, so nothing essential changes.
- *
- * Revision 1.13 1999/10/25 23:37:09 gerd
- * Bugfix: The warning "More than one ATTLIST declaration for element
- * type ..." is only generated if an ATTLIST is found while there are already
- * attributes for the element.
- *
- * Revision 1.12 1999/09/01 23:08:38 gerd
- * New frontend function: parse_wf_document. This simply uses
- * a DTD that allows anything, and the new parameter "extend_dtd" prevents
- * element, attlist, and notation declarations from being added to this
- * DTD. The idea is that this function simulates a well-formedness parser.
- * Tag_beg, Tag_end carry the entity_id. The "elstack" stores the
- * entity_id of the stacked tag. This was necessary because otherwise there
- * are examples that produce incorrectly nested elements.
- * p_internal_subset is a variable that stores whether the internal
- * subset is being parsed. This is important because entity declarations in
- * internal subsets are not allowed to contain parameter references.
- * It is checked if the "elstack" is empty after all has been parsed.
- * Processing instructions outside DTDs and outside elements are now
- * added to the document.
- * The rules of mixed and regexp style content models have been
- * separated. The code is now much simpler.
- * Entity references outside elements are detected and rejected.
- *
- * Revision 1.11 1999/09/01 16:26:08 gerd
- * Improved the quality of error messages.
- *
- * Revision 1.10 1999/08/31 19:13:31 gerd
- * Added checks on proper PE nesting. The idea is that tokens such
- * as Decl_element and Decl_rangle carry an entity ID with them. This ID
- * is simply an object of type < >, i.e. you can only test on identity.
- * The lexer always produces tokens with a dummy ID because it does not
- * know which entity is the current one. The entity layer replaces the dummy
- * ID with the actual ID. The parser checks that the IDs of pairs such as
- * Decl_element and Decl_rangle are the same; otherwise a Validation_error
- * is produced.
- *
- * Revision 1.9 1999/08/15 20:42:01 gerd
- * Corrected a misleading message.
- *
- * Revision 1.8 1999/08/15 20:37:34 gerd
- * Improved error messages.
- * Bugfix: While parsing document entities, the subclass document_entity is
- * now used instead of external_entity. The rules in document entities are a bit
- * stronger.
- *
- * Revision 1.7 1999/08/15 14:03:59 gerd
- * Empty documents are not allowed.
- * "CDATA section not allowed here" is a WF_error, not a Validation_
- * error.
- *
- * Revision 1.6 1999/08/15 02:24:19 gerd
- * Removed some grammar rules that were used for testing.
- * Documents without DTD can now have arbitrary elements (formerly
- * they were not allowed to have any element).
- *
- * Revision 1.5 1999/08/14 22:57:20 gerd
- * It is allowed that external entities are empty because the
- * empty string is well-parsed for both declarations and contents. Empty
- * entities can be referenced anywhere because the references are replaced
- * by nothing. Because of this, the Begin_entity...End_entity brace is only
- * inserted if the entity is non-empty. (Otherwise references to empty
- * entities would not be allowed anywhere.)
- * As a consequence, the grammar has been changed such that a
- * single Eof is equivalent to Begin_entity,End_entity without content.
- *
- * Revision 1.4 1999/08/14 22:20:01 gerd
- * The "config" slot has now a component "warner" which is
- * an object with a "warn" method. This is used to warn about characters
- * that cannot be represented in the Latin 1 alphabet.
- * Furthermore, there is a new component "debugging_mode".
- * Some Parse_error exceptions have been changed into Validation_error.
- * The interfaces of functions/classes imported from other modules
- * have changed; the invocations have been adapted.
- * Contents may contain CDATA sections that have been forgotten.
- *
- * Revision 1.3 1999/08/11 15:00:41 gerd
- * The Begin_entity ... End_entity brace is also possible in
- * 'contents'.
- * The configuration passed to the parsing object contains always
- * the resolver that is actually used.
- *
- * Revision 1.2 1999/08/10 21:35:12 gerd
- * The XML/encoding declaration at the beginning of entities is
- * evaluated. In particular, entities have now a method "xml_declaration"
- * which returns the name/value pairs of such a declaration. The "encoding"
- * setting is interpreted by the entity itself; "version", and "standalone"
- * are interpreted by Markup_yacc.parse_document_entity. Other settings
- * are ignored (this does not conform to the standard; the standard prescribes
- * that "version" MUST be given in the declaration of document; "standalone"
- * and "encoding" CAN be declared; no other settings are allowed).
- * TODO: The user should be warned if the standard is not exactly
- * fulfilled. -- The "standalone" property is not checked yet.
- *
- * Revision 1.1 1999/08/10 00:35:52 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- * PXP: The polymorphic XML parser for Objective Caml.
- * Copyright by Gerd Stolpmann. See LICENSE for details.
- *)
-
-
-(*$ markup-yacc.mli *)
-
-open Pxp_types
-open Pxp_dtd
-open Pxp_document
-
-exception ID_not_unique
-
-class type [ 'ext ] index =
-object
- (* The type of indexes over the ID attributes of the elements. This type
- * is the minimum requirement needed by the parser to create such an index.
- *)
- constraint 'ext = 'ext node #extension
- method add : string -> 'ext node -> unit
- (* Add the passed node to the index. If there is already an ID with
- * the passed string value, the exception ID_not_unique should be
- * raised. (But the index is also free to accept several identical IDs.)
- *)
- method find : string -> 'ext node
- (* Finds the node with the passed ID value, or raises Not_found *)
-end
-;;
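The class type above only fixes the add and find methods, so index behaviours other than hash_index (declared below) are possible. A minimal, purely illustrative sketch of an alternative index that silently keeps only the first node registered per ID; the class name first_wins_index is made up:

class [ 'ext ] first_wins_index =
object
  constraint 'ext = 'ext node #extension
  val ht = (Hashtbl.create 100 : (string, 'ext node) Hashtbl.t)
  (* Accept duplicate IDs; keep only the first node per ID value. *)
  method add s n = if not (Hashtbl.mem ht s) then Hashtbl.add ht s n
  (* Raises Not_found for unknown IDs, as the class type requires. *)
  method find s = Hashtbl.find ht s
end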
-
-
-class [ 'ext ] hash_index :
-object
- (* This is a simple implementation of 'index' using a hash table. *)
- constraint 'ext = 'ext node #extension
- method add : string -> 'ext node -> unit
- (* See above. *)
- method find : string -> 'ext node
- (* See above. *)
- method index : (string, 'ext node) Hashtbl.t
- (* Returns the hash table. *)
-end
-;;
-
-
-type config =
- { warner : collect_warnings;
- (* An object that collects warnings. *)
-
- errors_with_line_numbers : bool;
- (* Whether error messages contain line numbers or not. The parser
- * is 10 to 20 per cent faster if line numbers are turned off;
- * you get only byte positions in this case.
- *)
-
- enable_pinstr_nodes : bool;
- (* true: turns a special mode for processing instructions on. Normally,
- * you cannot determine the exact location of a PI; you only know
- * in which element the PI occurs. This mode makes it possible
- * to find out the exact location: Every PI is artificially wrapped
- * by a special node with type T_pinstr. For example, if the XML text
- * is <a><?x?><?y?></a>, the parser normally produces only an element
- * object for "a", and puts the PIs "x" and "y" into it (without
- * order). In this mode, the object "a" will contain two objects
- * with type T_pinstr, and the first object will contain "x", and the
- * second "y": the object tree looks like
- * - Node with type = T_element "a"
- * - Node with type = T_pinstr "x"
- * + contains processing instruction "x"
- * - Node with type = T_pinstr "y"
- * + contains processing instruction "y"
- *
- * Notes:
- * (1) In past versions of PXP this mode was called
- * processing_instructions_inline, and it produced nodes of
- * type T_element "-pi" instead of T_pinstr.
- * (2) The T_pinstr nodes are created from the pinstr exemplars
- * in your spec
- *)
-
- enable_super_root_node : bool;
- (* true: the topmost element of the XML tree is not the root element,
- * but the so-called super root. The root element is a child of the
- * super root. The super root is a node with type T_super_root.
- * The following behaviour changes, too:
- * - PIs occurring outside the root element and outside the DTD are
- * added to the super root instead of the document object
- * - If enable_pinstr_nodes is also turned on, the PI wrappers
- * are added to the super root
- *
- * For example, the document
- * <?x?><a>y</a><?y?>
- * is normally represented by:
- * - document object
- * + contains PIs x and y
- * - reference to root node with type = T_element "a"
- * - node with type = T_data: contains "y"
- * With enabled super root node:
- * - document object
- * - reference to super root node with type = T_super_root
- * + contains PIs x and y
- * - root node with type = T_element "a"
- * - node with type = T_data: contains "y"
- * If also enable_pinstr_nodes:
- * - document object
- * - reference to super root node with type = T_super_root
- * - node with type = T_pinstr "x"
- * + contains PI "x"
- * - root node with type = T_element "a"
- * - node with type = T_data: contains "y"
- * - node with type = T_pinstr "y"
- * + contains PI "y"
- * Notes:
- * (1) In previous versions of PXP this mode was called
- * virtual_root, and it produced an additional node of type
- * T_element "-vr" instead of T_super_root.
- * (2) The T_super_root node is created from the super root exemplar
- * in your spec.
- *)
-
- enable_comment_nodes : bool;
- (* When enabled, comments are represented as nodes with type =
- * T_comment.
- * To access the contents of comments, use the method "comment"
- * for the comment nodes.
- * These nodes behave like elements; however, they are normally
- * empty and do not have attributes. Note that it is possible to
- * add children to comment nodes and to set attributes, but it is
- * strongly recommended not to do so. There are no checks on
- * such abnormal use, because they would cost too
- * much time, even when no comment nodes are generated at all.
- *
- * Comment nodes should be disabled unless you must parse a
- * third-party XML text which uses comments as another data
- * container.
- *
- * The nodes of type T_comment are created from the comment exemplars
- * in your spec.
- *)
-
- encoding : rep_encoding;
- (* Specifies the encoding used for the *internal* representation
- * of any character data.
- * Note that the default is still Enc_iso88591.
- *)
-
- recognize_standalone_declaration : bool;
- (* Whether the "standalone" declaration is recognized or not.
- * This option does not have an effect on well-formedness parsing:
- * in this case such declarations are never recognized.
- *
- * Recognizing the "standalone" declaration means that the
- * value of the declaration is scanned and passed to the DTD,
- * and that the "standalone-check" is performed.
- *
- * Standalone-check: If a document is flagged standalone='yes'
- * some additional constraints apply. The idea is that a parser
- * without access to any external document subsets can still parse
- * the document, and will still return the same values as the parser
- * with such access. For example, if the DTD is external and if
- * there are attributes with default values, it is checked that there
- * is no element instance where these attributes are omitted - the
- * parser would return the default value but this requires access to
- * the external DTD subset.
- *)
-
- store_element_positions : bool;
- (* Whether the file name, the line and the column of the
- * beginning of elements are stored in the element nodes.
- * This option may be useful to generate error messages.
- *
- * Positions are only stored for:
- * - Elements
- * - Wrapped processing instructions (see enable_pinstr_nodes)
- * For all other node types, no position is stored.
- *
- * You can access positions by the method "position" of nodes.
- *)
-
- idref_pass : bool;
- (* Whether the parser does a second pass and checks that all
- * IDREF and IDREFS attributes contain valid references.
- * This option works only if an ID index is available. To create
- * an ID index, pass an index object as id_index argument to the
- * parsing functions (such as parse_document_entity; see below).
- *
- * "Second pass" does not mean that the XML text is again parsed;
- * only the existing document tree is traversed, and the check
- * on bad IDREF/IDREFS attributes is performed for every node.
- *)
-
- validate_by_dfa : bool;
- (* If true, and if DFAs are available for validation, the DFAs will
- * actually be used for validation.
- * If false, or if no DFAs are available, the standard backtracking
- * algorithm will be used.
- * DFA = deterministic finite automaton.
- *
- * DFAs are only available if accept_only_deterministic_models is
- * "true" (because in this case, it is relatively cheap to construct
- * the DFAs). DFAs are a data structure which ensures that validation
- * can always be performed in linear time.
- *
- * I strongly recommend using DFAs; however, there are examples
- * for which validation by backtracking is faster.
- *)
-
- accept_only_deterministic_models : bool;
- (* Whether only deterministic content models are accepted in DTDs. *)
-
- (* The following options are not implemented, or only for internal
- * use.
- *)
-
- debugging_mode : bool;
- }
-
-
-type source =
- Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
- | ExtID of (ext_id * Pxp_reader.resolver)
-
-val from_channel :
- ?system_encoding:encoding -> ?id:ext_id -> ?fixenc:encoding ->
- in_channel -> source
-
-val from_string :
- ?fixenc:encoding -> string -> source
-
-val from_file :
- ?system_encoding:encoding -> string -> source
-
-(* Notes on sources (version 2):
- *
- * Sources specify where the XML text to parse comes from. Sources not only
- * represent character streams, but also external IDs (i.e. SYSTEM or PUBLIC
- * names), and they are read using a specific character encoding.
- * A source should be associated with an external ID, because otherwise
- * it is not known how to handle relative names.
- *
- * There are two primary sources, Entity and ExtID, and several functions
- * for derived sources. First explanations for the functions:
- *
- * from_channel: The XML text is read from an in_channel. By default, the
- * channel is not associated with an external ID, and it is impossible
- * to resolve relative SYSTEM IDs found in the document.
- * If the ?id argument is passed, it is assumed that the channel has this
- * external ID. If relative SYSTEM IDs occur in the document, they can
- * be interpreted; however, it is only possible to read from "file:"
- * IDs.
- * By default, the channel automatically detects the encoding. You can
- * set a fixed encoding by passing the ?fixenc argument.
- *
- * from_string: The XML text is read from a string.
- * It is impossible to read from any external entity whose reference is found
- * in the string.
- * By default, the encoding of the string is detected automatically. You can
- * set a fixed encoding by passing the ?fixenc argument.
- *
- * from_file: The XML text is read from the file whose file name is
- * passed to the function (as UTF-8 string).
- * Relative system IDs can be interpreted by this function.
- * The ?system_encoding argument specifies the character encoding used
- * for file names (sic!). By default, UTF-8 is assumed.
- *
- * Examples:
- *
- * from_file "/tmp/file.xml":
- * reads from this file, which is assumed to have the ID
- * SYSTEM "file://localhost/tmp/file.xml".
- *
- * let ch = open_in "/tmp/file.xml" in
- * from_channel ~id:(System "file://localhost/tmp/file.xml") ch
- * This does the same, but uses a channel.
- *
- * from_channel ~id:(System "http://host/file.xml")
- * ch
- * reads from the channel ch, and it is assumed that the ID is
- * SYSTEM "http://host/file.xml". If there is any relative SYSTEM ID,
- * it will be interpreted relative to this location; however, there is
- * no way to read via HTTP.
- * If there is any "file:" SYSTEM ID, it is possible to read the file.
- *
- * The primary sources:
- *
- * - ExtID(x,r): The identifier x (either the SYSTEM or the PUBLIC name) of the
- * entity to read from is passed to the resolver, and the resolver finds
- * the entity and opens it.
- * The intention of this option is to allow customized
- * resolvers to interpret external identifiers without any restriction.
- * The Pxp_reader module contains several classes allowing the user to
- * compose such a customized resolver from predefined components.
- *
- * ExtID is the interface of choice for your own resolver extensions.
- *
- * - Entity(m,r): You can implement any behaviour by using a customized
- * entity class. Once the DTD object d is known that will be used during
- * parsing, the entity e = m d is determined and used together with the
- * resolver r.
- * This is only for hackers.
- *)
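To make the three source constructors concrete, a small sketch mirroring the examples above; the file name and XML text are made up:

let src1 = from_string "<doc>Hello</doc>"
  (* no external ID: relative SYSTEM IDs in the text cannot be resolved *)
let src2 = from_file "/tmp/sample.xml"
  (* ID is SYSTEM "file://localhost/tmp/sample.xml"; relative IDs work *)
let src3 =
  from_channel ~id:(System "file://localhost/tmp/sample.xml")
    (open_in "/tmp/sample.xml")
  (* same as src2, but reading from an explicitly opened channel *)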
-
-
-
-val default_config : config
- (* - Warnings are thrown away
- * - Error messages will contain line numbers
- * - Neither T_super_root nor T_pinstr nor T_comment nodes are generated
- * - The internal encoding is ISO-8859-1
- * - The standalone declaration is checked
- * - Element positions are stored
- * - The IDREF pass is left out
- * - If available, DFAs are used for validation
- * - Only deterministic content models are accepted
- *)
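Because config is an ordinary record, individual options can be overridden with a functional record update. A minimal sketch building on default_config:

let my_config =
  { default_config with
      enable_comment_nodes = true;  (* represent comments as T_comment nodes *)
      idref_pass = true;            (* verify IDREF/IDREFS in a second pass *)
  }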
-
-val default_extension : ('a node extension) as 'a
- (* A "null" extension; an extension that does not extend the functionality *)
-
-val default_spec : ('a node extension as 'a) spec
- (* Specifies that you do not want to use extensions. *)
-
-val parse_dtd_entity : config -> source -> dtd
- (* Parse an entity containing a DTD (external subset), and return this DTD. *)
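A short usage sketch; the file name is illustrative:

let dtd = parse_dtd_entity default_config (from_file "/tmp/sample.dtd")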
-
-val extract_dtd_from_document_entity : config -> source -> dtd
- (* Parses a closed document, i.e. a document beginning with <!DOCTYPE...>,
- * and returns the DTD contained in the document.
- * The parts of the document outside the DTD are actually not parsed,
- * i.e. parsing stops when all declarations of the DTD have been read.
- *)
-
-val parse_document_entity :
- ?transform_dtd:(dtd -> dtd) ->
- ?id_index:('ext index) ->
- config -> source -> 'ext spec -> 'ext document
- (* Parse a closed document, i.e. a document beginning with <!DOCTYPE...>,
- * and validate the contents of the document against the DTD contained
- * and/or referenced in the document.
- *
- * If the optional argument ~transform_dtd is passed, the following
- * modification applies: After the DTD (both the internal and external
- * subsets) has been parsed, the function ~transform_dtd is called,
- * and the resulting DTD is actually used to validate the document.
- *
- * If the optional argument ~transform_dtd is missing, the parser
- * behaves in the same way as if the identity were passed as ~transform_dtd.
- *
- * If the optional argument ~id_index is present, the parser adds
- * any ID attribute to the passed index. An index is required to detect
- * violations of the uniqueness of IDs.
- *)
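A hedged sketch of a validating parse with an ID index; the file name is made up, and the coercion of the hash_index object to the index class type reflects the signature above:

let idx = new hash_index
let document =
  parse_document_entity
    ~id_index:(idx :> _ index)
    { default_config with idref_pass = true }
    (from_file "/tmp/sample.xml")
    default_spec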
-
-val parse_wfdocument_entity :
- config -> source -> 'ext spec -> 'ext document
- (* Parse a closed document (see parse_document_entity), but do not
- * validate it. Only checks on well-formedness are performed.
- *)
-
-val parse_content_entity :
- ?id_index:('ext index) ->
- config -> source -> dtd -> 'ext spec -> 'ext node
- (* Parse a file representing a well-formed fragment of a document. The
- * fragment must be a single element (i.e. something like <a>...</a>;
- * not a sequence like <a>...</a><b>...</b>). The element is validated
- * against the passed DTD, but it is not checked whether the element is
- * the root element specified in the DTD.
- *
- * If the optional argument ~id_index is present, the parser adds
- * any ID attribute to the passed index. An index is required to detect
- * violations of the uniqueness of IDs.
- *)
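A sketch of parsing a single-element fragment against an already available DTD, reusing the dtd value from the parse_dtd_entity sketch above; the fragment text is made up:

let fragment =
  parse_content_entity default_config
    (from_string "<item>some text</item>")
    dtd
    default_spec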
-
-val parse_wfcontent_entity :
- config -> source -> 'ext spec -> 'ext node
- (* Parse a file representing a well-formed fragment of a document
- * (see parse_content_entity). The fragment is not validated, only
- * checked for well-formedness.
- *)
-
-
-(*$-*)
-
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:30 lpadovan
- * Initial revision
- *
- * Revision 1.7 2000/08/18 20:15:43 gerd
- * Config options:
- * - enable_super_root_nodes: new name for virtual_root
- * - enable_pinstr_nodes: new name for processing_instructions_inline
- * - enable_comment_nodes: new option
- * Updated comments for various options.
- *
- * Revision 1.6 2000/07/23 02:16:33 gerd
- * Support for DFAs.
- *
- * Revision 1.5 2000/07/14 13:57:29 gerd
- * Added the id_index feature.
- *
- * Revision 1.4 2000/07/09 17:52:54 gerd
- * New option store_element_positions.
- *
- * Revision 1.3 2000/07/08 16:26:21 gerd
- * Added the signatures of the functions
- * 'extract_dtd_from_document_entity' and 'parse_wfcontent_entity'.
- * Updated the signature of 'parse_document_entity': New optional
- * argument 'transform_dtd'.
- * Updated the comments.
- *
- * Revision 1.2 2000/07/04 22:09:03 gerd
- * MAJOR CHANGE: Redesign of the interface (not yet complete).
- *
- * Revision 1.1 2000/05/29 23:48:38 gerd
- * Changed module names:
- * Markup_aux into Pxp_aux
- * Markup_codewriter into Pxp_codewriter
- * Markup_document into Pxp_document
- * Markup_dtd into Pxp_dtd
- * Markup_entity into Pxp_entity
- * Markup_lexer_types into Pxp_lexer_types
- * Markup_reader into Pxp_reader
- * Markup_types into Pxp_types
- * Markup_yacc into Pxp_yacc
- * See directory "compatibility" for (almost) compatible wrappers emulating
- * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
- *
- * ======================================================================
- * Old logs from markup_yacc.mli:
- *
- * Revision 1.4 2000/05/29 21:14:57 gerd
- * Changed the type 'encoding' into a polymorphic variant.
- *
- * Revision 1.3 2000/05/27 19:24:01 gerd
- * New option: recognize_standalone_declaration.
- *
- * Revision 1.2 2000/05/20 20:31:40 gerd
- * Big change: Added support for various encodings of the
- * internal representation.
- *
- * Revision 1.1 2000/05/06 23:21:49 gerd
- * Initial revision.
- *
- * Revision 1.9 2000/04/30 18:23:38 gerd
- * New config options 'processing_instructions_inline' and
- * 'virtual_root'.
- *
- * Revision 1.8 2000/03/13 23:46:46 gerd
- * Change: The 'resolver' component of the 'config' type has
- * disappeared. Instead, there is a new resolver component in the Entity
- * and ExtID values of 'source'. I hope that this makes clearer that the
- * resolver has only an effect if used together with Entity and ExtID
- * sources.
- * Change: The Entity value can now return the entity dependent
- * on the DTD that is going to be used.
- *
- * Revision 1.7 2000/02/22 02:32:02 gerd
- * Updated.
- *
- * Revision 1.6 2000/02/22 01:52:45 gerd
- * Added documentation.
- *
- * Revision 1.5 2000/01/20 20:54:43 gerd
- * New config.errors_with_line_numbers.
- *
- * Revision 1.4 1999/09/01 23:09:10 gerd
- * New function parse_wf_entity that simulates a well-formedness
- * parser.
- *
- * Revision 1.3 1999/09/01 16:26:36 gerd
- * Added an empty line. This is *really* a big change.
- *
- * Revision 1.2 1999/08/14 22:20:27 gerd
- * The "config" slot has now a component "warner"which is
- * an object with a "warn" method. This is used to warn about characters
- * that cannot be represented in the Latin 1 alphabet.
- * Furthermore, there is a new component "debugging_mode".
- *
- * Revision 1.1 1999/08/10 00:35:52 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-- Conditional sections:
-
- Conditional_begin and Conditional_end must be in the same entity.
-
-- NDATA: check whether ENTITY attributes refer only to declared
- NDATA entities
+++ /dev/null
-.PHONY: all
-all:
- $(MAKE) -C reader
- $(MAKE) -C write
- $(MAKE) -C codewriter
- $(MAKE) -C canonxml
- $(MAKE) -C negative
-
-.PHONY: clean
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.cmxa
-
-.PHONY: CLEAN
-CLEAN: clean
- $(MAKE) -C reader clean
- $(MAKE) -C write clean
- $(MAKE) -C codewriter clean
- $(MAKE) -C canonxml clean
- $(MAKE) -C negative clean
-
-.PHONY: distclean
-distclean: clean
- rm -f *~
- rm -f dumpfiles
- $(MAKE) -C reader distclean
- $(MAKE) -C write distclean
- $(MAKE) -C codewriter distclean
- $(MAKE) -C canonxml distclean
- $(MAKE) -C negative distclean
-
-dumpfiles: dumpfiles.ml
- ocamlc -o dumpfiles dumpfiles.ml
+++ /dev/null
-----------------------------------------------------------------------
-(Anti) Regression tests
-----------------------------------------------------------------------
-
-- To build the tests, "markup" must already be compiled in the parent directory (..).
- Do "make" to start the compilation.
-
-- To run the tests:
- ./run
-
-- Program dumpfiles: Do "make dumpfiles" to create it.
- It takes XML file names on the command line and writes a LaTeX
- document to stdout. The document shows the contents of all the files.
- EXAMPLE:
- $ ./dumpfiles canonxml/data_jclark_valid/ext-sa/*.* >x.tex
- $ latex x
+++ /dev/null
-# make validate: make bytecode executable
-# make validate.opt: make native executable
-# make clean: remove intermediate files (in this directory)
-# make CLEAN: remove intermediate files (recursively)
-# make distclean: remove any superfluous files (recursively)
-#----------------------------------------------------------------------
-
-OCAMLPATH=../..
-
-test_canonxml: test_canonxml.ml
- ocamlfind ocamlc -g -custom -o test_canonxml -package .,str -linkpkg test_canonxml.ml
-
-#----------------------------------------------------------------------
-.PHONY: all
-all:
-
-.PHONY: clean
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa out.xml
-
-.PHONY: CLEAN
-CLEAN: clean
-
-.PHONY: distclean
-distclean: clean
- rm -f *~
- rm -f test_canonxml
+++ /dev/null
-----------------------------------------------------------------------
-Regression test "canonxml":
-----------------------------------------------------------------------
-
-- An XML file is parsed, and the contents are printed in a canonical
- format.
-
-- The output is compared with a reference file. The test passes
- only if the output and the reference are equal.
-
-- Test data "data_jclark_valid":
- Contains the samples by James Clark that are valid. The subdirectories:
- - sa: standalone documents
- - not-sa: non-standalone documents (with an external DTD)
- - ext-sa: non-standalone documents (with other external entities)
-
- Tests that do not pass have been moved into the *-problems directories.
- The reason is typically that they use characters outside the
- Latin 1 character set.
-
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e SYSTEM "001.ent">
-]>
-<doc>&e;</doc>
+++ /dev/null
-Data
\ No newline at end of file
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e SYSTEM "002.ent">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e SYSTEM "003.ent">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e SYSTEM "004.ent">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<e/><e/><e/>
\ No newline at end of file
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (e*)>
-<!ELEMENT e EMPTY>
-<!ENTITY e SYSTEM "005.ent">
-]>
-<doc>&e;</doc>
+++ /dev/null
-Data
-<e/>
-More data
-<e/>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA|e)*>
-<!ELEMENT e EMPTY>
-<!ENTITY e SYSTEM "006.ent">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e SYSTEM "007.ent">
-]>
-<doc>X&e;Z</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e SYSTEM "008.ent">
-]>
-<doc>X&e;Z</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e SYSTEM "009.ent">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e SYSTEM "010.ent">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e PUBLIC "a not very interesting file" "011.ent">
-]>
-<doc>&e;</doc>
+++ /dev/null
-&e4;
\ No newline at end of file
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e1 "&e2;">
-<!ENTITY e2 "&e3;">
-<!ENTITY e3 SYSTEM "012.ent">
-<!ENTITY e4 "&e5;">
-<!ENTITY e5 "(e5)">
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>&e1;</doc>
+++ /dev/null
-<e/>
\ No newline at end of file
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (e)>
-<!ELEMENT e (#PCDATA)>
-<!ATTLIST e
- a1 CDATA "a1 default"
- a2 NMTOKENS "a2 default"
->
-<!ENTITY x SYSTEM "013.ent">
-]>
-<doc>&x;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e SYSTEM "014.ent">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<doc>Data </doc>
\ No newline at end of file
+++ /dev/null
-<doc>Data</doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc>Data </doc>
\ No newline at end of file
+++ /dev/null
-<doc><e></e><e></e><e></e></doc>
\ No newline at end of file
+++ /dev/null
-<doc>Data <e></e> More data <e></e> </doc>
\ No newline at end of file
+++ /dev/null
-<doc>XYZ</doc>
\ No newline at end of file
+++ /dev/null
-<doc>XYZ</doc>
\ No newline at end of file
+++ /dev/null
-<doc> </doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc>xyzzy </doc>
\ No newline at end of file
+++ /dev/null
-<doc>(e5)</doc>
\ No newline at end of file
+++ /dev/null
-<doc><e a1="a1 default" a2="a2 default"></e></doc>
\ No newline at end of file
+++ /dev/null
-<doc>data</doc>
\ No newline at end of file
+++ /dev/null
-<!DOCTYPE doc SYSTEM "001.ent" [
-<!ELEMENT doc EMPTY>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "002.ent" [
-<!ELEMENT doc EMPTY>
-]>
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<!ENTITY % e SYSTEM "003-2.ent">
-<!ATTLIST doc a1 CDATA %e; "v1">
+++ /dev/null
-<!DOCTYPE doc SYSTEM "003-1.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<!ENTITY % e1 SYSTEM "004-2.ent">
-<!ENTITY % e2 "%e1;">
-%e1;
+++ /dev/null
-<!ATTLIST doc a1 CDATA "value">
+++ /dev/null
-<!DOCTYPE doc SYSTEM "004-1.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<!ENTITY % e SYSTEM "005-2.ent">
-%e;
+++ /dev/null
-<!ATTLIST doc a1 CDATA "v1">
+++ /dev/null
-<!DOCTYPE doc SYSTEM "005-1.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<!ATTLIST doc a1 CDATA "w1" a2 CDATA "w2">
+++ /dev/null
-<!DOCTYPE doc SYSTEM "006.ent" [
-<!ATTLIST doc a1 CDATA "v1">
-]>
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA "v1">
+++ /dev/null
-<!DOCTYPE doc SYSTEM "007.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA "v1">
+++ /dev/null
-<!DOCTYPE doc PUBLIC "whatever" "008.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA "v1">
+++ /dev/null
-<!DOCTYPE doc PUBLIC "whatever" "009.ent" [
-<!ATTLIST doc a2 CDATA "v2">
-]>
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA "v2">
+++ /dev/null
-<!DOCTYPE doc SYSTEM "010.ent" [
-<!ATTLIST doc a1 CDATA "v1">
-]>
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA "v1">
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY % e SYSTEM "011.ent">
-%e;
-]>
-<doc></doc>
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA "v1">
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY % e SYSTEM "012.ent">
-%e;
-]>
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<![ INCLUDE [
-<!ATTLIST doc a1 CDATA "v1">
-]]>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "013.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<![ %e; [
-<!ATTLIST doc a1 CDATA "v1">
-]]>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "014.ent" [
-<!ENTITY % e "INCLUDE">
-]>
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<![ %e; [
-<!ATTLIST doc a1 CDATA "v1">
-]]>
-<!ATTLIST doc a2 CDATA "v2">
+++ /dev/null
-<!DOCTYPE doc SYSTEM "015.ent" [
-<!ENTITY % e "IGNORE">
-]>
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<![%e;[
-<!ATTLIST doc a1 CDATA "v1">
-]]>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "016.ent" [
-<!ENTITY % e "INCLUDE">
-]>
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e "<!ATTLIST doc a1 CDATA 'v1'>">
-%e;
+++ /dev/null
-<!DOCTYPE doc SYSTEM "017.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e "'v1'">
-<!ATTLIST doc a1 CDATA %e;>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "018.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e "'v1'">
-<!ATTLIST doc a1 CDATA%e;>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "019.ent">
-<doc></doc>
+++ /dev/null
-<!ENTITY % e "doc">
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST%e;a1 CDATA "v1">
+++ /dev/null
-<!DOCTYPE doc SYSTEM "020.ent">
-<doc></doc>
+++ /dev/null
-<!ENTITY % e "doc a1 CDATA">
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST %e; "v1">
+++ /dev/null
-<!DOCTYPE doc SYSTEM "021.ent">
-<doc></doc>
+++ /dev/null
-<!ENTITY % e "INCLUDE[">
-<!ELEMENT doc (#PCDATA)>
-<![ %e; <!ATTLIST doc a1 CDATA "v1"> ]]>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "022.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e1 "do">
-<!ENTITY % e2 "c">
-<!ENTITY % e3 "%e1;%e2;">
-<!ATTLIST %e3; a1 CDATA "v1">
+++ /dev/null
-<!DOCTYPE doc SYSTEM "023.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e1 "'v1'">
-<!ENTITY % e2 'a1 CDATA %e1;'>
-<!ATTLIST doc %e2;>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "024.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<!ENTITY % e "x">
-<!ENTITY % e "y">
-<!ENTITY % v "'%e;'">
-<!ATTLIST doc a1 CDATA %v;>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "025.ent">
-<doc></doc>
+++ /dev/null
-<!ATTLIST doc a1 CDATA "w1">
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc ANY>
-<!ENTITY % e SYSTEM "026.ent">
-%e;
-<!ATTLIST doc a1 CDATA "x1" a2 CDATA "x2">
-]>
-<doc></doc>
+++ /dev/null
-<!ENTITY % e "">
-<!ELEMENT doc (#PCDATA %e;)>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "027.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<![INCLUDE[<!ATTLIST doc a1 CDATA "v1">]]>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "028.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<![IGNORE[<!ATTLIST doc a1 CDATA "v1">]]>
-<!ATTLIST doc a1 CDATA "v2">
+++ /dev/null
-<!DOCTYPE doc SYSTEM "029.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<![IGNORE[]]>
-<![INCLUDE[]]>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "030.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e SYSTEM "031-2.ent">
-<!ENTITY e "<![CDATA[%e;]]>">
+++ /dev/null
-<!ATTLIST doc a1 CDATA "v1">
+++ /dev/null
-<!DOCTYPE doc SYSTEM "031-1.ent">
-<doc>&e;</doc>
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="value"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1" a2="w2"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1" a2="v2"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a2="v2"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="x"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="w1" a2="x2"></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v2"></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc><!ATTLIST doc a1 CDATA "v1"> </doc>
\ No newline at end of file
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc ></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc >
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA #IMPLIED>
-]>
-<doc a1="v1"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA #IMPLIED>
-]>
-<doc a1 = "v1"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA #IMPLIED>
-]>
-<doc a1='v1'></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc> </doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>&<>"'</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc> </doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA #IMPLIED>
-]>
-<doc a1="v1" ></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA #IMPLIED a2 CDATA #IMPLIED>
-]>
-<doc a1="v1" a2="v2"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc : CDATA #IMPLIED>
-]>
-<doc :="v1"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc _.-0123456789 CDATA #IMPLIED>
-]>
-<doc _.-0123456789="v1"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc abcdefghijklmnopqrstuvwxyz CDATA #IMPLIED>
-]>
-<doc abcdefghijklmnopqrstuvwxyz="v1"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc ABCDEFGHIJKLMNOPQRSTUVWXYZ CDATA #IMPLIED>
-]>
-<doc ABCDEFGHIJKLMNOPQRSTUVWXYZ="v1"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc><?pi?></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc><?pi some data ? > <??></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc><![CDATA[<foo>]]></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc><![CDATA[<&]]></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc><![CDATA[<&]>]]]></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc><!-- a comment --></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc><!-- a comment ->--></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e "">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (foo)>
-<!ELEMENT foo (#PCDATA)>
-<!ENTITY e "<foo></foo>">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (foo*)>
-<!ELEMENT foo (#PCDATA)>
-]>
-<doc><foo/><foo></foo></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (foo*)>
-<!ELEMENT foo EMPTY>
-]>
-<doc><foo/><foo></foo></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (foo*)>
-<!ELEMENT foo ANY>
-]>
-<doc><foo/><foo></foo></doc>
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<?xml version='1.0'?>
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<?xml version = "1.0"?>
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<?xml version='1.0' encoding="UTF-8"?>
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<?xml version='1.0' standalone='yes'?>
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<?xml version='1.0' encoding="UTF-8" standalone='yes'?>
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc/>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc />
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
-<?pi data?>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
-<!-- comment -->
-
+++ /dev/null
-<!-- comment -->
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
-
+++ /dev/null
-<?pi data?>
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA #IMPLIED>
-]>
-<doc a1=""<&>'"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA #IMPLIED>
-]>
-<doc a1="A"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>A</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ATTLIST doc a1 CDATA #IMPLIED>
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc a1="foo
-bar"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (e*)>
-<!ELEMENT e EMPTY>
-<!ATTLIST e a1 CDATA "v1" a2 CDATA "v2" a3 CDATA #IMPLIED>
-]>
-<doc>
-<e a3="v3"/>
-<e a1="w1"/>
-<e a2="w2" a3="v3"/>
-</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA "v1">
-<!ATTLIST doc a1 CDATA "z1">
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA "v1">
-<!ATTLIST doc a2 CDATA "v2">
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>X
-Y</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>]</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>ð€€ô¿½</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<e/>">
-<!ELEMENT doc (e)>
-<!ELEMENT e EMPTY>
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-
-
-<doc
-></doc
->
-
-
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<?pi data?>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>A</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (a*)>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ATTLIST doc a1 NMTOKENS #IMPLIED>
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc a1=" 1 2 "></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (e*)>
-<!ELEMENT e EMPTY>
-<!ATTLIST e a1 CDATA #IMPLIED a2 CDATA #IMPLIED a3 CDATA #IMPLIED>
-]>
-<doc>
-<e a1="v1" a2="v2" a3="v3"/>
-<e a1="w1" a2="v2"/>
-<e a1="v1" a2="w2" a3="v3"/>
-</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>X Y</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>£</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>เจมส์</doc>
+++ /dev/null
-<!DOCTYPE เจมส์ [
-<!ELEMENT เจมส์ (#PCDATA)>
-]>
-<เจมส์></เจมส์>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>𐀀􏿽</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<">
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA #IMPLIED>
-<!-- 34 is double quote -->
-<!ENTITY e1 """>
-]>
-<doc a1="&e1;"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc> </doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e " ">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!NOTATION n PUBLIC "whatever">
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY % e "<!ELEMENT doc (#PCDATA)>">
-%e;
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a ID #IMPLIED>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a IDREF #IMPLIED>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a IDREFS #IMPLIED>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a ENTITY #IMPLIED>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a ENTITIES #IMPLIED>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a NOTATION (n1|n2) #IMPLIED>
-<!NOTATION n1 SYSTEM "http://www.w3.org/">
-<!NOTATION n2 SYSTEM "http://www.w3.org/">
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a (1|2) #IMPLIED>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA #REQUIRED>
-]>
-<doc a="v"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA #FIXED "v">
-]>
-<doc a="v"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA #FIXED "v">
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (a, b, c)>
-<!ELEMENT a (a?)>
-<!ELEMENT b (b*)>
-<!ELEMENT c (a | b)+>
-]>
-<doc><a/><b/><c><a/></c></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY % e SYSTEM "e.dtd">
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY % e PUBLIC 'whatever' "e.dtd">
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [<!ELEMENT doc (#PCDATA)>]><doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e "<foo>">
-<!ENTITY e "">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e "">
-<!ENTITY e "<foo>">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<foo/>">
-<!ELEMENT doc (foo)>
-<!ELEMENT foo EMPTY>
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e "<foo>">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "𐀀􏿽">
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ATTLIST e a NOTATION (n) #IMPLIED>
-<!ELEMENT doc (e)*>
-<!ELEMENT e (#PCDATA)>
-<!NOTATION n PUBLIC "whatever">
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!NOTATION n SYSTEM "http://www.w3.org/">
-<!ENTITY e SYSTEM "http://www.w3.org/" NDATA n>
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a ENTITY "e">
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (a)*>
-<!ELEMENT a EMPTY>
-]>
-<doc>
-<a/>
- <a/> <a/>
-
-
-</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>
-
-
-</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY % e "foo">
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 CDATA "%e;">
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ATTLIST doc a1 CDATA #IMPLIED>
-<!ATTLIST doc a1 NMTOKENS #IMPLIED>
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc a1="1 2"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ATTLIST doc a1 NMTOKENS " 1 2 ">
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<!ATTLIST doc a2 CDATA #IMPLIED>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e SYSTEM "097.ent">
-<!ATTLIST doc a1 CDATA "v1">
-%e;
-<!ATTLIST doc a2 CDATA "v2">
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc><?pi x
-y?></doc>
+++ /dev/null
-<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e PUBLIC ";!*#@$_%" "100.xml">
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e """>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA #IMPLIED>
-]>
-<doc a="""></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc><doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA #IMPLIED>
-]>
-<doc a="x y"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA #IMPLIED>
-]>
-<doc a="x	y"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA #IMPLIED>
-]>
-<doc a="x y"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA #IMPLIED>
-]>
-<doc a="x y"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e "
-">
-<!ATTLIST doc a CDATA #IMPLIED>
-]>
-<doc a="x&e;y"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA #IMPLIED>
-]>
-<doc a=""></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e " ">
-<!ATTLIST doc a CDATA #IMPLIED>
-]>
-<doc a="x&e;y"></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a NMTOKENS #IMPLIED>
-]>
-<doc a=" x  y "></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (a | b)>
-<!ELEMENT a (#PCDATA)>
-]>
-<doc><a></a></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST e a CDATA #IMPLIED>
-]>
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e "<![CDATA[&foo;]]>">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e1 "&e2;">
-<!ENTITY e2 "v">
-]>
-<doc>&e1;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc><![CDATA[
-]]></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY rsqb "]">
-]>
-<doc>]</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY rsqb "]]">
-]>
-<doc>]</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc ANY>
-]>
-<doc><!-- -á --></doc>
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc> </doc>
\ No newline at end of file
+++ /dev/null
-<doc>&<>"'</doc>
\ No newline at end of file
+++ /dev/null
-<doc> </doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1" a2="v2"></doc>
\ No newline at end of file
+++ /dev/null
-<doc :="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc _.-0123456789="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc abcdefghijklmnopqrstuvwxyz="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc ABCDEFGHIJKLMNOPQRSTUVWXYZ="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc><?pi ?></doc>
\ No newline at end of file
+++ /dev/null
-<doc><?pi some data ? > <??></doc>
\ No newline at end of file
+++ /dev/null
-<doc><foo></doc>
\ No newline at end of file
+++ /dev/null
-<doc><&</doc>
\ No newline at end of file
+++ /dev/null
-<doc><&]>]</doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc><foo></foo></doc>
\ No newline at end of file
+++ /dev/null
-<doc><foo></foo><foo></foo></doc>
\ No newline at end of file
+++ /dev/null
-<doc><foo></foo><foo></foo></doc>
\ No newline at end of file
+++ /dev/null
-<doc><foo></foo><foo></foo></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc><?pi data?>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<?pi data?><doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1=""<&>'"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="A"></doc>
\ No newline at end of file
+++ /dev/null
-<doc>A</doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="foo bar"></doc>
\ No newline at end of file
+++ /dev/null
-<doc> <e a1="v1" a2="v2" a3="v3"></e> <e a1="w1" a2="v2"></e> <e a1="v1" a2="w2" a3="v3"></e> </doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1" a2="v2"></doc>
\ No newline at end of file
+++ /dev/null
-<doc>X Y</doc>
\ No newline at end of file
+++ /dev/null
-<doc>]</doc>
\ No newline at end of file
+++ /dev/null
-<doc>£</doc>
\ No newline at end of file
+++ /dev/null
-<doc>เจมส์</doc>
\ No newline at end of file
+++ /dev/null
-<เจมส์></เจมส์>
\ No newline at end of file
+++ /dev/null
-<doc>𐀀􏿽</doc>
\ No newline at end of file
+++ /dev/null
-<doc><e></e></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<?pi data?><doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc>A</doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="1 2"></doc>
\ No newline at end of file
+++ /dev/null
-<doc> <e a1="v1" a2="v2" a3="v3"></e> <e a1="w1" a2="v2"></e> <e a1="v1" a2="w2" a3="v3"></e> </doc>
\ No newline at end of file
+++ /dev/null
-<doc>X Y</doc>
\ No newline at end of file
+++ /dev/null
-<doc>£</doc>
\ No newline at end of file
+++ /dev/null
-<doc>เจมส์</doc>
\ No newline at end of file
+++ /dev/null
-<เจมส์></เจมส์>
\ No newline at end of file
+++ /dev/null
-<doc>𐀀􏿽</doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="""></doc>
\ No newline at end of file
+++ /dev/null
-<doc> </doc>
\ No newline at end of file
+++ /dev/null
-<doc> </doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc a="v"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a="v"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a="v"></doc>
\ No newline at end of file
+++ /dev/null
-<doc><a></a><b></b><c><a></a></c></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc><foo></foo></doc>
\ No newline at end of file
+++ /dev/null
-<doc><foo></doc>
\ No newline at end of file
+++ /dev/null
-<doc>ð€€ô¿½ô¿¿</doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc a="e"></doc>
\ No newline at end of file
+++ /dev/null
-<doc> <a></a> <a></a>	<a></a> </doc>
\ No newline at end of file
+++ /dev/null
-<doc> </doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="%e;"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="1 2"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="1 2"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a1="v1"></doc>
\ No newline at end of file
+++ /dev/null
-<doc><?pi x
-y?></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc a="""></doc>
\ No newline at end of file
+++ /dev/null
-<doc><doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc a="x y"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a="x	y"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a="x y"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a="x y"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a="x y"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a=""></doc>
\ No newline at end of file
+++ /dev/null
-<doc a="x y"></doc>
\ No newline at end of file
+++ /dev/null
-<doc a="x y"></doc>
\ No newline at end of file
+++ /dev/null
-<doc><a></a></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc>&foo;</doc>
\ No newline at end of file
+++ /dev/null
-<doc>v</doc>
\ No newline at end of file
+++ /dev/null
-<doc> </doc>
\ No newline at end of file
+++ /dev/null
-<doc>]</doc>
\ No newline at end of file
+++ /dev/null
-<doc>]]</doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<?xml version="1.0"?>
-
-<!DOCTYPE any [
-
-<!ENTITY x PUBLIC "x" "" NDATA p>
-<!ENTITY y PUBLIC "x" "" NDATA p>
-<!ENTITY z PUBLIC "x" "" NDATA p>
-
-<!NOTATION p PUBLIC "image/gif">
-<!NOTATION q PUBLIC "image/jpeg">
-<!NOTATION r PUBLIC "image/png">
-
-<!ELEMENT el EMPTY>
-<!ATTLIST el
- cdata CDATA #IMPLIED
- id ID #IMPLIED
- idref IDREF #IMPLIED
- idrefs IDREFS #IMPLIED
- entity ENTITY #IMPLIED
- entities ENTITIES #IMPLIED
- nmtoken NMTOKEN #IMPLIED
- nmtokens NMTOKENS #IMPLIED
- enum (a|b|c) #IMPLIED
- notation NOTATION (p|q|r) #IMPLIED
->
-
-<!ELEMENT any ANY>
-]>
-
-<any>
- <el cdata="a b c"/>
- <el cdata=" a b c "/>
- <el cdata=" a b c "/>
- <el id="A"/>
- <el id=" B "/>
- <el id=" C "/>
- <el idref="C"/>
- <el idref=" A "/>
- <el idref=" B "/>
- <el idrefs="A B C"/>
- <el idrefs=" A B C "/>
- <el idrefs=" A B C "/>
- <el entity="x"/>
- <el entity=" x "/>
- <el entity=" x "/>
- <el entities="x y z"/>
- <el entities=" x y z "/>
- <el entities=" x y z "/>
- <el nmtoken="a"/>
- <el nmtoken=" a "/>
- <el nmtoken=" a "/>
- <el nmtokens="a b c"/>
- <el nmtokens=" a b c "/>
- <el nmtokens=" a b c "/>
- <el enum="a"/>
- <el enum=" a "/>
- <el enum=" a "/>
- <el notation="p"/>
- <el notation=" p "/>
- <el notation=" p "/>
-</any>
+++ /dev/null
-<?xml version="1.0"?>
-
-<!DOCTYPE any [
-
-<!ELEMENT el EMPTY>
-<!ATTLIST el
- cdata CDATA #IMPLIED
- nmtoken NMTOKEN #IMPLIED
- nmtokens NMTOKENS #IMPLIED
->
-<!ELEMENT any ANY>
-]>
-
-<any>
- <el cdata="a
-b
-c d
-e "/>
- <el nmtoken=" a "/>
- <el nmtoken="
-a
-"/>
- <el nmtoken="
-a
-"/>
- <el nmtoken=" a "/>
- <el nmtokens=" a b c "/>
- <el nmtokens="
-a
-b
-c
-"/>
- <el nmtokens="
-a
-b
-c
-"/>
- <el nmtokens=" a b c "/>
-</any>
+++ /dev/null
-<?xml version="1.0"?>
-
-<!DOCTYPE any [
-
-<!ELEMENT el EMPTY>
-<!ATTLIST el
- cdata CDATA #IMPLIED
- nmtoken NMTOKEN #IMPLIED
- nmtokens NMTOKENS #IMPLIED
->
-<!ELEMENT any ANY>
-
-<!ENTITY elinstance
- '<el cdata="a
-b
-c d
-e "/>
- <el nmtoken=" a "/>
- <el nmtoken="
-a
-"/>
- <el nmtoken="
-a
-"/>
- <el nmtoken=" a "/>
- <el nmtokens=" a b c "/>
- <el nmtokens="
-a
-b
-c
-"/>
- <el nmtokens="
-a
-b
-c
-"/>
- <el nmtokens=" a b c "/>'>
-]>
-
-<any>&elinstance;</any>
+++ /dev/null
-<?xml version="1.0"?>
-
-<!DOCTYPE any [
-
-<!ELEMENT el EMPTY>
-<!ATTLIST el
- cdata CDATA #IMPLIED
->
-<!ELEMENT any ANY>
-
-<!ENTITY elinstance
- '<el cdata="a
-b"/>'>
-]>
-
-<any>&elinstance;</any>
-
+++ /dev/null
-<?xml version="1.0"?>
-
-<!DOCTYPE any [
-
-<!ELEMENT el EMPTY>
-<!ATTLIST el
- cdata CDATA #IMPLIED
- nmtoken NMTOKEN #IMPLIED
- nmtokens NMTOKENS #IMPLIED
->
-<!ELEMENT any ANY>
-]>
-
-<any>
- <el cdata="a b c	d e "/>
- <el nmtoken=" a "/>
- <el nmtoken=" a "/>
- <el nmtoken=" a "/>
- <el nmtoken="	a	"/>
- <el nmtokens=" a b c "/>
- <el nmtokens=" a b c "/>
- <el nmtokens=" a b c "/>
- <el nmtokens="	a	b	c	"/>
-</any>
+++ /dev/null
-<?xml version="1.0"?>
-
-<!DOCTYPE any [
-
-<!ELEMENT el EMPTY>
-<!ATTLIST el
- nmtoken NMTOKEN #FIXED "a"
- nmtokens NMTOKENS #FIXED "a b c"
->
-<!ELEMENT any ANY>
-]>
-
-<any>
- <el nmtoken="
-a
-"/>
- <el nmtokens="
-a
-b
-c
-"/>
-</any>
+++ /dev/null
-<?xml version="1.0"?>
-
-<!DOCTYPE any [
-
-<!ELEMENT el EMPTY>
-<!ATTLIST el
- nmtoken NMTOKEN #FIXED "
-a
-"
- nmtokens NMTOKENS #FIXED "a
-b
-c"
->
-<!ELEMENT any ANY>
-]>
-
-<any>
- <el nmtoken="
-a
-"/>
- <el nmtokens="
-a
-b
-c
-"/>
-</any>
+++ /dev/null
-001.xml tests whether additional white space in attribute values
- is removed during normalization for every attribute type
- except CDATA
-002.xml tests whether TABs, CRs, LFs, and CRLFs are converted
- to spaces (only for CDATA, NMTOKEN, NMTOKENS)
-003.xml similar to 002.xml, but the attribute values occur
- in internal entities
-004.xml tests whether CRLF normalization happens only once
-005.xml tests whether spaces, TABs, LFs, CRs, and CRLFs are correctly
- processed if they are written as character references
-006.xml tests whether normalization is done before #FIXED comparison
-007.xml tests whether normalization is done before #FIXED comparison
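(For orientation, a rough OCaml sketch of the normalization rules listed above; this is an editorial illustration, not PXP's implementation, and all helper names are invented. It also operates on already-resolved text, so it ignores the subtlety of tests 005-007 that whitespace written as character references is exempt from the translation.)

  (* Pass 1: literal TAB, LF, CR and CRLF each become one space.           *)
  (* Pass 2 (tokenized types such as NMTOKEN/NMTOKENS only): leading and   *)
  (* trailing spaces are stripped and inner runs collapsed to one space.   *)

  let translate_ws s =
    let b = Buffer.create (String.length s) in
    let n = String.length s in
    let i = ref 0 in
    while !i < n do
      (match s.[!i] with
       | '\013' ->
           Buffer.add_char b ' ';
           (* a CRLF pair is normalized only once *)
           if !i + 1 < n && s.[!i + 1] = '\010' then incr i
       | '\009' | '\010' -> Buffer.add_char b ' '
       | c -> Buffer.add_char b c);
      incr i
    done;
    Buffer.contents b

  let collapse_ws s =
    String.concat " "
      (List.filter (fun tok -> tok <> "") (String.split_on_char ' ' s))

  let normalize ~is_cdata v =
    let v' = translate_ws v in
    if is_cdata then v' else collapse_ws v'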
+++ /dev/null
-<any> <el cdata="a b c"></el> <el cdata=" a b c "></el> <el cdata=" a b c "></el> <el id="A"></el> <el id="B"></el> <el id="C"></el> <el idref="C"></el> <el idref="A"></el> <el idref="B"></el> <el idrefs="A B C"></el> <el idrefs="A B C"></el> <el idrefs="A B C"></el> <el entity="x"></el> <el entity="x"></el> <el entity="x"></el> <el entities="x y z"></el> <el entities="x y z"></el> <el entities="x y z"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el enum="a"></el> <el enum="a"></el> <el enum="a"></el> <el notation="p"></el> <el notation="p"></el> <el notation="p"></el> </any>
\ No newline at end of file
+++ /dev/null
-<any> <el cdata="a b c d e "></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> </any>
\ No newline at end of file
+++ /dev/null
-<any><el cdata="a b c d e "></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el></any>
\ No newline at end of file
+++ /dev/null
-<any><el cdata="a b"></el></any>
\ No newline at end of file
+++ /dev/null
-<any> <el cdata="a b c	d e "></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> </any>
\ No newline at end of file
+++ /dev/null
-<any> <el nmtoken="a" nmtokens="a b c"></el> <el nmtoken="a" nmtokens="a b c"></el> </any>
\ No newline at end of file
+++ /dev/null
-<any> <el nmtoken="a" nmtokens="a b c"></el> <el nmtoken="a" nmtokens="a b c"></el> </any>
\ No newline at end of file
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE schema [
-<!ELEMENT schema ANY>
-<!ELEMENT element ANY>
-<!ATTLIST element minOccurs CDATA #IMPLIED>
-]>
-<schema>
- <element minOccurs='0'/>
- <element minOccurs='0'/>
-</schema>
+++ /dev/null
-<!DOCTYPE x [
-<!ELEMENT x ANY>
-]>
-<x/>
+++ /dev/null
-<!DOCTYPE a [
-<!ELEMENT a ANY>
-<?pi 0?>
-]>
-<?pi 1?>
-<a>
- <?pi 2?>
- <a>
- <?pi 3?>
- </a>
- <?pi 4?>
-</a>
-<?pi 5?>
\ No newline at end of file
+++ /dev/null
-This directory contains real regression tests, i.e. it is tested whether
-reported bugs have been fixed.
-
-001.xml 2000-08-26: Haruo's single-quote bug. Attribute values delimited
- by single quotes did not work with the UTF-8 lexer.
-002+.xml 2000-08-26: Haruo's file-names-are-not-URLs bug. from_file
- interpreted the file name as a URL-encoded string. The file is
- called "002+.xml" because the "+" must not be decoded as a space.
-003.xml 2000-08-26: Alain's bug that data nodes must not be merged
- where PI nodes are created. In the "comments" directory
- there is another test for the case that comments delimit
- data material.
+++ /dev/null
-<schema> 	<element minOccurs="0"></element> 	<element minOccurs="0"></element> </schema>
\ No newline at end of file
+++ /dev/null
-<x></x>
\ No newline at end of file
+++ /dev/null
-<?pi 1?><a> <?pi 2?> <a> <?pi 3?> </a> <?pi 4?> </a><?pi 5?>
\ No newline at end of file
+++ /dev/null
-<!DOCTYPE a [
-<!ELEMENT a ANY>
-<!-- Comment 0 -->
-]>
-<!-- Comment 1 -->
-<a>
- <!-- Comment -2 -->
- <a>
- <!-- Comment 3 -->
- </a>
- <!-- Comment 4 -->
-</a>
-<!-- Comment 5 -->
\ No newline at end of file
+++ /dev/null
-001 Checks whether enable_comment_nodes works
+++ /dev/null
-<!-- Comment 1 --><a> <!-- Comment -2 --> <a> <!-- Comment 3 --> </a> <!-- Comment 4 --> </a><!-- Comment 5 -->
\ No newline at end of file
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<![IGNORE[<!ATTLIST doc att CDATA #REQUIRED>]]>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "001.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<!-- Only a precondition check for test 003: the first ATTLIST counts -->
-<!ATTLIST doc att CDATA #IMPLIED>
-<!ATTLIST doc att CDATA #REQUIRED>
\ No newline at end of file
+++ /dev/null
-<!DOCTYPE doc SYSTEM "002.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<![INCLUDE[<!ATTLIST doc att CDATA #IMPLIED>]]>
-<!ATTLIST doc att CDATA #REQUIRED>
\ No newline at end of file
+++ /dev/null
-<!DOCTYPE doc SYSTEM "003.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<!ENTITY % e "IGNORE">
-<![%e;[<!ATTLIST doc att CDATA #REQUIRED>]]>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "004.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<!ENTITY % e "INCLUDE">
-<![%e;[<!ATTLIST doc att CDATA #IMPLIED>]]>
-<!ATTLIST doc att CDATA #REQUIRED>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "005.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<![IGNORE[This is illegal here]]>
-
+++ /dev/null
-<!DOCTYPE doc SYSTEM "006.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<!ENTITY % e "]]>">
-<![IGNORE[%e;]]>
-
+++ /dev/null
-<!DOCTYPE doc SYSTEM "007.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<![IGNORE[<!ENTITY e "]]>">]]>
-<![IGNORE[<!ENTITY e ']]>'>]]>
-
+++ /dev/null
-<!DOCTYPE doc SYSTEM "008.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<![IGNORE[<!-- ]]> -->]]>
-<![IGNORE[x <!-- ]]> -->]]>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "009.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<![IGNORE[x <![IGNORE[xxx]]>]]>
-<![IGNORE[<![IGNORE[xxx]]>]]>
-<![IGNORE[x <![INCLUDE[xxx]]>]]>
-<![IGNORE[<![INCLUDE[xxx]]>]]>
+++ /dev/null
-<!DOCTYPE doc SYSTEM "010.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<![INCLUDE[ <![INCLUDE[ <!ATTLIST doc att CDATA #IMPLIED> ]]>
- <![IGNORE[ xxx ]]>
-]]>
-<!ATTLIST doc att CDATA #REQUIRED>
-
+++ /dev/null
-<!DOCTYPE doc SYSTEM "011.ent">
-<doc></doc>
+++ /dev/null
-001 IGNORE works: <![IGNORE[ ... ]]>
-002 [precondition for 003] The first ATTLIST declaration for the same
- attribute counts
-003 INCLUDE works: <![INCLUDE[ ... ]]>
-004 IGNORE works: <![%e;[ ... ]]> with e="IGNORE"
-005 INCLUDE works: <![%e;[ ... ]]> with e="INCLUDE"
-006 IGNORE works: <![IGNORE[ ... ]]> ignoring a section that would
- be illegal
-007 Within ignored sections references to parameter entities are
- not resolved.
- NOTE: You cannot derive this directly from the XML spec because a
- precise definition of what "ignoring" means is missing. This property
- is an interpretation of the statement about reliable parsing in
- section 3.4.
-008 Ignored sections may contain string literals containing "]]>".
- NOTE: same problem with XML spec as 007
-009 Ignored sections may contain comments containing "]]>".
- NOTE: same problem with XML spec as 007
-010 Nested conditional sections with outermost IGNORE
-011 Nested conditional sections with outermost INCLUDE
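(An editorial convenience sketch, not part of the test suite: the effect of a conditional section can be checked by hand by parsing one of these documents with the validating entry point used elsewhere in this archive and re-emitting it. "005.xml" is one of the test files described above; everything else comes from Pxp_yacc/Pxp_types as used by the test drivers.)

  open Pxp_types
  open Pxp_yacc

  (* Parse 005.xml with validation and print it back; whether the
   * #IMPLIED or the #REQUIRED ATTLIST took effect shows up in the
   * output (or in a validation error printed to stderr). *)
  let () =
    try
      let doc =
        parse_document_entity default_config (from_file "005.xml") default_spec in
      doc # write (Out_channel stdout) `Enc_utf8
    with e ->
      prerr_endline (string_of_exn e)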
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<!DOCTYPE a [
- <!ELEMENT a ANY>
- <?pxp:dtd optional-element-and-notation-declarations?>
-]>
-<a><b/></a>
+++ /dev/null
-<!DOCTYPE a [
- <!ELEMENT a ANY>
- <?pxp:dtd optional-element-and-notation-declarations?>
-]>
-<a><b att1="1" att2=" 1 2 3 "/></a>
+++ /dev/null
-<!DOCTYPE a [
- <!ELEMENT a (b)>
- <?pxp:dtd optional-element-and-notation-declarations?>
-]>
-<a><b/></a>
+++ /dev/null
-<!DOCTYPE a [
- <?pxp:dtd optional-element-and-notation-declarations?>
-]>
-<a><b/></a>
+++ /dev/null
-<!DOCTYPE a [
- <!ELEMENT a ANY>
- <!ENTITY x SYSTEM "sample" NDATA m>
- <?pxp:dtd optional-element-and-notation-declarations?>
-]>
-<a/>
+++ /dev/null
-<!DOCTYPE a [
- <!ELEMENT a ANY>
- <!ATTLIST a g ENTITY #IMPLIED>
- <!ENTITY x SYSTEM "sample" NDATA m>
- <?pxp:dtd optional-element-and-notation-declarations?>
-]>
-<a g="x"/>
+++ /dev/null
-<!DOCTYPE a [
- <!ELEMENT a ANY>
- <?pxp:dtd optional-attribute-declarations elements="a"?>
-]>
-<a x="y"/>
+++ /dev/null
-<?pxp:dtd optional-element-and-notation-declarations?>
-
-001.xml Whether it works for undeclared elements
-002.xml Whether it works for undeclared elements with attributes
-003.xml Whether it works for undeclared elements in declarations
-004.xml Whether it works for undeclared root elements
-005.xml Whether it works for undeclared notations
-006.xml Whether it works for undeclared notations which are actually
- referred to
-
-<?pxp:dtd optional-attribute-declarations?>
-
-007.xml Whether it works
-
+++ /dev/null
-<a><b></b></a>
\ No newline at end of file
+++ /dev/null
-<a><b att1="1" att2=" 1 2 3 "></b></a>
\ No newline at end of file
+++ /dev/null
-<a><b></b></a>
\ No newline at end of file
+++ /dev/null
-<a><b></b></a>
\ No newline at end of file
+++ /dev/null
-<a></a>
\ No newline at end of file
+++ /dev/null
-<a g="x"></a>
\ No newline at end of file
+++ /dev/null
-<a x="y"></a>
\ No newline at end of file
+++ /dev/null
-#! /bin/bash
-
-check_dir () {
- dir="$1"
- shift
- xmlfiles=`cd $dir && echo *.xml`
- for file in $xmlfiles; do
- echo -n "File $dir/$file: "
- ./test_canonxml "$@" "$dir/$file" >out.xml
- if cmp out.xml "$dir/out/$file"; then
- echo "OK"
- else
- echo "NOT OK"
- read
- fi
- done
-}
-
-check_dir "data_valid/conditional"
-check_dir "data_valid/att_normalization"
-check_dir "data_valid/optional_decls"
-check_dir "data_valid/comments" -comments
-check_dir "data_valid/bugfixes"
-
-#check_dir "data_jclark_valid/sa-problems"
-#check_dir "data_jclark_valid/ext-sa-problems"
-check_dir "data_jclark_valid/sa"
-check_dir "data_jclark_valid/not-sa"
-check_dir "data_jclark_valid/ext-sa"
-
-
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-open Pxp_document;;
-open Pxp_yacc;;
-open Pxp_types;;
-
-let error_happened = ref false;;
-
-let rec prerr_error e =
- prerr_endline (string_of_exn e)
-;;
-
-class warner =
- object
- method warn w =
- prerr_endline ("WARNING: " ^ w)
- end
-;;
-
-let outbuf = String.create 8192;;
-
-let output_utf8 config s =
- match config.encoding with
- `Enc_utf8 ->
- print_string s
- | `Enc_iso88591 ->
- for i = 0 to String.length s - 1 do
- let c = Char.code(s.[i]) in
- if c <= 127 then
- print_char(Char.chr(c))
- else begin
- print_char(Char.chr(0xc0 lor (c lsr 6)));
- print_char(Char.chr(0x80 lor (c land 0x3f)));
- end
- done
- | _ -> assert false
-;;
-
-
-let re = Str.regexp "[&<>\"\009\010\013]";;
-
-let escaped s =
- Str.global_substitute
- re
- (fun _ ->
- match Str.matched_string s with
- "&" -> "&"
- | "<" -> "<"
- | ">" -> ">"
- | "\"" -> """
- | "\009" -> "	"
- | "\010" -> " "
- | "\013" -> " "
- | _ -> assert false
- )
- s
-;;
-
-
-let rec output_xml config n =
- match n # node_type with
- T_super_root ->
- n # iter_nodes (output_xml config)
- | T_pinstr pi_name ->
- let [ pi ] = n # pinstr pi_name in
- output_utf8 config "<?";
- output_utf8 config (pi # target);
- output_utf8 config " ";
- output_utf8 config (pi # value);
- output_utf8 config "?>";
- | T_element name ->
- output_utf8 config "<";
- output_utf8 config name;
- let sorted_attnames =
- Sort.list ( <= ) (n # attribute_names) in
- List.iter
- (fun attname ->
- match n # attribute attname with
- Value v ->
- output_utf8 config " ";
- output_utf8 config attname;
- output_utf8 config "=\"";
- output_utf8 config (escaped v);
- output_utf8 config "\"";
- | Valuelist vl ->
- let v = String.concat " " vl in
- output_utf8 config " ";
- output_utf8 config attname;
- output_utf8 config "=\"";
- output_utf8 config (escaped v);
- output_utf8 config "\"";
- | Implied_value ->
- ()
- )
- sorted_attnames;
- output_utf8 config ">";
- n # iter_nodes (output_xml config);
- output_utf8 config "</";
- output_utf8 config name;
- output_utf8 config ">";
- | T_data ->
- let v = n # data in
- output_utf8 config (escaped v)
- | T_comment ->
- let v =
- match n # comment with
- None -> assert false
- | Some x -> x
- in
- output_utf8 config ("<!--" ^ v ^ "-->")
- | _ ->
- assert false
-;;
-
-
-let parse debug wf iso88591 comments filename =
- let spec =
- let e = new element_impl default_extension in
- e # keep_always_whitespace_mode;
- make_spec_from_mapping
- ~super_root_exemplar: e
- ~default_pinstr_exemplar: e
- ~comment_exemplar: e
- ~data_exemplar: (new data_impl default_extension)
- ~default_element_exemplar: e
- ~element_mapping: (Hashtbl.create 1)
- ()
- in
- let config =
- { default_config with
- warner = new warner;
- debugging_mode = debug;
- enable_pinstr_nodes = true;
- enable_super_root_node = true;
- enable_comment_nodes = comments;
- encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
- idref_pass = true;
- }
- in
- try
- let parse_fn =
- if wf then parse_wfdocument_entity
- else
- let index = new hash_index in
- parse_document_entity
- ?transform_dtd:None
- ~id_index:(index :> 'ext index)
- in
- let tree =
- parse_fn
- config
- (from_file filename)
- spec
- in
- output_xml config (tree # root)
- with
- e ->
- error_happened := true;
- prerr_error e
-;;
-
-
-let main() =
- let debug = ref false in
- let wf = ref false in
- let iso88591 = ref false in
- let comments = ref false in
- let files = ref [] in
- Arg.parse
- [ "-d", Arg.Set debug,
- " turn debugging mode on";
- "-wf", Arg.Set wf,
- " check only on well-formedness";
- "-iso-8859-1", Arg.Set iso88591,
- " use ISO-8859-1 as internal encoding instead of UTF-8";
- "-comments", Arg.Set comments,
- " output comments, too";
- ]
- (fun x -> files := x :: !files)
- "
-usage: test_canonxml [options] file ...
-
-List of options:";
- files := List.rev !files;
- List.iter (parse !debug !wf !iso88591 !comments) !files;
-;;
-
-
-main();
-if !error_happened then exit(1);;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:32 lpadovan
- * Initial revision
- *
- * Revision 1.8 2000/08/17 00:51:57 gerd
- * Added -comments option to test enable_comment_nodes.
- *
- * Revision 1.7 2000/08/16 23:44:17 gerd
- * Updates because of changes of the PXP API.
- *
- * Revision 1.6 2000/07/14 14:56:55 gerd
- * Updated: warner.
- *
- * Revision 1.5 2000/07/14 14:17:58 gerd
- * Updated because of interface changes.
- *
- * Revision 1.4 2000/07/09 01:06:20 gerd
- * Updated.
- *
- * Revision 1.3 2000/06/04 20:31:03 gerd
- * Updates because of renamed PXP modules.
- *
- * Revision 1.2 2000/05/20 20:34:28 gerd
- * Changed for UTF-8 support.
- *
- * Revision 1.1 2000/04/30 20:13:01 gerd
- * Initial revision.
- *
- * Revision 1.3 1999/11/09 22:27:30 gerd
- * The programs returns now an exit code of 1 if one of the
- * XML files produces an error.
- *
- * Revision 1.2 1999/09/01 23:09:56 gerd
- * Added the option -wf that switches to well-formedness checking
- * instead of validation.
- *
- * Revision 1.1 1999/08/14 22:20:53 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-# make validate: make bytecode executable
-# make validate.opt: make native executable
-# make clean: remove intermediate files (in this directory)
-# make CLEAN: remove intermediate files (recursively)
-# make distclean: remove any superfluous files (recursively)
-#----------------------------------------------------------------------
-
-OCAMLPATH=../..
-
-compile: compile.ml
- ocamlfind ocamlc -g -custom -o compile -package .,str -linkpkg compile.ml
-
-#----------------------------------------------------------------------
-.PHONY: all
-all:
-
-.PHONY: clean
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa sample sample.ml out1 out2
-
-.PHONY: CLEAN
-CLEAN: clean
-
-.PHONY: distclean
-distclean: clean
- rm -f *~
- rm -f compile
-
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-open Pxp_document;;
-open Pxp_yacc;;
-open Pxp_types;;
-
-let error_happened = ref false;;
-
-let rec prerr_error e =
- prerr_endline (string_of_exn e)
-;;
-
-
-class warner =
- object
- method warn w =
- prerr_endline ("WARNING: " ^ w)
- end
-;;
-
-
-let compile in_filename out_filename print super_root pis comments =
- let spec =
- let e = new element_impl default_extension in
- make_spec_from_mapping
- ~super_root_exemplar: e
- ~default_pinstr_exemplar: e
- ~comment_exemplar: e
- ~data_exemplar: (new data_impl default_extension)
- ~default_element_exemplar: e
- ~element_mapping: (Hashtbl.create 1)
- ()
- in
- let config =
- { default_config with
- encoding = `Enc_utf8;
- warner = new warner;
- enable_super_root_node = super_root;
- enable_pinstr_nodes = pis;
- enable_comment_nodes = comments;
- }
- in
- try
- let tree =
- parse_document_entity
- config
- (from_file in_filename)
- spec
- in
-
- let ch = open_out out_filename in
- Pxp_codewriter.write_document ch tree;
- output_string ch "(create_document (new Pxp_types.drop_warnings) Pxp_yacc.default_spec) # write (Pxp_types.Out_channel stdout) `Enc_utf8;;\n";
- close_out ch;
-
- if print then
- tree # write (Out_channel stdout) `Enc_utf8;
- with
- e ->
- error_happened := true;
- prerr_error e
-;;
-
-
-let main() =
- let in_file = ref "" in
- let out_file = ref "" in
- let print_file = ref false in
- let super_root = ref false in
- let pis = ref false in
- let comments = ref false in
- Arg.parse
- [ "-in", (Arg.String (fun s -> in_file := s)),
- " <file> Set the XML file to read";
- "-out", (Arg.String (fun s -> out_file := s)),
- " <file> Set the Ocaml file to write";
- "-print", (Arg.Set print_file),
- " Print the XML file in standard form";
- "-super-root", Arg.Set super_root,
- " Generate a super root node";
- "-pis", Arg.Set pis,
- " Generate wrapper nodes for processing instructions";
- "-comments", Arg.Set comments,
- " Generate nodes for comments";
- ]
- (fun x -> raise (Arg.Bad "Unexpected argument"))
- "
-usage: compile [ options ]
-
-List of options:";
- if !in_file = "" then begin
- prerr_endline "No input file specified.";
- exit 1
- end;
- if !out_file = "" then begin
- prerr_endline "No output file specified.";
- exit 1
- end;
- compile !in_file !out_file !print_file !super_root !pis !comments
-;;
-
-
-main();
-if !error_happened then exit(1);;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:35 lpadovan
- * Initial revision
- *
- * Revision 1.4 2000/08/17 01:20:15 gerd
- * Update: Also tested whether super root nodes, pinstr nodes
- * and comment nodes work.
- * Note: comment nodes are not fully tested yet.
- *
- * Revision 1.3 2000/08/16 23:44:19 gerd
- * Updates because of changes of the PXP API.
- *
- * Revision 1.2 2000/07/16 17:54:15 gerd
- * Updated because of PXP interface changes.
- *
- * Revision 1.1 2000/07/09 00:33:32 gerd
- * Initial revision.
- *
- *)
+++ /dev/null
-#! /bin/sh
-
-./test_codewriter sample001.xml
+++ /dev/null
-<!DOCTYPE a [
-
-<!ELEMENT a (b | (c, d)* | (e, f)+ | g?)>
-<!ELEMENT b (#PCDATA | a)*>
-<!ELEMENT c EMPTY>
-<!ELEMENT d ANY>
-<!ELEMENT e EMPTY>
-<!ELEMENT f EMPTY>
-<!ELEMENT g EMPTY>
-
-<!ATTLIST a u CDATA #IMPLIED
- v NMTOKEN "huhu"
- w (q|p) #REQUIRED
- x NOTATION (n1|n2) "n1"
- y ENTITY #IMPLIED>
-
-<!NOTATION n1 SYSTEM "/bin/n1-processor">
-<!NOTATION n2 SYSTEM "/bin/n2-processor">
-
-<!ENTITY u1 SYSTEM "file-u1" NDATA n1>
-<!ENTITY u2 SYSTEM "file-u2" NDATA n2>
-
-<!-- comment 1 -->
-<?pi1 args ...?>
-]>
-
-<!-- comment 2 -->
-<a u="1" w="q" x="n2">
- <!-- comment 3 -->
- <b>
- <?pi2 args ...?>
- This is text!
- <a w="p" y="u1">
- <c/>
- <d/>
- </a>
- </b>
- <!-- comment 4 -->
-</a>
-
-<!-- comment 5 -->
-<?pi3 args ...?>
-<!-- comment 6 -->
-
+++ /dev/null
-#! /bin/sh
-
-set -e
-
-sample="$1"
-echo "Testing $sample:"
-./compile -in "$sample" -out "sample.ml" -print -super-root -pis -comments >"out1"
-echo "- code written to sample.ml, formatted data to out1"
-OCAMLPATH=../.. ocamlfind ocamlc -package . -linkpkg -custom sample.ml -o sample
-echo "- sample.ml compiled to sample"
-./sample >out2
-echo "- re-read data written to out2"
-if cmp out1 out2; then
- echo "- out1 and out2 are identical! OK"
-else
- echo "- out1 and out2 differ! FAILURE!"
- exit 1
-fi
+++ /dev/null
-
-
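-(* [dump_file name]: reads the whole file [name] and prints it as a LaTeX
- * minipage. Control characters are shown symbolically (HT, LF, CR, or the
- * hex code), and characters that are special in LaTeX are emitted via
- * \symbol{...}. *)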
-let dump_file name =
- let ch = open_in_bin name in
- let len = in_channel_length ch in
- let sin = String.create len in
- really_input ch sin 0 len;
- close_in ch;
-
- Printf.printf "\\noindent\\begin{minipage}{5.5cm}\n";
- (* Printf.printf "\\rule{5.5cm}{1pt}\n"; *)
- Printf.printf "\\footnotesize\\bf File %s:\\\\\n" name;
- Printf.printf "\\tt{}";
-
- for i = 0 to len - 1 do
- match sin.[i] with
- ('\000'..'\008'|'\011'|'\012'|'\014'..'\031'|'\127'..'\255') as c ->
- Printf.printf "{\\sl (%02x)}\\linebreak[2]" (Char.code c)
- | '\009' ->
- Printf.printf "{\\sl HT}\\linebreak[3]"
- | '\010' ->
- Printf.printf "{\\sl LF}\\\\\n"
- | '\013' ->
- Printf.printf "{\\sl CR}";
- if not(i < len - 1 && sin.[i+1] = '\010') then
- Printf.printf "\\\\\n";
- | ' ' ->
- Printf.printf "\\symbol{32}\\linebreak[3]"
-
- | ('"'|'#'|'$'|'%'|'&'|'-'|'<'|'>'|'['|'\\'|']'|'^'|'_'|'`'|
- '{'|'|'|'}'|'~') as c ->
- Printf.printf "\\symbol{%d}\\linebreak[2]" (Char.code c)
- | c ->
- print_char c;
- print_string "\\linebreak[0]"
- done;
-
- Printf.printf "\\mbox{}\\\\\n";
- Printf.printf "\\rule{5.5cm}{1pt}\n";
- Printf.printf "\\end{minipage}\n"
-;;
-
-
-print_endline "\\documentclass[a4paper]{article}";
-print_endline "\\usepackage{multicol}";
-print_endline "\\begin{document}";
-print_endline "\\begin{multicols}{2}";
-for i = 1 to Array.length(Sys.argv)-1 do
- dump_file Sys.argv.(i)
-done;
-print_endline "\\end{multicols}";
-print_endline "\\end{document}"
-;;
-
-
-
+++ /dev/null
-# make test_negative: make bytecode executable
-# make clean: remove intermediate files (in this directory)
-# make CLEAN: remove intermediate files (recursively)
-# make distclean: remove any superfluous files (recursively)
-#----------------------------------------------------------------------
-
-OCAMLPATH=../..
-
-test_negative: test_negative.ml
- ocamlfind ocamlc -custom -o test_negative -package .,str -linkpkg test_negative.ml
-
-#----------------------------------------------------------------------
-.PHONY: all
-all:
-
-.PHONY: clean
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa current.out
-
-.PHONY: CLEAN
-CLEAN: clean
-
-.PHONY: distclean
-distclean: clean
- rm -f *~
- rm -f test_negative
-
-
+++ /dev/null
-----------------------------------------------------------------------
-Regression test "negative":
-----------------------------------------------------------------------
-
-- An erroneous XML file is parsed, and the error message is printed.
-
-- The output is compared with a reference file. The test is only
- passed if the output and the reference are equal.
-
-- Test data "data_jclark_notwf":
- Contains the samples by James Clark that are not well-formed.
- The subdirectories:
- - sa: standalone documents
-  - not-sa: non-standalone documents (with external DTD)
-  - ext-sa: non-standalone documents (with other external entities)
-
-- Test data "data_jclark_invalid":
- Contains the samples by James Clark that are invalid.
-
-- Tests that are not passed have been moved into the *-problems directories.
-  The reason is typically that they use characters outside the
-  Latin 1 character set.
-
-- Test data "data_notwf":
-  Contains our own tests with samples that are not well-formed.
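-
-- A rough sketch of how one sample can be parsed and its error message
-  printed (this is only an illustration using the standard PXP entry
-  points; the actual test driver is not reproduced here):
-
-    open Pxp_types;;
-    open Pxp_yacc;;
-
-    (* Parse one sample; on error, print the message that is compared
-       with the reference file. *)
-    let run_negative_test filename =
-      try
-        ignore
-          (parse_document_entity default_config (from_file filename)
-             default_spec);
-        print_endline "No error detected (test failed)"
-      with
-        e -> print_endline (string_of_exn e)
-    ;;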
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/001.xml", at line 5, position 3:
-ERROR (Validity constraint): The root element is `b' but is declared as `a
+++ /dev/null
-<!DOCTYPE a [
-<!ELEMENT a ANY>
-<!ELEMENT b ANY>
-]>
-<b>x</b>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/010.xml", at line 7, position 14:
-ERROR (Validity constraint): Attribute `id' is lexically malformed
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el id ID #IMPLIED>
-]>
-
-<el id="100"/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/011.xml", at line 10, position 17:
-ERROR (Validity constraint): ID not unique
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE any [
-<!ELEMENT any ANY>
-<!ELEMENT el EMPTY>
-<!ATTLIST el id ID #IMPLIED>
-]>
-
-<any>
- <el id="x100"/>
- <el id="x100"/>
-</any>
+++ /dev/null
-WARNING: More than one ATTLIST declaration for element type `el'
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/012.xml", at line 6, position 1:
-ERROR (Validity constraint): More than one ID attribute for element `el'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el id1 ID #IMPLIED>
-<!ATTLIST el id2 ID #IMPLIED>
-]>
-
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/013.xml", at line 5, position 1:
-ERROR (Validity constraint): ID attribute must be #IMPLIED or #REQUIRED; element `el'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el id ID "a">
-]>
-
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/014.xml", at line 5, position 1:
-ERROR (Validity constraint): ID attribute must be #IMPLIED or #REQUIRED; element `el'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el id ID #FIXED "a">
-]>
-
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/015.xml", at line 7, position 17:
-ERROR (Validity constraint): Attribute `idref' is lexically malformed
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el idref IDREF #IMPLIED>
-]>
-
-<el idref="100"/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/016.xml", at line 7, position 22:
-ERROR (Validity constraint): Attribute `idrefs' is lexically malformed
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el idrefs IDREFS #IMPLIED>
-]>
-
-<el idrefs="100 200"/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/017.xml" at line 12, position 2:
-ERROR (Validity constraint): Attribute `idref' of element `el' refers to unknown ID `a20'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE any [
-<!ELEMENT any ANY>
-<!ELEMENT el EMPTY>
-<!ATTLIST el id ID #IMPLIED
- idref IDREF #IMPLIED
->
-]>
-
-<any>
- <el id="a10"/>
- <el idref="a20"/>
-</any>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/018.xml" at line 12, position 2:
-ERROR (Validity constraint): Attribute `idrefs' of element `el' refers to unknown ID `a20'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE any [
-<!ELEMENT any ANY>
-<!ELEMENT el EMPTY>
-<!ATTLIST el id ID #IMPLIED
- idrefs IDREFS #IMPLIED
->
-]>
-
-<any>
- <el id="a10"/>
- <el idrefs="a10 a20"/>
-</any>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/019.xml", at line 6, position 1:
-ERROR (Validity constraint): Reference to undeclared notation `x'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ENTITY ndata SYSTEM "" NDATA x>
-<!ELEMENT el EMPTY>
-<!ATTLIST el ent ENTITY #IMPLIED>
-]>
-
-<el ent="10"/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/020.xml", at line 6, position 1:
-ERROR (Validity constraint): Reference to undeclared notation `x'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ENTITY ndata SYSTEM "" NDATA x>
-<!ELEMENT el EMPTY>
-<!ATTLIST el ents ENTITIES #IMPLIED>
-]>
-
-<el ents="a 10"/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/021.xml", at line 6, position 1:
-ERROR (Validity constraint): Reference to undeclared notation `x'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ENTITY ndata SYSTEM "" NDATA x>
-<!ELEMENT el EMPTY>
-<!ATTLIST el ent ENTITY #IMPLIED>
-]>
-
-<el ent="x"/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/022.xml", at line 6, position 1:
-ERROR (Validity constraint): Reference to undeclared notation `x'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ENTITY ndata SYSTEM "" NDATA x>
-<!ELEMENT el EMPTY>
-<!ATTLIST el ents ENTITIES #IMPLIED>
-]>
-
-<el ents="ndata a"/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/023.xml", at line 6, position 13:
-ERROR (Validity constraint): Attribute `nm' is lexically malformed
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el nm NMTOKEN #IMPLIED>
-]>
-<el nm="[]"/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/024.xml", at line 6, position 17:
-ERROR (Validity constraint): Attribute `nms' is lexically malformed
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el nms NMTOKENS #IMPLIED>
-]>
-<el nms="10 []"/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/025.xml", at line 5, position 1:
-ERROR (Validity constraint): Default value for attribute `idref' is lexically malformed
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el idref IDREF "100">
-]>
-
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/026.xml", at line 5, position 1:
-ERROR (Validity constraint): Default value for attribute `idrefs' is lexically malformed
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el idrefs IDREFS "100 200">
-]>
-
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/027.xml", at line 6, position 1:
-ERROR (Validity constraint): Reference to undeclared notation `x'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ENTITY ndata SYSTEM "" NDATA x>
-<!ELEMENT el EMPTY>
-<!ATTLIST el ent ENTITY "10">
-]>
-
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/028.xml", at line 6, position 1:
-ERROR (Validity constraint): Reference to undeclared notation `x'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ENTITY ndata SYSTEM "" NDATA x>
-<!ELEMENT el EMPTY>
-<!ATTLIST el ents ENTITIES "a 10">
-]>
-
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/029.xml", at line 5, position 1:
-ERROR (Validity constraint): Default value for attribute `nm' is lexically malformed
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el nm NMTOKEN "[]">
-]>
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/030.xml", at line 5, position 1:
-ERROR (Validity constraint): Default value for attribute `nms' is lexically malformed
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el nms NMTOKENS "10 []">
-]>
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/031.xml", at line 6, position 1:
-ERROR (Validity constraint): Reference to undeclared notation `jpeg'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!NOTATION gif PUBLIC "image/gif">
-<!ELEMENT el EMPTY>
-<!ATTLIST el n NOTATION (gif|jpeg) #IMPLIED>
-]>
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/032.xml", at line 6, position 1:
-ERROR (Validity constraint): Illegal default value for attribute `n' in declaration for element `el'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!NOTATION gif PUBLIC "image/gif">
-<!ELEMENT el EMPTY>
-<!ATTLIST el n NOTATION (gif) "jpeg">
-]>
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/033.xml", at line 7, position 14:
-ERROR (Validity constraint): Attribute `n' does not match one of the declared notation names
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!NOTATION gif PUBLIC "image/gif">
-<!ELEMENT el EMPTY>
-<!ATTLIST el n NOTATION (gif) #IMPLIED>
-]>
-<el n="jpeg"/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/034.xml", at line 8, position 1:
-ERROR (Validity constraint): More than one NOTATION attribute for element `el'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!NOTATION gif PUBLIC "image/gif">
-<!ELEMENT el EMPTY>
-<!ATTLIST el n NOTATION (gif) #IMPLIED
- m NOTATION (gif) #IMPLIED
->
-]>
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/035.xml", at line 5, position 1:
-ERROR (Validity constraint): Illegal default value for attribute `enum' in declaration for element `el'
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el enum (a|b|c) "d">
-]>
-
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/036.xml", at line 7, position 14:
-ERROR (Validity constraint): Attribute `enum' does not match one of the declared enumerator tokens
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el enum (a|b|c) #IMPLIED>
-]>
-
-<el enum="d"/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/037.xml", at line 7, position 5:
-ERROR (Validity constraint): Required attribute `x' is missing
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el x CDATA #REQUIRED>
-]>
-
-<el/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/038.xml", at line 7, position 13:
-ERROR (Validity constraint): Attribute `x' is fixed, but has here a different value
+++ /dev/null
-<?xml version="1.0"?>
-<!DOCTYPE el [
-<!ELEMENT el EMPTY>
-<!ATTLIST el x CDATA #FIXED "abc">
-]>
-
-<el x="def"/>
+++ /dev/null
-<!ATTLIST el v3 CDATA "ghi">
+++ /dev/null
-WARNING: More than one ATTLIST declaration for element type `el'
-WARNING: More than one ATTLIST declaration for element type `el'
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/060.xml", at line 17, position 12:
-ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
+++ /dev/null
-<?xml version="1.0" standalone="yes"?>
-
-<!DOCTYPE any [
-<!ELEMENT any ANY>
-<!ELEMENT el EMPTY>
-<!ATTLIST el v1 CDATA "abc">
-<!ENTITY % declare_v2 '<!ATTLIST el v2 CDATA "def">'>
-%declare_v2;
-<!ENTITY % declare_v3 SYSTEM "060.ent">
-%declare_v3;
-]>
-
-<any>
- <any><el v1="ABC" v2="DEF" v3="GHI"/></any>
- <any><el v2="DEF" v3="GHI"/></any>
- <any><el v3="GHI"/></any>
- <any><el/></any>
-</any>
-
+++ /dev/null
-<!ENTITY % declare_v3 '<!ATTLIST el v3 CDATA "ghi">'>
-
+++ /dev/null
-WARNING: More than one ATTLIST declaration for element type `el'
-WARNING: More than one ATTLIST declaration for element type `el'
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/061.xml", at line 18, position 12:
-ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
+++ /dev/null
-<?xml version="1.0" standalone="yes"?>
-
-<!DOCTYPE any [
-<!ELEMENT any ANY>
-<!ELEMENT el EMPTY>
-<!ATTLIST el v1 CDATA "abc">
-<!ENTITY % declare_v2 '<!ATTLIST el v2 CDATA "def">'>
-%declare_v2;
-<!ENTITY % declare_declare_v3 SYSTEM "061.ent">
-%declare_declare_v3;
-%declare_v3;
-]>
-
-<any>
- <any><el v1="ABC" v2="DEF" v3="GHI"/></any>
- <any><el v2="DEF" v3="GHI"/></any>
- <any><el v3="GHI"/></any>
- <any><el/></any>
-</any>
-
+++ /dev/null
-<!ATTLIST el v3 CDATA "ghi">
+++ /dev/null
-WARNING: More than one ATTLIST declaration for element type `el'
-WARNING: More than one ATTLIST declaration for element type `el'
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/062.xml", at line 15, position 12:
-ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
+++ /dev/null
-<?xml version="1.0" standalone="yes"?>
-
-<!DOCTYPE any SYSTEM "062.ent" [
-<!ELEMENT any ANY>
-<!ELEMENT el EMPTY>
-<!ATTLIST el v1 CDATA "abc">
-<!ENTITY % declare_v2 '<!ATTLIST el v2 CDATA "def">'>
-%declare_v2;
-]>
-
-<any>
- <any><el v1="ABC" v2="DEF" v3="GHI"/></any>
- <any><el v2="DEF" v3="GHI"/></any>
- <any><el v3="GHI"/></any>
- <any><el/></any>
-</any>
-
+++ /dev/null
-<!ENTITY e3 "ghi">
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/063.xml", at line 15, position 2:
-ERROR (Validity constraint): Reference to entity `e3' violates standalone declaration
+++ /dev/null
-<?xml version="1.0" standalone="yes"?>
-
-<!DOCTYPE any [
-<!ELEMENT any ANY>
-<!ENTITY e1 "abc">
-<!ENTITY % declare_e2 '<!ENTITY e2 "def">'>
-%declare_e2;
-<!ENTITY % declare_e3 SYSTEM "063.ent">
-%declare_e3;
-]>
-
-<any>
- &e1;
- &e2;
- &e3;
-</any>
-
+++ /dev/null
-<!ENTITY e3 "ghi">
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/064.xml", at line 17, position 10:
-ERROR (Validity constraint): Reference to entity `e3' violates standalone declaration
+++ /dev/null
-<?xml version="1.0" standalone="yes"?>
-
-<!DOCTYPE any [
-<!ELEMENT any ANY>
-<!ENTITY e1 "abc">
-<!ENTITY % declare_e2 '<!ENTITY e2 "def">'>
-%declare_e2;
-<!ENTITY % declare_e3 SYSTEM "064.ent">
-%declare_e3;
-<!ELEMENT el EMPTY>
-<!ATTLIST el att CDATA #IMPLIED>
-]>
-
-<any>
- <el att="&e1;"/>
- <el att="&e2;"/>
- <el att="&e3;"/>
-</any>
-
+++ /dev/null
-<!ENTITY e3 "ghi">
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/065.xml", at line 13, position 24:
-ERROR (Validity constraint): Reference to entity `e3' violates standalone declaration
+++ /dev/null
-<?xml version="1.0" standalone="yes"?>
-
-<!DOCTYPE any [
-<!ELEMENT any ANY>
-<!ENTITY e1 "abc">
-<!ENTITY % declare_e2 '<!ENTITY e2 "def">'>
-%declare_e2;
-<!ENTITY % declare_e3 SYSTEM "065.ent">
-%declare_e3;
-<!ELEMENT el EMPTY>
-<!ATTLIST el att1 CDATA "&e1;"
- att2 CDATA "&e2;"
- att3 CDATA "&e3;"
->
-]>
-
-<any>
- <el att1="1" att2="2" att3="3"/>
- <el att2="2" att3="3"/>
- <el att3="3"/>
- <el/>
-</any>
-
+++ /dev/null
-<!ENTITY e3 SYSTEM "ghi" NDATA n3>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/066.xml", at line 13, position 1:
-ERROR (Validity constraint): Reference to undeclared notation `n3'
+++ /dev/null
-<?xml version="1.0" standalone="yes"?>
-
-<!DOCTYPE any [
-<!ELEMENT any ANY>
-<!ENTITY e1 SYSTEM "abc" NDATA n1>
-<!ENTITY % declare_e2 '<!ENTITY e2 SYSTEM "def" NDATA n2>'>
-%declare_e2;
-<!ENTITY % declare_e3 SYSTEM "066.ent">
-%declare_e3;
-<!ELEMENT el EMPTY>
-<!ATTLIST el att ENTITY #IMPLIED
->
-]>
-
-<any>
- <any><el att="e1"/></any>
- <any><el att="e2"/></any>
- <any><el att="e3"/></any>
-</any>
-
+++ /dev/null
-<!ENTITY e3 SYSTEM "ghi" NDATA n3>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/067.xml", at line 15, position 1:
-ERROR (Validity constraint): Reference to undeclared notation `n3'
+++ /dev/null
-<?xml version="1.0" standalone="yes"?>
-
-<!DOCTYPE any [
-<!ELEMENT any ANY>
-<!ENTITY e1 SYSTEM "abc" NDATA n1>
-<!ENTITY % declare_e2 '<!ENTITY e2 SYSTEM "def" NDATA n2>'>
-%declare_e2;
-<!ENTITY % declare_e3 SYSTEM "067.ent">
-%declare_e3;
-<!ELEMENT el EMPTY>
-<!ATTLIST el att1 ENTITY "e1"
- att2 ENTITY "e2"
- att3 ENTITY "e3"
->
-]>
-
-<any>
- <any><el att1="e1" att2="e1" att3="e1"/></any>
- <any><el att2="e1" att3="e1"/></any>
- <any><el att3="e1"/></any>
- <any><el/></any>
-</any>
-
+++ /dev/null
-<!ATTLIST el v3 NMTOKEN #IMPLIED>
+++ /dev/null
-WARNING: More than one ATTLIST declaration for element type `el'
-WARNING: More than one ATTLIST declaration for element type `el'
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/068.xml", at line 19, position 23:
-ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
+++ /dev/null
-<?xml version="1.0" standalone="yes"?>
-
-<!DOCTYPE any [
-<!ELEMENT any ANY>
-<!ELEMENT el EMPTY>
-<!ATTLIST el v1 NMTOKEN #IMPLIED>
-<!ENTITY % declare_v2 '<!ATTLIST el v2 NMTOKEN #IMPLIED>'>
-%declare_v2;
-<!ENTITY % declare_v3 SYSTEM "068.ent">
-%declare_v3;
-]>
-
-<any>
- <any><el v1="abc"/></any>
- <any><el v2="abc"/></any>
- <any><el v3="abc"/></any>
- <any><el v1=" abc "/></any>
- <any><el v2=" abc "/></any>
- <any><el v3=" abc "/></any>
-</any>
-
+++ /dev/null
-<!ATTLIST el v3 NMTOKENS #IMPLIED>
+++ /dev/null
-WARNING: More than one ATTLIST declaration for element type `el'
-WARNING: More than one ATTLIST declaration for element type `el'
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/069.xml", at line 19, position 27:
-ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
+++ /dev/null
-<?xml version="1.0" standalone="yes"?>
-
-<!DOCTYPE any [
-<!ELEMENT any ANY>
-<!ELEMENT el EMPTY>
-<!ATTLIST el v1 NMTOKENS #IMPLIED>
-<!ENTITY % declare_v2 '<!ATTLIST el v2 NMTOKENS #IMPLIED>'>
-%declare_v2;
-<!ENTITY % declare_v3 SYSTEM "069.ent">
-%declare_v3;
-]>
-
-<any>
- <any><el v1="abc def"/></any>
- <any><el v2="abc def"/></any>
- <any><el v3="abc def"/></any>
- <any><el v1=" abc def "/></any>
- <any><el v2=" abc def "/></any>
- <any><el v3=" abc def "/></any>
-</any>
-
+++ /dev/null
-<!ELEMENT outer3 (inner)>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/070.xml", at line 19, position 32:
-ERROR (Validity constraint): Element `outer3' violates standalone declaration because extra white space separates the sub elements
+++ /dev/null
-<?xml version="1.0" standalone="yes"?>
-
-<!DOCTYPE any [
-<!ELEMENT any ANY>
-<!ELEMENT inner EMPTY>
-<!ELEMENT outer1 (inner)>
-<!ENTITY % declare_outer2 '<!ELEMENT outer2 (inner)>'>
-%declare_outer2;
-<!ENTITY % declare_outer3 SYSTEM "070.ent">
-%declare_outer3;
-]>
-
-<any>
- <any><outer1><inner/></outer1></any>
- <any><outer2><inner/></outer2></any>
- <any><outer3><inner/></outer3></any>
- <any><outer1><inner/> </outer1></any>
- <any><outer2><inner/> </outer2></any>
- <any><outer3><inner/> </outer3></any>
-</any>
-
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/080.xml", at line 4, position 0:
-ERROR (Validity constraint): The content model of element `b' is not deterministic
+++ /dev/null
-<!DOCTYPE a [
-<!ELEMENT a ANY>
-<!ELEMENT b ((a,b)|a+)>]>
-<a/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/081.xml", at line 4, position 0:
-ERROR (Validity constraint): The content model of element `b' is not deterministic
+++ /dev/null
-<!DOCTYPE a [
-<!ELEMENT a ANY>
-<!ELEMENT b ((b|a+),a)>]>
-<a/>
+++ /dev/null
-----------------------------------------
-Root element
-----------------------------------------
-
-001.xml Declared root element type matches actual root element type
-
-----------------------------------------
-Attributes
-----------------------------------------
-
-010.xml ID attributes must match the Name production (not nmtoken)
-011.xml ID attributes uniquely identify the element bearing them
- *** TODO ***
-012.xml No element type must have several ID attributes declared
-013.xml No ID attribute must have a default
-014.xml No ID attribute must have a default (FIXED)
-015.xml Attributes of type IDREF must match the Name production
-016.xml Attributes of type IDREFS must match the Names production
-017.xml Attributes of type IDREF must match the value of an ID
- attribute
- *** TODO ***
-018.xml Attributes of type IDREFS must match the values of ID
- attributes
- *** TODO ***
-019.xml Attributes of type ENTITY must match the Name production
-020.xml Attributes of type ENTITIES must match the Names production
-021.xml Attributes of type ENTITY must match an unparsed entity
-022.xml Attributes of type ENTITIES must match unparsed entities
-023.xml Attributes of type NMTOKEN must match the nmtoken production
-024.xml Attributes of type NMTOKENS must match the nmtokens production
-025.xml like 015.xml, but the default value is tested
-026.xml like 016.xml, but the default value is tested
-027.xml like 019.xml, but the default value is tested
-028.xml like 020.xml, but the default value is tested
-029.xml like 023.xml, but the default value is tested
-030.xml like 024.xml, but the default value is tested
-031.xml all notation names in the declaration must have been declared
-032.xml Values of NOTATION type must match one declared value
-033.xml Values of NOTATION type must match one declared value
-034.xml Only one NOTATION attribute per element
-035.xml Values of enum type must match one of the declared values
-036.xml Values of enum type must match one of the declared values
-037.xml missing #REQUIRED attribute
-038.xml #FIXED attributes must match the declared default
-
-----------------------------------------
-Standalone declaration
-----------------------------------------
-
-060.xml Externally declared default values are rejected
-061.xml variant of 060.xml (internal entity within external entity)
-062.xml variant of 060.xml (external subset of DTD)
-063.xml Externally declared parsed general entities are rejected
- (entity ref occurs in main text)
-064.xml Externally declared parsed general entities are rejected
- (entity ref occurs in attribute value)
-065.xml Externally declared parsed general entities are rejected
- (entity ref occurs in attribute default)
- *** THINK ABOUT THIS CASE AGAIN ***
-066.xml Externally declared unparsed entities are rejected
- (entity ref occurs in attribute value)
-067.xml Externally declared unparsed entities are rejected
- (entity ref occurs in attribute default)
-068.xml Externally declared NMTOKEN attributes require normal form
-069.xml Externally declared NMTOKENS attributes require normal form
-070.xml Externally declared elements with a regexp content model
-        must not contain extra white space between sub-elements
-
-----------------------------------------
-Deterministic content models
-----------------------------------------
-
-080.xml A content model that is not deterministic: ((a,b)|a+)
-081.xml A content model that is not deterministic: ((b|a+),a)
-
+++ /dev/null
-<!ELEMENT doc EMPTY>
-<!ENTITY % e "<!--">
-%e; -->
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/001.xml", at line 1, position 30:
-In entity [dtd] = SYSTEM "001.ent", at line 3, position 3:
-ERROR (Well-formedness constraint): `-->' expected
+++ /dev/null
-<!DOCTYPE doc SYSTEM "001.ent">
-<doc></doc>
+++ /dev/null
-<!ENTITY % e "(#PCDATA">
-<!ELEMENT doc %e;)>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/002.xml", at line 1, position 30:
-In entity [dtd] = SYSTEM "002.ent", at line 2, position 18:
-ERROR (Validity constraint): Entities not properly nested with parentheses
+++ /dev/null
-<!DOCTYPE doc SYSTEM "002.ent">
-<doc></doc>
+++ /dev/null
-<!ENTITY % e "<!ELEMENT ">
-%e; doc (#PCDATA)>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/003.xml", at line 1, position 30:
-In entity [dtd] = SYSTEM "003.ent", at line 2, position 17:
-ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
+++ /dev/null
-<!DOCTYPE doc SYSTEM "003.ent">
-<doc></doc>
+++ /dev/null
-<!ENTITY % e1 "<!ELEMENT ">
-<!ENTITY % e2 ">">
-%e1; doc (#PCDATA) %e2;
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/004.xml", at line 1, position 30:
-In entity e2, at line 1, position 1:
-Called from entity [dtd] = SYSTEM "004.ent", line 3, position 19:
-ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
+++ /dev/null
-<!DOCTYPE doc SYSTEM "004.ent">
-<doc></doc>
+++ /dev/null
-<!ENTITY % e ">">
-<!ELEMENT doc (#PCDATA) %e;
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/005.xml", at line 1, position 30:
-In entity e, at line 1, position 1:
-Called from entity [dtd] = SYSTEM "005.ent", line 2, position 24:
-ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
+++ /dev/null
-<!DOCTYPE doc SYSTEM "005.ent">
-<doc></doc>
+++ /dev/null
-<!ENTITY % e "(#PCDATA)>">
-<!ELEMENT doc %e;
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/006.xml", at line 1, position 30:
-In entity e, at line 1, position 10:
-Called from entity [dtd] = SYSTEM "006.ent", line 2, position 14:
-ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
+++ /dev/null
-<!DOCTYPE doc SYSTEM "006.ent">
-<doc></doc>
+++ /dev/null
-&e;
\ No newline at end of file
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/ext-sa/001.xml", at line 3, position 1:
-ERROR (Validity constraint): The root element is not declared
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e SYSTEM "001.ent">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<?xml version="1.0" standalone="yes"?>
-data
-
+++ /dev/null
-In entity e = SYSTEM "002.ent", at line 1, position 0:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/ext-sa/002.xml", line 5, position 5:
-ERROR (Well-formedness constraint): Bad XML declaration
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e SYSTEM "002.ent">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<?xml version="1.0"?><?xml version="1.0"?>
-data
+++ /dev/null
-In entity e = SYSTEM "003.ent", at line 1, position 0:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/ext-sa/003.xml", line 5, position 5:
-ERROR (Well-formedness constraint): Bad XML declaration
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e SYSTEM "003.ent">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<![ INCLUDE [
-<!ELEMENT doc (#PCDATA)>
-]>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/001.xml", at line 1, position 30:
-In entity [dtd] = SYSTEM "001.ent", at line 3, position 0:
-ERROR (Well-formedness constraint): `>]>' expected
+++ /dev/null
-<!DOCTYPE doc SYSTEM "001.ent">
-<doc></doc>
+++ /dev/null
-In entity e, at line 1, position 1:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/002.xml", line 4, position 0:
-ERROR (Well-formedness constraint): `]' expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e "<?xml version='1.0' encoding='UTF-8'?>">
-%e;
-]>
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<![ IGNORE [
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/003.xml", at line 1, position 30:
-In entity [dtd] = SYSTEM "003.ent", at line 2, position 11:
-ERROR (Well-formedness constraint): Bad conditional section
+++ /dev/null
-<!DOCTYPE doc SYSTEM "003.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-<![ INCLUDE [
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/004.xml", at line 1, position 30:
-In entity [dtd] = SYSTEM "004.ent", at line 3, position 0:
-ERROR (Well-formedness constraint): `>]>' expected
+++ /dev/null
-<!DOCTYPE doc SYSTEM "004.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
-%e;
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/005.xml", at line 1, position 30:
-In entity [dtd] = SYSTEM "005.ent", at line 2, position 0:
-ERROR (Well-formedness constraint): Reference to undeclared parameter entity `e'
+++ /dev/null
-<!DOCTYPE doc SYSTEM "005.ent">
-<doc></doc>
+++ /dev/null
-<![INCLUDE
-<!ELEMENT doc (#PCDATA)>
-]]>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/006.xml", at line 1, position 30:
-In entity [dtd] = SYSTEM "006.ent", at line 2, position 0:
-ERROR (Well-formedness constraint): Bad conditional section
+++ /dev/null
-<!DOCTYPE doc SYSTEM "006.ent">
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/007.xml", at line 1, position 30:
-In entity [dtd] = SYSTEM "007.ent", at line 1, position 0:
-ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
+++ /dev/null
-<!DOCTYPE doc SYSTEM "007.ent">
-<doc></doc>
+++ /dev/null
-<!ELEMENT doc ANY>
-<!ENTITY e "100%">
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/008.xml", at line 1, position 30:
-In entity [dtd] = SYSTEM "008.ent", at line 2, position 17:
-ERROR (Well-formedness constraint): The character '%' must be written as '%'
+++ /dev/null
-<!DOCTYPE doc SYSTEM "008.ent">
-<doc></doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<゚></゚>">
-]>
-<doc>&e;</doc>
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<X๜></X๜>">
-]>
-<doc>&e;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/001.xml", at line 3, position 0:
-ERROR (Well-formedness constraint): Illegal inside tags
+++ /dev/null
-<doc>
-<doc
-?
-<a</a>
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/002.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '<'
+++ /dev/null
-<doc>
-<.doc></.doc>
-</doc>
-
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/003.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): Illegal token or character
+++ /dev/null
-<doc><? ?></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/004.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): Illegal processing instruction
+++ /dev/null
-<doc><?target some data></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/005.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): Illegal processing instruction
+++ /dev/null
-<doc><?target some data?</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/006.xml", at line 1, position 20:
-ERROR (Well-formedness constraint): Double hyphens are illegal inside comments
+++ /dev/null
-<doc><!-- a comment -- another --></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/007.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
+++ /dev/null
-<doc>& no refc</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/008.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
+++ /dev/null
-<doc>&.entity;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/009.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
+++ /dev/null
-<doc>&#RE;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/010.xml", at line 1, position 7:
-ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
+++ /dev/null
-<doc>A & B</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/011.xml", at line 1, position 7:
-ERROR (Well-formedness constraint): Bad attribute list
+++ /dev/null
-<doc a1></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/012.xml", at line 1, position 8:
-ERROR (Well-formedness constraint): Bad attribute list
+++ /dev/null
-<doc a1=v1></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/013.xml", at line 1, position 8:
-ERROR (Well-formedness constraint): Cannot find the second quotation mark
+++ /dev/null
-<doc a1="v1'></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/014.xml", at line 1, position 8:
-ERROR (Well-formedness constraint): Attribute value contains character '<' literally
+++ /dev/null
-<doc a1="<foo>"></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/015.xml", at line 1, position 8:
-ERROR (Well-formedness constraint): Bad attribute list
+++ /dev/null
-<doc a1=></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/016.xml", at line 1, position 13:
-ERROR (Well-formedness constraint): `>' or `/>' expected
+++ /dev/null
-<doc a1="v1" "v2"></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/017.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
+++ /dev/null
-<doc><![CDATA[</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/018.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
+++ /dev/null
-<doc><![CDATA [ stuff]]></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/019.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '<'
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/020.xml", at line 1, position 8:
-ERROR (Well-formedness constraint): The character '&' must be written as '&'
+++ /dev/null
-<doc a1="A & B"></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/021.xml", at line 1, position 8:
-ERROR (Well-formedness constraint): The character '&' must be written as '&'
+++ /dev/null
-<doc a1="a&b"></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/022.xml", at line 1, position 8:
-ERROR (Well-formedness constraint): The character '&' must be written as '&'
+++ /dev/null
-<doc a1="{:"></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/023.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): Illegal inside tags
+++ /dev/null
-<doc 12="34"></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/024.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '<'
+++ /dev/null
-<doc>
-<123></123>
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/025.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): The sequence ']]>' must be written as ']]>'
+++ /dev/null
-<doc>]]></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/026.xml", at line 1, position 6:
-ERROR (Well-formedness constraint): The sequence ']]>' must be written as ']]>'
+++ /dev/null
-<doc>]]]></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/027.xml", at line 4, position 0:
-ERROR (Well-formedness constraint): `-->' expected
+++ /dev/null
-<doc>
-<!-- abc
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/028.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Illegal processing instruction
+++ /dev/null
-<doc>
-<?a pi that is not closed
-</doc>
-
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/029.xml", at line 1, position 9:
-ERROR (Well-formedness constraint): The sequence ']]>' must be written as ']]>'
+++ /dev/null
-<doc>abc]]]>def</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/030.xml", at line 1, position 18:
-ERROR: Bad character stream
+++ /dev/null
-<doc>A form feed (\f) is not legal in data</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/031.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): Illegal processing instruction
+++ /dev/null
-<doc><?pi a form feed (\f) is not allowed in a pi?></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/032.xml", at line 1, position 23:
-ERROR: Bad character stream
+++ /dev/null
-<doc><!-- a form feed (\f) is not allowed in a comment --></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/033.xml", at line 1, position 8:
-ERROR: Bad character stream
+++ /dev/null
-<doc>abc\edef</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/034.xml", at line 1, position 4:
-ERROR: Bad character stream
+++ /dev/null
-<doc\f>A form-feed is not white space or a name character</doc\f>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/035.xml", at line 1, position 7:
-ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '<'
+++ /dev/null
-<doc>1 < 2 but not in XML</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/036.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Data not allowed here
+++ /dev/null
-<doc></doc>
-Illegal data
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/037.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Character reference not allowed here
+++ /dev/null
-<doc></doc>
- 
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/038.xml", at line 1, position 29:
-ERROR (Well-formedness constraint): Attribute `x' occurs twice in element `doc'
+++ /dev/null
-<doc x="foo" y="bar" x="baz"></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/039.xml", at line 1, position 12:
-ERROR (Well-formedness constraint): End-tag does not match start-tag
+++ /dev/null
-<doc><a></aa></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/040.xml", at line 2, position 5:
-ERROR (Well-formedness constraint): Document must consist of only one toplevel element
+++ /dev/null
-<doc></doc>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/041.xml", at line 2, position 5:
-ERROR (Well-formedness constraint): Document must consist of only one toplevel element
+++ /dev/null
-<doc/>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/042.xml", at line 1, position 11:
-SYNTAX ERROR
+++ /dev/null
-<doc/></doc/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/043.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Data not allowed here
+++ /dev/null
-<doc/>
-Illegal data
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/044.xml", at line 1, position 12:
-ERROR (Well-formedness constraint): Document must consist of only one toplevel element
+++ /dev/null
-<doc/><doc/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/045.xml", at line 2, position 2:
-ERROR (Well-formedness constraint): Illegal inside tags
+++ /dev/null
-<doc>
-<a/
-</doc>
-
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/046.xml", at line 2, position 2:
-ERROR (Well-formedness constraint): Illegal inside tags
+++ /dev/null
-<doc>
-<a/</a>
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/047.xml", at line 2, position 3:
-ERROR (Well-formedness constraint): Illegal inside tags
+++ /dev/null
-<doc>
-<a / >
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/048.xml", at line 3, position 0:
-ERROR (Well-formedness constraint): CDATA section not allowed here
+++ /dev/null
-<doc>
-</doc>
-<![CDATA[]]>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/049.xml", at line 3, position 15:
-ERROR (Well-formedness constraint): End-tag does not match start-tag
+++ /dev/null
-<doc>
-<a><![CDATA[xyz]]]></a>
-<![CDATA[]]></a>
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/050.xml", at line 1, position 0:
-SYNTAX ERROR
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/051.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
+++ /dev/null
-<!-- a comment -->
-<![CDATA[]]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/052.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Content not allowed here
+++ /dev/null
-<!-- a comment -->
- 
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/053.xml", at line 1, position 10:
-ERROR (Well-formedness constraint): End-tag does not match start-tag
+++ /dev/null
-<doc></DOC>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/054.xml", at line 2, position 36:
-ERROR (Well-formedness constraint): Whitespace is missing between the literals of the PUBLIC identifier
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY foo PUBLIC "some public id">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/055.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Illegal token or character
+++ /dev/null
-<!DOCTYPE doc [
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/056.xml", at line 1, position 14:
-ERROR (Well-formedness constraint): Content not allowed here
+++ /dev/null
-<!DOCTYPE doc -- a comment -- []>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/057.xml", at line 2, position 22:
-ERROR (Well-formedness constraint): `>' expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "whatever" -- a comment -->
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/058.xml", at line 3, position 21:
-ERROR (Well-formedness constraint): `|' and more names expected, or `)'
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 (foo,bar) #IMPLIED>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/059.xml", at line 3, position 25:
-ERROR (Well-formedness constraint): #REQUIRED, #IMPLIED, #FIXED or a string literal expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 NMTOKEN v1>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/060.xml", at line 3, position 21:
-ERROR (Well-formedness constraint): One of CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, NOTATION, or a subexpression expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 NAME #IMPLIED>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/061.xml", at line 2, position 28:
-ERROR (Well-formedness constraint): Whitespace is missing between the literals of the PUBLIC identifier
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e PUBLIC "whatever""e.ent">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/062.xml", at line 2, position 12:
-ERROR (Well-formedness constraint): Whitespace is missing
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY foo"some text">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/063.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Restriction of the internal subset: Conditional sections not allowed
+++ /dev/null
-<!DOCTYPE doc [
-<![INCLUDE[ ]]>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/064.xml", at line 3, position 20:
-ERROR (Well-formedness constraint): Whitespace is missing
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST e a1 CDATA"foo">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/065.xml", at line 3, position 16:
-ERROR (Well-formedness constraint): Whitespace is missing
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1(foo|bar) #IMPLIED>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/066.xml", at line 3, position 26:
-ERROR (Well-formedness constraint): Whitespace is missing
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 (foo|bar)#IMPLIED>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/067.xml", at line 3, position 22:
-ERROR (Well-formedness constraint): Whitespace is missing
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 (foo)"foo">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/068.xml", at line 3, position 25:
-ERROR (Well-formedness constraint): Error in NOTATION type (perhaps missing whitespace after NOTATION?)
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a1 NOTATION(foo) #IMPLIED>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/069.xml", at line 4, position 38:
-ERROR (Well-formedness constraint): Whitespace missing before `NDATA'
+++ /dev/null
-<!DOCTYPE doc [
-<!NOTATION eps SYSTEM "eps.exe">
-<!-- missing space before NDATA -->
-<!ENTITY foo SYSTEM "foo.eps"NDATA eps>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/070.xml", at line 1, position 40:
-ERROR (Well-formedness constraint): Double hyphens are illegal inside comments
+++ /dev/null
-<!-- a comment ending with three dashes --->
-<doc></doc>
+++ /dev/null
-In entity e3, at line 1, position 0:
-Called from entity e2, line 1, position 0:
-Called from entity e1, line 1, position 0:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/071.xml", line 6, position 5:
-ERROR (Validity constraint): Recursive reference to entity `e1'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e1 "&e2;">
-<!ENTITY e2 "&e3;">
-<!ENTITY e3 "&e1;">
-]>
-<doc>&e1;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/072.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): Reference to undeclared general entity `foo'
+++ /dev/null
-<doc>&foo;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/073.xml", at line 4, position 5:
-ERROR (Well-formedness constraint): Reference to undeclared general entity `f'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "whatever">
-]>
-<doc>&f;</doc>
+++ /dev/null
-In entity e, at line 1, position 5:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/074.xml", line 5, position 5:
-ERROR (Well-formedness constraint): End-tag not in the same entity as the start-tag
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "</foo><foo>">
-]>
-<doc>
-<foo>&e;</foo>
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/075.xml", at line 6, position 7:
-ERROR (Well-formedness constraint): Recursive reference to general entity `e1'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e1 "&e2;">
-<!ENTITY e2 "&e3;">
-<!ENTITY e3 "&e1;">
-]>
-<doc a="&e1;"></doc>
-
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/076.xml", at line 1, position 7:
-ERROR (Well-formedness constraint): Reference to undeclared general entity `foo'
+++ /dev/null
-<doc a="&foo;"></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/077.xml", at line 4, position 7:
-ERROR (Well-formedness constraint): Reference to undeclared general entity `bar'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY foo "&bar;">
-]>
-<doc a="&foo;"></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/078.xml", at line 3, position 22:
-ERROR (Well-formedness constraint): Reference to undeclared general entity `foo'
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA "&foo;">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/079.xml", at line 6, position 22:
-ERROR (Well-formedness constraint): Recursive reference to general entity `e1'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e1 "&e2;">
-<!ENTITY e2 "&e3;">
-<!ENTITY e3 "&e1;">
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA "&e1;">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/080.xml", at line 6, position 29:
-ERROR (Well-formedness constraint): Recursive reference to general entity `e1'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e1 "&e2;">
-<!ENTITY e2 "&e3;">
-<!ENTITY e3 "&e1;">
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA #FIXED "&e1;">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/081.xml", at line 4, position 7:
-Other exception: Sys_error("/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/nul: No such file or directory")
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e SYSTEM "nul">
-]>
-<doc a="&e;"></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/082.xml", at line 4, position 22:
-Other exception: Sys_error("/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/nul: No such file or directory")
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e SYSTEM "nul">
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA "&e;">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/083.xml", at line 4, position 5:
-ERROR (Validity constraint): Invalid reference to NDATA entity e
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e SYSTEM "nul" NDATA n>
-]>
-<doc>&e;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/084.xml", at line 4, position 22:
-ERROR (Validity constraint): Invalid reference to NDATA entity e
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e SYSTEM "nul" NDATA n>
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA "&e;">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/085.xml", at line 1, position 25:
-ERROR (Well-formedness constraint): Illegal character in PUBLIC identifier
+++ /dev/null
-<!DOCTYPE doc PUBLIC "[" "null.ent">
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/086.xml", at line 2, position 24:
-ERROR (Well-formedness constraint): Illegal character in PUBLIC identifier
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY foo PUBLIC "[" "null.xml">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/087.xml", at line 2, position 36:
-ERROR (Well-formedness constraint): Illegal character in PUBLIC identifier
+++ /dev/null
-<!DOCTYPE doc [
-<!NOTATION foo PUBLIC "[" "null.ent">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/088.xml", at line 6, position 7:
-ERROR (Well-formedness constraint): Cannot find the second quotation mark
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA #IMPLIED>
-<!ENTITY e '"'>
-]>
-<doc a="&e;></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/089.xml", at line 2, position 32:
-ERROR (Well-formedness constraint): `>' expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY % foo SYSTEM "foo.xml" NDATA bar>
-]>
-<doc></doc>
+++ /dev/null
-In entity e, at line 1, position 7:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/090.xml", line 4, position 5:
-ERROR (Well-formedness constraint): Attribute value contains character '<' literally
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<foo a='<'></foo>">
-]>
-<doc>&e;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/091.xml", at line 3, position 32:
-ERROR (Well-formedness constraint): `>' expected
+++ /dev/null
-<!DOCTYPE doc [
-<!NOTATION n SYSTEM "n">
-<!ENTITY % foo SYSTEM "foo.xml" NDATA n>
-]>
-<doc></doc>
+++ /dev/null
-In entity e, at line 1, position 7:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/092.xml", line 4, position 5:
-ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<foo a='&'></foo>">
-]>
-<doc>&e;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/093.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
+++ /dev/null
-<doc>X</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/094.xml", at line 1, position 0:
-ERROR (Well-formedness constraint): Bad XML declaration
+++ /dev/null
-<?xml VERSION="1.0"?>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/095.xml", at line 1, position 0:
-ERROR (Well-formedness constraint): Bad XML declaration
+++ /dev/null
-<?xml encoding="UTF-8" version="1.0"?>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/096.xml", at line 1, position 0:
-ERROR (Well-formedness constraint): Cannot find the second quotation mark
+++ /dev/null
-<?xml version="1.0"encoding="UTF-8" ?>
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/097.xml", at line 1, position 0:
-ERROR (Well-formedness constraint): Cannot find the second quotation mark
+++ /dev/null
-<?xml version="1.0' encoding="UTF-8" ?>
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/098.xml", at line 1, position 0:
-ERROR (Well-formedness constraint): Bad XML declaration
+++ /dev/null
-<?xml version="1.0" version="1.0"?>
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/099.xml", at line 1, position 0:
-ERROR (Well-formedness constraint): Bad XML declaration
+++ /dev/null
-<?xml version="1.0" valid="no" ?>
-<doc></doc>
\ No newline at end of file
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/100.xml", at line 1, position 0:
-ERROR (Well-formedness constraint): Illegal 'standalone' declaration
+++ /dev/null
-<?xml version="1.0" standalone="YES" ?>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/101.xml", at line 1, position 0:
-Other exception: Failure("Netconversion.encoding_of_string: unknown encoding")
+++ /dev/null
-<?xml version="1.0" encoding=" UTF-8"?>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/102.xml", at line 1, position 0:
-ERROR (Well-formedness constraint): Bad XML version string
+++ /dev/null
-<?xml version="1.0 " ?>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/103.xml", at line 4, position 13:
-ERROR (Well-formedness constraint): End-tag does not match start-tag
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<foo>">
-]>
-<doc>&e;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/104.xml", at line 4, position 13:
-ERROR (Well-formedness constraint): End-tag not in the same entity as the start-tag
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<foo>">
-]>
-<doc>&e;</foo></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/105.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
+++ /dev/null
-<?pi stuff?>
-<![CDATA[]]>
-<doc>
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/106.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Content not allowed here
+++ /dev/null
-<?pi data?>
- <doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/107.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Restriction of the internal subset: Conditional sections not allowed
+++ /dev/null
-<!DOCTYPE doc [
-<![CDATA[]]>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/108.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
+++ /dev/null
-<doc>
-<![CDATA [ ]]>
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/109.xml", at line 4, position 0:
-ERROR (Well-formedness constraint): Content not allowed here
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<doc></doc>">
-]>
-&e;
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/110.xml", at line 5, position 3:
-ERROR (Well-formedness constraint): Entity reference not allowed here
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "">
-]>
-<doc></doc>
-&e;
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/111.xml", at line 4, position 5:
-ERROR (Well-formedness constraint): Illegal inside tags
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "foo='bar'">
-]>
-<doc &e;></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/112.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
+++ /dev/null
-<doc>
-<![cdata[data]]>
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/113.xml", at line 2, position 18:
-ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY % foo "&">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/114.xml", at line 2, position 16:
-ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY foo "&">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/115.xml", at line 4, position 7:
-ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "&">
-]>
-<doc a="&e;"></doc>
+++ /dev/null
-In entity e, at line 1, position 0:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/116.xml", line 4, position 5:
-ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "&#9">
-]>
-<doc>&e;7;</doc>
+++ /dev/null
-In entity e, at line 1, position 0:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/117.xml", line 4, position 5:
-ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "&">
-]>
-<doc>&e;#97;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/118.xml", at line 4, position 5:
-ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "#">
-]>
-<doc>&&e;97;</doc>
+++ /dev/null
-In entity e, at line 1, position 0:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/119.xml", line 5, position 0:
-ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "&">
-]>
-<doc>
-&e;#38;
-</doc>
+++ /dev/null
-In entity e, at line 1, position 0:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/120.xml", line 5, position 0:
-ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "&">
-]>
-<doc>
-&e;
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/121.xml", at line 2, position 9:
-ERROR (Well-formedness constraint): Illegal token or character
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY #DEFAULT "default">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/122.xml", at line 2, position 27:
-ERROR (Well-formedness constraint): It is not allowed to mix alternatives and sequences
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (a, (b) | c)?>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/123.xml", at line 2, position 22:
-ERROR (Well-formedness constraint): `>' expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc ((doc?)))>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/124.xml", at line 2, position 19:
-ERROR (Well-formedness constraint): Bad content model expression
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (doc|#PCDATA)*>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/125.xml", at line 2, position 16:
-ERROR (Well-formedness constraint): Bad content model expression
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc ((#PCDATA))>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/126.xml", at line 2, position 22:
-ERROR (Well-formedness constraint): Bad content model expression
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)+>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/127.xml", at line 2, position 22:
-ERROR (Well-formedness constraint): Bad content model expression
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)?>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/128.xml", at line 2, position 14:
-ERROR (Well-formedness constraint): EMPTY, ANY, or a subexpression expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc CDATA>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/129.xml", at line 2, position 14:
-ERROR (Well-formedness constraint): Content model expression expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc - - (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/130.xml", at line 2, position 21:
-ERROR (Well-formedness constraint): `>' expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (doc?) +(foo)>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/131.xml", at line 2, position 21:
-ERROR (Well-formedness constraint): `>' expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (doc?) -(foo)>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/132.xml", at line 2, position 41:
-ERROR (Well-formedness constraint): It is not allowed to mix alternatives and sequences
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (a, (b, c), (d, (e, f) | g))?>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/133.xml", at line 2, position 17:
-ERROR (Well-formedness constraint): Bad content model expression
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (a *)>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/134.xml", at line 2, position 18:
-ERROR (Well-formedness constraint): `>' expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (a) *>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/135.xml", at line 2, position 17:
-ERROR (Well-formedness constraint): References to general entities not allowed in DTDs
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (a & b)?>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/136.xml", at line 2, position 14:
-ERROR (Well-formedness constraint): EMPTY, ANY, or a subexpression expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc O O (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/137.xml", at line 2, position 13:
-ERROR (Well-formedness constraint): Whitespace is missing
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc(#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/138.xml", at line 2, position 19:
-ERROR (Well-formedness constraint): Bad content model expression
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (doc*?)>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/139.xml", at line 2, position 15:
-ERROR (Well-formedness constraint): Bad content model expression
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc ()>
-]>
-<doc></doc>
+++ /dev/null
-In entity e, at line 1, position 0:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/140.xml", line 4, position 5:
-ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '&lt;'
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<゚></゚>">
-]>
-<doc>&e;</doc>
+++ /dev/null
-In entity e, at line 1, position 2:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/141.xml", line 4, position 5:
-ERROR (Well-formedness constraint): Illegal inside tags
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<X๜></X๜>">
-]>
-<doc>&e;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/142.xml", at line 4, position 5:
-ERROR (Well-formedness constraint): Code point 0 outside the accepted range of code points
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>�</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/143.xml", at line 4, position 5:
-ERROR (Well-formedness constraint): Code point 31 outside the accepted range of code points
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/144.xml", at line 4, position 5:
-ERROR (Well-formedness constraint): Code point 65535 outside the accepted range of code points
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/145.xml", at line 4, position 5:
-ERROR (Well-formedness constraint): Code point 55296 outside the accepted range of code points
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>�</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/146.xml", at line 4, position 5:
-ERROR (Well-formedness constraint): Code point 1114112 outside the accepted range of code points
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>�</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/147.xml", at line 2, position 0:
-SYNTAX ERROR
+++ /dev/null
-
-<?xml version="1.0"?>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/148.xml", at line 2, position 0:
-SYNTAX ERROR
+++ /dev/null
-<!-- -->
-<?xml version="1.0"?>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/149.xml", at line 3, position 0:
-ERROR (Well-formedness constraint): `]' expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<?xml version="1.0"?>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/150.xml", at line 2, position 0:
-SYNTAX ERROR
+++ /dev/null
-<doc>
-<?xml version="1.0"?>
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/151.xml", at line 3, position 0:
-SYNTAX ERROR
+++ /dev/null
-<doc>
-</doc>
-<?xml version="1.0"?>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/152.xml", at line 1, position 0:
-ERROR (Well-formedness constraint): Bad XML declaration
+++ /dev/null
-<?xml encoding="UTF-8"?>
-<doc></doc>
+++ /dev/null
-In entity e, at line 1, position 0:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/153.xml", line 5, position 5:
-SYNTAX ERROR
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e "<?xml encoding='UTF-8'?>">
-]>
-<doc>&e;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/154.xml", at line 1, position 0:
-ERROR (Well-formedness constraint): Reserved processing instruction
+++ /dev/null
-<?XML version="1.0"?>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/155.xml", at line 1, position 0:
-ERROR (Well-formedness constraint): Reserved processing instruction
+++ /dev/null
-<?xmL version="1.0"?>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/156.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Reserved processing instruction
+++ /dev/null
-<doc>
-<?xMl version="1.0"?>
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/157.xml", at line 2, position 0:
-ERROR (Well-formedness constraint): Reserved processing instruction
+++ /dev/null
-<doc>
-<?xmL?>
-</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/158.xml", at line 4, position 10:
-ERROR (Well-formedness constraint): Illegal token or character
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!NOTATION gif PUBLIC "image/gif" "">
-<!ATTLIST #NOTATION gif a1 CDATA #IMPLIED>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/159.xml", at line 3, position 38:
-ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY e "<![CDATA[Tim & Michael]]>">
-]>
-<doc>&e;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/160.xml", at line 4, position 18:
-ERROR (Well-formedness constraint): Restriction of the internal subset: parameter entity not allowed here
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e "">
-<!ENTITY foo "%e;">
-]>
-<doc></doc>
+++ /dev/null
-In entity e, at line 1, position 9:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/161.xml", line 3, position 15:
-ERROR (Well-formedness constraint): Bad content model expression
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY % e "#PCDATA">
-<!ELEMENT doc (%e;)>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/162.xml", at line 4, position 20:
-ERROR (Well-formedness constraint): Restriction of the internal subset: parameter entity not allowed here
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e1 "">
-<!ENTITY % e2 "%e1;">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/163.xml", at line 5, position 0:
-ERROR (Well-formedness constraint): Content not allowed here
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e "">
-]>
-%e;
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/164.xml", at line 4, position 2:
-ERROR (Well-formedness constraint): References to parameter entities not allowed here
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e "">
-] %e; >
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/165.xml", at line 2, position 8:
-ERROR (Well-formedness constraint): Whitespace is missing after ENTITY
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY% e "">
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/166.xml", at line 1, position 0:
-ERROR: Bad character stream
+++ /dev/null
-<doc>ï¿¿</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/167.xml", at line 1, position 0:
-ERROR: Bad character stream
+++ /dev/null
-<doc>￾</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/168.xml", at line 1, position 0:
-ERROR: Bad character stream
+++ /dev/null
-<doc>í €</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/169.xml", at line 1, position 0:
-ERROR: Bad character stream
+++ /dev/null
-<doc>í°€</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/170.xml", at line 1, position 0:
-ERROR: Bad character stream
+++ /dev/null
-<doc>÷€€€</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/171.xml", at line 1, position 5:
-ERROR: Bad character stream
+++ /dev/null
-<!-- ï¿¿ -->
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/172.xml", at line 1, position 0:
-ERROR: Bad character stream
+++ /dev/null
-<?pi ï¿¿?>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/173.xml", at line 1, position 7:
-ERROR: Bad character stream
+++ /dev/null
-<doc a="ï¿¿"></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/174.xml", at line 1, position 5:
-ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
+++ /dev/null
-<doc><![CDATA[ï¿¿]]></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/175.xml", at line 3, position 18:
-ERROR: Bad character stream
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ENTITY % e "ï¿¿">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/176.xml", at line 5, position 0:
-ERROR (Well-formedness constraint): Missing end tag
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/177.xml", at line 4, position 6:
-ERROR: Bad character stream
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>Aï¿¿</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/178.xml", at line 5, position 7:
-ERROR (Well-formedness constraint): Cannot find the second quotation mark
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA #IMPLIED>
-]>
-<doc a=""></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/179.xml", at line 2, position 11:
-ERROR (Well-formedness constraint): Cannot find the second quotation mark
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "">
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/180.xml", at line 3, position 22:
-ERROR (Well-formedness constraint): Reference to undeclared general entity `e'
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA "&e;">
-<!ENTITY e "v">
-]>
-<doc></doc>
+++ /dev/null
-In entity e, at line 1, position 0:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/181.xml", line 5, position 5:
-ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<![CDATA[">
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>&e;]]></doc>
+++ /dev/null
-In entity e, at line 1, position 4:
-Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/182.xml", line 5, position 5:
-ERROR (Well-formedness constraint): `-->' expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e "<!--">
-<!ELEMENT doc (#PCDATA)>
-]>
-<doc>&e;--></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/183.xml", at line 2, position 28:
-ERROR (Well-formedness constraint): `)*' expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA | foo*)* >
-<!ELEMENT foo EMPTY>
-]>
-<doc></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/184.xml", at line 2, position 25:
-ERROR (Well-formedness constraint): Name expected
+++ /dev/null
-<!DOCTYPE doc [
-<!ELEMENT doc (#PCDATA | (foo))* >
-<!ELEMENT foo EMPTY>
-]>
-<doc></doc>
-
+++ /dev/null
-<!ELEMENT doc (#PCDATA)>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/185.xml", at line 3, position 5:
-ERROR (Well-formedness constraint): Reference to undeclared general entity `e'
+++ /dev/null
-<?xml version="1.0" standalone="yes"?>
-<!DOCTYPE doc SYSTEM "185.ent">
-<doc>&e;</doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/186.xml", at line 5, position 15:
-ERROR (Well-formedness constraint): Whitespace is missing between attributes `b' and `d'
+++ /dev/null
-<!DOCTYPE a [
-<!ELEMENT a EMPTY>
-<!ATTLIST a b CDATA #IMPLIED d CDATA #IMPLIED>
-]>
-<a b="c"d="e"/>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_notwf/sa/001.xml", at line 4, position 7:
-ERROR (Validity constraint): Found reference to external entity in attribute value
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e SYSTEM "null.ent">
-]>
-<doc a="&e;"></doc>
+++ /dev/null
-In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_notwf/sa/002.xml", at line 4, position 22:
-ERROR (Validity constraint): Found reference to external entity in attribute value
+++ /dev/null
-<!DOCTYPE doc [
-<!ENTITY e SYSTEM "null.ent">
-<!ELEMENT doc (#PCDATA)>
-<!ATTLIST doc a CDATA "&e;">
-]>
-<doc></doc>
+++ /dev/null
-#! /bin/bash
-
-# $Id$
-
-
-t=./test_negative
-
-init_test () {
- # $1: Options for test_negative
- # $2: Path to test record
- options="$1"
- input="$2"
- output=`dirname $input`/`basename $input .xml`.out
- if [ -f "$output" ]; then
- echo "Test $input already initialized; skipping"
- else
- $t $options "$input" >"$output"
- echo Test $input initialized.
- fi
-}
-
-
-check_test () {
- # $1: Options for test_negative
- # $2: Path to test record
- options="$1"
- input="$2"
- output=`dirname $input`/`basename $input .xml`.out
- $t $options "$input" >current.out
- if [ -f "$output" ]; then
- if cmp "$output" current.out; then
- echo Test $input OK
- else
- echo Test $input FAILED!!!
- fi
- else
- echo Test $input still uninitialized
- echo - OUTPUT:
- cat current.out
- fi
-}
-
-
-for_directory () {
- what="$1"
- shift
- options="$1"
- shift
- while [ $# -gt 0 ]; do
- input="$1"
- shift
- if [ -f "$input" ]; then
- $what "$options" "$input"
- else
- if [ -d "$input" ]; then
- for ent in $input/*.xml; do
- for_directory $what "$options" $ent
- done
- else
- echo "Not found: $input" >&2
- fi
- fi
- done
-}
-
-
-usage () {
- cat <<EOF >&2
-usage: $0 [ -init ] [ -wf ] file ... dir ...
-EOF
- exit 1
-}
-
-
-action="check_test"
-options=""
-while true; do
- case "x$1" in
- x-init)
- action="init_test"
- shift
- ;;
- x-wf)
- options="$options -wf"
- shift
- ;;
- x-*)
- usage
- ;;
- *)
- break
- ;;
- esac
-done
-
-
-if [ $# -gt 0 ]; then
- for_directory $action "$options" "$@"
-else
- for_directory $action -wf \
- data_jclark_notwf/ext-sa data_jclark_notwf/not-sa data_jclark_notwf/sa \
- data_notwf/sa
- for_directory $action "" \
- data_jclark_invalid data_invalid
-fi
-
-# ======================================================================
-# $Log$
-# Revision 1.1 2000/11/17 09:57:33 lpadovan
-# Initial revision
-#
-# Revision 1.2 2000/05/01 16:23:39 gerd
-# Added data_invalid.
-#
-# Revision 1.1 2000/05/01 15:58:50 gerd
-# Initial revision.
-#
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-open Pxp_document;;
-open Pxp_yacc;;
-open Pxp_types;;
-
-let error_happened = ref false;;
-
-let rec print_error e =
- print_endline (string_of_exn e)
-;;
-
-class warner =
- object
- method warn w =
- print_endline ("WARNING: " ^ w)
- end
-;;
-
-let parse debug wf iso88591 filename =
- try
- let config =
- { default_config with
- warner = new warner;
- debugging_mode = debug;
- encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
- idref_pass = true;
- }
- in
- let parse_fn =
- if wf then parse_wfdocument_entity
- else
- let index = new hash_index in
- parse_document_entity
- ?transform_dtd:None
- ~id_index:(index :> 'ext index)
- in
- let tree =
- parse_fn
- config
- (from_file filename)
- default_spec
- in
- print_endline "Parsed without error";
- with
- e ->
- error_happened := true;
- print_error e
-;;
-
-
-let main() =
- let debug = ref false in
- let wf = ref false in
- let iso88591 = ref false in
- let files = ref [] in
- Arg.parse
- [ "-d", Arg.Set debug, "turn debugging mode on";
-      "-wf", Arg.Set wf, "check well-formedness only";
- "-iso-8859-1", Arg.Set iso88591, "use ISO-8859-1 as internal encoding instead of UTF-8";
- ]
- (fun x -> files := x :: !files)
- "
-usage: test_negative [options] file ...
-
-List of options:";
- files := List.rev !files;
- List.iter (parse !debug !wf !iso88591) !files;
-;;
-
-
-main();
-if !error_happened then exit(1);;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:33 lpadovan
- * Initial revision
- *
- * Revision 1.6 2000/07/14 14:57:12 gerd
- * Updated: warner
- *
- * Revision 1.5 2000/07/14 14:20:11 gerd
- * Updated because of PXP interface changes.
- *
- * Revision 1.4 2000/07/09 01:49:09 gerd
- * Updated because of PXP interface changes.
- *
- * Revision 1.3 2000/06/04 20:31:21 gerd
- * Updates because of renamed PXP modules.
- *
- * Revision 1.2 2000/05/28 17:23:22 gerd
- * Updated.
- *
- * Revision 1.1 2000/05/01 15:58:50 gerd
- * Initial revision.
- *
- *
- *)
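
The deleted driver above boils down to one operation per test file. The following is an editorial sketch (not part of the removed sources) that distills it, using only entry points already present in test_negative.ml; the helper name run_one is invented for illustration:

open Pxp_yacc;;
open Pxp_types;;

(* Parse one test file in well-formedness mode and print either the success
 * message or the pretty-printed exception; run_negative compares output of
 * exactly this shape against the stored .out file. *)
let run_one filename =
  try
    ignore
      (parse_wfdocument_entity default_config (from_file filename) default_spec);
    print_endline "Parsed without error"
  with
    e -> print_endline (string_of_exn e)
;;

(* e.g. run_one "data_jclark_notwf/sa/056.xml" *)
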
+++ /dev/null
-# make test_reader: make bytecode test executable
-# make clean: remove intermediate files (in this directory)
-# make CLEAN: remove intermediate files (recursively)
-# make distclean: remove any superfluous files (recursively)
-#----------------------------------------------------------------------
-
-OCAMLPATH=../..
-
-test_reader: test_reader.ml
- ocamllex minilex.mll
- ocamlfind ocamlc -custom -o test_reader -package .,unix,threads \
- -linkpkg -thread -noautolink \
- -g minilex.ml test_reader.ml
-
-#----------------------------------------------------------------------
-.PHONY: all
-all:
-
-.PHONY: clean
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa minilex.ml
-
-.PHONY: CLEAN
-CLEAN: clean
-
-.PHONY: distclean
-distclean: clean
- rm -f *~
- rm -f test_reader
-
+++ /dev/null
-{ }
-rule nextchar = parse
- _
- { Some (Lexing.lexeme lexbuf).[0] }
- | eof
- { None }
-{ }
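
A hedged usage note (editorial, not part of the removed sources): Minilex.nextchar hands back the next byte of a lexbuf as Some c, or None at end of input. Driven over an ordinary in-memory lexbuf it looks like this; the helper name dump is invented for the example:

let () =
  let lb = Lexing.from_string "abc" in
  let rec dump () =
    match Minilex.nextchar lb with
    | Some c -> print_char c; dump ()   (* prints a, b, c *)
    | None -> print_newline ()
  in
  dump ()

The test_reader.ml file further below uses the same function, but on lexbufs obtained from PXP resolvers, so that it can observe how the resolvers refill the buffer.
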
+++ /dev/null
-0123456789
\ No newline at end of file
+++ /dev/null
-open Pxp_reader;;
-open Pxp_types;;
-open Minilex;;
-
-let make_channel s =
- (* Returns a channel reading the bytes from the string s *)
- let rd, wr = Unix.pipe() in
- let ch_rd = Unix.in_channel_of_descr rd in
- let ch_wr = Unix.out_channel_of_descr wr in
- ignore
- (Thread.create
- (fun () ->
- output_string ch_wr s;
- close_out ch_wr;
- )
- ()
- );
- ch_rd
-;;
-
-(**********************************************************************)
-
-let t001 () =
- (* Reads from a string (without recoding it), checks the lexbuf size *)
- let s = "0123456789abc" in
- let r = new resolve_read_this_string s in
- r # init_rep_encoding `Enc_iso88591;
- r # init_warner (new drop_warnings);
- let lb = r # open_in Anonymous in
- let c = nextchar lb in
- assert (c = Some '0');
- assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
- (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
- * now be at the end of the buffer indicating that the buffer is now
- * empty.
- *)
- ignore(nextchar lb);
- ignore(nextchar lb);
- ignore(nextchar lb);
- ignore(nextchar lb);
- ignore(nextchar lb);
- ignore(nextchar lb);
- ignore(nextchar lb);
- ignore(nextchar lb);
- let c = nextchar lb in
- assert (c = Some '9');
- assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
- r # change_encoding "";
- let c = nextchar lb in
- assert (c = Some 'a');
- assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
- ignore(nextchar lb);
- let c = nextchar lb in
- assert (c = Some 'c');
- let c = nextchar lb in
- assert (c = None);
- r # close_in;
- true
-;;
-
-
-let t002 () =
- (* Like t001, but reads from a channel *)
- let ch = make_channel "0123456789abc" in
- let r = new resolve_read_this_channel ch in
- r # init_rep_encoding `Enc_iso88591;
- r # init_warner (new drop_warnings);
- let lb = r # open_in Anonymous in
- let c = nextchar lb in
- assert (c = Some '0');
- assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
- (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
- * now be at the end of the buffer indicating that the buffer is now
- * empty.
- *)
- ignore(nextchar lb);
- ignore(nextchar lb);
- ignore(nextchar lb);
- ignore(nextchar lb);
- ignore(nextchar lb);
- ignore(nextchar lb);
- ignore(nextchar lb);
- ignore(nextchar lb);
- let c = nextchar lb in
- assert (c = Some '9');
- assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
- r # change_encoding "";
- let c = nextchar lb in
- assert (c = Some 'a');
- assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
- ignore(nextchar lb);
- let c = nextchar lb in
- assert (c = Some 'c');
- let c = nextchar lb in
- assert (c = None);
- r # close_in;
- true
-;;
-
-
-let t003 () =
- (* Tests non-automatic encoding conversion from ISO-8859-1 to UTF-8 *)
-  let s = "0«»°áàâãäÁÀÂÃÄéèêëíìîïÍÌÎÏóòôõøöÓÒÔÕØÖúùûüýÿÝßç¡¿ñÑ" in
- let r = new resolve_read_this_string ~fixenc:`Enc_iso88591 s in
- r # init_rep_encoding `Enc_utf8;
- r # init_warner (new drop_warnings);
- let lb = r # open_in Anonymous in
- let c = ref (nextchar lb) in
- assert (!c = Some '0');
- assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
- (* Note: because we initialize the resolver with ~fixenc, the resolver can
- * fill the buffer with more than one byte from the beginning.
- *)
- let u = ref "" in
- while !c <> None do
- ( match !c with
- Some x -> u := !u ^ String.make 1 x
- | None -> ()
- );
- c := nextchar lb
- done;
- r # close_in;
- !u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
-;;
-
-
-let t004 () =
- (* Tests non-automatic encoding conversion from UTF-8 to ISO-8859-1 *)
- let s = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145" in
- let r = new resolve_read_this_string ~fixenc:`Enc_utf8 s in
- r # init_rep_encoding `Enc_iso88591;
- r # init_warner (new drop_warnings);
- let lb = r # open_in Anonymous in
- let c = ref (nextchar lb) in
- assert (!c = Some '0');
- assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
- (* Note: because we initialize the resolver with ~fixenc, the resolver can
- * fill the buffer with more than one byte from the beginning.
- *)
- let u = ref "" in
- while !c <> None do
- ( match !c with
- Some x -> u := !u ^ String.make 1 x
- | None -> ()
- );
- c := nextchar lb
- done;
- r # close_in;
-  !u = "0«»°áàâãäÁÀÂÃÄéèêëíìîïÍÌÎÏóòôõøöÓÒÔÕØÖúùûüýÿÝßç¡¿ñÑ"
-;;
-
-
-let t005 () =
- (* Tests automatic encoding conversion from UTF-8 to ISO-8859-1 *)
- let s = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145" in
- let r = new resolve_read_this_string s in
- r # init_rep_encoding `Enc_iso88591;
- r # init_warner (new drop_warnings);
- let lb = r # open_in Anonymous in
- let c = ref (nextchar lb) in
- assert (!c = Some '0');
- assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
- let u = ref "" in
- while !c <> None do
- ( match !c with
- Some x -> u := !u ^ String.make 1 x
- | None -> ()
- );
- c := nextchar lb
- done;
- r # close_in;
-  !u = "0«»°áàâãäÁÀÂÃÄéèêëíìîïÍÌÎÏóòôõøöÓÒÔÕØÖúùûüýÿÝßç¡¿ñÑ"
-;;
-
-
-let t006 () =
- (* Tests automatic encoding conversion from UTF-16-BE to UTF-8
- * This variant invokes change_encoding early.
- *)
-  let s = "\254\255\0000\000«\000»\000°\000á\000à\000â\000ã\000ä\000Á\000À\000Â\000Ã\000Ä\000é\000è\000ê\000ë\000í\000ì\000î\000ï\000Í\000Ì\000Î\000Ï\000ó\000ò\000ô\000õ\000ø\000ö\000Ó\000Ò\000Ô\000Õ\000Ø\000Ö\000ú\000ù\000û\000ü\000ý\000ÿ\000Ý\000ß\000ç\000¡\000¿\000ñ\000Ñ" in
- let r = new resolve_read_this_string s in
- r # init_rep_encoding `Enc_utf8;
- r # init_warner (new drop_warnings);
- let lb = r # open_in Anonymous in
- let c = ref (nextchar lb) in
- assert (!c = Some '0');
- assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
- r # change_encoding "";
- let u = ref "" in
- while !c <> None do
- ( match !c with
- Some x -> u := !u ^ String.make 1 x
- | None -> ()
- );
- c := nextchar lb
- done;
- r # close_in;
- !u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
-;;
-
-
-let t007 () =
- (* Tests automatic encoding conversion from UTF-16-BE to UTF-8
- * This variant does not invoke change_encoding
- *)
-  let s = "\254\255\0000\000«\000»\000°\000á\000à\000â\000ã\000ä\000Á\000À\000Â\000Ã\000Ä\000é\000è\000ê\000ë\000í\000ì\000î\000ï\000Í\000Ì\000Î\000Ï\000ó\000ò\000ô\000õ\000ø\000ö\000Ó\000Ò\000Ô\000Õ\000Ø\000Ö\000ú\000ù\000û\000ü\000ý\000ÿ\000Ý\000ß\000ç\000¡\000¿\000ñ\000Ñ" in
- let r = new resolve_read_this_string s in
- r # init_rep_encoding `Enc_utf8;
- r # init_warner (new drop_warnings);
- let lb = r # open_in Anonymous in
- let c = ref (nextchar lb) in
- assert (!c = Some '0');
- assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
- let u = ref "" in
- while !c <> None do
- ( match !c with
- Some x -> u := !u ^ String.make 1 x
- | None -> ()
- );
- c := nextchar lb
- done;
- r # close_in;
- !u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
-;;
-
-(**********************************************************************)
-
-let t100 () =
- (* Reads from a file without recoding it *)
- let r = new resolve_as_file () in
- r # init_rep_encoding `Enc_utf8;
- r # init_warner (new drop_warnings);
- let cwd = Sys.getcwd() in
- let lb = r # open_in (System ("file://localhost" ^ cwd ^ "/t100.dat")) in
- let c = nextchar lb in
- assert (c = Some '0');
- assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
- (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
- * now be at the end of the buffer indicating that the buffer is now
- * empty.
- *)
- for i = 1 to 8 do
- ignore(nextchar lb);
- done;
- let c = nextchar lb in
- assert (c = Some '9');
- r # close_in;
- true
-;;
-
-let t101 () =
- (* Reads from a file without recoding it *)
- let r = new resolve_as_file () in
- r # init_rep_encoding `Enc_utf8;
- r # init_warner (new drop_warnings);
- let cwd = Sys.getcwd() in
- let lb = r # open_in (System ("//localhost" ^ cwd ^ "/t100.dat")) in
- let c = nextchar lb in
- assert (c = Some '0');
- assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
- (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
- * now be at the end of the buffer indicating that the buffer is now
- * empty.
- *)
- for i = 1 to 8 do
- ignore(nextchar lb);
- done;
- let c = nextchar lb in
- assert (c = Some '9');
- r # close_in;
- true
-;;
-
-let t102 () =
- (* Reads from a file without recoding it *)
- let r = new resolve_as_file () in
- r # init_rep_encoding `Enc_utf8;
- r # init_warner (new drop_warnings);
- let cwd = Sys.getcwd() in
- let lb = r # open_in (System (cwd ^ "/t100.dat")) in
- let c = nextchar lb in
- assert (c = Some '0');
- assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
- (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
- * now be at the end of the buffer indicating that the buffer is now
- * empty.
- *)
- for i = 1 to 8 do
- ignore(nextchar lb);
- done;
- let c = nextchar lb in
- assert (c = Some '9');
- r # close_in;
- true
-;;
-
-let t103 () =
- (* Reads from a file without recoding it *)
- let r = new resolve_as_file () in
- r # init_rep_encoding `Enc_utf8;
- r # init_warner (new drop_warnings);
- let lb = r # open_in (System "t100.dat") in
- let c = nextchar lb in
- assert (c = Some '0');
- assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
- (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
- * now be at the end of the buffer indicating that the buffer is now
- * empty.
- *)
- for i = 1 to 8 do
- ignore(nextchar lb);
- done;
- let c = nextchar lb in
- assert (c = Some '9');
- r # close_in;
- true
-;;
-
-(**********************************************************************)
-
-let t110 () =
- (* Checks whether relative URLs are properly handled *)
- let r = new resolve_as_file () in
- r # init_rep_encoding `Enc_utf8;
- r # init_warner (new drop_warnings);
- let lb = r # open_in (System "t100.dat") in
- let c = nextchar lb in
- assert (c = Some '0');
- assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
- (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
- * now be at the end of the buffer indicating that the buffer is now
- * empty.
- *)
- for i = 1 to 8 do
- ignore(nextchar lb);
- done;
- let r' = r # clone in
- let lb' = r' # open_in (System "t100.dat") in
- let c = nextchar lb' in
- assert (c = Some '0');
- for i = 1 to 8 do
- ignore(nextchar lb');
- done;
- let c = nextchar lb' in
- assert (c = Some '9');
- r' # close_in;
- let c = nextchar lb in
- assert (c = Some '9');
- r # close_in;
- true
-;;
-
-(**********************************************************************)
-(* Tests whether the encoding handling of System IDs is okay *)
-
-let t200 () =
- (* Check the technique for the following tests:
-  * (Also checks 'combine' to some extent.)
- *)
- let r1 = new resolve_read_this_string
- ~id:(System "b.xml")
- ~fixenc:`Enc_iso88591
- "ae" in
- let r2 = new resolve_read_this_string
- ~id:(System "a.xml")
- ~fixenc:`Enc_iso88591
- "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM 'b.xml'> ]> <a>&ae;</a>" in
- let r = new combine [ r1; r2 ] in
- (* It should now be possible to resolve &ae; *)
- let _ =
- Pxp_yacc.parse_document_entity
- { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_iso88591 }
- (Pxp_yacc.ExtID(System "a.xml", r))
- Pxp_yacc.default_spec
- in
- true
-;;
-
-
-let t201 () =
- (* Check that System IDs are converted to UTF-8. rep_encoding = ISO-8859-1 *)
- let r1 = new resolve_read_this_string
-         ~id:(System "\195\164.xml")   (* This is a UTF-8 "ä"! *)
- ~fixenc:`Enc_iso88591
- "ae" in
- let r2 = new resolve_read_this_string
- ~id:(System "a.xml")
- ~fixenc:`Enc_iso88591
- "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM 'ä.xml'> ]> <a>&ae;</a>" in
- let r = new combine [ r1; r2 ] in
- (* It should now be possible to resolve &ae; *)
- let _ =
- Pxp_yacc.parse_document_entity
- { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_iso88591 }
- (Pxp_yacc.ExtID(System "a.xml", r))
- Pxp_yacc.default_spec
- in
- true
-;;
-
-
-let t202 () =
- (* Check that System IDs are converted to UTF-8. rep_encoding = UTF-8 *)
- let r1 = new resolve_read_this_string
- ~id:(System "\195\164.xml")
- ~fixenc:`Enc_iso88591
- "ae" in
- let r2 = new resolve_read_this_string
- ~id:(System "a.xml")
- ~fixenc:`Enc_iso88591
- "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM 'ä.xml'> ]> <a>&ae;</a>" in
- let r = new combine [ r1; r2 ] in
- (* It should now be possible to resolve &ae; *)
- let _ =
- Pxp_yacc.parse_document_entity
- { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_utf8 }
- (Pxp_yacc.ExtID(System "a.xml", r))
- Pxp_yacc.default_spec
- in
- true
-;;
-
-(**********************************************************************)
-
-let test f n =
- try
- print_string ("Reader test " ^ n);
- flush stdout;
- if f() then
- print_endline " ok"
- else
- print_endline " FAILED!!!!";
- with
- error ->
- print_endline (" FAILED: " ^ string_of_exn error)
-;;
-
-test t001 "001";;
-test t002 "002";;
-test t003 "003";;
-test t004 "004";;
-test t005 "005";;
-test t006 "006";;
-test t007 "007";;
-
-test t100 "100";;
-test t101 "101";;
-test t102 "102";;
-test t103 "103";;
-
-test t110 "110";;
-
-test t200 "200";;
-test t201 "201";;
-test t202 "202";;
+++ /dev/null
-#! /bin/sh
-
-set -e
-
-(cd reader && ./test_reader)
-(cd canonxml && ./run_canonxml)
-(cd write && ./run_write)
-(cd codewriter && ./run_codewriter)
-(cd negative && ./run_negative)
+++ /dev/null
-# make test_write: make bytecode executable
-# make clean: remove intermediate files (in this directory)
-# make CLEAN: remove intermediate files (recursively)
-# make distclean: remove any superfluous files (recursively)
-#----------------------------------------------------------------------
-
-OCAMLPATH=../..
-
-test_write: test_write.ml
- ocamlfind ocamlc -g -custom -o test_write -package .,str -linkpkg test_write.ml
-
-#----------------------------------------------------------------------
-.PHONY: all
-all:
-
-.PHONY: clean
-clean:
- rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa out1 out2 out3
-
-.PHONY: CLEAN
-CLEAN: clean
-
-.PHONY: distclean
-distclean: clean
- rm -f *~
- rm -f test_write
-
+++ /dev/null
-#! /bin/bash
-
-test_sample () {
- file="$1"
- echo -n "Testing $file... "
- ./test_write -in "$file" >out1
- ./test_write -in out1 >out2
- ./test_write -in out2 >out3
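-    # The parse/write round trip must be stable: out1 and out3 have to agree.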
- if cmp out1 out3; then
- echo "OK"
- else
- echo "FAILED"
- fi
-}
-
-
-test_sample "sample001.xml"
+++ /dev/null
-<!DOCTYPE a [
-
-<!ELEMENT a (b | (c, d)* | (e, f)+ | g?)>
-<!ELEMENT b (#PCDATA | a)*>
-<!ELEMENT c EMPTY>
-<!ELEMENT d ANY>
-<!ELEMENT e EMPTY>
-<!ELEMENT f EMPTY>
-<!ELEMENT g EMPTY>
-
-<!ATTLIST a u CDATA #IMPLIED
- v NMTOKEN "huhu"
- w (q|p) #REQUIRED
- x NOTATION (n1|n2) "n1"
- y ENTITY #IMPLIED>
-
-<!NOTATION n1 SYSTEM "/bin/n1-processor">
-<!NOTATION n2 SYSTEM "/bin/n2-processor">
-
-<!ENTITY u1 SYSTEM "file-u1" NDATA n1>
-<!ENTITY u2 SYSTEM "file-u2" NDATA n2>
-
-<?pi1 args ...?>
-]>
-
-<a u="1" w="q" x="n2">
- <b>
- <?pi2 args ...?>
- This is text!
- <a w="p" y="u1">
- <c/>
- <d/>
- </a>
- </b>
-</a>
-
-<?pi3 args ...?>
+++ /dev/null
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-
-open Pxp_document;;
-open Pxp_yacc;;
-open Pxp_types;;
-
-let error_happened = ref false;;
-
-let rec prerr_error e =
- prerr_endline (string_of_exn e)
-;;
-
-class warner =
- object
- method warn w =
- prerr_endline ("WARNING: " ^ w)
- end
-;;
-
-let parse_and_write in_filename =
- let spec =
- let e = new element_impl default_extension in
- make_spec_from_mapping
- ~super_root_exemplar: e
- ~default_pinstr_exemplar: e
- ~data_exemplar: (new data_impl default_extension)
- ~default_element_exemplar: e
- ~element_mapping: (Hashtbl.create 1)
- ()
- in
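-  (* PIs and a super root node are represented in the tree, so that
-   * processing instructions survive the parse/write round trip.
-   *)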
- let config =
- { default_config with
- warner = new warner;
- enable_pinstr_nodes = true;
- enable_super_root_node = true;
- encoding = `Enc_utf8;
- }
- in
- try
- let tree =
- parse_document_entity
- config
- (from_file in_filename)
- spec
- in
-
- tree # write (Out_channel stdout) `Enc_utf8;
- with
- e ->
- error_happened := true;
- prerr_error e
-;;
-
-
-let main() =
- let in_file = ref "" in
- Arg.parse
- [ "-in", (Arg.String (fun s -> in_file := s)),
- " <file> Set the XML file to read";
- ]
- (fun x -> raise (Arg.Bad "Unexpected argument"))
- "
-usage: test_write [ options ]
-
-List of options:";
- if !in_file = "" then begin
- prerr_endline "No input file specified.";
- exit 1
- end;
- parse_and_write !in_file
-;;
-
-
-main();
-if !error_happened then exit(1);;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:35 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/08/16 23:44:21 gerd
- * Updates because of changes of the PXP API.
- *
- * Revision 1.1 2000/07/16 17:50:39 gerd
- * Initial revision.
- *
- *)
+++ /dev/null
-#! /bin/sh
-#
-# $Id$
-# ----------------------------------------------------------------------
-#
-# usage: collect_files file ...
-#
-# Prints the names of the files passed as arguments which actually
-# exist and are regular files.
-
-for x in "$@"; do
- if [ -f "$x" ]; then
- echo "$x"
- fi
-done
-
-# ======================================================================
-#
-# $Log$
-# Revision 1.1 2000/11/17 09:57:35 lpadovan
-# Initial revision
-#
-# Revision 1.1 2000/07/27 21:07:26 gerd
-# Initial revision.
-#
+++ /dev/null
-#! /bin/sh
-# (*
-exec ocaml "$0" "$@"
-*) directory ".";;
-
-(* $Id$
- * ----------------------------------------------------------------------
- *
- *)
-
-let get_arg variant insert_line =
- (* returns the argument of an "#insert" line *)
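-  (* e.g. with variant "utf8", the (hypothetical) line "#insert foo_*.def"
-   * yields "foo_utf8.def": blanks are dropped and '*' is expanded *)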
- let s = ref "" in
- for i = 8 to String.length insert_line - 1 do
- match insert_line.[i] with
- ' ' -> ()
- | '*' ->
- (* replace '*' with 'variant' *)
- s := !s ^ variant
- | c ->
- s := !s ^ String.make 1 c
- done;
- !s
-;;
-
-
-let edit_file variant name =
- let basename = Filename.chop_suffix name ".src" in
- let mllname = basename ^ "_" ^ variant ^ ".mll" in
- let chin = open_in name in
- let chout = open_out mllname in
- output_string chout "(* File generated by insert_variant; DO NOT EDIT! *)\n";
- begin try
- while true do
- let line = input_line chin in
- (* We do not have Str here. *)
- if String.length line >= 8 & String.sub line 0 8 = "#insert " then begin
- let insname = get_arg variant line in
- (* Copy the file 'insname' to chout *)
- let chcopy = open_in insname in
- let n = in_channel_length chcopy in
- let s = String.create n in
- really_input chcopy s 0 n;
- close_in chcopy;
- output_string chout s;
- end
- else begin
- output_string chout line;
- output_char chout '\n';
- end
- done
- with
- End_of_file -> ()
- end;
- close_in chin;
- close_out chout
-;;
-
-
-let main() =
- let variant = ref "" in
- let files = ref [] in
-  Arg.current := 0; (* Because of an OCaml-3.00 bug *)
- Arg.parse
- [ "-variant", Arg.String (fun s -> variant := s),
- "<name> Set the variant (character encoding)";
- ]
- (fun s -> files := !files @ [s])
- "insert_variant [ options ] file.src ...
-
-Reads each file, replaces its #insert lines with the contents of the
-referenced files, and writes the result to file_variant.mll.
-
-A #insert line includes the named file into the source; an asterisk (*)
-in its argument is replaced by the name of the variant.
-
-Options:
-";
-
- if !variant = "" then
- failwith "No variant specified!";
-
- List.iter
- (fun name -> edit_file !variant name)
- !files
-;;
-
-
-main();;
-
-(* ======================================================================
- * History:
- *
- * $Log$
- * Revision 1.1 2000/11/17 09:57:35 lpadovan
- * Initial revision
- *
- * Revision 1.2 2000/05/20 21:14:33 gerd
- * Workaround for an OCaml 3.00 bug.
- *
- * Revision 1.1 2000/05/20 20:30:15 gerd
- * Initial revision.
- *
- *
- *)
+++ /dev/null
-*.cmo
-*.cmx
-*.cmi
-
+++ /dev/null
-#(******************************************************)
-#(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
-#(* 14/05/2000 *)
-#(******************************************************)
-
-OCAMLC = ocamlc
-OCAMLOPT = ocamlopt
-OCAMLDEP = ocamldep
-OCAMLLEX = ocamllex
-OCAMLYACC = ocamlyacc
-
-all: ucs2_to_utf8
-opt: ucs2_to_utf8.opt
-
-DEPOBJS = ucs2_to_utf8.ml lexer.ml parser.ml parser.mli types.ml
-
-UCS2_TO_UTF8OBJS = types.cmo lexer.cmo parser.cmo ucs2_to_utf8.cmo
-UCS2_TO_UTF8OPTOBJS = types.cmx lexer.cmx parser.cmx ucs2_to_utf8.cmx
-
-lexer.ml: lexer.mll
- $(OCAMLLEX) lexer.mll
-
-parser.ml: parser.mly
- $(OCAMLYACC) parser.mly
-
-parser.mli: parser.mly
- $(OCAMLYACC) parser.mly
-
-depend: lexer.ml parser.ml parser.mli
- $(OCAMLDEP) $(DEPOBJS) > depend
-
-ucs2_to_utf8: $(UCS2_TO_UTF8OBJS)
- $(OCAMLC) -o ucs2_to_utf8 $(UCS2_TO_UTF8OBJS)
-
-ucs2_to_utf8.opt: $(UCS2_TO_UTF8OPTOBJS)
- $(OCAMLOPT) -o ucs2_to_utf8.opt $(UCS2_TO_UTF8OPTOBJS)
-
-.SUFFIXES: .ml .mli .cmo .cmi .cmx
-.ml.cmo:
- $(OCAMLC) -c $<
-.mli.cmi:
- $(OCAMLC) -c $<
-.ml.cmx:
- $(OCAMLOPT) -c $<
-
-clean:
- rm -f *.cm[iox] *.o lexer.ml parser.ml parser.mli \
- ucs2_to_utf8 ucs2_to_utf8.opt
-
-include depend
+++ /dev/null
-(******************************************************)
-(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
-(* 14/05/2000 *)
-(******************************************************)
-
-How to compile: "make clean && make depend && make && make opt"
-
-Usage: "cat input.mll | ./ucs2_to_utf8 > output.mll"
-       where input.mll contains definitions of UCS-2 regular expressions and
-       output.mll receives the same regular expressions, recoded to UTF-8 in
-       the format expected by ocamllex.
-
-       See input/input.mll for an example (its definitions are taken from
-       appendix B of the XML recommendation) and input/example.mll for a
-       smaller one.
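-
-       For instance (a minimal illustration, not taken from those files),
-       the input definition
-
-           let digit = [#x0030-#x0039] ;;
-
-       is rewritten to the ocamllex definition
-
-           let digit =
-             ['\048'-'\057']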
+++ /dev/null
-{
-(******************************************************)
-(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
-(* 14/05/2000 *)
-(******************************************************)
-
-open Parser
-
-let comment_depth = ref 0;;
-
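-(* charint_of_lexeme turns a lexeme "#xNNNN" into "0xNNNN", which *)
-(* int_of_string then interprets as a hexadecimal constant. *)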
-let charint_of_lexeme l =
- String.set l 0 '0' ;
- int_of_string l
-;;
-}
-
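-(* despite the name, this is an (upper-case) hexadecimal digit *)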
-let digit = ['0'-'9']|['A'-'F']
-
-rule token =
- parse
- [' ' '\t' '\n'] { token lexbuf }
- | "let" { LET }
- | (['a'-'z']|'_')(['a'-'z']|['A'-'Z']|'_'|['0'-'9']|'\'')*
- { IDENT (Lexing.lexeme lexbuf) }
- | '=' { EQ }
- | ";;" { END_OF_LET }
- | "|" { PIPE }
- | '[' { LBRACKET }
- | ']' { RBRACKET }
- | '-' { RANGE }
- | "(*" { incr comment_depth ;
- comment lexbuf
- }
- | "#x" digit digit digit digit { CHAR (charint_of_lexeme (Lexing.lexeme lexbuf)) }
- | eof { EOF }
-
-and comment =
- parse
- "(*" { incr comment_depth ; comment lexbuf }
- | "*)" { decr comment_depth ;
- if !comment_depth = 0 then token lexbuf else comment lexbuf
- }
- | _ { comment lexbuf }
+++ /dev/null
-/******************************************************/
-/* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> */
-/* 14/05/2000 */
-/******************************************************/
-
-%token <int>CHAR
-%token <string>IDENT
-%token LET
-%token EQ
-%token END_OF_LET
-%token RBRACKET
-%token PIPE
-%token LBRACKET
-%token RANGE
-%token EOF
-%start main
-%type <Types.definition list> main
-
-%%
-
-main:
- EOF { [] }
- | declaration main { $1::$2 }
-;
-
-declaration:
- LET IDENT EQ regexp END_OF_LET
- { { Types.id = $2 ; Types.rel = $4 } }
-;
-
-regexp:
- regexptoken PIPE regexp { $1::$3 }
- | regexptoken { [$1] }
-;
-
-regexptoken:
- CHAR { Types.Char $1 }
- | LBRACKET CHAR RANGE CHAR RBRACKET { Types.Interval ($2,$4) }
- | IDENT { Types.Identifier $1 }
-;
+++ /dev/null
-(******************************************************)
-(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
-(* 14/05/2000 *)
-(******************************************************)
-
-type regexp =
- Char of int
- | Interval of int * int (* lower bound, upper bound *)
- | Identifier of string
- | Concat of regexp list list (* concatenation of disjunctions *)
-;;
-
-type definition = { id : string ; rel : regexp list } ;;
+++ /dev/null
-(******************************************************)
-(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
-(* 14/05/2000 *)
-(******************************************************)
-
-(* Surrogate code points (0xD800-0xDFFF) are not valid XML characters *)
-exception SurrogatePairs;;
-
-(* Interval (n,m) where n > m *)
-exception InvalidInterval of int * int;;
-
-(* Given a UCS-2 character code, returns its UTF-8 encoding *)
-(* (as a concatenation of single-byte characters) *)
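-(* For example, 0x00E4 ("ä") becomes the two bytes 0xC3 0xA4, and *)
-(* 0x0950 becomes the three bytes 0xE0 0xA5 0x90. *)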
-let char_ucs2_to_utf8 =
- function
- n when n >= 0xD800 && n <= 0xDFFF -> raise SurrogatePairs
- | n when n <= 0x007F -> Types.Char n
- | n when n <= 0x07FF ->
- Types.Concat
- [[Types.Char (n lsr 6 land 0b00011111 lor 0b11000000)] ;
- [Types.Char (n land 0b00111111 lor 0b10000000)]]
- | n ->
- Types.Concat
- [[Types.Char (n lsr 12 land 0b00001111 lor 0b11100000)] ;
- [Types.Char (n lsr 6 land 0b00111111 lor 0b10000000)] ;
- [Types.Char (n land 0b00111111 lor 0b10000000)]]
-;;
-
-(*CSC: Two functions for debugging purposes only
-
-let char_ucs2_to_utf8 =
- function
- n when n >= 0xD800 && n <= 0xDFFF -> assert false
- | n when n <= 0x007F -> [[n]]
- | n when n <= 0x07FF ->
- [[(n lsr 6 land 0b00011111 lor 0b11000000)] ;
- [(n land 0b00111111 lor 0b10000000)]]
- | n ->
- [[(n lsr 12 land 0b00001111 lor 0b11100000)] ;
- [(n lsr 6 land 0b00111111 lor 0b10000000)] ;
- [(n land 0b00111111 lor 0b10000000)]]
-;;
-
-let rec bprint =
- function
- 0 -> ""
- | n -> bprint (n / 2) ^ string_of_int (n mod 2)
-;;
-*)
-
-(* A few useful functions *)
-let rec mklist e =
- function
- 0 -> []
- | n -> e::(mklist e (n - 1))
-;;
-
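-(* sup n matches the largest tail of n UTF-8 continuation bytes (all 0xBF); *)
-(* inf n builds the smallest such tail (all 0x80 bytes). *)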
-let sup =
- let t = Types.Char 0b10111111 in
- function
- 1 -> t
- | n -> Types.Concat (mklist [t] n)
-;;
-
-let rec inf =
- let b = Types.Char 0b10000000 in
- function
- 1 -> [[b]]
- | n -> mklist [b] n
-;;
-
-let mysucc =
- function
- [Types.Char n] -> n + 1
- | _ -> assert false
-;;
-
-let mypred =
- function
- [Types.Char n] -> n - 1
- | _ -> assert false
-;;
-
-(* Given the two UTF-8-encoded extremes of a character interval, both *)
-(* of the same encoded length, returns the UTF-8 regular expression *)
-(* matching all the characters in the interval *)
-let rec same_length_ucs2_to_utf8 =
- let module T = Types in
- function
- (T.Char n, T.Char m) when n = m -> [T.Char n]
- | (T.Char n, T.Char m) -> [T.Interval (n,m)]
- | (T.Concat [hen ; [tln]], T.Concat [hem ; [tlm]]) when hen = hem ->
- [T.Concat [hen ; same_length_ucs2_to_utf8 (tln,tlm)]]
- | (T.Concat [hen ; [tln]], T.Concat ([hem ; [tlm]] as e2)) ->
- (T.Concat [hen ; same_length_ucs2_to_utf8 (tln,sup 1)]) ::
- (let shen = mysucc hen
- and phem = mypred hem in
- let succhen = [T.Char shen] in
- if succhen = hem then
- same_length_ucs2_to_utf8 (T.Concat (succhen::(inf 1)), T.Concat e2)
- else
- (T.Concat [[T.Interval (shen, phem)] ;
- [T.Interval (0b10000000,0b10111111)]])::
- same_length_ucs2_to_utf8 (T.Concat (hem::(inf 1)), T.Concat e2)
- )
- (*same_length_ucs2_to_utf8 (T.Concat ((mysucc hen)::(inf 1)), T.Concat e2)*)
- | (T.Concat (hen::tln), T.Concat (hem::tlm)) when hen = hem ->
- [T.Concat [hen ; same_length_ucs2_to_utf8 (T.Concat tln, T.Concat tlm)]]
- | (T.Concat (hen::tln), T.Concat ((hem::tlm) as e2)) ->
- let n = List.length tln in
- (T.Concat
- [hen ; same_length_ucs2_to_utf8 (T.Concat tln,sup n)]) ::
- (let shen = mysucc hen
- and phem = mypred hem in
- let succhen = [T.Char shen] in
- if succhen = hem then
- same_length_ucs2_to_utf8 (T.Concat (succhen::(inf n)), T.Concat e2)
- else
- (T.Concat [[T.Interval (shen, phem)] ;
- [T.Interval (0b10000000,0b10111111)] ;
- [T.Interval (0b10000000,0b10111111)]]
- )::
- same_length_ucs2_to_utf8 (T.Concat (hem::(inf n)), T.Concat e2)
- )
- (*same_length_ucs2_to_utf8 (T.Concat ((mysucc hen)::(inf n)),T.Concat e2)*)
- | _ -> assert false
-;;
-
-(* Given an interval of UCS-2 characters, splits it *)
-(* into subintervals whose extremes have the same *)
-(* UTF-8 encoding length and calls *)
-(* same_length_ucs2_to_utf8 on each subinterval *)
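-(* For example, 0x00C0-0x00FF is a single subinterval (both extremes *)
-(* encode to two bytes) and becomes '\195' followed by ['\128'-'\191']. *)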
-let rec seq_ucs2_to_utf8 =
- function
- (n,_) when n >= 0xD800 && n <= 0xDFFF -> raise SurrogatePairs
- | (_,n) when n >= 0xD800 && n <= 0xDFFF -> raise SurrogatePairs
- | (n,m) when n > m -> raise (InvalidInterval (n,m))
- | (n,m) when n = m -> [char_ucs2_to_utf8 n]
- | (n,m) when n <= 0x07F && m > 0x07F ->
- (seq_ucs2_to_utf8 (n,0x07F)) @ (seq_ucs2_to_utf8 (0x080,m))
- | (n,m) when n <= 0x07FF && m > 0x07FF ->
- (seq_ucs2_to_utf8 (n,0x07FF)) @ (seq_ucs2_to_utf8 (0x0800,m))
- | (n,m) ->
- let utf8n = char_ucs2_to_utf8 n
- and utf8m = char_ucs2_to_utf8 m in
- same_length_ucs2_to_utf8 (utf8n,utf8m)
-;;
-
-(* Given a UCS-2 regular expression, returns *)
-(* the corresponding UTF-8 regular expression *)
-let ucs2_to_utf8 { Types.id = id ; Types.rel = rel } =
- let rec aux re l2 =
- match re with
- Types.Char i -> char_ucs2_to_utf8 i :: l2
- | Types.Interval (l,u) -> seq_ucs2_to_utf8 (l,u) @ l2
- | Types.Identifier _ as i -> i :: l2
- | Types.Concat rell ->
- let foo rel = List.fold_right aux rel [] in
- Types.Concat (List.map foo rell) :: l2
- in
- { Types.id = id ; Types.rel = List.fold_right aux rel [] }
-;;
-
-(* The function actually used to produce the output *)
-let output = print_string ;;
-
-(* padded_string_of_int i returns the string representing the *)
-(* integer i (i < 256) using exactly 3 digits (example: 13 -> "013") *)
-let padded_string_of_int i =
- if i < 10 then
- "00" ^ string_of_int i
- else if i < 100 then
- "0" ^ string_of_int i
- else
- string_of_int i
-;;
-
-(* Two functions useful to print a definition *)
-let rec print_disjunction ?(first = true) =
- function
- [] -> ()
- | he::tl ->
- if not first then output " | " ;
- print_re he ;
- print_disjunction ~first:false tl
-and print_re =
- function
- Types.Char i -> output ("'\\" ^ padded_string_of_int i ^ "'")
- | Types.Interval (l,u) ->
- output ("['\\" ^ padded_string_of_int l ^ "'-'\\" ^
- padded_string_of_int u ^ "']")
- | Types.Identifier i -> output i
- | Types.Concat rell ->
- let foo rel =
- if List.length rel > 1 then
- (output "(" ; print_disjunction rel ; output ")")
- else
- print_disjunction rel
- in
- List.iter foo rell
-;;
-
-(* print_definition prints a definition in the format expected by ocamllex *)
-let print_definition { Types.id = id ; Types.rel = rel } =
- output ("let " ^ id ^ " =\n ") ;
- print_disjunction rel ;
- output "\n\n"
-;;
-
-(* main *)
-let _ =
- let lexbuf = Lexing.from_channel stdin in
- let ucs2_result = Parser.main Lexer.token lexbuf in
- List.iter print_definition (List.map ucs2_to_utf8 ucs2_result)
-;;