matita.cs.unibo.it Git - helm.git/commitdiff
Initial revision
author    Luca Padovani <luca.padovani@unito.it>
Fri, 17 Nov 2000 09:57:23 +0000 (09:57 +0000)
committer Luca Padovani <luca.padovani@unito.it>
Fri, 17 Nov 2000 09:57:23 +0000 (09:57 +0000)
1239 files changed:
helm/DEVEL/pxp/.cvsignore [new file with mode: 0644]
helm/DEVEL/pxp/netstring/.cvsignore [new file with mode: 0644]
helm/DEVEL/pxp/netstring/LICENSE [new file with mode: 0644]
helm/DEVEL/pxp/netstring/META [new file with mode: 0644]
helm/DEVEL/pxp/netstring/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/netstring/RELEASE [new file with mode: 0644]
helm/DEVEL/pxp/netstring/base64.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/base64.mli [new file with mode: 0644]
helm/DEVEL/pxp/netstring/cgi.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/cgi.mli [new file with mode: 0644]
helm/DEVEL/pxp/netstring/compat-base64/META [new file with mode: 0644]
helm/DEVEL/pxp/netstring/compat-cgi/META [new file with mode: 0644]
helm/DEVEL/pxp/netstring/depend [new file with mode: 0644]
helm/DEVEL/pxp/netstring/depend.pkg [new file with mode: 0644]
helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB [new file with mode: 0644]
helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB.xml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/doc/INSTALL [new file with mode: 0644]
helm/DEVEL/pxp/netstring/doc/INSTALL.xml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/doc/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/netstring/doc/README [new file with mode: 0644]
helm/DEVEL/pxp/netstring/doc/README.xml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/mimestring.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/mimestring.mli [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netbuffer.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netbuffer.mli [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netconversion.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netconversion.mli [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netencoding.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netencoding.mli [new file with mode: 0644]
helm/DEVEL/pxp/netstring/nethtml.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/nethtml.mli [new file with mode: 0644]
helm/DEVEL/pxp/netstring/nethtml_scanner.mll [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netmappings.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netmappings.mli [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netmappings_iso.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netmappings_other.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netstream.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netstream.mli [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netstring.cma [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netstring.cmxa [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netstring_mt.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netstring_mt.mli [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netstring_str.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netstring_str.mli [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netstring_top.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/netstring_top.mli [new file with mode: 0644]
helm/DEVEL/pxp/netstring/neturl.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/neturl.mli [new file with mode: 0644]
helm/DEVEL/pxp/netstring/tests/.cvsignore [new file with mode: 0644]
helm/DEVEL/pxp/netstring/tests/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/netstring/tests/test_cgi.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/tests/test_encoding.cgi [new file with mode: 0755]
helm/DEVEL/pxp/netstring/tests/test_mimestring.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/tests/test_netencoding.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/tests/test_neturl.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/tests/test_recode.ml [new file with mode: 0644]
helm/DEVEL/pxp/netstring/tools/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/.cvsignore [new file with mode: 0644]
helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/unimap_to_ocaml.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/.cvsignore [new file with mode: 0644]
helm/DEVEL/pxp/pxp/LICENSE [new file with mode: 0644]
helm/DEVEL/pxp/pxp/META [new file with mode: 0644]
helm/DEVEL/pxp/pxp/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/Makefile.code [new file with mode: 0644]
helm/DEVEL/pxp/pxp/Makefile.conf [new file with mode: 0644]
helm/DEVEL/pxp/pxp/RELEASE [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/.cvsignore [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/META [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/Makefile.code [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/Makefile.conf [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/README [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/markup_document.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/markup_document.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/markup_dtd.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/markup_dtd.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/markup_reader.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/markup_reader.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/markup_types.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/markup_types.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/markup_yacc.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/compatibility/markup_yacc.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/EXTENSIONS [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/EXTENSIONS.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/INSTALL [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/INSTALL.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/PRERELEASE [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/PRERELEASE.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/README [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/README.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/SPEC [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/SPEC.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/design.txt [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/dist-common.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/TIMESTAMP [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/c1567.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/c36.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/c533.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/c893.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/index.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/markup.css [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/p34.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/pic/done [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/pic/extension_general.gif [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_add.gif [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_clone.gif [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_delete.gif [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_general.gif [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_term.gif [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/x107.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/x1439.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/x1496.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/x1629.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/x1812.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/x1818.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/x1965.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/x468.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/x550.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/x675.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/x738.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/html/x939.html [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/ps/markup.ps [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/src/dtd.mli.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/src/getcode.ml [new file with mode: 0755]
helm/DEVEL/pxp/pxp/doc/manual/src/markup.css [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/src/markup.dsl [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/src/markup.sgml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/src/pic/extension_general.fig [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_add.fig [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_clone.fig [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_delete.fig [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_general.fig [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_term.fig [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/src/readme.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/doc/manual/src/yacc.mli.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/readme/.cvsignore [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/readme/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/readme/Makefile.code [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/readme/main.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/readme/readme.dtd [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/readme/to_html.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/readme/to_text.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/simple_transformation/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/simple_transformation/README [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/simple_transformation/delcol.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/simple_transformation/print.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/simple_transformation/record.dtd [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/simple_transformation/sample.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/simple_transformation/sort.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/validate/.cvsignore [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/validate/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/validate/validate.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/xmlforms/.cvsignore [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile.code [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/xmlforms/README [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/xmlforms/ds_app.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/xmlforms/ds_context.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/xmlforms/ds_style.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/xmlforms/styles/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/xmlforms/styles/address-style.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/xmlforms/styles/crazy-style.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/xmlforms/styles/ds-object.dtd [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/xmlforms/styles/ds-style.dtd [new file with mode: 0644]
helm/DEVEL/pxp/pxp/examples/xmlforms/styles/mini-style.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/Makefile.code [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/Makefile.generate [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/objects [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_aux_iso88591.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_aux_utf8.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_misc_iso88591.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_misc_utf8.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_iso88591.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_utf8.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_content.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_content_string.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_declaration.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_defs_drv_utf8.def [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_defs_generic.def [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_defs_iso88591.def [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_document.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_document_type.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_dtd_string.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_misc.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_name_string.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/lexers/pxp_lex_within_tag.src [new file with mode: 0644]
helm/DEVEL/pxp/pxp/m2parsergen/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/m2parsergen/README [new file with mode: 0644]
helm/DEVEL/pxp/pxp/m2parsergen/ast.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/m2parsergen/generator.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/m2parsergen/lexer.mll [new file with mode: 0644]
helm/DEVEL/pxp/pxp/m2parsergen/parser.mly [new file with mode: 0644]
helm/DEVEL/pxp/pxp/m2parsergen/x.m2y [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_aux.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_codewriter.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_codewriter.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_dfa.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_dfa.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_document.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_document.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_dtd.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_dtd.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_entity.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_lexer_types.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_lexer_types.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_lexers.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_lexers.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_reader.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_reader.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_types.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_types.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_utf8.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_utf8.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_yacc.m2y [new file with mode: 0644]
helm/DEVEL/pxp/pxp/pxp_yacc.mli [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/MISSING_TESTS [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/README [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/README [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/001.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/002.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/003.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/004.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/005.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/006.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/007.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/008.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/008.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/009.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/009.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/010.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/010.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/011.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/011.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/012.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/012.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/013.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/013.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/014.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/014.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/008.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/009.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/010.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/011.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/012.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/013.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/014.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/001.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/002.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003-1.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003-2.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004-1.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004-2.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005-1.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005-2.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/006.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/007.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/008.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/008.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/009.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/009.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/010.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/010.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/011.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/011.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/012.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/012.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/013.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/013.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/014.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/014.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/015.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/015.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/016.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/016.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/017.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/017.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/018.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/018.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/019.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/019.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/020.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/020.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/021.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/021.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/022.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/022.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/023.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/023.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/024.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/024.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/025.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/025.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/026.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/026.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/027.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/027.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/028.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/028.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/029.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/029.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/030.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/030.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031-1.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031-2.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/008.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/009.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/010.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/011.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/012.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/013.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/014.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/015.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/016.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/017.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/018.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/019.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/020.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/021.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/022.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/023.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/024.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/025.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/026.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/027.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/028.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/029.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/030.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/031.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/008.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/009.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/010.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/011.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/012.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/013.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/014.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/015.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/016.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/017.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/018.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/019.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/020.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/021.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/022.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/023.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/024.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/025.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/026.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/027.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/028.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/029.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/030.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/031.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/032.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/033.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/034.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/035.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/036.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/037.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/038.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/039.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/040.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/041.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/042.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/043.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/044.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/045.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/046.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/047.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/048.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/049.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/050.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/051.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/052.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/053.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/054.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/055.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/056.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/057.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/058.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/059.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/060.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/061.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/062.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/063.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/064.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/065.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/066.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/067.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/068.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/069.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/070.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/071.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/072.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/073.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/074.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/075.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/076.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/077.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/078.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/079.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/080.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/081.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/082.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/083.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/084.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/085.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/086.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/087.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/088.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/089.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/090.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/091.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/092.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/093.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/094.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/095.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/096.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/097.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/097.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/098.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/099.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/100.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/101.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/102.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/103.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/104.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/105.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/106.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/107.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/108.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/109.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/110.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/111.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/112.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/113.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/114.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/115.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/116.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/117.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/118.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/119.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/008.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/009.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/010.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/011.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/012.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/013.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/014.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/015.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/016.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/017.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/018.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/019.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/020.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/021.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/022.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/023.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/024.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/025.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/026.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/027.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/028.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/029.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/030.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/031.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/032.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/033.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/034.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/035.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/036.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/037.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/038.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/039.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/040.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/041.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/042.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/043.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/044.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/045.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/046.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/047.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/048.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/049.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/050.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/051.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/052.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/053.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/054.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/055.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/056.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/057.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/058.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/059.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/060.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/061.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/062.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/063.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/064.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/065.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/066.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/067.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/068.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/069.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/070.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/071.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/072.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/073.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/074.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/075.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/076.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/077.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/078.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/079.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/080.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/081.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/082.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/083.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/084.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/085.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/086.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/087.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/088.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/089.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/090.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/091.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/092.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/093.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/094.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/095.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/096.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/097.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/098.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/099.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/100.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/101.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/102.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/103.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/104.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/105.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/106.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/107.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/108.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/109.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/110.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/111.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/112.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/113.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/114.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/115.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/116.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/117.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/118.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/119.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/INDEX [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/002+.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/INDEX [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/002+.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/INDEX [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/out/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/001.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/002.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/003.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/004.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/005.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/006.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/007.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/008.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/008.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/009.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/009.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/010.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/010.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/011.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/011.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/INDEX [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/008.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/009.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/010.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/011.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/INDEX [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/canonxml/run_canonxml [new file with mode: 0755]
helm/DEVEL/pxp/pxp/rtests/canonxml/test_canonxml.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/codewriter/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/codewriter/compile.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/codewriter/run_codewriter [new file with mode: 0755]
helm/DEVEL/pxp/pxp/rtests/codewriter/sample001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/codewriter/test_codewriter [new file with mode: 0755]
helm/DEVEL/pxp/pxp/rtests/dumpfiles.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/README [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/001.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/010.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/010.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/011.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/011.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/012.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/012.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/013.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/013.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/014.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/014.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/015.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/015.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/016.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/016.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/017.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/017.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/018.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/018.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/019.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/019.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/020.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/020.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/021.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/021.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/022.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/022.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/023.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/023.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/024.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/024.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/025.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/025.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/026.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/026.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/027.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/027.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/028.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/028.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/029.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/029.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/030.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/030.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/031.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/031.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/032.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/032.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/033.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/033.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/034.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/034.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/035.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/035.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/036.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/036.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/037.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/037.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/038.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/038.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/080.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/080.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/081.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/081.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/INDEX [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/005.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/005.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/006.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/006.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/002.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/003.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/003.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa-problems/140.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa-problems/141.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/001.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/002.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/003.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/003.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/004.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/004.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/005.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/005.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/006.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/006.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/007.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/007.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/008.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/008.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/009.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/009.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/010.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/010.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/011.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/011.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/012.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/012.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/013.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/013.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/014.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/014.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/015.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/015.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/016.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/016.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/017.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/017.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/018.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/018.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/019.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/019.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/020.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/020.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/021.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/021.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/022.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/022.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/023.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/023.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/024.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/024.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/025.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/025.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/026.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/026.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/027.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/027.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/028.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/028.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/029.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/029.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/030.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/030.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/031.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/031.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/032.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/032.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/033.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/033.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/034.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/034.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/035.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/035.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/036.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/036.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/037.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/037.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/038.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/038.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/039.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/039.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/040.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/040.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/041.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/041.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/042.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/042.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/043.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/043.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/044.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/044.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/045.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/045.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/046.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/046.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/047.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/047.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/048.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/048.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/049.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/049.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/050.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/050.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/051.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/051.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/052.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/052.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/053.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/053.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/054.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/054.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/055.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/055.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/056.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/056.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/057.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/057.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/058.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/058.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/059.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/059.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/060.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/060.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/061.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/061.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/062.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/062.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/063.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/063.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/064.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/064.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/065.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/065.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/066.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/066.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/067.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/067.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/068.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/068.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/069.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/069.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/070.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/070.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/071.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/071.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/072.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/072.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/073.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/073.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/074.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/074.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/075.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/075.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/076.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/076.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/077.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/077.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/078.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/078.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/079.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/079.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/080.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/080.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/081.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/081.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/082.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/082.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/083.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/083.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/084.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/084.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/085.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/085.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/086.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/086.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/087.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/087.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/088.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/088.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/089.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/089.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/090.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/090.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/091.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/091.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/092.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/092.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/093.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/093.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/094.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/094.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/095.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/095.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/096.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/096.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/097.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/097.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/098.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/098.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/099.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/099.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/100.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/100.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/101.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/101.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/102.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/102.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/103.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/103.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/104.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/104.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/105.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/105.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/106.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/106.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/107.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/107.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/108.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/108.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/109.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/109.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/110.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/110.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/111.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/111.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/112.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/112.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/113.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/113.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/114.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/114.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/115.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/115.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/116.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/116.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/117.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/117.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/118.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/118.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/119.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/119.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/120.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/120.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/121.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/121.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/122.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/122.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/123.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/123.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/124.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/124.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/125.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/125.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/126.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/126.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/127.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/127.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/128.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/128.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/129.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/129.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/130.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/130.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/131.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/131.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/132.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/132.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/133.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/133.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/134.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/134.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/135.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/135.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/136.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/136.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/137.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/137.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/138.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/138.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/139.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/139.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/140.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/140.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/141.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/141.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/142.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/142.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/143.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/143.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/144.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/144.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/145.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/145.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/146.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/146.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/147.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/147.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/148.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/148.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/149.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/149.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/150.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/150.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/151.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/151.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/152.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/152.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/153.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/153.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/154.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/154.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/155.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/155.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/156.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/156.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/157.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/157.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/158.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/158.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/159.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/159.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/160.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/160.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/161.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/161.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/162.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/162.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/163.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/163.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/164.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/164.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/165.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/165.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/166.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/166.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/167.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/167.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/168.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/168.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/169.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/169.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/170.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/170.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/171.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/171.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/172.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/172.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/173.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/173.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/174.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/174.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/175.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/175.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/176.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/176.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/177.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/177.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/178.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/178.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/179.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/179.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/180.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/180.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/181.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/181.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/182.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/182.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/183.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/183.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/184.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/184.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/186.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/186.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/null.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/001.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/002.out [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/002.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/null.ent [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/negative/run_negative [new file with mode: 0755]
helm/DEVEL/pxp/pxp/rtests/negative/test_negative.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/reader/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/reader/minilex.mll [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/reader/t100.dat [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/reader/test_reader.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/run [new file with mode: 0755]
helm/DEVEL/pxp/pxp/rtests/write/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/write/run_write [new file with mode: 0755]
helm/DEVEL/pxp/pxp/rtests/write/sample001.xml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/rtests/write/test_write.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/tools/collect_files [new file with mode: 0755]
helm/DEVEL/pxp/pxp/tools/insert_variant [new file with mode: 0755]
helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/.cvsignore [new file with mode: 0644]
helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/Makefile [new file with mode: 0644]
helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/README [new file with mode: 0644]
helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/lexer.mll [new file with mode: 0644]
helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/parser.mly [new file with mode: 0644]
helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/types.ml [new file with mode: 0644]
helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/ucs2_to_utf8.ml [new file with mode: 0644]

diff --git a/helm/DEVEL/pxp/.cvsignore b/helm/DEVEL/pxp/.cvsignore
new file mode 100644 (file)
index 0000000..c1fcbc4
--- /dev/null
@@ -0,0 +1,7 @@
+*.cmo
+*.cmx
+*.cmi
+
+*.o
+*.a
+
diff --git a/helm/DEVEL/pxp/netstring/.cvsignore b/helm/DEVEL/pxp/netstring/.cvsignore
new file mode 100644 (file)
index 0000000..c1fcbc4
--- /dev/null
@@ -0,0 +1,7 @@
+*.cmo
+*.cmx
+*.cmi
+
+*.o
+*.a
+
diff --git a/helm/DEVEL/pxp/netstring/LICENSE b/helm/DEVEL/pxp/netstring/LICENSE
new file mode 100644 (file)
index 0000000..820032e
--- /dev/null
@@ -0,0 +1,21 @@
+Copyright 1999 by Gerd Stolpmann
+
+The package "netstring" is copyright by Gerd Stolpmann. 
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the "netstring" software (the "Software"), to deal in the
+Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+The Software is provided ``as is'', without warranty of any kind, express
+or implied, including but not limited to the warranties of
+merchantability, fitness for a particular purpose and noninfringement.
+In no event shall Gerd Stolpmann be liable for any claim, damages or
+other liability, whether in an action of contract, tort or otherwise,
+arising from, out of or in connection with the Software or the use or
+other dealings in the software.
diff --git a/helm/DEVEL/pxp/netstring/META b/helm/DEVEL/pxp/netstring/META
new file mode 100644 (file)
index 0000000..d422128
--- /dev/null
@@ -0,0 +1,54 @@
+version = "0.9.3"
+requires = "str"
+description = "String processing for the Internet"
+
+archive(byte) = 
+    "netstring.cma netmappings_iso.cmo netmappings_other.cmo"
+archive(byte,toploop) = 
+    "netstring.cma netmappings_iso.cmo netmappings_other.cmo 
+     netstring_top.cmo"
+archive(byte,mt) = 
+    "netstring.cma netmappings_iso.cmo netmappings_other.cmo 
+     netstring_mt.cmo"
+archive(byte,mt,toploop) = 
+    "netstring.cma netmappings_iso.cmo netmappings_other.cmo 
+     netstring_mt.cmo netstring_top.cmo"
+archive(native) = 
+    "netstring.cmxa netmappings_iso.cmx netmappings_other.cmx"
+archive(native,mt) = 
+    "netstring.cmxa netmappings_iso.cmx netmappings_other.cmx 
+     netstring_mt.cmx"
+
+archive(byte,netstring_only_iso) = 
+    "netstring.cma netmappings_iso.cmo"
+archive(byte,toploop,netstring_only_iso) = 
+    "netstring.cma netmappings_iso.cmo
+     netstring_top.cmo"
+archive(byte,mt,netstring_only_iso) = 
+    "netstring.cma netmappings_iso.cmo
+     netstring_mt.cmo"
+archive(byte,mt,toploop,netstring_only_iso) = 
+    "netstring.cma netmappings_iso.cmo
+     netstring_mt.cmo netstring_top.cmo"
+archive(native,netstring_only_iso) = 
+    "netstring.cmxa netmappings_iso.cmx"
+archive(native,mt,netstring_only_iso) = 
+    "netstring.cmxa netmappings_iso.cmx 
+     netstring_mt.cmx"
+
+archive(byte,netstring_minimum) = 
+    "netstring.cma"
+archive(byte,toploop,netstring_minimum) = 
+    "netstring.cma 
+     netstring_top.cmo"
+archive(byte,mt,netstring_minimum) = 
+    "netstring.cma 
+     netstring_mt.cmo"
+archive(byte,mt,toploop,netstring_minimum) = 
+    "netstring.cma 
+     netstring_mt.cmo netstring_top.cmo"
+archive(native,netstring_minimum) = 
+    "netstring.cmxa"
+archive(native,mt,netstring_minimum) = 
+    "netstring.cmxa 
+     netstring_mt.cmx"
diff --git a/helm/DEVEL/pxp/netstring/Makefile b/helm/DEVEL/pxp/netstring/Makefile
new file mode 100644 (file)
index 0000000..98f9ef0
--- /dev/null
@@ -0,0 +1,151 @@
+# make all:            make bytecode archive
+# make opt:            make native archive
+# make install:        install bytecode archive, and if present, native archive
+# make uninstall:      uninstall package
+# make clean:          remove intermediate files
+# make distclean:      remove any superfluous files
+# make release:        cleanup, create archive, tag CVS module 
+#                      (for developers)
+
+#----------------------------------------------------------------------
+# specific rules for this package:
+
+OBJECTS  = netstring_str.cmo \
+           netencoding.cmo netbuffer.cmo netstream.cmo \
+          mimestring.cmo cgi.cmo base64.cmo \
+           nethtml_scanner.cmo nethtml.cmo \
+          neturl.cmo \
+          netmappings.cmo netconversion.cmo
+XOBJECTS = $(OBJECTS:.cmo=.cmx)
+ARCHIVE  = netstring.cma
+XARCHIVE = netstring.cmxa
+
+NAME     = netstring
+REQUIRES = str
+
+ISO_MAPPINGS   = mappings/iso*.unimap
+OTHER_MAPPINGS = mappings/cp*.unimap \
+                 mappings/adobe*.unimap \
+                mappings/jis*.unimap \
+                 mappings/koi*.unimap \
+                 mappings/mac*.unimap \
+                 mappings/windows*.unimap
+
+all: $(ARCHIVE) \
+     netstring_top.cmo netstring_mt.cmo \
+     netmappings_iso.cmo netmappings_other.cmo
+
+opt: $(XARCHIVE) \
+     netstring_mt.cmx \
+     netmappings_iso.cmx netmappings_other.cmx
+
+
+$(ARCHIVE): $(OBJECTS) 
+       $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS)
+
+$(XARCHIVE): $(XOBJECTS) 
+       $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS)
+
+netmappings_iso.ml:
+       $(MAKE) -C tools 
+       test ! -d mappings || tools/unimap_to_ocaml/unimap_to_ocaml \
+               -o netmappings_iso.ml $(ISO_MAPPINGS)
+
+netmappings_other.ml:
+       $(MAKE) -C tools 
+       test ! -d mappings || tools/unimap_to_ocaml/unimap_to_ocaml \
+               -o netmappings_other.ml $(OTHER_MAPPINGS)
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS   =
+OCAMLC    = ocamlc $(DEBUG) $(OPTIONS) $(ROPTIONS)
+OCAMLOPT  = ocamlopt $(OPTIONS) $(ROPTIONS)
+OCAMLLEX  = ocamllex
+OCAMLDEP  = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+DEBUG  =
+# Invoke with: make DEBUG=-g
+
+depend: *.ml *.mli
+       $(OCAMLDEP) *.ml *.mli >depend
+
+depend.pkg: Makefile
+       $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg
+
+.PHONY: install
+install: all
+       { test ! -f $(XARCHIVE) || extra="*.cmxa *.a netstring_mt.cmx netmappings_iso.cmx netmappings_other.cmx netstring_mt.o netmappings_iso.o netmappings_other.o"; }; \
+       $(OCAMLFIND) install $(NAME) *.mli *.cmi *.cma netstring_top.cmo netstring_mt.cmo netmappings_iso.cmo netmappings_other.cmo META $$extra
+
+.PHONY: install-cgi
+install-cgi: 
+       $(OCAMLFIND) install cgi compat-cgi/META
+
+
+.PHONY: install-base64
+install-base64: 
+       $(OCAMLFIND) install base64 compat-base64/META
+
+
+.PHONY: uninstall
+uninstall:
+       $(OCAMLFIND) remove $(NAME)
+
+.PHONY: uninstall-cgi
+uninstall-cgi:
+       $(OCAMLFIND) remove cgi
+
+.PHONY: uninstall-base64
+uninstall-base64:
+       $(OCAMLFIND) remove base64
+
+.PHONY: clean
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+       test ! -d mappings || rm -f netmappings_iso.ml netmappings_other.ml
+
+.PHONY: distclean
+distclean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+       rm -f *~ depend depend.pkg compat-cgi/*~ compat-base64/*~
+       $(MAKE) -C tests distclean
+       $(MAKE) -C doc distclean
+       $(MAKE) -C tools distclean
+
+RELEASE: META
+       awk '/version/ { print substr($$3,2,length($$3)-2) }' META >RELEASE
+
+.PHONY: dist
+dist: RELEASE
+       r=`head -1 RELEASE`; cd ..; gtar czf $(NAME)-$$r.tar.gz --exclude='*/CVS*' --exclude="*/depend.pkg" --exclude="*/depend" --exclude="*/doc/common.xml" --exclude="*/doc/config.xml" --exclude="*/doc/readme.dtd" --exclude="*/Mail" --exclude="*/mappings" $(NAME)
+
+.PHONY: tag-release
+tag-release: RELEASE
+       r=`head -1 RELEASE | sed -e s/\\\./-/g`; cd ..; cvs tag -F $(NAME)-$$r $(NAME)
+
+.PHONY: release
+release: distclean
+       test -f netmappings_iso.ml
+       test -f netmappings_other.ml
+       $(MAKE) tag-release
+       $(MAKE) dist
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll
+
+.ml.cmx:
+       $(OCAMLOPT) -c -thread $<
+
+.ml.cmo:
+       $(OCAMLC) -c -thread $<
+
+.mli.cmi:
+       $(OCAMLC) -c $<
+
+.mll.ml:
+       $(OCAMLLEX) $<
+
+include depend
+include depend.pkg
diff --git a/helm/DEVEL/pxp/netstring/RELEASE b/helm/DEVEL/pxp/netstring/RELEASE
new file mode 100644 (file)
index 0000000..965065d
--- /dev/null
@@ -0,0 +1 @@
+0.9.3
diff --git a/helm/DEVEL/pxp/netstring/base64.ml b/helm/DEVEL/pxp/netstring/base64.ml
new file mode 100644 (file)
index 0000000..285626f
--- /dev/null
@@ -0,0 +1,24 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+let encode s = Netencoding.Base64.encode s;;
+let url_encode s = Netencoding.Base64.url_encode s;;
+let decode s = Netencoding.Base64.decode s;;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/06/25 22:34:43  gerd
+ *     Added labels to arguments.
+ *
+ * Revision 1.1  2000/03/02 01:15:20  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/base64.mli b/helm/DEVEL/pxp/netstring/base64.mli
new file mode 100644 (file)
index 0000000..5dd60ea
--- /dev/null
@@ -0,0 +1,36 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(**********************************************************************)
+(* Base64 compatibility module                                        *)
+(**********************************************************************)
+
+(* PLEASE DO NOT USE THIS MODULE IN NEW SOFTWARE!
+ * The module Netencoding.Base64 is the preferred API. This module is
+ * only for compatibility with older software.
+ *)
+
+(* This interface is compatible with all previously released Base64
+ * modules (0.1 and 0.2).
+ *)
+
+val encode : string -> string
+
+val url_encode : string -> string
+
+val decode : string -> string
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/03/02 01:15:20  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
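
The interface above is only a thin compatibility layer over Netencoding.Base64.
A minimal usage sketch, assuming the netstring package (or the base64
compatibility package added further below) is linked; new code should call
Netencoding.Base64 directly, as the header comment advises:

    let () =
      (* Base64.encode/decode simply delegate to Netencoding.Base64,
       * as base64.ml above shows. *)
      let encoded = Base64.encode "hello, world" in
      print_endline encoded;
      assert (Base64.decode encoded = "hello, world")
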
diff --git a/helm/DEVEL/pxp/netstring/cgi.ml b/helm/DEVEL/pxp/netstring/cgi.ml
new file mode 100644 (file)
index 0000000..48412be
--- /dev/null
@@ -0,0 +1,645 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+exception Resources_exceeded
+
+type argument_processing = Memory | File | Automatic;;
+
+type argument =
+    { mutable arg_name : string;
+      mutable arg_processing : argument_processing;
+      mutable arg_buf_value : Buffer.t;
+      mutable arg_mem_value : string option;
+              (* Here, the value is stored if it must be kept in memory *)
+      mutable arg_disk_value : string Weak.t;
+              (* This component is used iff arg_mem_value = None. The
+              * weak array has a length of 1, and the single element stores
+              * the value (if any).
+              *)
+      mutable arg_file : string option;
+              (* The filename of the temporary file storing the value *)
+      mutable arg_fd : out_channel option;
+              (* The file descriptor of the temp file (if open) *)
+      mutable arg_mimetype : string;
+      mutable arg_filename : string option;
+      mutable arg_header : (string * string) list;
+              (* For the last three components, see the description of the
+              * corresponding functions in the mli file.
+              *)
+    }
+;;
+
+type workaround =
+    Work_around_MSIE_Content_type_bug
+  | Work_around_backslash_bug
+;;
+
+type config =
+    { maximum_content_length : int;
+      how_to_process_arguments : argument -> argument_processing;
+      tmp_directory : string;
+      tmp_prefix : string;
+      workarounds : workaround list;
+    }
+;;
+
+
+let print_argument arg =
+  Format.printf
+    "<CGIARG name=%s filename=%s mimetype=%s store=%s>"
+    arg.arg_name
+    (match arg.arg_filename with None -> "*" | Some n -> n)
+    arg.arg_mimetype
+    (match arg.arg_file with None -> "Memory" | Some n -> n)
+;;
+
+
+let encode = Netencoding.Url.encode ;;
+let decode = Netencoding.Url.decode ;;
+
+
+
+let url_split_re =
+  Str.regexp "[&=]";;
+
+
+let mk_url_encoded_parameters nv_pairs =
+  String.concat "&"
+    (List.map
+       (fun (name,value) ->
+         let name_encoded = Netencoding.Url.encode name in
+         let value_encoded = Netencoding.Url.encode value in
+         name_encoded ^ "=" ^ value_encoded
+       )
+       nv_pairs
+    )
+;;
+
+
+let dest_url_encoded_parameters parstr =
+
+  let rec parse_after_amp tl =
+    match tl with
+       Str.Text name :: Str.Delim "=" :: Str.Text value :: tl' ->
+         (Netencoding.Url.decode name,
+          Netencoding.Url.decode value) :: parse_next tl'
+      | Str.Text name :: Str.Delim "=" :: Str.Delim "&" :: tl' ->
+         (Netencoding.Url.decode name, "") :: parse_after_amp tl'
+      | Str.Text name :: Str.Delim "=" :: [] ->
+         [Netencoding.Url.decode name, ""]
+      | _ ->
+         failwith "Cgi.dest_url_encoded_parameters"
+  and parse_next tl =
+    match tl with
+       [] -> []
+      | Str.Delim "&" :: tl' ->
+         parse_after_amp tl'
+      | _ ->
+         failwith "Cgi.dest_url_encoded_parameters"
+  in
+  let toklist = Str.full_split url_split_re parstr in
+  match toklist with
+      [] -> []
+    | _ -> parse_after_amp toklist
+;;
+
+
+let mk_form_encoded_parameters ntv_triples =
+  failwith "Cgi.mk_form_encoded_parameters: not implemented";;
+
+
+let dest_parameter_header header options =
+  let get_name s =
+    (* s is: form-data; ... name="fieldname" ...
+     * Extract "fieldname"
+     *)
+    try
+      let tok, params = Mimestring.scan_value_with_parameters s options in
+      List.assoc "name" params
+    with
+       Not_found ->
+         failwith "Cgi.dest_form_encoded_parameters"
+      | Failure "Mimestring.scan_value_with_parameters" ->
+         failwith "Cgi.dest_form_encoded_parameters"
+  in
+
+  let get_filename s =
+    (* s is: form-data; ... filename="uploadname" ...
+     * Extract "uploadname"
+     *)
+    try
+      let tok, params = Mimestring.scan_value_with_parameters s options in
+      Some(List.assoc "filename" params)
+    with
+       Not_found ->
+         None
+      | Failure "Mimestring.scan_value_with_parameters" ->
+         failwith "Cgi.dest_form_encoded_parameters"
+  in
+
+  let mime_type =
+    try List.assoc "content-type" header
+    with Not_found -> "text/plain" in     (* the default *)
+
+  let content_disposition =
+    try List.assoc "content-disposition" header
+    with
+       Not_found ->
+         failwith "Cgi.dest_form_encoded_parameters: no content-disposition"
+  in
+
+  let name = get_name content_disposition in
+  let filename = get_filename content_disposition in
+
+  name, mime_type, filename
+;;
+
+
+let dest_form_encoded_parameters parstr ~boundary config =
+  let options =
+    if List.mem Work_around_backslash_bug config.workarounds then
+      [ Mimestring.No_backslash_escaping ]
+    else
+      []
+  in
+  let parts =
+    Mimestring.scan_multipart_body_and_decode
+      parstr 0 (String.length parstr) boundary in
+  List.map
+    (fun (params, value) ->
+
+      let name, mime_type, filename = dest_parameter_header params options in
+      { arg_name = name;
+       arg_processing = Memory;
+       arg_buf_value = Buffer.create 1;
+       arg_mem_value = Some value;
+       arg_disk_value = Weak.create 1;
+       arg_file = None;
+       arg_fd = None;
+       arg_mimetype = mime_type;
+       arg_filename = filename;
+       arg_header = params;
+      }
+
+    )
+    parts
+;;
+
+
+let make_temporary_file config =
+  (* Returns (filename, out_channel). *)
+  let rec try_creation n =
+    try
+      let fn =
+       Filename.concat
+         config.tmp_directory
+         (config.tmp_prefix ^ "-" ^ (string_of_int n))
+      in
+      let fd =
+       open_out_gen
+         [ Open_wronly; Open_creat; Open_excl; Open_binary ]
+         0o666
+         fn
+      in
+      fn, fd
+    with
+       Sys_error m ->
+         (* This does not look very elegant, but it is the only way
+          * to limit the number of attempts.
+          *)
+         if n > 1000 then
+           failwith ("Cgi: Cannot create temporary file: " ^ m);
+         try_creation (n+1)
+  in
+  try_creation 0
+;;
+
+
+let dest_form_encoded_parameters_from_netstream s ~boundary config =
+  let parts = ref [] in
+  let options =
+    if List.mem Work_around_backslash_bug config.workarounds then
+      [ Mimestring.No_backslash_escaping ]
+    else
+      []
+  in
+
+  let create header =
+    (* CALLBACK for scan_multipart_body_from_netstream *)
+    let name, mime_type, filename = dest_parameter_header header options in
+    let p0 =
+      { arg_name = name;
+       arg_processing = Memory;
+       arg_buf_value = Buffer.create 80;
+       arg_mem_value = None;
+       arg_disk_value = Weak.create 1;
+       arg_file = None;
+       arg_fd = None;
+       arg_mimetype = mime_type;
+       arg_filename = filename;
+       arg_header = header;
+      }
+    in
+    let pr = config.how_to_process_arguments p0 in
+    let p = { p0 with arg_processing = pr } in
+    if pr = File then begin
+      let fn, fd = make_temporary_file config in
+      p.arg_file <- Some fn;
+      p.arg_fd   <- Some fd;
+      p.arg_mem_value <- None;
+    end;
+    p
+  in
+
+  let add p s k n =
+    (* CALLBACK for scan_multipart_body_from_netstream *)
+    if (p.arg_processing = Automatic) &&
+       (Buffer.length (p.arg_buf_value) >= Netstream.block_size s) then begin
+        (* This is a LARGE argument *)
+        p.arg_processing <- File;
+        let fn, fd = make_temporary_file config in
+        p.arg_file <- Some fn;
+        p.arg_fd   <- Some fd;
+        p.arg_mem_value <- None;
+        output_string fd (Buffer.contents p.arg_buf_value);
+        p.arg_buf_value <- Buffer.create 1;
+       end;
+
+    match p.arg_processing with
+       (Memory|Automatic) ->
+         Buffer.add_substring
+           p.arg_buf_value
+           (Netbuffer.unsafe_buffer (Netstream.window s))
+           k
+           n
+      | File ->
+         let fd = match p.arg_fd with Some fd -> fd | None -> assert false in
+         output
+           fd
+           (Netbuffer.unsafe_buffer (Netstream.window s))
+           k
+           n;
+  in
+
+  let stop p =
+    (* CALLBACK for scan_multipart_body_from_netstream *)
+    begin match p.arg_processing with
+       (Memory|Automatic) ->
+         p.arg_mem_value <- Some (Buffer.contents p.arg_buf_value);
+         p.arg_buf_value <- Buffer.create 1;
+      | File ->
+         let fd = match p.arg_fd with Some fd -> fd | None -> assert false in
+         close_out fd;
+         p.arg_mem_value <- None
+    end;
+    parts := p :: !parts
+  in
+
+  Mimestring.scan_multipart_body_from_netstream
+    s
+    boundary
+    create
+    add
+    stop;
+
+  List.rev !parts
+;;
+
+
+let getenv name =
+  try Sys.getenv name with Not_found -> "";;
+
+(* getenv:
+ * We use this getenv instead of Sys.getenv. The CGI specification does not
+ * say anything about what should happen if a certain environment variable
+ * is not set.
+ * Some servers initialize the environment variable to the empty string if
+ * it is not applicable, while others do not set the variable at all. Because
+ * of this, unset variables are always reported as empty strings.
+ *
+ * This is especially a problem with QUERY_STRING.
+ *)
+
+let mk_simple_arg ~name v =
+  { arg_name = name;
+    arg_processing = Memory;
+    arg_buf_value = Buffer.create 1;
+    arg_mem_value = Some v;
+    arg_disk_value = Weak.create 0;
+    arg_file = None;
+    arg_fd = None;
+    arg_mimetype = "text/plain";
+    arg_filename = None;
+    arg_header = [];
+  }
+;;
+
+let mk_memory_arg ~name ?(mime = "text/plain") ?filename ?(header = []) v =
+  { arg_name = name;
+    arg_processing = Memory;
+    arg_buf_value = Buffer.create 1;
+    arg_mem_value = Some v;
+    arg_disk_value = Weak.create 0;
+    arg_file = None;
+    arg_fd = None;
+    arg_mimetype = mime;
+    arg_filename = filename;
+    arg_header = header;
+  }
+;;
+
+let mk_file_arg 
+  ~name ?(mime = "text/plain") ?filename ?(header = []) v_filename =
+  let v_abs_filename =
+    if Filename.is_relative v_filename then
+      Filename.concat (Sys.getcwd()) v_filename
+    else
+      v_filename
+  in
+  { arg_name = name;
+    arg_processing = File;
+    arg_buf_value = Buffer.create 1;
+    arg_mem_value = None;
+    arg_disk_value = Weak.create 0;
+    arg_file = Some v_abs_filename;
+    arg_fd = None;
+    arg_mimetype = mime;
+    arg_filename = filename;
+    arg_header = header;
+  }
+;;
+
+
+let get_content_type config =
+  (* Get the environment variable CONTENT_TYPE; if necessary apply
+   * workarounds for browser bugs.
+   *)
+  let content_type = getenv "CONTENT_TYPE" in
+  let user_agent = getenv "HTTP_USER_AGENT" in
+  let eff_content_type =
+    if Str.string_match (Str.regexp ".*MSIE") user_agent 0 &&
+       List.mem Work_around_MSIE_Content_type_bug config.workarounds
+    then begin
+      (* Microsoft Internet Explorer: When used with SSL connections,
+       * this browser sometimes produces CONTENT_TYPEs like
+       * "multipart/form-data; boundary=..., multipart/form-data; boundary=..."
+       * Workaround: Throw away everything after ", ".
+       *)
+      if Str.string_match (Str.regexp "\\([^,]*boundary[^,]*\\), .*boundary")
+                         content_type 0
+      then
+       Str.matched_group 1 content_type
+      else
+       content_type
+    end
+    else
+      content_type
+  in
+  eff_content_type
+;;
+
+
+let really_parse_args config =
+  let make_simple_arg (n,v) = mk_simple_arg n v in
+
+  match getenv "REQUEST_METHOD" with
+      ("GET"|"HEAD") ->
+       List.map
+         make_simple_arg
+         (dest_url_encoded_parameters(getenv "QUERY_STRING"))
+
+    | "POST" ->
+       let n =
+         try
+           int_of_string (getenv "CONTENT_LENGTH")
+         with
+             _ -> failwith "Cgi.parse_arguments"
+       in
+       if n > config.maximum_content_length then
+         raise Resources_exceeded;
+       begin
+         let mime_type, params =
+           Mimestring.scan_mime_type(get_content_type config) [] in
+         match mime_type with
+             "application/x-www-form-urlencoded" ->
+               let buf = String.create n in
+               really_input stdin buf 0 n;
+               List.map
+                 make_simple_arg
+                 (dest_url_encoded_parameters buf)
+           | "multipart/form-data" ->
+               let boundary =
+                 try
+                   List.assoc "boundary" params
+                 with
+                     Not_found ->
+                       failwith "Cgi.parse_arguments"
+               in
+               (* -------------------------------------------------- DEBUG
+                  let f = open_out "/tmp/cgiout" in
+                  output_string f buf;
+                  close_out f;
+                * --------------------------------------------------
+                *)
+               dest_form_encoded_parameters_from_netstream
+                 (Netstream.create_from_channel stdin (Some n) 4096)
+                 boundary
+                 config
+           | _ ->
+               failwith ("Cgi.parse_arguments: unknown content-type " ^ mime_type)
+       end
+    | _ ->
+       failwith "Cgi.parse_arguments: unknown method"
+
+let parsed = ref None;;    (* protected by lock/unlock *)
+
+let lock   = ref (fun () -> ());;
+let unlock = ref (fun () -> ());;
+
+let init_mt new_lock new_unlock =
+  lock   := new_lock;
+  unlock := new_unlock
+;;
+
+let protect f =
+  !lock();
+  try
+    let r = f() in
+    !unlock();
+    r
+  with
+      x ->
+        !unlock();
+        raise x
+;;
+
+let parse_arguments config =
+  protect
+    (fun () ->
+       match !parsed with
+          Some _ -> ()
+        | None ->
+            parsed := Some (List.map
+                              (fun arg -> arg.arg_name, arg)
+                              (really_parse_args config))
+    )
+;;
+
+let arguments () =
+  protect
+    (fun () ->
+       match !parsed with
+          Some plist -> plist
+        | None ->
+            failwith "Cgi.arguments"
+    )
+;;
+
+let set_arguments arglist =
+  protect 
+    (fun () ->
+       parsed := Some (List.map
+                        (fun arg -> arg.arg_name, arg)
+                        arglist)
+    )
+;;
+
+let default_config =
+  { maximum_content_length = max_int;
+    how_to_process_arguments = (fun _ -> Memory);
+    tmp_directory = "/var/tmp";
+    tmp_prefix = "cgi-";
+    workarounds = [ Work_around_MSIE_Content_type_bug;
+                   Work_around_backslash_bug;
+                 ]
+  }
+;;
+
+let arg_value arg =
+  match arg.arg_mem_value with
+      None ->
+       begin
+         match Weak.get arg.arg_disk_value 0 with
+             None ->
+               begin
+                 match arg.arg_file with
+                     None ->
+                       failwith "Cgi.arg_value: no value present"
+                   | Some filename ->
+                       let fd = open_in_bin filename in
+                       try
+                         let len = in_channel_length fd in
+                         let s = String.create len in
+                         really_input fd s 0 len;
+                         Weak.set arg.arg_disk_value 0 (Some s);
+                         close_in fd;
+                         s
+                       with
+                           any -> close_in fd; raise any
+               end
+           | Some v -> v
+       end
+    | Some s ->
+       s
+;;
+
+let arg_name     arg = arg.arg_name;;
+let arg_file     arg = arg.arg_file;;
+let arg_mimetype arg = arg.arg_mimetype;;
+let arg_filename arg = arg.arg_filename;;
+let arg_header   arg = arg.arg_header;;
+
+let cleanup () =
+  protect
+    (fun () ->
+       match !parsed with
+          None -> ()
+        | Some plist ->
+            List.iter
+              (fun (name, arg) ->
+                 match arg.arg_file with
+                     None -> ()
+                   | Some filename ->
+                       (* We do not complain if the file does not exist anymore. *)
+                       if Sys.file_exists filename then
+                         Sys.remove filename;
+                       arg.arg_file <- None
+              )
+              plist
+    )
+;;
+
+let argument name = List.assoc name (arguments());;
+let argument_value name = arg_value (argument name);;
+
+module Operators = struct
+  let ( !% ) = argument
+  let ( !$ ) = argument_value
+end;;
+
+
+let parse_args() =
+  parse_arguments default_config;
+  List.map
+    (fun (name, arg) -> name, arg_value arg)
+    (arguments())
+;;
+
+let parse_args_with_mimetypes() =
+  parse_arguments default_config;
+  List.map
+    (fun (name, arg) -> name, arg_mimetype arg, arg_value arg)
+    (arguments())
+;;
+
+let header s =
+  let t =
+    match s with
+       "" -> "text/html"
+      | _  -> s
+  in
+  print_string ("Content-type: " ^ t ^ "\n\n");
+  flush stdout
+;;
+
+
+let this_url() =
+  "http://" ^ (getenv "SERVER_NAME") ^ (getenv "SCRIPT_NAME")
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.8  2000/06/25 22:34:43  gerd
+ *     Added labels to arguments.
+ *
+ * Revision 1.7  2000/06/25 21:40:36  gerd
+ *     Added printer.
+ *
+ * Revision 1.6  2000/06/25 21:15:48  gerd
+ *     Checked thread-safety.
+ *
+ * Revision 1.5  2000/05/16 22:29:36  gerd
+ *     Added support for two common file upload bugs.
+ *
+ * Revision 1.4  2000/04/15 16:47:27  gerd
+ *     Last minor changes before releasing 0.6.
+ *
+ * Revision 1.3  2000/04/15 13:09:01  gerd
+ *     Implemented uploads to temporary files.
+ *
+ * Revision 1.2  2000/03/02 01:15:30  gerd
+ *     Updated.
+ *
+ * Revision 1.1  2000/02/25 15:21:12  gerd
+ *     Initial revision.
+ *
+ *
+ *)
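
The helpers mk_url_encoded_parameters and dest_url_encoded_parameters defined
above handle the "name1=val1&name2=val2" format used both in query strings and
in POSTed form data. A small round-trip sketch (the parameter names are made up
for illustration):

    let () =
      let s = Cgi.mk_url_encoded_parameters [ "q", "x y"; "page", "2" ] in
      (* the values go through Netencoding.Url.encode, so s looks like
       * "q=...&page=2" with reserved characters escaped *)
      List.iter
        (fun (name, value) -> Printf.printf "%s -> %s\n" name value)
        (Cgi.dest_url_encoded_parameters s)

As the parser above shows, dest_url_encoded_parameters fails with Failure on
malformed input rather than silently skipping it.
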
diff --git a/helm/DEVEL/pxp/netstring/cgi.mli b/helm/DEVEL/pxp/netstring/cgi.mli
new file mode 100644 (file)
index 0000000..8aea499
--- /dev/null
@@ -0,0 +1,419 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* FOR SIMPLE CGI PROGRAMS:
+ *
+ * If you do not need all the features of the API below, the following may
+ * be enough:
+ *
+ * - At the beginning of the main program, call 'parse_arguments' with
+ *   either 'default_config' as argument or with a customized configuration.
+ * - Use 'argument_value(name)' to get the string value of the CGI parameter
+ *   'name'. If you like, you can also open the Cgi.Operators module and
+ *   write '!$ name' instead. Here, !$ is a prefix operator equivalent to
+ *   argument_value.
+ *
+ * If you do not change the default configuration, you do not need to
+ * worry about temporary files - there are none.
+ *
+ * Most of the other functions defined below deal with file uploads, and
+ * are only useful for that.
+ *)
+
+
+(**********************************************************************)
+(* CGI functions                                                      *)
+(**********************************************************************)
+
+(* First, the general interface to the CGI argument parser. *)
+
+exception Resources_exceeded
+
+type argument
+
+type argument_processing =
+    Memory        (* Keep the value of the argument in memory *)
+  | File          (* Store the value of the argument into a temporary file *)
+  | Automatic     (* Store only large arguments into files. An argument
+                  * value is large if it is longer than about one block (4K).
+                  * This is not an exact definition.
+                  *)
+
+type workaround =
+    Work_around_MSIE_Content_type_bug
+      (* There is a bug in MSIE that I observed together with SSL connections.
+       * The CONTENT_TYPE passed to the server sometimes has the wrong
+       * format. This option enables a workaround if the user agent string
+       * contains the word "MSIE".
+       *)
+  | Work_around_backslash_bug
+      (* There is a bug in many browsers: The backslash character is not
+       * handled as an escaping character in MIME headers. Because DOS-
+       * based systems use the backslash regularly in filenames, this bug
+       * matters.
+       * This option changes the interpretation of backslashes such that
+       * these are handled as normal characters. I do not know any browser
+       * that is not affected by this bug, so there is no check on
+       * the user agent string.
+       *)
+
+
+type config =
+    { maximum_content_length : int;
+          (* The maximum CONTENT_LENGTH. Bigger requests trigger a
+          * Resources_exceeded exception. This feature can be used
+          * to detect primitive denial-of-service attacks.
+          *)
+      how_to_process_arguments : argument -> argument_processing;
+          (* After the beginning of an argument has been decoded, the
+          * type of processing is decided by invoking this function on
+          * the argument. Note that the passed argument is incomplete -
+          * it does not have a value. You can assume that name, filename,
+          * MIME type and the whole header are already known.
+          * - THIS CONFIGURATION PARAMETER ONLY AFFECTS ARGUMENTS
+          * "POST"ED FROM THE CLIENT IN FORM-ENCODED REPRESENTATION.
+          * All other transport methods can only handle the Memory
+          * processing type.
+          *)
+      tmp_directory : string;
+          (* The temporary directory to use for the temporary files. *)
+      tmp_prefix : string;
+         (* A prefix for temporary files. It is recommended that the prefix
+          * contains a part that is random or that depends on rapidly changing
+          * environment properties. For example, the process ID is a good
+          * candidate, or the current system time. It is not required that
+          * the prefix is unique; there is a fail-safe algorithm that
+          * computes a unique file name from the prefix, even if several
+          * CGI programs run concurrently.
+          *)
+      workarounds : workaround list;
+          (* Specifies which workarounds should be enabled. *)
+    }
+
+val parse_arguments : config -> unit
+val arguments : unit -> (string * argument) list
+    (* - let () = parse_arguments config:
+     * Decodes the CGI arguments. 'config' specifies limits and processing
+     * hints; you can simply pass default_config (see below).
+     *
+     * - let arglist = arguments():
+     * The function returns a list with (name, arg) pairs. The name is
+     * passed back as a string while the value is returned as the opaque type
+     * 'argument'. Accessor functions are defined below. These functions
+     * require that parse_arguments was invoked before.
+     *
+     * Note 1: You can invoke 'parse_arguments' several times, but the
+     * arguments are read in only the first time. If you call the function
+     * again, it does nothing (even if the config changes). This is also
+     * true if 'parse_arguments' has been invoked after 'set_arguments'.
+     *
+     * Note 2: It is not guaranteed that stdin has been read until EOF.
+     * Only CONTENT_LENGTH bytes are read from stdin (following the CGI spec).
+     *
+     * Note 3: If arguments are processed in File or Automatic mode, the
+     * caller of 'parse_arguments' is responsible for deleting the files
+     * after use. You may consider to apply the at_exit function of the
+     * core library for this purpose. See also 'cleanup' below.
+     *)
+
+val set_arguments : argument list -> unit
+    (* Alternatively, you can set the arguments to use. This overrides any
+     * previously parsed set of arguments, and also any following parsing.
+     * - Intended for debugging, and to make it possible to replace the
+     * CGI parser by a different one while retaining this API.
+     *)
+
+val default_config : config
+    (* maximum_content_length = max_int
+     * how_to_process_arguments = (fun _ -> Memory), i.e. always use Memory
+     * tmp_directory = "/var/tmp"
+     * tmp_prefix = "cgi-"
+     * workarounds = [ Work_around_MSIE_Content_type_bug;
+     *                 Work_around_backslash_bug;
+     *               ]
+     *
+     * Note 1: On some Unixes, a special file system is used for /tmp that
+     * stores the files into the virtual memory (main memory or swap area).
+     * Because of this, /var/tmp is preferred as default.
+     *
+     * Note 2: Filename.temp_file is not used because it depends on
+     * environment variables which are usually not set in a CGI environment.
+     *)
+
+val arg_name     : argument -> string
+val arg_value    : argument -> string
+val arg_file     : argument -> string option
+val arg_mimetype : argument -> string
+val arg_filename : argument -> string option
+val arg_header   : argument -> (string * string) list
+    (* The accessor functions that return several aspects of arguments.
+     * arg_name: returns the name of the argument
+     * arg_value: returns the value of the argument. If the value is stored
+     *     in a temporary file, the contents of this file are returned, i.e.
+     *     the file is loaded. This may have some consequences:
+     *     (1) The function may fail because of I/O errors.
+     *     (2) The function may be very slow, especially if the file is
+     *         non-local.
+     *     (3) If the value is bigger than Sys.max_string_length, the function
+     *         raises the exception Resources_exceeded. On 32 bit architectures,
+     *         strings are limited to 16 MB.
+     *     Note that loaded values are put into weak arrays. This makes it
+     *     possible that subsequent calls of 'arg_value' on the same argument
+     *     can avoid loading the value again, and that unused values will
+     *     nevertheless be collected by the GC.
+     * arg_file: returns 'Some filename' if the value resides in a temporary
+     *     file, and 'filename' is the absolute path of this file. If the
+     *     value is only available in memory, None is returned.
+     * arg_mimetype: returns the MIME type of the argument. Note that the
+     *     default MIME type is "text/plain", and that the default is returned
+     *     if the MIME type is not available.
+     * arg_filename: returns 'Some filename' if the argument is associated
+     *     with a certain filename (e.g. from a file upload); otherwise None
+     * arg_header: returns pairs (name,value) containing the complete header
+     *     of the argument. If the transmission protocol does not specify
+     *     a header, the empty list is passed back.
+     *)
+
+val mk_simple_arg : name:string -> string -> argument
+    (* mk_simple_arg name value:
+     * Creates a simple argument with only name, and a value passed by string.
+     * The MIME type is "text/plain".
+     *)
+
+val mk_memory_arg
+    : name:string -> ?mime:string -> ?filename:string -> 
+      ?header:((string * string) list) -> string -> argument
+    (* mk_memory_arg name mimetype filename header value:
+     * Creates an argument whose value is kept in memory.
+     *
+     * Note: The signature of this function changed in release 0.8.
+     *)
+
+val mk_file_arg
+    : name:string -> ?mime:string -> ?filename:string -> 
+      ?header:((string * string) list) -> string -> argument
+    (* mk_file_arg name mimetype filename header value_filename:
+     * Creates an argument whose value is stored in the file
+     * 'value_filename'. If this file name is not absolute, it is interpreted
+     * relative to the directory returned by Sys.getcwd() - this might not
+     * be what you want with respect to mount points and symlinks (and it
+     * depends on the operating system as getcwd is only POSIX.1). The
+     * file name is turned into an absolute name immediately, and the
+     * function arg_file returns the rewritten name.
+     *
+     * Note: The signature of this function changed in release 0.8.
+     *)
+
+
+val cleanup : unit -> unit
+    (* Removes all temporary files that occur in the current set of arguments
+     * (as returned by 'arguments()').
+     *)
+
+
+(* Convenience functions: *)
+
+val argument : string -> argument
+    (* let argument name = List.assoc name (arguments()) -- i.e. returns
+     * the argument with the passed name. Of course, this function expects
+     * that 'parse_arguments' was called before.
+     *)
+
+val argument_value : string -> string
+    (* let argument_value name = arg_value(argument name) -- i.e. returns
+     * the value of the argument.
+     * See also Operators.( !$ ) below.
+     *)
+
+(* For toploop printers: *)
+
+val print_argument : argument -> unit
+
+
+(* Now, the compatibility functions. *)
+
+val parse_args : unit -> (string * string) list
+    (* Decodes the arguments of the CGI and returns them as an association
+     * list. Works regardless of the method (GET or POST).
+     *)
+
+val parse_args_with_mimetypes : unit -> (string * string * string) list
+    (* Like parse_args, but returns also the MIME type.
+     * The triples contain (name, mime_type, value).
+     * If an encoding was chosen that does not transfer the MIME type,
+     * "text/plain" is returned.
+     *
+     * THIS FUNCTION SHOULD BE CONSIDERED DEPRECATED.
+     * It was included in netstring-0.4, but most people want more than
+     * just the MIME type. parse_arguments should be used instead.
+     *)
+
+val header : string -> unit
+    (* Prints the content-type header.
+     * The argument is the MIME type; the default "text/html" is used if the
+     * argument is the empty string.
+     *)
+
+val this_url : unit -> string
+    (* Returns the address of the CGI *)
+
+(**********************************************************************)
+(* The Operators module                                               *)
+(**********************************************************************)
+
+(* If you open the Operators module, you can write
+ *     !% "name"      instead of     argument "name", and
+ *     !$ "name"      instead of     argument_value "name"
+ *)
+
+module Operators : sig
+  val ( !% ) : string -> argument
+      (* same as 'argument' above *)
+  val ( !$ ) : string -> string
+      (* same as 'argument_value' above *)
+end
+
+(**********************************************************************)
+(* Low-level functions                                                *)
+(**********************************************************************)
+
+(* Encoding/Decoding within URLs:
+ *
+ * The following two functions perform the '%'-substitution for
+ * characters that may otherwise be interpreted as metacharacters.
+ *
+ * See also the Netencoding module. This interface contains these functions
+ * to keep compatibility with the old Cgi module.
+ *)
+
+val decode : string -> string
+val encode : string -> string
+
+(* URL-encoded parameters:
+ *
+ * The following two functions create and analyze URL-encoded parameters.
+ * Format: name1=val1&name2=val2&...
+ *)
+
+val mk_url_encoded_parameters : (string * string) list -> string
+    (* The argument is a list of (name,value) pairs. The result is the
+     * single URL-encoded parameter string.
+     *)
+
+val dest_url_encoded_parameters : string -> (string * string) list
+    (* The argument is the URL-encoded parameter string. The result is
+     * the corresponding list of (name,value) pairs.
+     * Note: Whitespace within the parameter string is ignored.
+     * If there is a format error, the function fails.
+     *)
+
+(* Form-encoded parameters:
+ *
+ * According to: RFCs 2388, 2183, 2045, 2046
+ *
+ * General note: This is a simple API to encode/decode form-encoded parameters.
+ * Especially, it is not possible to pass the header of the parts through
+ * this API.
+ *)
+
+val mk_form_encoded_parameters : (string * string * string) list ->
+                                     (string * string)
+    (* The argument is a list of (name,mimetype,value) triples.
+     * The result is (parstr, boundary), where 'parstr' is the
+     * single form-encoded parameter string, and 'boundary' is the
+     * boundary to separate the message parts.
+     *
+     * THIS FUNCTION IS CURRENTLY NOT IMPLEMENTED!
+     *)
+
+val  dest_form_encoded_parameters : string -> boundary:string -> config ->
+                                       argument list
+    (* The first argument is the form-encoded parameter string.
+     * The second argument is the boundary (extracted from the mime type).
+     * Third argument: Only the workarounds component is used.
+     * The result is
+     * the corresponding list of arguments (all in memory).
+     * If there is a format error, the function fails.
+     * Note: embedded multipart/mixed types are returned as they are,
+     *   and are not recursively decoded.
+     * Note: The content-transfer-encodings "7bit", "8bit", "binary",
+     *   "base64", and "quoted-printable" are supported.
+     * Note: Parameter names which include spaces or non-alphanumeric
+     *   characters may be problematic (the rules of RFC 2047 are NOT applied).
+     * Note: The returned MIME type is not normalized.
+     *)
+
+val dest_form_encoded_parameters_from_netstream
+    : Netstream.t -> boundary:string -> config -> argument list
+    (* let arglist = dest_form_encoded_parameters_from_netstream s b c:
+     * Reads the form-encoded parameters from netstream s. The boundary
+     * is passed in b, and the configuration in c.
+     * A list of arguments is returned.
+     *
+     * See also dest_form_encoded_parameters.
+     *
+     * Restriction: In contrast to dest_form_encoded_parameters, this
+     * function is not able to handle the content-transfer-encodings
+     * "base64" and "quoted-printable". (This is not really a restriction
+     * because no browser uses these encodings in conjunction with HTTP.
+     * This is different if mail transport is chosen. - The reason for
+     * this restriction is that there are currently no stream functions
+     * for decoding.)
+     *)
+
+(* Private functions: *)
+
+val init_mt : (unit -> unit) -> (unit -> unit) -> unit
+
+
+(**********************************************************************)
+(* Compatibility with CGI library by J.-C. Filliatre                  *)
+(**********************************************************************)
+
+(* The following functions are compatible with J.-C. Filliatre's CGI
+ * library:
+ *
+ * parse_args, header, this_url, decode, encode.
+ *
+ * Note that the new implementation of parse_args can be safely invoked
+ * several times.
+ *
+ * Since release 0.8, Netstring's CGI implementation is again thread-safe.
+ *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.8  2000/06/25 22:34:43  gerd
+ *     Added labels to arguments.
+ *
+ * Revision 1.7  2000/06/25 21:40:36  gerd
+ *     Added printer.
+ *
+ * Revision 1.6  2000/06/25 21:15:48  gerd
+ *     Checked thread-safety.
+ *
+ * Revision 1.5  2000/05/16 22:28:13  gerd
+ *     New "workarounds" config component.
+ *
+ * Revision 1.4  2000/04/15 16:47:27  gerd
+ *     Last minor changes before releasing 0.6.
+ *
+ * Revision 1.3  2000/04/15 13:09:01  gerd
+ *     Implemented uploads to temporary files.
+ *
+ * Revision 1.2  2000/03/02 01:15:30  gerd
+ *     Updated.
+ *
+ * Revision 1.1  2000/02/25 15:21:12  gerd
+ *     Initial revision.
+ *
+ *
+ *)
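
Taken together, the interface above needs very little code in a typical CGI
program: parse the arguments once, emit the content-type header, and read
fields by name. A minimal sketch along the lines of the "FOR SIMPLE CGI
PROGRAMS" note at the top of this file; the field names "name" and
"attachment" are hypothetical, and the custom configuration merely illustrates
the how_to_process_arguments hook described in the config record:

    (* Store uploaded files (arguments carrying a filename) on disk,
     * keep ordinary form fields in memory. *)
    let upload_config =
      { Cgi.default_config with
          Cgi.how_to_process_arguments =
            (fun arg ->
               match Cgi.arg_filename arg with
                 | Some _ -> Cgi.File
                 | None   -> Cgi.Memory) }

    let () =
      Cgi.parse_arguments upload_config;
      Cgi.header "";                       (* "" defaults to "text/html" *)
      let who =
        try Cgi.argument_value "name" with Not_found -> "world" in
      print_string ("<html><body>Hello, " ^ who ^ "!");
      (try
         let att = Cgi.argument "attachment" in
         (match Cgi.arg_file att with
            | Some tmp -> print_string ("<p>upload stored in " ^ tmp ^ "</p>")
            | None     -> print_string "<p>upload kept in memory</p>")
       with Not_found -> ());
      print_string "</body></html>\n";
      Cgi.cleanup ()                       (* delete temporary upload files *)

With Cgi.Operators opened, the lookup of "name" can also be written
!$ "name" instead of Cgi.argument_value "name".
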
diff --git a/helm/DEVEL/pxp/netstring/compat-base64/META b/helm/DEVEL/pxp/netstring/compat-base64/META
new file mode 100644 (file)
index 0000000..a5c003e
--- /dev/null
@@ -0,0 +1,3 @@
+version = "0.5"
+requires = "netstring"
+description = "Compatibility with base64"
diff --git a/helm/DEVEL/pxp/netstring/compat-cgi/META b/helm/DEVEL/pxp/netstring/compat-cgi/META
new file mode 100644 (file)
index 0000000..2294921
--- /dev/null
@@ -0,0 +1,3 @@
+version = "0.5"
+requires = "netstring"
+description = "Compatibility with cgi"
diff --git a/helm/DEVEL/pxp/netstring/depend b/helm/DEVEL/pxp/netstring/depend
new file mode 100644 (file)
index 0000000..5991264
--- /dev/null
@@ -0,0 +1,36 @@
+base64.cmo: netencoding.cmi base64.cmi 
+base64.cmx: netencoding.cmx base64.cmi 
+cgi.cmo: mimestring.cmi netbuffer.cmi netencoding.cmi netstream.cmi cgi.cmi 
+cgi.cmx: mimestring.cmx netbuffer.cmx netencoding.cmx netstream.cmx cgi.cmi 
+mimestring.cmo: netbuffer.cmi netencoding.cmi netstream.cmi netstring_str.cmi \
+    mimestring.cmi 
+mimestring.cmx: netbuffer.cmx netencoding.cmx netstream.cmx netstring_str.cmx \
+    mimestring.cmi 
+netbuffer.cmo: netbuffer.cmi 
+netbuffer.cmx: netbuffer.cmi 
+netconversion.cmo: netmappings.cmi netconversion.cmi 
+netconversion.cmx: netmappings.cmx netconversion.cmi 
+netencoding.cmo: netstring_str.cmi netencoding.cmi 
+netencoding.cmx: netstring_str.cmx netencoding.cmi 
+nethtml.cmo: nethtml.cmi 
+nethtml.cmx: nethtml.cmi 
+netmappings.cmo: netmappings.cmi 
+netmappings.cmx: netmappings.cmi 
+netmappings_iso.cmo: netmappings.cmi 
+netmappings_iso.cmx: netmappings.cmx 
+netmappings_other.cmo: netmappings.cmi 
+netmappings_other.cmx: netmappings.cmx 
+netstream.cmo: netbuffer.cmi netstream.cmi 
+netstream.cmx: netbuffer.cmx netstream.cmi 
+netstring_mt.cmo: cgi.cmi netmappings.cmi netstring_str.cmi netstring_mt.cmi 
+netstring_mt.cmx: cgi.cmx netmappings.cmx netstring_str.cmx netstring_mt.cmi 
+netstring_str.cmo: netstring_str.cmi 
+netstring_str.cmx: netstring_str.cmi 
+netstring_top.cmo: netstring_top.cmi 
+netstring_top.cmx: netstring_top.cmi 
+neturl.cmo: netencoding.cmi neturl.cmi 
+neturl.cmx: netencoding.cmx neturl.cmi 
+cgi.cmi: netstream.cmi 
+mimestring.cmi: netstream.cmi 
+netmappings.cmi: netconversion.cmi 
+netstream.cmi: netbuffer.cmi 
diff --git a/helm/DEVEL/pxp/netstring/depend.pkg b/helm/DEVEL/pxp/netstring/depend.pkg
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB b/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB
new file mode 100644 (file)
index 0000000..d942e27
--- /dev/null
@@ -0,0 +1,52 @@
+******************************************************************************
+ABOUT-FINDLIB - Package manager for O'Caml
+******************************************************************************
+
+
+==============================================================================
+Abstract
+==============================================================================
+
+The findlib library provides a scheme to manage reusable software components 
+(packages), and includes tools that support this scheme. Packages are 
+collections of OCaml modules for which metainformation can be stored. The 
+packages are kept in the filesystem hierarchy, but with a strict directory 
+structure. The library contains functions to look up the directory that stores 
+a package, to query metainformation about a package, and to retrieve dependency 
+information about multiple packages. There is also a tool that allows the user 
+to enter queries on the command-line. In order to simplify compilation and 
+linkage, there are new frontends of the various OCaml compilers that can 
+directly deal with packages. 
+
+Together with the packages, metainformation is stored. This includes a version 
+string, the archives the package consists of, and additional linker options. 
+Packages can also be dependent on other packages. There is a query which finds 
+out all predecessors of a list of packages and sorts them topologically. The 
+new compiler frontends do this implicitly. 
+
+Metainformation can be conditional, i.e. depend on a set of predicates. This is 
+mainly used to be able to react to certain properties of the environment, such 
+as whether the bytecode or the native compiler is invoked, whether the 
+application is multi-threaded, and a few more. If the new compiler frontends 
+are used, most predicates are determined automatically. 
+
+There is special support for scripts. A new directive, "#require", loads 
+packages into scripts. Of course, this works only with newly created toploops 
+which include the findlib library. 
+
+==============================================================================
+Where to get findlib
+==============================================================================
+
+The manual of findlib is available online [1]. You can download findlib here 
+[2]. 
+
+
+--------------------------
+
+[1]   see http://www.ocaml-programming.de/packages/documentation/findlib/
+
+[2]   see http://www.ocaml-programming.de/packages/findlib-0.3.1.tar.gz
+
+
+
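The "#require" directive mentioned above makes it easy to try this package
interactively. A hedged sketch of a toploop session, assuming a toploop that
includes the findlib library (as described above) and an installed netstring
package; the responses printed by the toploop are omitted:

    #require "netstring";;
    Netencoding.Base64.encode "hello";;
    Cgi.encode "a b&c";;

How such a toploop is created is described in the findlib manual referenced
above.
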
diff --git a/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB.xml b/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB.xml
new file mode 100644 (file)
index 0000000..d1dc5b0
--- /dev/null
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!ENTITY f "<em>findlib</em>">
+<!ENTITY F "<em>Findlib</em>">
+
+]>
+
+<readme title="ABOUT-FINDLIB - Package manager for O'Caml">
+  <sect1>
+    <title>Abstract</title>
+<p>
+The &f; library provides a scheme to manage reusable software
+components (packages), and includes tools that support this
+scheme. Packages are collections of OCaml modules for which
+metainformation can be stored. The packages are kept in the filesystem
+hierarchy, but with a strict directory structure. The library contains
+functions to look up the directory that stores a package, to query
+metainformation about a package, and to retrieve dependency
+information about multiple packages. There is also a tool that allows
+the user to enter queries on the command-line. In order to simplify
+compilation and linkage, there are new frontends of the various OCaml
+compilers that can directly deal with packages.
+</p>
+
+<p>
+Together with the packages, metainformation is stored. This includes a
+version string, the archives the package consists of, and additional
+linker options. Packages can also be dependent on other
+packages. There is a query which finds out all predecessors of a list
+of packages and sorts them topologically. The new compiler frontends
+do this implicitly.
+</p>
+
+<p>
+Metainformation can be conditional, i.e. depend on a set of
+predicates. This is mainly used to be able to react to certain
+properties of the environment, such as whether the bytecode or the
+native compiler is invoked, whether the application is multi-threaded,
+and a few more. If the new compiler frontends are used, most predicates
+are determined automatically.
+</p>
+
+<p>
+There is special support for scripts. A new directive, "#require",
+loads packages into scripts. Of course, this works only with newly
+created toploops which include the &f; library.
+</p>
+
+  </sect1>
+
+  <sect1><title>Where to get findlib</title>
+    <p>
+The manual of &f; is available <a href="&url.findlib-project;">online</a>.
+You can download &f; <a href="&url.findlib-download;">here</a>.
+</p>
+  </sect1>
+</readme>
diff --git a/helm/DEVEL/pxp/netstring/doc/INSTALL b/helm/DEVEL/pxp/netstring/doc/INSTALL
new file mode 100644 (file)
index 0000000..cca3994
--- /dev/null
@@ -0,0 +1,128 @@
+******************************************************************************
+INSTALL - Netstring, string processing functions for the net
+******************************************************************************
+
+
+==============================================================================
+The "Netstring" package
+==============================================================================
+
+------------------------------------------------------------------------------
+Prerequisites
+------------------------------------------------------------------------------
+
+Netstring does not need any other packages besides the O'Caml core. Netstring 
+needs at least O'Caml 3.00. The installation procedure defined in the Makefile 
+requires findlib [1] to work [2]. 
+
+------------------------------------------------------------------------------
+Configuration
+------------------------------------------------------------------------------
+
+It is not necessary to configure "Netstring". 
+
+------------------------------------------------------------------------------
+Compilation
+------------------------------------------------------------------------------
+
+The Makefile defines the following goals: 
+
+-  make all
+   compiles with the bytecode compiler and creates netstring.cma, 
+   netstring_mt.cmo, netstring_top.cmo, netmappings_iso.cmo, and 
+   netmappings_other.cmo
+   
+-  make opt
+   compiles with the native compiler and creates netstring.cmxa, 
+   netstring_mt.cmx, netmappings_iso.cmx, and netmappings_other.cmx
+   
+The archive netstring.cmx?a contains the functionality, and the two 
+single-module files netmappings_iso.cm[ox] and netmappings_other.cm[ox] add 
+configurations to the character set conversion module. These configurations are 
+optional:
+
+-  Netmappings_iso: Conversion tables for the character sets ISO-8859-2, -3, 
+   -4, -5, -6, -7, -8, -9, -10, -13, -14, and -15.
+   
+-  Netmappings_other: Conversion tables for the character sets WINDOWS-1250, 
+   -1251, -1252, -1253, -1254, -1255, -1256, -1257, -1258; code pages 037, 424, 
+   437, 500, 737, 775, 850, 852, 855, 856, 857, 860, 861, 862, 863, 864, 865, 
+   866, 869, 874, 875, 1006, 1026; JIS-0201; KOI8R; Macintosh Roman encoding; 
+   Adobe Standard Encoding, Symbol Encoding, and Zapf Dingbats Encodings.
+   
+Even without these configuration files, the conversion module is able to handle 
+the encodings ISO-8859-1, US-ASCII, UTF-16, UTF-8, and the Java variant of 
+UTF-8.
+
+The module Netstring_mt must be linked into multi-threaded applications; 
+otherwise some mutexes remain uninitialized.
+
+The module Netstring_top loads several printers for abstract values (for 
+toploops).
+
+------------------------------------------------------------------------------
+Installation
+------------------------------------------------------------------------------
+
+The Makefile defines the following goals:
+
+-  make install
+   installs the bytecode archive, the interface definitions, and if present, 
+   the native archive in the default location of findlib 
+   
+-  make install-cgi
+   Installs a pseudo package "cgi" which is compatible with the old cgi 
+   package. This has the effect that software searching the "cgi" package will 
+   find the netstring package instead. This is recommended.
+   
+-  make install-base64
+   Installs a pseudo package "base64" which is compatible with the old base64 
+   package. This has the effect that software searching the "base64" package 
+   will find the netstring package instead. This is recommended.
+   
+-  make uninstall
+   removes the package
+   
+-  make uninstall-cgi
+   removes the "cgi" compatibility package
+   
+-  make uninstall-base64
+   removes the "base64" compatibility package
+   
+------------------------------------------------------------------------------
+Linking netstring with findlib
+------------------------------------------------------------------------------
+
+The command 
+
+ocamlfind ocamlc ... -package netstring ... -linkpkg ...
+
+links as much code as possible from netstring into your application: All 
+conversion tables; when -thread is specified, the initialization code for 
+multi-threaded programs; when a toploop is created, the code setting the value 
+printers.
+
+The following predicates reduce the amount of linked code:
+
+-  netstring_only_iso: Only the conversion tables for the ISO-8859 series of 
+   character sets are linked.
+   
+-  netstring_minimum: No additional conversion tables are linked; only 
+   ISO-8859-1 and the UTF encodings work.
+   
+For example, the command may look like 
+
+ocamlfind ocamlc ... 
+          -package netstring -predicates netstring_only_iso ... -linkpkg ...
+
+to link only the ISO-8859 conversion tables.
+
+
+--------------------------
+
+[1]   see http://www.ocaml-programming.de/packages/documentation/findlib/
+
+[2]   Findlib is a package manager, see the file ABOUT-FINDLIB.
+
+
+
diff --git a/helm/DEVEL/pxp/netstring/doc/INSTALL.xml b/helm/DEVEL/pxp/netstring/doc/INSTALL.xml
new file mode 100644 (file)
index 0000000..b5b53ed
--- /dev/null
@@ -0,0 +1,153 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!ENTITY m "<em>Netstring</em>">
+
+]>
+
+<readme title="INSTALL - Netstring, string processing functions for the net">
+  <sect1><title>The "Netstring" package</title>
+    <sect2><title>Prerequisites</title>
+      <p>
+&m; does not need any other packages besides the O'Caml core. &m; needs
+at least O'Caml 3.00. The installation procedure defined in the Makefile
+requires <a href="&url.findlib-project;">findlib</a> to
+work<footnote><em>Findlib</em> is a package manager, see the file
+ABOUT-FINDLIB.</footnote>.
+</p>
+    </sect2>
+
+    <sect2><title>Configuration</title>
+      <p>
+It is not necessary to configure "Netstring".
+</p>
+    </sect2>
+
+    <sect2><title>Compilation</title>
+      <p>
+The Makefile defines the following goals:
+</p>
+      <ul>
+       <li>
+         <p>make all</p> 
+         <p>compiles with the bytecode compiler and creates netstring.cma,
+netstring_mt.cmo, netstring_top.cmo, netmappings_iso.cmo, and
+netmappings_other.cmo</p>
+       </li>
+       <li>
+         <p>make opt</p>
+         <p>compiles with the native compiler and creates netstring.cmxa,
+netstring_mt.cmx, netmappings_iso.cmx, and netmappings_other.cmx</p>
+       </li>
+      </ul>
+
+      <p>The archive netstring.cmx?a contains the functionality, and the two
+single-module files netmappings_iso.cm[ox] and netmappings_other.cm[ox] add
+configurations to the character set conversion module. These configurations are
+optional:</p>
+
+      <ul>
+       <li><p>Netmappings_iso: Conversion tables for the character sets
+ISO-8859-2, -3, -4, -5, -6, -7, -8, -9, -10, -13, -14, and -15.</p>
+       </li>
+       <li><p>Netmappings_other: Conversion tables for the character sets
+WINDOWS-1250, -1251, -1252, -1253, -1254, -1255, -1256, -1257, -1258;
+code pages 037, 424, 437, 500, 737, 775, 850, 852, 855, 856, 857, 860, 861,
+862, 863, 864, 865, 866, 869, 874, 875, 1006, 1026; JIS-0201; KOI8R; Macintosh
+Roman encoding; Adobe Standard Encoding, Symbol Encoding, and Zapf Dingbats
+Encodings.</p>
+       </li>
+      </ul>
+
+<p>Even without these configuration files, the conversion module is able to
+handle the encodings ISO-8859-1, US-ASCII, UTF-16, UTF-8, and the Java variant
+of UTF-8.</p>
+
+<p>The module Netstring_mt must be linked into multi-threaded applications;
+otherwise some mutexes remain uninitialized.</p>
+
+<p>The module Netstring_top loads several printers for abstract values (for
+toploops).</p> 
+
+    </sect2>
+
+    <sect2><title>Installation</title>
+      <p>
+The Makefile defines the following goals:</p>
+      <ul>
+       <li>
+         <p>make install</p>
+         <p>installs the bytecode archive, the interface definitions, and if
+present, the native archive in the default location of <em>findlib</em>
+</p>
+       </li>
+
+       <li>
+         <p>make install-cgi</p>
+         <p>Installs a pseudo package "cgi" that is compatible with the old
+cgi package. As a result, software searching for the "cgi" package will
+find the netstring package instead. This is recommended.</p>
+       </li>
+
+       <li>
+         <p>make install-base64</p> <p>Installs a pseudo package "base64"
+that is compatible with the old base64 package. As a result,
+software searching for the "base64" package will find the netstring package
+instead. This is recommended.</p>
+       </li>
+
+       <li>
+         <p>make uninstall</p>
+         <p>removes the package</p>
+       </li>
+
+       <li>
+         <p>make uninstall-cgi</p>
+         <p>removes the "cgi" compatibility package</p>
+       </li>
+
+       <li>
+         <p>make uninstall-base64</p>
+         <p>removes the "base64" compatibility package</p>
+       </li>
+      </ul>
+    </sect2>
+
+
+    <sect2>
+      <title>Linking netstring with findlib</title>
+      <p>The command
+<code>
+ocamlfind ocamlc ... -package netstring ... -linkpkg ...
+</code>
+links as much code from netstring into your application as possible: all
+conversion tables; when -thread is specified, the initialization code for
+multi-threaded programs; and, when a toploop is created, the code that sets
+the value printers.</p>
+
+<p>The following predicates reduce the amount of linked code:</p>
+
+      <ul>
+       <li><p>netstring_only_iso: Only the conversion tables for the ISO-8859
+series of character sets are linked.</p>
+       </li>
+       <li><p>netstring_minimum: No additional conversion tables are linked;
+only ISO-8859-1 and the UTF encodings work.</p>
+       </li>
+      </ul>
+
+<p>For example, the command may look like
+
+<code>
+ocamlfind ocamlc ... 
+          -package netstring -predicates netstring_only_iso ... -linkpkg ...
+</code>
+
+to link only the ISO-8859 conversion tables.</p>
+    </sect2>
+
+  </sect1>
+</readme>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/netstring/doc/Makefile b/helm/DEVEL/pxp/netstring/doc/Makefile
new file mode 100644 (file)
index 0000000..7f8450b
--- /dev/null
@@ -0,0 +1,22 @@
+.PHONY: all
+all: README INSTALL ABOUT-FINDLIB
+
+README: README.xml common.xml config.xml
+       readme -text README.xml >README
+
+INSTALL: INSTALL.xml common.xml config.xml
+       readme -text INSTALL.xml >INSTALL
+
+ABOUT-FINDLIB: ABOUT-FINDLIB.xml common.xml config.xml
+       readme -text ABOUT-FINDLIB.xml >ABOUT-FINDLIB
+
+.PHONY: clean
+clean:
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~
+
diff --git a/helm/DEVEL/pxp/netstring/doc/README b/helm/DEVEL/pxp/netstring/doc/README
new file mode 100644 (file)
index 0000000..b590416
--- /dev/null
@@ -0,0 +1,212 @@
+******************************************************************************
+README - Netstring, string processing functions for the net
+******************************************************************************
+
+
+==============================================================================
+Abstract
+==============================================================================
+
+Netstring is a collection of string processing functions that are useful in 
+conjunction with Internet messages and protocols. In particular, it contains 
+functions for the following purposes:
+
+-  Parsing MIME messages
+   
+-  Several encoding/decoding functions (Base 64, Quoted Printable, Q, 
+   URL-encoding)
+   
+-  A new implementation of the CGI interface that allows users to upload files
+   
+-  A simple HTML parser
+   
+-  URL parsing, printing and processing
+   
+-  Conversion between character sets
+   
+==============================================================================
+Download
+==============================================================================
+
+You can download Netstring as a gzip'ed tarball [1]. 
+
+==============================================================================
+Documentation
+==============================================================================
+
+Sorry, there is no manual. The mli files describe each function in detail. 
+Furthermore, the following additional information may be useful.
+
+------------------------------------------------------------------------------
+New CGI implementation
+------------------------------------------------------------------------------
+
+For a long time, the CGI implementation by Jean-Christophe Filliatre has been 
+the only freely available module that implemented the CGI interface (it is 
+also based on code by Daniel de Rauglaudre). It worked well, but it did not 
+support file uploads, because this requires a parser for MIME messages.
+
+The main goal of Netstring is to make such uploads possible, and because of 
+this it contains an almost complete parser for MIME messages.
+
+The new CGI implementation provides the same functions as the old one, plus 
+some extensions. If you call Cgi.parse_args(), you get the CGI parameters as 
+before, but as already explained, this also works if the parameters are 
+encapsulated in a MIME message. In the HTML code, you can select the MIME 
+format by using 
+
+<form action="..." method="post" enctype="multipart/form-data">
+...
+</form>
+
+
+- this "enctype" attribute forces the browser to send the form parameters as 
+a multipart MIME message. (Note: neither the parameters of a conventional 
+hyperlink nor the parameters of a form whose "method" is "get" can be sent as 
+a MIME message.) In many browsers only this particular encoding enables the 
+file upload elements; you cannot perform file uploads with other encodings. 
+
+As MIME messages can transport MIME types, filenames, and other additional 
+properties, it is also possible to get these using the enhanced interface. 
+After calling 
+
+Cgi.parse_arguments config
+
+you can get all available information about a certain parameter by invoking 
+
+let param = Cgi.argument "name"
+
+- where "param" has the type "argument". There are several accessor functions 
+to extract the various aspects of arguments (name, filename, value by string, 
+value by temporary file, MIME type, MIME header) from "argument" values. 
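+
+For example, a handler for an uploaded file could look like the following 
+sketch. The accessor names Cgi.arg_name and Cgi.arg_value are assumptions 
+made for illustration only; see cgi.mli for the actual interface. 
+
+let show_upload config =
+  Cgi.parse_arguments config;           (* "config" as described above *)
+  let param = Cgi.argument "file" in    (* the argument named "file" *)
+  print_endline (Cgi.arg_name param);   (* assumed accessor: the name *)
+  print_endline (Cgi.arg_value param)   (* assumed accessor: the value *)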
+
+------------------------------------------------------------------------------
+Base64, and other encodings
+------------------------------------------------------------------------------
+
+Netstring is also the successor of the Base64 package. It provides a 
+Base64-compatible interface and an enhanced API. The latter is contained in 
+the Netencoding module, which also offers implementations of the "quoted 
+printable", "Q", and "URL" encodings. Please see netencoding.mli for details.
+
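+For example, decoding Base64 or Q-encoded data works as shown in the 
+following sketch; the calls mirror those used inside the Netstring sources 
+themselves. 
+
+let plain =
+  Netencoding.Base64.decode
+    ~url_variant:false ~accept_spaces:false "SGVsbG8gd29ybGQ="
+  (* yields "Hello world" *)
+
+let q_plain =
+  Netencoding.Q.decode "Hello=20world"
+  (* yields "Hello world" *)
+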
+------------------------------------------------------------------------------
+The MIME scanner functions
+------------------------------------------------------------------------------
+
+In the Mimestring module you can find several functions for scanning parts of 
+MIME messages. These functions already cover most aspects of MIME messages: 
+scanning of headers, analysis of structured header entries, and scanning of 
+multipart bodies. Of course, a full-featured MIME scanner would require some 
+more functions, especially concrete parsers for frequently used structures 
+(mail addresses or date strings). 
+
+Please see the file mimestring.mli for details.
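+
+For example, a message that is held in a string can be split into its header 
+fields and its body as in the following sketch (the signature of scan_header 
+is documented in mimestring.mli): 
+
+let split_message msg =
+  (* fields: (lowercase name, unfolded value) pairs;
+   * body_start: position directly after the blank line ending the header *)
+  let fields, body_start =
+    Mimestring.scan_header msg ~start_pos:0 ~end_pos:(String.length msg) in
+  fields, String.sub msg body_start (String.length msg - body_start)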
+
+------------------------------------------------------------------------------
+The HTML parser
+------------------------------------------------------------------------------
+
+The HTML parser should be able to read every HTML file, whether it is correct 
+or not. The parser tries to recover from parsing errors as much as possible. 
+
+The parser returns the HTML term as a conventional recursive value (i.e. no 
+object-oriented design).
+
+The parser depends somewhat on knowledge of the HTML version, mainly because 
+it needs to know which tags are always empty. You may need to adjust this 
+configuration before the parser works well enough for your purpose. 
+
+Please see the Nethtml module for details.
+
+------------------------------------------------------------------------------
+The abstract data type URL
+------------------------------------------------------------------------------
+
+The module Neturl contains support for URL parsing and processing. The 
+implementation strictly follows the standards RFC 1738 and RFC 1808. URLs can 
+be parsed, and several accessor functions allow the user to get components of 
+parsed URLs, or to change components. Modifying URLs is safe; it is impossible 
+to create a URL that does not have a valid string representation.
+
+Both absolute and relative URLs are supported. It is possible to apply a 
+relative URL to a base URL in order to get the corresponding absolute URL.
+
+------------------------------------------------------------------------------
+Conversion between character sets and encodings
+------------------------------------------------------------------------------
+
+The module Netconversion converts strings from one character set to another. 
+It is Unicode-based, and there are conversion tables for more than 50 
+encodings.
+
+==============================================================================
+Author, Copying
+==============================================================================
+
+Netstring has been written by Gerd Stolpmann [2]. You may copy it as you like, 
+and you may even use it for commercial purposes as long as the license 
+conditions are respected; see the file LICENSE that comes with the 
+distribution. It allows almost everything. 
+
+==============================================================================
+History
+==============================================================================
+
+-  Changed in 0.9.3: Fixed a bug in the "install" rule of the Makefile.
+   
+-  Changed in 0.9.2: New format for the conversion tables which are now much 
+   smaller.
+   
+-  Changed in 0.9.1: Updated the Makefile such that (native-code) compilation 
+   of netmappings.ml becomes possible. 
+   
+-  Changed in 0.9: Extended Mimestring module: It can now process RFC-2047 
+   messages.
+   New Netconversion module which converts strings between character encodings.
+   
+-  Changed in 0.8.1: Added the component url_accepts_8bits to 
+   Neturl.url_syntax. This helps processing URLs which intentionally contain 
+   bytes >= 0x80.
+   Fixed a bug: Every URL containing a 'j' was malformed!
+   
+-  Changed in 0.8: Added the module Neturl which provides the abstract data 
+   types of URLs.
+   The whole package is now thread-safe.
+   Added printers for the various opaque data types.
+   Added labels to function arguments where appropriate. The following 
+   functions changed their signatures significantly: Cgi.mk_memory_arg, 
+   Cgi.mk_file_arg.
+   
+-  Changed in 0.7: Added workarounds for frequent browser bugs. Some functions 
+   now take an additional argument specifying which workarounds are enabled.
+   
+-  Changed in 0.6.1: Updated URLs in documentation.
+   
+-  Changed in 0.6: The file upload has been re-implemented to support large 
+   files; the file is now read block by block and the blocks can be collected 
+   either in memory or in a temporary file.
+   Furthermore, the CGI API has been revised. There is now an opaque data type 
+   "argument" that hides all implementation details and that is extensible (if 
+   necessary, it is possible to add features without breaking the interface 
+   again).
+   The CGI argument parser can be configured; currently it is possible to limit 
+   the size of uploaded data, to control by which method arguments are 
+   processed, and to set up where temporary files are created.
+   The other parts of the package that have nothing to do with CGI remain 
+   unchanged. 
+   
+-  Changed in 0.5.1: A mistake in the documentation has been corrected.
+   
+-  Initial version 0.5: The Netstring package is intended to be the successor 
+   of the Base64-0.2 and the Cgi-0.3 packages. The sum of both version numbers 
+   is 0.5, and because of this, the first version number is 0.5. 
+   
+
+--------------------------
+
+[1]   see http://www.ocaml-programming.de/packages/netstring-0.9.2.tar.gz
+
+[2]   see mailto:gerd@gerd-stolpmann.de
+
+
+
diff --git a/helm/DEVEL/pxp/netstring/doc/README.xml b/helm/DEVEL/pxp/netstring/doc/README.xml
new file mode 100644 (file)
index 0000000..bbf473e
--- /dev/null
@@ -0,0 +1,244 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!-- Special HTML config: -->
+<!ENTITY % readme:html:up '<a href="../..">up</a>'>
+
+<!ENTITY % config SYSTEM "config.xml">
+%config;
+
+]>
+
+<readme title="README - Netstring, string processing functions for the net">
+  <sect1>
+    <title>Abstract</title>
+    <p>
+<em>Netstring</em> is a collection of string processing functions that are
+useful in conjunction with Internet messages and protocols. In particular,
+it contains functions for the following purposes:</p>
+
+<ul>
+      <li><p>Parsing MIME messages</p>
+      </li>
+      <li><p>Several encoding/decoding functions (Base 64, Quoted Printable, Q, URL-encoding)</p>
+      </li>
+      <li><p>A new implementation of the CGI interface that allows users to upload files</p>
+      </li>
+      <li><p>A simple HTML parser</p>
+      </li>
+      <li><p>URL parsing, printing and processing</p>
+      </li>
+      <li><p>Conversion between character sets</p>
+      </li>
+    </ul>
+
+  </sect1>
+
+  <sect1>
+    <title>Download</title>
+    <p>
+You can download <em>Netstring</em> as a gzip'ed
+<a href="&url.netstring-download;">tarball</a>.
+</p>
+
+  </sect1>
+
+  <sect1>
+    <title>Documentation</title>
+    <p>
+Sorry, there is no manual. The mli files describe each function in
+detail. Furthermore, the following additional information may be useful.</p>
+
+    <sect2>
+      <title>New CGI implementation</title>
+
+      <p>For a long time, the CGI implementation by Jean-Christophe Filliatre
+has been the only freely available module that implemented the CGI interface
+(it is also based on code by Daniel de Rauglaudre). It worked well, but it did
+not support file uploads, because this requires a parser for MIME messages.</p>
+      <p>The main goal of Netstring is to make such uploads possible, and
+because of this it contains an almost complete parser for MIME messages.</p>
+      <p>The new CGI implementation provides the same functions as the old
+one, plus some extensions. If you call Cgi.parse_args(), you get the CGI
+parameters as before, but as already explained, this also works if the
+parameters are encapsulated in a MIME message. In the HTML code, you can
+select the MIME format by using
+<code><![CDATA[
+<form action="..." method="post" enctype="multipart/form-data">
+...
+</form>
+]]>
+</code>
+- this "enctype" attribute forces the browser to send the form parameters
+as a multipart MIME message. (Note: neither the parameters of a conventional
+hyperlink nor the parameters of a form whose "method" is "get" can be sent as
+a MIME message.) In many browsers only this particular encoding enables
+the file upload elements; you cannot perform file uploads with other encodings.
+</p>
+
+      <p>As MIME messages can transport MIME types, filenames, and other
+additional properties, it is also possible to get these using the enhanced
+interface. After calling
+<code><![CDATA[
+Cgi.parse_arguments config
+]]></code>
+you can get all available information about a certain parameter by invoking
+<code><![CDATA[
+let param = Cgi.argument "name"
+]]></code>
+- where "param" has the type "argument". There are several accessor functions
+to extract the various aspects of arguments (name, filename, value by string,
+value by temporary file, MIME type, MIME header) from "argument" values.
+</p>
+
+    </sect2>
+
+
+    <sect2>
+      <title>Base64, and other encodings</title>
+
+      <p>Netstring is also the successor of the Base64 package. It provides a
+Base64-compatible interface and an enhanced API. The latter is contained in
+the Netencoding module, which also offers implementations of the "quoted
+printable", "Q", and "URL" encodings. Please see netencoding.mli for
+details.</p>
+
+    </sect2>
+
+
+    <sect2>
+      <title>The MIME scanner functions</title>
+
+      <p>In the Mimestring module you can find several functions for scanning
+parts of MIME messages. These functions already cover most aspects of MIME
+messages: scanning of headers, analysis of structured header entries, and
+scanning of multipart bodies. Of course, a full-featured MIME scanner would
+require some more functions, especially concrete parsers for frequently used
+structures (mail addresses or date strings).
+</p>
+      <p>Please see the file mimestring.mli for details.</p>
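+      <p>For example, a multipart body that is already held in a string can be
+split into its parts as in the following sketch (the function and its labelled
+arguments appear in mimestring.ml; the boundary value is only an example):
+<code><![CDATA[
+let scan_parts s =
+  (* returns one (header, body) pair per part *)
+  Mimestring.scan_multipart_body s
+    ~start_pos:0 ~end_pos:(String.length s) ~boundary:"BOUND42"
+]]></code>
+</p>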
+    </sect2>
+
+
+    <sect2>
+      <title>The HTML parser</title>
+
+      <p>The HTML parser should be able to read every HTML file, whether it is
+correct or not. The parser tries to recover from parsing errors as much as
+possible.
+</p>
+      <p>The parser returns the HTML term as a conventional recursive value
+(i.e. no object-oriented design).</p>
+      <p>The parser depends somewhat on knowledge of the HTML version, mainly
+because it needs to know which tags are always empty. You may need to adjust
+this configuration before the parser works well enough for
+your purpose.
+</p>
+      <p>Please see the Nethtml module for details.</p>
+    </sect2>
+
+    <sect2>
+      <title>The abstract data type URL</title>
+      <p>The module Neturl contains support for URL parsing and processing.
+The implementation strictly follows the standards RFC 1738 and RFC 1808.
+URLs can be parsed, and several accessor functions allow the user to
+get components of parsed URLs, or to change components. Modifying URLs
+is safe; it is impossible to create a URL that does not have a valid
+string representation.</p>
+
+      <p>Both absolute and relative URLs are supported. It is possible to
+apply a relative URL to a base URL in order to get the corresponding
+absolute URL.</p>
+    </sect2>
+
+    <sect2>
+      <title>Conversion between character sets and encodings</title>
+      <p>The module Netconversion converts strings from one character set
+to another. It is Unicode-based, and there are conversion tables for more than
+50 encodings.</p>
+    </sect2>
+
+  </sect1>
+
+  <sect1>
+    <title>Author, Copying</title>
+    <p>
+<em>Netstring</em> has been written by &person.gps;. You may copy it as you
+like, and you may even use it for commercial purposes as long as the license
+conditions are respected; see the file LICENSE that comes with the
+distribution. It allows almost everything.
+</p>
+  </sect1>
+
+  <sect1>
+    <title>History</title>
+
+    <ul>
+      <li><p><em>Changed in 0.9.3:</em> Fixed a bug in the "install" rule of
+the Makefile.</p>
+      </li>
+      <li><p><em>Changed in 0.9.2:</em> New format for the conversion tables
+which are now much smaller.</p>
+      </li>
+      <li><p><em>Changed in 0.9.1:</em> Updated the Makefile such that
+(native-code) compilation of netmappings.ml becomes possible.
+</p>
+      </li>
+      <li><p><em>Changed in 0.9:</em> Extended Mimestring module: It can
+now process RFC-2047 messages.</p>
+       <p>New Netconversion module which converts strings between character
+encodings.</p>
+      </li>
+      <li><p><em>Changed in 0.8.1:</em> Added the component
+url_accepts_8bits to Neturl.url_syntax. This helps processing URLs which
+intentionally contain bytes >= 0x80.</p>
+       <p>Fixed a bug: Every URL containing a 'j' was malformed!</p>
+      </li>
+      <li><p><em>Changed in 0.8:</em> Added the module Neturl which 
+provides the abstract data types of URLs.</p>
+       <p>The whole package is now thread-safe.</p>
+       <p>Added printers for the various opaque data types.</p>
+       <p>Added labels to function arguments where appropriate. The
+following functions changed their signatures significantly:
+Cgi.mk_memory_arg, Cgi.mk_file_arg.</p>
+      </li>
+      <li><p><em>Changed in 0.7:</em> Added workarounds for frequent
+browser bugs. Some functions now take an additional argument
+specifying which workarounds are enabled.</p>
+      </li>
+      <li><p><em>Changed in 0.6.1:</em> Updated URLs in documentation.</p>
+      </li>
+
+      <li><p><em>Changed in 0.6:</em> The file upload has been re-implemented
+to support large files; the file is now read block by block and the blocks can
+be collected either in memory or in a temporary file.<br/>
+Furthermore, the CGI API has been revised. There is now an opaque data type
+"argument" that hides all implementation details and that is extensible (if
+necessary, it is possible to add features without breaking the interface
+again).<br/>
+The CGI argument parser can be configured; currently it is possible to
+limit the size of uploaded data, to control by which method arguments are
+processed, and to set up where temporary files are created.<br/>
+The other parts of the package that have nothing to do with CGI remain
+unchanged.
+</p>
+      </li>
+
+      <li><p><em>Changed in 0.5.1:</em> A mistake in the documentation has
+been corrected.</p>
+      </li>
+
+      <li><p><em>Initial version 0.5:</em>
+The Netstring package is intended to be the successor of the Base64-0.2 and
+the Cgi-0.3 packages. The sum of both version numbers is 0.5, and because of
+this, the first version number is 0.5.
+</p>
+      </li>
+    </ul>
+  </sect1>
+
+</readme>
+
diff --git a/helm/DEVEL/pxp/netstring/mimestring.ml b/helm/DEVEL/pxp/netstring/mimestring.ml
new file mode 100644 (file)
index 0000000..8fc4bfc
--- /dev/null
@@ -0,0 +1,1035 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+module Str = Netstring_str;;
+
+let cr_or_lf_re = Str.regexp "[\013\n]";;
+
+let trim_right_spaces_re =
+  Str.regexp "[ \t]+$";;
+
+let trim_left_spaces_re =
+  Str.regexp "^[ \t]+";;
+
+let header_re =
+  Str.regexp "\\([^ \t\r\n:]+\\):\\([ \t]*.*\n\\([ \t].*\n\\)*\\)";;
+
+let empty_line_re =
+  Str.regexp "\013?\n";;
+
+let end_of_header_re =
+  Str.regexp "\n\013?\n";;
+
+
+let scan_header ?(unfold=true) parstr ~start_pos:i0 ~end_pos:i1 =
+  let rec parse_header i l =
+    match Str.string_partial_match header_re parstr i with
+       Some r ->
+         let i' = Str.match_end r in
+         if i' > i1 then
+           failwith "Mimestring.scan_header";
+         let name = String.lowercase(Str.matched_group r 1 parstr) in
+         let value_with_crlf =
+           Str.matched_group r 2 parstr in
+         let value =
+           if unfold then begin
+             let value_with_rspaces =
+               Str.global_replace cr_or_lf_re "" value_with_crlf in
+             let value_with_lspaces =
+               Str.global_replace trim_right_spaces_re "" value_with_rspaces in
+             Str.global_replace trim_left_spaces_re "" value_with_lspaces 
+           end
+           else value_with_crlf
+         in
+         parse_header i' ( (name,value) :: l)
+      | None ->
+         (* The header must end with an empty line *)
+         begin match Str.string_partial_match empty_line_re parstr i with
+             Some r' ->
+               List.rev l, Str.match_end r'
+           | None ->
+               failwith "Mimestring.scan_header"
+         end
+  in
+  parse_header i0 []
+;;
+
+type s_token =
+    Atom of string
+  | EncodedWord of (string * string * string)
+  | QString of string
+  | Control of char
+  | Special of char
+  | DomainLiteral of string
+  | Comment
+  | End
+;;
+
+type s_option =
+    No_backslash_escaping
+  | Return_comments
+  | Recognize_encoded_words
+;;
+
+type s_extended_token =
+    { token      : s_token;
+      token_pos  : int;
+      token_line : int;
+      token_linepos : int;   (* Position of the beginning of the line *)
+      token_len  : int;
+      mutable token_sep : bool; (* separates adjacent encoded words *)
+    }
+;;
+
+let get_token et  = et.token;;
+let get_pos et    = et.token_pos;;
+let get_line et   = et.token_line;;
+let get_column et = et.token_pos - et.token_linepos;;
+let get_length et = et.token_len;;
+let separates_adjacent_encoded_words et = et.token_sep;;
+
+let get_decoded_word et =
+  match et.token with
+      Atom s -> s
+    | QString s -> s
+    | Control c -> String.make 1 c
+    | Special c -> String.make 1 c
+    | DomainLiteral s -> s
+    | Comment -> ""
+    | EncodedWord (_, encoding, content) ->
+       ( match encoding with
+             ("Q"|"q") ->
+               Netencoding.Q.decode content
+           | ("B"|"b") -> 
+               Netencoding.Base64.decode 
+                 ~url_variant:false
+                 ~accept_spaces:false
+                 content
+           | _ -> failwith "get_decoded_word"
+       )
+    | End -> 
+       failwith "get_decoded_word"
+;;
+
+let get_charset et =
+  match et.token with
+      EncodedWord (charset, _, _) -> charset
+    | End -> failwith "get_charset"
+    | _ -> "US-ASCII"
+;;
+
+type scanner_spec =
+    { (* What the user specifies: *)
+      scanner_specials : char list;
+      scanner_options : s_option list;
+      (* Derived from that: *)
+      mutable opt_no_backslash_escaping : bool;
+      mutable opt_return_comments : bool;
+      mutable opt_recognize_encoded_words : bool;
+
+      mutable is_special : bool array;
+      mutable space_is_special : bool;
+    }
+;;
+
+type scanner_target =
+    { scanned_string : string;
+      mutable scanner_pos : int;
+      mutable scanner_line : int;
+      mutable scanner_linepos : int; 
+      (* Position of the beginning of the line *)
+      mutable scanned_tokens : s_extended_token Queue.t;
+      (* A queue of already scanned tokens in order to look ahead *)
+      mutable last_token : s_token;
+      (* The last returned token. It is only important whether it is
+       * EncodedWord or not.
+       *)
+    }
+;;
+
+type mime_scanner = scanner_spec * scanner_target
+;;
+
+let get_pos_of_scanner (spec, target) = 
+  if spec.opt_recognize_encoded_words then
+    failwith "get_pos_of_scanner"
+  else
+    target.scanner_pos
+;;
+
+let get_line_of_scanner (spec, target) = 
+  if spec.opt_recognize_encoded_words then
+    failwith "get_line_of_scanner"
+  else
+    target.scanner_line
+;;
+
+let get_column_of_scanner (spec, target) = 
+  if spec.opt_recognize_encoded_words then
+    failwith "get_column_of_scanner"
+  else
+    target.scanner_pos - target.scanner_linepos 
+;;
+
+let create_mime_scanner ~specials ~scan_options =
+  let is_spcl = Array.create 256 false in
+  List.iter
+    (fun c -> is_spcl.( Char.code c ) <- true)
+    specials;
+  let spec =
+    { scanner_specials = specials;
+      scanner_options = scan_options;
+      opt_no_backslash_escaping = 
+       List.mem No_backslash_escaping scan_options;
+      opt_return_comments = 
+       List.mem Return_comments scan_options;
+      opt_recognize_encoded_words = 
+       List.mem Recognize_encoded_words scan_options;
+      is_special = is_spcl;
+      space_is_special = is_spcl.(32);
+    }
+  in
+  (* Grab the remaining arguments: *)
+  fun ?(pos=0) ?(line=1) ?(column=0) s ->
+    let target =
+      { scanned_string = s;
+       scanner_pos = pos;
+       scanner_line = line;
+       scanner_linepos = pos - column;
+       scanned_tokens = Queue.create();
+       last_token = Comment;   (* Must not be initialized with EncodedWord *)
+      }
+    in
+    spec, target
+;;
+
+
+let encoded_word_re =
+  Str.regexp "=\\?\\([^?]+\\)\\?\\([^?]+\\)\\?\\([^?]+\\)\\?=";;
+
+let scan_next_token ((spec,target) as scn) =
+  let mk_pair t len =
+    { token = t;
+      token_pos = target.scanner_pos;
+      token_line = target.scanner_line;
+      token_linepos = target.scanner_linepos;
+      token_len = len;
+      token_sep = false;
+    },
+    t
+  in
+
+  (* Note: mk_pair creates a new token pair, and it assumes that 
+   * target.scanner_pos (and also scanner_line and scanner_linepos)
+   * still contain the position of the beginning of the token.
+   *)
+
+  let s = target.scanned_string in
+  let l = String.length s in
+  let rec scan i =
+    if i < l then begin
+      let c = s.[i] in
+      if spec.is_special.( Char.code c ) then begin
+       let pair = mk_pair (Special c) 1 in
+       target.scanner_pos <- target.scanner_pos + 1;
+       (match c with
+            '\n' -> 
+              target.scanner_line    <- target.scanner_line + 1;
+              target.scanner_linepos <- target.scanner_pos;
+          | _ -> ()
+       );
+       pair
+      end
+      else
+       match c with
+           '"' -> 
+             (* Quoted string: *)
+             scan_qstring (i+1) (i+1) 0
+         | '(' ->
+             (* Comment: *)
+             let i', line, linepos = 
+               scan_comment (i+1) 0 target.scanner_line target.scanner_linepos
+             in
+             let advance() =
+               target.scanner_pos <- i';
+               target.scanner_line <- line;
+               target.scanner_linepos <- linepos
+             in
+             if spec.opt_return_comments then begin
+               let pair = mk_pair Comment (i' - i) in
+               advance();
+               pair
+             end
+             else 
+               if spec.space_is_special then begin
+                 let pair = mk_pair (Special ' ') (i' - i) in
+                 advance();
+                 pair
+               end
+               else begin
+                 advance();
+                 scan i'
+               end
+         | (' '|'\t'|'\r') ->
+             (* Ignore whitespace by default: *)
+             target.scanner_pos <- target.scanner_pos + 1;
+             scan (i+1)
+         | '\n' ->
+             (* Ignore whitespace by default: *)
+             target.scanner_pos     <- target.scanner_pos + 1;
+             target.scanner_line    <- target.scanner_line + 1;
+             target.scanner_linepos <- target.scanner_pos;
+             scan (i+1)
+         | ('\000'..'\031'|'\127'..'\255') ->
+             let pair = mk_pair (Control c) 1 in
+             target.scanner_pos <- target.scanner_pos + 1;
+             pair
+         | '[' ->
+             (* Domain literal: *)
+             scan_dliteral (i+1) (i+1) 0
+         | _ ->
+             scan_atom i i
+    end
+    else 
+      mk_pair End 0
+
+  and scan_atom i0 i =
+    let return_atom() =
+      let astring = String.sub s i0 (i-i0) in
+      let r =
+       if spec.opt_recognize_encoded_words then
+         Str.string_match ~groups:4 encoded_word_re astring 0
+       else
+         None
+      in
+      match r with
+         None ->
+           (* An atom contains never a linefeed character, so we can ignore
+            * scanner_line here.
+            *)
+           let pair = mk_pair (Atom astring) (i-i0) in
+           target.scanner_pos <- i;
+           pair
+       | Some mr ->
+           (* Found an encoded word. *)
+           let charset  = Str.matched_group mr 1 astring in
+           let encoding = Str.matched_group mr 2 astring in
+           let content  = Str.matched_group mr 3 astring in
+           let t = EncodedWord(String.uppercase charset,
+                                String.uppercase encoding,
+                                content) in
+           let pair = mk_pair t (i-i0) in
+           target.scanner_pos <- i;
+           pair
+    in
+
+    if i < l then
+      let c = s.[i] in
+      match c with
+         ('\000'..'\031'|'\127'..'\255'|'"'|'('|'['|' '|'\t'|'\r'|'\n') ->
+           return_atom()
+       | _ ->
+           if spec.is_special.( Char.code c ) then
+             return_atom()
+           else
+             scan_atom i0 (i+1)
+    else
+      return_atom()
+
+  and scan_qstring i0 i n =
+    if i < l then
+      let c = s.[i] in
+      match c with
+         '"' ->
+           (* Regular end of the quoted string: *)
+           let content, line, linepos = copy_qstring i0 (i-1) n in
+           let pair = mk_pair (QString content) (i-i0+2) in
+           target.scanner_pos <- i+1;
+           target.scanner_line <- line;
+           target.scanner_linepos <- linepos;
+           pair
+       | '\\' when not spec.opt_no_backslash_escaping ->
+           scan_qstring i0 (i+2) (n+1)
+       | _ ->
+           scan_qstring i0 (i+1) (n+1)
+    else
+      (* Missing right double quote *)
+      let content, line, linepos = copy_qstring i0 (l-1) n in
+      let pair = mk_pair (QString content) (l-i0+1) in
+      target.scanner_pos <- l;
+      target.scanner_line <- line;
+      target.scanner_linepos <- linepos;
+      pair
+
+  and copy_qstring i0 i1 n =
+    (* Used for quoted strings and for domain literals *)
+    let r = String.create n in
+    let k = ref 0 in
+    let line = ref target.scanner_line in
+    let linepos = ref target.scanner_linepos in
+    for i = i0 to i1 do
+      let c = s.[i] in
+      match c with
+         '\\' when i < i1 &&  not spec.opt_no_backslash_escaping -> ()
+       | '\n' ->
+           line := !line + 1;
+           linepos := i+1;
+           r.[ !k ] <- c; 
+           incr k
+       | _ -> 
+           r.[ !k ] <- c; 
+           incr k
+    done;
+    assert (!k = n);
+    r, !line, !linepos
+
+  and scan_dliteral i0 i n =
+    if i < l then
+      let c = s.[i] in
+      match c with
+         ']' ->
+           (* Regular end of the domain literal: *)
+           let content, line, linepos = copy_qstring i0 (i-1) n in
+           let pair = mk_pair (DomainLiteral content) (i-i0+2) in
+           target.scanner_pos <- i+1;
+           target.scanner_line <- line;
+           target.scanner_linepos <- linepos;
+           pair
+       | '\\' when not spec.opt_no_backslash_escaping ->
+           scan_dliteral i0 (i+2) (n+1)
+       | _ ->
+           (* Note: '[' is not allowed by RFC 822; we treat it here as
+            * a regular character (questionable)
+            *)
+           scan_dliteral i0 (i+1) (n+1)
+    else
+      (* Missing right bracket *)
+      let content, line, linepos = copy_qstring i0 (l-1) n in
+      let pair = mk_pair (DomainLiteral content) (l-i0+1) in
+      target.scanner_pos <- l;
+      target.scanner_line <- line;
+      target.scanner_linepos <- linepos;
+      pair
+
+
+  and scan_comment i level line linepos =
+    if i < l then
+      let c = s.[i] in
+      match c with
+         ')' ->
+           (i+1), line, linepos
+       | '(' ->
+           (* nested comment *)
+           let i', line', linepos' = 
+             scan_comment (i+1) (level+1) line linepos 
+           in
+           scan_comment i' level line' linepos'
+       | '\\' when not spec.opt_no_backslash_escaping ->
+           if (i+1) < l && s.[i+1] = '\n' then
+             scan_comment (i+2) level (line+1) (i+2)
+           else
+             scan_comment (i+2) level line linepos
+       | '\n' ->
+           scan_comment (i+1) level (line+1) (i+1)
+       | _ ->
+           scan_comment (i+1) level line linepos
+    else
+      (* Missing closing ')' *)
+      i, line, linepos
+  in
+
+  scan target.scanner_pos
+;;
+
+
+let scan_token ((spec,target) as scn) =
+  (* This function handles token queueing in order to recognize white space
+   * that separates adjacent encoded words.
+   *)
+
+  let rec collect_whitespace () =
+    (* Scans whitespace tokens and returns them as:
+     * (ws_list, other_tok)     if there is some other_tok following the
+     *                          list (other_tok = End is possible)
+     *)
+    let (et, t) as pair = scan_next_token scn in
+    ( match t with
+         (Special ' '|Special '\t'|Special '\n'|Special '\r') ->
+           let ws_list, tok = collect_whitespace() in
+           pair :: ws_list, tok
+       | _ ->
+           [], pair
+    )
+  in
+
+  try
+    (* Is there an already scanned token in the queue? *)
+    let et = Queue.take target.scanned_tokens in
+    let t = et.token in
+    target.last_token <- t;
+    et, et.token
+  with
+      Queue.Empty ->
+       (* If not: inspect the last token. If that token is an EncodedWord,
+        * the next tokens are scanned in advance to determine if there
+        * are spaces separating two EncodedWords. These tokens are put
+        * into the queue such that it is avoided that they are scanned
+        * twice. (The sole purpose of the queue.)
+        *)
+       match target.last_token with
+           EncodedWord(_,_,_) as ew ->
+             let ws_list, tok = collect_whitespace() in
+             (* If tok is an EncodedWord, too, the tokens in ws_list must
+              * be flagged as separating two adjacent encoded words. 
+              *)
+             ( match tok with
+                   _, EncodedWord(_,_,_) ->
+                     List.iter
+                       (fun (et,t) ->
+                          et.token_sep <- true)
+                       ws_list
+                 | _ ->
+                     ()
+             );
+             (* Anyway, queue the read tokens but the first up *)
+             ( match ws_list with
+                   [] ->
+                     (* Nothing to queue *)
+                     let et, t = tok in
+                     target.last_token <- t;
+                     tok
+                 | (et,t) as pair :: ws_list' ->
+                     List.iter
+                       (fun (et',_) -> 
+                          Queue.add et' target.scanned_tokens)
+                       ws_list';
+                     ( match tok with
+                         | _, End ->
+                             ()
+                         | (et',_) ->
+                             Queue.add et' target.scanned_tokens
+                     );
+                     (* Return the first scanned token *)
+                     target.last_token <- t;
+                     pair
+             )
+         | _ ->
+             (* Regular case: Scan one token; do not queue it up *)
+             let (et, t) as pair = scan_next_token scn in 
+             target.last_token <- t;
+             pair
+;;
+       
+
+let scan_token_list scn =
+  let rec collect() =
+    match scan_token scn with
+       _, End ->
+         []
+      | pair ->
+         pair :: collect()
+  in
+  collect()
+;;
+
+
+let scan_structured_value s specials options =
+  let rec collect scn =
+    match scan_token scn with
+       _, End ->
+         []
+      | _, t ->
+         t :: collect scn
+  in
+  let scn = create_mime_scanner specials options s in
+  collect scn
+;;
+
+
+let specials_rfc822 =
+  [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '.' ];;
+
+
+let specials_rfc2045 =
+  [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '/' ];;
+
+
+let scan_encoded_text_value s =
+  let specials = [ ' '; '\t'; '\r'; '\n'; '('; '['; '"' ] in
+  let options =  [ Recognize_encoded_words ] in
+  let scn = create_mime_scanner specials options s in
+  
+  let rec collect () =
+    match scan_token scn with
+       _, End ->
+         []
+      | et, _ when separates_adjacent_encoded_words et ->
+         collect()
+      | et, (Special _|Atom _|EncodedWord(_,_,_)) ->
+         et :: collect ()
+      | _, _ ->
+         assert false
+  in
+  collect()
+;;
+
+
+let scan_value_with_parameters s options =
+  let rec parse_params tl =
+    match tl with
+       Atom n :: Special '=' :: Atom v :: tl' ->
+         (n,v) :: parse_rest tl'
+      | Atom n :: Special '=' :: QString v :: tl' ->
+         (n,v) :: parse_rest tl'
+      | _ ->
+         failwith "Mimestring.scan_value_with_parameters"
+  and parse_rest tl =
+    match tl with
+       [] -> []
+      | Special ';' :: tl' ->
+         parse_params tl'
+      | _ ->
+         failwith "Mimestring.scan_value_with_parameters"
+  in
+
+  (* Note: Even if not used here, the comma is a very common separator
+   * and should be recognized as being special. You will get a
+   * failure if there is a comma in the scanned string.
+   *)
+  let tl = scan_structured_value s [ ';'; '='; ',' ] options in
+  match tl with
+      [ Atom n ] -> n, []
+    | [ QString n ] -> n, []
+    | Atom n :: Special ';' :: tl' ->
+       n, parse_params tl'
+    | QString n :: Special ';' :: tl' ->
+       n, parse_params tl'
+    | _ ->
+       failwith "Mimestring.scan_value_with_parameters"
+;;
+
+
+let scan_mime_type s options =
+  let n, params = scan_value_with_parameters s options in
+  (String.lowercase n),
+  (List.map (fun (n,v) -> (String.lowercase n, v)) params)
+;;
+
+
+let lf_re = Str.regexp "[\n]";;
+
+let scan_multipart_body s ~start_pos:i0 ~end_pos:i1 ~boundary =
+  let l_s = String.length s in
+  if i0 < 0 or i1 < 0 or i0 > l_s or i1 >l_s then
+    invalid_arg "Mimestring.scan_multipart_body";
+
+  (* First compile the regexps scanning for 'boundary': *)
+  let boundary1_re =
+    Str.regexp ("\n--" ^ Str.quote boundary) in
+  let boundary2_re =
+    Str.regexp ("--" ^ Str.quote boundary) in
+
+  let rec parse i =
+    (* i: Beginning of the current part (position directly after the
+     * boundary line
+     *)
+    (* Search for next boundary at position i *)
+    let i' =
+      try min (fst (Str.search_forward boundary1_re s i) + 1) i1
+      with
+         Not_found -> i1
+    in
+    (* i': Either the position of the first '-' of the boundary line,
+     *     or i1 if no boundary has been found
+     *)
+    if i' >= i1 then
+      [] (* Ignore everything after the last boundary *)
+    else
+      let i'' =
+       try min (fst (Str.search_forward lf_re s i') + 1) i1
+       with
+           Not_found -> i1
+      in
+      (* i'': The position after the boundary line *)
+(*
+      print_int i; print_newline();
+      print_int i'; print_newline();
+      print_int i''; print_newline();
+      flush stdout;
+*)
+      let header, k = scan_header s i i' in
+      (* header: the header of the part
+       * k: beginning of the body
+       *)
+
+      let value =
+       (* We know that i'-1 is a linefeed character. i'-2 should be a CR
+        * character. Both characters are not part of the value.
+        *)
+       if i' >= 2 then
+         match s.[i'-2] with
+             '\013' -> String.sub s k (i'-2-k)
+           | _      -> String.sub s k (i'-1-k)
+       else
+         String.sub s k (i'-1-k)
+      in
+
+      let pair =
+       (header, value) in
+
+      if i'' >= i1
+      then
+       [ pair ]
+      else
+       pair :: parse i''
+  in
+
+  (* Find the first boundary. This is a special case, because it may be
+   * right at the beginning of the string (no preceding CRLF)
+   *)
+
+  let i_bnd =
+    if Str.string_partial_match boundary2_re s i0 <> None then
+      i0
+    else
+      try min (fst (Str.search_forward boundary1_re s i0)) i1
+      with
+         Not_found -> i1
+  in
+
+  if i_bnd >= i1 then
+    []
+  else
+    let i_bnd' =
+      try min (fst (Str.search_forward lf_re s (i_bnd + 1)) + 1) i1
+      with
+         Not_found -> i1
+    in
+    if i_bnd' >= i1 then
+      []
+    else
+      parse i_bnd'
+;;
+
+
+let scan_multipart_body_and_decode s ~start_pos:i0 ~end_pos:i1 ~boundary =
+  let parts = scan_multipart_body s i0 i1 boundary in
+  List.map
+    (fun (params, value) ->
+       let encoding =
+        try List.assoc "content-transfer-encoding" params
+        with Not_found -> "7bit"
+       in
+
+       (* NOTE: In the case of "base64" and "quoted-printable", the allocation
+       * of the string "value" could be avoided.
+       *)
+
+       let value' =
+        match encoding with
+            ("7bit"|"8bit"|"binary") -> value
+          | "base64" ->
+              Netencoding.Base64.decode_substring
+                value 0 (String.length value) false true
+          | "quoted-printable" ->
+              Netencoding.QuotedPrintable.decode_substring
+                value 0 (String.length value)
+          | _ ->
+              failwith "Mimestring.scan_multipart_body_and_decode: Unknown content-transfer-encoding"
+       in
+       (params, value')
+    )
+    parts
+;;
+
+
+let scan_multipart_body_from_netstream s ~boundary ~create ~add ~stop =
+
+  (* The block size of s must be at least the length of the boundary + 3.
+   * Otherwise it is not guaranteed that the boundary is always recognized.
+   *)
+  if Netstream.block_size s < String.length boundary + 3 then
+    invalid_arg "Mimestring.scan_multipart_body_from_netstream";
+
+  (* First compile the regexps scanning for 'boundary': *)
+  let boundary1_re =
+    Str.regexp ("\n--" ^ Str.quote boundary) in
+  let boundary2_re =
+    Str.regexp ("--" ^ Str.quote boundary) in
+
+  (* Subtask 1: Search the end of the MIME header: CR LF CR LF
+   *            (or LF LF). Enlarge the window until the complete header
+   *            is covered by the window.
+   *)
+  let rec search_end_of_header k =
+    (* Search the end of the header beginning at position k of the
+     * current window.
+     * Return the position of the first character of the body.
+     *)
+    try
+      (* Search for LF CR? LF: *)
+      let i, r = Str.search_forward
+                  end_of_header_re
+                  (Netbuffer.unsafe_buffer (Netstream.window s))
+                  k
+      in
+      (* If match_end <= window_length, the search was successful.
+       * Otherwise, we searched in the uninitialized region of the
+       * buffer.
+       *)
+      if Str.match_end r <= Netstream.window_length s then
+       Str.match_end r
+      else
+       raise Not_found
+    with
+       Not_found ->
+         (* If the end of the stream is reached, the end of the header
+          * is missing: Error.
+          * Otherwise, we try to read another block, and continue.
+          *)
+         if Netstream.at_eos s then
+           failwith "Mimestring.scan_multipart_body_from_netstream: Unexpected end of stream";
+         let w0 = Netstream.window_length s in
+         Netstream.want_another_block s;
+         search_end_of_header (max (w0 - 2) 0)
+  in
+
+  (* Subtask 2: Search the first boundary line. *)
+  let rec search_first_boundary() =
+    (* Search boundary per regexp; return the position of the character
+     * immediately following the boundary (on the same line), or
+     * raise Not_found.
+     *)
+    try
+      (* Search boundary per regexp: *)
+      let i, r = Str.search_forward
+                  boundary1_re
+                  (Netbuffer.unsafe_buffer (Netstream.window s))
+                  0
+      in
+      (* If match_end <= window_length, the search was successful.
+       * Otherwise, we searched in the uninitialized region of the
+       * buffer.
+       *)
+      if Str.match_end r <= Netstream.window_length s then begin
+       Str.match_end r
+      end
+      else raise Not_found
+    with
+       Not_found ->
+         if Netstream.at_eos s then raise Not_found;
+         (* The regexp did not match: Move the window by one block.
+          *)
+         let n =
+           min
+             (Netstream.window_length s)
+             (Netstream.block_size s)
+         in
+         Netstream.move s n;
+         search_first_boundary()
+  in
+
+  (* Subtask 3: Search the next boundary line. Invoke 'add' for every
+   * read chunk
+   *)
+  let rec search_next_boundary p =
+    (* Returns the position directly after the boundary on the same line *)
+    try
+      (* Search boundary per regexp: *)
+      let i,r = Str.search_forward
+                 boundary1_re
+                 (Netbuffer.unsafe_buffer (Netstream.window s))
+                 0
+      in
+      (* If match_end <= window_length, the search was successful.
+       * Otherwise, we searched in the uninitialized region of the
+       * buffer.
+       *)
+      if Str.match_end r <= Netstream.window_length s then begin
+       (* Add the last chunk of the part. *)
+       let n =
+         (* i is a LF. i - 1 should be CR. Ignore these characters. *)
+         if i >= 1 then
+           match (Netbuffer.unsafe_buffer (Netstream.window s)).[ i - 1 ] with
+               '\013' -> i - 1
+             | _      -> i
+         else
+           i
+       in
+       (* Printf.printf "add n=%d\n" n; *)
+       add p s 0 n;
+       Str.match_end r
+      end
+      else raise Not_found
+    with
+       Not_found ->
+         if Netstream.at_eos s then
+           failwith "Mimestring.scan_multipart_body_from_netstream: next MIME boundary not found";
+         (* The regexp did not match: Add the first block of the window;
+          * and move the window.
+          *)
+         let n =
+           min
+             (Netstream.window_length s)
+             (Netstream.block_size s)
+         in
+         (* Printf.printf "add n=%d\n" n; *)
+         add p s 0 n;
+         Netstream.move s n;
+         search_next_boundary p
+  in
+
+  (* Subtask 4: Search the end of the boundary line *)
+  let rec search_end_of_line k =
+    (* Search LF beginning at position k. Discard any contents until that. *)
+    try
+      let i,r = Str.search_forward
+                 lf_re
+                 (Netbuffer.unsafe_buffer (Netstream.window s))
+                 k
+      in
+      (* If match_end <= window_length, the search was successful.
+       * Otherwise, we searched in the uninitialized region of the
+       * buffer.
+       *)
+      if Str.match_end r <= Netstream.window_length s then begin
+        Str.match_end r
+      end
+      else raise Not_found
+    with
+       Not_found ->
+         if Netstream.at_eos s then
+           failwith "Mimestring.scan_multipart_body_from_netstream: MIME boundary without line end";
+         (* The regexp did not match: move the window.
+          *)
+         let n = Netstream.window_length s in
+         Netstream.move s n;
+         search_end_of_line 0
+  in
+
+  (* Subtask 5: Check whether "--" follows the boundary on the same line *)
+  let check_whether_last_boundary k =
+    (* k: The position directly after the boundary. *)
+    Netstream.want s (k+2);
+    let str = Netbuffer.unsafe_buffer (Netstream.window s) in
+    (Netstream.window_length s >= k+2) && str.[k] = '-' && str.[k+1] = '-'
+  in
+
+  (* Subtask 6: Check whether the buffer begins with a boundary. *)
+  let check_beginning_is_boundary () =
+    let m = String.length boundary + 2 in
+    Netstream.want s m;
+    let str = Netbuffer.unsafe_buffer (Netstream.window s) in
+    (Netstream.window_length s >= m) &&
+    (Str.string_partial_match boundary2_re str 0 <> None)
+  in
+
+  let rec parse_part () =
+    (* The first byte of the current window of s contains the character
+     * directly following the boundary line that starts this part.
+     *)
+    (* Search the end of the MIME header: *)
+    let k_eoh = search_end_of_header 0 in
+    (* Printf.printf "k_eoh=%d\n" k_eoh; *)
+    (* Get the MIME header: *)
+    let str = Netbuffer.unsafe_buffer (Netstream.window s) in
+    let header, k_eoh' = scan_header str 0 k_eoh in
+    assert (k_eoh = k_eoh');
+    (* Move the window over the header: *)
+    Netstream.move s k_eoh;
+    (* Create the part: *)
+    let p = create header in
+    let continue =
+      begin try
+       (* Search the next boundary; add the chunks while searching: *)
+       let k_eob = search_next_boundary p in
+       (* Printf.printf "k_eob=%d\n" k_eob; *)
+        (* Is this the last boundary? *)
+       if check_whether_last_boundary k_eob then begin
+         (* Skip the rest: *)
+         while not (Netstream.at_eos s) do
+           Netstream.move s (Netstream.window_length s)
+         done;
+         Netstream.move s (Netstream.window_length s);
+         false
+       end
+       else begin
+         (* Move to the beginning of the next line: *)
+         let k_eol = search_end_of_line k_eob in
+         Netstream.move s k_eol;
+         true
+       end
+      with
+         any ->
+           (try stop p with _ -> ());
+           raise any
+      end in
+      stop p;
+      if continue then
+       (* Continue with next part: *)
+       parse_part()
+  in
+
+  (* Check whether s directly begins with a boundary: *)
+  if check_beginning_is_boundary() then begin
+    (* Move to the beginning of the next line: *)
+    let k_eol = search_end_of_line 0 in
+    Netstream.move s k_eol;
+    (* Begin with first part: *)
+    parse_part()
+  end
+  else begin
+    (* Search the first boundary: *)
+    try
+      let k_eob = search_first_boundary() in
+      (* Printf.printf "k_eob=%d\n" k_eob; *)
+      (* Move to the beginning of the next line: *)
+      let k_eol = search_end_of_line k_eob in
+      (* Printf.printf "k_eol=%d\n" k_eol; *)
+      Netstream.move s k_eol;
+      (* Begin with first part: *)
+      parse_part()
+    with
+       Not_found ->
+         (* No boundary at all: The body is empty. *)
+         ()
+  end;
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.8  2000/08/13 00:04:36  gerd
+ *     Encoded_word -> EncodedWord
+ *     Bugfixes.
+ *
+ * Revision 1.7  2000/08/07 00:25:14  gerd
+ *     Implemented the new functions for structured field lexing.
+ *
+ * Revision 1.6  2000/06/25 22:34:43  gerd
+ *     Added labels to arguments.
+ *
+ * Revision 1.5  2000/06/25 21:15:48  gerd
+ *     Checked thread-safety.
+ *
+ * Revision 1.4  2000/05/16 22:30:14  gerd
+ *     Added support for some types of malformed MIME messages.
+ *
+ * Revision 1.3  2000/04/15 13:09:01  gerd
+ *     Implemented uploads to temporary files.
+ *
+ * Revision 1.2  2000/03/02 01:15:30  gerd
+ *     Updated.
+ *
+ * Revision 1.1  2000/02/25 15:21:12  gerd
+ *     Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/mimestring.mli b/helm/DEVEL/pxp/netstring/mimestring.mli
new file mode 100644 (file)
index 0000000..39634b5
--- /dev/null
@@ -0,0 +1,683 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(**********************************************************************)
+(* Collection of auxiliary functions to parse MIME headers            *)
+(**********************************************************************)
+
+
+val scan_header : 
+       ?unfold:bool ->
+       string -> start_pos:int -> end_pos:int -> 
+         ((string * string) list * int)
+    (* let params, i2 = scan_header s i0 i1:
+     *
+     * DESCRIPTION
+     *
+     * Scans the MIME header that begins at position i0 in the string s
+     * and that must end somewhere before position i1. It is intended
+     * that i1 is the character position following the end of the body
+     * of the MIME message.
+     * Returns the parameters of the header as (name,value) pairs (in
+     * params), and in i2 the position of the character directly
+     * following the header (i.e. after the blank line separating
+     * the header from the body).
+     * The following normalizations have already been applied:
+     * - The names are all in lowercase
+     * - Newline characters (CR and LF) have been removed (unless
+     *   ?unfold:false has been passed)
+     * - Whitespace at the beginning and at the end of values has been
+     *   removed (unless ?unfold:false is specified)
+     * The rules of RFC 2047 have NOT been applied.
+     * The function fails if the header violates the header format
+     * The function fails if the header severely violates the header
+     * format. (Some minor deviations are tolerated, e.g. it is sufficient
+     *
+     * OPTIONS:
+     *
+     * unfold: If true (the default), folded lines are concatenated and
+     *   returned as one line. This means that CR and LF characters are
+     *   deleted and that whitespace at the beginning and the end of the
+     *   string is removed.
+     *   You may set ?unfold:false to locate individual characters in the
+     *   parameter value exactly.
+     *
+     * ABOUT MIME MESSAGE FORMAT:
+     *
+     * This is the modern name for messages in "E-Mail format". Messages
+     * consist of a header and a body; the first empty line separates both
+     * parts. The header contains lines "param-name: param-value" where
+     * the param-name must begin on column 0 of the line, and the ":"
+     * separates the name and the value. So the format is roughly:
+     *
+     * param1-name: param1-value
+     * ...
+     * paramN-name: paramN-value
+     *
+     * body
+     *
+     * This function expects in i0 the position of the first character of
+     * param1-name in the string, and in i1 the position of the character
+     * following the body. It returns as i2 the position where the body
+     * begins. Furthermore, 'params' contains all parameters that
+     * exist in the header.
+     *
+     * DETAILS
+     *
+     * Note that parameter values are restricted; you cannot represent
+     * arbitrary strings. The following problems can arise:
+     * - Values cannot begin with whitespace characters, because there
+     *   may be an arbitrary number of whitespaces between the ':' and the
+     *   value.
+     * - Values (and names of parameters, too) must only be formed of
+     *   7 bit ASCII characters. (If this is not enough, the MIME standard
+     *   provides the extension RFC 2047, which allows header values to be
+     *   composed of arbitrary characters of arbitrary character sets.)
+     * - Header values may be broken into several lines; the continuation
+     *   lines must begin with whitespace characters. This means that values
+     *   must not contain line breaks as a semantic part of the value.
+     *   And it may mean that ONE whitespace character is not distinguishable
+     *   from SEVERAL whitespace characters.
+     * - Header lines must not be longer than 76 characters. Values that
+     *   would result in longer lines must be broken into several lines.
+     *   This means that you cannot represent long strings that contain
+     *   too few whitespace characters at which they could be broken.
+     * - Some gateways pad the lines with spaces at the end of the lines.
+     *
+     * This implementation of a MIME scanner tolerates a number of
+     * deviations from the standard: long lines are not rejected; 8 bit
+     * values are accepted; lines may be ended only with LF instead of
+     * CRLF.
+     * Furthermore, header values are transformed:
+     * - leading and trailing spaces are always removed
+     * - CRs and LFs are deleted; it is guaranteed that there is at least
+     *   one space or tab where CR/LFs are deleted.
+     * Last but not least, the names of the header fields are converted
+     * to lowercase; MIME specifies that they are case-insensitive.
+     *
+     * COMPATIBILITY WITH THE STANDARD
+     *
+     * This function can parse all MIME headers that conform to RFC 822.
+     * But there may still be problems, as RFC 822 allows some crazy
+     * representations that are actually not used in practice.
+     * In particular, RFC 822 allows the use of backslashes to "indicate"
+     * that a CRLF sequence is semantically meant as a line break. As this
+     * function normally deletes CRLFs, it is not possible to recognize such
+     * indicators in the result of the function.
+     *)
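+
+    (* A minimal usage sketch (not part of the original interface; the
+     * message string below is purely illustrative):
+     *
+     * let msg = "Content-type: text/plain\r\nSubject: Hi\r\n\r\nHello!" in
+     * let params, body_pos =
+     *   scan_header msg ~start_pos:0 ~end_pos:(String.length msg) in
+     *
+     * Here params should be [ "content-type", "text/plain"; "subject", "Hi" ]
+     * and body_pos should be the index of 'H' in "Hello!".
+     *)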
+
+(**********************************************************************)
+
+(* The following types and functions allow you to build scanners for
+ * structured MIME values in a highly configurable way.
+ *
+ * WHAT ARE STRUCTURED VALUES?
+ *
+ * RFC 822 (together with some other RFCs) defines lexical rules for
+ * how formal MIME header values should be divided up into tokens. Formal
+ * MIME headers are those headers that are formed according to some
+ * grammar, e.g. mail addresses or MIME types.
+ *    Some of the characters separate phrases of the value; these are
+ * the "special" characters. For example, '@' is normally a special
+ * character for mail addresses, because it separates the user name
+ * from the domain name. RFC 822 defines a fixed set of special
+ * characters, but other RFCs use different sets. Because of this,
+ * the following functions allow you to configure the set of special characters.
+ *    Every sequence of characters may be embraced by double quotes,
+ * which means that the sequence is meant as a literal data item;
+ * special characters are not recognized inside a quoted string. You may
+ * use the backslash to insert any character (including double quotes)
+ * verbatim into the quoted string (e.g. "He said: \"Give it to me!\"").
+ * The sequence of a backslash character and another character is called
+ * a quoted pair.
+ *    Structured values may contain comments. The beginning of a comment
+ * is indicated by '(', and the end by ')'. Comments may be nested.
+ * Comments may contain quoted pairs. A
+ * comment counts as if a space character were written instead of it.
+ *    Control characters are the ASCII characters 0 to 31, and 127.
+ * RFC 822 demands that MIME headers are 7 bit ASCII strings. Because
+ * of this, this function also counts the characters 128 to 255 as
+ * control characters.
+ *    Domain literals are strings embraced by '[' and ']'; such literals
+ * may contain quoted pairs. Today, domain literals are used to specify
+ * IP addresses.
+ *    Every character sequence not falling in one of the above categories
+ * is an atom (a sequence of non-special and non-control characters).
+ * When recognized, atoms may be encoded in a character set different from
+ * US-ASCII; such atoms are called encoded words (see RFC 2047).
+ *
+ * EXTENDED INTERFACE:
+ *
+ * In order to scan a string containing a MIME value, you must first
+ * create a mime_scanner using the function create_mime_scanner.
+ * The scanner contains the reference to the scanned string, and a 
+ * specification how the string is to be scanned. The specification
+ * consists of the lists 'specials' and 'scan_options'.
+ *
+ * The character list 'specials' specifies the set of special characters.
+ * These characters are returned as Special c token; the following additional
+ * rules apply:
+ *
+ * - Spaces:
+ *   If ' ' in specials: A space character is returned as Special ' '.
+ *       Note that there may also be an effect on how comments are returned
+ *       (see below).
+ *   If ' ' not in specials: Spaces are ignored.
+ *
+ * - Tabs, CRs, LFs:
+ *   If '\t' in specials: A tab character is returned as Special '\t'.
+ *   If '\t' not in specials: Tabs are ignored.
+ *
+ *   If '\r' in specials: A CR character is returned as Special '\r'.
+ *   If '\r' not in specials: CRs are ignored.
+ *
+ *   If '\n' in specials: A LF character is returned as Special '\n'.
+ *   If '\n' not in specials: LFs are ignored.
+ *
+ * - Comments:
+ *   If '(' in specials: Comments are not recognized. The character '('
+ *       is returned as Special '('.
+ *   If '(' not in specials: Comments are recognized. How comments are
+ *       returned depends on the following:
+ *       If Return_comments in scan_options: Outer comments are returned as
+ *           Comment (note that inner comments count but
+ *           are not returned as tokens)
+ *       If otherwise ' ' in specials: Outer comments are returned as
+ *           Special ' '
+ *       Otherwise: Comments are recognized but ignored.
+ *
+ * - Quoted strings:
+ *   If '"' in specials: Quoted strings are not recognized, and double quotes
+ *       are returned as Special '"'.
+ *   If '"' not in specials: Quoted strings are returned as QString tokens.
+ *
+ * - Domain literals:
+ *   If '[' in specials: Domain literals are not recognized, and left brackets
+ *       are returned as Special '['.
+ *   If '[' not in specials: Domain literals are returned as DomainLiteral
+ *       tokens.
+ *
+ * Note that the rule for domain literals is completely new in netstring-0.9.
+ * It may cause incompatibilities with previous versions if '[' is not
+ * special.
+ *
+ * The general rule for special characters: Every special character c is
+ * returned as Special c, and any additional scanning functionality 
+ * for this character is turned off.
+ *
+ * If recognized, quoted strings are returned as QString s, where
+ * s is the string without the embracing quotes, and with already
+ * decoded quoted pairs.
+ *
+ * Control characters c are returned as Control c.
+ *
+ * If recognized, comments may either be returned as spaces (in the case
+ * you are not interested in the contents of comments), or as Comment tokens.
+ * The contents of comments are not further scanned; you must start a
+ * subscanner to analyze comments as structured values.
+ *
+ * If recognized, domain literals are returned as DomainLiteral s, where
+ * s is the literal without brackets, and with decoded quoted pairs.
+ *
+ * Atoms are returned as Atom s where s is a longest sequence of
+ * atomic characters (all characters which are neither special nor control
+ * characters nor delimiters for substructures). If the option
+ * Recognize_encoded_words is on, atoms which look like encoded words
+ * are returned as EncodedWord tokens. (Important note: for this to work,
+ * neither '?' nor '=' may be in the list of special characters.)
+ *
+ * After the mime_scanner has been created, you can scan the tokens by
+ * invoking scan_token which returns one token at a time, or by invoking
+ * scan_token_list which returns all following tokens.
+ *
+ * There are two token types: s_token is the base type and is intended to
+ * be used for pattern matching. s_extended_token is a wrapper that 
+ * additionally contains information where the token occurs.
+ *
+ * SIMPLE INTERFACE
+ *
+ * Instead of creating a mime_scanner and calling the scan functions,
+ * you may also invoke scan_structured_value. This function returns the
+ * list of tokens directly; however, it is restricted to s_token.
+ *
+ * EXAMPLES
+ *
+ * scan_structured_value "user@domain.com" [ '@'; '.' ] []
+ *   = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+ *
+ * scan_structured_value "user @ domain . com" [ '@'; '.' ] []
+ *   = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+ *
+ * scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ] []
+ *   = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+ *
+ * scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ] 
+ *     [ Return_comments ]
+ *   = [ Atom "user"; Comment; Special '@'; Atom "domain"; Special '.'; 
+ *       Atom "com" ]
+ *
+ * scan_structured_value "user (Do you know him?) @ domain . com" 
+ *     [ '@'; '.'; ' ' ] []
+ *   = [ Atom "user"; Special ' '; Special ' '; Special ' '; Special '@'; 
+ *       Special ' '; Atom "domain";
+ *       Special ' '; Special '.'; Special ' '; Atom "com" ]
+ *
+ * scan_structured_value "user (Do you know him?) @ domain . com" 
+ *     [ '@'; '.'; ' ' ] [ Return_comments ]
+ *   = [ Atom "user"; Special ' '; Comment; Special ' '; Special '@'; 
+ *       Special ' '; Atom "domain";
+ *       Special ' '; Special '.'; Special ' '; Atom "com" ]
+ *
+ * scan_structured_value "user @ domain . com" [ '@'; '.'; ' ' ] []
+ *   = [ Atom "user"; Special ' '; Special '@'; Special ' '; Atom "domain";
+ *       Special ' '; Special '.'; Special ' '; Atom "com" ]
+ *
+ * scan_structured_value "user(Do you know him?)@domain.com" ['@'; '.'; '(']
+ *     []
+ *   = [ Atom "user"; Special '('; Atom "Do"; Atom "you"; Atom "know";
+ *       Atom "him?)"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+ *
+ * scan_structured_value "\"My.name\"@domain.com" [ '@'; '.' ] []
+ *   = [ QString "My.name"; Special '@'; Atom "domain"; Special '.';
+ *       Atom "com" ]
+ *
+ * scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" 
+ *     [ ] [ ] 
+ *   = [ Atom "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" ]
+ *
+ * scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" 
+ *     [ ] [ Recognize_encoded_words ] 
+ *   = [ EncodedWord("ISO-8859-1", "Q", "Keld_J=F8rn_Simonsen") ]
+ *
+ *)
+
+
+
+type s_token =
+    Atom of string
+  | EncodedWord of (string * string * string)
+  | QString of string
+  | Control of char
+  | Special of char
+  | DomainLiteral of string
+  | Comment
+  | End
+
+(* - Words are: Atom, EncodedWord, QString.
+ * - Atom s: The character sequence forming the atom is contained in s
+ * - EncodedWord(charset, encoding, encoded_string) means:
+ *   * charset is the (uppercase) character set
+ *   * encoding is either "Q" or "B"
+ *   * encoded_string: contains the text of the word; the text is represented
+ *     as octet string following the conventions for character set charset and 
+ *     then encoded either as "Q" or "B" string.
+ * - QString s: Here, s are the characters inside the double quotes after
+ *   decoding any quoted pairs (backslash + character pairs)
+ * - Control c: The control character c
+ * - Special c: The special character c
+ * - DomainLiteral s: s contains the characters inside the brackets after
+ *   decoding any quoted pairs
+ * - Comment: if the option Return_comments is specified, this token
+ *   represents the whole comment.
+ * - End: Is returned after the last token
+ *)
+
+
+type s_option =
+    No_backslash_escaping
+      (* Do not handle backslashes in quoted strings and comments as escape
+       * characters; backslashes are handled as normal characters.
+       * For example: "C:\dir\file" will be returned as
+       * QString "C:\dir\file", and not as QString "C:dirfile".
+       * - This is a common error in many MIME implementations.
+       *)
+  | Return_comments
+      (* Comments are returned as token Comment (unless '(' is included
+       * in the list of special characters, in which case comments are
+       * not recognized at all).
+       * You may get the exact location of the comment by applying
+       * get_pos and get_length to the extended token.
+       *)
+  | Recognize_encoded_words
+      (* Causes encoded words to be recognized and returned as
+       * EncodedWord(charset,encoding,content) instead of Atom.
+       *)
+
+type s_extended_token
+  (* An opaque type containing s_token plus:
+   * - where the token occurs
+   * - RFC-2047 access functions
+   *)
+
+val get_token : s_extended_token -> s_token
+    (* Return the s_token within the s_extended_token *)
+
+val get_decoded_word : s_extended_token -> string
+val get_charset : s_extended_token -> string
+    (* Return the decoded word (the contents of the word after decoding the
+     * "Q" or "B" representation), and the character set of the decoded word
+     * (uppercase).
+     * These functions work not only for EncodedWord:
+     * - Atom: Returns the atom without decoding it
+     * - QString: Returns the characters inside the double quotes, and
+     *   decodes any quoted pairs (backslash + character)
+     * - Control: Returns the one-character string
+     * - Special: Returns the one-character string
+     * - DomainLiteral: Returns the characters inside the brackets, and
+     *   decodes any quoted pairs
+     * - Comment: Returns ""
+     * The character set is "US-ASCII" for these tokens.
+     *)
+
+val get_pos : s_extended_token -> int
+    (* Return the byte position where the token starts in the string 
+     * (the first byte has position 0)
+     *)
+
+val get_line : s_extended_token -> int
+    (* Return the line number where the token starts (numbering begins
+     * usually with 1) 
+     *)
+
+val get_column : s_extended_token -> int
+    (* Return the column of the line where the token starts (first column
+     * is number 0)
+     *)
+
+val get_length : s_extended_token -> int
+    (* Return the length of the token in bytes *)
+
+val separates_adjacent_encoded_words : s_extended_token -> bool
+    (* True iff the current token is white space (Special ' ', Special '\t',
+     * Special '\r' or Special '\n') and the last non-white space token
+     * was EncodedWord and the next non-white space token will be
+     * EncodedWord.
+     * Such spaces do not count and must be ignored by any application.
+     *)
+
+
+type mime_scanner
+
+val create_mime_scanner : 
+      specials:char list -> 
+      scan_options:s_option list -> 
+      ?pos:int ->
+      ?line:int ->
+      ?column:int ->
+      string -> 
+        mime_scanner
+    (* Creates a new mime_scanner scanning the passed string.
+     * specials: The list of characters recognized as special characters.
+     * scan_options: The list of global options modifying the behaviour
+     *   of the scanner
+     * pos: The position of the byte where the scanner starts in the
+     *   passed string. Defaults to 0.
+     * line: The line number of this byte. Defaults to 1.
+     * column: The column number of this byte. Defaults to 0.
+     *
+     * The optional parameters pos, line, column are intentionally placed after
+     * scan_options and before the string argument, so you can build scanner
+     * specifications by partially applying create_mime_scanner; such a
+     * specification is not yet connected with a particular string:
+     * let my_scanner_spec = create_mime_scanner my_specials my_options in
+     * ...
+     * let my_scanner = my_scanner_spec my_string in 
+     * ...
+     *)
+
+val get_pos_of_scanner : mime_scanner -> int
+val get_line_of_scanner : mime_scanner -> int
+val get_column_of_scanner : mime_scanner -> int
+    (* Return the current position, line, and column of a mime_scanner.
+     * The primary purpose of these functions is to simplify switching
+     * from one mime_scanner to another within a string:
+     *
+     * let scanner1 = create_mime_scanner ... s in
+     * ... now scanning some tokens from s using scanner1 ...
+     * let scanner2 = create_mime_scanner ... 
+     *                  ?pos:(get_pos_of_scanner scanner1)
+     *                  ?line:(get_line_of_scanner scanner1)
+     *                  ?column:(get_column_of_scanner scanner1)
+     *                  s in
+     * ... scanning more tokens from s using scanner2 ...
+     *
+     * RESTRICTION: These functions are not available if the option
+     * Recognize_encoded_words is on. The reason is that this option
+     * enables look-ahead scanning; please use the location of the last
+     * scanned token instead.
+     * It is currently not clear whether a better implementation is needed
+     * (costs a bit more time).
+     *
+     * Note: To improve the performance of switching, it is recommended to
+     * create scanner specs in advance (see the example my_scanner_spec
+     * above).
+     *)
+
+val scan_token : mime_scanner -> (s_extended_token * s_token)
+    (* Returns the next token, or End if there is no more token. *)
+
+val scan_token_list : mime_scanner -> (s_extended_token * s_token) list
+    (* Returns all following tokens as a list (excluding End) *)
+
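+    (* A short sketch of the extended interface (the scanned string is just
+     * an illustration):
+     *
+     * let spec = create_mime_scanner [ '@'; '.' ] [] in
+     * let sc = spec "user@domain.com" in
+     * let tokens = List.map snd (scan_token_list sc) in
+     *
+     * tokens should be
+     *   [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ],
+     * and the extended tokens (the first components) additionally carry
+     * positions, accessible with get_pos and get_length.
+     *)
+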
+val scan_structured_value : string -> char list -> s_option list -> s_token list
+    (* This function is included for backwards compatibility, and for all
+     * cases not requiring extended tokens.
+     *
+     * It scans the passed string according to the list of special characters
+     * and the list of options, and returns the list of all tokens.
+     *)
+
+val specials_rfc822 : char list
+val specials_rfc2045 : char list
+    (* The sets of special characters defined by the RFCs 822 and 2045.
+     *
+     * CHANGE in netstring-0.9: '[' and ']' are no longer special because
+     * there is now support for domain literals.
+     * '?' and '=' are not special in the rfc2045 version because there is
+     * already support for encoded words.
+     *)
+
+
+(**********************************************************************)
+
+(* Widely used scanners: *)
+
+
+val scan_encoded_text_value : string -> s_extended_token list
+    (* Scans a "text" value. The returned token list contains only
+     * Special, Atom and EncodedWord tokens. 
+     * Spaces, TABs, CRs, LFs are returned unless
+     * they occur between adjacent encoded words in which case
+     * they are ignored.
+     *)
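+
+    (* For illustration (a sketch; the header text is made up): scanning
+     *   "Hello =?ISO-8859-1?Q?J=F8rn?="
+     * should yield, after mapping get_token over the result, roughly
+     *   [ Atom "Hello"; Special ' '; EncodedWord("ISO-8859-1", "Q", "J=F8rn") ].
+     * The space is kept here because it does not separate two adjacent
+     * encoded words.
+     *)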
+
+
+val scan_value_with_parameters : string -> s_option list ->
+                                   (string * (string * string) list)
+    (* let name, params = scan_value_with_parameters s options:
+     * Scans phrases like
+     *    name ; p1=v1 ; p2=v2 ; ...
+     * The scan is done with the set of special characters [';', '='].
+     *)
+
+val scan_mime_type : string -> s_option list ->
+                       (string * (string * string) list)
+    (* let name, params = scan_mime_type s options:
+     * Scans MIME types like
+     *    text/plain; charset=iso-8859-1
+     * The name of the type and the names of the parameters are converted
+     * to lower case.
+     *)
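+
+    (* For illustration (a sketch based on the description above, not a
+     * literal transcript of the implementation):
+     *
+     * scan_mime_type "text/plain; charset=ISO-8859-1" []
+     *   = ("text/plain", [ "charset", "ISO-8859-1" ])
+     *
+     * Only the type name and the parameter names are lowercased; the
+     * parameter values are returned as found.
+     *)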
+
+
+(**********************************************************************)
+
+(* Scanners for MIME bodies *)
+
+val scan_multipart_body : string -> start_pos:int -> end_pos:int -> 
+                            boundary:string ->
+                            ((string * string) list * string) list
+    (* let [params1, value1; params2, value2; ...]
+     *   = scan_multipart_body s i0 i1 b
+     *
+     * Scans the string s that is the body of a multipart message.
+     * The multipart message begins at position i0 in s, and i1 is the position
+     * of the character following the message. In b the boundary string
+     * must be passed (this is the "boundary" parameter of the multipart
+     * MIME type, e.g. multipart/mixed;boundary="some string" ).
+     *     The return value is the list of the parts, where each part
+     * is returned as pair (params, value). The left component params
+     * is the list of name/value pairs of the header of the part. The
+     * right component is the RAW content of the part, i.e. if the part
+     * is encoded ("content-transfer-encoding"), the content is returned
+     * in the encoded representation. The caller must decode the
+     * content himself.
+     *     The material before the first boundary and after the last
+     * boundary is not returned.
+     *
+     * MULTIPART MESSAGES
+     *
+     * The MIME standard defines a way to group several message parts to
+     * a larger message (for E-Mails this technique is known as "attaching"
+     * files to messages); these are the so-called multipart messages.
+     * Such messages are recognized by the major type string "multipart",
+     * e.g. multipart/mixed or multipart/form-data. Multipart types MUST
+     * have a boundary parameter because boundaries are essential for the
+     * representation.
+     *    Multipart messages have a format like
+     *
+     * ...Header...
+     * Content-type: multipart/xyz; boundary="abc"
+     * ...Header...
+     *
+     * Body begins here ("prologue")
+     * --abc
+     * ...Header part 1...
+     *
+     * ...Body part 1...
+     * --abc
+     * ...Header part 2...
+     *
+     *
+     * ...Body part 2
+     * --abc
+     * ...
+     * --abc--
+     * Epilogue
+     *
+     * The parts are separated by boundary lines which begin with "--" and
+     * the string passed as boundary parameter. (Note that there may follow
+     * arbitrary text on boundary lines after "--abc".) The boundary is
+     * chosen such that it does not occur as prefix of any line of the
+     * inner parts of the message.
+     *     The parts are again MIME messages, with header and body. Note
+     * that it is explicitly allowed that the parts are themselves multipart
+     * messages.
+     *     The texts before the first boundary and after the last boundary
+     * are ignored.
+     *     Note that multipart messages as a whole MUST NOT be encoded.
+     * Only the PARTS of the messages may be encoded (if they are not
+     * multipart messages themselves).
+     *
+     * Please read RFC 2046 if you want to know the gory details of this
+     * brain-dead format.
+     *)
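+
+    (* A sketch of a typical call (the body below is a made-up example):
+     *
+     * let body =
+     *   "--abc\r\ncontent-type: text/plain\r\n\r\nPart 1\r\n--abc--\r\n" in
+     * let parts =
+     *   scan_multipart_body body ~start_pos:0 ~end_pos:(String.length body)
+     *     ~boundary:"abc" in
+     *
+     * parts should then be roughly
+     *   [ ( [ "content-type", "text/plain" ], "Part 1" ) ].
+     *)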
+
+val scan_multipart_body_and_decode : string -> start_pos:int -> end_pos:int -> 
+                                        boundary:string ->
+                                        ((string * string) list * string) list
+    (* Same as scan_multipart_body, but decodes the bodies of the parts
+     * if they are encoded using the methods "base64" or "quoted printable".
+     * Fails, if an unknown encoding is used.
+     *)
+
+val scan_multipart_body_from_netstream
+    : Netstream.t ->
+      boundary:string ->
+      create:((string * string) list -> 'a) ->
+      add:('a -> Netstream.t -> int -> int -> unit) ->
+      stop:('a -> unit) ->
+      unit
+    (* scan_multipart_body_from_netstream s b create add stop:
+     *
+     * Reads the MIME message from the netstream s block by block. The
+     * parts are delimited by the boundary b.
+     *
+     * Once a new part is detected and begins, the function 'create' is
+     * called with the MIME header as argument. The result p of this function
+     * may be of any type.
+     *
+     * For every chunk of the part that is being read, the function 'add'
+     * is invoked: add p s k n.
+     * Here, p is the value returned by the 'create' invocation for the
+     * current part. s is the netstream. The current window of s contains
+     * the read chunk completely; the chunk begins at position k of the
+     * window (relative to the beginning of the window) and has a length
+     * of n bytes.
+     *
+     * When the part has been fully read, the function 'stop' is
+     * called with p as argument.
+     *
+     * That means, for every part the following is executed:
+     * - let p = create h
+     * - add p s k1 n1
+     * - add p s k2 n2
+     * - ...
+     * - add p s kN nN
+     * - stop p
+     *
+     * IMPORTANT PRECONDITION:
+     * - The block size of the netstream s must be at least
+     *   String.length b + 3
+     *
+     * EXCEPTIONS:
+     * - Exceptions can happen because of ill-formed input, and within
+     *   the callbacks of the functions 'create', 'add', 'stop'.
+     * - If the exception happens while part p is being read, and the
+     *   'create' function has already been called (successfully), the
+     *   'stop' function is also called (you have the chance to close files).
+     *)
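+
+    (* A minimal callback sketch (purely illustrative; 'stream' stands for a
+     * Netstream.t obtained elsewhere, and the callbacks only count the bytes
+     * of every part):
+     *
+     * let create _header = ref 0 in
+     * let add counter _stream _k n = counter := !counter + n in
+     * let stop counter = print_int !counter; print_newline() in
+     * scan_multipart_body_from_netstream stream ~boundary:"abc"
+     *   ~create ~add ~stop
+     *)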
+
+
+(* THREAD-SAFETY:
+ * The functions are thread-safe as long as the threads do not share
+ * values.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.8  2000/08/13 00:04:36  gerd
+ *     Encoded_word -> EncodedWord
+ *     Bugfixes.
+ *
+ * Revision 1.7  2000/08/07 00:25:00  gerd
+ *     Major update of the interface for structured field lexing.
+ *
+ * Revision 1.6  2000/06/25 22:34:43  gerd
+ *     Added labels to arguments.
+ *
+ * Revision 1.5  2000/06/25 21:15:48  gerd
+ *     Checked thread-safety.
+ *
+ * Revision 1.4  2000/05/16 22:29:12  gerd
+ *     New "option" arguments specifying the level of MIME
+ * compatibility.
+ *
+ * Revision 1.3  2000/04/15 13:09:01  gerd
+ *     Implemented uploads to temporary files.
+ *
+ * Revision 1.2  2000/03/02 01:15:30  gerd
+ *     Updated.
+ *
+ * Revision 1.1  2000/02/25 15:21:12  gerd
+ *     Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netbuffer.ml b/helm/DEVEL/pxp/netstring/netbuffer.ml
new file mode 100644 (file)
index 0000000..d6fc40f
--- /dev/null
@@ -0,0 +1,145 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+type t = 
+    { mutable buffer : string;
+      mutable length : int;
+    }
+
+(* To help the garbage collector:
+ * The 'buffer' has a minimum length of 31 bytes. This minimum can still
+ * be stored in the minor heap.
+ * When the 'buffer' grows, its storage size (in words) doubles. This
+ * limits the number of different bucket sizes, and simplifies reallocation
+ * of freed memory.
+ *)
+
+(* Optimal string length:
+ * Every string takes: 1 word for the header, enough words for the 
+ * contents + 1 Null byte (for C compatibility).
+ * If the buffer grows, it is best to use a new string length such
+ * that the number of words is exactly twice as large as for the previous
+ * string.
+ * n:              length of the previous string in bytes
+ * w:              storage size of the previous string in words
+ * n':             length of the new string in bytes
+ * w' = 2*w:       storage size of the new string in words
+ *
+ * w = (n+1) / word_length + 1
+ *            [it is assumed that (n+1) is always a multiple of word_length]
+ *
+ * n' = (2*w - 1) * word_length - 1
+ *
+ * n' = [2 * ( [n+1] / word_length + 1) - 1] * word_length - 1
+ *    = ...
+ *    = (2*n + 2) + word_length - 1
+ *    = 2 * n + word_length + 1
+ *
+ * n'+1 is again a multiple of word_length:
+ * n'+1 = 2*n + 2 + word_length
+ *      = 2*(n+1) + word_length
+ *      = a multiple of word_length because n+1 is a multiple of word_length
+ *)
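+
+(* A concrete instance of the formula above, assuming a 32-bit platform
+ * (word_length = 4):
+ *   n = 31   ==>   n' = 2*31 + 4 + 1 = 67
+ * and indeed n+1 = 32 and n'+1 = 68 are both multiples of 4.
+ *)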
+
+let word_length = Sys.word_size / 8       (* in bytes *)
+
+let create n =
+  { buffer = String.create (max n 31); length = 0; }
+
+let contents b =
+  String.sub b.buffer 0 b.length
+    
+let sub b ~pos:k ~len:n =
+  if k+n > b.length then
+    raise (Invalid_argument "Netbuffer.sub");
+  String.sub b.buffer k n
+    
+let unsafe_buffer b =
+  b.buffer
+
+let length b =
+  b.length
+
+let add_string b s =
+  let l = String.length s in
+  if l + b.length > String.length b.buffer then begin
+    let l' = l + b.length in
+    let rec new_size s =
+      if s >= l' then s else new_size(2*s + word_length + 1)
+    in
+    let buffer' = String.create (new_size (String.length b.buffer)) in
+    String.blit b.buffer 0 buffer' 0 b.length;
+    b.buffer <- buffer'
+  end;
+  String.blit s 0 b.buffer b.length l;
+  b.length <- b.length + l
+    
+let add_sub_string b s ~pos:k ~len:l =
+  if l + b.length > String.length b.buffer then begin
+    let l' = l + b.length in
+    let rec new_size s =
+      if s >= l' then s else new_size(2*s + word_length + 1)
+    in
+    let buffer' = String.create (new_size (String.length b.buffer)) in
+    String.blit b.buffer 0 buffer' 0 b.length;
+    b.buffer <- buffer'
+  end;
+  String.blit s k b.buffer b.length l;
+  b.length <- b.length + l
+    
+let delete b ~pos:k ~len:l =
+  (* deletes l bytes at position k in b *)
+  let n = String.length b.buffer in
+  if k+l <> n & k <> n then
+    String.blit b.buffer (k+l) b.buffer k (n-l-k);
+  b.length <- b.length - l;
+  ()
+
+let try_shrinking b =
+  (* If the buffer size decreases drastically, reallocate the buffer *)
+  if b.length < (String.length b.buffer / 2) then begin
+    let rec new_size s =
+      if s >= b.length then s else new_size(2*s + word_length + 1)
+    in
+    let buffer' = String.create (new_size 31) in
+    String.blit b.buffer 0 buffer' 0 b.length;
+    b.buffer <- buffer'
+  end 
+
+let clear b =
+  delete b 0 (b.length)
+  
+let index_from b k c =
+  if k > b.length then
+    raise (Invalid_argument "Netbuffer.index_from");
+  let p = String.index_from b.buffer k c in
+  if p >= b.length then raise Not_found;
+  p
+
+let print_buffer b =
+  Format.printf
+    "<NETBUFFER: %d/%d>"
+    b.length
+    (String.length b.buffer)
+;;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/06/25 22:34:43  gerd
+ *     Added labels to arguments.
+ *
+ * Revision 1.2  2000/06/24 20:20:33  gerd
+ *     Added the toploop printer.
+ *
+ * Revision 1.1  2000/04/15 13:07:48  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netbuffer.mli b/helm/DEVEL/pxp/netstring/netbuffer.mli
new file mode 100644 (file)
index 0000000..0ecd61e
--- /dev/null
@@ -0,0 +1,93 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(* A Netbuffer.t is a buffer that can grow and shrink dynamically. *)
+
+type t
+
+val create : int -> t
+    (* Creates a netbuffer which initially allocates this number of bytes.
+     * The logical length is zero.
+     *)
+
+val contents : t -> string
+    (* Returns the contents of the buffer as fresh string. *)
+
+val sub : t -> pos:int -> len:int -> string
+    (* sub nb k n: returns the n characters starting at position k from
+     * netbuffer nb as a fresh string
+     *)
+
+val length : t -> int
+    (* Returns the logical length of the buffer *)
+
+val add_string : t -> string -> unit
+    (* add_string nb s: Adds a copy of the string s to the logical end of
+     * the netbuffer nb. If necessary, the nb grows.
+     *)
+
+val add_sub_string : t -> string -> pos:int -> len:int -> unit
+    (* add_sub_string nb s k n: Adds the substring of s starting at position
+     * k with length n to the logical end of the netbuffer nb. If necessary,
+     * nb grows.
+     * This is semantically the same as
+     * add_string nb (String.sub s k n), but the extra copy is avoided.
+     *)
+
+val delete : t -> pos:int -> len:int -> unit
+    (* delete nb k n: Deletes the n bytes at position k of netbuffer nb
+     * in-place.
+     * The netbuffer does not shrink!
+     *)
+
+val clear : t -> unit
+    (* Deletes all contents from the buffer. Like 'delete', the netbuffer does
+     * not shrink.
+     *)
+
+val try_shrinking : t -> unit
+    (* try_shrinking nb: If the length of the buffer is less than half of
+     * the allocated space, the netbuffer is reallocated in order to save
+     * memory.
+     *)
+
+val index_from : t -> int -> char -> int
+    (* index_from nb k c: Searches for the character c in the netbuffer,
+     * beginning at position k. If found, the position of the leftmost occurrence is
+     * returned. Otherwise, Not_found is raised.
+     *)
+
+val unsafe_buffer : t -> string
+    (* WARNING! This is a low-level function!
+     * Returns the current string that internally holds the buffer.
+     * The byte positions 0 to length - 1 actually store the contents of
+     * the buffer. You can directly read and modify the buffer. Note that
+     * there is no protection if you read or write positions beyond the
+     * length of the buffer.
+     *)
+
+val print_buffer : t -> unit
+    (* For the toploop *)
+
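+(* A short usage sketch (illustrative only; not part of the interface):
+ *
+ * let nb = create 16 in
+ * add_string nb "Hello, ";
+ * add_string nb "world!";
+ * contents nb                    (* = "Hello, world!" *)
+ *
+ * After delete nb 0 7, contents nb would be "world!".
+ *)
+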
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/06/25 22:34:43  gerd
+ *     Added labels to arguments.
+ *
+ * Revision 1.2  2000/06/24 20:20:33  gerd
+ *     Added the toploop printer.
+ *
+ * Revision 1.1  2000/04/15 13:07:48  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netconversion.ml b/helm/DEVEL/pxp/netstring/netconversion.ml
new file mode 100644 (file)
index 0000000..e740654
--- /dev/null
@@ -0,0 +1,864 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+exception Malformed_code
+
+
+type encoding =
+  [  `Enc_utf8       (* UTF-8 *)
+  |  `Enc_java
+  |  `Enc_utf16      (* UTF-16 with unspecified endianness (restricted usage) *)
+  |  `Enc_utf16_le   (* UTF-16 little endian *)
+  |  `Enc_utf16_be   (* UTF-16 big endian *)
+  |  `Enc_usascii    (* US-ASCII (only 7 bit) *)
+  |  `Enc_iso88591   (* ISO-8859-1 *)
+  |  `Enc_iso88592   (* ISO-8859-2 *)
+  |  `Enc_iso88593   (* ISO-8859-3 *)
+  |  `Enc_iso88594   (* ISO-8859-4 *)
+  |  `Enc_iso88595   (* ISO-8859-5 *)
+  |  `Enc_iso88596   (* ISO-8859-6 *)
+  |  `Enc_iso88597   (* ISO-8859-7 *)
+  |  `Enc_iso88598   (* ISO-8859-8 *)
+  |  `Enc_iso88599   (* ISO-8859-9 *)
+  |  `Enc_iso885910  (* ISO-8859-10 *)
+  |  `Enc_iso885913  (* ISO-8859-13 *)
+  |  `Enc_iso885914  (* ISO-8859-14 *)
+  |  `Enc_iso885915  (* ISO-8859-15 *)
+  |  `Enc_koi8r      (* KOI8-R *)
+  |  `Enc_jis0201    (* JIS-0201 *)
+    (* Microsoft: *)
+  |  `Enc_windows1250  (* WINDOWS-1250 *)
+  |  `Enc_windows1251  (* WINDOWS-1251 *)
+  |  `Enc_windows1252  (* WINDOWS-1252 *)
+  |  `Enc_windows1253  (* WINDOWS-1253 *)
+  |  `Enc_windows1254  (* WINDOWS-1254 *)
+  |  `Enc_windows1255  (* WINDOWS-1255 *)
+  |  `Enc_windows1256  (* WINDOWS-1256 *)
+  |  `Enc_windows1257  (* WINDOWS-1257 *)
+  |  `Enc_windows1258  (* WINDOWS-1258 *)
+    (* IBM, ASCII-based: *)
+  |  `Enc_cp437
+  |  `Enc_cp737
+  |  `Enc_cp775
+  |  `Enc_cp850
+  |  `Enc_cp852
+  |  `Enc_cp855
+  |  `Enc_cp856
+  |  `Enc_cp857
+  |  `Enc_cp860
+  |  `Enc_cp861
+  |  `Enc_cp862
+  |  `Enc_cp863
+  |  `Enc_cp864
+  |  `Enc_cp865
+  |  `Enc_cp866
+  |  `Enc_cp869
+  |  `Enc_cp874
+  |  `Enc_cp1006
+   (* IBM, EBCDIC-based: *)
+  |  `Enc_cp037
+  |  `Enc_cp424
+  |  `Enc_cp500
+  |  `Enc_cp875
+  |  `Enc_cp1026
+   (* Adobe: *)
+  |  `Enc_adobe_standard_encoding
+  |  `Enc_adobe_symbol_encoding
+  |  `Enc_adobe_zapf_dingbats_encoding
+   (* Apple: *)
+  |  `Enc_macroman
+
+  ]
+;;
+
+
+let norm_enc_name e =
+  (* Removes '-', '_' and '.' from e and converts the result to uppercase *)
+  let e' = String.create (String.length e) in
+  let rec next i j =
+    if i < String.length e then
+      match e.[i] with
+         ('-'|'_'|'.') -> next (i+1) j
+       | c             -> e'.[j] <- c; next (i+1) (j+1)
+    else
+      j
+  in
+  let l = next 0 0 in
+  String.uppercase(String.sub e' 0 l)
+;;
+
+
+let encoding_of_string e =
+  match norm_enc_name e with
+      ("UTF16"|"UCS2"|"ISO10646UCS2")                 -> `Enc_utf16
+    | "UTF16BE"                                       -> `Enc_utf16_be
+    | "UTF16LE"                                       -> `Enc_utf16_le
+    | "UTF8"                                          -> `Enc_utf8
+    | ("UTF8JAVA"|"JAVA")                             -> `Enc_java
+    | ("USASCII"|"ASCII"|"ISO646US"|"IBM367"|"CP367") -> `Enc_usascii
+    | ("ISO88591"|"LATIN1"|"IBM819"|"CP819")          -> `Enc_iso88591
+    | ("ISO88592"|"LATIN2")                           -> `Enc_iso88592
+    | ("ISO88593"|"LATIN3")                           -> `Enc_iso88593
+    | ("ISO88594"|"LATIN4")                           -> `Enc_iso88594
+    | ("ISO88595"|"CYRILLIC")                         -> `Enc_iso88595
+    | ("ISO88596"|"ARABIC"|"ECMA114"|"ASMO708")       -> `Enc_iso88596
+    | ("ISO88597"|"GREEK"|"GREEK8"|"ELOT928"|"ECMA118") -> `Enc_iso88597
+    | ("ISO88598"|"HEBREW")                           -> `Enc_iso88598
+    | ("ISO88599"|"LATIN5")                           -> `Enc_iso88599
+    | ("ISO885910"|"LATIN6")                          -> `Enc_iso885910
+    | "ISO885913"                                     -> `Enc_iso885913
+    | "ISO885914"                                     -> `Enc_iso885914
+    | "ISO885915"                                     -> `Enc_iso885915
+    | "KOI8R"                                         -> `Enc_koi8r
+    | ("JIS0201"|"JISX0201"|"X0201")                  -> `Enc_jis0201
+
+    | "WINDOWS1250"                                   -> `Enc_windows1250
+    | "WINDOWS1251"                                   -> `Enc_windows1251
+    | "WINDOWS1252"                                   -> `Enc_windows1252
+    | "WINDOWS1253"                                   -> `Enc_windows1253
+    | "WINDOWS1254"                                   -> `Enc_windows1254
+    | "WINDOWS1255"                                   -> `Enc_windows1255
+    | "WINDOWS1256"                                   -> `Enc_windows1256
+    | "WINDOWS1257"                                   -> `Enc_windows1257
+    | "WINDOWS1258"                                   -> `Enc_windows1258
+
+    | ("CP437"|"IBM437")                              -> `Enc_cp437
+    | ("CP737"|"IBM737")                              -> `Enc_cp737
+    | ("CP775"|"IBM775")                              -> `Enc_cp775
+    | ("CP850"|"IBM850")                              -> `Enc_cp850
+    | ("CP852"|"IBM852")                              -> `Enc_cp852
+    | ("CP855"|"IBM855")                              -> `Enc_cp855
+    | ("CP856"|"IBM856")                              -> `Enc_cp856
+    | ("CP857"|"IBM857")                              -> `Enc_cp857
+    | ("CP860"|"IBM860")                              -> `Enc_cp860
+    | ("CP861"|"IBM861")                              -> `Enc_cp861
+    | ("CP862"|"IBM862")                              -> `Enc_cp862
+    | ("CP863"|"IBM863")                              -> `Enc_cp863
+    | ("CP864"|"IBM864")                              -> `Enc_cp864
+    | ("CP865"|"IBM865")                              -> `Enc_cp865
+    | ("CP866"|"IBM866")                              -> `Enc_cp866
+    | ("CP869"|"IBM869")                              -> `Enc_cp869
+    | ("CP874"|"IBM874")                              -> `Enc_cp874
+    | ("CP1006"|"IBM1006")                            -> `Enc_cp1006
+
+    | ("CP037"|"IBM037"|"EBCDICCPUS"|"EBCDICCPCA"|"EBCDICCPWT"|
+       "EBCDICCPNL")                                  -> `Enc_cp037
+    | ("CP424"|"IBM424"|"EBCDICCPHE")                 -> `Enc_cp424
+    | ("CP500"|"IBM500"|"EBCDICCPBE"|"EBCDICCPCH")    -> `Enc_cp500
+    | ("CP875"|"IBM875")                              -> `Enc_cp875
+    | ("CP1026"|"IBM1026")                            -> `Enc_cp1026
+
+    | "ADOBESTANDARDENCODING"       -> `Enc_adobe_standard_encoding
+    | "ADOBESYMBOLENCODING"         -> `Enc_adobe_symbol_encoding
+    | "ADOBEZAPFDINGBATSENCODING"   -> `Enc_adobe_zapf_dingbats_encoding
+
+    | "MACINTOSH"                   -> `Enc_macroman
+
+    | _ ->
+       failwith "Netconversion.encoding_of_string: unknown encoding"
+;;
+
+
+let string_of_encoding (e : encoding) =
+  (* If there is a "preferred MIME name", this name is returned (see IANA). *)
+  match e with
+      `Enc_utf16    -> "UTF-16"
+    | `Enc_utf16_be -> "UTF-16-BE"
+    | `Enc_utf16_le -> "UTF-16-LE"
+    | `Enc_utf8     -> "UTF-8"
+    | `Enc_java     -> "UTF-8-JAVA"
+    | `Enc_usascii  -> "US-ASCII"
+    | `Enc_iso88591 -> "ISO-8859-1"
+    | `Enc_iso88592 -> "ISO-8859-2"
+    | `Enc_iso88593 -> "ISO-8859-3"
+    | `Enc_iso88594 -> "ISO-8859-4"
+    | `Enc_iso88595 -> "ISO-8859-5"
+    | `Enc_iso88596 -> "ISO-8859-6"
+    | `Enc_iso88597 -> "ISO-8859-7"
+    | `Enc_iso88598 -> "ISO-8859-8"
+    | `Enc_iso88599 -> "ISO-8859-9"
+    | `Enc_iso885910 -> "ISO-8859-10"
+    | `Enc_iso885913 -> "ISO-8859-13"
+    | `Enc_iso885914 -> "ISO-8859-14"
+    | `Enc_iso885915 -> "ISO-8859-15"
+    | `Enc_koi8r     -> "KOI8-R"
+    | `Enc_jis0201   -> "JIS_X0201"
+    | `Enc_windows1250 -> "WINDOWS-1250"
+    | `Enc_windows1251 -> "WINDOWS-1251"
+    | `Enc_windows1252 -> "WINDOWS-1252"
+    | `Enc_windows1253 -> "WINDOWS-1253"
+    | `Enc_windows1254 -> "WINDOWS-1254"
+    | `Enc_windows1255 -> "WINDOWS-1255"
+    | `Enc_windows1256 -> "WINDOWS-1256"
+    | `Enc_windows1257 -> "WINDOWS-1257"
+    | `Enc_windows1258 -> "WINDOWS-1258"
+    | `Enc_cp437   -> "CP437"
+    | `Enc_cp737   -> "CP737"
+    | `Enc_cp775   -> "CP775"
+    | `Enc_cp850   -> "CP850"
+    | `Enc_cp852   -> "CP852"
+    | `Enc_cp855   -> "CP855"
+    | `Enc_cp856   -> "CP856"
+    | `Enc_cp857   -> "CP857"
+    | `Enc_cp860   -> "CP860"
+    | `Enc_cp861   -> "CP861"
+    | `Enc_cp862   -> "CP862"
+    | `Enc_cp863   -> "CP863"
+    | `Enc_cp864   -> "CP864"
+    | `Enc_cp865   -> "CP865"
+    | `Enc_cp866   -> "CP866"
+    | `Enc_cp869   -> "CP869"
+    | `Enc_cp874   -> "CP874"
+    | `Enc_cp1006  -> "CP1006"
+    | `Enc_cp037   -> "CP037"
+    | `Enc_cp424   -> "CP424"
+    | `Enc_cp500   -> "CP500"
+    | `Enc_cp875   -> "CP875"
+    | `Enc_cp1026  -> "CP1026"
+    | `Enc_adobe_standard_encoding      -> "ADOBE-STANDARD-ENCODING"
+    | `Enc_adobe_symbol_encoding        -> "ADOBE-SYMBOL-ENCODING"
+    | `Enc_adobe_zapf_dingbats_encoding -> "ADOBE-ZAPF-DINGBATS-ENCODING"
+    | `Enc_macroman                     -> "MACINTOSH"
+;;
+
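+(* Some illustrative lookups (derived from the two functions above):
+ *
+ *   encoding_of_string "iso_8859-1"  = `Enc_iso88591
+ *   encoding_of_string "Latin1"      = `Enc_iso88591
+ *   string_of_encoding `Enc_iso88591 = "ISO-8859-1"
+ *
+ * norm_enc_name strips '-', '_' and '.' and uppercases, which is why the
+ * spelling variants above all map to the same encoding.
+ *)
+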
+
+let read_iso88591 write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let p = Char.code s_in.[p_in + k_in] in
+      let n = write p k_out c_out in
+      if n < 0 then
+       k_in, k_out, `Enc_iso88591
+      else
+       scan (k_in + 1) (k_out + n) (c_out + 1)
+    end
+    else
+      k_in, k_out, `Enc_iso88591
+  in
+  scan 0 0 0
+;;
+
+
+let read_usascii write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let p = Char.code s_in.[p_in + k_in] in
+      if p >= 0x80 then raise Malformed_code;
+      let n = write p k_out c_out in
+      if n < 0 then
+       k_in, k_out, `Enc_usascii
+      else
+       scan (k_in + 1) (k_out + n) (c_out + 1)
+    end
+    else
+      k_in, k_out, `Enc_usascii
+  in
+  scan 0 0 0
+;;
+
+
+let read_8bit m_to_unicode enc write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let p_local = Char.code s_in.[p_in + k_in] in
+      let p_uni = Array.unsafe_get m_to_unicode p_local in
+      if p_uni < 0 then raise Malformed_code;
+      let n = write p_uni k_out c_out in
+      if n < 0 then
+       k_in, k_out, enc
+      else
+       scan (k_in + 1) (k_out + n) (c_out + 1)
+    end
+    else
+      k_in, k_out, enc
+  in
+  scan 0 0 0
+;;
+
+
+let read_utf8 is_java write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let n_out, n_in =
+       match s_in.[p_in + k_in] with
+           '\000' ->
+             if is_java then raise Malformed_code;
+             write 0 k_out c_out, 1
+         | ('\001'..'\127' as c) ->
+             write (Char.code c) k_out c_out, 1
+         | ('\128'..'\223' as c) ->
+             if k_in + 1 >= l_in then
+               -1, 0
+             else begin
+               let n1 = Char.code c in
+               let n2 = Char.code (s_in.[p_in + k_in + 1]) in
+               if is_java && (n1 = 0x80 && n2 = 0xc0) then
+                 write 0 k_out c_out, 2
+               else begin
+                 if n2 < 128 or n2 > 191 then raise Malformed_code;
+                 let p = ((n1 land 0b11111) lsl 6) lor (n2 land 0b111111) in
+                 if p < 128 then raise Malformed_code;
+                 write p k_out c_out, 2
+               end
+             end
+         | ('\224'..'\239' as c) ->
+             if k_in + 2 >= l_in then
+               -1, 0
+             else begin
+               let n1 = Char.code c in
+               let n2 = Char.code (s_in.[p_in + k_in + 1]) in
+               let n3 = Char.code (s_in.[p_in + k_in + 2]) in
+               if n2 < 128 or n2 > 191 then raise Malformed_code;
+               if n3 < 128 or n3 > 191 then raise Malformed_code;
+               let p =
+                 ((n1 land 0b1111) lsl 12) lor
+                 ((n2 land 0b111111) lsl 6) lor
+                 (n3 land 0b111111)
+               in
+               if p < 0x800 then raise Malformed_code;
+               if (p >= 0xd800 && p < 0xe000) then
+                 (* Surrogate pairs are not supported in UTF-8 *)
+                 raise Malformed_code;
+               if (p >= 0xfffe && p <= 0xffff) then
+                 raise Malformed_code;
+               write p k_out c_out, 3
+             end
+         | ('\240'..'\247' as c) ->
+             if k_in + 3 >= l_in then
+               -1, 0
+             else begin
+               let n1 = Char.code c in
+               let n2 = Char.code (s_in.[p_in + k_in + 1]) in
+               let n3 = Char.code (s_in.[p_in + k_in + 2]) in
+               let n4 = Char.code (s_in.[p_in + k_in + 3]) in
+               if n2 < 128 or n2 > 191 then raise Malformed_code;
+               if n3 < 128 or n3 > 191 then raise Malformed_code;
+               if n4 < 128 or n4 > 191 then raise Malformed_code;
+               let p = ((n1 land 0b111) lsl 18) lor
+                       ((n2 land 0b111111) lsl 12) lor
+                       ((n3 land 0b111111) lsl 6) lor
+                       (n4 land 0b111111)
+               in
+               if p < 0x10000 then raise Malformed_code;
+               if p >= 0x110000 then
+                 (* These code points are not supported. *)
+                 raise Malformed_code;
+               write p k_out c_out, 4
+             end
+         | _ ->
+             (* Outside the valid range of XML characters *)
+             raise Malformed_code;
+      in
+      (* n_out: number of written bytes; -1 means out buf is full
+       * n_in: number of read bytes; 0 means end of in buf reached
+       * n_in = 0  implies  n_out = -1
+       *)
+      if n_out < 0 then
+       k_in, k_out, `Enc_utf8
+      else
+       scan (k_in + n_in) (k_out + n_out) (c_out + 1)
+    end
+    else
+      k_in, k_out, `Enc_utf8
+  in
+  scan 0 0 0
+;;
+
+
+let surrogate_offset = 0x10000 - (0xD800 lsl 10) - 0xDC00;;
+       
+let read_utf16_le k_in_0 write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in + 1 < l_in then begin
+      let p = (Char.code s_in.[p_in + k_in]) lor ((Char.code s_in.[p_in + k_in + 1]) lsl 8) in
+
+      if p >= 0xd800 & p < 0xe000 then begin
+       (* This is a surrogate pair. *)
+       if k_in + 3 < l_in then begin
+         if p <= 0xdbff then begin
+           let q = (Char.code s_in.[p_in + k_in + 2 ]) lor
+                   ((Char.code s_in.[p_in + k_in + 3]) lsl 8) in
+           if q < 0xdc00 or q > 0xdfff then raise Malformed_code;
+           let eff_p = (p lsl 10) + q + surrogate_offset in
+           let n = write eff_p k_out c_out in
+           if n < 0 then
+             k_in, k_out, `Enc_utf16_le
+           else
+             scan (k_in + 4) (k_out + n) (c_out + 1)
+         end
+         else
+           (* Malformed pair: *)
+           raise Malformed_code;
+       end
+       else 
+         (* Incomplete pair: *)
+         k_in, k_out, `Enc_utf16_le
+      end
+
+      else
+       if p = 0xfffe then 
+         (* Big endian byte order mark: It is illegal here *)
+         raise Malformed_code
+       else begin
+         (* A regular code point *)
+         let n = write p k_out c_out in
+         if n < 0 then
+           k_in, k_out, `Enc_utf16_le
+         else
+           scan (k_in + 2) (k_out + n) (c_out + 1)
+       end
+    end
+    else
+      (* Incomplete character: *)
+      k_in, k_out, `Enc_utf16_le
+  in
+  scan k_in_0 0 0
+;;
+
+
+let read_utf16_be k_in_0 write s_in p_in l_in =
+  let rec scan k_in k_out c_out =
+    if k_in + 1 < l_in then begin
+      let p = (Char.code s_in.[p_in + k_in + 1]) lor ((Char.code s_in.[p_in + k_in]) lsl 8) in
+
+      if p >= 0xd800 & p < 0xe000 then begin
+       (* This is a surrogate pair. *)
+       if k_in + 3 < l_in then begin
+         if p <= 0xdbff then begin
+           let q = (Char.code s_in.[p_in + k_in + 3 ]) lor
+                   ((Char.code s_in.[p_in + k_in + 2]) lsl 8) in
+           if q < 0xdc00 or q > 0xdfff then raise Malformed_code;
+           let eff_p = (p lsl 10) + q + surrogate_offset in
+           let n = write eff_p k_out c_out in
+           if n < 0 then
+             k_in, k_out, `Enc_utf16_be
+           else
+             scan (k_in + 4) (k_out + n) (c_out + 1)
+         end
+         else
+           (* Malformed pair: *)
+           raise Malformed_code;
+       end
+       else 
+         (* Incomplete pair: *)
+         k_in, k_out, `Enc_utf16_be
+      end
+
+      else
+       if p = 0xfffe then
+         (* Little endian byte order mark: It is illegal here *)
+         raise Malformed_code
+       else begin
+         (* A regular code point *)
+         let n = write p k_out c_out in
+         if n < 0 then
+           k_in, k_out, `Enc_utf16_be
+         else
+           scan (k_in + 2) (k_out + n) (c_out + 1)
+       end
+
+    end
+    else
+      (* Incomplete character: *)
+      k_in, k_out, `Enc_utf16_be
+  in
+  scan k_in_0 0 0
+;;
+
+
+let read_utf16 write s_in p_in l_in =
+  (* Expect a BOM at the beginning of the text *)
+  if l_in >= 2 then begin
+    let c0 = s_in.[p_in + 0] in
+    let c1 = s_in.[p_in + 1] in
+    if c0 = '\254' & c1 = '\255' then begin
+      (* 0xfeff as big endian *)
+      read_utf16_be 2 write s_in p_in l_in
+    end
+    else 
+      if c0 = '\255' & c1 = '\254' then begin
+       (* 0xfeff as little endian *)
+       read_utf16_le 2 write s_in p_in l_in
+      end
+      else
+       (* byte order mark missing *)
+       raise Malformed_code
+  end
+  else
+    0, 0, `Enc_utf16
+;;
+
+
+let write_iso88591 s_out p_out l_out max_chars w p k_out c_out =
+  if k_out < l_out && c_out < max_chars then begin
+    if p > 255 then begin
+      let subst = w p in
+      let l_subst =  String.length subst in
+      if k_out + l_subst <= l_out then begin
+       (* Enough space to store 'subst': *)
+       String.blit subst 0 s_out (k_out+p_out) l_subst;
+       l_subst
+      end
+      else
+       (* Not enough space: Stop this round of recoding *)
+       -1
+    end
+    else begin
+      s_out.[p_out + k_out] <- Char.chr p;
+      1
+    end
+  end
+  else
+    -1   (* End-of-buffer indicator *)
+;;
+
+
+let write_usascii s_out p_out l_out max_chars w p k_out c_out =
+  if k_out < l_out && c_out < max_chars then begin
+    if p > 127 then begin
+      let subst = w p in
+      let l_subst =  String.length subst in
+      if k_out + l_subst <= l_out then begin
+       (* Enough space to store 'subst': *)
+       String.blit subst 0 s_out (k_out+p_out) l_subst;
+       l_subst
+      end
+      else
+       (* Not enough space: Stop this round of recoding *)
+       -1
+    end
+    else begin
+      s_out.[p_out + k_out] <- Char.chr p;
+      1
+    end
+  end
+  else
+    -1   (* End-of-buffer indicator *)
+;;
+
+
+let write_8bit from_unicode s_out p_out l_out max_chars w p k_out c_out =
+  if k_out < l_out && c_out < max_chars then begin
+    let p' =
+      match Array.unsafe_get from_unicode (p land 255) with
+         Netmappings.U_nil -> -1
+       | Netmappings.U_single (p0,q0) ->
+           if p0 = p then q0 else -1
+       | Netmappings.U_list l ->
+           (try List.assoc p l with Not_found -> -1)
+    in
+    if p' < 0 then begin
+      let subst = w p in
+      let l_subst =  String.length subst in
+      if k_out + l_subst <= l_out then begin
+       (* Enough space to store 'subst': *)
+       String.blit subst 0 s_out (k_out+p_out) l_subst;
+       l_subst
+      end
+      else
+       (* Not enough space: Stop this round of recoding *)
+       -1
+    end
+    else begin
+      s_out.[p_out + k_out] <- Char.chr p';
+      1
+    end
+  end
+  else
+    -1   (* End-of-buffer indicator *)
+;;
+
+
+let write_utf8 is_java s_out p_out l_out max_chars w p k_out c_out =
+  if p <= 127 && (not is_java || p <> 0) then begin
+    if k_out < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out] <- Char.chr p;
+      1
+    end
+    else -1
+  end
+  else if p <= 0x7ff then begin
+    if k_out + 1 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out]     <- Char.chr (0xc0 lor (p lsr 6));
+      s_out.[p_out + k_out + 1] <- Char.chr (0x80 lor (p land 0x3f));
+      2
+    end
+    else -1
+  end
+  else if p <= 0xffff then begin
+    (* Refuse to write surrogate code points, and 0xfffe, 0xffff *)
+    if (p >= 0xd800 & p < 0xe000) or (p >= 0xfffe) then
+      failwith "Netconversion.write_utf8";
+    if k_out + 2 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out]     <- Char.chr (0xe0 lor (p lsr 12));
+      s_out.[p_out + k_out + 1] <- Char.chr (0x80 lor ((p lsr 6) land 0x3f));
+      s_out.[p_out + k_out + 2] <- Char.chr (0x80 lor (p land 0x3f));
+      3
+    end
+    else -1
+  end
+  else if p <= 0x10ffff then begin
+    if k_out + 3 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out]     <- Char.chr (0xf0 lor (p lsr 18));
+      s_out.[p_out + k_out + 1] <- Char.chr (0x80 lor ((p lsr 12) land 0x3f));
+      s_out.[p_out + k_out + 2] <- Char.chr (0x80 lor ((p lsr 6)  land 0x3f));
+      s_out.[p_out + k_out + 3] <- Char.chr (0x80 lor (p land 0x3f));
+      4
+    end
+    else -1
+  end
+  else
+    (* Higher code points are not possible in XML: *)
+    failwith "Netconversion.write_utf8"
+;;
+
+
+let write_utf16_le s_out p_out l_out max_chars w p k_out c_out =
+  if p >= 0xfffe then begin
+    if p <= 0xffff or p > 0x10ffff then failwith "Netconversion.write_utf16_le";
+    (* Must be written as surrogate pair *)
+    if k_out + 3 < l_out && c_out < max_chars then begin
+      let high = (p lsr 10) + 0xd800 in
+      let low  = (p land 0x3ff) + 0xdc00 in
+      s_out.[p_out + k_out    ] <- Char.chr (high land 0xff);
+      s_out.[p_out + k_out + 1] <- Char.chr (high lsr 8);
+      s_out.[p_out + k_out + 2] <- Char.chr (low land 0xff);
+      s_out.[p_out + k_out + 3] <- Char.chr (low lsr 8);
+      4
+    end
+    else -1
+  end
+  else begin
+    if k_out + 1 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out    ] <- Char.chr (p land 0xff);
+      s_out.[p_out + k_out + 1] <- Char.chr (p lsr 8);
+      2
+    end
+    else
+      -1
+  end
+;;
+
+
+let write_utf16_be s_out p_out l_out max_chars w p k_out c_out =
+  if p >= 0xfffe then begin
+    if p <= 0xffff or p > 0x10ffff then failwith "Netconversion.write_utf16_be";
+    (* Must be written as surrogate pair *)
+    if k_out + 3 < l_out && c_out < max_chars then begin
+      let high = (p lsr 10) + 0xd800 in
+      let low  = (p land 0x3ff) + 0xdc00 in
+      s_out.[p_out + k_out + 1] <- Char.chr (high land 0xff);
+      s_out.[p_out + k_out    ] <- Char.chr (high lsr 8);
+      s_out.[p_out + k_out + 3] <- Char.chr (low land 0xff);
+      s_out.[p_out + k_out + 2] <- Char.chr (low lsr 8);
+      4
+    end
+    else -1
+  end
+  else begin
+    if k_out + 1 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out + 1] <- Char.chr (p land 0xff);
+      s_out.[p_out + k_out    ] <- Char.chr (p lsr 8);
+      2
+    end
+    else
+      -1
+  end
+;;
+
+
+let recode ~in_enc
+           ~in_buf
+           ~in_pos
+           ~in_len
+           ~out_enc
+           ~out_buf
+           ~out_pos
+           ~out_len
+           ~max_chars
+           ~subst =
+  if (in_pos < 0  || in_len < 0  || in_pos  + in_len  > String.length in_buf ||
+      out_pos < 0 || out_len < 0 || out_pos + out_len > String.length out_buf)
+  then
+    invalid_arg "Netconversion.recode";
+
+  let reader =
+    match in_enc with
+       `Enc_iso88591 -> read_iso88591
+      | `Enc_usascii  -> read_usascii
+      | `Enc_utf8     -> read_utf8 false
+      | `Enc_java     -> read_utf8 true
+      | `Enc_utf16    -> read_utf16
+      | `Enc_utf16_le -> read_utf16_le 0
+      | `Enc_utf16_be -> read_utf16_be 0
+      | _             -> 
+         (try
+            let to_unicode' = Hashtbl.find Netmappings.to_unicode in_enc in
+            let to_unicode =
+              Netmappings.lock();
+              Lazy.force to_unicode' in
+            Netmappings.unlock();
+            read_8bit to_unicode in_enc
+          with
+              Not_found ->
+                failwith("Support for the encoding `" ^
+                         string_of_encoding in_enc ^ 
+                         "' has not been compiled into Netstring")
+         )
+  in
+  let writer =
+    match out_enc with
+       `Enc_iso88591 -> write_iso88591  out_buf out_pos out_len max_chars subst
+      | `Enc_usascii  -> write_usascii   out_buf out_pos out_len max_chars subst
+      | `Enc_utf8     -> write_utf8 false 
+                                         out_buf out_pos out_len max_chars subst
+      | `Enc_java     -> write_utf8 true out_buf out_pos out_len max_chars subst
+      | `Enc_utf16    -> failwith "Netconversion.recode"
+      | `Enc_utf16_le -> write_utf16_le  out_buf out_pos out_len max_chars subst
+      | `Enc_utf16_be -> write_utf16_be  out_buf out_pos out_len max_chars subst
+      | _             -> 
+         (try
+            let from_unicode' = Hashtbl.find Netmappings.from_unicode out_enc 
+            in
+            let from_unicode =
+              Netmappings.lock();
+              Lazy.force from_unicode' in
+            Netmappings.unlock();
+            write_8bit from_unicode out_buf out_pos out_len max_chars subst
+          with
+              Not_found ->
+                failwith("Support for the encoding `" ^
+                         string_of_encoding out_enc ^ 
+                         "' has not been compiled into Netstring")
+         )
+  in
+  reader writer in_buf in_pos in_len
+;;
+
+
+let makechar enc p =
+  match enc with
+      `Enc_iso88591 -> 
+       if p > 255 then raise Not_found;
+       String.make 1 (Char.chr p)
+    | `Enc_usascii ->
+       if p > 127 then raise Not_found;
+       String.make 1 (Char.chr p)
+    | `Enc_utf8 ->
+       let s = String.create 4 in
+       let n = write_utf8 false s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
+       String.sub s 0 n
+    | `Enc_java ->
+       let s = String.create 4 in
+       let n = write_utf8 true s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
+       String.sub s 0 n
+    | `Enc_utf16_le ->
+       let s = String.create 4 in
+       let n = write_utf16_le s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
+       String.sub s 0 n
+    | `Enc_utf16_be ->
+       let s = String.create 4 in
+       let n = write_utf16_be s 0 4 1 (fun _ -> raise Not_found) p 0 0 in
+       String.sub s 0 n
+    | `Enc_utf16 ->
+       failwith "Netconversion.makechar"
+    | _ ->
+       let s = String.create 1 in
+       let from_unicode' = 
+         try
+           Hashtbl.find Netmappings.from_unicode enc 
+         with
+             Not_found ->
+               failwith("Support for the encoding `" ^
+                        string_of_encoding enc ^ 
+                        "' has not been compiled into Netstring")
+       in
+       let from_unicode =
+         Netmappings.lock();
+         Lazy.force from_unicode' in
+       Netmappings.unlock();
+       let n =
+         write_8bit from_unicode s 0 1 1 (fun _ -> raise Not_found) p 0 0 in
+       s
+;;
+
+
+let recode_string ~in_enc ~out_enc ?(subst = (fun _ -> raise Not_found)) s =
+
+  let length = String.length s in
+  let size = 1024 in
+  let out_buf = String.create size in
+
+  let rec recode_loop k s_done in_enc =
+    (* 'k' bytes of 's' have already been processed, and the result is in
+     * 's_done'.
+     *)
+    (* Recode to 'out_buf': *)
+    let in_len = length - k in
+    let in_done, out_done, in_enc' =
+      recode ~in_enc:in_enc   ~in_buf:s        ~in_pos:k     ~in_len:in_len
+             ~out_enc:out_enc ~out_buf:out_buf ~out_pos:0    ~out_len:size  
+             ~max_chars:size  ~subst:subst in
+    (* Collect the results: *)
+    let k' = k + in_done in
+    let s_done' = String.sub out_buf 0 out_done :: s_done in
+    (* Still something to do? *)
+    if k' < length then
+      recode_loop k' s_done' in_enc'
+    else
+      (* No: Concatenate s_done' to get the final result. *)
+      String.concat "" (List.rev s_done')
+  in
+
+  recode_loop 0 [] in_enc
+;;
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/08/29 00:46:41  gerd
+ *     New type for the Unicode to 8 bit translation table.
+ *     The Netmappings tables are now Lazy.t.
+ *
+ * Revision 1.1  2000/08/13 00:02:57  gerd
+ *     Initial revision.
+ *
+ *
+ * ======================================================================
+ * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_encoding.ml):
+ * 
+ * Revision 1.5  2000/07/27 00:41:14  gerd
+ *     new 8 bit codes
+ *
+ * Revision 1.4  2000/07/04 22:11:41  gerd
+ *     Implemented the enhancements and extensions of
+ * rev. 1.4 of pxp_encoding.mli.
+ *
+ * Revision 1.3  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.2  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.1  2000/05/20 20:30:50  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netconversion.mli b/helm/DEVEL/pxp/netstring/netconversion.mli
new file mode 100644 (file)
index 0000000..5e3e4b4
--- /dev/null
@@ -0,0 +1,241 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+exception Malformed_code
+
+(* Encodings:
+ * - With the exception of UTF-8 and UTF-16, only single-byte character sets
+ *   are supported.
+ * - I took the mappings from www.unicode.org, and the standard names of
+ *   the character sets from IANA. Obviously, many character sets that
+ *   could be supported are still missing, especially ISO646 character
+ *   sets and many EBCDIC code pages.
+ * - Because of the copyright statement from Unicode, I cannot put the
+ *   source tables that describe the mappings into the distribution. They
+ *   are publicly available from www.unicode.org.
+ * - Because of this, it is difficult for you to extend the list of character 
+ *   sets; you need the source tables I am not allowed to distribute.
+ *   These tables have a very simple format: Every line describes a pair
+ *   of code points; the left code (<= 0xff) is the code in the character
+ *   set, the right code (<= 0xffff) is the Unicode equivalent.
+ *   For an example, see
+ *   http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT
+ *   You can send me such files, and I will integrate them into the 
+ *   distribution (if possible).
+ * - I really do not know very much about the character sets used in
+ *   East Asia. If you need them, please write the necessary conversion
+ *   functions and send them to me.
+ *
+ * KNOWN PROBLEMS:
+ * - The following charsets do not have a bijective mapping to Unicode:
+ *   adobe_standard_encoding, adobe_symbol_encoding, 
+ *   adobe_zapf_dingbats_encoding, cp1002 (0xFEBE). The current implementation
+ *   simply removes one of the conflicting code point pairs - this might
+ *   not be what you want.
+ *)
+
+type encoding =
+  [  `Enc_utf8       (* UTF-8 *)
+  |  `Enc_java       (* The variant of UTF-8 used by Java *)
+  |  `Enc_utf16      (* UTF-16 with unspecified endianness (restricted usage) *)
+  |  `Enc_utf16_le   (* UTF-16 little endian *)
+  |  `Enc_utf16_be   (* UTF-16 big endian *)
+  |  `Enc_usascii    (* US-ASCII (only 7 bit) *)
+  |  `Enc_iso88591   (* ISO-8859-1 *)
+  |  `Enc_iso88592   (* ISO-8859-2 *)
+  |  `Enc_iso88593   (* ISO-8859-3 *)
+  |  `Enc_iso88594   (* ISO-8859-4 *)
+  |  `Enc_iso88595   (* ISO-8859-5 *)
+  |  `Enc_iso88596   (* ISO-8859-6 *)
+  |  `Enc_iso88597   (* ISO-8859-7 *)
+  |  `Enc_iso88598   (* ISO-8859-8 *)
+  |  `Enc_iso88599   (* ISO-8859-9 *)
+  |  `Enc_iso885910  (* ISO-8859-10 *)
+  |  `Enc_iso885913  (* ISO-8859-13 *)
+  |  `Enc_iso885914  (* ISO-8859-14 *)
+  |  `Enc_iso885915  (* ISO-8859-15 *)
+  |  `Enc_koi8r      (* KOI8-R *)
+  |  `Enc_jis0201    (* JIS-0201 *)
+    (* Microsoft: *)
+  |  `Enc_windows1250  (* WINDOWS-1250 *)
+  |  `Enc_windows1251  (* WINDOWS-1251 *)
+  |  `Enc_windows1252  (* WINDOWS-1252 *)
+  |  `Enc_windows1253  (* WINDOWS-1253 *)
+  |  `Enc_windows1254  (* WINDOWS-1254 *)
+  |  `Enc_windows1255  (* WINDOWS-1255 *)
+  |  `Enc_windows1256  (* WINDOWS-1256 *)
+  |  `Enc_windows1257  (* WINDOWS-1257 *)
+  |  `Enc_windows1258  (* WINDOWS-1258 *)
+    (* IBM, ASCII-based: *)
+  |  `Enc_cp437
+  |  `Enc_cp737
+  |  `Enc_cp775
+  |  `Enc_cp850
+  |  `Enc_cp852
+  |  `Enc_cp855
+  |  `Enc_cp856
+  |  `Enc_cp857
+  |  `Enc_cp860
+  |  `Enc_cp861
+  |  `Enc_cp862
+  |  `Enc_cp863
+  |  `Enc_cp864
+  |  `Enc_cp865
+  |  `Enc_cp866
+  |  `Enc_cp869
+  |  `Enc_cp874
+  |  `Enc_cp1006
+   (* IBM, EBCDIC-based: *)
+  |  `Enc_cp037
+  |  `Enc_cp424
+  |  `Enc_cp500
+  |  `Enc_cp875
+  |  `Enc_cp1026
+   (* Adobe: *)
+  |  `Enc_adobe_standard_encoding
+  |  `Enc_adobe_symbol_encoding
+  |  `Enc_adobe_zapf_dingbats_encoding
+   (* Apple: *)
+  |  `Enc_macroman
+
+  ]
+
+
+val encoding_of_string : string -> encoding;;
+    (* Returns the encoding value corresponding to the given encoding name.
+     * Fails if the encoding is unknown.
+     * E.g. encoding_of_string "iso-8859-1" = `Enc_iso88591
+     *)
+
+val string_of_encoding : encoding -> string;;
+    (* Returns the name of the encoding. *)
+
+
+val makechar : encoding -> int -> string
+  (* makechar enc i:
+   * Creates the string representing the code point i in encoding enc.
+   * Raises Not_found if the character is legal but cannot be represented 
+   * in enc.
+   * 
+   * Possible encodings: everything but `Enc_utf16.
+   *)
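+  (* Usage sketch:
+   *   makechar `Enc_utf8 0xe9     = "\195\169"   (* U+00E9 as two UTF-8 bytes *)
+   *   makechar `Enc_usascii 0xe9                 (* raises Not_found *)
+   *)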
+
+val recode : in_enc:encoding -> 
+             in_buf:string -> 
+             in_pos:int ->
+             in_len:int ->
+             out_enc:encoding ->
+             out_buf:string ->
+             out_pos:int ->
+             out_len:int ->
+             max_chars:int ->
+             subst:(int -> string) -> (int * int * encoding)
+  (* 
+   * let (in_n, out_n, in_enc') =
+   *     recode ~in_enc ~in_buf ~in_pos ~in_len
+   *            ~out_enc ~out_buf ~out_pos ~out_len ~max_chars ~subst
+   *
+   * Converts the character sequence contained in the at most in_len bytes
+   * of in_buf starting at position in_pos, and writes the result 
+   * into at most out_len bytes of out_buf starting at out_pos.
+   * At most max_chars are written into out_buf.
+   * The characters in in_buf are assumed to be encoded as in_enc, and the 
+   * characters in out_buf will be encoded as out_enc.
+   * If there is a code point which cannot be represented in out_enc,
+   * the function subst is called with the code point as argument, and the
+   * resulting string (which must already be encoded as out_enc) is
+   * inserted instead. 
+   * Note: It is possible that subst is called several times for the same
+   * character.
+   * Return value: out_n is the actual number of bytes written into out_buf.
+   * in_n is the actual number of bytes that have been converted from
+   * in_buf; in_n may be smaller than in_len because of incomplete
+   * multi-byte characters, or because the output buffer has less space
+   * for characters than the input buffer, or because of a change
+   * of the encoding variant.
+   * If there is at least one complete character in in_buf, and at least
+   * space for one complete character in out_buf, and max_chars >= 1, it is 
+   * guaranteed that in_n > 0 or out_n > 0.
+   * in_enc' is normally identical to in_enc. However, there are cases
+   * in which the encoding can be refined when looking at the byte
+   * sequence; for example whether a little endian or big endian variant
+   * of the encoding is used. in_enc' is the variant of in_enc that was
+   * used for the last character that has been converted.
+   *
+   * NOTES:
+   *
+   * Supported range of code points: 0 to 0xd7ff, 0xe000 to 0xfffd,
+   * 0x10000 to 0x10ffff.
+   *
+   * Enc_utf8: Malformed UTF-8 byte sequences are always rejected. This
+   * is also true for the sequence 0xc0 0x80 which is used by some software
+   * (Java) as an alternative representation of the code point 0.
+   *
+   * Enc_utf16: When reading from a string encoded as Enc_utf16, a byte
+   * order mark is expected at the beginning. The detected variant 
+   * (Enc_utf16_le or Enc_utf16_be) is returned. The byte order mark is
+   * not included in the output string. It is not possible to
+   * write Enc_utf16 output.
+   *
+   * Enc_utf16_le, Enc_utf16_be: When reading from such a string, the
+   * code point 0xfeff is returned as it is; it is a "zero-width 
+   * non-breaking space". The code point 0xfffe is rejected.
+   *
+   * Surrogate pairs: These are recognized (or written) only for a
+   * UTF-16 encoding, and rejected for any other encoding.
+   *
+   * Rejected byte sequences cause the exception Malformed_code.
+   *)
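+  (* Usage sketch (a minimal call; "?" is just an example substitution):
+   *
+   *   let in_buf  = "H\233llo" in                    (* ISO-8859-1 input *)
+   *   let out_buf = String.create 20 in
+   *   let (in_n, out_n, _) =
+   *     recode ~in_enc:`Enc_iso88591 ~in_buf ~in_pos:0 ~in_len:5
+   *            ~out_enc:`Enc_utf8 ~out_buf ~out_pos:0 ~out_len:20
+   *            ~max_chars:20 ~subst:(fun _ -> "?") in
+   *   (* in_n = 5, out_n = 6, String.sub out_buf 0 out_n = "H\195\169llo" *)
+   *)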
+
+val recode_string : in_enc:encoding -> 
+                    out_enc:encoding ->
+                    ?subst:(int -> string) ->
+                    string ->
+                    string
+  (* Recodes a complete string from in_enc to out_enc, and returns it.
+   * The function subst is invoked for code points of in_enc that cannot
+   * be represented in out_enc, and the result of the function invocation
+   * is substituted.
+   * If subst is missing, Not_found is raised in this case.
+   *)
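+  (* Usage sketch (the "&#...;" substitution below is just one possibility):
+   *   recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 "H\233llo"
+   *     = "H\195\169llo"
+   *   recode_string ~in_enc:`Enc_utf8 ~out_enc:`Enc_usascii
+   *     ~subst:(fun p -> "&#" ^ string_of_int p ^ ";") "H\195\169llo"
+   *     = "H&#233;llo"
+   *)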
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/08/13 00:02:57  gerd
+ *     Initial revision.
+ *
+ *
+ * ======================================================================
+ * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_encoding.mli):
+ *
+ * Revision 1.4  2000/07/04 22:05:58  gerd
+ *     Enhanced version of 'recode'. Labeled arguments.
+ * New function 'recode_string'.
+ *
+ * Revision 1.3  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.2  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.1  2000/05/20 20:30:50  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netencoding.ml b/helm/DEVEL/pxp/netstring/netencoding.ml
new file mode 100644 (file)
index 0000000..e87c4c3
--- /dev/null
@@ -0,0 +1,903 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+module Str = Netstring_str;;
+
+module Base64 = struct
+  let b64_pattern plus slash =
+    [| 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
+       'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z';
+       'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
+       'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z';
+       '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; plus; slash |];;
+
+
+  let rfc_pattern = b64_pattern '+' '/';;
+  let url_pattern = b64_pattern '-' '/';;
+
+  let encode_with_options b64 equal s pos len linelen crlf =
+  (* encode using "base64".
+   * 'b64': The encoding table, created by b64_pattern.
+   * 'equal': The character that should be used instead of '=' in the original
+   *          encoding scheme. Pass '=' to get the original encoding scheme.
+   * s, pos, len, linelen: See the interface description of encode_substring.
+   *)
+    assert (Array.length b64 = 64);
+    if len < 0 or pos < 0 or pos > String.length s or linelen < 0 then
+      invalid_arg "Netencoding.Base64.encode_with_options";
+    if pos + len > String.length s then
+      invalid_arg "Netencoding.Base64.encode_with_options";
+
+    let linelen =
+      (linelen/4) * 4 in
+
+    let l_t = if len = 0 then 0 else ((len - 1) / 3 + 1) * 4 in
+    (* l_t: length of the result without additional line endings *)
+
+    let l_t' = 
+      if linelen < 4 then
+       l_t
+      else
+       if l_t = 0 then 0 else 
+         let n_lines = ((l_t - 1) / linelen) + 1 in
+         l_t + n_lines * (if crlf then 2 else 1)
+    in
+    (* l_t': length of the result with CRLF or LF characters *)
+    
+    let t = String.make l_t' equal in
+    let j = ref 0 in
+    let q = ref 0 in
+    for k = 0 to len / 3 - 1 do
+      let p = pos + 3*k in
+      (* p >= pos >= 0: this is evident
+       * p+2 < pos+len <= String.length s:
+       *   Because k <= len/3-1
+       *         3*k <= 3*(len/3-1) = len - 3
+       *   pos+3*k+2 <= pos + len - 3 + 2 = pos + len - 1 < pos + len
+       * So it is proved that the following unsafe string accesses always
+       * work.
+       *)
+      let bits = (Char.code (String.unsafe_get s (p))   lsl 16) lor
+                (Char.code (String.unsafe_get s (p+1)) lsl  8) lor
+                (Char.code (String.unsafe_get s (p+2))) in
+      (* Obviously, 'bits' is a 24 bit entity (i.e. bits < 2**24) *)
+      assert(!j + 3 < l_t');
+      String.unsafe_set t !j     (Array.unsafe_get b64 ( bits lsr 18));
+      String.unsafe_set t (!j+1) (Array.unsafe_get b64 ((bits lsr 12) land 63));
+      String.unsafe_set t (!j+2) (Array.unsafe_get b64 ((bits lsr  6) land 63));
+      String.unsafe_set t (!j+3) (Array.unsafe_get b64 ( bits         land 63));
+      j := !j + 4;
+      if linelen > 3 then begin
+       q := !q + 4;
+       if !q + 4 > linelen then begin
+         (* The next 4 characters won't fit on the current line. So insert
+          * a line ending.
+          *)
+         if crlf then begin
+           t.[ !j ] <- '\013';
+           t.[ !j+1 ] <- '\010';
+           j := !j + 2;
+         end
+         else begin 
+           t.[ !j ] <- '\010';
+           incr j
+         end;
+         q := 0;
+       end;
+      end;
+    done;
+    (* padding if needed: *)
+    let m = len mod 3 in
+    begin
+      match m with
+         0 -> ()
+       | 1 ->
+            let bits = Char.code (s.[pos + len - 1]) in
+           t.[ !j     ] <- b64.( bits lsr 2);
+           t.[ !j + 1 ] <- b64.( (bits land 0x03) lsl 4);
+           j := !j + 4;
+           q := !q + 4;
+       | 2 ->
+           let bits = (Char.code (s.[pos + len - 2]) lsl 8) lor
+                       (Char.code (s.[pos + len - 1])) in
+           t.[ !j     ] <- b64.( bits lsr 10);
+           t.[ !j + 1 ] <- b64.((bits lsr  4) land 0x3f);
+           t.[ !j + 2 ] <- b64.((bits lsl  2) land 0x3f);
+           j := !j + 4;
+           q := !q + 4;
+       | _ -> assert false
+    end;
+
+    (* If required, add another line end: *)
+
+    if linelen > 3 & !q > 0 then begin
+      if crlf then begin
+       t.[ !j ] <- '\013';
+       t.[ !j+1 ] <- '\010';
+       j := !j + 2;
+      end
+      else begin 
+       t.[ !j ] <- '\010';
+       incr j
+      end;     
+    end;
+
+    t ;;
+
+
+
+  let encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
+    let l = match len with None -> String.length s - pos | Some x -> x in
+    encode_with_options rfc_pattern '=' s pos l linelength crlf;;
+
+
+  let encode_substring s ~pos ~len ~linelength ~crlf =
+    encode_with_options rfc_pattern '=' s pos len linelength crlf;;
+
+
+  let url_encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
+    let l = match len with None -> String.length s - pos | Some x -> x in
+    encode_with_options url_pattern '.' s pos l linelength crlf;;
+    
+
+  let decode_substring t ~pos ~len ~url_variant:p_url ~accept_spaces:p_spaces =
+    if len < 0 or pos < 0 or pos > String.length t then
+      invalid_arg "Netencoding.Base64.decode_substring";
+    if pos + len > String.length t then
+      invalid_arg "Netencoding.Base64.decode_substring";
+
+    (* Compute the number of effective characters l_t in 't';
+     * pad_chars: number of '=' characters at the end of the string.
+     *)
+    let l_t, pad_chars =
+      if p_spaces then begin
+       (* Count all non-whitespace characters: *)
+       let c = ref 0 in
+       let p = ref 0 in
+       for i = pos to pos + len - 1 do
+         match String.unsafe_get t i with
+             (' '|'\t'|'\r'|'\n') -> ()
+           | ('='|'.') as ch ->
+               if ch = '.' & not p_url then
+                 invalid_arg "Netencoding.Base64.decode_substring";
+               incr c;
+               incr p;
+               if !p > 2 then
+                 invalid_arg "Netencoding.Base64.decode_substring";
+               for j = i+1 to pos + len - 1 do
+                 match String.unsafe_get t j with
+                     (' '|'\t'|'\r'|'\n'|'.'|'=') -> ()
+                   | _ ->
+                       (* Only another '=' or spaces allowed *)
+                       invalid_arg "Netencoding.Base64.decode_substring";
+               done
+           | _ -> incr c
+       done;
+       if !c mod 4 <> 0 then
+         invalid_arg "Netencoding.Base64.decode_substring";
+       !c, !p
+      end
+      else
+       len,
+       ( if len mod 4 <> 0 then
+           invalid_arg "Netencoding.Base64.decode_substring";
+         if len > 0 then (
+           if String.sub t (pos + len - 2) 2 = "==" or 
+              (p_url & String.sub t (pos + len - 2) 2 = "..") then 2
+           else 
+             if String.sub t (pos + len - 1) 1 = "=" or 
+                (p_url & String.sub t (pos + len - 1) 1 = ".") then 1
+             else
+               0
+         )
+         else 0 
+       )
+    in
+
+    let l_s = (l_t / 4) * 3 - pad_chars in       (* sic! *)
+    let s = String.create l_s in
+
+    let decode_char c =
+      match c with
+         'A' .. 'Z'  -> Char.code(c) - 65     (* 65 = Char.code 'A' *)
+       | 'a' .. 'z'  -> Char.code(c) - 71     (* 71 = Char.code 'a' - 26 *)
+       | '0' .. '9'  -> Char.code(c) + 4      (* -4 = Char.code '0' - 52 *)
+       | '+'         -> 62
+       | '-'         -> if not p_url then 
+                          invalid_arg "Netencoding.Base64.decode_substring";
+                        62
+       | '/'         -> 63
+       | _           -> invalid_arg "Netencoding.Base64.decode_substring";
+    in
+
+    (* Decode all but the last quartet: *)
+
+    let cursor = ref pos in
+    let rec next_char() = 
+      match t.[ !cursor ] with
+         (' '|'\t'|'\r'|'\n') -> 
+           if p_spaces then (incr cursor; next_char())
+           else invalid_arg "Netencoding.Base64.decode_substring"
+       | c ->
+           incr cursor; c
+    in
+
+    if p_spaces then begin
+      for k = 0 to l_t / 4 - 2 do
+       let q = 3*k in
+       let c0 = next_char() in
+       let c1 = next_char() in
+       let c2 = next_char() in
+       let c3 = next_char() in
+       let n0 = decode_char c0 in
+       let n1 = decode_char c1 in
+       let n2 = decode_char c2 in
+       let n3 = decode_char c3 in
+       let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+       let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+       let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
+       String.unsafe_set s q     (Char.chr x0);
+       String.unsafe_set s (q+1) (Char.chr x1);
+       String.unsafe_set s (q+2) (Char.chr x2);
+      done;
+    end
+    else begin
+      (* Much faster: *)
+      for k = 0 to l_t / 4 - 2 do
+       let p = pos + 4*k in
+       let q = 3*k in
+       let c0 = String.unsafe_get t p in
+       let c1 = String.unsafe_get t (p + 1) in
+       let c2 = String.unsafe_get t (p + 2) in
+       let c3 = String.unsafe_get t (p + 3) in
+       let n0 = decode_char c0 in
+       let n1 = decode_char c1 in
+       let n2 = decode_char c2 in
+       let n3 = decode_char c3 in
+       let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+       let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+       let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
+       String.unsafe_set s q     (Char.chr x0);
+       String.unsafe_set s (q+1) (Char.chr x1);
+       String.unsafe_set s (q+2) (Char.chr x2);
+      done;
+      cursor := pos + l_t - 4;
+    end;
+
+    (* Decode the last quartet: *)
+
+    if l_t > 0 then begin
+      let q = 3*(l_t / 4 - 1) in
+      let c0 = next_char() in
+      let c1 = next_char() in
+      let c2 = next_char() in
+      let c3 = next_char() in
+
+      if (c2 = '=' & c3 = '=') or (p_url & c2 = '.' & c3 = '.') then begin
+       let n0 = decode_char c0 in
+       let n1 = decode_char c1 in
+       let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+       s.[ q ]   <- Char.chr x0;
+      end
+      else
+       if (c3 = '=') or (p_url & c3 = '.') then begin
+         let n0 = decode_char c0 in
+         let n1 = decode_char c1 in
+         let n2 = decode_char c2 in
+         let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+         let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+         s.[ q ]   <- Char.chr x0;
+         s.[ q+1 ] <- Char.chr x1;
+       end
+       else begin
+         let n0 = decode_char c0 in
+         let n1 = decode_char c1 in
+         let n2 = decode_char c2 in
+         let n3 = decode_char c3 in
+         let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+         let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+         let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
+         s.[ q ]   <- Char.chr x0;
+         s.[ q+1 ] <- Char.chr x1;
+         s.[ q+2 ] <- Char.chr x2;
+       end
+
+    end;
+
+    s ;;
+
+
+
+  let decode ?(pos=0) ?len ?(url_variant=true) ?(accept_spaces=false) s =
+    let l = match len with None -> String.length s - pos | Some x -> x in
+    decode_substring s pos l url_variant accept_spaces;;
+
+  let decode_ignore_spaces s =
+    decode_substring s 0 (String.length s) true true;;
+
+  
+end
+
+
+
+module QuotedPrintable = struct
+
+  let encode_substring s ~pos ~len =
+    
+    if len < 0 or pos < 0 or pos > String.length s then
+      invalid_arg "Netencoding.QuotedPrintable.encode_substring";
+    if pos + len > String.length s then
+      invalid_arg "Netencoding.QuotedPrintable.encode_substring";
+
+    let rec count n i =
+      if i < len then
+       match String.unsafe_get s (pos+i) with
+           ('\r'|'\n') -> 
+             count (n+1) (i+1)
+         | ('\000'..'\031'|'\127'..'\255'|
+            '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') ->
+             count (n+3) (i+1)
+         | ' ' ->
+             (* Protect spaces only if they occur at the end of a line *)
+             if i+1 < len then
+               match s.[pos+i+1] with
+                   ('\r'|'\n') -> 
+                     count (n+3) (i+1)
+                 | _ ->
+                     count (n+1) (i+1)
+             else
+               count (n+3) (i+1)
+         | _ ->
+             count (n+1) (i+1)
+      else
+       n
+    in
+
+    let l = count 0 0 in
+    let t = String.create l in
+    
+    let hexdigit =
+      [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
+        '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in
+
+    let k = ref 0 in
+
+    let add_quoted c =
+      t.[ !k ]   <- '=';
+      t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
+      t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
+    in
+
+    for i = 0 to len - 1 do
+      match String.unsafe_get s (pos+i) with
+         ('\r'|'\n') as c -> 
+           String.unsafe_set t !k c;
+           incr k
+       | ('\000'..'\031'|'\127'..'\255'|
+          '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') as c ->
+           add_quoted c;
+           k := !k + 3
+       | ' ' ->
+           (* Protect spaces only if they occur at the end of a line *)
+           if i+1 < len then
+             match s.[pos+i+1] with
+                 ('\r'|'\n') -> 
+                   add_quoted ' ';
+                   k := !k + 3;
+               | _ ->
+                   String.unsafe_set t !k ' ';
+                   incr k
+           else begin
+             add_quoted ' ';
+             k := !k + 3;
+           end
+       | c ->
+           String.unsafe_set t !k c;
+           incr k
+    done;
+
+    t ;;
+
+
+  let encode ?(pos=0) ?len s =
+    let l = match len with None -> String.length s - pos | Some x -> x in 
+    encode_substring s pos l;;
+
+
+
+  let decode_substring s ~pos ~len =
+    
+    if len < 0 or pos < 0 or pos > String.length s then
+      invalid_arg "Netencoding.QuotedPrintable.decode_substring";
+    if pos + len > String.length s then
+      invalid_arg "Netencoding.QuotedPrintable.decode_substring";
+
+    let decode_hex c =
+      match c with
+         '0'..'9' -> Char.code c - 48
+       | 'A'..'F' -> Char.code c - 55
+       | 'a'..'f' -> Char.code c - 87
+       | _ ->
+          invalid_arg "Netencoding.QuotedPrintable.decode_substring";
+    in 
+
+    let rec count n i =
+      if i < len then
+       match String.unsafe_get s (pos+i) with
+           '=' ->
+             if i+1 = len then
+               (* A '=' at EOF is ignored *)
+               count n (i+1)
+             else
+               if i+1 < len then
+                 match s.[pos+i+1] with
+                     '\r' ->
+                       (* Official soft break *)
+                       if i+2 < len & s.[pos+i+2] = '\n' then
+                         count n (i+3)
+                       else
+                         count n (i+2)
+                   | '\n' ->
+                       (* Unofficial soft break *)
+                       count n (i+2)
+                   | _ ->
+                       if i+2 >= len then
+                         invalid_arg 
+                           "Netencoding.QuotedPrintable.decode_substring";
+                       let _ = decode_hex s.[pos+i+1] in
+                       let _ = decode_hex s.[pos+i+2] in
+                       count (n+1) (i+3)
+               else
+                 invalid_arg "Netencoding.QuotedPrintable.decode_substring"
+         | _ ->
+             count (n+1) (i+1)
+      else
+       n
+    in
+
+    let l = count 0 0 in
+    let t = String.create l in
+    let k = ref pos in
+    let e = pos + len in
+    let i = ref 0 in
+
+    while !i < l do
+      match String.unsafe_get s !k with
+         '=' ->
+           if !k+1 = e then
+             (* A '=' at EOF is ignored *)
+             ()
+           else
+             if !k+1 < e then
+               match s.[!k+1] with
+                   '\r' ->
+                     (* Official soft break *)
+                     if !k+2 < e & s.[!k+2] = '\n' then
+                       k := !k + 3
+                     else
+                       k := !k + 2
+                 | '\n' ->
+                     (* Unofficial soft break *)
+                     k := !k + 2
+                 | _ ->
+                     if !k+2 >= e then
+                       invalid_arg 
+                         "Netencoding.QuotedPrintable.decode_substring";
+                     let x1 = decode_hex s.[!k+1] in
+                     let x2 = decode_hex s.[!k+2] in
+                     t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
+                     k := !k + 3;
+                     incr i
+             else
+               invalid_arg "Netencoding.QuotedPrintable.decode_substring"
+       | c ->
+           String.unsafe_set t !i c;
+           incr k;
+           incr i
+    done;
+
+    t ;;
+
+
+  let decode ?(pos=0) ?len s =
+    let l = match len with None -> String.length s - pos | Some x -> x in 
+    decode_substring s pos l;;
+
+end
+
+             
+module Q = struct
+
+  let encode_substring s ~pos ~len =
+    
+    if len < 0 or pos < 0 or pos > String.length s then
+      invalid_arg "Netencoding.Q.encode_substring";
+    if pos + len > String.length s then
+      invalid_arg "Netencoding.Q.encode_substring";
+
+    let rec count n i =
+      if i < len then
+       match String.unsafe_get s (pos+i) with
+         | ('A'..'Z'|'a'..'z'|'0'..'9') ->
+             count (n+1) (i+1)
+         | _ ->
+             count (n+3) (i+1)
+      else
+       n
+    in
+
+    let l = count 0 0 in
+    let t = String.create l in
+    
+    let hexdigit =
+      [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
+        '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in
+
+    let k = ref 0 in
+
+    let add_quoted c =
+      t.[ !k ]   <- '=';
+      t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
+      t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
+    in
+
+    for i = 0 to len - 1 do
+      match String.unsafe_get s (pos+i) with
+       | ('A'..'Z'|'a'..'z'|'0'..'9') as c ->
+           String.unsafe_set t !k c;
+           incr k
+       | c ->
+           add_quoted c;
+           k := !k + 3
+    done;
+
+    t ;;
+
+
+  let encode ?(pos=0) ?len s =
+    let l = match len with None -> String.length s - pos | Some x -> x in 
+    encode_substring s pos l;;
+
+
+
+  let decode_substring s ~pos ~len =
+    
+    if len < 0 or pos < 0 or pos > String.length s then
+      invalid_arg "Netencoding.Q.decode_substring";
+    if pos + len > String.length s then
+      invalid_arg "Netencoding.Q.decode_substring";
+
+    let decode_hex c =
+      match c with
+         '0'..'9' -> Char.code c - 48
+       | 'A'..'F' -> Char.code c - 55
+       | 'a'..'f' -> Char.code c - 87
+       | _ ->
+          invalid_arg "Netencoding.Q.decode_substring";
+    in 
+
+    let rec count n i =
+      if i < len then
+       match String.unsafe_get s (pos+i) with
+           '=' ->
+             if i+2 >= len then
+               invalid_arg "Netencoding.Q.decode_substring";
+             let _ = decode_hex s.[pos+i+1] in
+             let _ = decode_hex s.[pos+i+2] in
+             count (n+1) (i+3)
+         | _ ->  (* including '_' *)
+             count (n+1) (i+1)
+      else
+       n
+    in
+
+    let l = count 0 0 in
+    let t = String.create l in
+    let k = ref pos in
+    let e = pos + len in
+    let i = ref 0 in
+
+    while !i < l do
+      match String.unsafe_get s !k with
+         '=' ->
+           if !k+2 >= e then
+             invalid_arg "Netencoding.Q.decode_substring";
+           let x1 = decode_hex s.[!k+1] in
+           let x2 = decode_hex s.[!k+2] in
+           t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
+           k := !k + 3;
+           incr i
+       | '_' ->
+           String.unsafe_set t !i ' ';
+           incr k;
+           incr i
+       | c ->
+           String.unsafe_set t !i c;
+           incr k;
+           incr i
+    done;
+
+    t ;;
+
+
+  let decode ?(pos=0) ?len s =
+    let l = match len with None -> String.length s - pos | Some x -> x in 
+    decode_substring s pos l ;;
+
+end
+
+
+module Url = struct
+  let hex_digits =
+    [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
+       '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |];;
+
+  let to_hex2 k =
+    (* Converts k to a 2-digit hex string *)
+    let s = String.create 2 in
+    s.[0] <- hex_digits.( (k lsr 4) land 15 );
+    s.[1] <- hex_digits.( k land 15 );
+    s ;;
+
+
+  let of_hex1 c =
+    match c with
+       ('0'..'9') -> Char.code c - Char.code '0'
+      | ('A'..'F') -> Char.code c - Char.code 'A' + 10
+      | ('a'..'f') -> Char.code c - Char.code 'a' + 10
+      | _ ->
+       raise Not_found ;;
+
+
+
+  let url_encoding_re =
+    Str.regexp "[^A-Za-z0-9$_.!*'(),-]";;
+
+  let url_decoding_re =
+    Str.regexp "\\+\\|%..\\|%.\\|%";;
+
+
+  let encode s =
+    Str.global_substitute
+      url_encoding_re
+      (fun r _ ->
+        match Str.matched_string r s with
+            " " -> "+"
+          | x ->
+              let k = Char.code(x.[0]) in
+              "%" ^ to_hex2 k
+      )
+      s ;;
+
+
+  let decode s =
+    let l = String.length s in
+    Str.global_substitute
+      url_decoding_re
+      (fun r _ ->
+        match Str.matched_string r s with
+          | "+" -> " "
+          | _ ->
+              let i = Str.match_beginning r in
+              (* Assertion: s.[i] = '%' *)
+              if i+2 >= l then failwith "Cgi.decode";
+              let c1 = s.[i+1] in
+              let c2 = s.[i+2] in
+              begin
+                try
+                  let k1 = of_hex1 c1 in
+                  let k2 = of_hex1 c2 in
+                  String.make 1 (Char.chr((k1 lsl 4) lor k2))
+                with
+                    Not_found ->
+                      failwith "Cgi.decode"
+              end
+      )
+      s ;;
+
+end
+
+
+module Html = struct
+
+  let eref_re = 
+    Str.regexp 
+      "&\\(#\\([0-9]+\\);\\|\\([a-zA-Z]+\\);\\)" ;;
+  let unsafe_re = Str.regexp "[<>&\"\000-\008\011-\012\014-\031\127-\255]" ;;
+  
+  let etable =
+    [ "lt", "<";
+      "gt", ">";
+      "amp", "&";
+      "quot", "\"";     
+         (* Note: &quot; is new in HTML-4.0, but it has been widely used
+         * much earlier.
+         *)
+      "nbsp", "\160";
+      "iexcl", "\161";
+      "cent", "\162";
+      "pound", "\163";
+      "curren", "\164";
+      "yen", "\165";
+      "brvbar", "\166";
+      "sect", "\167";
+      "uml", "\168";
+      "copy", "\169";
+      "ordf", "\170";
+      "laquo", "\171";
+      "not", "\172";
+      "shy", "\173";
+      "reg", "\174";
+      "macr", "\175";
+      "deg", "\176";
+      "plusmn", "\177";
+      "sup2", "\178";
+      "sup3", "\179";
+      "acute", "\180";
+      "micro", "\181";
+      "para", "\182";
+      "middot", "\183";
+      "cedil", "\184";
+      "sup1", "\185";
+      "ordm", "\186";
+      "raquo", "\187";
+      "frac14", "\188";
+      "frac12", "\189";
+      "frac34", "\190";
+      "iquest", "\191";
+      "Agrave", "\192";
+      "Aacute", "\193";
+      "Acirc", "\194";
+      "Atilde", "\195";
+      "Auml", "\196";
+      "Aring", "\197";
+      "AElig", "\198";
+      "Ccedil", "\199";
+      "Egrave", "\200";
+      "Eacute", "\201";
+      "Ecirc", "\202";
+      "Euml", "\203";
+      "Igrave", "\204";
+      "Iacute", "\205";
+      "Icirc", "\206";
+      "Iuml", "\207";
+      "ETH", "\208";
+      "Ntilde", "\209";
+      "Ograve", "\210";
+      "Oacute", "\211";
+      "Ocirc", "\212";
+      "Otilde", "\213";
+      "Ouml", "\214";
+      "times", "\215";
+      "Oslash", "\216";
+      "Ugrave", "\217";
+      "Uacute", "\218";
+      "Ucirc", "\219";
+      "Uuml", "\220";
+      "Yacute", "\221";
+      "THORN", "\222";
+      "szlig", "\223";
+      "agrave", "\224";
+      "aacute", "\225";
+      "acirc", "\226";
+      "atilde", "\227";
+      "auml", "\228";
+      "aring", "\229";
+      "aelig", "\230";
+      "ccedil", "\231";
+      "egrave", "\232";
+      "eacute", "\233";
+      "ecirc", "\234";
+      "euml", "\235";
+      "igrave", "\236";
+      "iacute", "\237";
+      "icirc", "\238";
+      "iuml", "\239";
+      "eth", "\240";
+      "ntilde", "\241";
+      "ograve", "\242";
+      "oacute", "\243";
+      "ocirc", "\244";
+      "otilde", "\245";
+      "ouml", "\246";
+      "divide", "\247";
+      "oslash", "\248";
+      "ugrave", "\249";
+      "uacute", "\250";
+      "ucirc", "\251";
+      "uuml", "\252";
+      "yacute", "\253";
+      "thorn", "\254";
+      "yuml", "\255";
+    ] ;;
+
+  let quick_etable =
+    let ht = Hashtbl.create 50 in
+    List.iter (fun (name,value) -> Hashtbl.add ht name value) etable;
+    (* Entities to be decoded, but that must not be encoded: *)
+    Hashtbl.add ht "apos" "'";        (* used in XML documents *)
+    ht ;;
+
+  let rev_etable =
+    let a = Array.create 256 "" in
+    List.iter (fun (name,value) -> 
+                a.(Char.code(value.[0])) <- "&" ^ name ^ ";") etable;
+    for i = 0 to 8 do
+      a.(i) <- "&#" ^ string_of_int i ^ ";"
+    done;
+    for i = 11 to 12 do
+      a.(i) <- "&#" ^ string_of_int i ^ ";"
+    done;
+    for i = 14 to 31 do
+      a.(i) <- "&#" ^ string_of_int i ^ ";"
+    done;
+    for i = 127 to 159 do
+      a.(i) <- "&#" ^ string_of_int i ^ ";"
+    done;
+    a ;;
+
+  let decode_to_latin1 s =
+    Str.global_substitute
+      eref_re
+      (fun r _ ->
+        let t = Str.matched_string r s in
+        try
+          let n = int_of_string(Str.matched_group r 2 s) in
+          if n < 256 then
+            String.make 1 (Char.chr n)
+          else
+            t
+        with
+            Not_found ->
+              try
+                let name = Str.matched_group r 3 s in
+                try
+                  Hashtbl.find quick_etable name
+                with
+                    Not_found ->
+                      t
+              with
+                  Not_found -> assert false
+      )
+      s ;;
+
+  let encode_from_latin1 s =
+    Str.global_substitute
+      unsafe_re
+      (fun r _ ->
+        let t = Str.matched_string r s in
+        let i = Char.code (t.[0]) in
+        rev_etable.(i)
+      )
+      s ;;
+end
+        
+            
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.5  2000/06/25 22:34:43  gerd
+ *     Added labels to arguments.
+ *
+ * Revision 1.4  2000/06/25 21:15:48  gerd
+ *     Checked thread-safety.
+ *
+ * Revision 1.3  2000/03/03 17:03:16  gerd
+ *     Q encoding: CR and LF are quoted.
+ *
+ * Revision 1.2  2000/03/03 01:08:29  gerd
+ *     Added Netencoding.Html functions.
+ *
+ * Revision 1.1  2000/03/02 01:14:48  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netencoding.mli b/helm/DEVEL/pxp/netstring/netencoding.mli
new file mode 100644 (file)
index 0000000..6466572
--- /dev/null
@@ -0,0 +1,271 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(**********************************************************************)
+(* Several encodings important for the net                            *)
+(**********************************************************************)
+
+
+(**********************************************************************)
+(* Base 64 encoding                                                   *)
+(**********************************************************************)
+
+(* See RFC 2045 for a description of Base 64 encoding. *)
+
+(* THREAD-SAFETY: 
+ * All Base64 functions are reentrant and thus thread-safe.
+ *)
+
+module Base64 : sig
+
+  val encode : ?pos:int -> ?len:int -> ?linelength:int -> ?crlf:bool ->
+               string -> string
+      (* Compute the "base 64" encoding of the given string argument.
+       * Note that the result is a string that only contains the characters
+       * a-z, A-Z, 0-9, +, /, =, and optionally spaces, CR and LF characters.
+       * a-z, A-Z, 0-9, +, /, =, and optionally CR and LF characters.
+       * If pos and/or len are passed, only the substring starting at
+       * pos (default: 0) with length len (default: rest of the string)
+       * is encoded.
+       *
+       * The result is divided up into lines not longer than 'linelength' 
+       * (without counting the line separator); default: do not divide lines.
+       * If 'linelength' is smaller than 4, no line division is performed.
+       * If 'linelength' is not divisible by 4, the produced lines are a 
+       * bit shorter than 'linelength'.
+       *
+       * If 'crlf' is set (default: false), the lines are ended by CRLF;
+       * otherwise they are only ended by LF.
+       * (You need the crlf option to produce correct MIME messages.)
+       * 
+       *)
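+      (* Usage sketch:
+       *   encode "abc"                     = "YWJj"
+       *   encode "ab"                      = "YWI="
+       *   encode ~linelength:4 "abcabc"    = "YWJj\nYWJj\n"
+       *)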
+
+  val url_encode : ?pos:int -> ?len:int -> ?linelength:int -> ?crlf:bool ->
+                   string -> string
+      (* Same as 'encode' but use slightly different characters that can be
+       * part of URLs without additional encodings.
+       * The encoded string consists only of the characters a-z, A-Z, 0-9, 
+       * -, /, .
+       * 'url_encode' does NOT implement the Base 64 encoding as described
+       * in the standard!
+       *)
+
+  val encode_substring : string -> pos:int -> len:int -> linelength:int -> 
+                         crlf:bool -> string
+      (* *** DEPRECATED FUNCTION *** Use 'encode' instead! ***
+       *
+       * encode_substring s pos len linelen crlf:
+       * Encodes the substring at position 'pos' in 's' with length 'len'.
+       * The result is divided up into lines not longer than 'linelen' (without
+       * counting the line separator).
+       * If 'linelen' is smaller than 4, no line division is performed.
+       * If 'linelen' is not divisible by 4, the produced lines are a 
+       * bit shorter than 'linelen'.
+       * If 'crlf' the lines are ended by CRLF; otherwise they are only
+       * ended by LF.
+       * (You need the crlf option to produce correct MIME messages.)
+       *)
+
+  val decode : ?pos:int -> ?len:int -> ?url_variant:bool -> 
+               ?accept_spaces:bool -> string -> string
+      (* Decodes the given string argument. 
+       *
+       * If pos and/or len are passed, only the substring starting at
+       * pos (default: 0) with length len (default: rest of the string)
+       * is decoded.
+       * 
+       * If url_variant (default: true) is set, the function also
+       * accepts the characters '-' and '.' as produced by 'url_encode'.
+       *
+       * If accept_spaces (default: false) is set, the function ignores
+       * white space contained in the string to decode (otherwise the
+       * function fails if it finds white space).
+       *)
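+      (* Usage sketch:
+       *   decode "YWJj"                        = "abc"
+       *   decode ~accept_spaces:true "YWJj\n"  = "abc"
+       *   decode "YWJj\n"                      (* rejected: Invalid_argument *)
+       *)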
+
+  val decode_ignore_spaces : string -> string
+      (* *** DEPRECATED FUNCTION *** Use 'decode' instead! ***
+       *
+       * Decodes the string like 'decode', but the string may contain
+       * whitespace characters.
+       * This function is slower than 'decode'.
+       *)
+
+  val decode_substring : string -> pos:int -> len:int -> url_variant:bool -> 
+                         accept_spaces:bool -> string
+      (* *** DEPRECATED FUNCTION *** Use 'decode' instead! ***
+       *
+       * decode_substring s pos len url spaces:
+       * Decodes the substring of 's' beginning at 'pos' with length 'len'.
+       * If 'url', strings created by 'url_encode' are accepted, too.
+       * If 'spaces', whitespace characters are allowed in the string.
+       *)
+end
+
+(**********************************************************************)
+(* Quoted printable encoding                                          *)
+(**********************************************************************)
+
+(* See RFC 2045.
+ * This implementation assumes that the encoded string has a text MIME
+ * type. Because of this, the characters CR and LF are never protected 
+ * by hex tokens; they are copied literally to the output string.
+ *)
+
+(* THREAD-SAFETY: 
+ * All QuotedPrintable functions are reentrant and thus thread-safe.
+ *)
+
+module QuotedPrintable :
+  sig
+    val encode : ?pos:int -> ?len:int -> string -> string
+       (* Encodes the string and returns it.
+        * Note line breaks: 
+        *   No additional soft line breaks are added. The characters CR
+	 *   and LF are not represented as =0D and =0A, respectively.
+	 *   (But other control characters ARE encoded.)
+        * Note unsafe characters:
+        *   As recommended by RFC 2045, the characters !\"#$@[]^`{|}~
+        *   are additionally represented as hex tokens.        -- "
+        *
+        * If pos and/or len are passed, only the substring starting at
+        * pos (default: 0) with length len (default: rest of the string)
+        * is encoded.
+        *)
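+	(* Usage sketch:
+	 *   encode "a = b"      = "a =3D b"
+	 *   encode "trailing "  = "trailing=20"
+	 *)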
+
+    val encode_substring : string -> pos:int -> len:int -> string
+       (* *** DEPRECATED FUNCTION *** Use 'encode' instead! ***
+        * encode_substring s pos len:
+        * Encodes the substring of 's' beginning at 'pos' with length 'len'.
+        *)
+
+    val decode : ?pos:int -> ?len:int -> string -> string
+       (* Decodes the string and returns it.
+        * Most format errors cause an Invalid_argument exception.
+        * Note that soft line breaks can be properly decoded although 
+        * 'encode' will never produce them.
+        *
+        * If pos and/or len are passed, only the substring starting at
+        * pos (default: 0) with length len (default: rest of the string)
+        * is decoded.
+        *)
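+	(* Usage sketch (a soft line break "=\n" disappears on decoding):
+	 *   decode "a =3D b"       = "a = b"
+	 *   decode "long li=\nne"  = "long line"
+	 *)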
+
+    val decode_substring : string -> pos:int -> len:int -> string
+        (* *** DEPRECATED FUNCTION *** Use 'decode' instead! ***
+        * decode_substring s pos len:
+        * Decodes the substring of 's' beginning at 'pos' with length 'len'.
+        *)
+
+  end
+
+(**********************************************************************)
+(* Q encoding                                                         *)
+(**********************************************************************)
+
+(* See RFC 2047. 
+ * The functions behave similarly to those of QuotedPrintable. 
+ *)
+
+(* THREAD-SAFETY: 
+ * All Q functions are reentrant and thus thread-safe.
+ *)
+
+module Q :
+  sig
+    val encode : ?pos:int -> ?len:int -> string -> string
+       (* Note:
+        * All characters except alphanumeric characters are protected by
+        * hex tokens.
+        * In particular, spaces are represented as "=20", not as "_".
+        *)
+
+    val decode : ?pos:int -> ?len:int -> string -> string
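+	(* Usage sketch:
+	 *   encode "a b/c"    = "a=20b=2Fc"
+	 *   decode "a=20b_c"  = "a b c"
+	 *)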
+
+    val encode_substring : string -> pos:int -> len:int -> string
+        (* *** DEPRECATED FUNCTION *** Use 'encode' instead! *** *)
+
+    val decode_substring : string -> pos:int -> len:int -> string
+        (* *** DEPRECATED FUNCTION *** Use 'decode' instead! *** *)
+  end
+
+(**********************************************************************)
+(* B encoding                                                         *)
+(**********************************************************************)
+
+(* The B encoding of RFC 2047 is the same as Base64. *)
+
+
+(**********************************************************************)
+(* URL-encoding                                                       *)
+(**********************************************************************)
+
+(* Encoding/Decoding within URLs:
+ *
+ * The following two functions perform the '%'-substitution for
+ * characters that may otherwise be interpreted as metacharacters.
+ *
+ * According to: RFC 1738, RFC 1630
+ *)
+
+(* THREAD-SAFETY:
+ * The Url functions are thread-safe.
+ *)
+
+module Url : 
+  sig
+    val decode : string -> string
+    val encode : string -> string
+  end
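+
+(* Usage sketch:
+ *   Url.encode "a b&c"    = "a+b%26c"
+ *   Url.decode "a+b%26c"  = "a b&c"
+ *)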
+
+
+(**********************************************************************)
+(* HTMLization                                                        *)
+(**********************************************************************)
+
+(* Encodes characters that need protection by converting them to
+ * entity references. E.g. "<" is converted to "&lt;".
+ * Since the entities are named, these functions depend on the character
+ * set; currently only the Latin 1 alphabet is supported.
+ *)
+
+(* THREAD-SAFETY:
+ * The Html functions are thread-safe.
+ *)
+
+module Html :
+  sig
+    val encode_from_latin1 : string -> string
+	(* Encodes the characters 0-8, 11-12, 14-31, '<', '>', the double
+	 * quote, '&', and 127-255. If a character has a named entity, the
+	 * named entity is preferred over a numeric entity.
+        *)
+    val decode_to_latin1   : string -> string
+       (* Decodes the string. Unknown named entities are left as they
+        * are (i.e. decode_to_latin1 "&nonsense;" = "&nonsense;").
+        * The same applies to numeric entities greater than 255.
+        *)
+  end
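+
+(* Usage sketch:
+ *   Html.encode_from_latin1 "x < y & z"  = "x &lt; y &amp; z"
+ *   Html.decode_to_latin1 "&eacute;&#233;&nonsense;" = "\233\233&nonsense;"
+ *)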
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.4  2000/06/25 22:34:43  gerd
+ *     Added labels to arguments.
+ *
+ * Revision 1.3  2000/06/25 21:15:48  gerd
+ *     Checked thread-safety.
+ *
+ * Revision 1.2  2000/03/03 01:08:29  gerd
+ *     Added Netencoding.Html functions.
+ *
+ * Revision 1.1  2000/03/02 01:14:48  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/nethtml.ml b/helm/DEVEL/pxp/netstring/nethtml.ml
new file mode 100644 (file)
index 0000000..7f9d983
--- /dev/null
@@ -0,0 +1,276 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Nethtml_scanner;;
+
+type document =
+    Element of (string  *  (string*string) list  *  document list)
+  | Data of string
+;;
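+
+(* For instance, the HTML fragment
+ *   <p align="center">Hello<br></p>
+ * would be represented as
+ *   Element("p", ["align", "center"],
+ *           [Data "Hello"; Element("br", [], [])])
+ *)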
+
+
+exception End_of_scan;;
+
+
+let no_end_tag =  (* empty HTML elements *)
+  ref
+    [ "isindex";
+      "base";
+      "meta";
+      "link";
+      "hr";
+      "input";
+      "img";
+      "param";
+      "basefont";
+      "br";
+      "area";
+    ]
+;;
+
+
+let special_tag =   (* other lexical rules *)
+  ref
+    [ "script";
+      "style";
+    ]
+;;
+
+
+let rec parse_comment buf =
+  let t = scan_comment buf in
+  match t with
+      Mcomment ->
+       parse_comment buf
+    | Eof ->
+       raise End_of_scan
+    | _ ->
+       ()
+;;
+
+
+let rec parse_doctype buf =
+  let t = scan_doctype buf in
+  match t with
+      Mdoctype ->
+       parse_doctype buf
+    | Eof ->
+       raise End_of_scan
+    | _ ->
+       ()
+;;
+
+
+let parse_document buf =
+  let current_name = ref "" in
+  let current_atts = ref [] in
+  let current_subs = ref [] in
+  let stack = Stack.create() in
+
+  let parse_atts() =
+    let rec next_no_space() =
+      match scan_element buf with
+         Space _ -> next_no_space()
+       | t -> t
+    in
+
+    let rec parse_atts_lookahead next =
+      match next with
+         Relement -> []
+       | Name n ->
+           begin match next_no_space() with
+               Is ->
+                 begin match next_no_space() with
+                     Name v ->
+                       (String.lowercase n, String.uppercase v) ::
+                       parse_atts_lookahead (next_no_space())
+                   | Literal v ->
+                       (String.lowercase n,v) ::
+                       parse_atts_lookahead (next_no_space())
+                   | Eof ->
+                       raise End_of_scan
+                   | Relement ->
+                       (* Illegal *)
+                       []
+                   | _ ->
+                       (* Illegal *)
+                       parse_atts_lookahead (next_no_space())
+                 end
+             | Eof ->
+                 raise End_of_scan
+             | Relement ->
+                 (* <tag name> <==> <tag name="name"> *)
+                 [ String.lowercase n, String.lowercase n ]
+             | next' ->
+                 (* assume <tag name ... > <==> <tag name="name" ...> *)
+                 ( String.lowercase n, String.lowercase n ) ::
+                 parse_atts_lookahead next'
+           end
+       | Eof ->
+           raise End_of_scan
+       | _ ->
+           (* Illegal *)
+           parse_atts_lookahead (next_no_space())
+    in
+    parse_atts_lookahead (next_no_space())
+  in
+
+  let rec parse_special name =
+    (* Parse until </name> *)
+    match scan_special buf with
+       Lelementend n ->
+         if n = name then
+           ""
+         else
+           "</" ^ n ^ parse_special name
+      | Eof ->
+         raise End_of_scan
+      | Cdata s ->
+         s ^ parse_special name
+      | _ ->
+         (* Illegal *)
+         parse_special name
+  in
+
+  let rec skip_element() =
+    (* Skip until ">" *)
+    match scan_element buf with
+       Relement ->
+         ()
+      | Eof ->
+         raise End_of_scan
+      | _ ->
+         skip_element()
+  in
+
+  let rec parse_next() =
+    let t = scan_document buf in
+    match t with
+       Lcomment ->
+         parse_comment buf;
+         parse_next()
+      | Ldoctype ->
+         parse_doctype buf;
+         parse_next()
+      | Lelement name ->
+         let name = String.lowercase name in
+         if List.mem name !no_end_tag then begin
+           let atts = parse_atts() in
+           current_subs := (Element(name, atts, [])) :: !current_subs;
+           parse_next()
+         end
+         else if List.mem name !special_tag then begin
+           let atts = parse_atts() in
+           let data = parse_special name in
+           (* Read until ">" *)
+           skip_element();
+           current_subs := (Element(name, atts, [Data data])) :: !current_subs;
+           parse_next()
+         end
+         else begin
+           let atts = parse_atts() in
+           Stack.push (!current_name, !current_atts, !current_subs) stack;
+           current_name := name;
+           current_atts := atts;
+           current_subs := [];
+           parse_next()
+         end
+      | Cdata data ->
+         current_subs := (Data data) :: !current_subs;
+         parse_next()
+      | Lelementend name ->
+         let name = String.lowercase name in
+         (* Read until ">" *)
+         skip_element();
+         (* Search the element to close on the stack: *)
+         let found = ref (name = !current_name) in
+         Stack.iter
+           (fun (old_name, _, _) ->
+              if name = old_name then found := true)
+           stack;
+         (* If not found, the end tag is wrong. Simply ignore it. *)
+         if not !found then
+           parse_next()
+         else begin
+           (* Put the current element on to the stack: *)
+           Stack.push (!current_name, !current_atts, !current_subs) stack;
+           (* If found: Remove the elements from the stack, and append
+            * them to the previous element as sub elements
+            *)
+           let rec remove() =
+             let old_name, old_atts, old_subs = Stack.pop stack in
+               (* or raise Stack.Empty *)
+             if old_name = name then
+               old_name, old_atts, old_subs
+             else
+               let older_name, older_atts, older_subs = remove() in
+               older_name,
+               older_atts,
+               (Element (old_name, old_atts, List.rev old_subs) :: older_subs)
+           in
+           let old_name, old_atts, old_subs = remove() in
+           (* Remove one more element: the element containing the element
+            * currently being closed.
+            *)
+           let new_name, new_atts, new_subs = Stack.pop stack in
+           current_name := new_name;
+           current_atts := new_atts;
+           current_subs := (Element (old_name, old_atts, List.rev old_subs))
+                            :: new_subs;
+           (* Go on *)
+           parse_next()
+         end
+      | Eof ->
+         raise End_of_scan
+      | _ ->
+         parse_next()
+  in
+  try
+    parse_next();
+    List.rev !current_subs
+  with
+      End_of_scan ->
+       (* Close all remaining elements: *)
+       Stack.push (!current_name, !current_atts, !current_subs) stack;
+       let rec remove() =
+         let old_name, old_atts, old_subs = Stack.pop stack in
+               (* or raise Stack.Empty *)
+         try
+           let older_name, older_atts, older_subs = remove() in
+           older_name,
+           older_atts,
+           (Element (old_name, old_atts, List.rev old_subs) :: older_subs)
+         with
+             Stack.Empty ->
+               old_name, old_atts, old_subs
+       in
+       let name, atts, subs = remove() in
+       List.rev subs
+;;
+
+
+let parse_string s =
+  let buf = Lexing.from_string s in
+  parse_document buf
+;;
+
+
+let parse_file fd =
+  let buf = Lexing.from_channel fd in
+  parse_document buf
+;;
+
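For illustration, a small sketch of the end-tag recovery implemented above, assuming this file is compiled as the Nethtml module: when </b> arrives while <i> is still open, the stack search finds "b" further down, so "i" is closed implicitly and ends up nested inside "b".

    let () =
      assert (Nethtml.parse_string "<b><i>x</b>" =
                [ Nethtml.Element ("b", [],
                    [ Nethtml.Element ("i", [], [ Nethtml.Data "x" ]) ]) ])
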
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/03/03 01:07:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/nethtml.mli b/helm/DEVEL/pxp/netstring/nethtml.mli
new file mode 100644 (file)
index 0000000..d7af381
--- /dev/null
@@ -0,0 +1,72 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(* The type 'document' represents parsed HTML documents.
+ * Element (name, args, subnodes): is an element node for an element of
+ *   type 'name' (i.e. written <name ...>...</name>) with arguments 'args'
+ *   and subnodes 'subnodes' (the material within the element). The arguments
+ *   are simply name/value pairs. Entity references (something like &xy;)
+ *   occurring in the values are NOT resolved.
+ *   Arguments without values (e.g. <select name="x" multiple>: here,
+ *   "multiple" is such an argument) are represented as (name,name), i.e. the
+ *   name is returned as the value.
+ *   As argument names are case-insensitive, they are all returned in
+ *   lowercase.
+ * Data s: is a character data node. Again, entity references are kept
+ *   as such and are not resolved to the characters they denote.
+ *)
+
+type document =
+    Element of (string  *  (string*string) list  *  document list)
+  | Data of string
+;;
+
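For illustration, a value of this type for a small fragment, assuming the default behaviour described below (attribute names are lowercased, quoted values are kept as written, and "br" is in the no_end_tag list):

    (* Representation of: <p align="center">Hello<br>world</p> *)
    let example : Nethtml.document list =
      [ Nethtml.Element ("p", [ ("align", "center") ],
          [ Nethtml.Data "Hello";
            Nethtml.Element ("br", [], []);
            Nethtml.Data "world" ]) ]
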
+
+val no_end_tag : string list ref;;
+  (* List of tags which are always empty. This variable is pre-configured,
+   * but you may want to change it.
+   * It is important to know which elements are always empty, because HTML
+   * allows the end tag to be omitted for them. For example,
+   * <a><b>x</a> is parsed as
+   *   Element("a",[],[ Element("b",[],[]); Data "x" ])
+   * if we know that "b" is an empty element, but it is wrongly parsed as
+   *   Element("a",[],[ Element("b",[], [ Data "x"]) ])
+   * if "b" is actually empty but we do not know it.
+   * An example of such a tag is "br".
+   *)
+
+val special_tag : string list ref;;
+  (* List of tags with a special rule for recognizing the end.
+   * This variable is pre-configured, but you may want to change it.
+   * The special rule is that the metacharacters '<', '>' and so on lose
+   * their meaning within the element, and that only the corresponding 
+   * end tag stops this kind of scanning. An example is the element
+   * "script" (whose content is typically JavaScript code). Inner elements
+   * are not recognized, and the element can only be ended by </script>.
+   * (Other elements are also ended if an embracing element ends,
+   * e.g. "j" in <k><j></k>!)
+   *
+   * Note that comments are not recognized within special elements;
+   * comments are returned as character material.
+   *)
+
+val parse_string : string -> document list
+  (* Parses the HTML document from a string and returns it *)
+
+val parse_file : in_channel -> document list
+  (* Parses the HTML document from a file and returns it *)
+
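A short usage sketch of the two entry points, assuming this interface is compiled as the Nethtml module; the tag "icon" and the file name "page.html" are made up for the example, to show that no_end_tag can be reconfigured before parsing:

    let () =
      (* Hypothetical: also treat <icon> as an always-empty element. *)
      Nethtml.no_end_tag := "icon" :: !Nethtml.no_end_tag;
      let ic = open_in "page.html" in          (* hypothetical input file *)
      let doc = Nethtml.parse_file ic in
      close_in ic;
      Printf.printf "top-level nodes: %d\n" (List.length doc)
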
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/03/03 01:07:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/nethtml_scanner.mll b/helm/DEVEL/pxp/netstring/nethtml_scanner.mll
new file mode 100644 (file)
index 0000000..03e6dea
--- /dev/null
@@ -0,0 +1,128 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+{
+  type token =
+      Lcomment
+    | Rcomment
+    | Mcomment
+    | Ldoctype
+    | Rdoctype
+    | Mdoctype
+    | Lelement of string
+    | Lelementend of string
+    | Relement
+    | Cdata of string 
+    | Space of int
+    | Name of string
+    | Is
+    | Literal of string
+    | Other
+    | Eof
+}
+
+(* Simplified rules: Only Latin-1 is recognized as character set *)
+
+let letter = ['A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255']
+let extender = '\183'
+let digit = ['0'-'9']
+let hexdigit = ['0'-'9' 'A'-'F' 'a'-'f']
+let namechar = letter | digit | '.' | ':' | '-' | '_' | extender
+let name = ( letter | '_' | ':' ) namechar*
+let nmtoken = namechar+
+let ws = [ ' ' '\t' '\r' '\n' ]
+let string_literal1 = '"' [^ '"' '>' '<' '\n']* '"'
+let string_literal2 = "'" [^ '\'' '>' '<' '\n']* "'"
+
+
+(* The following rules reflect HTML as it is actually used, not the SGML
+ * rules.
+ *)
+
+rule scan_document = parse
+  | "<!--"
+      { Lcomment }
+  | "<!"
+      { Ldoctype }
+  | "<" name
+      { let s = Lexing.lexeme lexbuf in
+       Lelement (String.sub s 1 (String.length s - 1))
+      }
+  | "</" name
+      { let s = Lexing.lexeme lexbuf in
+       Lelementend (String.sub s 2 (String.length s - 2))
+      }
+  | "<"                (* misplaced "<" *)
+      { Cdata "<" }
+  | eof
+      { Eof }
+  | [^ '<' ]+
+      { Cdata (Lexing.lexeme lexbuf)}
+
+and scan_special = parse
+  | "</" name 
+      { let s = Lexing.lexeme lexbuf in
+       Lelementend (String.sub s 2 (String.length s - 2))
+      }
+  | "<"
+      { Cdata "<" }
+  | eof
+      { Eof }
+  | [^ '<' ]+
+      { Cdata (Lexing.lexeme lexbuf)}
+
+
+and scan_comment = parse
+  | "-->"
+      { Rcomment }
+  | "-"
+      { Mcomment }
+  | eof
+      { Eof }
+  | [^ '-']+
+      { Mcomment }
+
+and scan_doctype = parse
+  | ">"                   (* Occurence in strings, and [ ] brackets ignored *)
+      { Rdoctype }
+  | eof
+      { Eof }
+  | [^ '>' ] +
+      { Mdoctype }
+
+and scan_element = parse
+  | ">"
+      { Relement }
+  | ws+
+      { Space (String.length (Lexing.lexeme lexbuf)) }
+  | name
+      { Name (Lexing.lexeme lexbuf) }
+  | "="
+      { Is }
+  | string_literal1
+      { let s = Lexing.lexeme lexbuf in
+       Literal (String.sub s 1 (String.length s - 2)) 
+      }
+  | string_literal2
+      { let s = Lexing.lexeme lexbuf in
+       Literal (String.sub s 1 (String.length s - 2)) 
+      }
+  | eof
+      { Eof }
+  | _
+      { Other }
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/03/03 01:07:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netmappings.ml b/helm/DEVEL/pxp/netstring/netmappings.ml
new file mode 100644 (file)
index 0000000..4821350
--- /dev/null
@@ -0,0 +1,38 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+type from_uni_list =
+    U_nil
+  | U_single of (int*int)
+  | U_list of (int*int) list
+;;
+
+let to_unicode = Hashtbl.create 50;;
+
+let from_unicode = Hashtbl.create 50;;
+
+let f_lock = ref (fun () -> ());;
+let f_unlock = ref (fun () -> ());;
+
+let lock () = !f_lock();;
+let unlock () = !f_unlock();;
+
+let init_mt new_f_lock new_f_unlock =
+  f_lock := new_f_lock;
+  f_unlock := new_f_unlock
+;;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/08/28 23:17:54  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netmappings.mli b/helm/DEVEL/pxp/netstring/netmappings.mli
new file mode 100644 (file)
index 0000000..1c52d07
--- /dev/null
@@ -0,0 +1,115 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+type from_uni_list =
+    U_nil
+  | U_single of (int*int)
+  | U_list of (int*int) list
+;;
+  (* A representation of (int*int) list that is optimized for the most
+   * frequent cases, namely lists with 0 or 1 elements.
+   *)
+
+
+val to_unicode   : (Netconversion.encoding, 
+                   int array Lazy.t)           Hashtbl.t;;
+
+val from_unicode : (Netconversion.encoding, 
+                   from_uni_list array Lazy.t) Hashtbl.t;;
+  (* These hashtables are used internally by the parser to store
+   * the conversion tables from 8 bit encodings to Unicode and vice versa.
+   * It is normally not necessary to access these tables; the
+   * Netconversion module does it already for you.
+   *
+   * Specification of the conversion tables:
+   *
+   * to_unicode: maps an 8 bit code to Unicode, i.e.
+   *    let m = Lazy.force (Hashtbl.find to_unicode `Enc_isoXXX) in
+   *    let unicode = m.(isocode)
+   *    - This may be (-1) to indicate that the code point is not defined.
+   *
+   * from_unicode: maps Unicode to an 8 bit code, i.e.
+   *    let m = Lazy.force (Hashtbl.find from_unicode `Enc_isoXXX) in
+   *    let l = m.(unicode land 255)
+   *    Now search in l the pair (unicode, isocode), and return isocode.
+   *
+   * Note: It is guaranteed that both arrays always have 256 elements.
+   *)
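For illustration, a lookup following the specification above; it assumes `Enc_iso88591 is among the Netconversion.encoding variants and that the ISO 8859-1 table (see netmappings_iso.ml) has been registered in to_unicode. lock/unlock (declared below) guard the Lazy.force in multi-threaded programs:

    let latin1_to_unicode (code : int) : int option =
      Netmappings.lock ();
      let table =
        Lazy.force (Hashtbl.find Netmappings.to_unicode `Enc_iso88591) in
      Netmappings.unlock ();
      let u = table.(code) in
      if u = -1 then None else Some u
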
+
+val lock : unit -> unit
+  (* In multi-threaded applications: obtains a lock which is required to
+   * Lazy.force the values found in to_unicode and from_unicode.
+   * In single-threaded applications: a NO-OP
+   *)
+
+val unlock : unit -> unit
+  (* In multi-threaded applications: releases the lock which is required to
+   * Lazy.force the values found in to_unicode and from_unicode.
+   * In single-threaded applications: a NO-OP
+   *)
+
+
+val init_mt : (unit -> unit) -> (unit -> unit) -> unit
+  (* Internally used; see netstring_mt.ml *)
+
+
+(* ---------------------------------------- *)
+
+(* The following comment was written when the conversion module belonged
+ * to the PXP package (Polymorphic XML Parser).
+ *)
+
+(* HOW TO ADD A NEW 8 BIT CODE:
+ *
+ * It is relatively simple to add a new 8 bit code to the system. This
+ * means that the parser can read and write files with the new encoding;
+ * this does not mean that the parser can represent the XML tree internally
+ * by the new encoding.
+ *
+ * - Put a new unimap file into the "mappings" directory. The file format
+ *   is simple; please look at the already existing files. 
+ *   The name of the file determines the internal name of the code:
+ *   If the file is called <name>.unimap, the code will be called
+ *   `Enc_<name>.
+ *
+ * - Extend the type "encoding" in pxp_types.mli and pxp_types.ml
+ *
+ * - Extend the two functions encoding_of_string and string_of_encoding
+ *   in pxp_types.ml
+ *
+ * - Recompile the parser
+ *
+ * Every encoding consumes at least 3kB of memory, but this may be much more
+ * if the code points are scattered across the Unicode code space.
+ *
+ * Perhaps the addition of new codes will become even simpler in future
+ * versions of PXP; but it is currently more important to support 
+ * non-8-bit codes, too.
+ * Every contribution of new codes to PXP is welcome!
+ *)
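For illustration, a sketch of the pxp_types.ml additions described in steps two and three above, using a hypothetical koi8r.unimap mapping; the variant and string cases shown are illustrative only, not the actual contents of pxp_types.ml:

    (* Extend the encoding type with the new variant. *)
    type encoding = [ `Enc_utf8 | `Enc_iso88591 | `Enc_koi8r (* new *) ]

    (* Extend the two conversion functions accordingly. *)
    let encoding_of_string s =
      match String.lowercase s with
        | "utf-8"      -> `Enc_utf8
        | "iso-8859-1" -> `Enc_iso88591
        | "koi8-r"     -> `Enc_koi8r                (* new *)
        | _            -> failwith ("Unknown encoding: " ^ s)

    let string_of_encoding = function
        `Enc_utf8     -> "UTF-8"
      | `Enc_iso88591 -> "ISO-8859-1"
      | `Enc_koi8r    -> "KOI8-R"                   (* new *)
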
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/08/29 00:47:24  gerd
+ *     New type for the conversion Unicode to 8bit.
+ *     Conversion tables are now lazy. Thus also mutexes are required.
+ *
+ * Revision 1.1  2000/08/13 00:02:57  gerd
+ *     Initial revision.
+ *
+ *
+ * ======================================================================
+ * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_mappings.mli):
+ * 
+ * Revision 1.1  2000/07/27 00:40:02  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netmappings_iso.ml b/helm/DEVEL/pxp/netstring/netmappings_iso.ml
new file mode 100644 (file)
index 0000000..9b86aae
--- /dev/null
@@ -0,0 +1,54 @@
+(* WARNING! This is a generated file! *)
+let iso88591_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\000\208\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\000\221\001\000\222\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\000\240\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\000\253\001\000\254\001\000\255" 0 : int array);;
+let iso88591_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\133\000\000\000\000\000\000\006\001\000\000\006\001\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\144\160\001\000\172\001\000\172\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\00
1\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207\144\160\001\000\208\001\000\208\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220\144\160\001\000\221\001\000\221\144\160\001\000\222\001\000\222\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239\144\160\001\000\240\001\000\240\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253\144\160\001\000\254\001\000\254\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+ let iso885910_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001\004\001\001\018\001\001\"\001\001*\001\001(\001\0016\001\000\167\001\001;\001\001\016\001\001`\001\001f\001\001}\001\000\173\001\001j\001\001J\001\000\176\001\001\005\001\001\019\001\001#\001\001+\001\001)\001\0017\001\000\183\001\001<\001\001\017\001\001a\001\001g\001\001~\001 \021\001\001k\001\001K\001\001\000\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\001.\001\001\012\001\000\201\001\001\024\001\000\203\001\001\022\001\000\205\001\000\206\001\000\207\001\000\208\001\001E\001\001L\001\000\211\001\000\212\001\000\213\001\000\214\001\001h\001\000\216\001\001r\001\000\218\001\000\219\001\000\220\001\000\221\001\000\222\001\000\223\001\001\001\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\001/\001\001\013\001\000\233\001\001\025\001\000\235\001\001\023\001\000\237\001\000\238\001\000\239\001\000\240\001\001F\001\001M\001\000\243\001\000\244\001\000\245\001\000\246\001\001i\001\000\248\001\001s\001\000\250\001\000\251\001\000\252\001\000\253\001\000\254\001\0018" 0 : int array);;
+let iso885910_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\015\000\000\000\000\000\000\006\185\000\000\006\185\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\192@\145\160\160AA\160\160\001\001\001\001\000\224@\144\160BB\144\160CC\145\160\160DD\160\160\001\001\004\001\000\161@\145\160\160EE\160\160\001\001\005\001\000\177@\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\144\160NN\144\160OO\145\160\160PP\160\160\001\001\016\001\000\169@\145\160\160QQ\160\160\001\001\017\001\000\185@\145\160\160RR\160\160\001\001\018\001\000\162@\145\160\160SS\160\160\001\001\019\001\000\178@\144\160TT\145\160\160UU\160\160\001 \021\001\000\189@\145\160\160VV\160\160\001\001\022\001\000\204@\145\160\160WW\160\160\001\001\023\001\000\236@\145\160\160XX\160\160\001\001\024\001\000\202@\145\160\160YY\160\160\001\001\025\001\000\234@\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\163@\145\160\160cc\160\160\001\001#\001\000\179@\144\160dd\144\160ee\144\160ff\144\160gg\145\160\160hh\160\160\001\001(\001\000\165@\145\160\160ii\160\160\001\001)\001\000\181@\145\160\160jj\160\160\001\001*\001\000\164@\145\160\160kk\160\160\001\001+\001\000\180@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\199@\145\160\160oo\160\160\001\001/\001\000\231@\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\166@\145\160\160ww\160\160\001\0017\001\000\182@\145\160\160xx\160\160\001\0018\001\000\255@\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\168@\145\160\160||\160\160\001\001<\001\000\184@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\145\160\160\000E\000E\160\160\001\001E\001\000\209@\145\160\160\000F\000F\160\160\001\001F\001\000\241@\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\145\160\160\000J\000J\160\160\001\001J\001\000\175@\145\160\160\000K\000K\160\160\001\001K\001\000\191@\145\160\160\000L\000L\160\160\001\001L\001\000\210@\145\160\160\000M\000M\160\160\001\001M\001\000\242@\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\170@\145\160\160\000a\000a\160\160\001\001a\001\000\186@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001\001f\001\000\171@\145\160\160\000g\000g\160\160\001\001g\001\000\187@\145\160\160\000h\000h\160\160\001\001h\001\000\215@\145\160\160\000i\000i\160\160\001\001i\001\000\247@\145\160\160\000j\000j\160\160\001\001j\001\000\174@\145\160\160\000k\000k\160\160\001\001k\001\000\190@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\217@\145\160\160\000s\000s\160\160\001\001s\001\000\249@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\172@\145\160\160\000~\000~\160\160\001\001~\001\000\188@\144\16
0\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@@@@\144\160\001\000\167\001\000\167@@@@@\144\160\001\000\173\001\000\173@@\144\160\001\000\176\001\000\176@@@@@@\144\160\001\000\183\001\000\183@@@@@@@@@\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198@@\144\160\001\000\201\001\000\201@\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207\144\160\001\000\208\001\000\208@@\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214@\144\160\001\000\216\001\000\216@\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220\144\160\001\000\221\001\000\221\144\160\001\000\222\001\000\222\144\160\001\000\223\001\000\223@\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230@@\144\160\001\000\233\001\000\233@\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239\144\160\001\000\240\001\000\240@@\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246@\144\160\001\000\248\001\000\248@\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253\144\160\001\000\254\001\000\254@" 0 : Netmappings.from_uni_list array);;
+ let iso885913_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001 \029\001\000\162\001\000\163\001\000\164\001 \030\001\000\166\001\000\167\001\000\216\001\000\169\001\001V\001\000\171\001\000\172\001\000\173\001\000\174\001\000\198\001\000\176\001\000\177\001\000\178\001\000\179\001 \028\001\000\181\001\000\182\001\000\183\001\000\248\001\000\185\001\001W\001\000\187\001\000\188\001\000\189\001\000\190\001\000\230\001\001\004\001\001.\001\001\000\001\001\006\001\000\196\001\000\197\001\001\024\001\001\018\001\001\012\001\000\201\001\001y\001\001\022\001\001\"\001\0016\001\001*\001\001;\001\001`\001\001C\001\001E\001\000\211\001\001L\001\000\213\001\000\214\001\000\215\001\001r\001\001A\001\001Z\001\001j\001\000\220\001\001{\001\001}\001\000\223\001\001\005\001\001/\001\001\001\001\001\007\001\000\228\001\000\229\001\001\025\001\001\019\001\001\013\001\000\233\001\001z\001\001\023\001\001#\001\0017\001\001+\001\001<\001\001a\001\001D\001\001F\001\000\243\001\001M\001\000\245\001\000\246\001\000\247\001\001s\001\001B\001\001[\001\001k\001\000\252\001\001|\001\001~\001 \025" 0 : int array);;
+let iso885913_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\031\000\000\000\000\000\000\006\206\000\000\006\206\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\194@\145\160\160AA\160\160\001\001\001\001\000\226@\144\160BB\144\160CC\145\160\160DD\160\160\001\001\004\001\000\192@\145\160\160EE\160\160\001\001\005\001\000\224@\145\160\160FF\160\160\001\001\006\001\000\195@\145\160\160GG\160\160\001\001\007\001\000\227@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\144\160NN\144\160OO\144\160PP\144\160QQ\145\160\160RR\160\160\001\001\018\001\000\199@\145\160\160SS\160\160\001\001\019\001\000\231@\144\160TT\144\160UU\145\160\160VV\160\160\001\001\022\001\000\203@\145\160\160WW\160\160\001\001\023\001\000\235@\145\160\160XX\160\160\001\001\024\001\000\198@\145\160\160YY\160\160\001\001\025\001\000\230\160\160\001 \025\001\000\255@\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\180@\145\160\160]]\160\160\001 \029\001\000\161@\145\160\160^^\160\160\001 \030\001\000\165@\144\160__\144\160``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\204@\145\160\160cc\160\160\001\001#\001\000\236@\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\145\160\160jj\160\160\001\001*\001\000\206@\145\160\160kk\160\160\001\001+\001\000\238@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\193@\145\160\160oo\160\160\001\001/\001\000\225@\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\205@\145\160\160ww\160\160\001\0017\001\000\237@\144\160xx\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\207@\145\160\160||\160\160\001\001<\001\000\239@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\217@\145\160\160\000B\000B\160\160\001\001B\001\000\249@\145\160\160\000C\000C\160\160\001\001C\001\000\209@\145\160\160\000D\000D\160\160\001\001D\001\000\241@\145\160\160\000E\000E\160\160\001\001E\001\000\210@\145\160\160\000F\000F\160\160\001\001F\001\000\242@\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\145\160\160\000L\000L\160\160\001\001L\001\000\212@\145\160\160\000M\000M\160\160\001\001M\001\000\244@\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\001\001V\001\000\170@\145\160\160\000W\000W\160\160\001\001W\001\000\186@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001\001Z\001\000\218@\145\160\160\000[\000[\160\160\001\001[\001\000\250@\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\208@\145\160\160\000a\000a\160\160\001\001a\001\000\240@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\145\160\160\000j\000j\160\160\001\001j\001\000\219@\145\160\160\000k\000k\160\160\001\001k\001\000\251@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\216@\145\160\160\000s\000s\160\160\001\001s\001\000\248@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\202@\145\160\160\000z\000z\160\160\001\001z\001\000\234@\145\160\160\000
{\000{\160\160\001\001{\001\000\221@\145\160\160\000|\000|\160\160\001\001|\001\000\253@\145\160\160\000}\000}\160\160\001\001}\001\000\222@\145\160\160\000~\000~\160\160\001\001~\001\000\254@\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167@\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\144\160\001\000\172\001\000\172\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179@\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183@\144\160\001\000\185\001\000\185@\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190@@@@@\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\175@@\144\160\001\000\201\001\000\201@@@@@@@@@\144\160\001\000\211\001\000\211@\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\168@@@\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223@@@@\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\191@@\144\160\001\000\233\001\000\233@@@@@@@@@\144\160\001\000\243\001\000\243@\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\184@@@\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
+ let iso885914_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\030\002\001\030\003\001\000\163\001\001\n\001\001\011\001\030\n\001\000\167\001\030\128\001\000\169\001\030\130\001\030\011\001\030\242\001\000\173\001\000\174\001\001x\001\030\030\001\030\031\001\001 \001\001!\001\030@\001\030A\001\000\182\001\030V\001\030\129\001\030W\001\030\131\001\030`\001\030\243\001\030\132\001\030\133\001\030a\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\001t\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\030j\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\000\221\001\001v\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\001u\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\030k\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\000\253\001\001w\001\000\255" 0 : int array);;
+let iso885914_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\222\000\000\000\000\000\000\006w\000\000\006w\008\000\004\000\000\144\160@@\144\160AA\145\160\160BB\160\160\001\030\002\001\000\161@\145\160\160CC\160\160\001\030\003\001\000\162@\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\145\160\160JJ\160\160\001\001\n\001\000\164\160\160\001\030\n\001\000\166@\145\160\160KK\160\160\001\001\011\001\000\165\160\160\001\030\011\001\000\171@\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\145\160\160^^\160\160\001\030\030\001\000\176@\145\160\160__\160\160\001\030\031\001\000\177@\145\160\160``\160\160\001\001 \001\000\178@\145\160\160aa\160\160\001\001!\001\000\179@\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\030@\001\000\180@\145\160\160\000A\000A\160\160\001\030A\001\000\181@\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\001\030V\001\000\183@\145\160\160\000W\000W\160\160\001\030W\001\000\185@\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\030`\001\000\187@\145\160\160\000a\000a\160\160\001\030a\001\000\191@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\145\160\160\000j\000j\160\160\001\030j\001\000\215@\145\160\160\000k\000k\160\160\001\030k\001\000\247@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\145\160\160\000t\000t\160\160\001\001t\001\000\208@\145\160\160\000u\000u\160\160\001\001u\001\000\240@\145\160\160\000v\000v\160\160\001\001v\001\000\222@\145\160\160\000w\000w\160\160\001\001w\001\000\254@\145\160\160\000x\000x\160\160\001\001x\001\000\175@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\145\160\160\001\000\128\001\000\128\160\160\001\030\128\001\000\168@\145\160\160\001\000\129\001\000\129\160\160\001\030\129\001\000\184@\145\160\160\001\000\130\001\000\130\160\160\001\030\130\001\000\170@\145\160\160\001\000\131\001\000\131\160\160\001\030\131\001\000\186@\145\160\160\001\000\132\001\000\132\160\160\001\030\132\001\000\189@\145\160\160\001\000\133\001\000\133\160\160\001\030\133\001\000\190@\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\1
60\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@\144\160\001\000\163\001\000\163@@@\144\160\001\000\167\001\000\167@\144\160\001\000\169\001\000\169@@@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174@@@@@@@\144\160\001\000\182\001\000\182@@@@@@@@@\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214@\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220\144\160\001\000\221\001\000\221@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241\145\160\160\001\030\242\001\000\172\160\160\001\000\242\001\000\242@\145\160\160\001\030\243\001\000\188\160\160\001\000\243\001\000\243@\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246@\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253@\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+ let iso885915_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\000\161\001\000\162\001\000\163\001 \172\001\000\165\001\001`\001\000\167\001\001a\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\001}\001\000\181\001\000\182\001\000\183\001\001~\001\000\185\001\000\186\001\000\187\001\001R\001\001S\001\001x\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\000\208\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\000\221\001\000\222\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\000\240\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\000\253\001\000\254\001\000\255" 0 : int array);;
+let iso885915_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\157\000\000\000\000\000\000\006!\000\000\006!\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\188@\145\160\160\000S\000S\160\160\001\001S\001\000\189@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\166@\145\160\160\000a\000a\160\160\001\001a\001\000\168@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\190@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\180@\145\160\160\000~\000~\160\160\001\001~\001\000\184@\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163@\144\160\001\000\165\001\000\165@\144\160\001\000\167\001\000\167@\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\164\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179@\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183@\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187@@@\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207\144\160\001\000\208\001\000\208\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220\144\160\001\000\221\001\000\221\144\160\001\000\222\001\000\222\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239\144\160\001\000\240\001\000\240\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253\144\160\001\000\254\001\000\254\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+ let iso88592_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001\004\001\002\216\001\001A\001\000\164\001\001=\001\001Z\001\000\167\001\000\168\001\001`\001\001^\001\001d\001\001y\001\000\173\001\001}\001\001{\001\000\176\001\001\005\001\002\219\001\001B\001\000\180\001\001>\001\001[\001\002\199\001\000\184\001\001a\001\001_\001\001e\001\001z\001\002\221\001\001~\001\001|\001\001T\001\000\193\001\000\194\001\001\002\001\000\196\001\0019\001\001\006\001\000\199\001\001\012\001\000\201\001\001\024\001\000\203\001\001\026\001\000\205\001\000\206\001\001\014\001\001\016\001\001C\001\001G\001\000\211\001\000\212\001\001P\001\000\214\001\000\215\001\001X\001\001n\001\000\218\001\001p\001\000\220\001\000\221\001\001b\001\000\223\001\001U\001\000\225\001\000\226\001\001\003\001\000\228\001\001:\001\001\007\001\000\231\001\001\013\001\000\233\001\001\025\001\000\235\001\001\027\001\000\237\001\000\238\001\001\015\001\001\017\001\001D\001\001H\001\000\243\001\000\244\001\001Q\001\000\246\001\000\247\001\001Y\001\001o\001\000\250\001\001q\001\000\252\001\000\253\001\001c\001\002\217" 0 : int array);;
+let iso88592_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007'\000\000\000\000\000\000\006\217\000\000\006\217\008\000\004\000\000\144\160@@\144\160AA\145\160\160BB\160\160\001\001\002\001\000\195@\145\160\160CC\160\160\001\001\003\001\000\227@\145\160\160DD\160\160\001\001\004\001\000\161@\145\160\160EE\160\160\001\001\005\001\000\177@\145\160\160FF\160\160\001\001\006\001\000\198@\145\160\160GG\160\160\001\001\007\001\000\230@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\145\160\160NN\160\160\001\001\014\001\000\207@\145\160\160OO\160\160\001\001\015\001\000\239@\145\160\160PP\160\160\001\001\016\001\000\208@\145\160\160QQ\160\160\001\001\017\001\000\240@\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001\001\024\001\000\202@\145\160\160YY\160\160\001\001\025\001\000\234@\145\160\160ZZ\160\160\001\001\026\001\000\204@\145\160\160[[\160\160\001\001\027\001\000\236@\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001\0019\001\000\197@\145\160\160zz\160\160\001\001:\001\000\229@\144\160{{\144\160||\145\160\160}}\160\160\001\001=\001\000\165@\145\160\160~~\160\160\001\001>\001\000\181@\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\163@\145\160\160\000B\000B\160\160\001\001B\001\000\179@\145\160\160\000C\000C\160\160\001\001C\001\000\209@\145\160\160\000D\000D\160\160\001\001D\001\000\241@\144\160\000E\000E\144\160\000F\000F\145\160\160\000G\000G\160\160\001\001G\001\000\210@\145\160\160\000H\000H\160\160\001\001H\001\000\242@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001\001P\001\000\213@\145\160\160\000Q\000Q\160\160\001\001Q\001\000\245@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001\001T\001\000\192@\145\160\160\000U\000U\160\160\001\001U\001\000\224@\144\160\000V\000V\144\160\000W\000W\145\160\160\000X\000X\160\160\001\001X\001\000\216@\145\160\160\000Y\000Y\160\160\001\001Y\001\000\248@\145\160\160\000Z\000Z\160\160\001\001Z\001\000\166@\145\160\160\000[\000[\160\160\001\001[\001\000\182@\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\170@\145\160\160\000_\000_\160\160\001\001_\001\000\186@\145\160\160\000`\000`\160\160\001\001`\001\000\169@\145\160\160\000a\000a\160\160\001\001a\001\000\185@\145\160\160\000b\000b\160\160\001\001b\001\000\222@\145\160\160\000c\000c\160\160\001\001c\001\000\254@\145\160\160\000d\000d\160\160\001\001d\001\000\171@\145\160\160\000e\000e\160\160\001\001e\001\000\187@\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\145\160\160\000n\000n\160\160\001\001n\001\000\217@\145\160\160\000o\000o\160\160\001\001o\001\000\249@\145\160\160\000p\000p\160\160\001\001p\001\000\219@\145\160\160\000q\000q\160\160\001\001q\001\000\251@\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\172@\145\160\160\000z\000z\160\160\001\001z\001\000\188@\145\160\1
60\000{\000{\160\160\001\001{\001\000\175@\145\160\160\000|\000|\160\160\001\001|\001\000\191@\145\160\160\000}\000}\160\160\001\001}\001\000\174@\145\160\160\000~\000~\160\160\001\001~\001\000\190@\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@@\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168@@@@\144\160\001\000\173\001\000\173@@\144\160\001\000\176\001\000\176@@@\144\160\001\000\180\001\000\180@@@\144\160\001\000\184\001\000\184@@@@@@@@\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194@\144\160\001\000\196\001\000\196@@\145\160\160\001\002\199\001\000\183\160\160\001\000\199\001\000\199@@\144\160\001\000\201\001\000\201@\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206@@@@\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212@\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\002\216\001\000\162\144\160\001\002\217\001\000\255\144\160\001\000\218\001\000\218\144\160\001\002\219\001\000\178\144\160\001\000\220\001\000\220\145\160\160\001\002\221\001\000\189\160\160\001\000\221\001\000\221@@\144\160\001\000\223\001\000\223@\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226@\144\160\001\000\228\001\000\228@@\144\160\001\000\231\001\000\231@\144\160\001\000\233\001\000\233@\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238@@@@\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244@\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247@@\144\160\001\000\250\001\000\250@\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253@@" 0 : Netmappings.from_uni_list array);;
+ let iso88593_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002>\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001&\001\002\216\001\000\163\001\000\164\000\255\001\001$\001\000\167\001\000\168\001\0010\001\001^\001\001\030\001\0014\001\000\173\000\255\001\001{\001\000\176\001\001'\001\000\178\001\000\179\001\000\180\001\000\181\001\001%\001\000\183\001\000\184\001\0011\001\001_\001\001\031\001\0015\001\000\189\000\255\001\001|\001\000\192\001\000\193\001\000\194\000\255\001\000\196\001\001\n\001\001\008\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\000\255\001\000\209\001\000\210\001\000\211\001\000\212\001\001 \001\000\214\001\000\215\001\001\028\001\000\217\001\000\218\001\000\219\001\000\220\001\001l\001\001\\\001\000\223\001\000\224\001\000\225\001\000\226\000\255\001\000\228\001\001\011\001\001\t\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\000\255\001\000\241\001\000\242\001\000\243\001\000\244\001\001!\001\000\246\001\000\247\001\001\029\001\000\249\001\000\250\001\000\251\001\000\252\001\001m\001\001]\001\002\217" 0 : int array);;
+let iso88593_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\165\000\000\000\000\000\000\006J\000\000\006J\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\145\160\160HH\160\160\001\001\008\001\000\198@\145\160\160II\160\160\001\001\t\001\000\230@\145\160\160JJ\160\160\001\001\n\001\000\197@\145\160\160KK\160\160\001\001\011\001\000\229@\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001\001\028\001\000\216@\145\160\160]]\160\160\001\001\029\001\000\248@\145\160\160^^\160\160\001\001\030\001\000\171@\145\160\160__\160\160\001\001\031\001\000\187@\145\160\160``\160\160\001\001 \001\000\213@\145\160\160aa\160\160\001\001!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001\001$\001\000\166@\145\160\160ee\160\160\001\001%\001\000\182@\145\160\160ff\160\160\001\001&\001\000\161@\145\160\160gg\160\160\001\001'\001\000\177@\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001\0010\001\000\169@\145\160\160qq\160\160\001\0011\001\000\185@\144\160rr\144\160ss\145\160\160tt\160\160\001\0014\001\000\172@\145\160\160uu\160\160\001\0015\001\000\188@\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\145\160\160\000\\\000\\\160\160\001\001\\\001\000\222@\145\160\160\000]\000]\160\160\001\001]\001\000\254@\145\160\160\000^\000^\160\160\001\001^\001\000\170@\145\160\160\000_\000_\160\160\001\001_\001\000\186@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001\001l\001\000\221@\145\160\160\000m\000m\160\160\001\001m\001\000\253@\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\145\160\160\000{\000{\160\160\001\001{\001\000\175@\145\160\160\000|\000|\160\160\001\001|\001\000\191@\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149
\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164@@\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168@@@@\144\160\001\000\173\001\000\173@@\144\160\001\000\176\001\000\176@\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181@\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184@@@@\144\160\001\000\189\001\000\189@@\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194@\144\160\001\000\196\001\000\196@@\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212@\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\002\216\001\000\162\145\160\160\001\000\217\001\000\217\160\160\001\002\217\001\000\255@\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226@\144\160\001\000\228\001\000\228@@\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244@\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247@\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88594_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001\004\001\0018\001\001V\001\000\164\001\001(\001\001;\001\000\167\001\000\168\001\001`\001\001\018\001\001\"\001\001f\001\000\173\001\001}\001\000\175\001\000\176\001\001\005\001\002\219\001\001W\001\000\180\001\001)\001\001<\001\002\199\001\000\184\001\001a\001\001\019\001\001#\001\001g\001\001J\001\001~\001\001K\001\001\000\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\001.\001\001\012\001\000\201\001\001\024\001\000\203\001\001\022\001\000\205\001\000\206\001\001*\001\001\016\001\001E\001\001L\001\0016\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\001r\001\000\218\001\000\219\001\000\220\001\001h\001\001j\001\000\223\001\001\001\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\001/\001\001\013\001\000\233\001\001\025\001\000\235\001\001\023\001\000\237\001\000\238\001\001+\001\001\017\001\001F\001\001M\001\0017\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\001s\001\000\250\001\000\251\001\000\252\001\001i\001\001k\001\002\217" 0 : int array);;
+let iso88594_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\021\000\000\000\000\000\000\006\193\000\000\006\193\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\192@\145\160\160AA\160\160\001\001\001\001\000\224@\144\160BB\144\160CC\145\160\160DD\160\160\001\001\004\001\000\161@\145\160\160EE\160\160\001\001\005\001\000\177@\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\144\160NN\144\160OO\145\160\160PP\160\160\001\001\016\001\000\208@\145\160\160QQ\160\160\001\001\017\001\000\240@\145\160\160RR\160\160\001\001\018\001\000\170@\145\160\160SS\160\160\001\001\019\001\000\186@\144\160TT\144\160UU\145\160\160VV\160\160\001\001\022\001\000\204@\145\160\160WW\160\160\001\001\023\001\000\236@\145\160\160XX\160\160\001\001\024\001\000\202@\145\160\160YY\160\160\001\001\025\001\000\234@\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\171@\145\160\160cc\160\160\001\001#\001\000\187@\144\160dd\144\160ee\144\160ff\144\160gg\145\160\160hh\160\160\001\001(\001\000\165@\145\160\160ii\160\160\001\001)\001\000\181@\145\160\160jj\160\160\001\001*\001\000\207@\145\160\160kk\160\160\001\001+\001\000\239@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\199@\145\160\160oo\160\160\001\001/\001\000\231@\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\211@\145\160\160ww\160\160\001\0017\001\000\243@\145\160\160xx\160\160\001\0018\001\000\162@\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\166@\145\160\160||\160\160\001\001<\001\000\182@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\145\160\160\000E\000E\160\160\001\001E\001\000\209@\145\160\160\000F\000F\160\160\001\001F\001\000\241@\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\145\160\160\000J\000J\160\160\001\001J\001\000\189@\145\160\160\000K\000K\160\160\001\001K\001\000\191@\145\160\160\000L\000L\160\160\001\001L\001\000\210@\145\160\160\000M\000M\160\160\001\001M\001\000\242@\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\001\001V\001\000\163@\145\160\160\000W\000W\160\160\001\001W\001\000\179@\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\169@\145\160\160\000a\000a\160\160\001\001a\001\000\185@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001\001f\001\000\172@\145\160\160\000g\000g\160\160\001\001g\001\000\188@\145\160\160\000h\000h\160\160\001\001h\001\000\221@\145\160\160\000i\000i\160\160\001\001i\001\000\253@\145\160\160\000j\000j\160\160\001\001j\001\000\222@\145\160\160\000k\000k\160\160\001\001k\001\000\254@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\217@\145\160\160\000s\000s\160\160\001\001s\001\000\249@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\174@\145\160\160\000~\000~\160
\160\001\001~\001\000\190@\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@@\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168@@@@\144\160\001\000\173\001\000\173@\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176@@@\144\160\001\000\180\001\000\180@@@\144\160\001\000\184\001\000\184@@@@@@@@\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\002\199\001\000\183@\144\160\001\000\201\001\000\201@\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206@@@@@\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\002\217\001\000\255\144\160\001\000\218\001\000\218\145\160\160\001\002\219\001\000\178\160\160\001\000\219\001\000\219@\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223@\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230@@\144\160\001\000\233\001\000\233@\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238@@@@@\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248@\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88595_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\004\001\001\004\002\001\004\003\001\004\004\001\004\005\001\004\006\001\004\007\001\004\008\001\004\t\001\004\n\001\004\011\001\004\012\001\000\173\001\004\014\001\004\015\001\004\016\001\004\017\001\004\018\001\004\019\001\004\020\001\004\021\001\004\022\001\004\023\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004 \001\004!\001\004\"\001\004#\001\004$\001\004%\001\004&\001\004'\001\004(\001\004)\001\004*\001\004+\001\004,\001\004-\001\004.\001\004/\001\0040\001\0041\001\0042\001\0043\001\0044\001\0045\001\0046\001\0047\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001\004@\001\004A\001\004B\001\004C\001\004D\001\004E\001\004F\001\004G\001\004H\001\004I\001\004J\001\004K\001\004L\001\004M\001\004N\001\004O\001!\022\001\004Q\001\004R\001\004S\001\004T\001\004U\001\004V\001\004W\001\004X\001\004Y\001\004Z\001\004[\001\004\\\001\000\167\001\004^\001\004_" 0 : int array);;
+let iso88595_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\154\000\000\000\000\000\000\007r\000\000\007r\008\000\004\000\000\144\160@@\145\160\160AA\160\160\001\004\001\001\000\161@\145\160\160BB\160\160\001\004\002\001\000\162@\145\160\160CC\160\160\001\004\003\001\000\163@\145\160\160DD\160\160\001\004\004\001\000\164@\145\160\160EE\160\160\001\004\005\001\000\165@\145\160\160FF\160\160\001\004\006\001\000\166@\145\160\160GG\160\160\001\004\007\001\000\167@\145\160\160HH\160\160\001\004\008\001\000\168@\145\160\160II\160\160\001\004\t\001\000\169@\145\160\160JJ\160\160\001\004\n\001\000\170@\145\160\160KK\160\160\001\004\011\001\000\171@\145\160\160LL\160\160\001\004\012\001\000\172@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\174@\145\160\160OO\160\160\001\004\015\001\000\175@\145\160\160PP\160\160\001\004\016\001\000\176@\145\160\160QQ\160\160\001\004\017\001\000\177@\145\160\160RR\160\160\001\004\018\001\000\178@\145\160\160SS\160\160\001\004\019\001\000\179@\145\160\160TT\160\160\001\004\020\001\000\180@\145\160\160UU\160\160\001\004\021\001\000\181@\145\160\160VV\160\160\001\004\022\001\000\182\160\160\001!\022\001\000\240@\145\160\160WW\160\160\001\004\023\001\000\183@\145\160\160XX\160\160\001\004\024\001\000\184@\145\160\160YY\160\160\001\004\025\001\000\185@\145\160\160ZZ\160\160\001\004\026\001\000\186@\145\160\160[[\160\160\001\004\027\001\000\187@\145\160\160\\\\\160\160\001\004\028\001\000\188@\145\160\160]]\160\160\001\004\029\001\000\189@\145\160\160^^\160\160\001\004\030\001\000\190@\145\160\160__\160\160\001\004\031\001\000\191@\145\160\160``\160\160\001\004 \001\000\192@\145\160\160aa\160\160\001\004!\001\000\193@\145\160\160bb\160\160\001\004\"\001\000\194@\145\160\160cc\160\160\001\004#\001\000\195@\145\160\160dd\160\160\001\004$\001\000\196@\145\160\160ee\160\160\001\004%\001\000\197@\145\160\160ff\160\160\001\004&\001\000\198@\145\160\160gg\160\160\001\004'\001\000\199@\145\160\160hh\160\160\001\004(\001\000\200@\145\160\160ii\160\160\001\004)\001\000\201@\145\160\160jj\160\160\001\004*\001\000\202@\145\160\160kk\160\160\001\004+\001\000\203@\145\160\160ll\160\160\001\004,\001\000\204@\145\160\160mm\160\160\001\004-\001\000\205@\145\160\160nn\160\160\001\004.\001\000\206@\145\160\160oo\160\160\001\004/\001\000\207@\145\160\160pp\160\160\001\0040\001\000\208@\145\160\160qq\160\160\001\0041\001\000\209@\145\160\160rr\160\160\001\0042\001\000\210@\145\160\160ss\160\160\001\0043\001\000\211@\145\160\160tt\160\160\001\0044\001\000\212@\145\160\160uu\160\160\001\0045\001\000\213@\145\160\160vv\160\160\001\0046\001\000\214@\145\160\160ww\160\160\001\0047\001\000\215@\145\160\160xx\160\160\001\0048\001\000\216@\145\160\160yy\160\160\001\0049\001\000\217@\145\160\160zz\160\160\001\004:\001\000\218@\145\160\160{{\160\160\001\004;\001\000\219@\145\160\160||\160\160\001\004<\001\000\220@\145\160\160}}\160\160\001\004=\001\000\221@\145\160\160~~\160\160\001\004>\001\000\222@\145\160\160\127\127\160\160\001\004?\001\000\223@\145\160\160\000@\000@\160\160\001\004@\001\000\224@\145\160\160\000A\000A\160\160\001\004A\001\000\225@\145\160\160\000B\000B\160\160\001\004B\001\000\226@\145\160\160\000C\000C\160\160\001\004C\001\000\227@\145\160\160\000D\000D\160\160\001\004D\001\000\228@\145\160\160\000E\000E\160\160\001\004E\001\000\229@\145\160\160\000F\000F\160\160\001\004F\001\000\230@\145\160\160\000G\000G\160\160\001\004G\001\000\231@\145\160\160\000H\000H\160\160\001\004H\001\000\232@\145\160\160\000I\000I\160\160\001\004I\001\000\233@\145\160\160\
000J\000J\160\160\001\004J\001\000\234@\145\160\160\000K\000K\160\160\001\004K\001\000\235@\145\160\160\000L\000L\160\160\001\004L\001\000\236@\145\160\160\000M\000M\160\160\001\004M\001\000\237@\145\160\160\000N\000N\160\160\001\004N\001\000\238@\145\160\160\000O\000O\160\160\001\004O\001\000\239@\144\160\000P\000P\145\160\160\000Q\000Q\160\160\001\004Q\001\000\241@\145\160\160\000R\000R\160\160\001\004R\001\000\242@\145\160\160\000S\000S\160\160\001\004S\001\000\243@\145\160\160\000T\000T\160\160\001\004T\001\000\244@\145\160\160\000U\000U\160\160\001\004U\001\000\245@\145\160\160\000V\000V\160\160\001\004V\001\000\246@\145\160\160\000W\000W\160\160\001\004W\001\000\247@\145\160\160\000X\000X\160\160\001\004X\001\000\248@\145\160\160\000Y\000Y\160\160\001\004Y\001\000\249@\145\160\160\000Z\000Z\160\160\001\004Z\001\000\250@\145\160\160\000[\000[\160\160\001\004[\001\000\251@\145\160\160\000\\\000\\\160\160\001\004\\\001\000\252@\144\160\000]\000]\145\160\160\000^\000^\160\160\001\004^\001\000\254@\145\160\160\000_\000_\160\160\001\004_\001\000\255@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@@@@\144\160\001\000\167\001\000\253@@@@@\144\160\001\000\173\001\000\173@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88596_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\024\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\000\255\000\255\000\255\001\000\164\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\006\012\001\000\173\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\006\027\000\255\000\255\000\255\001\006\031\000\255\001\006!\001\006\"\001\006#\001\006$\001\006%\001\006&\001\006'\001\006(\001\006)\001\006*\001\006+\001\006,\001\006-\001\006.\001\006/\001\0060\001\0061\001\0062\001\0063\001\0064\001\0065\001\0066\001\0067\001\0068\001\0069\001\006:\000\255\000\255\000\255\000\255\000\255\001\006@\001\006A\001\006B\001\006C\001\006D\001\006E\001\006F\001\006G\001\006H\001\006I\001\006J\001\006K\001\006L\001\006M\001\006N\001\006O\001\006P\001\006Q\001\006R\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255" 0 : int array);;
+let iso88596_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\218\000\000\000\000\000\000\005\224\000\000\005\224\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\006\012\001\000\172@\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\145\160\160[[\160\160\001\006\027\001\000\187@\144\160\\\\\144\160]]\144\160^^\145\160\160__\160\160\001\006\031\001\000\191@\144\160``\145\160\160aa\160\160\001\006!\001\000\193@\145\160\160bb\160\160\001\006\"\001\000\194@\145\160\160cc\160\160\001\006#\001\000\195@\145\160\160dd\160\160\001\006$\001\000\196@\145\160\160ee\160\160\001\006%\001\000\197@\145\160\160ff\160\160\001\006&\001\000\198@\145\160\160gg\160\160\001\006'\001\000\199@\145\160\160hh\160\160\001\006(\001\000\200@\145\160\160ii\160\160\001\006)\001\000\201@\145\160\160jj\160\160\001\006*\001\000\202@\145\160\160kk\160\160\001\006+\001\000\203@\145\160\160ll\160\160\001\006,\001\000\204@\145\160\160mm\160\160\001\006-\001\000\205@\145\160\160nn\160\160\001\006.\001\000\206@\145\160\160oo\160\160\001\006/\001\000\207@\145\160\160pp\160\160\001\0060\001\000\208@\145\160\160qq\160\160\001\0061\001\000\209@\145\160\160rr\160\160\001\0062\001\000\210@\145\160\160ss\160\160\001\0063\001\000\211@\145\160\160tt\160\160\001\0064\001\000\212@\145\160\160uu\160\160\001\0065\001\000\213@\145\160\160vv\160\160\001\0066\001\000\214@\145\160\160ww\160\160\001\0067\001\000\215@\145\160\160xx\160\160\001\0068\001\000\216@\145\160\160yy\160\160\001\0069\001\000\217@\145\160\160zz\160\160\001\006:\001\000\218@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\006@\001\000\224@\145\160\160\000A\000A\160\160\001\006A\001\000\225@\145\160\160\000B\000B\160\160\001\006B\001\000\226@\145\160\160\000C\000C\160\160\001\006C\001\000\227@\145\160\160\000D\000D\160\160\001\006D\001\000\228@\145\160\160\000E\000E\160\160\001\006E\001\000\229@\145\160\160\000F\000F\160\160\001\006F\001\000\230@\145\160\160\000G\000G\160\160\001\006G\001\000\231@\145\160\160\000H\000H\160\160\001\006H\001\000\232@\145\160\160\000I\000I\160\160\001\006I\001\000\233@\145\160\160\000J\000J\160\160\001\006J\001\000\234@\145\160\160\000K\000K\160\160\001\006K\001\000\235@\145\160\160\000L\000L\160\160\001\006L\001\000\236@\145\160\160\000M\000M\160\160\001\006M\001\000\237@\145\160\160\000N\000N\160\160\001\006N\001\000\238@\145\160\160\000O\000O\160\160\001\006O\001\000\239@\145\160\160\000P\000P\160\160\001\006P\001\000\240@\145\160\160\000Q\000Q\160\160\001\006Q\001\000\241@\145\160\160\000R\000R\160\160\001\006R\001\000\242@\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\
000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@@@@@@@@\144\160\001\000\173\001\000\173@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88597_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002?\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001 \024\001 \025\001\000\163\000\255\000\255\001\000\166\001\000\167\001\000\168\001\000\169\000\255\001\000\171\001\000\172\001\000\173\000\255\001 \021\001\000\176\001\000\177\001\000\178\001\000\179\001\003\132\001\003\133\001\003\134\001\000\183\001\003\136\001\003\137\001\003\138\001\000\187\001\003\140\001\000\189\001\003\142\001\003\143\001\003\144\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\000\255\001\003\163\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\170\001\003\171\001\003\172\001\003\173\001\003\174\001\003\175\001\003\176\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\194\001\003\195\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\001\003\201\001\003\202\001\003\203\001\003\204\001\003\205\001\003\206\000\255" 0 : int array);;
+let iso88597_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\223\000\000\000\000\000\000\006\147\000\000\006\147\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\145\160\160UU\160\160\001 \021\001\000\175@\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\161@\145\160\160YY\160\160\001 \025\001\000\162@\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\145\160\160\001\000\132\001\000\132\160\160\001\003\132\001\000\180@\145\160\160\001\000\133\001\000\133\160\160\001\003\133\001\000\181@\145\160\160\001\000\134\001\000\134\160\160\001\003\134\001\000\182@\144\160\001\000\135\001\000\135\145\160\160\001\000\136\001\000\136\160\160\001\003\136\001\000\184@\145\160\160\001\000\137\001\000\137\160\160\001\003\137\001\000\185@\145\160\160\001\000\138\001\000\138\160\160\001\003\138\001\000\186@\144\160\001\000\139\001\000\139\145\160\160\001\000\140\001\000\140\160\160\001\003\140\001\000\188@\144\160\001\000\141\001\000\141\145\160\160\001\000\142\001\000\142\160\160\001\003\142\001\000\190@\145\160\160\001\000\143\001\000\143\160\160\001\003\143\001\000\191@\145\160\160\001\000\144\001\000\144\160\160\001\003\144\001\000\192@\145\160\160\001\000\145\001\000\145\160\160\001\003\145\001\000\193@\145\160\160\001\000\146\001\000\146\160\160\001\003\146\001\000\194@\145\160\160\001\000\147\001\000\147\160\160\001\003\147\001\000\195@\145\160\160\001\000\148\001\000\148\160\160\001\003\148\001\000\196@\145\160\160\001\000\149\001\000\149\160\160\001\003\149\001\000\197@\145\160\160\001\000\150\001\000\150\160\160\001\003\150\001\000\198@\145\160\160\001\000\151\001\000\151\160\160\001\003\151\001\000\199@\145\160\160\001\000\152\001\000\152\160\160\001\003\152\001\000\200@\145\160\160\001\000\15
3\001\000\153\160\160\001\003\153\001\000\201@\145\160\160\001\000\154\001\000\154\160\160\001\003\154\001\000\202@\145\160\160\001\000\155\001\000\155\160\160\001\003\155\001\000\203@\145\160\160\001\000\156\001\000\156\160\160\001\003\156\001\000\204@\145\160\160\001\000\157\001\000\157\160\160\001\003\157\001\000\205@\145\160\160\001\000\158\001\000\158\160\160\001\003\158\001\000\206@\145\160\160\001\000\159\001\000\159\160\160\001\003\159\001\000\207@\145\160\160\001\000\160\001\000\160\160\160\001\003\160\001\000\208@\144\160\001\003\161\001\000\209@\145\160\160\001\000\163\001\000\163\160\160\001\003\163\001\000\211@\144\160\001\003\164\001\000\212\144\160\001\003\165\001\000\213\145\160\160\001\000\166\001\000\166\160\160\001\003\166\001\000\214@\145\160\160\001\000\167\001\000\167\160\160\001\003\167\001\000\215@\145\160\160\001\000\168\001\000\168\160\160\001\003\168\001\000\216@\145\160\160\001\000\169\001\000\169\160\160\001\003\169\001\000\217@\144\160\001\003\170\001\000\218\145\160\160\001\000\171\001\000\171\160\160\001\003\171\001\000\219@\145\160\160\001\000\172\001\000\172\160\160\001\003\172\001\000\220@\145\160\160\001\000\173\001\000\173\160\160\001\003\173\001\000\221@\144\160\001\003\174\001\000\222\144\160\001\003\175\001\000\223\145\160\160\001\000\176\001\000\176\160\160\001\003\176\001\000\224@\145\160\160\001\000\177\001\000\177\160\160\001\003\177\001\000\225@\145\160\160\001\000\178\001\000\178\160\160\001\003\178\001\000\226@\145\160\160\001\000\179\001\000\179\160\160\001\003\179\001\000\227@\144\160\001\003\180\001\000\228\144\160\001\003\181\001\000\229\144\160\001\003\182\001\000\230\145\160\160\001\000\183\001\000\183\160\160\001\003\183\001\000\231@\144\160\001\003\184\001\000\232\144\160\001\003\185\001\000\233\144\160\001\003\186\001\000\234\145\160\160\001\000\187\001\000\187\160\160\001\003\187\001\000\235@\144\160\001\003\188\001\000\236\145\160\160\001\000\189\001\000\189\160\160\001\003\189\001\000\237@\144\160\001\003\190\001\000\238\144\160\001\003\191\001\000\239\144\160\001\003\192\001\000\240\144\160\001\003\193\001\000\241\144\160\001\003\194\001\000\242\144\160\001\003\195\001\000\243\144\160\001\003\196\001\000\244\144\160\001\003\197\001\000\245\144\160\001\003\198\001\000\246\144\160\001\003\199\001\000\247\144\160\001\003\200\001\000\248\144\160\001\003\201\001\000\249\144\160\001\003\202\001\000\250\144\160\001\003\203\001\000\251\144\160\001\003\204\001\000\252\144\160\001\003\205\001\000\253\144\160\001\003\206\001\000\254@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88598_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002!\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\000\255\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\215\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\247\001\000\187\001\000\188\001\000\189\001\000\190\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001 \023\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\005\234\000\255\000\255\001 \014\001 \015\000\255" 0 : int array);;
+let iso88598_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\149\000\000\000\000\000\000\005]\000\000\005]\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\145\160\160NN\160\160\001 \014\001\000\253@\145\160\160OO\160\160\001 \015\001\000\254@\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\223@\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\144\160\001\000\172\001\000\172\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\
001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185@\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190@@@@@@@@@@@@@@@@@\144\160\001\005\208\001\000\224\144\160\001\005\209\001\000\225\144\160\001\005\210\001\000\226\144\160\001\005\211\001\000\227\144\160\001\005\212\001\000\228\144\160\001\005\213\001\000\229\144\160\001\005\214\001\000\230\145\160\160\001\000\215\001\000\170\160\160\001\005\215\001\000\231@\144\160\001\005\216\001\000\232\144\160\001\005\217\001\000\233\144\160\001\005\218\001\000\234\144\160\001\005\219\001\000\235\144\160\001\005\220\001\000\236\144\160\001\005\221\001\000\237\144\160\001\005\222\001\000\238\144\160\001\005\223\001\000\239\144\160\001\005\224\001\000\240\144\160\001\005\225\001\000\241\144\160\001\005\226\001\000\242\144\160\001\005\227\001\000\243\144\160\001\005\228\001\000\244\144\160\001\005\229\001\000\245\144\160\001\005\230\001\000\246\144\160\001\005\231\001\000\247\144\160\001\005\232\001\000\248\144\160\001\005\233\001\000\249\144\160\001\005\234\001\000\250@@@@@@@@@@@@\144\160\001\000\247\001\000\186@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88599_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\001\030\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\0010\001\001^\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\001\031\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\0011\001\001_\001\000\255" 0 : int array);;
+let iso88599_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\151\000\000\000\000\000\000\006\025\000\000\006\025\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\145\160\160^^\160\160\001\001\030\001\000\208@\145\160\160__\160\160\001\001\031\001\000\240@\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001\0010\001\000\221@\145\160\160qq\160\160\001\0011\001\000\253@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\222@\145\160\160\000_\000_\160\160\001\001_\001\000\254@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\
160\001\000\171\001\000\171\144\160\001\000\172\001\000\172\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88599 iso88599_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88599 iso88599_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88598 iso88598_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88598 iso88598_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88597 iso88597_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88597 iso88597_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88596 iso88596_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88596 iso88596_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88595 iso88595_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88595 iso88595_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88594 iso88594_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88594 iso88594_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88593 iso88593_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88593 iso88593_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88592 iso88592_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88592 iso88592_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso885915 iso885915_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso885915 iso885915_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso885914 iso885914_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso885914 iso885914_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso885913 iso885913_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso885913 iso885913_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso885910 iso885910_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso885910 iso885910_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88591 iso88591_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88591 iso88591_from_unicode;
+();;
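The registrations above pair each ISO-8859 encoding with two lazily unmarshalled tables: a byte-to-code-point array (to_unicode) and the reverse from_uni_list array (from_unicode). Wrapping each Marshal.from_string call in lazy means a table is only decoded the first time its encoding is actually used. A minimal sketch of how such a registered table could be consulted, assuming the to_unicode value for an 8-bit encoding is a 256-element int array indexed by the byte value (the helper name byte_to_unicode and that indexing convention are illustrative, not part of the commit):

(* Sketch only: fetch the lazy table registered for an encoding and
   decode a single byte of that encoding to its Unicode code point. *)
let byte_to_unicode enc byte =
  let table = Lazy.force (Hashtbl.find Netmappings.to_unicode enc) in
  table.(byte)

(* e.g. byte_to_unicode `Enc_iso88599 0x41 should give 0x41, the code point of 'A'. *)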
diff --git a/helm/DEVEL/pxp/netstring/netmappings_other.ml b/helm/DEVEL/pxp/netstring/netmappings_other.ml
new file mode 100644
index 0000000..57fcb48
--- /dev/null
+++ b/helm/DEVEL/pxp/netstring/netmappings_other.ml
@@ -0,0 +1,154 @@
+(* WARNING! This is a generated file! *)
+let cp037_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158Z`\001\000\160\001\000\226\001\000\228\001\000\224\001\000\225\001\000\227\001\000\229\001\000\231\001\000\241\001\000\162n|hk\000|f\001\000\233\001\000\234\001\000\235\001\000\232\001\000\237\001\000\238\001\000\239\001\000\236\001\000\223adji{\001\000\172mo\001\000\194\001\000\196\001\000\192\001\000\193\001\000\195\001\000\197\001\000\199\001\000\209\001\000\166le\000_~\127\001\000\248\001\000\201\001\000\202\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\000`zc\000@g}b\001\000\216\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\001\000\240\001\000\253\001\000\254\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\000\170\001\000\186\001\000\230\001\000\184\001\000\198\001\000\164\001\000\181\000~\000s\000t\000u\000v\000w\000x\000y\000z\001\000\161\001\000\191\001\000\208\001\000\221\001\000\222\001\000\174\000^\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\000[\000]\001\000\175\001\000\168\001\000\180\001\000\215\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\000\244\001\000\246\001\000\242\001\000\243\001\000\245\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\001\000\251\001\000\252\001\000\249\001\000\250\001\000\255\000\\\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\212\001\000\214\001\000\210\001\000\211\001\000\213pqrstuvwxy\001\000\179\001\000\219\001\000\220\001\000\217\001\000\218\001\000\159" 0 : int array);;
+let cp037_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\133\000\000\000\000\000\000\006\001\000\000\006\001\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\144\160Wf\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000Z\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\001\000\186\144\160\000\\\001\000\224\144\160\000]\001\000\187\144\160\000^\001\000\176\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\000O\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000A\144\160\001\000\161\001\000\170\144\160\001\000\162\000J\144\160\001\000\163\001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\000j\144\160\001\000\167\001\000\181\144\160\001\000\168\001\000\189\144\160\001\000\169\001\000\180\144\
160\001\000\170\001\000\154\144\160\001\000\171\001\000\138\144\160\001\000\172\000_\144\160\001\000\173\001\000\202\144\160\001\000\174\001\000\175\144\160\001\000\175\001\000\188\144\160\001\000\176\001\000\144\144\160\001\000\177\001\000\143\144\160\001\000\178\001\000\234\144\160\001\000\179\001\000\250\144\160\001\000\180\001\000\190\144\160\001\000\181\001\000\160\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\179\144\160\001\000\184\001\000\157\144\160\001\000\185\001\000\218\144\160\001\000\186\001\000\155\144\160\001\000\187\001\000\139\144\160\001\000\188\001\000\183\144\160\001\000\189\001\000\184\144\160\001\000\190\001\000\185\144\160\001\000\191\001\000\171\144\160\001\000\192\000d\144\160\001\000\193\000e\144\160\001\000\194\000b\144\160\001\000\195\000f\144\160\001\000\196\000c\144\160\001\000\197\000g\144\160\001\000\198\001\000\158\144\160\001\000\199\000h\144\160\001\000\200\000t\144\160\001\000\201\000q\144\160\001\000\202\000r\144\160\001\000\203\000s\144\160\001\000\204\000x\144\160\001\000\205\000u\144\160\001\000\206\000v\144\160\001\000\207\000w\144\160\001\000\208\001\000\172\144\160\001\000\209\000i\144\160\001\000\210\001\000\237\144\160\001\000\211\001\000\238\144\160\001\000\212\001\000\235\144\160\001\000\213\001\000\239\144\160\001\000\214\001\000\236\144\160\001\000\215\001\000\191\144\160\001\000\216\001\000\128\144\160\001\000\217\001\000\253\144\160\001\000\218\001\000\254\144\160\001\000\219\001\000\251\144\160\001\000\220\001\000\252\144\160\001\000\221\001\000\173\144\160\001\000\222\001\000\174\144\160\001\000\223\000Y\144\160\001\000\224\000D\144\160\001\000\225\000E\144\160\001\000\226\000B\144\160\001\000\227\000F\144\160\001\000\228\000C\144\160\001\000\229\000G\144\160\001\000\230\001\000\156\144\160\001\000\231\000H\144\160\001\000\232\000T\144\160\001\000\233\000Q\144\160\001\000\234\000R\144\160\001\000\235\000S\144\160\001\000\236\000X\144\160\001\000\237\000U\144\160\001\000\238\000V\144\160\001\000\239\000W\144\160\001\000\240\001\000\140\144\160\001\000\241\000I\144\160\001\000\242\001\000\205\144\160\001\000\243\001\000\206\144\160\001\000\244\001\000\203\144\160\001\000\245\001\000\207\144\160\001\000\246\001\000\204\144\160\001\000\247\001\000\225\144\160\001\000\248\000p\144\160\001\000\249\001\000\221\144\160\001\000\250\001\000\222\144\160\001\000\251\001\000\219\144\160\001\000\252\001\000\220\144\160\001\000\253\001\000\141\144\160\001\000\254\001\000\142\144\160\001\000\255\001\000\223" 0 : Netmappings.from_uni_list array);;
+ let cp1006_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\228\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\006\240\001\006\241\001\006\242\001\006\243\001\006\244\001\006\245\001\006\246\001\006\247\001\006\248\001\006\249\001\006\012\001\006\027\001\000\173\001\006\031\002\000\000\254\129\002\000\000\254\141\002\000\000\254\142\000\255\002\000\000\254\143\002\000\000\254\145\002\000\000\251V\002\000\000\251X\002\000\000\254\147\002\000\000\254\149\002\000\000\254\151\002\000\000\251f\002\000\000\251h\002\000\000\254\153\002\000\000\254\155\002\000\000\254\157\002\000\000\254\159\002\000\000\251z\002\000\000\251|\002\000\000\254\161\002\000\000\254\163\002\000\000\254\165\002\000\000\254\167\002\000\000\254\169\002\000\000\251\132\002\000\000\254\171\002\000\000\254\173\002\000\000\251\140\002\000\000\254\175\002\000\000\251\138\002\000\000\254\177\002\000\000\254\179\002\000\000\254\181\002\000\000\254\183\002\000\000\254\185\002\000\000\254\187\002\000\000\254\189\002\000\000\254\191\002\000\000\254\193\002\000\000\254\197\002\000\000\254\201\002\000\000\254\202\002\000\000\254\203\002\000\000\254\204\002\000\000\254\205\002\000\000\254\206\002\000\000\254\207\002\000\000\254\208\002\000\000\254\209\002\000\000\254\211\002\000\000\254\213\002\000\000\254\215\002\000\000\254\217\002\000\000\254\219\002\000\000\251\146\002\000\000\251\148\002\000\000\254\221\002\000\000\254\223\002\000\000\254\224\002\000\000\254\225\002\000\000\254\227\002\000\000\251\158\002\000\000\254\229\002\000\000\254\231\002\000\000\254\133\002\000\000\254\237\002\000\000\251\166\002\000\000\251\168\002\000\000\251\169\002\000\000\251\170\002\000\000\254\128\002\000\000\254\137\002\000\000\254\138\002\000\000\254\139\002\000\000\254\241\002\000\000\254\242\002\000\000\254\243\002\000\000\251\176\002\000\000\251\174\002\000\000\254|\002\000\000\254}" 0 : int array);;
+let cp1006_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\143\000\000\000\000\000\000\006\146\000\000\006\146\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\006\012\001\000\171@\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\145\160\160[[\160\160\001\006\027\001\000\172@\144\160\\\\\144\160]]\144\160^^\145\160\160__\160\160\001\006\031\001\000\174@\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\002\000\000\251V\001\000\181@\144\160\000W\000W\145\160\160\000X\000X\160\160\002\000\000\251X\001\000\182@\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\002\000\000\251f\001\000\186@\144\160\000g\000g\145\160\160\000h\000h\160\160\002\000\000\251h\001\000\187@\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\145\160\160\000z\000z\160\160\002\000\000\251z\001\000\192@\144\160\000{\000{\145\160\160\000|\000|\160\160\002\000\000\251|\001\000\193\160\160\002\000\000\254|\001\000\254@\145\160\160\000}\000}\160\160\002\000\000\254}\001\000\255@\144\160\000~\000~\144\160\000\127\000\127\145\160\160\001\000\128\001\000\128\160\160\002\000\000\254\128\001\000\245@\145\160\160\001\000\129\001\000\129\160\160\002\000\000\254\129\001\000\175@\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\145\160\160\001\000\132\001\000\132\160\160\002\000\000\251\132\001\000\199@\145\160\160\001\000\133\001\000\133\160\160\002\000\000\254\133\001\000\239@\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\145\160\160\001\000\137\001\000\137\160\160\002\000\000\254\137\001\000\246@\145\160\160\001\000\138\001\000\138\160\160\002\000\000\251\138\001\000\204\160\160\002\000\000\254\138\001\000\247@\145\160\160\001\000\139\001\000\139\160\160\002\000\000\254\139\001\000\248@\145\160\160\001\000\140\001\000\140\160\160\002\000\000\251\140\001\000\202@\145\160\160\001\000\141\001\000\141\160\160\002\000\000\254\141\001\000\176@\145\160\160\001\000\142\001\000\142\160\160\002\000\000\254\142\001\000\177@\145\160\160\001\000\143\001\000\143\160\160\002\000\000\254\143\001\000\179@\144\160\001\000\144\001\000\144\145\160\160\001\000\145\001\000\145\160\160\002\000\000\254\145\001\000
\180@\145\160\160\001\000\146\001\000\146\160\160\002\000\000\251\146\001\000\229@\145\160\160\001\000\147\001\000\147\160\160\002\000\000\254\147\001\000\183@\145\160\160\001\000\148\001\000\148\160\160\002\000\000\251\148\001\000\230@\145\160\160\001\000\149\001\000\149\160\160\002\000\000\254\149\001\000\184@\144\160\001\000\150\001\000\150\145\160\160\001\000\151\001\000\151\160\160\002\000\000\254\151\001\000\185@\144\160\001\000\152\001\000\152\145\160\160\001\000\153\001\000\153\160\160\002\000\000\254\153\001\000\188@\144\160\001\000\154\001\000\154\145\160\160\001\000\155\001\000\155\160\160\002\000\000\254\155\001\000\189@\144\160\001\000\156\001\000\156\145\160\160\001\000\157\001\000\157\160\160\002\000\000\254\157\001\000\190@\145\160\160\001\000\158\001\000\158\160\160\002\000\000\251\158\001\000\236@\145\160\160\001\000\159\001\000\159\160\160\002\000\000\254\159\001\000\191@\144\160\001\000\160\001\000\160\144\160\002\000\000\254\161\001\000\194@\144\160\002\000\000\254\163\001\000\195@\144\160\002\000\000\254\165\001\000\196\144\160\002\000\000\251\166\001\000\241\144\160\002\000\000\254\167\001\000\197\144\160\002\000\000\251\168\001\000\242\145\160\160\002\000\000\254\169\001\000\198\160\160\002\000\000\251\169\001\000\243@\144\160\002\000\000\251\170\001\000\244\144\160\002\000\000\254\171\001\000\200@\145\160\160\001\000\173\001\000\173\160\160\002\000\000\254\173\001\000\201@\144\160\002\000\000\251\174\001\000\253\144\160\002\000\000\254\175\001\000\203\144\160\002\000\000\251\176\001\000\252\144\160\002\000\000\254\177\001\000\205@\144\160\002\000\000\254\179\001\000\206@\144\160\002\000\000\254\181\001\000\207@\144\160\002\000\000\254\183\001\000\208@\144\160\002\000\000\254\185\001\000\209@\144\160\002\000\000\254\187\001\000\210@\144\160\002\000\000\254\189\001\000\211@\144\160\002\000\000\254\191\001\000\212@\144\160\002\000\000\254\193\001\000\213@@@\144\160\002\000\000\254\197\001\000\214@@@\144\160\002\000\000\254\201\001\000\215\144\160\002\000\000\254\202\001\000\216\144\160\002\000\000\254\203\001\000\217\144\160\002\000\000\254\204\001\000\218\144\160\002\000\000\254\205\001\000\219\144\160\002\000\000\254\206\001\000\220\144\160\002\000\000\254\207\001\000\221\144\160\002\000\000\254\208\001\000\222\144\160\002\000\000\254\209\001\000\223@\144\160\002\000\000\254\211\001\000\224@\144\160\002\000\000\254\213\001\000\225@\144\160\002\000\000\254\215\001\000\226@\144\160\002\000\000\254\217\001\000\227@\144\160\002\000\000\254\219\001\000\228@\144\160\002\000\000\254\221\001\000\231@\144\160\002\000\000\254\223\001\000\232\144\160\002\000\000\254\224\001\000\233\144\160\002\000\000\254\225\001\000\234@\144\160\002\000\000\254\227\001\000\235@\144\160\002\000\000\254\229\001\000\237@\144\160\002\000\000\254\231\001\000\238@@@@@\144\160\002\000\000\254\237\001\000\240@@\144\160\001\006\240\001\000\161\145\160\160\001\006\241\001\000\162\160\160\002\000\000\254\241\001\000\249@\145\160\160\001\006\242\001\000\163\160\160\002\000\000\254\242\001\000\250@\145\160\160\001\006\243\001\000\164\160\160\002\000\000\254\243\001\000\251@\144\160\001\006\244\001\000\165\144\160\001\006\245\001\000\166\144\160\001\006\246\001\000\167\144\160\001\006\247\001\000\168\144\160\001\006\248\001\000\169\144\160\001\006\249\001\000\170@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp1026_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158Z`\001\000\160\001\000\226\001\000\228\001\000\224\001\000\225\001\000\227\001\000\229\000{\001\000\241\001\000\199n|hkaf\001\000\233\001\000\234\001\000\235\001\000\232\001\000\237\001\000\238\001\000\239\001\000\236\001\000\223\001\001\030\001\0010ji{\000^mo\001\000\194\001\000\196\001\000\192\001\000\193\001\000\195\001\000\197\000[\001\000\209\001\001_le\000_~\127\001\000\248\001\000\201\001\000\202\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\001\0011z\001\000\214\001\001^g}\001\000\220\001\000\216\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\000}\000`\001\000\166\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\000\170\001\000\186\001\000\230\001\000\184\001\000\198\001\000\164\001\000\181\001\000\246\000s\000t\000u\000v\000w\000x\000y\000z\001\000\161\001\000\191\000]d\000@\001\000\174\001\000\162\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\001\000\172\000|\001\000\175\001\000\168\001\000\180\001\000\215\001\000\231\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\000\244\000~\001\000\242\001\000\243\001\000\245\001\001\031\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\001\000\251\000\\\001\000\249\001\000\250\001\000\255\001\000\252\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\212c\001\000\210\001\000\211\001\000\213pqrstuvwxy\001\000\179\001\000\219b\001\000\217\001\000\218\001\000\159" 0 : int array);;
+let cp1026_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\151\000\000\000\000\000\000\006\025\000\000\006\025\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\144\160Wf\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\145\160\160^^\160\160\001\001\030\000Z@\145\160\160__\160\160\001\001\031\001\000\208@\144\160`\000@\144\160a\000O\144\160b\001\000\252\144\160c\001\000\236\144\160d\001\000\173\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\145\160\160\001\0010\000[\160\160p\001\000\240@\145\160\160\001\0011\000y\160\160q\001\000\241@\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\001\000\174\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\000h\144\160\000\\\001\000\220\144\160\000]\001\000\172\145\160\160\000^\000_\160\160\001\001^\000|@\145\160\160\001\001_\000j\160\160\000_\000m@\144\160\000`\001\000\141\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\000H\144\160\000|\001\000\187\144\160\000}\001\000\140\144\160\000~\001\000\204\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000A\144\160\001\000\161\001\000\170\144\160\001\000\162\001\000\176\144\160\001\000\163\
001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\001\000\142\144\160\001\000\167\001\000\181\144\160\001\000\168\001\000\189\144\160\001\000\169\001\000\180\144\160\001\000\170\001\000\154\144\160\001\000\171\001\000\138\144\160\001\000\172\001\000\186\144\160\001\000\173\001\000\202\144\160\001\000\174\001\000\175\144\160\001\000\175\001\000\188\144\160\001\000\176\001\000\144\144\160\001\000\177\001\000\143\144\160\001\000\178\001\000\234\144\160\001\000\179\001\000\250\144\160\001\000\180\001\000\190\144\160\001\000\181\001\000\160\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\179\144\160\001\000\184\001\000\157\144\160\001\000\185\001\000\218\144\160\001\000\186\001\000\155\144\160\001\000\187\001\000\139\144\160\001\000\188\001\000\183\144\160\001\000\189\001\000\184\144\160\001\000\190\001\000\185\144\160\001\000\191\001\000\171\144\160\001\000\192\000d\144\160\001\000\193\000e\144\160\001\000\194\000b\144\160\001\000\195\000f\144\160\001\000\196\000c\144\160\001\000\197\000g\144\160\001\000\198\001\000\158\144\160\001\000\199\000J\144\160\001\000\200\000t\144\160\001\000\201\000q\144\160\001\000\202\000r\144\160\001\000\203\000s\144\160\001\000\204\000x\144\160\001\000\205\000u\144\160\001\000\206\000v\144\160\001\000\207\000w@\144\160\001\000\209\000i\144\160\001\000\210\001\000\237\144\160\001\000\211\001\000\238\144\160\001\000\212\001\000\235\144\160\001\000\213\001\000\239\144\160\001\000\214\000{\144\160\001\000\215\001\000\191\144\160\001\000\216\001\000\128\144\160\001\000\217\001\000\253\144\160\001\000\218\001\000\254\144\160\001\000\219\001\000\251\144\160\001\000\220\000\127@@\144\160\001\000\223\000Y\144\160\001\000\224\000D\144\160\001\000\225\000E\144\160\001\000\226\000B\144\160\001\000\227\000F\144\160\001\000\228\000C\144\160\001\000\229\000G\144\160\001\000\230\001\000\156\144\160\001\000\231\001\000\192\144\160\001\000\232\000T\144\160\001\000\233\000Q\144\160\001\000\234\000R\144\160\001\000\235\000S\144\160\001\000\236\000X\144\160\001\000\237\000U\144\160\001\000\238\000V\144\160\001\000\239\000W@\144\160\001\000\241\000I\144\160\001\000\242\001\000\205\144\160\001\000\243\001\000\206\144\160\001\000\244\001\000\203\144\160\001\000\245\001\000\207\144\160\001\000\246\001\000\161\144\160\001\000\247\001\000\225\144\160\001\000\248\000p\144\160\001\000\249\001\000\221\144\160\001\000\250\001\000\222\144\160\001\000\251\001\000\219\144\160\001\000\252\001\000\224@@\144\160\001\000\255\001\000\223" 0 : Netmappings.from_uni_list array);;
+ let cp424_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\031\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158Z`\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\000\162n|hk\000|f\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225adji{\001\000\172mo\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\000\166le\000_~\127\000\255\001\005\234\000\255\000\255\001\000\160\000\255\000\255\000\255\001 \023\000`zc\000@g}b\000\255\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\000\255\000\255\000\255\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\000\255\000\255\000\255\001\000\184\000\255\001\000\164\001\000\181\000~\000s\000t\000u\000v\000w\000x\000y\000z\000\255\000\255\000\255\000\255\000\255\001\000\174\000^\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\000[\000]\001\000\175\001\000\168\001\000\180\001\000\215\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\000\255\000\255\000\255\000\255\000\255\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\000\255\000\255\000\255\000\255\000\255\000\\\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\000\255\000\255\000\255\000\255\000\255pqrstuvwxy\001\000\179\000\255\000\255\000\255\000\255\001\000\159" 0 : int array);;
+let cp424_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\135\000\000\000\000\000\000\005K\000\000\005K\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\145\160\160Wf\160\160\001 \023\000x@\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000Z\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\001\000\186\144\160\000\\\001\000\224\144\160\000]\001\000\187\144\160\000^\001\000\176\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\000O\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000t@\144\160\001\000\162\000J\144\160\001\000\163\001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\000j\144\160\001\000\167\001\000\181\144\160\001\000\168\001\000\189\144\160\001\000\169\001\000\180@\144\160\001\0
00\171\001\000\138\144\160\001\000\172\000_\144\160\001\000\173\001\000\202\144\160\001\000\174\001\000\175\144\160\001\000\175\001\000\188\144\160\001\000\176\001\000\144\144\160\001\000\177\001\000\143\144\160\001\000\178\001\000\234\144\160\001\000\179\001\000\250\144\160\001\000\180\001\000\190\144\160\001\000\181\001\000\160\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\179\144\160\001\000\184\001\000\157\144\160\001\000\185\001\000\218@\144\160\001\000\187\001\000\139\144\160\001\000\188\001\000\183\144\160\001\000\189\001\000\184\144\160\001\000\190\001\000\185@@@@@@@@@@@@@@@@@\144\160\001\005\208\000A\144\160\001\005\209\000B\144\160\001\005\210\000C\144\160\001\005\211\000D\144\160\001\005\212\000E\144\160\001\005\213\000F\144\160\001\005\214\000G\145\160\160\001\005\215\000H\160\160\001\000\215\001\000\191@\144\160\001\005\216\000I\144\160\001\005\217\000Q\144\160\001\005\218\000R\144\160\001\005\219\000S\144\160\001\005\220\000T\144\160\001\005\221\000U\144\160\001\005\222\000V\144\160\001\005\223\000W\144\160\001\005\224\000X\144\160\001\005\225\000Y\144\160\001\005\226\000b\144\160\001\005\227\000c\144\160\001\005\228\000d\144\160\001\005\229\000e\144\160\001\005\230\000f\144\160\001\005\231\000g\144\160\001\005\232\000h\144\160\001\005\233\000i\144\160\001\005\234\000q@@@@@@@@@@@@\144\160\001\000\247\001\000\225@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp437_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001\000\236\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\242\001\000\251\001\000\249\001\000\255\001\000\214\001\000\220\001\000\162\001\000\163\001\000\165\001 \167\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001#\016\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp437_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0071\000\000\000\000\000\000\006\229\000\000\006\229\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\1
27\000\127\160\160\001 \127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\155\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@@\144\160\001\000\165\001\000\157\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@\144\160\001\000\186\001\000\167\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\144\160\001\003\192\001\000\227@@\144\160\001\003\195\001\000\229\145\160\160\001\000\196\001\000\142\160\160\001\003\196\001\000\231@\144\160\001\000\197\001\000\143\145\160\160\001\000\198\001\000\146\160\160\001\003\198\001\000\237@\144\160\001\000\199\001\000\128@\144\160\001\000\201\001\000\144@@@@@@@\144\160\001\000\209\001\000\165@@@@\144\160\001\000\214\001\000\153@@@@@\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137\144\160\001\000\236\001\000\141\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139@\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246@\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129@@\144\160\001\000\255\001\000\152" 0 : Netmappings.from_uni_list array);;
+ let cp500_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158Z`\001\000\160\001\000\226\001\000\228\001\000\224\001\000\225\001\000\227\001\000\229\001\000\231\001\000\241\000[n|hkaf\001\000\233\001\000\234\001\000\235\001\000\232\001\000\237\001\000\238\001\000\239\001\000\236\001\000\223\000]dji{\000^mo\001\000\194\001\000\196\001\000\192\001\000\193\001\000\195\001\000\197\001\000\199\001\000\209\001\000\166le\000_~\127\001\000\248\001\000\201\001\000\202\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\000`zc\000@g}b\001\000\216\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\001\000\240\001\000\253\001\000\254\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\000\170\001\000\186\001\000\230\001\000\184\001\000\198\001\000\164\001\000\181\000~\000s\000t\000u\000v\000w\000x\000y\000z\001\000\161\001\000\191\001\000\208\001\000\221\001\000\222\001\000\174\001\000\162\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\001\000\172\000|\001\000\175\001\000\168\001\000\180\001\000\215\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\000\244\001\000\246\001\000\242\001\000\243\001\000\245\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\001\000\251\001\000\252\001\000\249\001\000\250\001\000\255\000\\\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\212\001\000\214\001\000\210\001\000\211\001\000\213pqrstuvwxy\001\000\179\001\000\219\001\000\220\001\000\217\001\000\218\001\000\159" 0 : int array);;
+let cp500_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\133\000\000\000\000\000\000\006\001\000\000\006\001\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\144\160Wf\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000O\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\000J\144\160\000\\\001\000\224\144\160\000]\000Z\144\160\000^\000_\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\001\000\187\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000A\144\160\001\000\161\001\000\170\144\160\001\000\162\001\000\176\144\160\001\000\163\001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\000j\144\160\001\000\167\001\000\181\144\160\001\000\168\001\000\189\144\160\001\000\169\001\000\180\144\160\001
\000\170\001\000\154\144\160\001\000\171\001\000\138\144\160\001\000\172\001\000\186\144\160\001\000\173\001\000\202\144\160\001\000\174\001\000\175\144\160\001\000\175\001\000\188\144\160\001\000\176\001\000\144\144\160\001\000\177\001\000\143\144\160\001\000\178\001\000\234\144\160\001\000\179\001\000\250\144\160\001\000\180\001\000\190\144\160\001\000\181\001\000\160\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\179\144\160\001\000\184\001\000\157\144\160\001\000\185\001\000\218\144\160\001\000\186\001\000\155\144\160\001\000\187\001\000\139\144\160\001\000\188\001\000\183\144\160\001\000\189\001\000\184\144\160\001\000\190\001\000\185\144\160\001\000\191\001\000\171\144\160\001\000\192\000d\144\160\001\000\193\000e\144\160\001\000\194\000b\144\160\001\000\195\000f\144\160\001\000\196\000c\144\160\001\000\197\000g\144\160\001\000\198\001\000\158\144\160\001\000\199\000h\144\160\001\000\200\000t\144\160\001\000\201\000q\144\160\001\000\202\000r\144\160\001\000\203\000s\144\160\001\000\204\000x\144\160\001\000\205\000u\144\160\001\000\206\000v\144\160\001\000\207\000w\144\160\001\000\208\001\000\172\144\160\001\000\209\000i\144\160\001\000\210\001\000\237\144\160\001\000\211\001\000\238\144\160\001\000\212\001\000\235\144\160\001\000\213\001\000\239\144\160\001\000\214\001\000\236\144\160\001\000\215\001\000\191\144\160\001\000\216\001\000\128\144\160\001\000\217\001\000\253\144\160\001\000\218\001\000\254\144\160\001\000\219\001\000\251\144\160\001\000\220\001\000\252\144\160\001\000\221\001\000\173\144\160\001\000\222\001\000\174\144\160\001\000\223\000Y\144\160\001\000\224\000D\144\160\001\000\225\000E\144\160\001\000\226\000B\144\160\001\000\227\000F\144\160\001\000\228\000C\144\160\001\000\229\000G\144\160\001\000\230\001\000\156\144\160\001\000\231\000H\144\160\001\000\232\000T\144\160\001\000\233\000Q\144\160\001\000\234\000R\144\160\001\000\235\000S\144\160\001\000\236\000X\144\160\001\000\237\000U\144\160\001\000\238\000V\144\160\001\000\239\000W\144\160\001\000\240\001\000\140\144\160\001\000\241\000I\144\160\001\000\242\001\000\205\144\160\001\000\243\001\000\206\144\160\001\000\244\001\000\203\144\160\001\000\245\001\000\207\144\160\001\000\246\001\000\204\144\160\001\000\247\001\000\225\144\160\001\000\248\000p\144\160\001\000\249\001\000\221\144\160\001\000\250\001\000\222\144\160\001\000\251\001\000\219\144\160\001\000\252\001\000\220\144\160\001\000\253\001\000\141\144\160\001\000\254\001\000\142\144\160\001\000\255\001\000\223" 0 : Netmappings.from_uni_list array);;
+ let cp737_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\001\003\163\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\195\001\003\194\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\201\001\003\172\001\003\173\001\003\174\001\003\202\001\003\175\001\003\204\001\003\205\001\003\203\001\003\206\001\003\134\001\003\136\001\003\137\001\003\138\001\003\140\001\003\142\001\003\143\001\000\177\001\"e\001\"d\001\003\170\001\003\171\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp737_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007'\000\000\000\000\000\000\006\216\000\000\006\216\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\127\000\127\160\160\001 
\127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@\144\160\001\003\134\001\000\234@\145\160\160\001%\136\001\000\219\160\160\001\003\136\001\000\235@\144\160\001\003\137\001\000\236\144\160\001\003\138\001\000\237@\145\160\160\001%\140\001\000\221\160\160\001\003\140\001\000\238@@\144\160\001\003\142\001\000\239\144\160\001\003\143\001\000\240\144\160\001%\144\001\000\222\145\160\160\001\003\145\001\000\128\160\160\001%\145\001\000\176@\145\160\160\001\003\146\001\000\129\160\160\001%\146\001\000\177@\145\160\160\001\003\147\001\000\130\160\160\001%\147\001\000\178@\144\160\001\003\148\001\000\131\144\160\001\003\149\001\000\132\144\160\001\003\150\001\000\133\144\160\001\003\151\001\000\134\144\160\001\003\152\001\000\135\144\160\001\003\153\001\000\136\144\160\001\003\154\001\000\137\144\160\001\003\155\001\000\138\144\160\001\003\156\001\000\139\144\160\001\003\157\001\000\140\144\160\001\003\158\001\000\141\144\160\001\003\159\001\000\142\145\160\160\001\003\160\001\000\143\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\003\161\001\000\144@\144\160\001\003\163\001\000\145\144\160\001\003\164\001\000\146\144\160\001\003\165\001\000\147\144\160\001\003\166\001\000\148\144\160\001\003\167\001\000\149\144\160\001\003\168\001\000\150\144\160\001\003\169\001\000\151\144\160\001\003\170\001\000\244\144\160\001\003\171\001\000\245\144\160\001\003\172\001\000\225\144\160\001\003\173\001\000\226\144\160\001\003\174\001\000\227\144\160\001\003\175\001\000\229\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\152\160\160\001\000\177\001\000\241@\145\160\160\001\003\178\001\000\153\160\160\001\000\178\001\000\253@\144\160\001\003\179\001\000\154\144\160\001\003\180\001\000\155\144\160\001\003\181\001\000\156\144\160\001\003\182\001\000\157\145\160\160\001\003\183\001\000\158\160\160\001\000\183\001\000\250@\144\160\001\003\184\001\000\159\144\160\001\003\185\001\000\160\144\160\001\003\186\001\000\161\144\160\001\003\187\001\000\162\144\160\001\003\188\001\000\163\144\160\001\003\189\001\000\164\144\160\001\003\190\001\000\165\144\160\001\003\191\001\000\166\144\160\001\003\192\001\000\167\144\160\001\003\193\001\000\168\144\160\001\003\194\001\000\170\144\160\001\003\195\001\000\169\144\160\001\003\196\001\000\171\144\160\001\003\197\001\000\172\144\160\001\003\198\001\000\173\144\160\001\003\199\001\000\174\144\160\001\003\200\001\000\175\144\160\001\003\201\001\000\224\144\160\001\003\202\001\000\228\144\160\001\003\203\001\000\232\144\160\001\003\204\001\000\230\144\160\001\003\205\001\000\231\144\160\001\003\206\001\000\233@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\247\001\000\246@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp775_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\001\006\001\000\252\001\000\233\001\001\001\001\000\228\001\001#\001\000\229\001\001\007\001\001B\001\001\019\001\001V\001\001W\001\001+\001\001y\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\001M\001\000\246\001\001\"\001\000\162\001\001Z\001\001[\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001\000\215\001\000\164\001\001\000\001\001*\001\000\243\001\001{\001\001|\001\001z\001 \029\001\000\166\001\000\169\001\000\174\001\000\172\001\000\189\001\000\188\001\001A\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\001\004\001\001\012\001\001\024\001\001\022\001%c\001%Q\001%W\001%]\001\001.\001\001`\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\001r\001\001j\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\001}\001\001\005\001\001\013\001\001\025\001\001\023\001\001/\001\001a\001\001s\001\001k\001\001~\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\000\211\001\000\223\001\001L\001\001C\001\000\245\001\000\213\001\000\181\001\001D\001\0016\001\0017\001\001;\001\001<\001\001F\001\001\018\001\001E\001 \025\001\000\173\001\000\177\001 \028\001\000\190\001\000\182\001\000\167\001\000\247\001 \030\001\000\176\001\"\025\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp775_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007U\000\000\000\000\000\000\007\019\000\000\007\019\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\160\160\160\001%\000\001\000\196@\145\160\160AA\160\160\001\001\001\001\000\131@\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\145\160\160DD\160\160\001\001\004\001\000\181@\145\160\160EE\160\160\001\001\005\001\000\208@\145\160\160FF\160\160\001\001\006\001\000\128@\145\160\160GG\160\160\001\001\007\001\000\135@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\182\160\160\001%\012\001\000\218@\145\160\160MM\160\160\001\001\013\001\000\209@\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\145\160\160RR\160\160\001\001\018\001\000\237@\145\160\160SS\160\160\001\001\019\001\000\137@\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\145\160\160VV\160\160\001\001\022\001\000\184@\145\160\160WW\160\160\001\001\023\001\000\211@\145\160\160XX\160\160\001\001\024\001\000\183\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\001\025\001\000\210\160\160\001 \025\001\000\239\160\160\001\"\025\001\000\249@\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195\160\160\001 \028\001\000\242@\145\160\160]]\160\160\001 \029\001\000\166@\145\160\160^^\160\160\001 \030\001\000\247@\144\160__\144\160``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\149@\145\160\160cc\160\160\001\001#\001\000\133@\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\145\160\160jj\160\160\001\001*\001\000\161@\145\160\160kk\160\160\001\001+\001\000\140@\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\145\160\160nn\160\160\001\001.\001\000\189@\145\160\160oo\160\160\001\001/\001\000\212@\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\145\160\160vv\160\160\001\0016\001\000\232@\145\160\160ww\160\160\001\0017\001\000\233@\144\160xx\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\234@\145\160\160||\160\160\001%<\001\000\197\160\160\001\001<\001\000\235@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\173@\145\160\160\000B\000B\160\160\001\001B\001\000\136@\145\160\160\000C\000C\160\160\001\001C\001\000\227@\145\160\160\000D\000D\160\160\001\001D\001\000\231@\145\160\160\000E\000E\160\160\001\001E\001\000\238@\145\160\160\000F\000F\160\160\001\001F\001\000\236@\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\145\160\160\000L\000L\160\160\001\001L\001\000\226@\145\160\160\000M\000M\160\160\001\001M\001\000\147@\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\145\160\160\000V\000V\160\160\001\001V\001\000\138@\145\160\160\000W\000W\160\160\001\001W\001\000\139\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001\001Z\001\000\151\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001\001[\001\000\152@\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\190\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001\001a\001\000\213@\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\
160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001\001j\001\000\199@\145\160\160\000k\000k\160\160\001\001k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\198@\145\160\160\000s\000s\160\160\001\001s\001\000\214@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\141@\145\160\160\000z\000z\160\160\001\001z\001\000\165@\145\160\160\000{\000{\160\160\001\001{\001\000\163@\145\160\160\000|\000|\160\160\001\001|\001\000\164@\145\160\160\000}\000}\160\160\001\001}\001\000\207@\145\160\160\000~\000~\160\160\001\001~\001\000\216@\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@\144\160\001\000\162\001\000\150\144\160\001\000\163\001\000\156\144\160\001\000\164\001\000\159@\144\160\001\000\166\001\000\167\144\160\001\000\167\001\000\245@\144\160\001\000\169\001\000\168@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240\144\160\001\000\174\001\000\169@\144\160\001\000\176\001\000\248\144\160\001\000\177\001\000\241\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\252@\144\160\001\000\181\001\000\230\144\160\001\000\182\001\000\244\144\160\001\000\183\001\000\250@\144\160\001\000\185\001\000\251@\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\243@@@@@\144\160\001\000\196\001\000\142\144\160\001\000\197\001\000\143\144\160\001\000\198\001\000\146@@\144\160\001\000\201\001\000\144@@@@@@@@@\144\160\001\000\211\001\000\224@\144\160\001\000\213\001\000\229\144\160\001\000\214\001\000\153\144\160\001\000\215\001\000\158\144\160\001\000\216\001\000\157@@@\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225@@@@\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145@@\144\160\001\000\233\001\000\130@@@@@@@@@\144\160\001\000\243\001\000\162@\144\160\001\000\245\001\000\228\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155@@@\144\160\001\000\252\001\000\129@@@" 0 : Netmappings.from_uni_list array);;
+ let cp850_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001\000\236\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\242\001\000\251\001\000\249\001\000\255\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001\000\215\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001\000\174\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\000\193\001\000\194\001\000\192\001\000\169\001%c\001%Q\001%W\001%]\001\000\162\001\000\165\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\000\227\001\000\195\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\000\240\001\000\208\001\000\202\001\000\203\001\000\200\001\0011\001\000\205\001\000\206\001\000\207\001%\024\001%\012\001%\136\001%\132\001\000\166\001\000\204\001%\128\001\000\211\001\000\223\001\000\212\001\000\210\001\000\245\001\000\213\001\000\181\001\000\254\001\000\222\001\000\218\001\000\219\001\000\217\001\000\253\001\000\221\001\000\175\001\000\180\001\000\173\001\000\177\001 \023\001\000\190\001\000\182\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp850_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\211\000\000\000\000\000\000\006i\000\000\006i\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\242@\145\160\160XX\160\160\001%\024\001\000\217@\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\145\160\160qq\160\160\001\0011\001\000\213@\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\189\144\160\001\000\163\001\000\156\144\160\001\000\164\001\000\207\144\160\001\000\165\001\000\190\144\160\001\000\166\001\000\221\144\160\001\000\167\001\000\245\144\160\001\000\168\001\000\249\144\160\001\000\169\001\000\184\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240\144\160\001\000\174\001\000\169\144\160\001\000\175\001\000\238\144\160\001\000\176\001\000\248\144\160\001\0
00\177\001\000\241\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\252\144\160\001\000\180\001\000\239\144\160\001\000\181\001\000\230\144\160\001\000\182\001\000\244\144\160\001\000\183\001\000\250\144\160\001\000\184\001\000\247\144\160\001\000\185\001\000\251\144\160\001\000\186\001\000\167\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\243\144\160\001\000\191\001\000\168\144\160\001\000\192\001\000\183\144\160\001\000\193\001\000\181\144\160\001\000\194\001\000\182\144\160\001\000\195\001\000\199\144\160\001\000\196\001\000\142\144\160\001\000\197\001\000\143\144\160\001\000\198\001\000\146\144\160\001\000\199\001\000\128\144\160\001\000\200\001\000\212\144\160\001\000\201\001\000\144\144\160\001\000\202\001\000\210\144\160\001\000\203\001\000\211\144\160\001\000\204\001\000\222\144\160\001\000\205\001\000\214\144\160\001\000\206\001\000\215\144\160\001\000\207\001\000\216\144\160\001\000\208\001\000\209\144\160\001\000\209\001\000\165\144\160\001\000\210\001\000\227\144\160\001\000\211\001\000\224\144\160\001\000\212\001\000\226\144\160\001\000\213\001\000\229\144\160\001\000\214\001\000\153\144\160\001\000\215\001\000\158\144\160\001\000\216\001\000\157\144\160\001\000\217\001\000\235\144\160\001\000\218\001\000\233\144\160\001\000\219\001\000\234\144\160\001\000\220\001\000\154\144\160\001\000\221\001\000\237\144\160\001\000\222\001\000\232\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131\144\160\001\000\227\001\000\198\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137\144\160\001\000\236\001\000\141\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139\144\160\001\000\240\001\000\208\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147\144\160\001\000\245\001\000\228\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129\144\160\001\000\253\001\000\236\144\160\001\000\254\001\000\231\144\160\001\000\255\001\000\152" 0 : Netmappings.from_uni_list array);;
+ let cp852_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\001o\001\001\007\001\000\231\001\001B\001\000\235\001\001P\001\001Q\001\000\238\001\001y\001\000\196\001\001\006\001\000\201\001\0019\001\001:\001\000\244\001\000\246\001\001=\001\001>\001\001Z\001\001[\001\000\214\001\000\220\001\001d\001\001e\001\001A\001\000\215\001\001\013\001\000\225\001\000\237\001\000\243\001\000\250\001\001\004\001\001\005\001\001}\001\001~\001\001\024\001\001\025\001\000\172\001\001z\001\001\012\001\001_\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\000\193\001\000\194\001\001\026\001\001^\001%c\001%Q\001%W\001%]\001\001{\001\001|\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\001\002\001\001\003\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\001\017\001\001\016\001\001\014\001\000\203\001\001\015\001\001G\001\000\205\001\000\206\001\001\027\001%\024\001%\012\001%\136\001%\132\001\001b\001\001n\001%\128\001\000\211\001\000\223\001\000\212\001\001C\001\001D\001\001H\001\001`\001\001a\001\001T\001\000\218\001\001U\001\001p\001\000\253\001\000\221\001\001c\001\000\180\001\000\173\001\002\221\001\002\219\001\002\199\001\002\216\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\002\217\001\001q\001\001X\001\001Y\001%\160\001\000\160" 0 : int array);;
+let cp852_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007X\000\000\000\000\000\000\007\023\000\000\007\023\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179\160\160\001\001\002\001\000\198@\145\160\160CC\160\160\001\001\003\001\000\199@\145\160\160DD\160\160\001\001\004\001\000\164@\145\160\160EE\160\160\001\001\005\001\000\165@\145\160\160FF\160\160\001\001\006\001\000\143@\145\160\160GG\160\160\001\001\007\001\000\134@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\172\160\160\001%\012\001\000\218@\145\160\160MM\160\160\001\001\013\001\000\159@\145\160\160NN\160\160\001\001\014\001\000\210@\145\160\160OO\160\160\001\001\015\001\000\212@\145\160\160PP\160\160\001%\016\001\000\191\160\160\001\001\016\001\000\209@\145\160\160QQ\160\160\001\001\017\001\000\208@\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001\001\024\001\000\168\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\001\025\001\000\169@\145\160\160ZZ\160\160\001\001\026\001\000\183@\145\160\160[[\160\160\001\001\027\001\000\216@\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001\0019\001\000\145@\145\160\160zz\160\160\001\001:\001\000\146@\144\160{{\145\160\160||\160\160\001%<\001\000\197@\145\160\160}}\160\160\001\001=\001\000\149@\145\160\160~~\160\160\001\001>\001\000\150@\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\157@\145\160\160\000B\000B\160\160\001\001B\001\000\136@\145\160\160\000C\000C\160\160\001\001C\001\000\227@\145\160\160\000D\000D\160\160\001\001D\001\000\228@\144\160\000E\000E\144\160\000F\000F\145\160\160\000G\000G\160\160\001\001G\001\000\213@\145\160\160\000H\000H\160\160\001\001H\001\000\229@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001\001P\001\000\138\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001\001Q\001\000\139\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201\160\160\001\001T\001\000\232@\145\160\160\000U\000U\160\160\001\001U\001\000\234@\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001\001X\001\000\252@\145\160\160\000Y\000Y\160\160\001\001Y\001\000\253@\145\160\160\000Z\000Z\160\160\001\001Z\001\000\151\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001\001[\001\000\152@\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001\001^\001\000\184@\145\160\160\000_\000_\160\160\001\001_\001\000\173@\145\160\160\000`\000`\160\160\001%`\001\000\204\160\160\001\001`\001\000\230@\145\160\160\000a\000a\160\160\001\001a\001\000\231@\145\160\160\000b\000b\160\160\001\001b\001\000\221@\145\160\160\000c\000c\160\160\001%c\001\000\185\160\160\001\001c\001\000\238@\145\160\160\000d\000d\160\160\001\001d\001\000\155@\145\160\160\000e\000e\160\160\001\001e\001\000\156@\145\160\160\000f\000f\160\160\001%
f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\145\160\160\000n\000n\160\160\001\001n\001\000\222@\145\160\160\000o\000o\160\160\001\001o\001\000\133@\145\160\160\000p\000p\160\160\001\001p\001\000\235@\145\160\160\000q\000q\160\160\001\001q\001\000\251@\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\141@\145\160\160\000z\000z\160\160\001\001z\001\000\171@\145\160\160\000{\000{\160\160\001\001{\001\000\189@\145\160\160\000|\000|\160\160\001\001|\001\000\190@\145\160\160\000}\000}\160\160\001\001}\001\000\166@\145\160\160\000~\000~\160\160\001\001~\001\000\167@\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@@@\144\160\001\000\164\001\000\207@@\144\160\001\000\167\001\000\245\144\160\001\000\168\001\000\249@@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240@@\144\160\001\000\176\001\000\248@@@\144\160\001\000\180\001\000\239@@@\144\160\001\000\184\001\000\247@@\144\160\001\000\187\001\000\175@@@@@\144\160\001\000\193\001\000\181\144\160\001\000\194\001\000\182@\144\160\001\000\196\001\000\142@@\145\160\160\001\000\199\001\000\128\160\160\001\002\199\001\000\243@@\144\160\001\000\201\001\000\144@\144\160\001\000\203\001\000\211@\144\160\001\000\205\001\000\214\144\160\001\000\206\001\000\215@@@@\144\160\001\000\211\001\000\224\144\160\001\000\212\001\000\226@\144\160\001\000\214\001\000\153\144\160\001\000\215\001\000\158\144\160\001\002\216\001\000\244\144\160\001\002\217\001\000\250\144\160\001\000\218\001\000\233\144\160\001\002\219\001\000\242\144\160\001\000\220\001\000\154\145\160\160\001\000\221\001\000\237\160\160\001\002\221\001\000\241@@\144\160\001\000\223\001\000\225@\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132@@\144\160\001\000\231\001\000\135@\144\160\001\000\233\001\000\130@\144\160\001\000\235\001\000\137@\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140@@@@\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246@@\144\160\001\000\250\001\000\163@\144\160\001\000\252\001\000\129\144\160\001\000\253\001\000\236@@" 0 : Netmappings.from_uni_list array);;
+ let cp855_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\004R\001\004\002\001\004S\001\004\003\001\004Q\001\004\001\001\004T\001\004\004\001\004U\001\004\005\001\004V\001\004\006\001\004W\001\004\007\001\004X\001\004\008\001\004Y\001\004\t\001\004Z\001\004\n\001\004[\001\004\011\001\004\\\001\004\012\001\004^\001\004\014\001\004_\001\004\015\001\004N\001\004.\001\004J\001\004*\001\0040\001\004\016\001\0041\001\004\017\001\004F\001\004&\001\0044\001\004\020\001\0045\001\004\021\001\004D\001\004$\001\0043\001\004\019\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\004E\001\004%\001\0048\001\004\024\001%c\001%Q\001%W\001%]\001\0049\001\004\025\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\004:\001\004\026\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\004;\001\004\027\001\004<\001\004\028\001\004=\001\004\029\001\004>\001\004\030\001\004?\001%\024\001%\012\001%\136\001%\132\001\004\031\001\004O\001%\128\001\004/\001\004@\001\004 \001\004A\001\004!\001\004B\001\004\"\001\004C\001\004#\001\0046\001\004\022\001\0042\001\004\018\001\004L\001\004,\001!\022\001\000\173\001\004K\001\004+\001\0047\001\004\023\001\004H\001\004(\001\004M\001\004-\001\004I\001\004)\001\004G\001\004'\001\000\167\001%\160\001\000\160" 0 : int array);;
+let cp855_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\195\000\000\000\000\000\000\007\164\000\000\007\164\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\145\160\160AA\160\160\001\004\001\001\000\133@\145\160\160BB\160\160\001\004\002\001\000\129\160\160\001%\002\001\000\179@\145\160\160CC\160\160\001\004\003\001\000\131@\145\160\160DD\160\160\001\004\004\001\000\135@\145\160\160EE\160\160\001\004\005\001\000\137@\145\160\160FF\160\160\001\004\006\001\000\139@\145\160\160GG\160\160\001\004\007\001\000\141@\145\160\160HH\160\160\001\004\008\001\000\143@\145\160\160II\160\160\001\004\t\001\000\145@\145\160\160JJ\160\160\001\004\n\001\000\147@\145\160\160KK\160\160\001\004\011\001\000\149@\145\160\160LL\160\160\001\004\012\001\000\151\160\160\001%\012\001\000\218@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\153@\145\160\160OO\160\160\001\004\015\001\000\155@\145\160\160PP\160\160\001\004\016\001\000\161\160\160\001%\016\001\000\191@\145\160\160QQ\160\160\001\004\017\001\000\163@\145\160\160RR\160\160\001\004\018\001\000\236@\145\160\160SS\160\160\001\004\019\001\000\173@\145\160\160TT\160\160\001\004\020\001\000\167\160\160\001%\020\001\000\192@\145\160\160UU\160\160\001\004\021\001\000\169@\145\160\160VV\160\160\001\004\022\001\000\234\160\160\001!\022\001\000\239@\145\160\160WW\160\160\001\004\023\001\000\244@\145\160\160XX\160\160\001\004\024\001\000\184\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\004\025\001\000\190@\145\160\160ZZ\160\160\001\004\026\001\000\199@\145\160\160[[\160\160\001\004\027\001\000\209@\145\160\160\\\\\160\160\001%\028\001\000\195\160\160\001\004\028\001\000\211@\145\160\160]]\160\160\001\004\029\001\000\213@\145\160\160^^\160\160\001\004\030\001\000\215@\145\160\160__\160\160\001\004\031\001\000\221@\145\160\160``\160\160\001\004 
\001\000\226@\145\160\160aa\160\160\001\004!\001\000\228@\145\160\160bb\160\160\001\004\"\001\000\230@\145\160\160cc\160\160\001\004#\001\000\232@\145\160\160dd\160\160\001\004$\001\000\171\160\160\001%$\001\000\180@\145\160\160ee\160\160\001\004%\001\000\182@\145\160\160ff\160\160\001\004&\001\000\165@\145\160\160gg\160\160\001\004'\001\000\252@\145\160\160hh\160\160\001\004(\001\000\246@\145\160\160ii\160\160\001\004)\001\000\250@\145\160\160jj\160\160\001\004*\001\000\159@\145\160\160kk\160\160\001\004+\001\000\242@\145\160\160ll\160\160\001%,\001\000\194\160\160\001\004,\001\000\238@\145\160\160mm\160\160\001\004-\001\000\248@\145\160\160nn\160\160\001\004.\001\000\157@\145\160\160oo\160\160\001\004/\001\000\224@\145\160\160pp\160\160\001\0040\001\000\160@\145\160\160qq\160\160\001\0041\001\000\162@\145\160\160rr\160\160\001\0042\001\000\235@\145\160\160ss\160\160\001\0043\001\000\172@\145\160\160tt\160\160\001\0044\001\000\166\160\160\001%4\001\000\193@\145\160\160uu\160\160\001\0045\001\000\168@\145\160\160vv\160\160\001\0046\001\000\233@\145\160\160ww\160\160\001\0047\001\000\243@\145\160\160xx\160\160\001\0048\001\000\183@\145\160\160yy\160\160\001\0049\001\000\189@\145\160\160zz\160\160\001\004:\001\000\198@\145\160\160{{\160\160\001\004;\001\000\208@\145\160\160||\160\160\001%<\001\000\197\160\160\001\004<\001\000\210@\145\160\160}}\160\160\001\004=\001\000\212@\145\160\160~~\160\160\001\004>\001\000\214@\145\160\160\127\127\160\160\001\004?\001\000\216@\145\160\160\000@\000@\160\160\001\004@\001\000\225@\145\160\160\000A\000A\160\160\001\004A\001\000\227@\145\160\160\000B\000B\160\160\001\004B\001\000\229@\145\160\160\000C\000C\160\160\001\004C\001\000\231@\145\160\160\000D\000D\160\160\001\004D\001\000\170@\145\160\160\000E\000E\160\160\001\004E\001\000\181@\145\160\160\000F\000F\160\160\001\004F\001\000\164@\145\160\160\000G\000G\160\160\001\004G\001\000\251@\145\160\160\000H\000H\160\160\001\004H\001\000\245@\145\160\160\000I\000I\160\160\001\004I\001\000\249@\145\160\160\000J\000J\160\160\001\004J\001\000\158@\145\160\160\000K\000K\160\160\001\004K\001\000\241@\145\160\160\000L\000L\160\160\001\004L\001\000\237@\145\160\160\000M\000M\160\160\001\004M\001\000\247@\145\160\160\000N\000N\160\160\001\004N\001\000\156@\145\160\160\000O\000O\160\160\001\004O\001\000\222@\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001\004Q\001\000\132\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001\004R\001\000\128@\145\160\160\000S\000S\160\160\001\004S\001\000\130@\145\160\160\000T\000T\160\160\001\004T\001\000\134\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001\004U\001\000\136@\145\160\160\000V\000V\160\160\001\004V\001\000\138@\145\160\160\000W\000W\160\160\001\004W\001\000\140\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001\004X\001\000\142@\145\160\160\000Y\000Y\160\160\001\004Y\001\000\144@\145\160\160\000Z\000Z\160\160\001\004Z\001\000\146\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001\004[\001\000\148@\145\160\160\000\\\000\\\160\160\001\004\\\001\000\150@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001\004^\001\000\152@\145\160\160\000_\000_\160\160\001\004_\001\000\154@\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\20
2@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@@@\144\160\001\000\164\001\000\207@@\144\160\001\000\167\001\000\253@@@\144\160\001\000\171\001\000\174@\144\160\001\000\173\001\000\240@@@@@@@@@@@@@\144\160\001\000\187\001\000\175@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp856_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\028\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\005\234\000\255\001\000\163\000\255\001\000\215\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\174\001\000\172\001\000\189\001\000\188\000\255\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\000\255\000\255\000\255\001\000\169\001%c\001%Q\001%W\001%]\001\000\162\001\000\165\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\000\255\000\255\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001%\024\001%\012\001%\136\001%\132\001\000\166\000\255\001%\128\000\255\000\255\000\255\000\255\000\255\000\255\001\000\181\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\175\001\000\180\001\000\173\001\000\177\001 \023\001\000\190\001\000\182\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp856_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\177\000\000\000\000\000\000\005\152\000\000\005\152\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\242@\145\160\160XX\160\160\001%\024\001\000\217@\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@\144\160\001\000\162\001\000\189\144\160\001\000\163\001\000\156\144\160\001\000\164\001\000\207\144\160\001\000\165\001\000\190\144\160\001\000\166\001\000\221\144\160\001\000\167\001\000\245\144\160\001\000\168\001\000\249\144\160\001\000\169\001\000\184@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240\144\160\001\000\174\001\000\169\144\160\001\000\175\001\000\238\144\160\001\000\176\001\000\248\144\160\001\000\177\001\000\241\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\252\144\160\001\000\180\001\000\239\144\160\001\
000\181\001\000\230\144\160\001\000\182\001\000\244\144\160\001\000\183\001\000\250\144\160\001\000\184\001\000\247\144\160\001\000\185\001\000\251@\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\243@@@@@@@@@@@@@@@@@\144\160\001\005\208\001\000\128\144\160\001\005\209\001\000\129\144\160\001\005\210\001\000\130\144\160\001\005\211\001\000\131\144\160\001\005\212\001\000\132\144\160\001\005\213\001\000\133\144\160\001\005\214\001\000\134\145\160\160\001\005\215\001\000\135\160\160\001\000\215\001\000\158@\144\160\001\005\216\001\000\136\144\160\001\005\217\001\000\137\144\160\001\005\218\001\000\138\144\160\001\005\219\001\000\139\144\160\001\005\220\001\000\140\144\160\001\005\221\001\000\141\144\160\001\005\222\001\000\142\144\160\001\005\223\001\000\143\144\160\001\005\224\001\000\144\144\160\001\005\225\001\000\145\144\160\001\005\226\001\000\146\144\160\001\005\227\001\000\147\144\160\001\005\228\001\000\148\144\160\001\005\229\001\000\149\144\160\001\005\230\001\000\150\144\160\001\005\231\001\000\151\144\160\001\005\232\001\000\152\144\160\001\005\233\001\000\153\144\160\001\005\234\001\000\154@@@@@@@@@@@@\144\160\001\000\247\001\000\246@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp857_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002B\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001\0011\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\242\001\000\251\001\000\249\001\0010\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001\001^\001\001_\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\001\030\001\001\031\001\000\191\001\000\174\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\000\193\001\000\194\001\000\192\001\000\169\001%c\001%Q\001%W\001%]\001\000\162\001\000\165\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\000\227\001\000\195\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\000\186\001\000\170\001\000\202\001\000\203\001\000\200\000\255\001\000\205\001\000\206\001\000\207\001%\024\001%\012\001%\136\001%\132\001\000\166\001\000\204\001%\128\001\000\211\001\000\223\001\000\212\001\000\210\001\000\245\001\000\213\001\000\181\000\255\001\000\215\001\000\218\001\000\219\001\000\217\001\000\236\001\000\255\001\000\175\001\000\180\001\000\173\001\000\177\000\255\001\000\190\001\000\182\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp857_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\199\000\000\000\000\000\000\006f\000\000\006f\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\001\030\001\000\166@\145\160\160__\160\160\001\001\031\001\000\167@\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001\0010\001\000\152@\145\160\160qq\160\160\001\0011\001\000\141@\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001\001^\001\000\158@\145\160\160\000_\000_\160\160\001\001_\001\000\159@\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\189\144\160\001\000\163\001\000\156\144\160\001\000\164\001\000\207\144\160\001\000\165\001\000\190\144\160\001\000\166\001\000\221\144\160\001\000\167\001\000\245\144\160\001\000\168\001\000\249\144\160\001\000\169\001\000\184\144\160\001\000\170\001\000\209\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240\144\
160\001\000\174\001\000\169\144\160\001\000\175\001\000\238\144\160\001\000\176\001\000\248\144\160\001\000\177\001\000\241\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\252\144\160\001\000\180\001\000\239\144\160\001\000\181\001\000\230\144\160\001\000\182\001\000\244\144\160\001\000\183\001\000\250\144\160\001\000\184\001\000\247\144\160\001\000\185\001\000\251\144\160\001\000\186\001\000\208\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\243\144\160\001\000\191\001\000\168\144\160\001\000\192\001\000\183\144\160\001\000\193\001\000\181\144\160\001\000\194\001\000\182\144\160\001\000\195\001\000\199\144\160\001\000\196\001\000\142\144\160\001\000\197\001\000\143\144\160\001\000\198\001\000\146\144\160\001\000\199\001\000\128\144\160\001\000\200\001\000\212\144\160\001\000\201\001\000\144\144\160\001\000\202\001\000\210\144\160\001\000\203\001\000\211\144\160\001\000\204\001\000\222\144\160\001\000\205\001\000\214\144\160\001\000\206\001\000\215\144\160\001\000\207\001\000\216@\144\160\001\000\209\001\000\165\144\160\001\000\210\001\000\227\144\160\001\000\211\001\000\224\144\160\001\000\212\001\000\226\144\160\001\000\213\001\000\229\144\160\001\000\214\001\000\153\144\160\001\000\215\001\000\232\144\160\001\000\216\001\000\157\144\160\001\000\217\001\000\235\144\160\001\000\218\001\000\233\144\160\001\000\219\001\000\234\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131\144\160\001\000\227\001\000\198\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139@\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147\144\160\001\000\245\001\000\228\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129@@\144\160\001\000\255\001\000\237" 0 : Netmappings.from_uni_list array);;
+ let cp860_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\227\001\000\224\001\000\193\001\000\231\001\000\234\001\000\202\001\000\232\001\000\205\001\000\212\001\000\236\001\000\195\001\000\194\001\000\201\001\000\192\001\000\200\001\000\244\001\000\245\001\000\242\001\000\218\001\000\249\001\000\204\001\000\213\001\000\220\001\000\162\001\000\163\001\000\217\001 \167\001\000\211\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001\000\210\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp860_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007-\000\000\000\000\000\000\006\224\000\000\006\224\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\127\000\127\160\160\001 
\127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\155\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@@@\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@\144\160\001\000\186\001\000\167\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\145\160\160\001\000\192\001\000\145\160\160\001\003\192\001\000\227@\144\160\001\000\193\001\000\134\144\160\001\000\194\001\000\143\145\160\160\001\000\195\001\000\142\160\160\001\003\195\001\000\229@\144\160\001\003\196\001\000\231@\144\160\001\003\198\001\000\237\144\160\001\000\199\001\000\128\144\160\001\000\200\001\000\146\144\160\001\000\201\001\000\144\144\160\001\000\202\001\000\137@\144\160\001\000\204\001\000\152\144\160\001\000\205\001\000\139@@@\144\160\001\000\209\001\000\165\144\160\001\000\210\001\000\169\144\160\001\000\211\001\000\159\144\160\001\000\212\001\000\140\144\160\001\000\213\001\000\153@@@\144\160\001\000\217\001\000\157\144\160\001\000\218\001\000\150@\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131\144\160\001\000\227\001\000\132@@@\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136@\144\160\001\000\236\001\000\141\144\160\001\000\237\001\000\161@@@\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147\144\160\001\000\245\001\000\148@\144\160\001\000\247\001\000\246@\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163@\144\160\001\000\252\001\000\129@@@" 0 : Netmappings.from_uni_list array);;
+ let cp861_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\208\001\000\240\001\000\222\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\254\001\000\251\001\000\221\001\000\253\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001 \167\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\193\001\000\205\001\000\211\001\000\218\001\000\191\001#\016\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp861_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0071\000\000\000\000\000\000\006\229\000\000\006\229\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\1
27\000\127\160\160\001 \127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173@\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@@@\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@@\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\144\160\001\003\192\001\000\227\144\160\001\000\193\001\000\164@\144\160\001\003\195\001\000\229\145\160\160\001\000\196\001\000\142\160\160\001\003\196\001\000\231@\144\160\001\000\197\001\000\143\145\160\160\001\000\198\001\000\146\160\160\001\003\198\001\000\237@\144\160\001\000\199\001\000\128@\144\160\001\000\201\001\000\144@@@\144\160\001\000\205\001\000\165@@\144\160\001\000\208\001\000\139@@\144\160\001\000\211\001\000\166@@\144\160\001\000\214\001\000\153@\144\160\001\000\216\001\000\157@\144\160\001\000\218\001\000\167@\144\160\001\000\220\001\000\154\144\160\001\000\221\001\000\151\144\160\001\000\222\001\000\141\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137@\144\160\001\000\237\001\000\161@@\144\160\001\000\240\001\000\140@@\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155@\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129\144\160\001\000\253\001\000\152\144\160\001\000\254\001\000\149@" 0 : Netmappings.from_uni_list array);;
+ let cp862_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\005\234\001\000\162\001\000\163\001\000\165\001 \167\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001#\016\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp862_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0074\000\000\000\000\000\000\006\233\000\000\006\233\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\1
27\000\127\160\160\001 \127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173\144\160\001\000\162\001\000\155\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@@\144\160\001\000\165\001\000\157\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@\144\160\001\000\186\001\000\167\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\144\160\001\003\192\001\000\227@@\144\160\001\003\195\001\000\229\144\160\001\003\196\001\000\231@\144\160\001\003\198\001\000\237@@@@@@@@@\144\160\001\005\208\001\000\128\145\160\160\001\005\209\001\000\129\160\160\001\000\209\001\000\165@\144\160\001\005\210\001\000\130\144\160\001\005\211\001\000\131\144\160\001\005\212\001\000\132\144\160\001\005\213\001\000\133\144\160\001\005\214\001\000\134\144\160\001\005\215\001\000\135\144\160\001\005\216\001\000\136\144\160\001\005\217\001\000\137\144\160\001\005\218\001\000\138\144\160\001\005\219\001\000\139\144\160\001\005\220\001\000\140\144\160\001\005\221\001\000\141\144\160\001\005\222\001\000\142\145\160\160\001\005\223\001\000\143\160\160\001\000\223\001\000\225@\144\160\001\005\224\001\000\144\145\160\160\001\005\225\001\000\145\160\160\001\000\225\001\000\160@\144\160\001\005\226\001\000\146\144\160\001\005\227\001\000\147\144\160\001\005\228\001\000\148\144\160\001\005\229\001\000\149\144\160\001\005\230\001\000\150\144\160\001\005\231\001\000\151\144\160\001\005\232\001\000\152\144\160\001\005\233\001\000\153\144\160\001\005\234\001\000\154@@\144\160\001\000\237\001\000\161@@@\144\160\001\000\241\001\000\164@\144\160\001\000\243\001\000\162@@@\144\160\001\000\247\001\000\246@@\144\160\001\000\250\001\000\163@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp863_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\194\001\000\224\001\000\182\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001 \023\001\000\192\001\000\167\001\000\201\001\000\200\001\000\202\001\000\244\001\000\203\001\000\207\001\000\251\001\000\249\001\000\164\001\000\212\001\000\220\001\000\162\001\000\163\001\000\217\001\000\219\001\001\146\001\000\166\001\000\180\001\000\243\001\000\250\001\000\168\001\000\184\001\000\179\001\000\175\001\000\206\001#\016\001\000\172\001\000\189\001\000\188\001\000\190\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp863_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0077\000\000\000\000\000\000\006\237\000\000\006\237\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\141@\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\1
44\160\000~\000~\145\160\160\000\127\000\127\160\160\001 \127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@\144\160\001\000\162\001\000\155\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@\144\160\001\000\164\001\000\152@\145\160\160\001\000\166\001\000\160\160\160\001\003\166\001\000\232@\144\160\001\000\167\001\000\143\144\160\001\000\168\001\000\164\144\160\001\003\169\001\000\234@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@\144\160\001\000\175\001\000\167\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253\144\160\001\000\179\001\000\166\145\160\160\001\000\180\001\000\161\160\160\001\003\180\001\000\235@\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@\144\160\001\000\182\001\000\134\144\160\001\000\183\001\000\250\144\160\001\000\184\001\000\165@@\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171\144\160\001\000\190\001\000\173@\145\160\160\001\000\192\001\000\142\160\160\001\003\192\001\000\227@@\144\160\001\000\194\001\000\132\144\160\001\003\195\001\000\229\144\160\001\003\196\001\000\231@\144\160\001\003\198\001\000\237\144\160\001\000\199\001\000\128\144\160\001\000\200\001\000\145\144\160\001\000\201\001\000\144\144\160\001\000\202\001\000\146\144\160\001\000\203\001\000\148@@\144\160\001\000\206\001\000\168\144\160\001\000\207\001\000\149@@@@\144\160\001\000\212\001\000\153@@@@\144\160\001\000\217\001\000\157@\144\160\001\000\219\001\000\158\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133@\144\160\001\000\226\001\000\131@@@@\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137@@\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139@@@\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@@\144\160\001\000\247\001\000\246@\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129@@@" 0 : Netmappings.from_uni_list array);;
+ let cp864_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\209\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcd\001\006jfghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\176\001\000\183\001\"\025\001\"\026\001%\146\001%\000\001%\002\001%<\001%$\001%,\001%\028\001%4\001%\016\001%\012\001%\020\001%\024\001\003\178\001\"\030\001\003\198\001\000\177\001\000\189\001\000\188\001\"H\001\000\171\001\000\187\002\000\000\254\247\002\000\000\254\248\000\255\000\255\002\000\000\254\251\002\000\000\254\252\000\255\001\000\160\001\000\173\002\000\000\254\130\001\000\163\001\000\164\002\000\000\254\132\000\255\000\255\002\000\000\254\142\002\000\000\254\143\002\000\000\254\149\002\000\000\254\153\001\006\012\002\000\000\254\157\002\000\000\254\161\002\000\000\254\165\001\006`\001\006a\001\006b\001\006c\001\006d\001\006e\001\006f\001\006g\001\006h\001\006i\002\000\000\254\209\001\006\027\002\000\000\254\177\002\000\000\254\181\002\000\000\254\185\001\006\031\001\000\162\002\000\000\254\128\002\000\000\254\129\002\000\000\254\131\002\000\000\254\133\002\000\000\254\202\002\000\000\254\139\002\000\000\254\141\002\000\000\254\145\002\000\000\254\147\002\000\000\254\151\002\000\000\254\155\002\000\000\254\159\002\000\000\254\163\002\000\000\254\167\002\000\000\254\169\002\000\000\254\171\002\000\000\254\173\002\000\000\254\175\002\000\000\254\179\002\000\000\254\183\002\000\000\254\187\002\000\000\254\191\002\000\000\254\193\002\000\000\254\197\002\000\000\254\203\002\000\000\254\207\001\000\166\001\000\172\001\000\247\001\000\215\002\000\000\254\201\001\006@\002\000\000\254\211\002\000\000\254\215\002\000\000\254\219\002\000\000\254\223\002\000\000\254\227\002\000\000\254\231\002\000\000\254\235\002\000\000\254\237\002\000\000\254\239\002\000\000\254\243\002\000\000\254\189\002\000\000\254\204\002\000\000\254\206\002\000\000\254\205\002\000\000\254\225\002\000\000\254}\001\006Q\002\000\000\254\229\002\000\000\254\233\002\000\000\254\236\002\000\000\254\240\002\000\000\254\242\002\000\000\254\208\002\000\000\254\213\002\000\000\254\245\002\000\000\254\246\002\000\000\254\221\002\000\000\254\217\002\000\000\254\241\001%\160\000\255" 0 : int array);;
+let cp864_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007i\000\000\000\000\000\000\006\136\000\000\006\136\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\133@\144\160AA\145\160\160BB\160\160\001%\002\001\000\134@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\141\160\160\001\006\012\001\000\172@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\140@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\142@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\143@\145\160\160YY\160\160\001\"\025\001\000\130@\145\160\160ZZ\160\160\001\"\026\001\000\131@\145\160\160[[\160\160\001\006\027\001\000\187@\145\160\160\\\\\160\160\001%\028\001\000\138@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\145@\145\160\160__\160\160\001\006\031\001\000\191@\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\136@@\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\137@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\139@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\135@\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\006@\001\000\224@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\150@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\145\160\160\000Q\000Q\160\160\001\006Q\001\000\241@\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\006`\001\000\176@\145\160\160\000a\000a\160\160\001\006a\001\000\177@\145\160\160\000b\000b\160\160\001\006b\001\000\178@\145\160\160\000c\000c\160\160\001\006c\001\000\179@\145\160\160\000d\000d\160\160\001\006d\001\000\180@\145\160\160\000e\000e\160\160\001\006e\001\000\181@\145\160\160\000f\000f\160\160\001\006f\001\000\182@\145\160\160\000g\000g\160\160\001\006g\001\000\183@\145\160\160\000h\000h\160\160\001\006h\001\000\184@\145\160\160\000i\000i\160\160\001\006i\001\000\185@\145\160\160\001\006je\160\160\000j\000j@\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\002\000\000\254}\001\000\240@\144\160\000~\000~\144\160\000\127\000\127\144\160\002\000\000\254\128\001\000\193\144\160\002\000\000\254\129\001\000\194\144\160\002\000\000\254\130\001\000\162\144\160\002\000\000\254\131\001\000\195\144\160\002\000\000\254\132\001\000\165\144\160\002\000\000\254\133\001\000\196@@@@@\144\160\002\000\000\254\139\001\000\198@\144\160\002\000\000\254\141\001\000\199\144\160\002\000\000\254\142\001\000\168\144\160\002\000\000\254\143\001\000\169@\144\160\002\000\000\254\145\001\000\200\144\160\001%\146\001\000\132\144\160\002\000\000\254\147\001\000\201@\144\160\00
2\000\000\254\149\001\000\170@\144\160\002\000\000\254\151\001\000\202@\144\160\002\000\000\254\153\001\000\171@\144\160\002\000\000\254\155\001\000\203@\144\160\002\000\000\254\157\001\000\173@\144\160\002\000\000\254\159\001\000\204\145\160\160\001\000\160\001\000\160\160\160\001%\160\001\000\254@\144\160\002\000\000\254\161\001\000\174\144\160\001\000\162\001\000\192\145\160\160\001\000\163\001\000\163\160\160\002\000\000\254\163\001\000\205@\144\160\001\000\164\001\000\164\144\160\002\000\000\254\165\001\000\175\144\160\001\000\166\001\000\219\144\160\002\000\000\254\167\001\000\206@\144\160\002\000\000\254\169\001\000\207@\145\160\160\001\000\171\001\000\151\160\160\002\000\000\254\171\001\000\208@\144\160\001\000\172\001\000\220\145\160\160\001\000\173\001\000\161\160\160\002\000\000\254\173\001\000\209@@\144\160\002\000\000\254\175\001\000\210\144\160\001\000\176\001\000\128\145\160\160\001\000\177\001\000\147\160\160\002\000\000\254\177\001\000\188@\144\160\001\003\178\001\000\144\144\160\002\000\000\254\179\001\000\211@\144\160\002\000\000\254\181\001\000\189@\145\160\160\001\000\183\001\000\129\160\160\002\000\000\254\183\001\000\212@@\144\160\002\000\000\254\185\001\000\190@\145\160\160\001\000\187\001\000\152\160\160\002\000\000\254\187\001\000\213@\144\160\001\000\188\001\000\149\145\160\160\001\000\189\001\000\148\160\160\002\000\000\254\189\001\000\235@@\144\160\002\000\000\254\191\001\000\214@\144\160\002\000\000\254\193\001\000\215@@@\144\160\002\000\000\254\197\001\000\216\144\160\001\003\198\001\000\146@@\144\160\002\000\000\254\201\001\000\223\144\160\002\000\000\254\202\001\000\197\144\160\002\000\000\254\203\001\000\217\144\160\002\000\000\254\204\001\000\236\144\160\002\000\000\254\205\001\000\238\144\160\002\000\000\254\206\001\000\237\144\160\002\000\000\254\207\001\000\218\144\160\002\000\000\254\208\001\000\247\144\160\002\000\000\254\209\001\000\186@\144\160\002\000\000\254\211\001\000\225@\144\160\002\000\000\254\213\001\000\248@\145\160\160\001\000\215\001\000\222\160\160\002\000\000\254\215\001\000\226@@\144\160\002\000\000\254\217\001\000\252@\144\160\002\000\000\254\219\001\000\227@\144\160\002\000\000\254\221\001\000\251@\144\160\002\000\000\254\223\001\000\228@\144\160\002\000\000\254\225\001\000\239@\144\160\002\000\000\254\227\001\000\229@\144\160\002\000\000\254\229\001\000\242@\144\160\002\000\000\254\231\001\000\230@\144\160\002\000\000\254\233\001\000\243@\144\160\002\000\000\254\235\001\000\231\144\160\002\000\000\254\236\001\000\244\144\160\002\000\000\254\237\001\000\232@\144\160\002\000\000\254\239\001\000\233\144\160\002\000\000\254\240\001\000\245\144\160\002\000\000\254\241\001\000\253\144\160\002\000\000\254\242\001\000\246\144\160\002\000\000\254\243\001\000\234@\144\160\002\000\000\254\245\001\000\249\144\160\002\000\000\254\246\001\000\250\145\160\160\002\000\000\254\247\001\000\153\160\160\001\000\247\001\000\221@\144\160\002\000\000\254\248\001\000\154@@\144\160\002\000\000\254\251\001\000\157\144\160\002\000\000\254\252\001\000\158@@@" 0 : Netmappings.from_uni_list array);;
+ let cp865_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001\000\236\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\242\001\000\251\001\000\249\001\000\255\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001 \167\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001#\016\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\164\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp865_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0071\000\000\000\000\000\000\006\229\000\000\006\229\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\247@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\1
27\000\127\160\160\001 \127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173@\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@\144\160\001\000\164\001\000\175@\144\160\001\003\166\001\000\232\144\160\001 \167\001\000\158@\144\160\001\003\169\001\000\234\144\160\001\000\170\001\000\166\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@\144\160\001\000\186\001\000\167@\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\144\160\001\003\192\001\000\227@@\144\160\001\003\195\001\000\229\145\160\160\001\000\196\001\000\142\160\160\001\003\196\001\000\231@\144\160\001\000\197\001\000\143\145\160\160\001\000\198\001\000\146\160\160\001\003\198\001\000\237@\144\160\001\000\199\001\000\128@\144\160\001\000\201\001\000\144@@@@@@@\144\160\001\000\209\001\000\165@@@@\144\160\001\000\214\001\000\153@\144\160\001\000\216\001\000\157@@@\144\160\001\000\220\001\000\154@@\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137\144\160\001\000\236\001\000\141\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140\144\160\001\000\239\001\000\139@\144\160\001\000\241\001\000\164\144\160\001\000\242\001\000\149\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155\144\160\001\000\249\001\000\151\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129@@\144\160\001\000\255\001\000\152" 0 : Netmappings.from_uni_list array);;
+ let cp866_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\004\016\001\004\017\001\004\018\001\004\019\001\004\020\001\004\021\001\004\022\001\004\023\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004 \001\004!\001\004\"\001\004#\001\004$\001\004%\001\004&\001\004'\001\004(\001\004)\001\004*\001\004+\001\004,\001\004-\001\004.\001\004/\001\0040\001\0041\001\0042\001\0043\001\0044\001\0045\001\0046\001\0047\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\004@\001\004A\001\004B\001\004C\001\004D\001\004E\001\004F\001\004G\001\004H\001\004I\001\004J\001\004K\001\004L\001\004M\001\004N\001\004O\001\004\001\001\004Q\001\004\004\001\004T\001\004\007\001\004W\001\004\014\001\004^\001\000\176\001\"\025\001\000\183\001\"\026\001!\022\001\000\164\001%\160\001\000\160" 0 : int array);;
+let cp866_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\195\000\000\000\000\000\000\007\164\000\000\007\164\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\145\160\160AA\160\160\001\004\001\001\000\240@\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\145\160\160DD\160\160\001\004\004\001\000\242@\144\160EE\144\160FF\145\160\160GG\160\160\001\004\007\001\000\244@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\246@\144\160OO\145\160\160PP\160\160\001\004\016\001\000\128\160\160\001%\016\001\000\191@\145\160\160QQ\160\160\001\004\017\001\000\129@\145\160\160RR\160\160\001\004\018\001\000\130@\145\160\160SS\160\160\001\004\019\001\000\131@\145\160\160TT\160\160\001\004\020\001\000\132\160\160\001%\020\001\000\192@\145\160\160UU\160\160\001\004\021\001\000\133@\145\160\160VV\160\160\001\004\022\001\000\134\160\160\001!\022\001\000\252@\145\160\160WW\160\160\001\004\023\001\000\135@\145\160\160XX\160\160\001\004\024\001\000\136\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\004\025\001\000\137\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\004\026\001\000\138\160\160\001\"\026\001\000\251@\145\160\160[[\160\160\001\004\027\001\000\139@\145\160\160\\\\\160\160\001\004\028\001\000\140\160\160\001%\028\001\000\195@\145\160\160]]\160\160\001\004\029\001\000\141@\145\160\160^^\160\160\001\004\030\001\000\142@\145\160\160__\160\160\001\004\031\001\000\143@\145\160\160``\160\160\001\004 \001\000\144@\145\160\160aa\160\160\001\004!\001\000\145@\145\160\160bb\160\160\001\004\"\001\000\146@\145\160\160cc\160\160\001\004#\001\000\147@\145\160\160dd\160\160\001\004$\001\000\148\160\160\001%$\001\000\180@\145\160\160ee\160\160\001\004%\001\000\149@\145\160\160ff\160\160\001\004&\001\000\150@\145\160\160gg\160\160\001\004'\001\000\151@\145\160\160hh\160\160\001\004(\001\000\152@\145\160\160ii\160\160\001\004)\001\000\153@\145\160\160jj\160\160\001\004*\001\000\154@\145\160\160kk\160\160\001\004+\001\000\155@\145\160\160ll\160\160\001\004,\001\000\156\160\160\001%,\001\000\194@\145\160\160mm\160\160\001\004-\001\000\157@\145\160\160nn\160\160\001\004.\001\000\158@\145\160\160oo\160\160\001\004/\001\000\159@\145\160\160pp\160\160\001\0040\001\000\160@\145\160\160qq\160\160\001\0041\001\000\161@\145\160\160rr\160\160\001\0042\001\000\162@\145\160\160ss\160\160\001\0043\001\000\163@\145\160\160tt\160\160\001\0044\001\000\164\160\160\001%4\001\000\193@\145\160\160uu\160\160\001\0045\001\000\165@\145\160\160vv\160\160\001\0046\001\000\166@\145\160\160ww\160\160\001\0047\001\000\167@\145\160\160xx\160\160\001\0048\001\000\168@\145\160\160yy\160\160\001\0049\001\000\169@\145\160\160zz\160\160\001\004:\001\000\170@\145\160\160{{\160\160\001\004;\001\000\171@\145\160\160||\160\160\001\004<\001\000\172\160\160\001%<\001\000\197@\145\160\160}}\160\160\001\004=\001\000\173@\145\160\160~~\160\160\001\004>\001\000\174@\145\160\160\127\127\160\160\001\004?\001\000\175@\145\160\160\000@\000@\160\160\001\004@\001\000\224@\145\160\160\000A\000A\160\160\001\004A\001\000\225@\145\160\160\000B\000B\160\160\001\004B\001\000\226@\145\160\160\000C\000C\160\160\001\004C\001\000\227@\145\160\160\000D\000D\160\160\001\004D\001\000\228@\145\160\160\000E\000E\160\160\001\004E\001\000\229@\145\160\160\000F\000F\160\160\001\004F\001\000\230@\145\160\160\000G\000G\160\160\001\004G\001\000\231@\145\160\160\000H\000H\160\160\001\004H\001\000\232@\145\160\160\000I\000I\160\160\001\004I\001\00
0\233@\145\160\160\000J\000J\160\160\001\004J\001\000\234@\145\160\160\000K\000K\160\160\001\004K\001\000\235@\145\160\160\000L\000L\160\160\001\004L\001\000\236@\145\160\160\000M\000M\160\160\001\004M\001\000\237@\145\160\160\000N\000N\160\160\001\004N\001\000\238@\145\160\160\000O\000O\160\160\001\004O\001\000\239@\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186\160\160\001\004Q\001\000\241@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201\160\160\001\004T\001\000\243@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187\160\160\001\004W\001\000\245@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198\160\160\001\004^\001\000\247@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209@\145\160\160\000e\000e\160\160\001%e\001\000\210@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@@@\144\160\001\000\164\001\000\253@@@@@@@@@@@\144\160\001\000\176\001\000\248@@@@@@\144\160\001\000\183\001\000\250@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp869_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002<\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\000\255\000\255\000\255\000\255\000\255\000\255\001\003\134\000\255\001\000\183\001\000\172\001\000\166\001 \024\001 \025\001\003\136\001 \021\001\003\137\001\003\138\001\003\170\001\003\140\000\255\000\255\001\003\142\001\003\171\001\000\169\001\003\143\001\000\178\001\000\179\001\003\172\001\000\163\001\003\173\001\003\174\001\003\175\001\003\202\001\003\144\001\003\204\001\003\205\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\000\189\001\003\152\001\003\153\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\003\154\001\003\155\001\003\156\001\003\157\001%c\001%Q\001%W\001%]\001\003\158\001\003\159\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\003\160\001\003\161\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\003\163\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\177\001\003\178\001\003\179\001%\024\001%\012\001%\136\001%\132\001\003\180\001\003\181\001%\128\001\003\182\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\195\001\003\194\001\003\196\001\003\132\001\000\173\001\000\177\001\003\197\001\003\198\001\003\199\001\000\167\001\003\200\001\003\133\001\000\176\001\000\168\001\003\201\001\003\203\001\003\176\001\003\206\001%\160\001\000\160" 0 : int array);;
+let cp869_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\207\000\000\000\000\000\000\006\138\000\000\006\138\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\145\160\160UU\160\160\001 \021\001\000\142@\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\139\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001 \025\001\000\140@\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\145\160\160\001%\132\001\000\220\160\160\001\003\132\001\000\239@\144\160\001\003\133\001\000\247\144\160\001\003\134\001\000\134@\145\160\160\001\003\136\001\000\141\160\160\001%\136\001\000\219@\144\160\001\003\137\001\000\143\144\160\001\003\138\001\000\144@\144\160\001\003\140\001\000\146@\144\160\001\003\142\001\000\149\144\160\001\003\143\001\000\152\144\160\001\003\144\001\000\161\145\160\160\001\003\145\001\000\164\160\160\001%\145\001\000\176@\145\160\160\001\003\146\001\000\165\160\160\001%\146\001\000\177@\145\160\160\001\003\147\001\000\166\160\160\001%\147\001\000\178@\144\160\001\003\148\001\000\167\144\160\001\003\149\001\000\168\144\160\001\003\150\001\000\169\144\160\001\003\151\001\000\170\144\160\001\003\152\001\000\172\144\160\001\
003\153\001\000\173\144\160\001\003\154\001\000\181\144\160\001\003\155\001\000\182\144\160\001\003\156\001\000\183\144\160\001\003\157\001\000\184\144\160\001\003\158\001\000\189\144\160\001\003\159\001\000\190\145\160\160\001\003\160\001\000\198\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\003\161\001\000\199@\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\207@\144\160\001\003\164\001\000\208\144\160\001\003\165\001\000\209\145\160\160\001\000\166\001\000\138\160\160\001\003\166\001\000\210@\145\160\160\001\003\167\001\000\211\160\160\001\000\167\001\000\245@\145\160\160\001\003\168\001\000\212\160\160\001\000\168\001\000\249@\145\160\160\001\000\169\001\000\151\160\160\001\003\169\001\000\213@\144\160\001\003\170\001\000\145\145\160\160\001\003\171\001\000\150\160\160\001\000\171\001\000\174@\145\160\160\001\000\172\001\000\137\160\160\001\003\172\001\000\155@\145\160\160\001\003\173\001\000\157\160\160\001\000\173\001\000\240@\144\160\001\003\174\001\000\158\144\160\001\003\175\001\000\159\145\160\160\001\000\176\001\000\248\160\160\001\003\176\001\000\252@\145\160\160\001\003\177\001\000\214\160\160\001\000\177\001\000\241@\145\160\160\001\000\178\001\000\153\160\160\001\003\178\001\000\215@\145\160\160\001\000\179\001\000\154\160\160\001\003\179\001\000\216@\144\160\001\003\180\001\000\221\144\160\001\003\181\001\000\222\144\160\001\003\182\001\000\224\145\160\160\001\000\183\001\000\136\160\160\001\003\183\001\000\225@\144\160\001\003\184\001\000\226\144\160\001\003\185\001\000\227\144\160\001\003\186\001\000\228\145\160\160\001\000\187\001\000\175\160\160\001\003\187\001\000\229@\144\160\001\003\188\001\000\230\145\160\160\001\000\189\001\000\171\160\160\001\003\189\001\000\231@\144\160\001\003\190\001\000\232\144\160\001\003\191\001\000\233\144\160\001\003\192\001\000\234\144\160\001\003\193\001\000\235\144\160\001\003\194\001\000\237\144\160\001\003\195\001\000\236\144\160\001\003\196\001\000\238\144\160\001\003\197\001\000\242\144\160\001\003\198\001\000\243\144\160\001\003\199\001\000\244\144\160\001\003\200\001\000\246\144\160\001\003\201\001\000\250\144\160\001\003\202\001\000\160\144\160\001\003\203\001\000\251\144\160\001\003\204\001\000\162\144\160\001\003\205\001\000\163\144\160\001\003\206\001\000\253@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp874_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002&\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\000\255\000\255\000\255\001 &\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\160\001\014\001\001\014\002\001\014\003\001\014\004\001\014\005\001\014\006\001\014\007\001\014\008\001\014\t\001\014\n\001\014\011\001\014\012\001\014\013\001\014\014\001\014\015\001\014\016\001\014\017\001\014\018\001\014\019\001\014\020\001\014\021\001\014\022\001\014\023\001\014\024\001\014\025\001\014\026\001\014\027\001\014\028\001\014\029\001\014\030\001\014\031\001\014 \001\014!\001\014\"\001\014#\001\014$\001\014%\001\014&\001\014'\001\014(\001\014)\001\014*\001\014+\001\014,\001\014-\001\014.\001\014/\001\0140\001\0141\001\0142\001\0143\001\0144\001\0145\001\0146\001\0147\001\0148\001\0149\001\014:\000\255\000\255\000\255\000\255\001\014?\001\014@\001\014A\001\014B\001\014C\001\014D\001\014E\001\014F\001\014G\001\014H\001\014I\001\014J\001\014K\001\014L\001\014M\001\014N\001\014O\001\014P\001\014Q\001\014R\001\014S\001\014T\001\014U\001\014V\001\014W\001\014X\001\014Y\001\014Z\001\014[\000\255\000\255\000\255\000\255" 0 : int array);;
+let cp874_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\185\000\000\000\000\000\000\006\202\000\000\006\202\008\000\004\000\000\144\160@@\145\160\160AA\160\160\001\014\001\001\000\161@\145\160\160BB\160\160\001\014\002\001\000\162@\145\160\160CC\160\160\001\014\003\001\000\163@\145\160\160DD\160\160\001\014\004\001\000\164@\145\160\160EE\160\160\001\014\005\001\000\165@\145\160\160FF\160\160\001\014\006\001\000\166@\145\160\160GG\160\160\001\014\007\001\000\167@\145\160\160HH\160\160\001\014\008\001\000\168@\145\160\160II\160\160\001\014\t\001\000\169@\145\160\160JJ\160\160\001\014\n\001\000\170@\145\160\160KK\160\160\001\014\011\001\000\171@\145\160\160LL\160\160\001\014\012\001\000\172@\145\160\160MM\160\160\001\014\013\001\000\173@\145\160\160NN\160\160\001\014\014\001\000\174@\145\160\160OO\160\160\001\014\015\001\000\175@\145\160\160PP\160\160\001\014\016\001\000\176@\145\160\160QQ\160\160\001\014\017\001\000\177@\145\160\160RR\160\160\001\014\018\001\000\178@\145\160\160SS\160\160\001 \019\001\000\150\160\160\001\014\019\001\000\179@\145\160\160TT\160\160\001 \020\001\000\151\160\160\001\014\020\001\000\180@\145\160\160UU\160\160\001\014\021\001\000\181@\145\160\160VV\160\160\001\014\022\001\000\182@\145\160\160WW\160\160\001\014\023\001\000\183@\145\160\160XX\160\160\001 \024\001\000\145\160\160\001\014\024\001\000\184@\145\160\160YY\160\160\001 \025\001\000\146\160\160\001\014\025\001\000\185@\145\160\160ZZ\160\160\001\014\026\001\000\186@\145\160\160[[\160\160\001\014\027\001\000\187@\145\160\160\\\\\160\160\001 \028\001\000\147\160\160\001\014\028\001\000\188@\145\160\160]]\160\160\001 \029\001\000\148\160\160\001\014\029\001\000\189@\145\160\160^^\160\160\001\014\030\001\000\190@\145\160\160__\160\160\001\014\031\001\000\191@\145\160\160``\160\160\001\014 \001\000\192@\145\160\160aa\160\160\001\014!\001\000\193@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001\014\"\001\000\194@\145\160\160cc\160\160\001\014#\001\000\195@\145\160\160dd\160\160\001\014$\001\000\196@\145\160\160ee\160\160\001\014%\001\000\197@\145\160\160ff\160\160\001 
&\001\000\133\160\160\001\014&\001\000\198@\145\160\160gg\160\160\001\014'\001\000\199@\145\160\160hh\160\160\001\014(\001\000\200@\145\160\160ii\160\160\001\014)\001\000\201@\145\160\160jj\160\160\001\014*\001\000\202@\145\160\160kk\160\160\001\014+\001\000\203@\145\160\160ll\160\160\001\014,\001\000\204@\145\160\160mm\160\160\001\014-\001\000\205@\145\160\160nn\160\160\001\014.\001\000\206@\145\160\160oo\160\160\001\014/\001\000\207@\145\160\160pp\160\160\001\0140\001\000\208@\145\160\160qq\160\160\001\0141\001\000\209@\145\160\160rr\160\160\001\0142\001\000\210@\145\160\160ss\160\160\001\0143\001\000\211@\145\160\160tt\160\160\001\0144\001\000\212@\145\160\160uu\160\160\001\0145\001\000\213@\145\160\160vv\160\160\001\0146\001\000\214@\145\160\160ww\160\160\001\0147\001\000\215@\145\160\160xx\160\160\001\0148\001\000\216@\145\160\160yy\160\160\001\0149\001\000\217@\145\160\160zz\160\160\001\014:\001\000\218@\144\160{{\144\160||\144\160}}\144\160~~\145\160\160\127\127\160\160\001\014?\001\000\223@\145\160\160\000@\000@\160\160\001\014@\001\000\224@\145\160\160\000A\000A\160\160\001\014A\001\000\225@\145\160\160\000B\000B\160\160\001\014B\001\000\226@\145\160\160\000C\000C\160\160\001\014C\001\000\227@\145\160\160\000D\000D\160\160\001\014D\001\000\228@\145\160\160\000E\000E\160\160\001\014E\001\000\229@\145\160\160\000F\000F\160\160\001\014F\001\000\230@\145\160\160\000G\000G\160\160\001\014G\001\000\231@\145\160\160\000H\000H\160\160\001\014H\001\000\232@\145\160\160\000I\000I\160\160\001\014I\001\000\233@\145\160\160\000J\000J\160\160\001\014J\001\000\234@\145\160\160\000K\000K\160\160\001\014K\001\000\235@\145\160\160\000L\000L\160\160\001\014L\001\000\236@\145\160\160\000M\000M\160\160\001\014M\001\000\237@\145\160\160\000N\000N\160\160\001\014N\001\000\238@\145\160\160\000O\000O\160\160\001\014O\001\000\239@\145\160\160\000P\000P\160\160\001\014P\001\000\240@\145\160\160\000Q\000Q\160\160\001\014Q\001\000\241@\145\160\160\000R\000R\160\160\001\014R\001\000\242@\145\160\160\000S\000S\160\160\001\014S\001\000\243@\145\160\160\000T\000T\160\160\001\014T\001\000\244@\145\160\160\000U\000U\160\160\001\014U\001\000\245@\145\160\160\000V\000V\160\160\001\014V\001\000\246@\145\160\160\000W\000W\160\160\001\014W\001\000\247@\145\160\160\000X\000X\160\160\001\014X\001\000\248@\145\160\160\000Y\000Y\160\160\001\014Y\001\000\249@\145\160\160\000Z\000Z\160\160\001\014Z\001\000\250@\145\160\160\000[\000[\160\160\001\014[\001\000\251@\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@@@@@@@@@@@\144\160\001 \172\001\000\128@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp875_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002@\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABC\001\000\156I\001\000\134\000\127\001\000\151\001\000\141\001\000\142KLMNOPQRS\001\000\157\001\000\133H\001\000\135XY\001\000\146\001\000\143\\]^_\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132JW[\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140EFG\001\000\144\001\000\145V\001\000\147\001\000\148\001\000\149\001\000\150D\001\000\152\001\000\153\001\000\154\001\000\155TU\001\000\158\000\255`\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\000[n|hkaf\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\001\003\163\000]dji{\000^mo\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\170\001\003\171\000|le\000_~\127\001\000\168\001\003\134\001\003\136\001\003\137\001\000\160\001\003\138\001\003\140\001\003\142\001\003\143\000`zc\000@g}b\001\003\133\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\000\180\000~\000s\000t\000u\000v\000w\000x\000y\000z\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\195\001\000\163\001\003\172\001\003\173\001\003\174\001\003\202\001\003\175\001\003\204\001\003\205\001\003\203\001\003\206\001\003\194\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\003\201\001\003\144\001\003\176\001 \024\001 \021\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\177\001\000\189\000\255\001\003\135\001 \025\001\000\166\000\\\000\255\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\167\000\255\000\255\001\000\171\001\000\172pqrstuvwxy\001\000\179\001\000\169\000\255\000\255\001\000\187\001\000\159" 0 : int array);;
+let cp875_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\220\000\000\000\000\000\000\006\142\000\000\006\142\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\145\160\160U}\160\160\001 \021\001\000\207@\144\160Vr\144\160Wf\145\160\160XX\160\160\001 \024\001\000\206@\145\160\160YY\160\160\001 \025\001\000\222@@\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000O\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\000J\144\160\000\\\001\000\224\144\160\000]\000Z\144\160\000^\000_\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\000j\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\145\160\160\001\000\133U\160\160\001\003\133\001\000\128@\145\160\160\001\000\134F\160\160\001\003\134\000q@\145\160\160\001\000\135W\160\160\001\003\135\001\000\221@\145\160\160\001\000\136h\160\160\001\003\136\000r@\145\160\160\001\000\137i\160\160\001\003\137\000s@\145\160\160\001\000\138j\160\160\001\003\138\000u@\144\160\001\000\139k\145\160\160\001\000\140l\160\160\001\003\140\000v@\144\160\001\000\141I\145\160\160\001\000\142J\160\160\001\003\142\000w@\145\160\160\001\000\143[\160\160\001\003\143\000x@\145\160\160\001\000\144p\160\160\001\003\144\001\000\204@\145\160\160\001\000\145q\160\160\001\003\145\000A@\145\160\160\001\000\146Z\160\160\001\003\146\000B@\145\160\160\001\000\147s\160\160\001\003\147\000C@\145\160\160\001\000\148t\160\160\001\003\148\000D@\145\160\160\001\000\149u\160\160\00
1\003\149\000E@\145\160\160\001\000\150v\160\160\001\003\150\000F@\145\160\160\001\000\151H\160\160\001\003\151\000G@\145\160\160\001\000\152x\160\160\001\003\152\000H@\145\160\160\001\000\153y\160\160\001\003\153\000I@\145\160\160\001\000\154z\160\160\001\003\154\000Q@\145\160\160\001\000\155{\160\160\001\003\155\000R@\145\160\160\001\000\156D\160\160\001\003\156\000S@\145\160\160\001\000\157T\160\160\001\003\157\000T@\145\160\160\001\000\158~\160\160\001\003\158\000U@\145\160\160\001\003\159\000V\160\160\001\000\159\001\000\255@\145\160\160\001\003\160\000W\160\160\001\000\160\000t@\144\160\001\003\161\000X@\145\160\160\001\003\163\000Y\160\160\001\000\163\001\000\176@\144\160\001\003\164\000b\144\160\001\003\165\000c\145\160\160\001\003\166\000d\160\160\001\000\166\001\000\223@\145\160\160\001\003\167\000e\160\160\001\000\167\001\000\235@\145\160\160\001\003\168\000f\160\160\001\000\168\000p@\145\160\160\001\003\169\000g\160\160\001\000\169\001\000\251@\144\160\001\003\170\000h\145\160\160\001\003\171\000i\160\160\001\000\171\001\000\238@\145\160\160\001\003\172\001\000\177\160\160\001\000\172\001\000\239@\145\160\160\001\003\173\001\000\178\160\160\001\000\173\001\000\202@\144\160\001\003\174\001\000\179\144\160\001\003\175\001\000\181\145\160\160\001\000\176\001\000\144\160\160\001\003\176\001\000\205@\145\160\160\001\003\177\001\000\138\160\160\001\000\177\001\000\218@\145\160\160\001\003\178\001\000\139\160\160\001\000\178\001\000\234@\145\160\160\001\003\179\001\000\140\160\160\001\000\179\001\000\250@\145\160\160\001\003\180\001\000\141\160\160\001\000\180\001\000\160@\144\160\001\003\181\001\000\142\144\160\001\003\182\001\000\143\144\160\001\003\183\001\000\154\144\160\001\003\184\001\000\155\144\160\001\003\185\001\000\156\144\160\001\003\186\001\000\157\145\160\160\001\003\187\001\000\158\160\160\001\000\187\001\000\254@\144\160\001\003\188\001\000\159\145\160\160\001\003\189\001\000\170\160\160\001\000\189\001\000\219@\144\160\001\003\190\001\000\171\144\160\001\003\191\001\000\172\144\160\001\003\192\001\000\173\144\160\001\003\193\001\000\174\144\160\001\003\194\001\000\186\144\160\001\003\195\001\000\175\144\160\001\003\196\001\000\187\144\160\001\003\197\001\000\188\144\160\001\003\198\001\000\189\144\160\001\003\199\001\000\190\144\160\001\003\200\001\000\191\144\160\001\003\201\001\000\203\144\160\001\003\202\001\000\180\144\160\001\003\203\001\000\184\144\160\001\003\204\001\000\182\144\160\001\003\205\001\000\183\144\160\001\003\206\001\000\185@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let adobe_standard_encoding_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\031\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`abcdef\001 \025hijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\001 \024\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\161\001\000\162\001\000\163\001 D\001\000\165\001\001\146\001\000\167\001\000\164g\001 \028\001\000\171\001 9\001 :\002\000\000\251\001\002\000\000\251\002\000\255\001 \019\001  \001 !\001\000\183\000\255\001\000\182\001 \"\001 \026\001 \030\001 \029\001\000\187\001 &\001 0\000\255\001\000\191\000\255\000`\001\000\180\001\002\198\001\002\220\001\000\175\001\002\216\001\002\217\001\000\168\000\255\001\002\218\001\000\184\000\255\001\002\221\001\002\219\001\002\199\001 \020\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\198\000\255\001\000\170\000\255\000\255\000\255\000\255\001\001A\001\000\216\001\001R\001\000\186\000\255\000\255\000\255\000\255\000\255\001\000\230\000\255\000\255\000\255\001\0011\000\255\000\255\001\001B\001\000\248\001\001S\001\000\223\000\255\000\255\000\255\000\255" 0 : int array);;
+let adobe_standard_encoding_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\004K\000\000\000\000\000\000\004&\000\000\004&\008\000\004\000\000@\144\160\002\000\000\251\001\001\000\174\144\160\002\000\000\251\002\001\000\175@@@@@@@@@@@@@@@@\144\160\001 \019\001\000\177\144\160\001 \020\001\000\208@@@\144\160\001 \024\000`\144\160\001 \025g\144\160\001 \026\001\000\184@\144\160\001 \028\001\000\170\144\160\001 \029\001\000\186\144\160\001 \030\001\000\185@\145\160\160``\160\160\001  \001\000\178@\145\160\160aa\160\160\001 !\001\000\179@\145\160\160bb\160\160\001 \"\001\000\183@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\188@\144\160g\001\000\169\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\189@\145\160\160qq\160\160\001\0011\001\000\245@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\172@\145\160\160zz\160\160\001 :\001\000\173@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\232@\145\160\160\000B\000B\160\160\001\001B\001\000\248@\144\160\000C\000C\145\160\160\000D\000D\160\160\001 D\001\000\164@\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\234@\145\160\160\000S\000S\160\160\001\001S\001\000\250@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\001\000\193\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~@@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\166@@@@@@@@@@@@@@\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\168\144\160\001\000\165\001\000\165@\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\200@\144\160\001\000\170\001\000\227\144\160\001\000\171\001\000\171@@@\144\160\001\000\175\001\000\197@@@@\144\160\001\000\180\001\000\194@\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\180\144\160\001\000\184\001\000\203@\144\160\001\000\186\001\000\235\144\160\001\000\187\001\000\187@@@\144\160\001\000\191\001\000\191@@@@@@\145\160\160\001\002\198\001\000\195\160\160\001\000\198\001\000\225@\144\160\001\002\199\001\000\207@@@@@@@@@@@@@@@@\145\160\160\001\002\216\001\000\198\160\160\001\000\216\001\000\233@\144\160\001\002\217\001\000\199\144\160\001\002\218\001\000\202\144\160\001\002\219\001\000\206\144\160\001\002\220\001\000\196\144\160\001\002\221\001\000\205@\144\160\001\000\223\001\000\251@@@@@@\144\160\001\000\230\001\000\241@@@@@@@@@@@@@@@@@\144\160\001\000\248\001\000\249@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let adobe_symbol_encoding_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\192\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`a\001\"\000c\001\"\003ef\001\"\011hi\001\"\023kl\001\"\018nopqrstuvwxyz{|}~\127\001\"E\001\003\145\001\003\146\001\003\167\001\003\148\001\003\149\001\003\166\001\003\147\001\003\151\001\003\153\001\003\209\001\003\154\001\003\155\001\003\156\001\003\157\001\003\159\001\003\160\001\003\152\001\003\161\001\003\163\001\003\164\001\003\165\001\003\194\001\003\169\001\003\158\001\003\168\001\003\150\000[\001\"4\000]\001\"\165\000_\002\000\000\248\229\001\003\177\001\003\178\001\003\199\001\003\180\001\003\181\001\003\198\001\003\179\001\003\183\001\003\185\001\003\213\001\003\186\001\003\187\001\003\188\001\003\189\001\003\191\001\003\192\001\003\184\001\003\193\001\003\195\001\003\196\001\003\197\001\003\214\001\003\201\001\003\190\001\003\200\001\003\182\000{\000|\000}\001\"<\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001 \172\001\003\210\001 2\001\"d\001 D\001\"\030\001\001\146\001&c\001&f\001&e\001&`\001!\148\001!\144\001!\145\001!\146\001!\147\001\000\176\001\000\177\001 3\001\"e\001\000\215\001\"\029\001\"\002\001 \"\001\000\247\001\"`\001\"a\001\"H\001 &\002\000\000\248\230\002\000\000\248\231\001!\181\001!5\001!\017\001!\028\001!\024\001\"\151\001\"\149\001\"\005\001\")\001\"*\001\"\131\001\"\135\001\"\132\001\"\130\001\"\134\001\"\008\001\"\t\001\" \001\"\007\002\000\000\246\218\002\000\000\246\217\002\000\000\246\219\001\"\015\001\"\026\001\"\197\001\000\172\001\"'\001\"(\001!\212\001!\208\001!\209\001!\210\001!\211\001%\202\001#)\002\000\000\248\232\002\000\000\248\233\002\000\000\248\234\001\"\017\002\000\000\248\235\002\000\000\248\236\002\000\000\248\237\002\000\000\248\238\002\000\000\248\239\002\000\000\248\240\002\000\000\248\241\002\000\000\248\242\002\000\000\248\243\002\000\000\248\244\000\255\001#*\001\"+\001# \002\000\000\248\245\001#!\002\000\000\248\246\002\000\000\248\247\002\000\000\248\248\002\000\000\248\249\002\000\000\248\250\002\000\000\248\251\002\000\000\248\252\002\000\000\248\253\002\000\000\248\254\000\255" 0 : int array);;
+let adobe_symbol_encoding_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\t\000\000\000\000\000\000\005)\000\000\005)\008\000\004\000\000\144\160\001\"\000b@\144\160\001\"\002\001\000\182\144\160\001\"\003d@\144\160\001\"\005\001\000\198@\144\160\001\"\007\001\000\209\144\160\001\"\008\001\000\206\144\160\001\"\t\001\000\207@\144\160\001\"\011g@@@\144\160\001\"\015\001\000\213@\145\160\160\001!\017\001\000\193\160\160\001\"\017\001\000\229@\144\160\001\"\018m@@@@\144\160\001\"\023j\144\160\001!\024\001\000\195@\144\160\001\"\026\001\000\214@\144\160\001!\028\001\000\194\144\160\001\"\029\001\000\181\144\160\001\"\030\001\000\165@\145\160\160``\160\160\001\" \001\000\208\160\160\001# \001\000\243@\145\160\160aa\160\160\001#!\001\000\245@\144\160\001 \"\001\000\183\144\160cc@\144\160ee\145\160\160ff\160\160\001 &\001\000\188@\144\160\001\"'\001\000\217\145\160\160hh\160\160\001\"(\001\000\218@\145\160\160ii\160\160\001\")\001\000\199\160\160\001#)\001\000\225@\145\160\160\001\"*\001\000\200\160\160\001#*\001\000\241@\145\160\160kk\160\160\001\"+\001\000\242@\144\160ll@\144\160nn\144\160oo\144\160pp\144\160qq\145\160\160rr\160\160\001 2\001\000\162@\145\160\160ss\160\160\001 3\001\000\178@\145\160\160tt\160\160\001\"4\000\\@\145\160\160uu\160\160\001!5\001\000\192@\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001\"<\000~@\144\160}}\144\160~~\144\160\127\127@@@@\144\160\001 D\001\000\164\144\160\001\"E\000@@@\144\160\001\"H\001\000\187@@@@@@@@@@@@@@@@@@\144\160\000[\000[@\144\160\000]\000]@\144\160\000_\000_\145\160\160\001&`\001\000\170\160\160\001\"`\001\000\185@\144\160\001\"a\001\000\186@\144\160\001&c\001\000\167\144\160\001\"d\001\000\163\145\160\160\001&e\001\000\169\160\160\001\"e\001\000\179@\144\160\001&f\001\000\168@@@@@@@@@@@@@@@@@@@@\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}@@@@\144\160\001\"\130\001\000\204\144\160\001\"\131\001\000\201\144\160\001\"\132\001\000\203@\144\160\001\"\134\001\000\205\144\160\001\"\135\001\000\202@@@@@@@@\144\160\001!\144\001\000\172\145\160\160\001\003\145\000A\160\160\001!\145\001\000\173@\145\160\160\001\003\146\000B\160\160\001\001\146\001\000\166\160\160\001!\146\001\000\174@\145\160\160\001\003\147\000G\160\160\001!\147\001\000\175@\145\160\160\001\003\148\000D\160\160\001!\148\001\000\171@\145\160\160\001\003\149\000E\160\160\001\"\149\001\000\197@\144\160\001\003\150\000Z\145\160\160\001\003\151\000H\160\160\001\"\151\001\000\196@\144\160\001\003\152\000Q\144\160\001\003\153\000I\144\160\001\003\154\000K\144\160\001\003\155\000L\144\160\001\003\156\000M\144\160\001\003\157\000N\144\160\001\003\158\000X\144\160\001\003\159\000O\144\160\001\003\160\000P\144\160\001\003\161\000R@\144\160\001\003\163\000S\144\160\001\003\164\000T\145\160\160\001\003\165\000U\160\160\001\"\165\000^@\144\160\001\003\166\000F\144\160\001\003\167\000C\144\160\001\003\168\000Y\144\160\001\003\169\000W@@\145\160\160\001 
\172\001\000\160\160\160\001\000\172\001\000\216@@@@\144\160\001\000\176\001\000\176\145\160\160\001\003\177\000a\160\160\001\000\177\001\000\177@\144\160\001\003\178\000b\144\160\001\003\179\000g\144\160\001\003\180\000d\145\160\160\001\003\181\000e\160\160\001!\181\001\000\191@\144\160\001\003\182\000z\144\160\001\003\183\000h\144\160\001\003\184\000q\144\160\001\003\185\000i\144\160\001\003\186\000k\144\160\001\003\187\000l\144\160\001\003\188\000m\144\160\001\003\189\000n\144\160\001\003\190\000x\144\160\001\003\191\000o\144\160\001\003\192\000p\144\160\001\003\193\000r\144\160\001\003\194\000V\144\160\001\003\195\000s\144\160\001\003\196\000t\145\160\160\001\003\197\000u\160\160\001\"\197\001\000\215@\144\160\001\003\198\000f\144\160\001\003\199\000c\144\160\001\003\200\000y\144\160\001\003\201\000w\144\160\001%\202\001\000\224@@@@@\144\160\001!\208\001\000\220\145\160\160\001\003\209\000J\160\160\001!\209\001\000\221@\145\160\160\001\003\210\001\000\161\160\160\001!\210\001\000\222@\144\160\001!\211\001\000\223\144\160\001!\212\001\000\219\144\160\001\003\213\000j\144\160\001\003\214\000v\144\160\001\000\215\001\000\180@\144\160\002\000\000\246\217\001\000\211\144\160\002\000\000\246\218\001\000\210\144\160\002\000\000\246\219\001\000\212@@@@@@@@@\144\160\002\000\000\248\229\000`\144\160\002\000\000\248\230\001\000\189\144\160\002\000\000\248\231\001\000\190\144\160\002\000\000\248\232\001\000\226\144\160\002\000\000\248\233\001\000\227\144\160\002\000\000\248\234\001\000\228\144\160\002\000\000\248\235\001\000\230\144\160\002\000\000\248\236\001\000\231\144\160\002\000\000\248\237\001\000\232\144\160\002\000\000\248\238\001\000\233\144\160\002\000\000\248\239\001\000\234\144\160\002\000\000\248\240\001\000\235\144\160\002\000\000\248\241\001\000\236\144\160\002\000\000\248\242\001\000\237\144\160\002\000\000\248\243\001\000\238\144\160\002\000\000\248\244\001\000\239\144\160\002\000\000\248\245\001\000\244\144\160\002\000\000\248\246\001\000\246\145\160\160\001\000\247\001\000\184\160\160\002\000\000\248\247\001\000\247@\144\160\002\000\000\248\248\001\000\248\144\160\002\000\000\248\249\001\000\249\144\160\002\000\000\248\250\001\000\250\144\160\002\000\000\248\251\001\000\251\144\160\002\000\000\248\252\001\000\252\144\160\002\000\000\248\253\001\000\253\144\160\002\000\000\248\254\001\000\254@" 0 : Netmappings.from_uni_list array);;
+ let adobe_zapf_dingbats_encoding_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\233\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`\001'\001\001'\002\001'\003\001'\004\001&\014\001'\006\001'\007\001'\008\001'\t\001&\027\001&\030\001'\012\001'\013\001'\014\001'\015\001'\016\001'\017\001'\018\001'\019\001'\020\001'\021\001'\022\001'\023\001'\024\001'\025\001'\026\001'\027\001'\028\001'\029\001'\030\001'\031\001' \001'!\001'\"\001'#\001'$\001'%\001'&\001''\001&\005\001')\001'*\001'+\001',\001'-\001'.\001'/\001'0\001'1\001'2\001'3\001'4\001'5\001'6\001'7\001'8\001'9\001':\001';\001'<\001'=\001'>\001'?\001'@\001'A\001'B\001'C\001'D\001'E\001'F\001'G\001'H\001'I\001'J\001'K\001%\207\001'M\001%\160\001'O\001'P\001'Q\001'R\001%\178\001%\188\001%\198\001'V\001%\215\001'X\001'Y\001'Z\001'[\001'\\\001']\001'^\000\255\002\000\000\248\215\002\000\000\248\216\002\000\000\248\217\002\000\000\248\218\002\000\000\248\219\002\000\000\248\220\002\000\000\248\221\002\000\000\248\222\002\000\000\248\223\002\000\000\248\224\002\000\000\248\225\002\000\000\248\226\002\000\000\248\227\002\000\000\248\228\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001'a\001'b\001'c\001'd\001'e\001'f\001'g\001&c\001&f\001&e\001&`\001$`\001$a\001$b\001$c\001$d\001$e\001$f\001$g\001$h\001$i\001'v\001'w\001'x\001'y\001'z\001'{\001'|\001'}\001'~\001'\127\001'\128\001'\129\001'\130\001'\131\001'\132\001'\133\001'\134\001'\135\001'\136\001'\137\001'\138\001'\139\001'\140\001'\141\001'\142\001'\143\001'\144\001'\145\001'\146\001'\147\001'\148\001!\146\001!\148\001!\149\001'\152\001'\153\001'\154\001'\155\001'\156\001'\157\001'\158\001'\159\001'\160\001'\161\001'\162\001'\163\001'\164\001'\165\001'\166\001'\167\001'\168\001'\169\001'\170\001'\171\001'\172\001'\173\001'\174\001'\175\000\255\001'\177\001'\178\001'\179\001'\180\001'\181\001'\182\001'\183\001'\184\001'\185\001'\186\001'\187\001'\188\001'\189\001'\190\000\255" 0 : int array);;
+let adobe_zapf_dingbats_encoding_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006_\000\000\000\000\000\000\005>\000\000\005>\008\000\004\000\000@\144\160\001'\001a\144\160\001'\002b\144\160\001'\003c\144\160\001'\004d\144\160\001&\005\000H\144\160\001'\006f\144\160\001'\007g\144\160\001'\008h\144\160\001'\ti@@\144\160\001'\012l\144\160\001'\013m\145\160\160\001&\014e\160\160\001'\014n@\144\160\001'\015o\144\160\001'\016p\144\160\001'\017q\144\160\001'\018r\144\160\001'\019s\144\160\001'\020t\144\160\001'\021u\144\160\001'\022v\144\160\001'\023w\144\160\001'\024x\144\160\001'\025y\144\160\001'\026z\145\160\160\001&\027j\160\160\001'\027{@\144\160\001'\028|\144\160\001'\029}\145\160\160\001&\030k\160\160\001'\030~@\144\160\001'\031\127\145\160\160``\160\160\001' \000@@\144\160\001'!\000A\144\160\001'\"\000B\144\160\001'#\000C\144\160\001'$\000D\144\160\001'%\000E\144\160\001'&\000F\144\160\001''\000G@\144\160\001')\000I\144\160\001'*\000J\144\160\001'+\000K\144\160\001',\000L\144\160\001'-\000M\144\160\001'.\000N\144\160\001'/\000O\144\160\001'0\000P\144\160\001'1\000Q\144\160\001'2\000R\144\160\001'3\000S\144\160\001'4\000T\144\160\001'5\000U\144\160\001'6\000V\144\160\001'7\000W\144\160\001'8\000X\144\160\001'9\000Y\144\160\001':\000Z\144\160\001';\000[\144\160\001'<\000\\\144\160\001'=\000]\144\160\001'>\000^\144\160\001'?\000_\144\160\001'@\000`\144\160\001'A\000a\144\160\001'B\000b\144\160\001'C\000c\144\160\001'D\000d\144\160\001'E\000e\144\160\001'F\000f\144\160\001'G\000g\144\160\001'H\000h\144\160\001'I\000i\144\160\001'J\000j\144\160\001'K\000k@\144\160\001'M\000m@\144\160\001'O\000o\144\160\001'P\000p\144\160\001'Q\000q\144\160\001'R\000r@@@\144\160\001'V\000v@\144\160\001'X\000x\144\160\001'Y\000y\144\160\001'Z\000z\144\160\001'[\000{\144\160\001'\\\000|\144\160\001']\000}\144\160\001'^\000~@\145\160\160\001&`\001\000\171\160\160\001$`\001\000\172@\145\160\160\001'a\001\000\161\160\160\001$a\001\000\173@\145\160\160\001'b\001\000\162\160\160\001$b\001\000\174@\145\160\160\001'c\001\000\163\160\160\001&c\001\000\168\160\160\001$c\001\000\175@\145\160\160\001'd\001\000\164\160\160\001$d\001\000\176@\145\160\160\001'e\001\000\165\160\160\001&e\001\000\170\160\160\001$e\001\000\177@\145\160\160\001'f\001\000\166\160\160\001&f\001\000\169\160\160\001$f\001\000\178@\145\160\160\001'g\001\000\167\160\160\001$g\001\000\179@\144\160\001$h\001\000\180\144\160\001$i\001\000\181@@@@@@@@@@@@\144\160\001'v\001\000\182\144\160\001'w\001\000\183\144\160\001'x\001\000\184\144\160\001'y\001\000\185\144\160\001'z\001\000\186\144\160\001'{\001\000\187\144\160\001'|\001\000\188\144\160\001'}\001\000\189\144\160\001'~\001\000\190\144\160\001'\127\001\000\191\144\160\001'\128\001\000\192\144\160\001'\129\001\000\193\144\160\001'\130\001\000\194\144\160\001'\131\001\000\195\144\160\001'\132\001\000\196\144\160\001'\133\001\000\197\144\160\001'\134\001\000\198\144\160\001'\135\001\000\199\144\160\001'\136\001\000\200\144\160\001'\137\001\000\201\144\160\001'\138\001\000\202\144\160\001'\139\001\000\203\144\160\001'\140\001\000\204\144\160\001'\141\001\000\205\144\160\001'\142\001\000\206\144\160\001'\143\001\000\207\144\160\001'\144\001\000\208\144\160\001'\145\001\000\209\145\160\160\001'\146\001\000\210\160\160\001!\146\001\000\213@\144\160\001'\147\001\000\211\145\160\160\001'\148\001\000\212\160\160\001!\148\001\000\214@\144\160\001!\149\001\000\215@@\144\160\001'\152\001\000\216\144\160\001'\153\001\000\217\144\160\001'\154\001\000\218\144\160\001'\155\001\000\219\144\160\001'\
156\001\000\220\144\160\001'\157\001\000\221\144\160\001'\158\001\000\222\144\160\001'\159\001\000\223\145\160\160\001%\160\000n\160\160\001'\160\001\000\224@\144\160\001'\161\001\000\225\144\160\001'\162\001\000\226\144\160\001'\163\001\000\227\144\160\001'\164\001\000\228\144\160\001'\165\001\000\229\144\160\001'\166\001\000\230\144\160\001'\167\001\000\231\144\160\001'\168\001\000\232\144\160\001'\169\001\000\233\144\160\001'\170\001\000\234\144\160\001'\171\001\000\235\144\160\001'\172\001\000\236\144\160\001'\173\001\000\237\144\160\001'\174\001\000\238\144\160\001'\175\001\000\239@\144\160\001'\177\001\000\241\145\160\160\001%\178\000s\160\160\001'\178\001\000\242@\144\160\001'\179\001\000\243\144\160\001'\180\001\000\244\144\160\001'\181\001\000\245\144\160\001'\182\001\000\246\144\160\001'\183\001\000\247\144\160\001'\184\001\000\248\144\160\001'\185\001\000\249\144\160\001'\186\001\000\250\144\160\001'\187\001\000\251\145\160\160\001%\188\000t\160\160\001'\188\001\000\252@\144\160\001'\189\001\000\253\144\160\001'\190\001\000\254@@@@@@@\144\160\001%\198\000u@@@@@@@@\144\160\001%\207\000l@@@@@@@\145\160\160\001%\215\000w\160\160\002\000\000\248\215\001\000\128@\144\160\002\000\000\248\216\001\000\129\144\160\002\000\000\248\217\001\000\130\144\160\002\000\000\248\218\001\000\131\144\160\002\000\000\248\219\001\000\132\144\160\002\000\000\248\220\001\000\133\144\160\002\000\000\248\221\001\000\134\144\160\002\000\000\248\222\001\000\135\144\160\002\000\000\248\223\001\000\136\144\160\002\000\000\248\224\001\000\137\144\160\002\000\000\248\225\001\000\138\144\160\002\000\000\248\226\001\000\139\144\160\002\000\000\248\227\001\000\140\144\160\002\000\000\248\228\001\000\141@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let jis0201_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\164\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\001\000\165\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\001 >\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\002\000\000\255a\002\000\000\255b\002\000\000\255c\002\000\000\255d\002\000\000\255e\002\000\000\255f\002\000\000\255g\002\000\000\255h\002\000\000\255i\002\000\000\255j\002\000\000\255k\002\000\000\255l\002\000\000\255m\002\000\000\255n\002\000\000\255o\002\000\000\255p\002\000\000\255q\002\000\000\255r\002\000\000\255s\002\000\000\255t\002\000\000\255u\002\000\000\255v\002\000\000\255w\002\000\000\255x\002\000\000\255y\002\000\000\255z\002\000\000\255{\002\000\000\255|\002\000\000\255}\002\000\000\255~\002\000\000\255\127\002\000\000\255\128\002\000\000\255\129\002\000\000\255\130\002\000\000\255\131\002\000\000\255\132\002\000\000\255\133\002\000\000\255\134\002\000\000\255\135\002\000\000\255\136\002\000\000\255\137\002\000\000\255\138\002\000\000\255\139\002\000\000\255\140\002\000\000\255\141\002\000\000\255\142\002\000\000\255\143\002\000\000\255\144\002\000\000\255\145\002\000\000\255\146\002\000\000\255\147\002\000\000\255\148\002\000\000\255\149\002\000\000\255\150\002\000\000\255\151\002\000\000\255\152\002\000\000\255\153\002\000\000\255\154\002\000\000\255\155\002\000\000\255\156\002\000\000\255\157\002\000\000\255\158\002\000\000\255\159\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255" 0 : int array);;
+let jis0201_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0053\000\000\000\000\000\000\004\143\000\000\004\143\008\000\004\000\000@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\145\160\160~~\160\160\001 >\000~@\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[@\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\145\160\160\000a\000a\160\160\002\000\000\255a\001\000\161@\145\160\160\000b\000b\160\160\002\000\000\255b\001\000\162@\145\160\160\000c\000c\160\160\002\000\000\255c\001\000\163@\145\160\160\000d\000d\160\160\002\000\000\255d\001\000\164@\145\160\160\000e\000e\160\160\002\000\000\255e\001\000\165@\145\160\160\000f\000f\160\160\002\000\000\255f\001\000\166@\145\160\160\000g\000g\160\160\002\000\000\255g\001\000\167@\145\160\160\000h\000h\160\160\002\000\000\255h\001\000\168@\145\160\160\000i\000i\160\160\002\000\000\255i\001\000\169@\145\160\160\000j\000j\160\160\002\000\000\255j\001\000\170@\145\160\160\000k\000k\160\160\002\000\000\255k\001\000\171@\145\160\160\000l\000l\160\160\002\000\000\255l\001\000\172@\145\160\160\000m\000m\160\160\002\000\000\255m\001\000\173@\145\160\160\000n\000n\160\160\002\000\000\255n\001\000\174@\145\160\160\000o\000o\160\160\002\000\000\255o\001\000\175@\145\160\160\000p\000p\160\160\002\000\000\255p\001\000\176@\145\160\160\000q\000q\160\160\002\000\000\255q\001\000\177@\145\160\160\000r\000r\160\160\002\000\000\255r\001\000\178@\145\160\160\000s\000s\160\160\002\000\000\255s\001\000\179@\145\160\160\000t\000t\160\160\002\000\000\255t\001\000\180@\145\160\160\000u\000u\160\160\002\000\000\255u\001\000\181@\145\160\160\000v\000v\160\160\002\000\000\255v\001\000\182@\145\160\160\000w\000w\160\160\002\000\000\255w\001\000\183@\145\160\160\000x\000x\160\160\002\000\000\255x\001\000\184@\145\160\160\000y\000y\160\160\002\000\000\255y\001\000\185@\145\160\160\000z\000z\160\160\002\000\000\255z\001\000\186@\145\160\160\000{\000{\160\160\002\000\000\255{\001\000\187@\145\160\160\000|\000|\160\160\002\000\000\255|\001\000\188@\145\160\160\000}\000}\160\160\002\000\000\255}\001\000\189@\144\160\002\000\000\255~\001\000\190\144\160\002\000\000\255\127\001\000\191\144\160\002\000\000\255\128\001\000\192\144\160\002\000\000\255\129\001\000\193\144\160\002\000\000\255\130\001\000\194\144\160\002\000\000\255\131\001\000\195\144\160\002\000\000\255\132\001\000\196\144\160\002\000\000\255\133\001\000\197\144\160\002\000\000\255\134\001\000\198\144\160\002\000\000\255\135\001\000\199\144\160\002\000\000\255\136\001\000\200\144\160\002\000\000\255\137\001\000\201\144\160\002\000\000\255\138\001\000\202\144\160\002\000\000\255\139\001\000\203\144\160\002\000\000\255\140\001\000\204\144\160\002\000\000\255\141\001\000\205\144\160\002\000\000\255\142\001\000\206\144\160\002\000\000\255\14
3\001\000\207\144\160\002\000\000\255\144\001\000\208\144\160\002\000\000\255\145\001\000\209\144\160\002\000\000\255\146\001\000\210\144\160\002\000\000\255\147\001\000\211\144\160\002\000\000\255\148\001\000\212\144\160\002\000\000\255\149\001\000\213\144\160\002\000\000\255\150\001\000\214\144\160\002\000\000\255\151\001\000\215\144\160\002\000\000\255\152\001\000\216\144\160\002\000\000\255\153\001\000\217\144\160\002\000\000\255\154\001\000\218\144\160\002\000\000\255\155\001\000\219\144\160\002\000\000\255\156\001\000\220\144\160\002\000\000\255\157\001\000\221\144\160\002\000\000\255\158\001\000\222\144\160\002\000\000\255\159\001\000\223@@@@@\144\160\001\000\165\000\\@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let koi8r_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001%\000\001%\002\001%\012\001%\016\001%\020\001%\024\001%\028\001%$\001%,\001%4\001%<\001%\128\001%\132\001%\136\001%\140\001%\144\001%\145\001%\146\001%\147\001# \001%\160\001\"\025\001\"\026\001\"H\001\"d\001\"e\001\000\160\001#!\001\000\176\001\000\178\001\000\183\001\000\247\001%P\001%Q\001%R\001\004Q\001%S\001%T\001%U\001%V\001%W\001%X\001%Y\001%Z\001%[\001%\\\001%]\001%^\001%_\001%`\001%a\001\004\001\001%b\001%c\001%d\001%e\001%f\001%g\001%h\001%i\001%j\001%k\001%l\001\000\169\001\004N\001\0040\001\0041\001\004F\001\0044\001\0045\001\004D\001\0043\001\004E\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001\004O\001\004@\001\004A\001\004B\001\004C\001\0046\001\0042\001\004L\001\004K\001\0047\001\004H\001\004M\001\004I\001\004G\001\004J\001\004.\001\004\016\001\004\017\001\004&\001\004\020\001\004\021\001\004$\001\004\019\001\004%\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004/\001\004 \001\004!\001\004\"\001\004#\001\004\022\001\004\018\001\004,\001\004+\001\004\023\001\004(\001\004-\001\004)\001\004'\001\004*" 0 : int array);;
+let koi8r_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\187\000\000\000\000\000\000\007\153\000\000\007\153\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\128@\145\160\160AA\160\160\001\004\001\001\000\179@\145\160\160BB\160\160\001%\002\001\000\129@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\130@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\131\160\160\001\004\016\001\000\225@\145\160\160QQ\160\160\001\004\017\001\000\226@\145\160\160RR\160\160\001\004\018\001\000\247@\145\160\160SS\160\160\001\004\019\001\000\231@\145\160\160TT\160\160\001%\020\001\000\132\160\160\001\004\020\001\000\228@\145\160\160UU\160\160\001\004\021\001\000\229@\145\160\160VV\160\160\001\004\022\001\000\246@\145\160\160WW\160\160\001\004\023\001\000\250@\145\160\160XX\160\160\001%\024\001\000\133\160\160\001\004\024\001\000\233@\145\160\160YY\160\160\001\"\025\001\000\149\160\160\001\004\025\001\000\234@\145\160\160ZZ\160\160\001\"\026\001\000\150\160\160\001\004\026\001\000\235@\145\160\160[[\160\160\001\004\027\001\000\236@\145\160\160\\\\\160\160\001%\028\001\000\134\160\160\001\004\028\001\000\237@\145\160\160]]\160\160\001\004\029\001\000\238@\145\160\160^^\160\160\001\004\030\001\000\239@\145\160\160__\160\160\001\004\031\001\000\240@\145\160\160``\160\160\001# \001\000\147\160\160\001\004 \001\000\242@\145\160\160aa\160\160\001#!\001\000\155\160\160\001\004!\001\000\243@\145\160\160bb\160\160\001\004\"\001\000\244@\145\160\160cc\160\160\001\004#\001\000\245@\145\160\160dd\160\160\001%$\001\000\135\160\160\001\004$\001\000\230@\145\160\160ee\160\160\001\004%\001\000\232@\145\160\160ff\160\160\001\004&\001\000\227@\145\160\160gg\160\160\001\004'\001\000\254@\145\160\160hh\160\160\001\004(\001\000\251@\145\160\160ii\160\160\001\004)\001\000\253@\145\160\160jj\160\160\001\004*\001\000\255@\145\160\160kk\160\160\001\004+\001\000\249@\145\160\160ll\160\160\001%,\001\000\136\160\160\001\004,\001\000\248@\145\160\160mm\160\160\001\004-\001\000\252@\145\160\160nn\160\160\001\004.\001\000\224@\145\160\160oo\160\160\001\004/\001\000\241@\145\160\160pp\160\160\001\0040\001\000\193@\145\160\160qq\160\160\001\0041\001\000\194@\145\160\160rr\160\160\001\0042\001\000\215@\145\160\160ss\160\160\001\0043\001\000\199@\145\160\160tt\160\160\001%4\001\000\137\160\160\001\0044\001\000\196@\145\160\160uu\160\160\001\0045\001\000\197@\145\160\160vv\160\160\001\0046\001\000\214@\145\160\160ww\160\160\001\0047\001\000\218@\145\160\160xx\160\160\001\0048\001\000\201@\145\160\160yy\160\160\001\0049\001\000\202@\145\160\160zz\160\160\001\004:\001\000\203@\145\160\160{{\160\160\001\004;\001\000\204@\145\160\160||\160\160\001%<\001\000\138\160\160\001\004<\001\000\205@\145\160\160}}\160\160\001\004=\001\000\206@\145\160\160~~\160\160\001\004>\001\000\207@\145\160\160\127\127\160\160\001\004?\001\000\208@\145\160\160\000@\000@\160\160\001\004@\001\000\210@\145\160\160\000A\000A\160\160\001\004A\001\000\211@\145\160\160\000B\000B\160\160\001\004B\001\000\212@\145\160\160\000C\000C\160\160\001\004C\001\000\213@\145\160\160\000D\000D\160\160\001\004D\001\000\198@\145\160\160\000E\000E\160\160\001\004E\001\000\200@\145\160\160\000F\000F\160\160\001\004F\001\000\195@\145\160\160\000G\000G\160\160\001\004G\001\000\222@\145\160\160\000H\000H\160\160\001\"H\001\000\151\160\160\001\004H\001\000\219@\145\160\160\000I\000I\160\160\001\004I\001\000\221@\145\160\160\000J\000J\160\160\001\004J\001\000\223@\14
5\160\160\000K\000K\160\160\001\004K\001\000\217@\145\160\160\000L\000L\160\160\001\004L\001\000\216@\145\160\160\000M\000M\160\160\001\004M\001\000\220@\145\160\160\000N\000N\160\160\001\004N\001\000\192@\145\160\160\000O\000O\160\160\001\004O\001\000\209@\145\160\160\000P\000P\160\160\001%P\001\000\160@\145\160\160\000Q\000Q\160\160\001%Q\001\000\161\160\160\001\004Q\001\000\163@\145\160\160\000R\000R\160\160\001%R\001\000\162@\145\160\160\000S\000S\160\160\001%S\001\000\164@\145\160\160\000T\000T\160\160\001%T\001\000\165@\145\160\160\000U\000U\160\160\001%U\001\000\166@\145\160\160\000V\000V\160\160\001%V\001\000\167@\145\160\160\000W\000W\160\160\001%W\001\000\168@\145\160\160\000X\000X\160\160\001%X\001\000\169@\145\160\160\000Y\000Y\160\160\001%Y\001\000\170@\145\160\160\000Z\000Z\160\160\001%Z\001\000\171@\145\160\160\000[\000[\160\160\001%[\001\000\172@\145\160\160\000\\\000\\\160\160\001%\\\001\000\173@\145\160\160\000]\000]\160\160\001%]\001\000\174@\145\160\160\000^\000^\160\160\001%^\001\000\175@\145\160\160\000_\000_\160\160\001%_\001\000\176@\145\160\160\000`\000`\160\160\001%`\001\000\177@\145\160\160\000a\000a\160\160\001%a\001\000\178@\145\160\160\000b\000b\160\160\001%b\001\000\180@\145\160\160\000c\000c\160\160\001%c\001\000\181@\145\160\160\000d\000d\160\160\001\"d\001\000\152\160\160\001%d\001\000\182@\145\160\160\000e\000e\160\160\001\"e\001\000\153\160\160\001%e\001\000\183@\145\160\160\000f\000f\160\160\001%f\001\000\184@\145\160\160\000g\000g\160\160\001%g\001\000\185@\145\160\160\000h\000h\160\160\001%h\001\000\186@\145\160\160\000i\000i\160\160\001%i\001\000\187@\145\160\160\000j\000j\160\160\001%j\001\000\188@\145\160\160\000k\000k\160\160\001%k\001\000\189@\145\160\160\000l\000l\160\160\001%l\001\000\190@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\139@@@\144\160\001%\132\001\000\140@@@\144\160\001%\136\001\000\141@@@\144\160\001%\140\001\000\142@@@\144\160\001%\144\001\000\143\144\160\001%\145\001\000\144\144\160\001%\146\001\000\145\144\160\001%\147\001\000\146@@@@@@@@@@@@\145\160\160\001%\160\001\000\148\160\160\001\000\160\001\000\154@@@@@@@@@\144\160\001\000\169\001\000\191@@@@@@\144\160\001\000\176\001\000\156@\144\160\001\000\178\001\000\157@@@@\144\160\001\000\183\001\000\158@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\247\001\000\159@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let macroman_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002k\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\255\001\000\196\001\000\197\001\000\199\001\000\201\001\000\209\001\000\214\001\000\220\001\000\225\001\000\224\001\000\226\001\000\228\001\000\227\001\000\229\001\000\231\001\000\233\001\000\232\001\000\234\001\000\235\001\000\237\001\000\236\001\000\238\001\000\239\001\000\241\001\000\243\001\000\242\001\000\244\001\000\246\001\000\245\001\000\250\001\000\249\001\000\251\001\000\252\001  \001\000\176\001\000\162\001\000\163\001\000\167\001 \"\001\000\182\001\000\223\001\000\174\001\000\169\001!\"\001\000\180\001\000\168\001\"`\001\000\198\001\000\216\001\"\030\001\000\177\001\"d\001\"e\001\000\165\001\000\181\001\"\002\001\"\017\001\"\015\001\003\192\001\"+\001\000\170\001\000\186\001\003\169\001\000\230\001\000\248\001\000\191\001\000\161\001\000\172\001\"\026\001\001\146\001\"H\001\"\006\001\000\171\001\000\187\001 &\001\000\160\001\000\192\001\000\195\001\000\213\001\001R\001\001S\001 \019\001 \020\001 \028\001 \029\001 \024\001 \025\001\000\247\001%\202\001\000\255\001\001x\001 D\001 \172\001 9\001 :\002\000\000\251\001\002\000\000\251\002\001 !\001\000\183\001 \026\001 \030\001 0\001\000\194\001\000\202\001\000\193\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\001\000\211\001\000\212\002\000\000\248\255\001\000\210\001\000\218\001\000\219\001\000\217\001\0011\001\002\198\001\002\220\001\000\175\001\002\216\001\002\217\001\002\218\001\000\184\001\002\221\001\002\219\001\002\199" 0 : int array);;
+let macroman_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\135\000\000\000\000\000\000\005\221\000\000\005\221\008\000\004\000\000@\144\160\002\000\000\251\001\001\000\222\145\160\160\001\"\002\001\000\182\160\160\002\000\000\251\002\001\000\223@@@@\144\160\001\"\006\001\000\198@@@@@@@@\144\160\001\"\015\001\000\184@\144\160\001\"\017\001\000\183@\144\160\001 \019\001\000\208\144\160\001 \020\001\000\209@@@\144\160\001 \024\001\000\212\144\160\001 \025\001\000\213\145\160\160\001\"\026\001\000\195\160\160\001 \026\001\000\226@@\144\160\001 \028\001\000\210\144\160\001 \029\001\000\211\145\160\160\001\"\030\001\000\176\160\160\001 \030\001\000\227@@\145\160\160``\160\160\001  \001\000\160@\145\160\160aa\160\160\001 !\001\000\224@\145\160\160bb\160\160\001 \"\001\000\165\160\160\001!\"\001\000\170@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\201@\144\160gg\144\160hh\144\160ii\144\160jj\145\160\160kk\160\160\001\"+\001\000\186@\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\228@\145\160\160qq\160\160\001\0011\001\000\245@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\220@\145\160\160zz\160\160\001 :\001\000\221@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\145\160\160\000D\000D\160\160\001 D\001\000\218@\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\197@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\206@\145\160\160\000S\000S\160\160\001\001S\001\000\207@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\"`\001\000\173@\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\145\160\160\000d\000d\160\160\001\"d\001\000\178@\145\160\160\000e\000e\160\160\001\"e\001\000\179@\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\217@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~@@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\196@@@@@@@@@@@@@\144\160\001\000\160\001\000\202\144\160\001\000\161\001\000\193\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163@\144\160\001\000\165\001\000\180@\144\160\001\000\167\001\000\164\144\160\001\000\168\001\000\172\145\160\160\001\000\169\001\000\169\160\160\001\003\169\001\000\189@\144\160\001\000\170\001\000\187\144\160\001\000\171\001\000\199\145\160\160\001\000\172\001\000\194\160\160\001 
\172\001\000\219@@\144\160\001\000\174\001\000\168\144\160\001\000\175\001\000\248\144\160\001\000\176\001\000\161\144\160\001\000\177\001\000\177@@\144\160\001\000\180\001\000\171\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\166\144\160\001\000\183\001\000\225\144\160\001\000\184\001\000\252@\144\160\001\000\186\001\000\188\144\160\001\000\187\001\000\200@@@\144\160\001\000\191\001\000\192\145\160\160\001\003\192\001\000\185\160\160\001\000\192\001\000\203@\144\160\001\000\193\001\000\231\144\160\001\000\194\001\000\229\144\160\001\000\195\001\000\204\144\160\001\000\196\001\000\128\144\160\001\000\197\001\000\129\145\160\160\001\000\198\001\000\174\160\160\001\002\198\001\000\246@\145\160\160\001\000\199\001\000\130\160\160\001\002\199\001\000\255@\144\160\001\000\200\001\000\233\144\160\001\000\201\001\000\131\145\160\160\001%\202\001\000\215\160\160\001\000\202\001\000\230@\144\160\001\000\203\001\000\232\144\160\001\000\204\001\000\237\144\160\001\000\205\001\000\234\144\160\001\000\206\001\000\235\144\160\001\000\207\001\000\236@\144\160\001\000\209\001\000\132\144\160\001\000\210\001\000\241\144\160\001\000\211\001\000\238\144\160\001\000\212\001\000\239\144\160\001\000\213\001\000\205\144\160\001\000\214\001\000\133@\145\160\160\001\000\216\001\000\175\160\160\001\002\216\001\000\249@\145\160\160\001\000\217\001\000\244\160\160\001\002\217\001\000\250@\145\160\160\001\000\218\001\000\242\160\160\001\002\218\001\000\251@\145\160\160\001\000\219\001\000\243\160\160\001\002\219\001\000\254@\145\160\160\001\000\220\001\000\134\160\160\001\002\220\001\000\247@\144\160\001\002\221\001\000\253@\144\160\001\000\223\001\000\167\144\160\001\000\224\001\000\136\144\160\001\000\225\001\000\135\144\160\001\000\226\001\000\137\144\160\001\000\227\001\000\139\144\160\001\000\228\001\000\138\144\160\001\000\229\001\000\140\144\160\001\000\230\001\000\190\144\160\001\000\231\001\000\141\144\160\001\000\232\001\000\143\144\160\001\000\233\001\000\142\144\160\001\000\234\001\000\144\144\160\001\000\235\001\000\145\144\160\001\000\236\001\000\147\144\160\001\000\237\001\000\146\144\160\001\000\238\001\000\148\144\160\001\000\239\001\000\149@\144\160\001\000\241\001\000\150\144\160\001\000\242\001\000\152\144\160\001\000\243\001\000\151\144\160\001\000\244\001\000\153\144\160\001\000\245\001\000\155\144\160\001\000\246\001\000\154\144\160\001\000\247\001\000\214\144\160\001\000\248\001\000\191\144\160\001\000\249\001\000\157\144\160\001\000\250\001\000\156\144\160\001\000\251\001\000\158\144\160\001\000\252\001\000\159@@\145\160\160\001\000\255\001\000\216\160\160\002\000\000\248\255\001\000\240@" 0 : Netmappings.from_uni_list array);;
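Each pair of bindings above follows the same pattern: the byte-to-Unicode direction is a marshalled table typed `int array` (apparently 256 entries, one per byte, with what looks like a negative sentinel for unmapped positions), the Unicode-to-byte direction is a marshalled `Netmappings.from_uni_list array`, and both are wrapped in `lazy` so the `Marshal.from_string` cost is only paid on first use. As a minimal illustrative sketch (not part of this commit), a helper along the following lines could consult one of the forward tables; the name `cp874_byte_to_unicode` and the sentinel interpretation are assumptions, not code from this revision:

(* Illustrative sketch only: consult the lazily unmarshalled CP874 table
   defined above.  Assumes cp874_to_unicode is in scope as a lazy
   256-entry int array indexed by byte value, where a negative entry
   marks a byte with no Unicode mapping. *)
let cp874_byte_to_unicode (b : char) : int option =
  let table = Lazy.force cp874_to_unicode in   (* unmarshal on first use *)
  let u = table.(Char.code b) in               (* index by the byte value *)
  if u < 0 then None else Some u               (* None = unmapped byte *)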
+ let windows1250_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002@\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\000\255\001 \030\001 &\001  \001 !\000\255\001 0\001\001`\001 9\001\001Z\001\001d\001\001}\001\001y\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\001\001a\001 :\001\001[\001\001e\001\001~\001\001z\001\000\160\001\002\199\001\002\216\001\001A\001\000\164\001\001\004\001\000\166\001\000\167\001\000\168\001\000\169\001\001^\001\000\171\001\000\172\001\000\173\001\000\174\001\001{\001\000\176\001\000\177\001\002\219\001\001B\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\001\005\001\001_\001\000\187\001\001=\001\002\221\001\001>\001\001|\001\001T\001\000\193\001\000\194\001\001\002\001\000\196\001\0019\001\001\006\001\000\199\001\001\012\001\000\201\001\001\024\001\000\203\001\001\026\001\000\205\001\000\206\001\001\014\001\001\016\001\001C\001\001G\001\000\211\001\000\212\001\001P\001\000\214\001\000\215\001\001X\001\001n\001\000\218\001\001p\001\000\220\001\000\221\001\001b\001\000\223\001\001U\001\000\225\001\000\226\001\001\003\001\000\228\001\001:\001\001\007\001\000\231\001\001\013\001\000\233\001\001\025\001\000\235\001\001\027\001\000\237\001\000\238\001\001\015\001\001\017\001\001D\001\001H\001\000\243\001\000\244\001\001Q\001\000\246\001\000\247\001\001Y\001\001o\001\000\250\001\001q\001\000\252\001\000\253\001\001c\001\002\217" 0 : int array);;
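A minimal sketch (not part of the generated module) of how one of these lazily unmarshaled tables could be consulted; the decode_byte helper below is hypothetical and assumes each *_to_unicode value is an int array with at least 256 entries indexed by the byte value, as the type annotations above declare:

(* Force the lazy value once, then index it with the byte to obtain the
   corresponding Unicode code point.  Illustration only. *)
let decode_byte (table : int array Lazy.t) (b : char) : int =
  (Lazy.force table).(Char.code b)

(* e.g. decode_byte windows1250_to_unicode 'A' should yield 0x41, since
   the ASCII range maps to itself in these code pages. *)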
+let windows1250_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007+\000\000\000\000\000\000\006\242\000\000\006\242\008\000\004\000\000\144\160@@\144\160AA\145\160\160BB\160\160\001\001\002\001\000\195@\145\160\160CC\160\160\001\001\003\001\000\227@\145\160\160DD\160\160\001\001\004\001\000\165@\145\160\160EE\160\160\001\001\005\001\000\185@\145\160\160FF\160\160\001\001\006\001\000\198@\145\160\160GG\160\160\001\001\007\001\000\230@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\145\160\160NN\160\160\001\001\014\001\000\207@\145\160\160OO\160\160\001\001\015\001\000\239@\145\160\160PP\160\160\001\001\016\001\000\208@\145\160\160QQ\160\160\001\001\017\001\000\240@\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145\160\160\001\001\024\001\000\202@\145\160\160YY\160\160\001 \025\001\000\146\160\160\001\001\025\001\000\234@\145\160\160ZZ\160\160\001 \026\001\000\130\160\160\001\001\026\001\000\204@\145\160\160[[\160\160\001\001\027\001\000\236@\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001  \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139\160\160\001\0019\001\000\197@\145\160\160zz\160\160\001 
:\001\000\155\160\160\001\001:\001\000\229@\144\160{{\144\160||\145\160\160}}\160\160\001\001=\001\000\188@\145\160\160~~\160\160\001\001>\001\000\190@\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\163@\145\160\160\000B\000B\160\160\001\001B\001\000\179@\145\160\160\000C\000C\160\160\001\001C\001\000\209@\145\160\160\000D\000D\160\160\001\001D\001\000\241@\144\160\000E\000E\144\160\000F\000F\145\160\160\000G\000G\160\160\001\001G\001\000\210@\145\160\160\000H\000H\160\160\001\001H\001\000\242@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001\001P\001\000\213@\145\160\160\000Q\000Q\160\160\001\001Q\001\000\245@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001\001T\001\000\192@\145\160\160\000U\000U\160\160\001\001U\001\000\224@\144\160\000V\000V\144\160\000W\000W\145\160\160\000X\000X\160\160\001\001X\001\000\216@\145\160\160\000Y\000Y\160\160\001\001Y\001\000\248@\145\160\160\000Z\000Z\160\160\001\001Z\001\000\140@\145\160\160\000[\000[\160\160\001\001[\001\000\156@\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\170@\145\160\160\000_\000_\160\160\001\001_\001\000\186@\145\160\160\000`\000`\160\160\001\001`\001\000\138@\145\160\160\000a\000a\160\160\001\001a\001\000\154@\145\160\160\000b\000b\160\160\001\001b\001\000\222@\145\160\160\000c\000c\160\160\001\001c\001\000\254@\145\160\160\000d\000d\160\160\001\001d\001\000\141@\145\160\160\000e\000e\160\160\001\001e\001\000\157@\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\145\160\160\000n\000n\160\160\001\001n\001\000\217@\145\160\160\000o\000o\160\160\001\001o\001\000\249@\145\160\160\000p\000p\160\160\001\001p\001\000\219@\145\160\160\000q\000q\160\160\001\001q\001\000\251@\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\143@\145\160\160\000z\000z\160\160\001\001z\001\000\159@\145\160\160\000{\000{\160\160\001\001{\001\000\175@\145\160\160\000|\000|\160\160\001\001|\001\000\191@\145\160\160\000}\000}\160\160\001\001}\001\000\142@\145\160\160\000~\000~\160\160\001\001~\001\000\158@\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177@@\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184@@\144\160\001\000\187\001\000\187@@@@@\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194@\144\160\001\000\196\001\000\196@@\145\160\160\001\002\199\001\000\161\160\160\001\000\199\001\000\199@@\144\160\001\000\201\001\000\201@\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206@@@@\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212@\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\002\216\001\000\162\144\160\001\002\217\001\000\255\144\160\001\000\218\001\000\218\144\160\001\002\219\001\000\178\144\160\001\000\220\001\000\220\145\160\160\001\002\221\001\000\189\160\160\001\000\221\001\000\221@@\144\160\001\000\223\001\000\223@\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226@\144\160\001\000\228\001\000\228@@\144\160\001\000\231\001\000\231@\144\160\001\000\233\001\000\233@\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238@@@@\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244@\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247@@\144\160\001\000\250\001\000\250@\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253@@" 0 : Netmappings.from_uni_list array);;
+ let windows1251_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002D\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\004\002\001\004\003\001 \026\001\004S\001 \030\001 &\001  \001 !\001 \172\001 0\001\004\t\001 9\001\004\n\001\004\012\001\004\011\001\004\015\001\004R\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\001\004Y\001 :\001\004Z\001\004\\\001\004[\001\004_\001\000\160\001\004\014\001\004^\001\004\008\001\000\164\001\004\144\001\000\166\001\000\167\001\004\001\001\000\169\001\004\004\001\000\171\001\000\172\001\000\173\001\000\174\001\004\007\001\000\176\001\000\177\001\004\006\001\004V\001\004\145\001\000\181\001\000\182\001\000\183\001\004Q\001!\022\001\004T\001\000\187\001\004X\001\004\005\001\004U\001\004W\001\004\016\001\004\017\001\004\018\001\004\019\001\004\020\001\004\021\001\004\022\001\004\023\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004 \001\004!\001\004\"\001\004#\001\004$\001\004%\001\004&\001\004'\001\004(\001\004)\001\004*\001\004+\001\004,\001\004-\001\004.\001\004/\001\0040\001\0041\001\0042\001\0043\001\0044\001\0045\001\0046\001\0047\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001\004@\001\004A\001\004B\001\004C\001\004D\001\004E\001\004F\001\004G\001\004H\001\004I\001\004J\001\004K\001\004L\001\004M\001\004N\001\004O" 0 : int array);;
+let windows1251_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\166\000\000\000\000\000\000\007\129\000\000\007\129\008\000\004\000\000\144\160@@\145\160\160AA\160\160\001\004\001\001\000\168@\145\160\160BB\160\160\001\004\002\001\000\128@\145\160\160CC\160\160\001\004\003\001\000\129@\145\160\160DD\160\160\001\004\004\001\000\170@\145\160\160EE\160\160\001\004\005\001\000\189@\145\160\160FF\160\160\001\004\006\001\000\178@\145\160\160GG\160\160\001\004\007\001\000\175@\145\160\160HH\160\160\001\004\008\001\000\163@\145\160\160II\160\160\001\004\t\001\000\138@\145\160\160JJ\160\160\001\004\n\001\000\140@\145\160\160KK\160\160\001\004\011\001\000\142@\145\160\160LL\160\160\001\004\012\001\000\141@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\161@\145\160\160OO\160\160\001\004\015\001\000\143@\145\160\160PP\160\160\001\004\016\001\000\192@\145\160\160QQ\160\160\001\004\017\001\000\193@\145\160\160RR\160\160\001\004\018\001\000\194@\145\160\160SS\160\160\001 \019\001\000\150\160\160\001\004\019\001\000\195@\145\160\160TT\160\160\001 \020\001\000\151\160\160\001\004\020\001\000\196@\145\160\160UU\160\160\001\004\021\001\000\197@\145\160\160VV\160\160\001!\022\001\000\185\160\160\001\004\022\001\000\198@\145\160\160WW\160\160\001\004\023\001\000\199@\145\160\160XX\160\160\001 \024\001\000\145\160\160\001\004\024\001\000\200@\145\160\160YY\160\160\001 \025\001\000\146\160\160\001\004\025\001\000\201@\145\160\160ZZ\160\160\001 \026\001\000\130\160\160\001\004\026\001\000\202@\145\160\160[[\160\160\001\004\027\001\000\203@\145\160\160\\\\\160\160\001 \028\001\000\147\160\160\001\004\028\001\000\204@\145\160\160]]\160\160\001 \029\001\000\148\160\160\001\004\029\001\000\205@\145\160\160^^\160\160\001 \030\001\000\132\160\160\001\004\030\001\000\206@\145\160\160__\160\160\001\004\031\001\000\207@\145\160\160``\160\160\001  \001\000\134\160\160\001\004 \001\000\208@\145\160\160aa\160\160\001 !\001\000\135\160\160\001\004!\001\000\209@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153\160\160\001\004\"\001\000\210@\145\160\160cc\160\160\001\004#\001\000\211@\145\160\160dd\160\160\001\004$\001\000\212@\145\160\160ee\160\160\001\004%\001\000\213@\145\160\160ff\160\160\001 &\001\000\133\160\160\001\004&\001\000\214@\145\160\160gg\160\160\001\004'\001\000\215@\145\160\160hh\160\160\001\004(\001\000\216@\145\160\160ii\160\160\001\004)\001\000\217@\145\160\160jj\160\160\001\004*\001\000\218@\145\160\160kk\160\160\001\004+\001\000\219@\145\160\160ll\160\160\001\004,\001\000\220@\145\160\160mm\160\160\001\004-\001\000\221@\145\160\160nn\160\160\001\004.\001\000\222@\145\160\160oo\160\160\001\004/\001\000\223@\145\160\160pp\160\160\001 0\001\000\137\160\160\001\0040\001\000\224@\145\160\160qq\160\160\001\0041\001\000\225@\145\160\160rr\160\160\001\0042\001\000\226@\145\160\160ss\160\160\001\0043\001\000\227@\145\160\160tt\160\160\001\0044\001\000\228@\145\160\160uu\160\160\001\0045\001\000\229@\145\160\160vv\160\160\001\0046\001\000\230@\145\160\160ww\160\160\001\0047\001\000\231@\145\160\160xx\160\160\001\0048\001\000\232@\145\160\160yy\160\160\001 9\001\000\139\160\160\001\0049\001\000\233@\145\160\160zz\160\160\001 
:\001\000\155\160\160\001\004:\001\000\234@\145\160\160{{\160\160\001\004;\001\000\235@\145\160\160||\160\160\001\004<\001\000\236@\145\160\160}}\160\160\001\004=\001\000\237@\145\160\160~~\160\160\001\004>\001\000\238@\145\160\160\127\127\160\160\001\004?\001\000\239@\145\160\160\000@\000@\160\160\001\004@\001\000\240@\145\160\160\000A\000A\160\160\001\004A\001\000\241@\145\160\160\000B\000B\160\160\001\004B\001\000\242@\145\160\160\000C\000C\160\160\001\004C\001\000\243@\145\160\160\000D\000D\160\160\001\004D\001\000\244@\145\160\160\000E\000E\160\160\001\004E\001\000\245@\145\160\160\000F\000F\160\160\001\004F\001\000\246@\145\160\160\000G\000G\160\160\001\004G\001\000\247@\145\160\160\000H\000H\160\160\001\004H\001\000\248@\145\160\160\000I\000I\160\160\001\004I\001\000\249@\145\160\160\000J\000J\160\160\001\004J\001\000\250@\145\160\160\000K\000K\160\160\001\004K\001\000\251@\145\160\160\000L\000L\160\160\001\004L\001\000\252@\145\160\160\000M\000M\160\160\001\004M\001\000\253@\145\160\160\000N\000N\160\160\001\004N\001\000\254@\145\160\160\000O\000O\160\160\001\004O\001\000\255@\144\160\000P\000P\145\160\160\000Q\000Q\160\160\001\004Q\001\000\184@\145\160\160\000R\000R\160\160\001\004R\001\000\144@\145\160\160\000S\000S\160\160\001\004S\001\000\131@\145\160\160\000T\000T\160\160\001\004T\001\000\186@\145\160\160\000U\000U\160\160\001\004U\001\000\190@\145\160\160\000V\000V\160\160\001\004V\001\000\179@\145\160\160\000W\000W\160\160\001\004W\001\000\191@\145\160\160\000X\000X\160\160\001\004X\001\000\188@\145\160\160\000Y\000Y\160\160\001\004Y\001\000\154@\145\160\160\000Z\000Z\160\160\001\004Z\001\000\156@\145\160\160\000[\000[\160\160\001\004[\001\000\158@\145\160\160\000\\\000\\\160\160\001\004\\\001\000\157@\144\160\000]\000]\145\160\160\000^\000^\160\160\001\004^\001\000\162@\145\160\160\000_\000_\160\160\001\004_\001\000\159@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@\144\160\001\004\144\001\000\165\144\160\001\004\145\001\000\180@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167@\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\145\160\160\001 \172\001\000\136\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177@@@\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183@@@\144\160\001\000\187\001\000\187@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let windows1252_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002@\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001  \001 !\001\002\198\001 0\001\001`\001 9\001\001R\000\255\001\001}\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\001\001a\001 :\001\001S\000\255\001\001~\001\001x\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\000\208\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\000\221\001\000\222\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\000\240\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\000\253\001\000\254\001\000\255" 0 : int array);;
+let windows1252_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\174\000\000\000\000\000\000\006M\000\000\006M\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001  \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 :\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\140@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\138@\145\160\160\000a\000a\160\160\001\001a\001\000\154@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\159@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\142@\145\160\160\000~\000~\160\160\001\001~\001\000\158@\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\131@@@@@@@@@@@@@\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\145\160\160\001\002\198\001\000\136\160\160\001\000\198\001\000\198@\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207\144\160\001\000\208\001\000\208\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\145\160\160\001\002\220\001\000\152\160\160\001\000\220\001\000\220@\144\160\001\000\221\001\000\221\144\160\001\000\222\001\000\222\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239\144\160\001\000\240\001\000\240\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253\144\160\001\000\254\001\000\254\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
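The string literals are ordinary OCaml marshaled values, so definitions of this shape can be produced mechanically; below is a hedged sketch of such a build-time generator (emit_to_unicode is hypothetical; only the stdlib Marshal and Printf modules are assumed):

(* Hypothetical generator step: marshal a precomputed byte -> Unicode
   array and print it back as a definition like the ones above.  %S
   re-escapes the binary blob as a valid OCaml string literal. *)
let emit_to_unicode (name : string) (tbl : int array) =
  let blob = Marshal.to_string tbl [] in
  Printf.printf
    "let %s_to_unicode = lazy (Marshal.from_string %S 0 : int array);;\n"
    name blob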
+ let windows1253_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0024\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001  \001 !\000\255\001 0\000\255\001 9\000\255\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\000\255\001 :\000\255\000\255\000\255\000\255\001\000\160\001\003\133\001\003\134\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\000\255\001\000\171\001\000\172\001\000\173\001\000\174\001 \021\001\000\176\001\000\177\001\000\178\001\000\179\001\003\132\001\000\181\001\000\182\001\000\183\001\003\136\001\003\137\001\003\138\001\000\187\001\003\140\001\000\189\001\003\142\001\003\143\001\003\144\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\000\255\001\003\163\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\170\001\003\171\001\003\172\001\003\173\001\003\174\001\003\175\001\003\176\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\194\001\003\195\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\001\003\201\001\003\202\001\003\203\001\003\204\001\003\205\001\003\206\000\255" 0 : int array);;
+let windows1253_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\130\000\000\000\000\000\000\006F\000\000\006F\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\145\160\160UU\160\160\001 \021\001\000\175@\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001  \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 :\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@\144\160\001\003\132\001\000\180\144\160\001\003\133\001\000\161\144\160\001\003\134\001\000\162@\144\160\001\003\136\001\000\184\144\160\001\003\137\001\000\185\144\160\001\003\138\001\000\186@\144\160\001\003\140\001\000\188@\144\160\001\003\142\001\000\190\144\160\001\003\143\001\000\191\144\160\001\003\144\001\000\192\144\160\001\003\145\001\000\193\145\160\160\001\001\146\001\000\131\160\160\001\003\146\001\000\194@\144\160\001\003\147\001\000\195\144\160\001\003\148\001\000\196\144\160\001\003\149\001\000\197\144\160\001\003\150\001\000\198\144\160\001\003\151\001\000\199\144\160\001\003\152\001\000\200\144\160\001\003\153\001\000\201\144\160\001\003\154\001\000\202\144\160\001\003\155\001\000\203\144\160\001\003\156\001\000\204\144\160\001\003\157\001\000\205\144\160\001\003\158\001\000\206\144\160\001\003\159\001\000\207\145\160\160\001\000\160\001\000\160\160\160\001\003\160\001\000\208@\144\160\001\003\161\001\000\209@\145\160\160\001\000\163\001\000\163\160\160\001\003\163\001\000\211@\145\160\160\001\000\164\001
\000\164\160\160\001\003\164\001\000\212@\145\160\160\001\000\165\001\000\165\160\160\001\003\165\001\000\213@\145\160\160\001\000\166\001\000\166\160\160\001\003\166\001\000\214@\145\160\160\001\000\167\001\000\167\160\160\001\003\167\001\000\215@\145\160\160\001\000\168\001\000\168\160\160\001\003\168\001\000\216@\145\160\160\001\000\169\001\000\169\160\160\001\003\169\001\000\217@\144\160\001\003\170\001\000\218\145\160\160\001\000\171\001\000\171\160\160\001\003\171\001\000\219@\145\160\160\001 \172\001\000\128\160\160\001\000\172\001\000\172\160\160\001\003\172\001\000\220@\145\160\160\001\000\173\001\000\173\160\160\001\003\173\001\000\221@\145\160\160\001\000\174\001\000\174\160\160\001\003\174\001\000\222@\144\160\001\003\175\001\000\223\145\160\160\001\000\176\001\000\176\160\160\001\003\176\001\000\224@\145\160\160\001\000\177\001\000\177\160\160\001\003\177\001\000\225@\145\160\160\001\000\178\001\000\178\160\160\001\003\178\001\000\226@\145\160\160\001\000\179\001\000\179\160\160\001\003\179\001\000\227@\144\160\001\003\180\001\000\228\145\160\160\001\000\181\001\000\181\160\160\001\003\181\001\000\229@\145\160\160\001\000\182\001\000\182\160\160\001\003\182\001\000\230@\145\160\160\001\000\183\001\000\183\160\160\001\003\183\001\000\231@\144\160\001\003\184\001\000\232\144\160\001\003\185\001\000\233\144\160\001\003\186\001\000\234\145\160\160\001\000\187\001\000\187\160\160\001\003\187\001\000\235@\144\160\001\003\188\001\000\236\145\160\160\001\000\189\001\000\189\160\160\001\003\189\001\000\237@\144\160\001\003\190\001\000\238\144\160\001\003\191\001\000\239\144\160\001\003\192\001\000\240\144\160\001\003\193\001\000\241\144\160\001\003\194\001\000\242\144\160\001\003\195\001\000\243\144\160\001\003\196\001\000\244\144\160\001\003\197\001\000\245\144\160\001\003\198\001\000\246\144\160\001\003\199\001\000\247\144\160\001\003\200\001\000\248\144\160\001\003\201\001\000\249\144\160\001\003\202\001\000\250\144\160\001\003\203\001\000\251\144\160\001\003\204\001\000\252\144\160\001\003\205\001\000\253\144\160\001\003\206\001\000\254@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let windows1254_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002>\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001  \001 !\001\002\198\001 0\001\001`\001 9\001\001R\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\001\001a\001 :\001\001S\000\255\000\255\001\001x\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\001\030\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\0010\001\001^\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\001\031\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\0011\001\001_\001\000\255" 0 : int array);;
+let windows1254_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\168\000\000\000\000\000\000\006M\000\000\006M\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132\160\160\001\001\030\001\000\208@\145\160\160__\160\160\001\001\031\001\000\240@\145\160\160``\160\160\001  \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137\160\160\001\0010\001\000\221@\145\160\160qq\160\160\001\0011\001\000\253@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 :\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\140@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\222@\145\160\160\000_\000_\160\160\001\001_\001\000\254@\145\160\160\000`\000`\160\160\001\001`\001\000\138@\145\160\160\000a\000a\160\160\001\001a\001\000\154@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\159@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\131@@@@@@@@@@@@@\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\145\160\160\001\002\198\001\000\136\160\160\001\000\198\001\000\198@\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\145\160\160\001\002\220\001\000\152\160\160\001\000\220\001\000\220@@@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+ let windows1255_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002.\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001  \001 !\001\002\198\001 0\000\255\001 9\000\255\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\000\255\001 :\000\255\000\255\000\255\000\255\001\000\160\001\000\161\001\000\162\001\000\163\001 \170\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\215\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\247\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\005\176\001\005\177\001\005\178\001\005\179\001\005\180\001\005\181\001\005\182\001\005\183\001\005\184\001\005\185\000\255\001\005\187\001\005\188\001\005\189\001\005\190\001\005\191\001\005\192\001\005\193\001\005\194\001\005\195\001\005\240\001\005\241\001\005\242\001\005\243\001\005\244\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\005\234\000\255\000\255\001 \014\001 \015\000\255" 0 : int array);;
+let windows1255_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006N\000\000\000\000\000\000\006\027\000\000\006\027\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\145\160\160NN\160\160\001 \014\001\000\253@\145\160\160OO\160\160\001 \015\001\000\254@\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001  \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 :\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\131@@@@@@@@@@@@@\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163@\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001 \170\001\000\164\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\145\160\160\001\000\176\001\000\176\160\160\001\005\176\001\000\192@\145\160\160\001\000\177\001\000\177\160\160\001\005\177\001\000\193@\145\160\160\001\000\178\001\000\178\160\160\001\005\178\001\000\194@\145\160\160\001\000\179\001\000\179\160\160\001\005\179\001\000\195@\145\160\160\001\000\180\001\000\180\160\160\001\005\180\001\000\196@\145\160\160\001\000\181\001\000\181\160\160\001\005\181\001\000\197@\145\160\160\001\000\182\001\000\182\160\160\001\005\182\001\000\198@\145\160\160\001\000\183\001\000\183\160\160\001\005\183\001\000\199@\145\160\160\001\000\184\001\000\184\160\160\001\005\184\001\000\200@\145\160\160\001\000\185\001\000\185\160\160\001\005\185\001\000\201@@\145\160\160\001\000\187\001\000\187\160\160\001\005\187\001\000\203@\145\160\160\001\000\188\001\000\188\160\160\001\005\188\001\000\204@\145\160\160\001\000\189\001\000\189\160\160\001\005\189\001\000\205@\145\160\160\001\000\190\001\000\190\160\160\001\005\190\001\000\206@\145\160\160\001\000\191\001\000\191\160\160\001\005\191\001\000\207@\144\160\001\005\192\001\000\208\144\160\001\005\193\001\000\209\144\160\001\005\194\001\000\210\144\160\001\005\195\001\000\211@@\144\160\001\002\198\001\000\136@@@@@@@@@\144\160\001\005\208\001\000\224\144\160\001\005\209\001\000\225\144\160\001\005\210\001\000\226\144\160\001\005\211\001\000\227\144\160\001\005\212\001\000\228\144\160\001\005\213\001\000\229\144\160\001\005\214\001\000\230\145\160\160\001\000\215\001\000\170\160\160\001\005\215\001\000\231@\144\160\001\005\216\001\000\232\144\160\001\005\217\001\000\233\144\160\001\005\218\001\000\234\144\160\001\005\219\001\000\235\145\160\160\001\002\220\001\000\152\160\160\001\005\220\001\000\236@\144\160\001\005\221\001\000\237\144\160\001\005\222\001\000\238\144\160\001\005\223\001\000\239\144\160\001\005\224\001\000\240\144\160\001\005\225\001\000\241\144\160\001\005\226\001\000\242\144\160\001\005\227\001\000\243\144\160\001\005\228\001\000\244\144\160\001\005\229\001\000\245\144\160\001\005\230\001\000\246\144\160\001\005\231\001\000\247\144\160\001\005\232\001\000\248\144\160\001\005\233\001\000\249\144\160\001\005\234\001\000\250@@@@@\144\160\001\005\240\001\000\212\144\160\001\005\241\001\000\213\144\160\001\005\242\001\000\214\144\160\001\005\243\001\000\215\144\160\001\005\244\001\000\216@@\144\160\001\000\247\001\000\186@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let windows1256_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\001\006~\001 \026\001\001\146\001 \030\001 &\001  \001 !\001\002\198\001 0\001\006y\001 9\001\001R\001\006\134\001\006\152\001\006\136\001\006\175\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\006\169\001!\"\001\006\145\001 :\001\001S\001 \012\001 \013\001\006\186\001\000\160\001\006\012\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\006\190\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\006\027\001\000\187\001\000\188\001\000\189\001\000\190\001\006\031\001\006\193\001\006!\001\006\"\001\006#\001\006$\001\006%\001\006&\001\006'\001\006(\001\006)\001\006*\001\006+\001\006,\001\006-\001\006.\001\006/\001\0060\001\0061\001\0062\001\0063\001\0064\001\0065\001\0066\001\000\215\001\0067\001\0068\001\0069\001\006:\001\006@\001\006A\001\006B\001\006C\001\000\224\001\006D\001\000\226\001\006E\001\006F\001\006G\001\006H\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\006I\001\006J\001\000\238\001\000\239\001\006K\001\006L\001\006M\001\006N\001\000\244\001\006O\001\006P\001\000\247\001\006Q\001\000\249\001\006R\001\000\251\001\000\252\001 \014\001 \015\001\006\210" 0 : int array);;
+let windows1256_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007W\000\000\000\000\000\000\007\022\000\000\007\022\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001 \012\001\000\157\160\160\001\006\012\001\000\161@\145\160\160MM\160\160\001 \013\001\000\158@\145\160\160NN\160\160\001 \014\001\000\253@\145\160\160OO\160\160\001 \015\001\000\254@\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\145\160\160[[\160\160\001\006\027\001\000\186@\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\145\160\160__\160\160\001\006\031\001\000\191@\145\160\160``\160\160\001  \001\000\134@\145\160\160aa\160\160\001 !\001\000\135\160\160\001\006!\001\000\193@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153\160\160\001\006\"\001\000\194@\145\160\160cc\160\160\001\006#\001\000\195@\145\160\160dd\160\160\001\006$\001\000\196@\145\160\160ee\160\160\001\006%\001\000\197@\145\160\160ff\160\160\001 &\001\000\133\160\160\001\006&\001\000\198@\145\160\160gg\160\160\001\006'\001\000\199@\145\160\160hh\160\160\001\006(\001\000\200@\145\160\160ii\160\160\001\006)\001\000\201@\145\160\160jj\160\160\001\006*\001\000\202@\145\160\160kk\160\160\001\006+\001\000\203@\145\160\160ll\160\160\001\006,\001\000\204@\145\160\160mm\160\160\001\006-\001\000\205@\145\160\160nn\160\160\001\006.\001\000\206@\145\160\160oo\160\160\001\006/\001\000\207@\145\160\160pp\160\160\001 0\001\000\137\160\160\001\0060\001\000\208@\145\160\160qq\160\160\001\0061\001\000\209@\145\160\160rr\160\160\001\0062\001\000\210@\145\160\160ss\160\160\001\0063\001\000\211@\145\160\160tt\160\160\001\0064\001\000\212@\145\160\160uu\160\160\001\0065\001\000\213@\145\160\160vv\160\160\001\0066\001\000\214@\145\160\160ww\160\160\001\0067\001\000\216@\145\160\160xx\160\160\001\0068\001\000\217@\145\160\160yy\160\160\001 9\001\000\139\160\160\001\0069\001\000\218@\145\160\160zz\160\160\001 
:\001\000\155\160\160\001\006:\001\000\219@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\006@\001\000\220@\145\160\160\000A\000A\160\160\001\006A\001\000\221@\145\160\160\000B\000B\160\160\001\006B\001\000\222@\145\160\160\000C\000C\160\160\001\006C\001\000\223@\145\160\160\000D\000D\160\160\001\006D\001\000\225@\145\160\160\000E\000E\160\160\001\006E\001\000\227@\145\160\160\000F\000F\160\160\001\006F\001\000\228@\145\160\160\000G\000G\160\160\001\006G\001\000\229@\145\160\160\000H\000H\160\160\001\006H\001\000\230@\145\160\160\000I\000I\160\160\001\006I\001\000\236@\145\160\160\000J\000J\160\160\001\006J\001\000\237@\145\160\160\000K\000K\160\160\001\006K\001\000\240@\145\160\160\000L\000L\160\160\001\006L\001\000\241@\145\160\160\000M\000M\160\160\001\006M\001\000\242@\145\160\160\000N\000N\160\160\001\006N\001\000\243@\145\160\160\000O\000O\160\160\001\006O\001\000\245@\145\160\160\000P\000P\160\160\001\006P\001\000\246@\145\160\160\000Q\000Q\160\160\001\006Q\001\000\248@\145\160\160\000R\000R\160\160\001\001R\001\000\140\160\160\001\006R\001\000\250@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\006y\001\000\138@\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\145\160\160\000~\000~\160\160\001\006~\001\000\129@\144\160\000\127\000\127@@@@@@\144\160\001\006\134\001\000\141@\144\160\001\006\136\001\000\143@@@@@@@@\144\160\001\006\145\001\000\154\144\160\001\001\146\001\000\131@@@@@\144\160\001\006\152\001\000\142@@@@@@@\144\160\001\000\160\001\000\160@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\145\160\160\001\006\169\001\000\152\160\160\001\000\169\001\000\169@@\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\145\160\160\001\006\175\001\000\144\160\160\001\000\175\001\000\175@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\006\186\001\000\159\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\145\160\160\001\006\190\001\000\170\160\160\001\000\190\001\000\190@@@\144\160\001\006\193\001\000\192@@@@\144\160\001\002\198\001\000\136@@@@@@@@@@@\144\160\001\006\210\001\000\255@@@@\144\160\001\000\215\001\000\215@@@@@@@@\144\160\001\000\224\001\000\224@\144\160\001\000\226\001\000\226@@@@\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235@@\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@@@@\144\160\001\000\244\001\000\244@@\144\160\001\000\247\001\000\247@\144\160\001\000\249\001\000\249@\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
+ let windows1257_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0029\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\000\255\001 \030\001 &\001  \001 !\000\255\001 0\000\255\001 9\000\255\001\000\168\001\002\199\001\000\184\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\000\255\001 :\000\255\001\000\175\001\002\219\000\255\001\000\160\000\255\001\000\162\001\000\163\001\000\164\000\255\001\000\166\001\000\167\001\000\216\001\000\169\001\001V\001\000\171\001\000\172\001\000\173\001\000\174\001\000\198\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\248\001\000\185\001\001W\001\000\187\001\000\188\001\000\189\001\000\190\001\000\230\001\001\004\001\001.\001\001\000\001\001\006\001\000\196\001\000\197\001\001\024\001\001\018\001\001\012\001\000\201\001\001y\001\001\022\001\001\"\001\0016\001\001*\001\001;\001\001`\001\001C\001\001E\001\000\211\001\001L\001\000\213\001\000\214\001\000\215\001\001r\001\001A\001\001Z\001\001j\001\000\220\001\001{\001\001}\001\000\223\001\001\005\001\001/\001\001\001\001\001\007\001\000\228\001\000\229\001\001\025\001\001\019\001\001\013\001\000\233\001\001z\001\001\023\001\001#\001\0017\001\001+\001\001<\001\001a\001\001D\001\001F\001\000\243\001\001M\001\000\245\001\000\246\001\000\247\001\001s\001\001B\001\001[\001\001k\001\000\252\001\001|\001\001~\001\002\217" 0 : int array);;
+let windows1257_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\234\000\000\000\000\000\000\006\186\000\000\006\186\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\194@\145\160\160AA\160\160\001\001\001\001\000\226@\144\160BB\144\160CC\145\160\160DD\160\160\001\001\004\001\000\192@\145\160\160EE\160\160\001\001\005\001\000\224@\145\160\160FF\160\160\001\001\006\001\000\195@\145\160\160GG\160\160\001\001\007\001\000\227@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\200@\145\160\160MM\160\160\001\001\013\001\000\232@\144\160NN\144\160OO\144\160PP\144\160QQ\145\160\160RR\160\160\001\001\018\001\000\199@\145\160\160SS\160\160\001 \019\001\000\150\160\160\001\001\019\001\000\231@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\145\160\160VV\160\160\001\001\022\001\000\203@\145\160\160WW\160\160\001\001\023\001\000\235@\145\160\160XX\160\160\001 \024\001\000\145\160\160\001\001\024\001\000\198@\145\160\160YY\160\160\001 \025\001\000\146\160\160\001\001\025\001\000\230@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001  \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153\160\160\001\001\"\001\000\204@\145\160\160cc\160\160\001\001#\001\000\236@\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\145\160\160jj\160\160\001\001*\001\000\206@\145\160\160kk\160\160\001\001+\001\000\238@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\193@\145\160\160oo\160\160\001\001/\001\000\225@\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\205@\145\160\160ww\160\160\001\0017\001\000\237@\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 
:\001\000\155@\145\160\160{{\160\160\001\001;\001\000\207@\145\160\160||\160\160\001\001<\001\000\239@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\217@\145\160\160\000B\000B\160\160\001\001B\001\000\249@\145\160\160\000C\000C\160\160\001\001C\001\000\209@\145\160\160\000D\000D\160\160\001\001D\001\000\241@\145\160\160\000E\000E\160\160\001\001E\001\000\210@\145\160\160\000F\000F\160\160\001\001F\001\000\242@\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\145\160\160\000L\000L\160\160\001\001L\001\000\212@\145\160\160\000M\000M\160\160\001\001M\001\000\244@\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\001\001V\001\000\170@\145\160\160\000W\000W\160\160\001\001W\001\000\186@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001\001Z\001\000\218@\145\160\160\000[\000[\160\160\001\001[\001\000\250@\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\208@\145\160\160\000a\000a\160\160\001\001a\001\000\240@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\145\160\160\000j\000j\160\160\001\001j\001\000\219@\145\160\160\000k\000k\160\160\001\001k\001\000\251@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\216@\145\160\160\000s\000s\160\160\001\001s\001\000\248@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\202@\145\160\160\000z\000z\160\160\001\001z\001\000\234@\145\160\160\000{\000{\160\160\001\001{\001\000\221@\145\160\160\000|\000|\160\160\001\001|\001\000\253@\145\160\160\000}\000}\160\160\001\001}\001\000\222@\145\160\160\000~\000~\160\160\001\001~\001\000\254@\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\141\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\157\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\143\144\160\001\000\185\001\000\185@\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190@@@@@\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\175\144\160\001\002\199\001\000\142@\144\160\001\000\201\001\000\201@@@@@@@@@\144\160\001\000\211\001\000\211@\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\168\144\160\001\002\217\001\000\255@\144\160\001\002\219\001\000\158\144\160\001\000\220\001\000\220@@\144\160\001\000\223\001\000\223@@@@\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\191@@\144\160\001\000\233\001\000\233@@@@@@@@@\144\160\001\000\243\001\000\243@\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\184@@@\144\160\001\000\252\001\000\252@@@" 0 : Netmappings.from_uni_list array);;
+ let windows1258_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002<\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001  \001 !\001\002\198\001 0\000\255\001 9\001\001R\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\000\255\001 :\001\001S\000\255\000\255\001\001x\001\000\160\001\000\161\001\000\162\001\000\163\001\000\164\001\000\165\001\000\166\001\000\167\001\000\168\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\000\185\001\000\186\001\000\187\001\000\188\001\000\189\001\000\190\001\000\191\001\000\192\001\000\193\001\000\194\001\001\002\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\003\000\001\000\205\001\000\206\001\000\207\001\001\016\001\000\209\001\003\t\001\000\211\001\000\212\001\001\160\001\000\214\001\000\215\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\001\175\001\003\003\001\000\223\001\000\224\001\000\225\001\000\226\001\001\003\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\003\001\001\000\237\001\000\238\001\000\239\001\001\017\001\000\241\001\003#\001\000\243\001\000\244\001\001\161\001\000\246\001\000\247\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\001\176\001 \171\001\000\255" 0 : int array);;
+let windows1258_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\174\000\000\000\000\000\000\006^\000\000\006^\008\000\004\000\000\145\160\160@@\160\160\001\003\000\001\000\204@\145\160\160AA\160\160\001\003\001\001\000\236@\145\160\160BB\160\160\001\001\002\001\000\195@\145\160\160CC\160\160\001\003\003\001\000\222\160\160\001\001\003\001\000\227@\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\145\160\160II\160\160\001\003\t\001\000\210@\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001\001\016\001\000\208@\145\160\160QQ\160\160\001\001\017\001\000\240@\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001  \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\145\160\160cc\160\160\001\003#\001\000\242@\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 :\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\140@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\159@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\131@@@@@@@@@@@@@\145\160\160\001\000\160\001\000\160\160\160\001\001\160\001\000\213@\145\160\160\001\000\161\001\000\161\160\160\001\001\161\001\000\245@\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\145\160\160\001\000\171\001\000\171\160\160\001 \171\001\000\254@\145\160\160\001 
\172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\145\160\160\001\000\175\001\000\175\160\160\001\001\175\001\000\221@\145\160\160\001\000\176\001\000\176\160\160\001\001\176\001\000\253@\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194@\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\145\160\160\001\002\198\001\000\136\160\160\001\000\198\001\000\198@\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207@\144\160\001\000\209\001\000\209@\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212@\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\145\160\160\001\002\220\001\000\152\160\160\001\000\220\001\000\220@@@\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226@\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239@\144\160\001\000\241\001\000\241@\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244@\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252@@\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+ Hashtbl.add Netmappings.to_unicode `Enc_windows1258 windows1258_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1258 windows1258_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1257 windows1257_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1257 windows1257_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1256 windows1256_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1256 windows1256_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1255 windows1255_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1255 windows1255_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1254 windows1254_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1254 windows1254_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1253 windows1253_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1253 windows1253_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1252 windows1252_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1252 windows1252_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1251 windows1251_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1251 windows1251_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_windows1250 windows1250_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_windows1250 windows1250_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_macroman macroman_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_macroman macroman_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_koi8r koi8r_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_koi8r koi8r_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_jis0201 jis0201_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_jis0201 jis0201_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_adobe_zapf_dingbats_encoding adobe_zapf_dingbats_encoding_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_adobe_zapf_dingbats_encoding adobe_zapf_dingbats_encoding_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_adobe_symbol_encoding adobe_symbol_encoding_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_adobe_symbol_encoding adobe_symbol_encoding_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_adobe_standard_encoding adobe_standard_encoding_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_adobe_standard_encoding adobe_standard_encoding_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp875 cp875_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp875 cp875_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp874 cp874_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp874 cp874_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp869 cp869_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp869 cp869_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp866 cp866_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp866 cp866_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp865 cp865_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp865 cp865_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp864 cp864_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp864 cp864_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp863 cp863_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp863 cp863_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp862 cp862_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp862 cp862_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp861 cp861_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp861 cp861_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp860 cp860_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp860 cp860_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp857 cp857_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp857 cp857_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp856 cp856_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp856 cp856_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp855 cp855_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp855 cp855_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp852 cp852_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp852 cp852_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp850 cp850_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp850 cp850_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp775 cp775_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp775 cp775_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp737 cp737_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp737 cp737_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp500 cp500_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp500 cp500_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp437 cp437_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp437 cp437_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp424 cp424_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp424 cp424_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp1026 cp1026_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp1026 cp1026_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp1006 cp1006_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp1006 cp1006_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_cp037 cp037_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_cp037 cp037_from_unicode;
+();;
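
The registrations above key each lazily unmarshaled table by an encoding tag in the Netmappings hash tables. A minimal lookup sketch (an editor's illustration, not part of this commit), assuming, as the declarations above indicate, that Netmappings.to_unicode maps an encoding tag to a lazy int array indexed by byte value, with -1 apparently marking bytes that have no Unicode counterpart:

    let byte_to_unicode enc c =
      (* Force the lazily unmarshaled table and look up the byte's code point. *)
      let table = Lazy.force (Hashtbl.find Netmappings.to_unicode enc) in
      table.(Char.code c)

    (* e.g. byte_to_unicode `Enc_windows1252 '\233' would be expected
       to yield 0xE9, the code point of 'é' in windows-1252. *)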
diff --git a/helm/DEVEL/pxp/netstring/netstream.ml b/helm/DEVEL/pxp/netstring/netstream.ml
new file mode 100644 (file)
index 0000000..76c2e3a
--- /dev/null
@@ -0,0 +1,162 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+type t =
+    { s_channel : in_channel;
+      s_maxlength : int option;
+      s_blocksize : int;
+      mutable s_current_length : int;
+      mutable s_at_eos : bool;
+      mutable s_win_pos : int;
+      mutable s_win_len : int;
+      s_netbuf : Netbuffer.t;
+      s_iobuf : string;
+    }
+;;
+
+
+let dump s text = 
+  print_string ("*** NETSTREAM DUMP " ^ text ^ "\n");
+  Printf.printf "current_length=%d  at_eos=%b  win_pos=%d  win_len=%d\n"
+                s.s_current_length s.s_at_eos s.s_win_pos s.s_win_len;
+  Printf.printf "netbuffer_length=%d  netbuffer_size=%d\n"
+                (Netbuffer.length s.s_netbuf)
+                (String.length(Netbuffer.unsafe_buffer s.s_netbuf));
+  Printf.printf "netbuffer=\"%s\"\n"
+                (String.escaped(Netbuffer.contents s.s_netbuf));
+  print_string "*** ---------------\n";
+  flush stdout
+;;
+
+
+let want_another_block s =
+  if not s.s_at_eos then begin
+    (* How much are we allowed to read? *)
+    let m =
+      match s.s_maxlength with
+         None   -> s.s_blocksize
+       | Some k -> min (k - s.s_current_length) s.s_blocksize
+    in
+    (* Read this. *)
+    let rec read_block k =
+      if k < m then
+       let n = 
+         input s.s_channel s.s_iobuf k (m - k) in
+       ( if n > 0 then
+           read_block (k+n)
+         else (* EOF *)
+           k
+       )
+      else
+       k
+    in
+    let n = read_block 0 in
+    (* If n < blocksize, EOS is reached. *)
+    Netbuffer.add_sub_string s.s_netbuf s.s_iobuf 0 n;
+    s.s_win_len        <- s.s_win_len + n;
+    s.s_current_length <- s.s_current_length + n;
+    s.s_at_eos         <- n < s.s_blocksize;
+
+    (* dump s "After appending block"; *)
+  end
+;;
+
+
+let want s n =
+  while not s.s_at_eos && s.s_win_len < n do
+    want_another_block s
+  done
+;;
+
+
+let want_minimum s =
+  want s (s.s_blocksize + s.s_blocksize)
+;;
+
+
+let move s n =
+  Netbuffer.delete s.s_netbuf 0 n;
+  s.s_win_pos <- s.s_win_pos + n;
+  s.s_win_len <- s.s_win_len - n;
+  want_minimum s;
+  (* dump s "After move"; *)
+;;
+
+
+let create_from_channel ch maxlength blocksize =
+  let s =
+    { s_channel = ch;
+      s_maxlength = maxlength;
+      s_blocksize = blocksize;
+      s_current_length = 0;
+      s_at_eos = false;
+      s_win_pos = 0;
+      s_win_len = 0;
+      s_netbuf = Netbuffer.create (2*blocksize);
+      s_iobuf = String.create blocksize;
+    }
+  in
+  want_minimum s;
+  s
+;;
+
+
+let create_from_string str =
+  let l = String.length str in
+  { s_channel = stdin;
+    s_maxlength = None;
+    s_blocksize = l;
+    s_current_length = l;
+    s_at_eos = true;
+    s_win_pos = 0;
+    s_win_len = l;
+    s_netbuf =
+      ( let nb = Netbuffer.create l in
+       Netbuffer.add_string nb str;
+       nb
+      );
+    s_iobuf = "";
+  }
+;;
+
+
+let block_size s = s.s_blocksize;;
+
+let current_length s = s.s_current_length;;
+
+let at_eos s = s.s_at_eos;;
+
+let window_position s = s.s_win_pos;;
+
+let window_length s = s.s_win_len;;
+
+let window s = s.s_netbuf;;
+
+let print_stream s =
+  Format.printf
+    "<NETSTREAM window:%d/%d total_length:%d eof=%b>"
+    s.s_win_pos
+    s.s_win_len
+    s.s_current_length
+    s.s_at_eos
+;;
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/06/24 20:20:33  gerd
+ *     Added the toploop printer.
+ *
+ * Revision 1.1  2000/04/15 13:07:48  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netstream.mli b/helm/DEVEL/pxp/netstring/netstream.mli
new file mode 100644 (file)
index 0000000..7cb1857
--- /dev/null
@@ -0,0 +1,118 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(* A netstream is an input channel that is read block by block. The 
+ * fragment of the channel currently loaded into memory is called the
+ * current window of the netstream.
+ *
+ * PICTURE:
+ *
+ * 0            window_position     current_length                  EOS
+ * +------------------+-------------------+--------------------------+
+ *                    ====================
+ *                     The current window
+ *
+ * window_length = current_length - window_position
+ *
+ * The length of the window is maintained automatically: if possible,
+ * the window is at least twice the block size long, where a "block" is
+ * the amount of data that is read from the input channel in one step.
+ *
+ * (The idea is that you choose as block size the number of bytes you want
+ * to analyze at once, and which must be loaded into memory. You can start
+ * your analysis at window_position and proceed until window_position +
+ * blocksize without having to check whether your window is large enough.
+ * Only once the first blocksize bytes of the window have been processed
+ * does the window need to be enlarged by loading the next block.)
+ *
+ * If you want the window to become larger, you can call 'want' (to
+ * enlarge the window to a certain size) or 'want_another_block' (to load
+ * just another block from the input channel). Note that this affects only
+ * the current window and not future windows.
+ *
+ * If you do not need the first n bytes of the window anymore, you can
+ * call 'move' to move the beginning of the window by n bytes. If the
+ * window becomes too small after this operation, it is enlarged until
+ * it is again twice the block size long or until EOS is reached.
+ * (A short usage sketch follows this interface.)
+ *)
+
+type t
+
+val create_from_channel : in_channel -> int option -> int -> t
+    (* create_from_channel ch maxlength blocksize:
+     * The new netstream reads from the channel 'ch'. If maxlength = None,
+     * the channel is read until EOF. If maxlength = Some n, at most n bytes
+     * are read; i.e. the netstream reads until n bytes have been read or
+     * until EOF has been reached, whatever comes first. The blocksize 
+     * specifies the number of bytes to read at once.
+     *)
+
+val create_from_string : string -> t
+    (* Creates a new netstream from a string. The initial window of this
+     * netstream is a copy of the passed string.
+     *)
+
+val block_size : t -> int
+   (* Returns the (immutable) block size. *)
+
+val current_length : t -> int
+    (* Returns the number of bytes read so far. *)
+
+val at_eos : t -> bool
+    (* True iff EOS (end of stream) is reached, i.e. the last byte of the
+     * window is the last byte of the stream.
+     *)
+
+val window_position : t -> int
+    (* Returns the absolute position of the current window. *)
+
+val window_length : t -> int
+    (* Returns the length of the current window. *)
+
+val window : t -> Netbuffer.t
+    (* Returns the current window. *)
+
+val move : t -> int -> unit
+    (* move s n:
+     * Moves the window: The first n bytes of the current window are 
+     * discarded. If the window would become smaller than twice the
+     * blocksize and if the end of the stream is not yet reached, another
+     * block is read from the input channel and appended to the window.
+     * 
+     * PRECONDITION:
+     * - n <= window_length
+     *)
+
+val want : t -> int -> unit
+    (* want s n:
+     * If the window is smaller than n bytes, the netstream tries to enlarge
+     * the window so that it is at least n bytes long. The enlargement
+     * is not possible if the stream is not long enough; in this case
+     * the window becomes as large as possible.
+     *)
+
+val want_another_block : t -> unit
+    (* Enlarges the window by another block (if possible, i.e. if the stream
+     * is long enough).
+     *)
+
+val print_stream : t -> unit
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/06/24 20:20:33  gerd
+ *     Added the toploop printer.
+ *
+ * Revision 1.1  2000/04/15 13:07:48  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
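
The interface above suggests a simple usage pattern: create a stream, inspect the window, and move past the bytes that have been processed. A minimal sketch (illustrative only, not taken from the original sources; it relies only on the functions documented above plus Netbuffer.contents, which is used elsewhere in this commit):

    (* From a string: the whole string is the initial window. *)
    let () =
      let s = Netstream.create_from_string "Hello, world!" in
      Netstream.move s 7;                                       (* drop "Hello, " *)
      print_endline (Netbuffer.contents (Netstream.window s));  (* -> "world!"    *)
      assert (Netstream.at_eos s)

    (* From a channel: consume the stream block by block. *)
    let count_bytes ic =
      let s = Netstream.create_from_channel ic None 4096 in
      let total = ref 0 in
      while Netstream.window_length s > 0 do
        total := !total + Netstream.window_length s;
        Netstream.move s (Netstream.window_length s)
      done;
      !total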
diff --git a/helm/DEVEL/pxp/netstring/netstring.cma b/helm/DEVEL/pxp/netstring/netstring.cma
new file mode 100644 (file)
index 0000000..1cf66b5
Binary files /dev/null and b/helm/DEVEL/pxp/netstring/netstring.cma differ
diff --git a/helm/DEVEL/pxp/netstring/netstring.cmxa b/helm/DEVEL/pxp/netstring/netstring.cmxa
new file mode 100644 (file)
index 0000000..f95c008
Binary files /dev/null and b/helm/DEVEL/pxp/netstring/netstring.cmxa differ
diff --git a/helm/DEVEL/pxp/netstring/netstring_mt.ml b/helm/DEVEL/pxp/netstring/netstring_mt.ml
new file mode 100644 (file)
index 0000000..96576f1
--- /dev/null
@@ -0,0 +1,37 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Initialize multi-threading mode: *)
+
+let str_mutex = Mutex.create();;
+let cgi_mutex = Mutex.create();;
+let mappings_mutex = Mutex.create();;
+
+Netstring_str.init_mt
+  (fun () -> Mutex.lock str_mutex)
+  (fun () -> Mutex.unlock str_mutex);
+Cgi.init_mt
+  (fun () -> Mutex.lock cgi_mutex)
+  (fun () -> Mutex.unlock cgi_mutex);
+Netmappings.init_mt
+  (fun () -> Mutex.lock mappings_mutex)
+  (fun () -> Mutex.unlock mappings_mutex)
+;;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/08/29 00:45:42  gerd
+ *     Initializing Netmappings, too
+ *
+ * Revision 1.1  2000/06/25 21:15:27  gerd
+ *     Initial revision
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netstring_mt.mli b/helm/DEVEL/pxp/netstring/netstring_mt.mli
new file mode 100644 (file)
index 0000000..c224b2b
--- /dev/null
@@ -0,0 +1,25 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* This module initializes the multi-threading mode of 
+ * Netstring. You must link it with every application that
+ * uses multi-threading.
+ * PITFALL: Link this module _directly_ with the executable,
+ * _don't_ put this module into a cma archive! A module that is only part
+ * of an archive and never referenced may not be linked in at all, so its
+ * initialization code would never run.
+ *)
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/06/25 21:15:27  gerd
+ *     Initial revision
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netstring_str.ml b/helm/DEVEL/pxp/netstring/netstring_str.ml
new file mode 100644 (file)
index 0000000..7353719
--- /dev/null
@@ -0,0 +1,241 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+let lock   = ref (fun () -> ());;
+let unlock = ref (fun () -> ());;
+
+let init_mt new_lock new_unlock =
+  lock   := new_lock;
+  unlock := new_unlock
+;;
+
+let protect f =
+  !lock();
+  try
+    let r = f() in
+    !unlock();
+    r
+  with
+      x ->
+       !unlock();
+       raise x
+;;
+
+type regexp = Str.regexp;;
+type split_result = Str.split_result = Text of string | Delim of string;;
+
+type result =
+    { pos : int;
+      match_beg : int;
+      match_end : int;
+      group_beg : int array;
+      group_end : int array;
+    }
+;;
+
+let regexp s =
+  protect
+    (fun () -> Str.regexp s)
+;;
+
+let regexp_case_fold s =
+  protect
+    (fun () -> Str.regexp_case_fold s)
+;;
+
+let quote s =
+  protect
+    (fun () -> Str.quote s)
+;;
+
+let regexp_string s =
+  protect
+    (fun () -> Str.regexp_string s)
+;;
+
+let regexp_string_case_fold s =
+  protect
+    (fun () -> Str.regexp_string_case_fold s)
+;;
+
+let return_result pos n_groups =
+  let r =
+    { pos = pos;
+      match_beg = (try Str.match_beginning() with Not_found -> -1);
+      match_end = (try Str.match_end()       with Not_found -> -1);
+      group_beg = Array.create n_groups (-1);
+      group_end = Array.create n_groups (-1);
+    }
+  in
+  for g = 0 to n_groups - 1 do
+    r.group_beg.(g) <- (try Str.group_beginning (g+1) with Not_found -> -1);
+    r.group_end.(g) <- (try Str.group_end (g+1)       with Not_found -> -1);
+  done;
+  r
+;;
+
+let string_match ?(groups = 9) ~pat s ~pos =
+  protect
+    (fun () ->
+       if Str.string_match pat s pos then
+        Some (return_result pos groups)
+       else
+        None
+    )
+;;
+
+let string_partial_match ?(groups = 9) ~pat s ~pos =
+  protect
+    (fun () ->
+       if Str.string_partial_match pat s pos then
+        Some (return_result pos groups)
+       else
+        None
+    )
+;;
+
+let search_forward ?(groups = 9) ~pat s ~pos =
+  protect
+    (fun () ->
+       let i = Str.search_forward pat s pos in
+       i, return_result pos groups
+    )
+;;
+
+let search_backward ?(groups = 9) ~pat s ~pos =
+  protect
+    (fun () ->
+       let i = Str.search_backward pat s pos in
+       i, return_result pos groups
+    )
+;;
+
+let matched_string result s =
+  if result.match_beg < 0 or result.match_end < 0 then raise Not_found;
+  String.sub s result.match_beg (result.match_end - result.match_beg)
+;;
+
+let match_beginning result =
+  if result.match_beg < 0 then raise Not_found;
+  result.match_beg
+;;
+
+let match_end result =
+  if result.match_end < 0 then raise Not_found;
+  result.match_end
+;;
+
+let matched_group result n s =
+  if n < 0 || n >= Array.length result.group_beg then raise Not_found;
+  let gbeg = result.group_beg.(n-1) in
+  let gend = result.group_end.(n-1) in
+  if gbeg < 0 or gend < 0 then raise Not_found;
+  String.sub s gbeg (gend - gbeg)
+;;
+
+let group_beginning result n =
+  if n < 0 || n >= Array.length result.group_beg then raise Not_found;
+  let gbeg = result.group_beg.(n-1) in
+  if gbeg < 0 then raise Not_found else 
+    gbeg
+;;
+
+let group_end result n =
+  if n < 0 || n >= Array.length result.group_end then raise Not_found;
+  let gend = result.group_end.(n-1) in
+  if gend < 0 then raise Not_found else 
+    gend
+;;
+
+let global_replace ~pat ~templ s =
+  protect
+    (fun () ->
+       Str.global_replace pat templ s)
+;;
+
+let replace_first ~pat ~templ s =
+  protect
+    (fun () ->
+       Str.replace_first pat templ s)
+;;
+
+let global_substitute ?(groups = 9) ~pat ~subst s =
+  protect
+    (fun () ->
+       let xsubst s =
+        let r = return_result 0 groups in
+        subst r s
+       in
+       Str.global_substitute pat xsubst s)
+;;
+
+let substitute_first ?(groups = 9) ~pat ~subst s =
+  protect
+    (fun () ->
+       let xsubst s =
+        let r = return_result 0 groups in
+        subst r s
+       in
+       Str.substitute_first pat xsubst s)
+;;
+
+(* replace_matched: n/a *)
+
+let split ~sep s =
+  protect
+    (fun () ->
+       Str.split sep s)
+;;
+
+let bounded_split ~sep s ~max =
+  protect
+    (fun () ->
+       Str.bounded_split sep s max)
+;;
+
+let split_delim ~sep s =
+  protect
+    (fun () ->
+       Str.split_delim sep s)
+;;
+
+let bounded_split_delim ~sep s ~max =
+  protect
+    (fun () ->
+       Str.bounded_split_delim sep s max)
+;;
+
+let full_split ~sep s =
+  protect
+    (fun () ->
+       Str.full_split sep s)
+;;
+
+let bounded_full_split ~sep s ~max =
+  protect
+    (fun () ->
+       Str.bounded_full_split sep s max)
+;;
+
+let string_before = Str.string_before;;
+let string_after = Str.string_after;;
+let first_chars = Str.first_chars;;
+let last_chars = Str.last_chars;;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/06/25 21:15:48  gerd
+ *     Checked thread-safety.
+ *
+ * Revision 1.1  2000/06/25 20:48:19  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/netstring_str.mli b/helm/DEVEL/pxp/netstring/netstring_str.mli
new file mode 100644 (file)
index 0000000..86d6840
--- /dev/null
@@ -0,0 +1,82 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* This module is a version of Str with a thread-safe interface *)
+
+type regexp = Str.regexp;;
+type split_result = Str.split_result = Text of string | Delim of string;;
+
+type result;;
+  (* The type of matching results *)
+
+val regexp: string -> regexp
+val regexp_case_fold: string -> regexp
+val quote: string -> string
+val regexp_string: string -> regexp
+val regexp_string_case_fold: string -> regexp
+
+val string_match: 
+      ?groups:int -> pat:regexp -> string -> pos:int -> result option
+val search_forward: 
+      ?groups:int -> pat:regexp -> string -> pos:int -> (int * result)
+val search_backward: 
+      ?groups:int -> pat:regexp -> string -> pos:int -> (int * result)
+val string_partial_match: 
+      ?groups:int -> pat:regexp -> string -> pos:int -> result option
+
+(* The ~groups option specifies how many groups will be stored into
+ * 'result'. Default: 9
+ *)
+
+val matched_string : result -> string -> string
+val match_beginning : result -> int
+val match_end : result -> int
+val matched_group : result -> int -> string -> string
+val group_beginning : result -> int -> int
+val group_end : result -> int -> int
+
+val global_replace: pat:regexp -> templ:string -> string -> string
+val replace_first: pat:regexp -> templ:string -> string -> string
+val global_substitute:
+       ?groups:int -> 
+       pat:regexp -> subst:(result -> string -> string) -> string -> string
+val substitute_first:
+       ?groups:int -> 
+       pat:regexp -> subst:(result -> string -> string) -> string -> string
+
+(* replace_matched: not available *)
+
+val split: sep:regexp -> string -> string list
+val bounded_split: sep:regexp -> string -> max:int -> string list
+val split_delim: sep:regexp -> string -> string list
+val bounded_split_delim: sep:regexp -> string -> max:int -> string list
+val full_split: sep:regexp -> string -> split_result list
+val bounded_full_split: sep:regexp -> string -> max:int -> split_result list
+
+val string_before: string -> int -> string
+val string_after: string -> int -> string
+val first_chars: string -> len:int -> string
+val last_chars: string -> len:int -> string
+
+(* Private: *)
+
+val init_mt : (unit -> unit) -> (unit -> unit) -> unit
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/06/25 21:15:48  gerd
+ *     Checked thread-safety.
+ *
+ * Revision 1.1  2000/06/25 20:48:19  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
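
For illustration, a small editor-supplied example of the labelled interface above; it matches two digit groups and extracts the first one:

    let date_re = Netstring_str.regexp "\\([0-9]+\\)-\\([0-9]+\\)"

    let year_of s =
      match Netstring_str.string_match ~pat:date_re s ~pos:0 with
          None   -> None
        | Some r -> Some (Netstring_str.matched_group r 1 s)

    (* year_of "2000-11" = Some "2000" *)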
diff --git a/helm/DEVEL/pxp/netstring/netstring_top.ml b/helm/DEVEL/pxp/netstring/netstring_top.ml
new file mode 100644 (file)
index 0000000..d25505c
--- /dev/null
@@ -0,0 +1,34 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+let exec s =
+  let l = Lexing.from_string s in
+  let ph = !Toploop.parse_toplevel_phrase l in
+  assert(Toploop.execute_phrase false Format.err_formatter ph)
+;;
+
+(* Install the printers: *)
+
+exec "#install_printer Neturl.print_url;;";;
+exec "#install_printer Netbuffer.print_buffer;;";;
+exec "#install_printer Netstream.print_stream;;";;
+exec "#install_printer Cgi.print_argument;;";;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/06/25 22:34:43  gerd
+ *     Added labels to arguments.
+ *
+ * Revision 1.1  2000/06/24 20:20:58  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
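
A sketch of a toplevel session that makes use of this module (the file names are assumptions, not taken from the commit; they presuppose that the library has been built as netstring.cma and this module compiled to netstring_top.cmo):

    #load "netstring.cma";;
    #load "netstring_top.cmo";;
    (* Values of type Neturl.url, Netbuffer.t, Netstream.t and CGI arguments
       are now displayed by the printers installed above. *)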
diff --git a/helm/DEVEL/pxp/netstring/netstring_top.mli b/helm/DEVEL/pxp/netstring/netstring_top.mli
new file mode 100644 (file)
index 0000000..1d5ac72
--- /dev/null
@@ -0,0 +1,21 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* You may load this module into the toploop in order to install
+ * the printers for the various opaque data types of Netstring.
+ *)
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/06/25 22:53:45  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/neturl.ml b/helm/DEVEL/pxp/netstring/neturl.ml
new file mode 100644 (file)
index 0000000..f597b0c
--- /dev/null
@@ -0,0 +1,1302 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+exception Malformed_URL
+
+type url_syntax_option =
+    Url_part_not_recognized
+  | Url_part_allowed
+  | Url_part_required
+
+
+type url_syntax =
+    { url_enable_scheme    : url_syntax_option;
+      url_enable_user      : url_syntax_option;
+      url_enable_password  : url_syntax_option;
+      url_enable_host      : url_syntax_option;
+      url_enable_port      : url_syntax_option;
+      url_enable_path      : url_syntax_option;
+      url_enable_param     : url_syntax_option;
+      url_enable_query     : url_syntax_option;
+      url_enable_fragment  : url_syntax_option;
+      url_enable_other     : url_syntax_option;
+      url_accepts_8bits    : bool;
+      url_is_valid         : url -> bool;
+    }
+
+and url =
+    { 
+      url_syntax   : url_syntax;
+      mutable url_validity : bool;
+      url_scheme   : string option;
+      url_user     : string option;
+      url_password : string option;
+      url_host     : string option;
+      url_port     : int option;
+      url_path     : string list;
+      url_param    : string list;
+      url_query    : string option;
+      url_fragment : string option;
+      url_other    : string option;
+    }
+;;
+
+
+type char_category =
+    Accepted
+  | Rejected
+  | Separator
+
+
+
+let scan_url_part s k_from k_to cats accept_8bits =
+  (* Scans the longest word of accepted characters from position 'k_from'
+   * in 's' until at most position 'k_to'. The character following the
+   * word (if any) must be a separator character.
+   * On success, the function returns the position of the last character
+   * of the word + 1.
+   * If there is any rejected character before the separator or the end
+   * of the string (i.e. position 'k_to') is reached, the exception
+   * Malformed_URL is raised.
+   * Furthermore, if the character '%' is accepted, it is checked that it is
+   * followed by two hexadecimal digits (which must be accepted, too);
+   * otherwise the exception Malformed_URL is raised as well.
+   * 'cats': contains for every character code (0 to 255) the category
+   * of the character.
+   *)
+  let check_hex c =
+    if cats.( Char.code c ) <> Accepted then raise Malformed_URL;
+    match c with
+       ('0'..'9'|'A'..'F'|'a'..'f') -> ()
+      | _ -> raise Malformed_URL
+  in
+
+  let rec scan k =
+    if k >= k_to then
+      k
+    else begin
+      let c = s.[k] in
+      let cat = cats.(Char.code c) in
+      match cat with
+         Accepted -> 
+           if c = '%' then begin
+             if k+2 >= k_to then raise Malformed_URL;
+             let c1 = s.[k+1] in
+             let c2 = s.[k+2] in
+             check_hex c1;
+             check_hex c2;
+             scan (k+3)
+           end
+           else
+             scan (k+1)
+       | Separator -> k
+       | Rejected -> 
+           if accept_8bits && c >= '\128' 
+           then scan (k+1)
+           else raise Malformed_URL
+    end
+  in
+
+  assert (Array.length cats = 256);
+  assert (k_from >= 0);
+  assert (k_from <= k_to);
+  assert (k_to <= String.length s);
+  
+  scan k_from
+;;
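
As a concrete illustration (not from the original sources): with the login_cats categorization defined further below, scanning a login part stops in front of the first separator, while a '%' that is not followed by two hexadecimal digits raises Malformed_URL:

    let k = scan_url_part "anonymous:secret@ftp.host" 0 25 login_cats false
    (* k = 9: the scan stops at the ':' separator.
       scan_url_part "50%" 0 3 login_cats false raises Malformed_URL,
       because no two hex digits follow the '%'. *)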
+
+  
+(* Create a categorization: *)
+
+let lalpha = [ 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
+              'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z' ]
+
+let ualpha = [ 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
+              'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z' ]
+
+let digit = [ '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9' ]
+
+let safe = [ '$'; '-'; '_'; '.'; '+' ]
+
+let extra = [ '!'; '*'; '\''; '('; ')'; ',' ]
+
+let make_cats accepted separators =
+  (* create a categorization:
+   * - All characters listed in 'separators' are separators.
+   * - All characters listed in 'accepted' and which do not occur in
+   *   'separators' are accepted characters.
+   * - All other characters are rejected.
+   *)
+  let cats = Array.make 256 Rejected in
+  List.iter
+    (fun c ->
+       cats.(Char.code c) <- Accepted
+    )
+    accepted;
+
+  List.iter
+    (fun c ->
+       cats.(Char.code c) <- Separator
+    )
+    separators;
+  cats
+;;
+
+
+let scheme_cats =
+  make_cats (lalpha @ ualpha @ ['+'; '-'; '.']) [':'] ;;
+
+    (* scheme_cats: character categorization to _extract_ the URL scheme *)
+
+
+let login_cats =
+  make_cats 
+    (lalpha @ ualpha @ digit @ safe @ extra @ [';'; '?'; '&'; '='; '%'])  
+    [':'; '@'; '/'; '#' ]
+;;
+
+    (* login_cats: character categorization to _extract_ user name, password,
+     * host name, and port.
+     *)
+
+let host_cats =
+  make_cats
+    (lalpha @ ualpha @ digit @ ['.'; '-'])
+    []
+;;
+
+    (* host_cats: character categorization to _check_ whether the host name
+     * is formed only by legal characters.
+     * Especially '%' is not allowed here!
+     *)
+
+let port_cats =
+  make_cats
+    digit
+    []
+;;
+
+    (* port_cats: character categorization to _check_ whether the port number
+     * is formed only by legal characters.
+     * Especially '%' is not allowed here!
+     *)
+
+let path_cats separators =
+  make_cats
+    (lalpha @ ualpha @ digit @ safe @ extra @ 
+              ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'; '~'])
+    separators
+;;
+
+
+let separators_from_syntax syn =
+  let include_if syn_option clist =
+    if syn_option <> Url_part_not_recognized then
+      clist
+    else
+      []
+  in
+  (include_if syn.url_enable_param [';']) @
+  (include_if syn.url_enable_query ['?']) @
+  (include_if syn.url_enable_fragment ['#'])
+;;
+
+
+let path_cats_from_syntax syn extraseps =
+  let separators = separators_from_syntax syn in
+  path_cats (separators @ extraseps)
+;;
+
+(* path_cats_from_syntax:
+ * Computes a character categorization to extract the path from a URL.
+ * This depends on the syntax because the list of possible separators
+ * contains the characters that may begin the next URL clause.
+ *
+ * Notes:
+ * - The '#' is rejected unless fragments are enabled. 
+ * - The '~' is accepted although this violates RFC 1738.
+ *)
+
+
+let other_cats_from_syntax syn =
+  let include_if syn_option clist =
+    if syn_option <> Url_part_not_recognized then
+      clist
+    else
+      []
+  in
+  let separators =
+    (include_if syn.url_enable_param [';']) @
+    (include_if syn.url_enable_query ['?']) @
+    (include_if syn.url_enable_fragment ['#'])
+  in
+
+  make_cats
+    (lalpha @ ualpha @ digit @ safe @ extra @ 
+              (separators @ ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/']))
+    []
+;;
+
+    (* other_cats: character categorization to extract or check the
+     * "other" part of the URL.
+     *)
+
+
+
+let extract_url_scheme s = 
+  let l = String.length s in
+  let k = scan_url_part s 0 l scheme_cats false in
+          (* or raise Malformed_URL *)
+  if k = l then raise Malformed_URL;
+  assert (s.[k] = ':');
+  String.lowercase(String.sub s 0 k)
+;;
+
+
+let ( => ) a b = not a or b;;   (* implication *)
+
+let ( <=> ) (a:bool) b = ( a = b );;  (* equivalence *)
+
+let url_syntax_is_valid syn =
+  let recognized x = x <> Url_part_not_recognized in
+  let not_recognized x = x = Url_part_not_recognized in
+  (recognized syn.url_enable_password => recognized syn.url_enable_user) &
+  (recognized syn.url_enable_port     => recognized syn.url_enable_host) &
+  (recognized syn.url_enable_user     => recognized syn.url_enable_host) &
+  not ( (recognized syn.url_enable_user ||
+        recognized syn.url_enable_password ||
+        recognized syn.url_enable_host ||
+        recognized syn.url_enable_port ||
+        recognized syn.url_enable_path) &&
+       (recognized syn.url_enable_other))
+;;
+
+
+let partial_url_syntax syn =
+  let weaken =
+    function
+       Url_part_not_recognized -> Url_part_not_recognized
+      | Url_part_allowed        -> Url_part_allowed
+      | Url_part_required       -> Url_part_allowed
+  in
+  { url_enable_scheme    = weaken syn.url_enable_scheme;
+    url_enable_user      = weaken syn.url_enable_user;
+    url_enable_password  = weaken syn.url_enable_password;
+    url_enable_host      = weaken syn.url_enable_host;
+    url_enable_port      = weaken syn.url_enable_port;
+    url_enable_path      = weaken syn.url_enable_path;
+    url_enable_param     = weaken syn.url_enable_param;
+    url_enable_query     = weaken syn.url_enable_query;
+    url_enable_fragment  = weaken syn.url_enable_fragment;
+    url_enable_other     = weaken syn.url_enable_other;
+    url_accepts_8bits    = syn.url_accepts_8bits;
+    url_is_valid         = syn.url_is_valid;
+  }
+;;
+
+
+
+let file_url_syntax =
+  { url_enable_scheme    = Url_part_required;
+    url_enable_user      = Url_part_not_recognized;
+    url_enable_password  = Url_part_not_recognized;
+    url_enable_host      = Url_part_allowed;
+    url_enable_port      = Url_part_not_recognized;
+    url_enable_path      = Url_part_required;
+    url_enable_param     = Url_part_not_recognized;
+    url_enable_query     = Url_part_not_recognized;
+    url_enable_fragment  = Url_part_not_recognized;
+    url_enable_other     = Url_part_not_recognized;
+    url_accepts_8bits    = false;
+    url_is_valid         = (fun _ -> true);
+  }
+;;
+
+
+let ftp_url_syntax =
+  { url_enable_scheme    = Url_part_required;
+    url_enable_user      = Url_part_allowed;
+    url_enable_password  = Url_part_allowed;
+    url_enable_host      = Url_part_required;
+    url_enable_port      = Url_part_allowed;
+    url_enable_path      = Url_part_allowed;
+    url_enable_param     = Url_part_allowed;
+    url_enable_query     = Url_part_not_recognized;
+    url_enable_fragment  = Url_part_not_recognized;
+    url_enable_other     = Url_part_not_recognized;
+    url_accepts_8bits    = false;
+    url_is_valid         = (fun _ -> true);
+  }
+;;
+
+
+let http_url_syntax =
+  { url_enable_scheme    = Url_part_required;
+    url_enable_user      = Url_part_allowed;
+    url_enable_password  = Url_part_allowed;
+    url_enable_host      = Url_part_required;
+    url_enable_port      = Url_part_allowed;
+    url_enable_path      = Url_part_allowed;
+    url_enable_param     = Url_part_not_recognized;
+    url_enable_query     = Url_part_allowed;
+    url_enable_fragment  = Url_part_not_recognized;
+    url_enable_other     = Url_part_not_recognized;
+    url_accepts_8bits    = false;
+    url_is_valid         = (fun _ -> true);
+  }
+;;
+
+
+let mailto_url_syntax =
+  { url_enable_scheme    = Url_part_required;
+    url_enable_user      = Url_part_not_recognized;
+    url_enable_password  = Url_part_not_recognized;
+    url_enable_host      = Url_part_not_recognized;
+    url_enable_port      = Url_part_not_recognized;
+    url_enable_path      = Url_part_not_recognized;
+    url_enable_param     = Url_part_not_recognized;
+    url_enable_query     = Url_part_not_recognized;
+    url_enable_fragment  = Url_part_not_recognized;
+    url_enable_other     = Url_part_required;
+    url_accepts_8bits    = false;
+    url_is_valid         = (fun _ -> true);
+  }
+;;
+
+
+let null_url_syntax =
+  { url_enable_scheme    = Url_part_not_recognized;
+    url_enable_user      = Url_part_not_recognized;
+    url_enable_password  = Url_part_not_recognized;
+    url_enable_host      = Url_part_not_recognized;
+    url_enable_port      = Url_part_not_recognized;
+    url_enable_path      = Url_part_not_recognized;
+    url_enable_param     = Url_part_not_recognized;
+    url_enable_query     = Url_part_not_recognized;
+    url_enable_fragment  = Url_part_not_recognized;
+    url_enable_other     = Url_part_not_recognized;
+    url_accepts_8bits    = false;
+    url_is_valid         = (fun _ -> true);
+  }
+;;
+
+
+let ip_url_syntax =
+  { url_enable_scheme    = Url_part_allowed;
+    url_enable_user      = Url_part_allowed;
+    url_enable_password  = Url_part_allowed;
+    url_enable_host      = Url_part_allowed;
+    url_enable_port      = Url_part_allowed;
+    url_enable_path      = Url_part_allowed;
+    url_enable_param     = Url_part_allowed;
+    url_enable_query     = Url_part_allowed;
+    url_enable_fragment  = Url_part_allowed;
+    url_enable_other     = Url_part_not_recognized;
+    url_accepts_8bits    = false;
+    url_is_valid         = (fun _ -> true);
+  }
+;;
+
+
+let common_url_syntax =
+  let h = Hashtbl.create 10 in
+  Hashtbl.add h "file"   file_url_syntax;
+  Hashtbl.add h "ftp"    ftp_url_syntax;
+  Hashtbl.add h "http"   http_url_syntax;
+  Hashtbl.add h "mailto" mailto_url_syntax;
+  h
+;;
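
For instance (an illustrative sketch, not part of the file), the table can be combined with extract_url_scheme to select a syntax for a concrete URL string:

    let syntax_for s =
      Hashtbl.find common_url_syntax (extract_url_scheme s)
    (* syntax_for "http://www.cs.unibo.it/" is http_url_syntax;
       an unregistered scheme raises Not_found,
       and a string without a scheme raises Malformed_URL. *)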
+
+
+let url_conforms_to_syntax url =
+  let recognized x = x <> Url_part_not_recognized in
+  let required x = x = Url_part_required in
+  let present x    = x <> None in
+  let syn = url.url_syntax in
+  (present url.url_scheme   => recognized syn.url_enable_scheme)   &
+  (present url.url_user     => recognized syn.url_enable_user)     &
+  (present url.url_password => recognized syn.url_enable_password) &
+  (present url.url_host     => recognized syn.url_enable_host)     &
+  (present url.url_port     => recognized syn.url_enable_port)     &
+  ((url.url_path <> [])     => recognized syn.url_enable_path)     &
+  ((url.url_param <> [])    => recognized syn.url_enable_param)    &
+  (present url.url_query    => recognized syn.url_enable_query)    &
+  (present url.url_fragment => recognized syn.url_enable_fragment) &
+  (present url.url_other    => recognized syn.url_enable_other)    &
+  (required syn.url_enable_scheme   => present url.url_scheme)     &
+  (required syn.url_enable_user     => present url.url_user)       &
+  (required syn.url_enable_password => present url.url_password)   &
+  (required syn.url_enable_host     => present url.url_host)       &
+  (required syn.url_enable_port     => present url.url_port)       &
+  (required syn.url_enable_path     => (url.url_path <> []))       &
+  (required syn.url_enable_param    => (url.url_param <> []))      &
+  (required syn.url_enable_query    => present url.url_query)      &
+  (required syn.url_enable_fragment => present url.url_fragment)   &
+  (required syn.url_enable_other    => present url.url_other)      &
+  (url.url_validity or syn.url_is_valid url)
+;;
+
+
+let url_syntax_of_url url = url.url_syntax
+;;
+
+
+let modify_url
+      ?syntax
+      ?(encoded = false)
+      ?scheme
+      ?user
+      ?password
+      ?host
+      ?port
+      ?path
+      ?param
+      ?query
+      ?fragment
+      ?other
+      url 
+  =
+
+  let encode = Netencoding.Url.encode in
+  let enc x =
+    if encoded then
+      x
+    else
+      match x with
+         None -> None
+       | Some x' -> Some (encode x')
+  in
+  let enc_list l = 
+    if encoded then
+      l
+    else
+      List.map encode l 
+  in
+
+  let new_syntax =
+    match syntax with
+       None -> url.url_syntax
+      | Some syn -> syn
+  in
+
+  let check_string s_opt cats =
+    match s_opt with
+       None   -> ()
+      | Some s ->
+         let l = String.length s in
+         let k = scan_url_part s 0 l cats new_syntax.url_accepts_8bits in
+                 (* or raise Malformed_URL *)
+         if k <> l then raise Malformed_URL
+  in
+
+  let check_string_list p cats sep =
+    List.iter
+      (fun p_component ->
+        let l = String.length p_component in
+        let k = 
+          scan_url_part p_component 0 l cats new_syntax.url_accepts_8bits in
+          (* or raise Malformed_URL *)
+        if k <> l then raise Malformed_URL;
+        if String.contains p_component sep then raise Malformed_URL;
+      )
+      p
+  in
+
+  (* Create the modified record: *)
+  let url' =
+    { 
+      url_syntax   = new_syntax;
+      url_validity = false;
+      url_scheme   = if scheme   = None then url.url_scheme   else scheme;
+      url_user     = if user     = None then url.url_user     else enc user;
+      url_password = if password = None then url.url_password else enc password;
+      url_host     = if host     = None then url.url_host     else host;
+      url_port     = if port     = None then url.url_port     else port;
+      url_path     = (match path with
+                         None -> url.url_path
+                       | Some p -> enc_list p);
+      url_param    = (match param with
+                         None -> url.url_param
+                       | Some p -> enc_list p);
+      url_query    = if query    = None then url.url_query    else enc query;
+      url_fragment = if fragment = None then url.url_fragment else enc fragment;
+      url_other    = if other    = None then url.url_other    else enc other;
+    }
+  in
+  (* Check whether the URL conforms to the syntax:
+   *)
+  if not (url_conforms_to_syntax url') then raise Malformed_URL;
+  if url'.url_password <> None && url'.url_user = None then raise Malformed_URL;
+  if url'.url_user <> None && url'.url_host = None then raise Malformed_URL;
+  if url'.url_port <> None && url'.url_host = None then raise Malformed_URL;
+  (* Check every part: *)
+  check_string url'.url_scheme   scheme_cats;
+  check_string url'.url_user     login_cats;
+  check_string url'.url_password login_cats;
+  check_string url'.url_host     host_cats;
+  (match url'.url_port with 
+       None -> ()
+     | Some p -> if p < 0 || p > 65535 then raise Malformed_URL
+  );
+  let path_cats  = path_cats_from_syntax  new_syntax [] in
+  let other_cats = other_cats_from_syntax new_syntax in
+  check_string url'.url_query    path_cats;
+  check_string url'.url_fragment path_cats;
+  check_string url'.url_other    other_cats;
+  (* Check the lists: *)
+  check_string_list url'.url_param path_cats ';';
+  check_string_list url'.url_path  path_cats '/';
+  (* Further path checks: *)
+  begin match url'.url_path with
+      [] ->
+       (* The path is empty: There must not be a 'param' or 'query' *)
+       if url'.url_host <> None then begin
+         if url'.url_param <> [] then raise Malformed_URL;
+         if url'.url_query <> None then raise Malformed_URL;
+       end
+    | ["";""] ->
+       (* This is illegal. *)
+       raise Malformed_URL;
+    | "" :: p' ->
+       (* The path is absolute: always ok *)
+       ()
+    | _ ->
+       (* The path is relative: there must not be a host *)
+       if url'.url_host <> None then raise Malformed_URL;
+  end;
+  begin match url'.url_path with
+      _ :: rest ->              (* "//" ambiguity *)
+       begin match List.rev rest with
+           _ :: rest' -> 
+             if List.exists (fun p -> p = "") rest' then
+               raise Malformed_URL;
+         | [] ->
+             ()
+       end
+    | [] ->
+       ()
+  end;
+  (* Cache that the URL is valid: *)
+  url'.url_validity <- true;
+
+  url'
+;;
+
+
+let null_url =
+  { 
+    url_syntax   = null_url_syntax;
+    url_validity = true;
+    url_scheme   = None;
+    url_user     = None;
+    url_password = None;
+    url_host     = None;
+    url_port     = None;
+    url_path     = [];
+    url_param    = [];
+    url_query    = None;
+    url_fragment = None;
+    url_other    = None;
+  }
+;;
+
+
+let make_url
+      ?(encoded = false)
+      ?scheme
+      ?user
+      ?password
+      ?host
+      ?port
+      ?path
+      ?param
+      ?query
+      ?fragment
+      ?other
+      url_syntax
+  =
+
+  if not (url_syntax_is_valid url_syntax) then
+    invalid_arg "Neturl.make_url";
+
+  modify_url
+    ~encoded:encoded
+    ~syntax:url_syntax
+    ?scheme:scheme
+    ?user:user
+    ?password:password
+    ?host:host
+    ?port:port
+    ?path:path
+    ?param:param
+    ?query:query
+    ?fragment:fragment
+    ?other:other
+    null_url
+;;
+
+
+let remove_from_url
+      ?(scheme = false)
+      ?(user = false)
+      ?(password = false)
+      ?(host = false)
+      ?(port = false)
+      ?(path = false)
+      ?(param = false)
+      ?(query = false)
+      ?(fragment = false)
+      ?(other = false)
+      url
+  =
+
+  make_url
+    ~encoded:  true
+    ?scheme:   (if scheme   then None else url.url_scheme)
+    ?user:     (if user     then None else url.url_user)
+    ?password: (if password then None else url.url_password)
+    ?host:     (if host     then None else url.url_host)
+    ?port:     (if port     then None else url.url_port)
+    ?path:     (if path     then None else Some url.url_path)
+    ?param:    (if param    then None else Some url.url_param)
+    ?query:    (if query    then None else url.url_query)
+    ?fragment: (if fragment then None else url.url_fragment)
+    ?other:    (if other    then None else url.url_other)
+    url.url_syntax
+;;
+
+
+let default_url
+      ?(encoded = false)
+      ?scheme
+      ?user
+      ?password
+      ?host
+      ?port
+      ?(path = [])
+      ?(param = [])
+      ?query
+      ?fragment
+      ?other
+      url
+  =
+
+  let encode = Netencoding.Url.encode in
+
+  let enc x =
+    if encoded then
+      x
+    else
+      match x with
+         None -> None
+       | Some x' -> Some (encode x')
+  in
+
+  let enc_list l = 
+    if encoded then
+      l
+    else
+      List.map encode l 
+  in
+
+  let pass_if_missing current arg =
+    match current with
+       None -> arg
+      | _    -> current
+  in
+
+  make_url
+    ~encoded:  true
+    ?scheme:   (pass_if_missing url.url_scheme   scheme)
+    ?user:     (pass_if_missing url.url_user     (enc user))
+    ?password: (pass_if_missing url.url_password (enc password))
+    ?host:     (pass_if_missing url.url_host     host)
+    ?port:     (pass_if_missing url.url_port     port)
+    ~path:     (if url.url_path  = [] then enc_list path  else url.url_path)
+    ~param:    (if url.url_param = [] then enc_list param else url.url_param)
+    ?query:    (pass_if_missing url.url_query    (enc query))
+    ?fragment: (pass_if_missing url.url_fragment (enc fragment))
+    ?other:    (pass_if_missing url.url_other    (enc other))
+    url.url_syntax
+;;
+
+
+let undefault_url
+      ?scheme
+      ?user
+      ?password
+      ?host
+      ?port
+      ?path
+      ?param
+      ?query
+      ?fragment
+      ?other
+      url
+  =
+
+  let remove_if_matching current arg =
+    match current with
+       None -> None
+      | Some x -> 
+         (match arg with
+              None -> current
+            | Some x' ->
+                if x=x' then
+                  None
+                else
+                  current)
+  in
+
+  make_url
+    ~encoded:  true
+    ?scheme:   (remove_if_matching url.url_scheme   scheme)
+    ?user:     (remove_if_matching url.url_user     user)
+    ?password: (remove_if_matching url.url_password password)
+    ?host:     (remove_if_matching url.url_host     host)
+    ?port:     (remove_if_matching url.url_port     port)
+    ~path:     (match path with
+                    None -> url.url_path
+                  | Some x ->
+                      if x = url.url_path then
+                        []
+                      else
+                        url.url_path)
+    ~param:    (match param with
+                    None -> url.url_param
+                  | Some x ->
+                      if x = url.url_param then
+                        []
+                      else
+                        url.url_param)
+    ?query:    (remove_if_matching url.url_query    query)
+    ?fragment: (remove_if_matching url.url_fragment fragment)
+    ?other:    (remove_if_matching url.url_other    other)
+    url.url_syntax
+;;
+
+
+let url_provides 
+      ?(scheme = false)
+      ?(user = false)
+      ?(password = false)
+      ?(host = false)
+      ?(port = false)
+      ?(path = false)
+      ?(param = false)
+      ?(query = false)
+      ?(fragment = false)
+      ?(other = false)
+      url
+  =
+  
+  (scheme   => (url.url_scheme   <> None)) &
+  (user     => (url.url_user     <> None)) &
+  (password => (url.url_password <> None)) &
+  (host     => (url.url_host     <> None)) &
+  (port     => (url.url_port     <> None)) &
+  (path     => (url.url_path     <> []))   &
+  (param    => (url.url_param    <> [])) &
+  (query    => (url.url_query    <> None)) &
+  (fragment => (url.url_fragment <> None)) &
+  (other    => (url.url_other    <> None))
+;;
+  
+
+let return_if value =
+  match value with
+      None -> raise Not_found
+    | Some x -> x
+;;
+
+
+let decode_if want_encoded value =
+  let value' = return_if value in
+  if want_encoded then
+    value'
+  else
+    Netencoding.Url.decode value'     (* WARNING: not thread-safe! *)
+;;
+
+
+let decode_path_if want_encoded value =
+  if want_encoded then
+    value
+  else
+    List.map Netencoding.Url.decode value     (* WARNING: not thread-safe! *)
+;;
+
+
+let url_scheme                    url = return_if url.url_scheme;;
+let url_user     ?(encoded=false) url = decode_if encoded url.url_user;;
+let url_password ?(encoded=false) url = decode_if encoded url.url_password;;
+let url_host                      url = return_if url.url_host;;
+let url_port                      url = return_if url.url_port;;
+let url_path     ?(encoded=false) url = decode_path_if encoded url.url_path;;
+let url_param    ?(encoded=false) url = decode_path_if encoded url.url_param;;
+let url_query    ?(encoded=false) url = decode_if encoded url.url_query;;
+let url_fragment ?(encoded=false) url = decode_if encoded url.url_fragment;;
+let url_other    ?(encoded=false) url = decode_if encoded url.url_other;;
+
+
+let string_of_url url =
+  if not (url.url_validity) then
+    failwith "Neturl.string_of_url: URL not flagged as valid";
+  (match url.url_scheme with
+       None -> ""
+     | Some s -> s ^ ":") ^ 
+  (match url.url_host with
+       None -> ""
+     | Some host ->
+        "//" ^ 
+        (match url.url_user with
+             None -> "" 
+           | Some user -> 
+               user ^ 
+               (match url.url_password with
+                    None -> ""
+                  | Some password ->
+                      ":" ^ password 
+               ) ^ 
+               "@") ^ 
+        host ^ 
+        (match url.url_port with
+             None -> ""
+           | Some port ->
+               ":" ^ string_of_int port)) ^ 
+  (match url.url_path with
+     | [""] ->
+        "/"
+     | x :: p  when  url.url_scheme = None &&
+                     url.url_host = None &&
+                    String.contains x ':' 
+       ->
+         (* Really a special case: the colon contained in 'x' may cause
+          * a prefix of 'x' to be interpreted as a URL scheme. In this
+          * case, "./" is prepended (as recommended in RFC 1808, 5.3).
+          *)
+         "./"
+     | _ ->
+        ""
+  ) ^
+  String.concat "/" url.url_path ^ 
+  (match url.url_other with
+       None -> ""
+     | Some other ->
+        other) ^ 
+  String.concat ""  (List.map (fun s -> ";" ^ s) url.url_param) ^ 
+  (match url.url_query with
+       None -> ""
+     | Some query ->
+        "?" ^ query) ^ 
+  (match url.url_fragment with
+       None -> ""
+     | Some fragment ->
+        "#" ^ fragment)
+;;
+
+
+let url_of_string url_syntax s =
+  let l = String.length s in
+  let recognized x = x <> Url_part_not_recognized in
+
+  let rec collect_words terminators eof_char cats k =
+    (* Collect words as recognized by 'cats', starting at position 'k' in
+     * 's'. Collection stops if one of the characters listed in 'terminators'
+     * is found. If the end of the string is reached, it is treated as
+     * 'eof_char'.
+     *)
+    let k' = scan_url_part s k l cats url_syntax.url_accepts_8bits in  
+             (* or raise Malformed_URL *)
+    let word, sep =
+      String.sub s k (k'-k), (if k'<l then s.[k'] else eof_char) in
+    if List.mem sep terminators then
+      [word, sep], k'
+    else
+      let word_sep_list', k'' = 
+       collect_words terminators eof_char cats (k'+1) in
+      ((word, sep) :: word_sep_list'), k''
+  in
+
+  (* Try to extract the scheme name: *)
+  let scheme, k1 =
+    if recognized url_syntax.url_enable_scheme then
+      try
+       let k = scan_url_part s 0 l scheme_cats false in
+        (* or raise Malformed_URL *)
+       if k = l then raise Malformed_URL;
+       assert (s.[k] = ':');
+       Some (String.sub s 0 k), (k+1)
+      with
+         Malformed_URL -> None, 0
+    else
+      None, 0
+  in
+
+  (* If there is a "//", a host will follow: *)
+  let host, port, user, password, k2 =
+    if recognized url_syntax.url_enable_host  &&
+       k1 + 2 <= l  &&  s.[k1]='/'  && s.[k1+1]='/' then begin
+
+      let word_sep_list, k' = collect_words [ '/'; '#' ] '/' login_cats (k1+2) 
+      in
+          (* or raise Malformed_URL *)
+
+      let int x =
+       try int_of_string x with _ -> raise Malformed_URL in
+
+      match word_sep_list with
+         [ host, ('/'|'#') ] ->
+           Some host, None, None, None, k'
+       | [ host, ':'; port, ('/'|'#') ] ->
+           Some host, Some (int port), None, None, k'
+       | [ user, '@'; host, ('/'|'#') ] ->
+           Some host, None, Some user, None, k'
+       | [ user, '@'; host, ':'; port, ('/'|'#') ] ->
+           Some host, Some (int port), Some user, None, k'
+       | [ user, ':'; password, '@'; host, ('/'|'#') ] ->
+           Some host, None, Some user, Some password, k'
+       | [ user, ':'; password, '@'; host, ':'; port, ('/'|'#') ] ->
+           Some host, Some (int port), Some user, Some password, k'
+       | _ ->
+           raise Malformed_URL
+    end
+    else
+      None, None, None, None, k1
+  in
+
+  let path, k3 =
+    if recognized url_syntax.url_enable_path  &&
+       k2 < l  (*  &&  s.[k2]='/'  *)
+    then begin
+      let cats = path_cats_from_syntax url_syntax [ '/' ] in
+      let seps = separators_from_syntax url_syntax in
+
+      (* Note: '>' is not allowed within URLs; because of this we can use
+       * it as end-of-string character.
+       *)
+
+      let word_sep_list, k' = collect_words ('>'::seps) '>' cats k2 in
+          (* or raise Malformed_URL *)
+      match word_sep_list with
+         [ "", '/'; "", _ ] ->
+           [ "" ], k'
+       | [ "", _ ] ->
+           [], k'
+       | _ ->
+           List.map fst word_sep_list, k'
+    end
+    else begin
+      (* If there is a single '/': skip it *)
+      if not (recognized url_syntax.url_enable_other) &&
+        k2 < l  &&  s.[k2]='/'
+      then
+       [], (k2+1)
+      else
+       [], k2
+    end
+  in
+
+  let other, k4 =
+    if recognized url_syntax.url_enable_other  &&
+       k3 < l 
+    then begin
+      
+      let cats = other_cats_from_syntax url_syntax in
+
+      (* Note: '>' is not allowed within URLs; because of this we can use
+       * it as end-of-string character.
+       *)
+
+      let word_sep_list, k' = collect_words ['>';'#'] '>' cats k3 in
+          (* or raise Malformed_URL *)
+
+      match word_sep_list with
+         [ other, _ ] -> Some other, k'
+       | _ -> assert false
+    end
+    else
+      None, k3
+  in
+
+  let param, k5 =
+    if recognized url_syntax.url_enable_param  &&
+       k4 < l  &&  s.[k4]=';' 
+    then begin
+      let cats  = path_cats_from_syntax url_syntax [] in
+      let seps  = separators_from_syntax url_syntax in
+      let seps' = List.filter (fun c -> c <> ';') seps in
+
+      (* Note: '>' is not allowed within URLs; because of this we can use
+       * it as end-of-string character.
+       *)
+
+      let word_sep_list, k' = collect_words ('>'::seps') '>' cats (k4+1) in
+          (* or raise Malformed_URL *)
+      
+      List.map fst word_sep_list, k'
+    end
+    else
+      [], k4
+  in
+
+  let query, k6 =
+    if recognized url_syntax.url_enable_query  &&
+       k5 < l  &&  s.[k5]='?'
+    then begin
+      let cats  = path_cats_from_syntax url_syntax [] in
+      let seps  = separators_from_syntax url_syntax in
+      
+      (* Note: '>' is not allowed within URLs; because of this we can use
+       * it as end-of-string character.
+       *)
+
+      let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k5+1) in
+          (* or raise Malformed_URL *)
+
+      match word_sep_list with
+         [ query, _ ] -> Some query, k'
+       | _ -> assert false
+    end
+    else
+      None, k5
+  in
+
+  let fragment, k7 =
+    if recognized url_syntax.url_enable_fragment  &&
+       k6 < l  &&  s.[k6]='#'
+    then begin
+      let cats  = path_cats_from_syntax url_syntax [] in
+      let seps  = separators_from_syntax url_syntax in
+      
+      (* Note: '>' is not allowed within URLs; because of this we can use
+       * it as end-of-string character.
+       *)
+
+      let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k6+1) in
+          (* or raise Malformed_URL *)
+
+      match word_sep_list with
+         [ fragment, _ ] -> Some fragment, k'
+       | _ -> assert false
+    end
+    else
+      None, k6
+  in
+
+  if k7 <> l then raise Malformed_URL;
+
+  make_url
+    ~encoded:true
+    ?scheme:scheme
+    ?user:user
+    ?password:password
+    ?host:host
+    ?port:port
+    ~path:path
+    ~param:param
+    ?query:query
+    ?fragment:fragment
+    ?other:other
+    url_syntax
+;;
+
+
+let split_path s =
+  let l = String.length s in
+  let rec collect_words k =
+    let k' = 
+      try
+       String.index_from s k '/'
+      with
+         Not_found -> l
+    in
+    let word = String.sub s k (k'-k) in
+    if k' >= l then
+      [word]
+    else
+      word :: collect_words (k'+1)
+  in
+  match collect_words 0 with
+      [ "" ] -> []
+    | [ "";"" ] -> [ "" ]
+    | other -> other
+;;
+
+
+let join_path l = 
+  match l with
+      [ "" ] -> "/"
+    | _      -> String.concat "/" l;;
+
+
+let norm_path l = 
+
+  let rec remove_slash_slash l first =
+    match l with
+      | [ "" ] ->
+         [ "" ]
+      | [ ""; "" ] when first ->
+         [ "" ]
+      | "" :: l' when not first ->
+         remove_slash_slash l' false
+      | x :: l' ->
+         x :: remove_slash_slash l' false
+      | [] ->
+         []
+  in
+
+  let rec remove_dot l first =
+    match l with
+      | ([ "." ] | ["."; ""]) ->
+         if first then [] else [ "" ]
+      |        "." :: x :: l' ->
+         remove_dot (x :: l') false
+      | x :: l' ->
+         x :: remove_dot l' false
+      | [] ->
+         []
+  in
+
+  let rec remove_dot_dot_once l first =
+    match l with
+       x :: ".." :: [] when x <> "" && x <> ".." && not first ->
+         [ "" ]
+      |        x :: ".." :: l' when x <> "" && x <> ".." ->
+         l'
+      | x :: l' ->
+         x :: remove_dot_dot_once l' false
+      | [] ->
+         raise Not_found
+  in
+
+  let rec remove_dot_dot l =
+    try
+      let l' = remove_dot_dot_once l true in
+      remove_dot_dot l'
+    with
+       Not_found -> l
+  in
+
+  let l' = remove_dot_dot (remove_dot (remove_slash_slash l true) true) in
+  match l' with
+      [".."] -> [".."; ""]
+    | ["";""] -> [ "" ]
+    | _      -> l'
+;;
+
+
+let apply_relative_url baseurl relurl =
+  if not (baseurl.url_validity) or not (relurl.url_validity) then
+    failwith "Neturl.apply_relative_url: URL not flagged as valid";
+
+  if relurl.url_scheme <> None then
+    modify_url 
+      ~syntax:baseurl.url_syntax           (* inherit syntax *)
+      relurl
+  else
+    if relurl.url_host <> None then
+      modify_url 
+       ~syntax:baseurl.url_syntax         (* inherit syntax and scheme *)
+       ?scheme:baseurl.url_scheme
+       relurl
+    else
+      match relurl.url_path with
+         "" :: other ->
+           (* An absolute path *)
+           modify_url 
+             ~syntax:baseurl.url_syntax   (* inherit syntax, scheme, and *)
+             ~encoded:true
+             ?scheme:baseurl.url_scheme   (* login info *)
+             ?host:baseurl.url_host
+             ?port:baseurl.url_port
+             ?user:baseurl.url_user
+             ?password:baseurl.url_password
+             relurl
+       | [] ->
+           (* Empty: Inherit also path, params, query, and fragment *)
+           let new_params, new_query, new_fragment =
+             match relurl.url_param, relurl.url_query, relurl.url_fragment
+             with
+                 [], None, None ->
+                   (* Inherit all three *)
+                   baseurl.url_param, baseurl.url_query, baseurl.url_fragment
+               | [], None, f ->
+                   (* Inherit params and query *)
+                   baseurl.url_param, baseurl.url_query, f
+               | [], q, f ->
+                   (* Inherit params *)
+                   baseurl.url_param, q, f
+               | p, q, f ->
+                   (* Inherit none of them *)
+                   p, q, f
+           in
+           modify_url 
+             ~syntax:baseurl.url_syntax
+             ~encoded:true
+             ?scheme:baseurl.url_scheme
+             ?host:baseurl.url_host
+             ?port:baseurl.url_port
+             ?user:baseurl.url_user
+             ?password:baseurl.url_password
+             ~path:baseurl.url_path
+             ~param:new_params
+             ?query:new_query
+             ?fragment:new_fragment
+             relurl
+       | relpath ->
+           (* A relative path *)
+           let rec change_path basepath =
+             match basepath with
+               | [] ->
+                   relpath
+               | [ x ] ->
+                   relpath
+               | x :: basepath' ->
+                   x :: change_path basepath'
+           in
+           let new_path = norm_path (change_path baseurl.url_path) in
+           modify_url 
+             ~syntax:baseurl.url_syntax   (* inherit syntax, scheme, and *)
+             ~encoded:true
+             ?scheme:baseurl.url_scheme   (* login info *)
+             ?host:baseurl.url_host
+             ?port:baseurl.url_port
+             ?user:baseurl.url_user
+             ?password:baseurl.url_password
+             ~path:new_path               (* and change path *)
+             relurl
+
+;;
+
+
+let print_url url =
+  Format.print_string ("<URL:" ^ string_of_url url ^ ">")
+;;
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:28  lpadovan
+ * Initial revision
+ *
+ * Revision 1.4  2000/07/04 21:50:51  gerd
+ *     Fixed typo.
+ *
+ * Revision 1.3  2000/06/26 22:57:49  gerd
+ *     Change: The record 'url_syntax' has an additional component
+ * 'url_accepts_8bits'. Setting this option to 'true' causes that
+ * the bytes >= 0x80 are no longer rejected.
+ *
+ * Revision 1.2  2000/06/25 19:39:48  gerd
+ *     Lots of Bugfixes.
+ *
+ * Revision 1.1  2000/06/24 20:19:59  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/neturl.mli b/helm/DEVEL/pxp/netstring/neturl.mli
new file mode 100644 (file)
index 0000000..988aef6
--- /dev/null
@@ -0,0 +1,460 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* This module already makes use of O'Caml-3 features. *)
+
+(* Uniform Resource Locators (URLs):
+ *
+ * This module provides functions to parse URLs, to print URLs, to
+ * store URLs, to modify URLs, and to apply relative URLs.
+ *
+ * URLs are strings formed according to pattern (1) or (2):
+ *
+ * (1) scheme://user:password@host:port/path;params?query#fragment
+ * (2) scheme:other;params?query#fragment
+ *
+ * The word at the beginning of the URL identifies the URL scheme
+ * (such as "http" or "file"). Depending on the scheme, not all of the
+ * parts are allowed, or parts may be omitted. This module defines the
+ * type 'url_syntax' whose values describe which parts are allowed/required/
+ * not allowed for a concrete URL scheme (see below).
+ *
+ * Not all characters are allowed in a URL. Some characters are allowed,
+ * but serve the special purpose of separating the various parts of the URL
+ * (reserved characters).
+ * However, it is possible to include even invalid or reserved characters
+ * as normal content by applying the '%'-encoding on these characters:
+ * A '%' indicates that an encoded character follows, and the character
+ * is denoted by a two-digit hexadecimal number (e.g. %2f for '/').
+ * In the following descriptions, the term "encoded string" means a string
+ * containing such %-encoded characters, and the "decoded string" means a
+ * string not containing such characters.
+ * See the module Netencoding.Url for functions encoding or decoding
+ * strings.
+ *
+ * The type 'url' describes values storing the components of a URL,
+ * and the 'url_syntax' for the URL. In general, the components are
+ * stored as encoded strings; however, the '%'-encoding is not
+ * applicable to all components.
+ * For convenience, the functions creating, modifying, and accessing
+ * URLs can handle both encoded and decoded strings. In order to
+ * avoid errors, strings are passed in their decoded form by default.
+ *
+ * Note that there is currently no function to compare URLs. The
+ * canonical comparison ( = ) is not applicable because the same URL
+ * may be written differently.
+ *
+ * Note that nothing is said about the character set/encoding of URLs.
+ * Some protocols and standards prefer UTF-8 as fundamental encoding
+ * and apply the '%'-encoding on top of it; i.e. the byte sequence
+ * representing a character in UTF-8 is '%'-encoded. There is no special
+ * support for this technique.
+ *
+ * For more information about URLs, see RFCs 1738 and 1808.
+ *)
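+
+(* As a quick illustration of the '%'-encoding (a sketch only; see the
+ * module Netencoding.Url for the actual interface):
+ *
+ * Netencoding.Url.encode "a/b";;
+ *   --> "a%2Fb"
+ * Netencoding.Url.decode "a%2Fb";;
+ *   --> "a/b"
+ *)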
+
+exception Malformed_URL
+(* Is raised by a number of functions when encountering a badly formed
+ * URL.
+ *)
+
+val extract_url_scheme : string -> string
+  (* Returns the URL scheme from the string representation of a URL.
+   * E.g. extract_url_scheme "http://host/path" = "http". 
+   * The scheme name is always converted to lowercase characters.
+   * Raises Malformed_URL if the scheme name is not found.
+   *)
+
+type url_syntax_option =
+    Url_part_not_recognized
+  | Url_part_allowed
+  | Url_part_required
+
+
+type url_syntax =
+    { url_enable_scheme    : url_syntax_option;
+      url_enable_user      : url_syntax_option;
+      url_enable_password  : url_syntax_option;
+      url_enable_host      : url_syntax_option;
+      url_enable_port      : url_syntax_option;
+      url_enable_path      : url_syntax_option;
+      url_enable_param     : url_syntax_option;
+      url_enable_query     : url_syntax_option;
+      url_enable_fragment  : url_syntax_option;
+      url_enable_other     : url_syntax_option;
+      url_accepts_8bits    : bool;
+      url_is_valid         : url -> bool;
+    }
+
+and url
+;;
+
+(* Values of type 'url_syntax' describe which components of a URL are
+ * recognized, which are allowed (and optional), and which are required.
+ * Not all combinations are valid; the predicate expressed by the
+ * function 'url_syntax_is_valid' must hold.
+ * The function 'url_is_valid' is applied when a fresh URL is created
+ * and must return 'true'. This function makes it possible to add an
+ * arbitrary validity criterion to 'url_syntax'. (Note that the URL passed to
+ * this function is not fully working; you can safely assume that the
+ * accessor functions url_scheme etc. can be applied to it.)
+ *
+ * Switch 'url_accepts_8bits': If 'true', the bytes with code 128 to
+ * 255 are treated like alphanumeric characters; if 'false', these bytes
+ * are illegal (but it is still possible to include such bytes in their
+ * encoded form: %80 to %FF).
+ *
+ * Values of type 'url' describe concrete URLs. Every URL must have
+ * a fundamental 'url_syntax', and it is only possible to create URLs
+ * conforming to the syntax. See 'make_url' for further information.
+ *)
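+
+(* For example, a syntax that accepts 8-bit bytes and fragments can be
+ * derived from one of the predefined syntaxes declared below (a sketch;
+ * it assumes 'ip_url_syntax' already recognizes the parts you need):
+ *
+ * let my_syntax =
+ *   { ip_url_syntax with
+ *       url_enable_fragment = Url_part_allowed;
+ *       url_accepts_8bits   = true
+ *   };;
+ *)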
+
+
+val url_syntax_is_valid : url_syntax -> bool
+  (* Checks whether the passed url_syntax is valid. This means:
+   *
+   * - If passwords are recognized, users (and hosts) must be recognized, too
+   * - If ports are recognized, hosts must be recognized, too
+   * - If users are recognized, hosts must be recognized, too
+   * - Either the syntax recognizes one of the phrases
+   *   { user, password, host, port, path }, or the syntax recognizes
+   *   the phrase 'other'.
+   *)
+
+
+val partial_url_syntax : url_syntax -> url_syntax
+  (* Transforms the syntax into another syntax where all required parts are
+   * changed into optional parts.
+   *)
+
+
+(* Note that none of the following url_syntaxes allows 8-bit bytes. *)
+
+val null_url_syntax   : url_syntax
+
+val ip_url_syntax : url_syntax
+  (* Maximum syntax for IP based protocols *)
+
+val common_url_syntax : (string, url_syntax) Hashtbl.t
+  (* Syntax descriptions for common URL schemes:
+   * 
+   * null_url_syntax: nothing is recognized
+   *
+   * common_url_syntax: Hashtable mapping from URL scheme names to
+   * definitions of syntaxes:
+   *
+   * "file":   scheme, host?, path
+   * "ftp":    scheme, user?, password?, host, port?, path?, param?
+   * "http":   scheme, user?, password?, host, port?, path?, query?
+   * "mailto": scheme, other
+   *
+   * Notes:
+   * (1) These syntax descriptions can be weakened for partial/relative URLs 
+   *     by changing the required parts to optional parts: See the function
+   *     'partial_url_syntax'.
+   * (2) None of the descriptions allows fragments. These can be enabled by
+   *     setting 'url_enable_fragment' to Url_part_allowed. E.g.
+   *     { (Hashtbl.find common_url_syntax "file")
+   *         with url_enable_fragment = Url_part_allowed }
+   *)
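+
+(* For instance (a sketch of looking up and using one of these syntaxes):
+ *
+ * let http = Hashtbl.find common_url_syntax "http";;
+ * let u = url_of_string http "http://host/index.html";;
+ * url_host u;;
+ *   --> "host"
+ *)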
+
+val null_url : url
+  (* A URL without any components, based on 'null_url_syntax'
+   *)
+
+val make_url :
+      ?encoded:bool ->
+      ?scheme:string ->
+      ?user:string ->
+      ?password:string ->
+      ?host:string ->
+      ?port:int ->
+      ?path:string list ->
+      ?param:string list ->
+      ?query:string ->
+      ?fragment:string ->
+      ?other:string ->
+      url_syntax ->
+      url
+  (* Creates a URL from components:
+   * 
+   * - The components "scheme" and "host" are simple strings to which the
+   *   '%'-encoding is not applicable.
+   * - The component "port" is a simple number. Of course, the '%'-encoding
+   *   is not applicable either.
+   * - The components "user", "password", "query", "fragment", and "other"
+   *   are strings which may contain '%'-encoded characters. By default,
+   *   you can pass any string for these components, and problematic characters 
+   *   are automatically encoded. If you set ~encoded:true, the passed
+   *   strings must already be encoded, but the function checks whether
+   *   the encoding is correct.
+   *   Note that for "query" even the characters '?' and '=' are encoded
+   *   by default, so you need to set ~encoded:true to pass a reasonable
+   *   query string.
+   * - The components "path" and "param" are lists of strings which may
+   *   contain '%'-encoded characters. Again, the default is to pass
+   *   decoded strings to the function, and the function encodes them
+   *   automatically; by setting ~encoded:true the caller is responsible
+   *   for encoding the strings.
+   *   path = [] and param = [] mean that no path and no parameters are
+   *   specified, respectively.
+   *   See below for the representation of these components.
+   *
+   * Except of "path", the strings representing the components do not
+   * contain the characters separating the components from each other. 
+   * The "path" component includes the '/' at the beginning of the path
+   * (if present).
+   *
+   * The created URL must conform to the 'url_syntax', i.e.
+   * - The URL must only contain components which are recognized by the
+   *   syntax
+   * - The URL must contain components which are required by the syntax
+   * - The URL must fulfill the predicate expressed by the 'url_is_valid'
+   *   function of the syntax.
+   *
+   * The path of a URL is represented as a list of '/'-separated path
+   * components, i.e.
+   *   [ s1; s2; ...; sN ]  represents the path  
+   *                        s1 ^ "/" ^ s2 ^ "/" ^ ... ^ "/" ^ sN
+   * As special cases:
+   *   []                   is the non-existing path
+   *   [ "" ]               is "/"
+   *   [ "";"" ]            is illegal
+   * 
+   * Except for s1 and sN, the path components must not be empty strings.
+   *
+   * To avoid ambiguities, it is illegal to create URLs with both relative
+   * paths (s1 <> "") and host components.
+   *
+   * Parameters of URLs are components beginning with ';'. The list
+   * of parameters is represented as a list of strings where the strings
+   * contain the value following ';'.
+   *)
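+
+(* A small sketch of using make_url (it assumes the "http" entry of
+ * common_url_syntax documented above):
+ *
+ * let http = Hashtbl.find common_url_syntax "http";;
+ * let u =
+ *   make_url
+ *     ~scheme:"http" ~user:"g" ~password:"pw" ~host:"host"
+ *     ~path:[ ""; "a"; "b"; "" ]
+ *     http;;
+ * string_of_url u;;
+ *   --> "http://g:pw@host/a/b/"
+ *)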
+
+val modify_url :
+      ?syntax:url_syntax ->
+      ?encoded:bool ->
+      ?scheme:string ->
+      ?user:string ->
+      ?password:string ->
+      ?host:string ->
+      ?port:int ->
+      ?path:string list ->
+      ?param:string list ->
+      ?query:string ->
+      ?fragment:string ->
+      ?other:string ->
+      url ->
+      url
+  (* Modifies the passed components and returns the modified URL. 
+   * The modified URL shares unmodified components with the original
+   * URL.
+   *)
+
+val remove_from_url :
+      ?scheme:bool ->
+      ?user:bool ->
+      ?password:bool ->
+      ?host:bool ->
+      ?port:bool ->
+      ?path:bool ->
+      ?param:bool ->
+      ?query:bool ->
+      ?fragment:bool ->
+      ?other:bool ->
+      url ->
+      url
+  (* Removes the 'true' components from the URL, and returns the modified
+   * URL.
+   * The modified URL shares unmodified components with the original
+   * URL.
+   *)
+
+val default_url :
+      ?encoded:bool -> 
+      ?scheme:string ->
+      ?user:string ->
+      ?password:string ->
+      ?host:string ->
+      ?port:int ->
+      ?path:string list ->
+      ?param:string list ->
+      ?query:string ->
+      ?fragment:string ->
+      ?other:string ->
+      url ->
+      url
+  (* Adds missing components and returns the modified URL.
+   * The modified URL shares unmodified components with the original
+   * URL.
+   *)
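+
+(* For instance (a sketch): if 'u' has no port, then
+ *
+ * let u' = default_url ~port:80 u;;
+ * url_port u';;
+ *   --> 80
+ *
+ * whereas a port that is already present is left untouched.
+ *)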
+
+val undefault_url :
+      ?scheme:string ->
+      ?user:string ->
+      ?password:string ->
+      ?host:string ->
+      ?port:int ->
+      ?path:string list ->
+      ?param:string list ->
+      ?query:string ->
+      ?fragment:string ->
+      ?other:string ->
+      url ->
+      url
+  (* Removes components from the URL if they have the passed value, and
+   * returns the modified URL.
+   * Note: The values must always be passed in _encoded_ form!
+   * The modified URL shares unmodified components with the original
+   * URL.
+   *)
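+
+(* For instance (a sketch, assuming 'u' was parsed with the "http"
+ * syntax and reads "http://host:80/a"):
+ *
+ * let u' = undefault_url ~port:80 u;;
+ * string_of_url u';;
+ *   --> "http://host/a"
+ *
+ * i.e. the port is dropped because it equals the passed default.
+ *)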
+
+val url_syntax_of_url : url -> url_syntax
+  (* Returns the 'url_syntax' record of a URL. *)
+
+val url_of_string : url_syntax -> string -> url
+  (* Parses the passed string according to the passed url_syntax. *)
+
+val string_of_url : url -> string
+  (* Returns the URL as string *)
+
+val url_provides :
+      ?scheme:bool ->
+      ?user:bool ->
+      ?password:bool ->
+      ?host:bool ->
+      ?port:bool ->
+      ?path:bool ->
+      ?param:bool ->
+      ?query:bool ->
+      ?fragment:bool ->
+      ?other:bool ->
+      url ->
+      bool
+  (* Returns 'true' iff the URL has all of the components passed with
+   * 'true' value.
+   *)
+
+val url_scheme    :                  url -> string
+val url_user      : ?encoded:bool -> url -> string
+val url_password  : ?encoded:bool -> url -> string
+val url_host      :                  url -> string
+val url_port      :                  url -> int
+val url_path      : ?encoded:bool -> url -> string list
+val url_param     : ?encoded:bool -> url -> string list
+val url_query     : ?encoded:bool -> url -> string
+val url_fragment  : ?encoded:bool -> url -> string
+val url_other     : ?encoded:bool -> url -> string
+  (* Return components of the URL. The functions return decoded strings
+   * unless ~encoded:true is set.
+   * If the component does not exist, the exception Not_found
+   * is raised.
+   *)
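+
+(* Since the accessors raise Not_found for missing components, one
+ * possible pattern is (a sketch):
+ *
+ * let port_of u = try url_port u with Not_found -> 80;;
+ *)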
+
+val split_path : string -> string list
+  (* Splits a '/'-separated path into components (e.g. to set up the
+   * ~path argument of make_url).
+   * E.g. split_path "a/b/c" = [ "a"; "b"; "c" ],
+   *      split_path "/a/b"  = [ ""; "a"; "b" ],
+   *      split_path "a/b/"  = [ "a"; "b"; "" ]
+   *)
+
+val join_path : string list -> string
+  (* Concatenates the path components (reverse function of split_path).
+   *)
+
+val norm_path : string list -> string list
+  (* Removes "." and ".." from the path if possible. Deletes double slashes.
+   *
+   * EXAMPLES:
+   *
+   * norm_path ["."] = []
+   *           means: "." = ""
+   * norm_path ["."; ""] = []
+   *           means: "./" = ""
+   * norm_path ["a"; "."] = ["a"; ""]
+   *           means: "a/." = "a/"
+   * norm_path ["a"; "b"; "."] = ["a"; "b"; ""]
+   *           means: "a/b/." = "a/b/"
+   * norm_path ["a"; "."; "b"; "."] = ["a"; "b"; ""]
+   *           means: "a/./b/." = "a/b/"
+   * norm_path [".."] = [".."; ""]
+   *           means: ".." = "../"
+   * norm_path [".."; ""] = [".."; ""]
+   *           means: "../" = "../"
+   * norm_path ["a"; "b"; ".."; "c" ] = ["a"; "c"]
+   *           means: "a/b/../c" = "a/c"
+   * norm_path ["a"; "b"; ".."; "c"; ""] = ["a"; "c"; ""]
+   *           means: "a/b/../c/" = "a/c/"
+   * norm_path ["";"";"a";"";"b"] = [""; "a"; "b"]
+   *           means: "//a//b" = "/a/b"
+   * norm_path ["a"; "b"; ""; ".."; "c"; ""] = ["a"; "c"; ""]
+   *           means: "a/b//../c/" = "a/c/"
+   * norm_path ["a"; ".."] = []
+   *           means: "a/.." = ""
+   *)
+
+
+val apply_relative_url : url -> url -> url
+  (* apply_relative_url base rel:
+   * Interprets 'rel' relative to 'base' and returns the new URL. This
+   * function implements RFC 1808.
+   *)
+
+val print_url : url -> unit
+  (* Printer for the toploop. *)
+
+(* ---------------------------------------------------------------------- *)
+
+(* EXAMPLES:
+ *
+ * let http = Hashtbl.find common_url_syntax "http";;
+ * let u = url_of_string http "http://g:pw@host/a/%62/";;
+ * string_of_url u;;
+ *   --> "http://g:pw@host/a/%62/"
+ * url_scheme u;;
+ *   --> "http"
+ * url_user u;;
+ *   --> "g"
+ * url_password u;;
+ *   --> "pw"
+ * url_host u;;
+ *   --> "host"
+ * url_path u;;
+ *   --> [ ""; "a"; "b"; "" ]          (* sic! *)
+ * url_path ~encoded:true u;;
+ *   --> [ ""; "a"; "%62"; "" ]
+ * let v = make_url 
+ *   ~path:[ ".."; "c" ]
+ *   ~fragment:"near-the-#-character"
+ *   { (partial_url_syntax http) with url_enable_fragment = Url_part_allowed };;
+ * string_of_url v;;
+ *   --> "../c#near-the-%23-character"
+ * let u' = modify_url ~syntax:(url_syntax_of_url v) u;;
+ *    (* u does not permit fragments *)
+ * let w = apply_relative_url u' v;;
+ * string_of_url w;;
+ *   --> "http://g:pw@host/c#near-the-%23-character"
+ *)
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:27  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/06/26 22:57:49  gerd
+ *     Change: The record 'url_syntax' has an additional component
+ * 'url_accepts_8bits'. Setting this option to 'true' causes that
+ * the bytes >= 0x80 are no longer rejected.
+ *
+ * Revision 1.2  2000/06/25 22:55:47  gerd
+ *     Doc update.
+ *
+ * Revision 1.1  2000/06/24 20:19:59  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/netstring/tests/.cvsignore b/helm/DEVEL/pxp/netstring/tests/.cvsignore
new file mode 100644 (file)
index 0000000..c1fcbc4
--- /dev/null
@@ -0,0 +1,7 @@
+*.cmo
+*.cmx
+*.cmi
+
+*.o
+*.a
+
diff --git a/helm/DEVEL/pxp/netstring/tests/Makefile b/helm/DEVEL/pxp/netstring/tests/Makefile
new file mode 100644 (file)
index 0000000..1aa5265
--- /dev/null
@@ -0,0 +1,26 @@
+# Note: you need an appropriate toploop "ocamlfattop" to run the
+# tests.
+
+# 2nd note: "test_encoding.cgi" is a CGI script; you must invoke
+# it through a browser and a WWW server.
+
+test: test_recode
+       ocamlfattop test_netencoding.ml
+       ocamlfattop test_mimestring.ml
+       ocamlfattop test_cgi.ml
+       ocamlfattop test_neturl.ml
+       ./test_recode
+
+test_recode: test_recode.ml
+       ocamlc -custom -o test_recode unix.cma threads.cma str.cma \
+               ../netstring.cma ../netmappings_iso.cmo \
+               -I .. -thread test_recode.ml
+
+distclean: clean
+       rm -f *~ test_recode
+
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+CLEAN:
+
diff --git a/helm/DEVEL/pxp/netstring/tests/test_cgi.ml b/helm/DEVEL/pxp/netstring/tests/test_cgi.ml
new file mode 100644 (file)
index 0000000..43d9886
--- /dev/null
@@ -0,0 +1,423 @@
+#require "str";;
+#directory "..";;
+#load "netstring.cma";;
+
+
+open Cgi;;
+
+(**********************************************************************)
+(* dest_form_encoded_parameters                                       *)
+(**********************************************************************)
+
+let t001 f =
+  let r =
+    f
+      "blah blah
+--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+--snip--
+blah blah"
+      "snip"
+  in
+  r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t002 f =
+  let r =
+    f
+      "blah blah
+--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+--snip--
+blah blah"
+      "snip"
+  in
+  r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t003 f =
+  let r =
+    f
+      "--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+--snip--"
+      "snip"
+  in
+  r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t004 f =
+  let r =
+    f
+      "--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+
+--snip--"
+      "snip"
+  in
+  r = ["blupp", "text/plain", "This is a text\013\n"]
+;;
+
+
+let t005 f =
+  let r =
+    f
+      "--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+
+--snip--"
+      "snip"
+  in
+  r = ["blupp", "text/plain", "This is a text\n"]
+;;
+
+
+let t006 f =
+  let r =
+    f
+      "blah blah
+--snip
+Content-Disposition: form-data;name= \"blupp\"
+
+This is a text
+--snip--
+blah blah"
+      "snip"
+  in
+  r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t007 f =
+  let r =
+    f
+      "blah blah
+--snip
+Content-Disposition: form-data;name= \"name=blupp\"
+
+This is a text
+--snip--
+blah blah"
+      "snip"
+  in
+  r = ["name=blupp", "text/plain", "This is a text"]
+;;
+
+
+let t008 f =
+  let r =
+    f
+      "blah blah
+--snip
+Content-Disposition: form-data; strange=\"name=blop\"; name= \"blupp\"
+
+This is a text
+--snip--
+blah blah"
+      "snip"
+  in
+  r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t009 f =
+  let r =
+    f
+      "blah blah
+--snip
+Content-Disposition: form-data; strange=\" name=blop \";  name=blupp
+
+This is a text
+--snip--
+blah blah"
+      "snip"
+  in
+  r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t010 f =
+  (* There is a space after "octet-stream"! *)
+  let r =
+    f
+      "--snip
+Content-Disposition: form-data; name=blupp
+Content-type:  application/octet-stream
+
+This is a text
+--snip--"
+      "snip"
+  in
+  r = ["blupp", "application/octet-stream", "This is a text"]
+;;
+
+
+let t011 f =
+  let r =
+    f
+      "blah blah
+--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+--snip
+Content-Disposition: form-data; name=blipp
+
+Another line
+--snip-- blah
+blah blah"
+      "snip"
+  in
+  r = ["blupp", "text/plain", "This is a text";
+       "blipp", "text/plain", "Another line" ]
+;;
+
+
+let t012 f =
+  (* A real example *)
+   let r =
+     f
+"-----------------------------10843891265508332411092264958
+Content-Disposition: form-data; name=\"line\"
+
+aaa
+-----------------------------10843891265508332411092264958
+Content-Disposition: form-data; name=\"submit\"
+
+Submit
+-----------------------------10843891265508332411092264958--
+"
+      "---------------------------10843891265508332411092264958"
+   in
+   r = [ "line", "text/plain", "aaa";
+        "submit", "text/plain", "Submit";
+       ]
+;;
+
+
+(**********************************************************************)
+(* encode/decode                                                      *)
+(**********************************************************************)
+
+let t100() =
+  let s = String.create 256 in
+  for i = 0 to 255 do s.[i] <- Char.chr i done;
+  let r = encode s in
+  r = ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F" ^
+       "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F" ^
+       "+!%22%23$%25%26'()*%2B,-.%2F" ^
+       "0123456789%3A%3B%3C%3D%3E%3F" ^
+       "%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_" ^
+       "%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7E%7F" ^
+       "%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" ^
+       "%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" ^
+       "%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" ^
+       "%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" ^
+       "%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" ^
+       "%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" ^
+       "%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" ^
+       "%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF")
+;;
+
+
+let t101() =
+  let r = String.create 256 in
+  for i = 0 to 255 do r.[i] <- Char.chr i done;
+  let s = decode
+           ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F" ^
+            "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F" ^
+            "+!%22%23$%25%26'()*%2B,-.%2F" ^
+            "0123456789%3A%3B%3C%3D%3E%3F" ^
+            "%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_" ^
+            "%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7E%7F" ^
+            "%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" ^
+            "%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" ^
+            "%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" ^
+            "%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" ^
+            "%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" ^
+            "%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" ^
+            "%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" ^
+            "%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF") in
+  r = s
+;;
+
+
+let t102() =
+  let r = String.create 256 in
+  for i = 0 to 255 do r.[i] <- Char.chr i done;
+  let s = decode
+           ((String.lowercase
+               ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F" ^
+                "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F" ^
+                "+!%22%23$%25%26'()*%2B,-.%2F" ^
+                "0123456789%3A%3B%3C%3D%3E%3F")) ^
+            "%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_" ^
+            (String.lowercase
+               ("%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7E%7F" ^
+                "%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" ^
+                "%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" ^
+                "%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" ^
+                "%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" ^
+                "%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" ^
+                "%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" ^
+                "%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" ^
+                "%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF"))) in
+  r = s
+;;
+
+(**********************************************************************)
+(* dest_url_encoded_parameters                                        *)
+(**********************************************************************)
+
+let t200() =
+  let r = dest_url_encoded_parameters "a=b&c=d" in
+  r = ["a", "b"; "c", "d" ]
+;;
+
+
+let t201() =
+  let r = dest_url_encoded_parameters "a=&c=d" in
+  r = ["a", ""; "c", "d" ]
+;;
+
+
+let t202() =
+  let r = dest_url_encoded_parameters "a=&c=" in
+  r = ["a", ""; "c", "" ]
+;;
+
+
+let t203() =
+  let r = dest_url_encoded_parameters "" in
+  r = []
+;;
+
+
+let t204() =
+  let r = dest_url_encoded_parameters "%41=%42" in
+  r = ["A", "B"]
+;;
+
+
+(**********************************************************************)
+
+let test f n =
+  if f() then
+    print_endline ("Test " ^ n ^ " ok")
+  else
+    print_endline ("Test " ^ n ^ " FAILED!!!!");
+  flush stdout
+;;
+
+
+let test_dest_form_encoded_parameters f n =
+  let dest s b =
+    let args = dest_form_encoded_parameters s b default_config in
+    List.map
+      (fun a -> arg_name a, arg_mimetype a, arg_value a)
+      args
+  in
+  if f dest then
+    print_endline ("Test dest_form_encoded_parameters " ^ n ^ " ok")
+  else
+    print_endline ("Test dest_form_encoded_parameters " ^ n ^ " FAILED!!!!");
+  flush stdout
+;;
+
+
+let fill_stream s =
+  (* Returns a channel that reads from string s.
+   * This requires forking.
+   *)
+  let rd, wr = Unix.pipe() in
+  let pid = Unix.fork() in
+  if pid = 0 then begin
+    Unix.close rd;
+    let out = Unix.out_channel_of_descr wr in
+    output_string out s;
+    close_out out;
+    exit(0);
+  end;
+  Unix.close wr;
+  Unix.in_channel_of_descr rd
+;;
+
+
+let test_dest_form_encoded_parameters_from_netstream f n =
+  let dest s b =
+    let fd = fill_stream s in
+    let bs = String.length b * 2 in
+    let stream = Netstream.create_from_channel fd None bs in
+    let args = dest_form_encoded_parameters_from_netstream
+                stream b default_config in
+
+(*
+    List.iter
+      (fun a ->
+        Printf.printf "name=%s mimetype=%s value=%s\n"
+          (arg_name a) (arg_mimetype a) (arg_value a))
+      args;
+*)
+    List.map
+      (fun a -> arg_name a, arg_mimetype a, arg_value a)
+      args
+  in
+  if f dest then
+    Printf.printf
+      "Test dest_form_encoded_parameters_from_netstream %s ok\n"
+      n
+  else
+    print_endline ("Test dest_form_encoded_parameters_from_netstream " ^ n ^ " FAILED!!!!");
+  flush stdout
+;;
+
+
+
+test_dest_form_encoded_parameters t001 "001";;
+test_dest_form_encoded_parameters t002 "002";;
+test_dest_form_encoded_parameters t003 "003";;
+test_dest_form_encoded_parameters t004 "004";;
+test_dest_form_encoded_parameters t005 "005";;
+test_dest_form_encoded_parameters t006 "006";;
+test_dest_form_encoded_parameters t007 "007";;
+test_dest_form_encoded_parameters t008 "008";;
+test_dest_form_encoded_parameters t009 "009";;
+test_dest_form_encoded_parameters t010 "010";;
+test_dest_form_encoded_parameters t011 "011";;
+test_dest_form_encoded_parameters t012 "012";;
+
+test_dest_form_encoded_parameters_from_netstream t001 "001";;
+test_dest_form_encoded_parameters_from_netstream t002 "002";;
+test_dest_form_encoded_parameters_from_netstream t003 "003";;
+test_dest_form_encoded_parameters_from_netstream t004 "004";;
+test_dest_form_encoded_parameters_from_netstream t005 "005";;
+test_dest_form_encoded_parameters_from_netstream t006 "006";;
+test_dest_form_encoded_parameters_from_netstream t007 "007";;
+test_dest_form_encoded_parameters_from_netstream t008 "008";;
+test_dest_form_encoded_parameters_from_netstream t009 "009";;
+test_dest_form_encoded_parameters_from_netstream t010 "010";;
+test_dest_form_encoded_parameters_from_netstream t011 "011";;
+test_dest_form_encoded_parameters_from_netstream t012 "012";;
+
+test t100 "100";;
+test t101 "101";;
+test t102 "102";;
+
+test t200 "200";;
+test t201 "201";;
+test t202 "202";;
+test t203 "203";;
+test t204 "204";;
diff --git a/helm/DEVEL/pxp/netstring/tests/test_encoding.cgi b/helm/DEVEL/pxp/netstring/tests/test_encoding.cgi
new file mode 100755 (executable)
index 0000000..2402cda
--- /dev/null
@@ -0,0 +1,72 @@
+#! /bin/sh
+# (*
+exec /opt/ocaml-2.04/bin/ocamlfattop "$0" "$@"
+*) directory ".";;
+
+#directory "..";;
+#load "netstring.cma";;
+
+Cgi.header "";
+Cgi.parse_arguments 
+  { Cgi.default_config with
+      Cgi.how_to_process_arguments = (fun _ -> Cgi.File)
+  };
+let params = Cgi.arguments() in
+print_string "<html><body>\n";
+print_string "<h1>Parameters:</h1>\n";
+print_string "<ul>\n";
+List.iter
+  (fun (n,a) ->
+     print_string "<li>";
+     print_string n;
+     print_string ":";
+     print_string (Cgi.arg_mimetype a);
+     print_string "=";
+     (match Cgi.arg_filename a with
+         None -> ()
+       | Some fn -> print_string ("[filename=" ^ fn ^ "]")
+     );
+     print_string (Cgi.arg_value a);
+     print_string "</li>\n";
+
+  )
+  params;
+
+Cgi.cleanup();
+
+print_string "</ul>\n";
+
+print_string "<h1>GET URL-encoded form</h1>\n";
+print_string "<form action=\"test_encoding.cgi\" method=GET>\n";
+print_string "<input type=text name=line>\n";
+print_string "<input type=submit name=submit value=\"Submit\">\n";
+print_string "</form>\n";
+
+print_string "<h1>POST URL-encoded form</h1>\n";
+print_string "<form action=\"test_encoding.cgi\" method=POST>\n";
+print_string "<input type=text name=line>\n";
+print_string "<input type=submit name=submit value=\"Submit\">\n";
+print_string "</form>\n";
+
+print_string "<h1>POST FORM-encoded form</h1>\n";
+print_string "<form action=\"test_encoding.cgi\" method=POST enctype=\"multipart/form-data\">\n";
+print_string "<input type=text name=line>\n";
+print_string "<input type=text name=\"sträange\">\n";
+print_string "<input type=submit name=submit value=\"Submit\">\n";
+print_string "</form>\n";
+
+print_string "<h1>File upload</h1>\n";
+print_string "<form action=\"test_encoding.cgi\" method=POST enctype=\"multipart/form-data\">\n";
+print_string "<input type=text name=line>\n";
+print_string "<input type=file name=file>\n";
+print_string "<input type=submit name=submit value=\"Submit\">\n";
+print_string "</form>\n";
+
+
+
+print_string "</body></html>\n";
+
+flush stdout
+;;
+
+     
diff --git a/helm/DEVEL/pxp/netstring/tests/test_mimestring.ml b/helm/DEVEL/pxp/netstring/tests/test_mimestring.ml
new file mode 100644 (file)
index 0000000..db5eac9
--- /dev/null
@@ -0,0 +1,589 @@
+#require "str";;
+#directory "..";;
+#load "netstring.cma";;
+
+open Mimestring;;
+
+(**********************************************************************)
+(* scan_structured_value                                              *)
+(**********************************************************************)
+
+let t001() =
+  let r = scan_structured_value "user@domain.com" [ '@'; '.' ] [] in
+  r = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+;;
+
+
+let t002() =
+  let r = scan_structured_value "user @ domain . com" [ '@'; '.' ]  [] in
+  r = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+;;
+
+
+let t003() =
+  let r = scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ]
+ []  in
+  r = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+;;
+
+
+let t004() =
+  let r = scan_structured_value "user @ domain . com" [ '@'; '.'; ' ' ] []  in
+  r = [ Atom "user"; Special ' '; Special '@'; Special ' '; Atom "domain";
+       Special ' '; Special '.'; Special ' '; Atom "com" ]
+;;
+
+
+let t005() =
+  let r = scan_structured_value "user(Do you know him?)@domain.com"
+                               ['@'; '.'; '(']  [] in
+  r = [ Atom "user"; Special '('; Atom "Do"; Atom "you"; Atom "know";
+       Atom "him?)"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+;;
+
+
+let t006() =
+  let r = scan_structured_value "\"My.name\"@domain.com" [ '@'; '.' ]  [] in
+  r = [ QString "My.name"; Special '@'; Atom "domain"; Special '.';
+       Atom "com" ]
+;;
+
+
+let t007() =
+  let r = scan_structured_value "\"\\\"()@. \"@domain.com" [ '@'; '.' ]  [] in
+  r = [ QString "\"()@. "; Special '@'; Atom "domain"; Special '.';
+       Atom "com" ]
+;;
+
+
+let t008() =
+  let r = scan_structured_value "a(b(c(d)e)f)g" [] [] in
+  r = [ Atom "a"; Atom "g" ]
+;;
+
+
+let t009() =
+  let r = scan_structured_value "a(b(c(d)e)f" [] [] in
+  r = [ Atom "a" ]
+;;
+
+
+let t010() =
+  let r = scan_structured_value "a(b\\(c\\(d\\)e)f" [] [] in
+  r = [ Atom "a"; Atom "f" ]
+;;
+
+
+let t011() =
+  let r = scan_structured_value "a(b(c(d)e)f\\" [] [] in
+  r = [ Atom "a" ]
+;;
+
+
+let t012() =
+  let r = scan_structured_value "\"abc" [] [] in
+  r = [ QString "abc" ]
+;;
+
+
+let t013() =
+  let r = scan_structured_value "\"abc\\" [] [] in
+  r = [ QString "abc\\" ]
+;;
+
+
+(* New tests for netstring-0.9: *)
+
+let t020() =
+  let r = scan_structured_value "user(Do you know him?)@domain.com" 
+           [ '@'; '.' ] [ Return_comments ] in
+  r = [ Atom "user"; Comment; Special '@'; Atom "domain"; Special '.'; 
+       Atom "com" ]
+;;
+
+let t021() =
+  let r = scan_structured_value "user (Do you know him?) @ domain . com"
+           [ '@'; '.'; ' ' ] [] in
+  r = [ Atom "user"; Special ' '; Special ' '; Special ' '; Special '@';
+       Special ' '; Atom "domain";
+       Special ' '; Special '.'; Special ' '; Atom "com" ]
+;;
+
+let t022() =
+  let r = scan_structured_value "user (Do you know him?) @ domain . com"
+           [ '@'; '.'; ' ' ] [ Return_comments ] in
+  r = [ Atom "user"; Special ' '; Comment; Special ' '; Special '@'; 
+       Special ' '; Atom "domain";
+       Special ' '; Special '.'; Special ' '; Atom "com" ]
+;;
+
+let t023() =
+  let r = scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" 
+           [] [] in
+  r = [ Atom "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" ]
+;;
+
+let t024() =
+  let r = scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" 
+           [ ] [ Recognize_encoded_words ] in
+  r = [ EncodedWord("ISO-8859-1", "Q", "Keld_J=F8rn_Simonsen") ]
+;;
+
+let t025() =
+  let r = scan_structured_value 
+           "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="
+           []
+           [ Recognize_encoded_words ] in
+  r = [ EncodedWord
+         ("ISO-8859-1", "B", "SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=");
+       EncodedWord
+         ("ISO-8859-2", "B", "dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==")
+      ]
+;;
+
+(**********************************************************************)
+(* s_extended_token                                                   *)
+(**********************************************************************)
+
+let scan specials options str =
+  let scn = create_mime_scanner specials options str in
+  scan_token_list scn;;
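+
+(* Note: [scan] returns the tokens of the scanned string as a list of pairs,
+ * each consisting of the extended token (queried below with get_pos,
+ * get_line, get_column, get_length, ...) and the plain token itself.
+ *)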
+
+let t100() =
+  let r = scan [] [] "Two atoms" in
+  match r with
+      [ a1, Atom "Two"; a2, Atom "atoms" ] ->
+
+       (get_pos a1 = 0) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 0) &&
+       (get_length a1 = 3) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 4) &&
+       (get_line a2 = 1) &&
+       (get_column a2 = 4) &&
+       (get_length a2 = 5) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+
+let t101() =
+  let r = scan [] [] "  Two  atoms  " in
+  match r with
+      [ a1, Atom "Two"; a2, Atom "atoms" ] ->
+
+       (get_pos a1 = 2) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 2) &&
+       (get_length a1 = 3) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 7) &&
+       (get_line a2 = 1) &&
+       (get_column a2 = 7) &&
+       (get_length a2 = 5) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+
+let t102() =
+  let r = scan [] [] "  Two\n atoms  " in
+  match r with
+      [ a1, Atom "Two"; a2, Atom "atoms" ] ->
+
+       (get_pos a1 = 2) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 2) &&
+       (get_length a1 = 3) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 7) &&
+       (get_line a2 = 2) &&
+       (get_column a2 = 1) &&
+       (get_length a2 = 5) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+let t110() =
+  let r = scan [] [] "\"Two\" \"qstrings\"" in
+  match r with
+      [ a1, QString "Two"; a2, QString "qstrings" ] ->
+
+       (get_pos a1 = 0) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 0) &&
+       (get_length a1 = 5) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 6) &&
+       (get_line a2 = 1) &&
+       (get_column a2 = 6) &&
+       (get_length a2 = 10) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+let t111() =
+  let r = scan [] [] "  \"Two\"  \"qstrings\"  " in
+  match r with
+      [ a1, QString "Two"; a2, QString "qstrings" ] ->
+
+       (get_pos a1 = 2) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 2) &&
+       (get_length a1 = 5) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 9) &&
+       (get_line a2 = 1) &&
+       (get_column a2 = 9) &&
+       (get_length a2 = 10) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+let t112() =
+  let r = scan [] [] "  \"Two\nlines\"  \"and\nqstrings\"  " in
+  match r with
+      [ a1, QString "Two\nlines"; a2, QString "and\nqstrings" ] ->
+
+       (get_pos a1 = 2) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 2) &&
+       (get_length a1 = 11) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 15) &&
+       (get_line a2 = 2) &&
+       (get_column a2 = 8) &&
+       (get_length a2 = 14) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+let t113() =
+  let r = scan [] [] "  \"Two\\\nlines\"  \"and\\\nqstrings\"  " in
+  match r with
+      [ a1, QString "Two\nlines"; a2, QString "and\nqstrings" ] ->
+
+       (get_pos a1 = 2) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 2) &&
+       (get_length a1 = 12) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 16) &&
+       (get_line a2 = 2) &&
+       (get_column a2 = 8) &&
+       (get_length a2 = 15) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+let t120() =
+  (* Domain literals are implemented like quoted strings, so only the
+   * most complicated test case is checked here.
+   *)
+  let r = scan [] [] "  [Two\\\nlines]  [and\\\nliterals]  " in
+  match r with
+      [ a1, DomainLiteral "Two\nlines"; a2, DomainLiteral "and\nliterals" ] ->
+
+       (get_pos a1 = 2) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 2) &&
+       (get_length a1 = 12) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 16) &&
+       (get_line a2 = 2) &&
+       (get_column a2 = 8) &&
+       (get_length a2 = 15) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+let t130() =
+  let r = scan [] [ Return_comments ] "(Two) (comments)" in
+  match r with
+      [ a1, Comment; a2, Comment ] ->
+
+       (get_pos a1 = 0) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 0) &&
+       (get_length a1 = 5) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 6) &&
+       (get_line a2 = 1) &&
+       (get_column a2 = 6) &&
+       (get_length a2 = 10) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+let t131() =
+  let r = scan [] [ Return_comments ] "(Two\nlines) (and\ncomments)" in
+  match r with
+      [ a1, Comment; a2, Comment ] ->
+
+       (get_pos a1 = 0) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 0) &&
+       (get_length a1 = 11) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 12) &&
+       (get_line a2 = 2) &&
+       (get_column a2 = 7) &&
+       (get_length a2 = 14) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+let t132() =
+  let r = scan [] [ Return_comments ] "(Two\\\nlines) (and\\\ncomments)" in
+  match r with
+      [ a1, Comment; a2, Comment ] ->
+
+       (get_pos a1 = 0) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 0) &&
+       (get_length a1 = 12) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 13) &&
+       (get_line a2 = 2) &&
+       (get_column a2 = 7) &&
+       (get_length a2 = 15) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+let t133() =
+  let r = scan [] [ Return_comments ] "(a\n(b\nc)d\ne(f)) atom" in
+  match r with
+      [ a1, Comment; a2, Atom "atom" ] ->
+
+       (get_pos a1 = 0) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 0) &&
+       (get_length a1 = 15) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 16) &&
+       (get_line a2 = 4) &&
+       (get_column a2 = 6) &&
+       (get_length a2 = 4) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+let t140() = 
+  let r = scan [] [] "\031\031" in
+  match r with
+      [ a1, Control '\031'; a2, Control '\031' ] ->
+
+       (get_pos a1 = 0) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 0) &&
+       (get_length a1 = 1) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 1) &&
+       (get_line a2 = 1) &&
+       (get_column a2 = 1) &&
+       (get_length a2 = 1) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+let t150() = 
+  let r = scan [ '\t'; '\n' ] [] " \t\n  \n  \t" in
+  match r with
+      [ a1, Special '\t'; _, Special '\n'; _, Special '\n'; a2, Special '\t'] ->
+
+       (get_pos a1 = 1) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 1) &&
+       (get_length a1 = 1) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+
+       (get_pos a2 = 8) &&
+       (get_line a2 = 3) &&
+       (get_column a2 = 2) &&
+       (get_length a2 = 1) &&
+       (separates_adjacent_encoded_words a2 = false)
+
+    | _ ->
+       false
+;;
+
+let t160() =
+  let r = scan [] [ Recognize_encoded_words ] 
+           "=?iso8859-1?q?G=F6rd?= =?iso8859-1?q?G=F6rd?=" in
+  match r with
+      [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd"); 
+       a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] ->
+
+       (get_pos a1 = 0) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 0) &&
+       (get_length a1 = 22) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+       (get_decoded_word a1 = "Görd") &&
+       (get_charset a1 = "ISO8859-1") &&
+
+       (get_pos a2 = 23) &&
+       (get_line a2 = 1) &&
+       (get_column a2 = 23) &&
+       (get_length a2 = 22) &&
+       (separates_adjacent_encoded_words a2 = false) &&
+       (get_decoded_word a2 = "Görd") &&
+       (get_charset a2 = "ISO8859-1")
+
+    | _ ->
+       false
+;;
+
+let t161() =
+  let r = scan [ ' ' ] [ Recognize_encoded_words ] 
+           "=?iso8859-1?q?G=F6rd?= =?iso8859-1?q?G=F6rd?=" in
+  match r with
+      [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd"); 
+       sp, Special ' ';
+       a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] ->
+
+       (get_pos a1 = 0) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 0) &&
+       (get_length a1 = 22) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+       (get_decoded_word a1 = "Görd") &&
+       (get_charset a1 = "ISO8859-1") &&
+
+       (get_pos a2 = 23) &&
+       (get_line a2 = 1) &&
+       (get_column a2 = 23) &&
+       (get_length a2 = 22) &&
+       (separates_adjacent_encoded_words a2 = false) &&
+       (get_decoded_word a2 = "Görd") &&
+       (get_charset a2 = "ISO8859-1") &&
+
+       (separates_adjacent_encoded_words sp = true)
+
+    | _ ->
+       false
+;;
+
+let t162() =
+  let r = scan [ ' ' ] [ Recognize_encoded_words ] 
+           "=?iso8859-1?q?G=F6rd?=  =?iso8859-1?q?G=F6rd?=" in
+  match r with
+      [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd"); 
+       sp1, Special ' ';
+       sp2, Special ' ';
+       a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] ->
+
+       (get_pos a1 = 0) &&
+       (get_line a1 = 1) &&
+       (get_column a1 = 0) &&
+       (get_length a1 = 22) &&
+       (separates_adjacent_encoded_words a1 = false) &&
+       (get_decoded_word a1 = "Görd") &&
+       (get_charset a1 = "ISO8859-1") &&
+
+       (get_pos a2 = 24) &&
+       (get_line a2 = 1) &&
+       (get_column a2 = 24) &&
+       (get_length a2 = 22) &&
+       (separates_adjacent_encoded_words a2 = false) &&
+       (get_decoded_word a2 = "Görd") &&
+       (get_charset a2 = "ISO8859-1") &&
+
+       (separates_adjacent_encoded_words sp1 = true) &&
+       (separates_adjacent_encoded_words sp2 = true)
+
+    | _ ->
+       false
+;;
+
+
+
+(**********************************************************************)
+
+let test f n =
+  if f() then
+    print_endline ("Test " ^ n ^ " ok")
+  else
+    print_endline ("Test " ^ n ^ " FAILED!!!!");
+  flush stdout
+;;
+
+test t001 "001";;
+test t002 "002";;
+test t003 "003";;
+test t004 "004";;
+test t005 "005";;
+test t006 "006";;
+test t007 "007";;
+test t008 "008";;
+test t009 "009";;
+test t010 "010";;
+test t011 "011";;
+test t012 "012";;
+test t013 "013";;
+
+test t020 "020";;
+test t021 "021";;
+test t022 "022";;
+test t023 "023";;
+test t024 "024";;
+test t025 "025";;
+
+test t100 "100";;
+test t101 "101";;
+test t102 "102";;
+test t110 "110";;
+test t111 "111";;
+test t112 "112";;
+test t113 "113";;
+test t120 "120";;
+test t130 "130";;
+test t131 "131";;
+test t132 "132";;
+test t133 "133";;
+test t140 "140";;
+test t150 "150";;
+test t160 "160";;
+test t161 "161";;
+test t162 "162";;
diff --git a/helm/DEVEL/pxp/netstring/tests/test_netencoding.ml b/helm/DEVEL/pxp/netstring/tests/test_netencoding.ml
new file mode 100644 (file)
index 0000000..29673fa
--- /dev/null
@@ -0,0 +1,223 @@
+#require "str";;
+#directory "..";;
+#load "netstring.cma";;
+
+
+open Netencoding;;
+
+(**********************************************************************)
+(* Base64                                                             *)
+(**********************************************************************)
+
+(* Test strings:
+ * "", "a", "ab", "abc", "abcd", "abcde",
+ * "abcdefghijklmnopqrstuvwxyz".
+ *)
+
+let t001() =
+  (* ENCODE. No line breaks. *)
+  Base64.encode "" = "" &
+  Base64.encode "a" = "YQ==" &
+  Base64.encode "ab" = "YWI=" &
+  Base64.encode "abc" = "YWJj" &
+  Base64.encode "abcd" = "YWJjZA==" &
+  Base64.encode "abcde" = "YWJjZGU=" &
+  Base64.encode "abcdefghijklmnopqrstuvwxyz" =
+                "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="
+;;
+
+
+let t002() =
+  (* ENCODE. Lines with length of 4, separated by LF *)
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 4 false = "" &
+  Base64.encode_substring abc 0 1 4 false = "YQ==\n" &
+  Base64.encode_substring abc 0 2 4 false = "YWI=\n" &
+  Base64.encode_substring abc 0 3 4 false = "YWJj\n" &
+  Base64.encode_substring abc 0 4 4 false = "YWJj\nZA==\n" &
+  Base64.encode_substring abc 0 5 4 false = "YWJj\nZGU=\n" &
+  Base64.encode_substring abc 0 26 4 false = 
+    "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n"
+;;
+
+
+let t003() =
+  (* ENCODE. Lines with length of 5, separated by LF *)
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 5 false = "" &
+  Base64.encode_substring abc 0 1 5 false = "YQ==\n" &
+  Base64.encode_substring abc 0 2 5 false = "YWI=\n" &
+  Base64.encode_substring abc 0 3 5 false = "YWJj\n" &
+  Base64.encode_substring abc 0 4 5 false = "YWJj\nZA==\n" &
+  Base64.encode_substring abc 0 5 5 false = "YWJj\nZGU=\n" &
+  Base64.encode_substring abc 0 26 5 false = 
+    "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n"
+;;
+
+
+let t004() =
+  (* ENCODE. Lines with length of 7, separated by LF *)
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 7 false = "" &
+  Base64.encode_substring abc 0 1 7 false = "YQ==\n" &
+  Base64.encode_substring abc 0 2 7 false = "YWI=\n" &
+  Base64.encode_substring abc 0 3 7 false = "YWJj\n" &
+  Base64.encode_substring abc 0 4 7 false = "YWJj\nZA==\n" &
+  Base64.encode_substring abc 0 5 7 false = "YWJj\nZGU=\n" &
+  Base64.encode_substring abc 0 26 7 false = 
+    "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n"
+;;
+
+
+let t005() =
+  (* ENCODE. Lines with length of 8, separated by LF *)
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 8 false = "" &
+  Base64.encode_substring abc 0 1 8 false = "YQ==\n" &
+  Base64.encode_substring abc 0 2 8 false = "YWI=\n" &
+  Base64.encode_substring abc 0 3 8 false = "YWJj\n" &
+  Base64.encode_substring abc 0 4 8 false = "YWJjZA==\n" &
+  Base64.encode_substring abc 0 5 8 false = "YWJjZGU=\n" &
+  Base64.encode_substring abc 0 26 8 false = 
+    "YWJjZGVm\nZ2hpamts\nbW5vcHFy\nc3R1dnd4\neXo=\n"
+;;
+
+
+let t006() =
+  (* ENCODE. Lines with length of 8, separated by CRLF *)
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 8 true = "" &
+  Base64.encode_substring abc 0 1 8 true = "YQ==\r\n" &
+  Base64.encode_substring abc 0 2 8 true = "YWI=\r\n" &
+  Base64.encode_substring abc 0 3 8 true = "YWJj\r\n" &
+  Base64.encode_substring abc 0 4 8 true = "YWJjZA==\r\n" &
+  Base64.encode_substring abc 0 5 8 true = "YWJjZGU=\r\n" &
+  Base64.encode_substring abc 0 26 8 true = 
+    "YWJjZGVm\r\nZ2hpamts\r\nbW5vcHFy\r\nc3R1dnd4\r\neXo=\r\n"
+;;
+
+
+let t020() =
+  (* DECODE. First test without spaces *)
+  Base64.decode_substring "" 0 0 false false = "" &
+  Base64.decode_substring "YQ==" 0 4 false false = "a" &
+  Base64.decode_substring "YWI=" 0 4 false false = "ab" &
+  Base64.decode_substring "YWJj" 0 4 false false = "abc" &
+  Base64.decode_substring "YWJjZA==" 0 8 false false = "abcd" &
+  Base64.decode_substring "YWJjZGU=" 0 8 false false = "abcde" &
+  Base64.decode_substring 
+    "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo=" 0 36 false false =
+    "abcdefghijklmnopqrstuvwxyz"
+;;
+
+
+let t021() =
+  (* DECODE. With spaces *)
+  Base64.decode_substring " \r\n\t" 0 4 false true = "" &
+  Base64.decode_substring " Y W J j\n Z G U = " 0 18 false true = "abcde"
+;;
+
+let t022() =
+  (* DECODE. With URL characters and spaces *)
+  Base64.decode_substring " Y W J j\n Z G U = " 0 18 true true = "abcde" &
+  Base64.decode_substring " Y W J j\n Z G U . " 0 18 true true = "abcde"
+;;
+
+(**********************************************************************)
+(* Quoted Printable                                                   *)
+(**********************************************************************)
+
+let t100() =
+  (* ENCODE. *)
+  QuotedPrintable.encode "a %= 12345 &$[]\"" = "a %=3D 12345 &=24=5B=5D=22" &
+  QuotedPrintable.encode "\000\001\002" = "=00=01=02" &
+  QuotedPrintable.encode "abc\r\ndef\nghi" = "abc\r\ndef\nghi" &
+  QuotedPrintable.encode " abc\r\n def\n ghi" = " abc\r\n def\n ghi" &
+  QuotedPrintable.encode "abc \r\n def\nghi " = "abc=20\r\n def\nghi=20"
+;;
+
+
+let t120() =
+  (* DECODE. *)
+  QuotedPrintable.decode "a %=3D 12345 &=24=5B=5D=22" = "a %= 12345 &$[]\"" &
+  QuotedPrintable.decode "=00=01=02" = "\000\001\002" &
+  QuotedPrintable.decode "abc\r\ndef\nghi" = "abc\r\ndef\nghi" &
+  QuotedPrintable.decode " abc\r\n def\n ghi" = " abc\r\n def\n ghi" &
+  QuotedPrintable.decode "abc=20\r\n def\nghi=20" = "abc \r\n def\nghi " &
+  QuotedPrintable.decode "abc=\r\n def\nghi=20" = "abc def\nghi "
+;;
+
+(**********************************************************************)
+(* Q                                                                  *)
+(**********************************************************************)
+
+let t200() =
+  (* ENCODE. *)
+  Q.encode "a %= 12345 &$[]\"" = "a=20=25=3D=2012345=20=26=24=5B=5D=22" &
+  Q.encode "\000\001\002\r\n" = "=00=01=02=0D=0A"
+;;
+
+
+let t220() =
+  (* DECODE. *)
+  Q.decode "a=20=25=3D=2012345=20=26=24=5B=5D=22" = "a %= 12345 &$[]\"" &
+  Q.decode "=00=01=02=0D=0A" = "\000\001\002\r\n" &
+  Q.decode "a=20=25=3d=2012345=20=26=24=5b=5d=22" = "a %= 12345 &$[]\"" 
+;;
+
+(**********************************************************************)
+(* Url                                                                *)
+(**********************************************************************)
+
+(* Already tested for Cgi *)
+
+(**********************************************************************)
+(* Html                                                               *)
+(**********************************************************************)
+
+let t300() =
+  Html.encode_from_latin1 "<>&\"abcdefäöÜ\160\025'" = 
+    "&lt;&gt;&amp;&quot;abcdef&auml;&ouml;&Uuml;&nbsp;&#25;'"
+;;
+
+
+let t320() =
+  Html.decode_to_latin1 
+    "&lt;&gt;&amp;&quot;abcdef&auml;&ouml;&Uuml;&nbsp;&#25;" =
+    "<>&\"abcdefäöÜ\160\025" &
+  Html.decode_to_latin1 "&apos;" = "'" &
+  Html.decode_to_latin1 "&nonsense;" = "&nonsense;" &
+  Html.decode_to_latin1 "&#256;" = "&#256;"
+;;
+
+
+(**********************************************************************)
+
+let test f n =
+  if f() then
+    print_endline ("Test " ^ n ^ " ok")
+  else 
+    print_endline ("Test " ^ n ^ " FAILED!!!!");
+  flush stdout
+;;
+
+test t001 "001";
+test t002 "002";
+test t003 "003";
+test t004 "004";
+test t005 "005";
+test t006 "006";
+
+test t020 "020";
+test t021 "021";
+test t022 "022";
+
+test t100 "100";
+test t120 "120";
+
+test t200 "200";
+test t220 "220";
+
+test t300 "300";
+test t320 "320";
diff --git a/helm/DEVEL/pxp/netstring/tests/test_neturl.ml b/helm/DEVEL/pxp/netstring/tests/test_neturl.ml
new file mode 100644 (file)
index 0000000..633bfda
--- /dev/null
@@ -0,0 +1,969 @@
+#directory "..";;
+#load "netstring.cma";;
+
+open Neturl;;
+
+
+let expect_malformed_url f =
+  try ignore(f()); false with Malformed_URL -> true;;
+
+let works f =
+  not (expect_malformed_url f)
+;;
+
+(**********************************************************************)
+(* extract_url_scheme                                                 *)
+(**********************************************************************)
+
+let t001 () =
+  extract_url_scheme "a:bc" = "a" &&
+  extract_url_scheme "A:bc" = "a" &&
+  extract_url_scheme "a:b:c" = "a" &&
+  extract_url_scheme "a+b-c:d:e" = "a+b-c"
+;;
+
+
+let t002 () =
+  let test s =
+    try ignore(extract_url_scheme s); false with Malformed_URL -> true
+  in
+  test "a" &&
+  test "a/b:c" &&
+  test "%61:b" &&
+  test "a%3ab"
+;;
+
+(**********************************************************************)
+(* url_syntax                                                         *)
+(**********************************************************************)
+
+let hashtbl_for_all f h =
+  let b = ref true in
+  Hashtbl.iter
+    (fun k v -> b := !b && f k v)
+    h;
+  !b
+;;
+
+let t010 () =
+  url_syntax_is_valid null_url_syntax &&
+  url_syntax_is_valid ip_url_syntax &&
+  hashtbl_for_all
+    (fun _ syn ->
+       url_syntax_is_valid syn
+    )
+    common_url_syntax
+;;
+
+let t011 () =
+  url_syntax_is_valid (partial_url_syntax null_url_syntax) &&
+  url_syntax_is_valid (partial_url_syntax ip_url_syntax) &&
+  hashtbl_for_all
+    (fun _ syn ->
+       url_syntax_is_valid (partial_url_syntax syn)
+    )
+    common_url_syntax
+;;
+
+let t012 () =
+  let f = fun _ -> true in
+  let syn =
+    { url_enable_scheme    = Url_part_not_recognized;
+      url_enable_user      = Url_part_required;
+      url_enable_password  = Url_part_allowed;
+      url_enable_host      = Url_part_required;
+      url_enable_port      = Url_part_not_recognized;
+      url_enable_path      = Url_part_required;
+      url_enable_param     = Url_part_not_recognized;
+      url_enable_query     = Url_part_not_recognized;
+      url_enable_fragment  = Url_part_required;
+      url_enable_other     = Url_part_not_recognized;
+      url_accepts_8bits    = false;
+      url_is_valid         = f;
+    } in
+  let syn' = partial_url_syntax syn in
+  
+  (syn'.url_enable_scheme    = Url_part_not_recognized) &&
+  (syn'.url_enable_user      = Url_part_allowed) &&
+  (syn'.url_enable_password  = Url_part_allowed) &&
+  (syn'.url_enable_host      = Url_part_allowed) &&
+  (syn'.url_enable_port      = Url_part_not_recognized) &&
+  (syn'.url_enable_path      = Url_part_allowed) &&
+  (syn'.url_enable_param     = Url_part_not_recognized) &&
+  (syn'.url_enable_query     = Url_part_not_recognized) &&
+  (syn'.url_enable_fragment  = Url_part_allowed) &&
+  (syn'.url_enable_other     = Url_part_not_recognized) &&
+  (syn'.url_is_valid        == f) &&
+
+  url_syntax_is_valid syn &&
+  url_syntax_is_valid syn'
+;;
+
+(**********************************************************************)
+(* make_url                                                           *)
+(**********************************************************************)
+
+let t020 () =
+  (* Basic functionality: *)
+  let http_syn = Hashtbl.find common_url_syntax "http" in
+
+  let u1 = make_url
+            (* default: not encoded *)
+            ~scheme:"http"
+            ~user:"U"
+            ~password:"%()~$@"
+            ~host:"a.b.c"
+            ~port:81
+            ~path:["";"?";""]
+            http_syn in
+
+  url_provides 
+    ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true 
+    u1 &&
+
+  not
+    (url_provides
+       ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true 
+       ~query:true u1) &&
+
+  (url_syntax_of_url u1 == http_syn) &&
+
+  (url_scheme   u1 = "http") &&
+  (url_user     u1 = "U") &&
+  (url_password u1 = "%()~$@") &&
+  (url_host     u1 = "a.b.c") &&
+  (url_port     u1 = 81) &&
+  (url_path     u1 = ["";"?";""]) &&
+
+  (url_user     ~encoded:true u1 = "U") &&
+  (url_password ~encoded:true u1 = "%25()%7E$%40") &&
+  (url_path     ~encoded:true u1 = ["";"%3F";""]) &&
+
+  string_of_url u1 = "http://U:%25()%7E$%40@a.b.c:81/%3F/"
+;;
+
+
+let t021 () =
+  (* Basic functionality: *)
+  let http_syn = Hashtbl.find common_url_syntax "http" in
+
+  let u1 = make_url
+            ~encoded:true
+            ~scheme:"http"
+            ~user:"%55"
+            ~password:"%25()%7e$%40"
+            ~host:"a.b.c"
+            ~port:81
+            ~path:["";"%3F";""]
+            http_syn in
+
+  url_provides 
+    ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true 
+    u1 &&
+
+  not
+    (url_provides
+       ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true 
+       ~query:true u1) &&
+
+  (url_syntax_of_url u1 == http_syn) &&
+
+  (url_scheme   u1 = "http") &&
+  (url_user     u1 = "U") &&
+  (url_password u1 = "%()~$@") &&
+  (url_host     u1 = "a.b.c") &&
+  (url_port     u1 = 81) &&
+  (url_path     u1 = ["";"?";""]) &&
+
+  (url_user     ~encoded:true u1 = "%55") &&
+  (url_password ~encoded:true u1 = "%25()%7e$%40") &&
+  (url_path     ~encoded:true u1 = ["";"%3F";""]) &&
+
+  string_of_url u1 = "http://%55:%25()%7e$%40@a.b.c:81/%3F/"
+;;
+
+
+(* NEGATIVE TESTS *)
+
+let t030 () =
+  (* It is not possible to add a component which is not recognized *)
+  let http_syn = Hashtbl.find common_url_syntax "http" in
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~scheme:"http"
+        ~user:"U"
+        ~password:"%()~$@"
+        ~host:"a.b.c"
+        ~port:81
+        ~path:["";"?";""]
+        ~fragment:"abc"
+        http_syn)
+;;
+
+
+let t031 () =
+  (* It is not possible to put malformed '%'-encodings into the URL *)
+  let http_syn = Hashtbl.find common_url_syntax "http" in
+
+  works                      (* reference *)
+    (fun () ->
+       make_url
+        ~encoded:true
+        ~scheme:"http"
+        ~user:"U"
+        ~password:"XX"
+        ~host:"a.b.c"
+        ~port:81
+        ~path:["";"a";""]
+        http_syn) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~encoded:true
+        ~scheme:"http"
+        ~user:"U"
+        ~password:"%XX"
+        ~host:"a.b.c"
+        ~port:81
+        ~path:["";"a";""]
+        http_syn) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~encoded:true
+        ~scheme:"http"
+        ~user:"U"
+        ~password:"%X"
+        ~host:"a.b.c"
+        ~port:81
+        ~path:["";"a";""]
+        http_syn) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~encoded:true
+        ~scheme:"http"
+        ~user:"U"
+        ~password:"%"
+        ~host:"a.b.c"
+        ~port:81
+        ~path:["";"a";""]
+        http_syn) 
+;;
+
+let t032 () =
+  (* It is not possible to put unsafe characters into the URL *)
+  let http_syn = Hashtbl.find common_url_syntax "http" in
+
+  let make c =
+    make_url
+      ~encoded:true
+      ~scheme:"http"
+      ~user:"U"
+      ~password:(String.make 1 c)
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";"a";""]
+      http_syn
+  in
+
+  works (fun () -> make 'a') &&                   (* reference *)
+
+  (* List of unsafe characters taken from RFC1738: *)
+  expect_malformed_url (fun () -> make '<') && 
+  expect_malformed_url (fun () -> make '>') && 
+  expect_malformed_url (fun () -> make '"') && 
+  expect_malformed_url (fun () -> make '#') && 
+    (* Note: '#' would be considered reserved if fragments were enabled *)
+  expect_malformed_url (fun () -> make '%') && 
+  expect_malformed_url (fun () -> make '{') && 
+  expect_malformed_url (fun () -> make '}') && 
+  expect_malformed_url (fun () -> make '|') && 
+  expect_malformed_url (fun () -> make '\\') && 
+  expect_malformed_url (fun () -> make '^') && 
+  expect_malformed_url (fun () -> make '[') && 
+  expect_malformed_url (fun () -> make ']') && 
+  expect_malformed_url (fun () -> make '`') &&
+  expect_malformed_url (fun () -> make '~') &&
+    (* Note: '~' is considered safe in paths: *)
+  works 
+    (fun () ->
+    make_url
+      ~encoded:true
+      ~scheme:"http"
+      ~user:"U"
+      ~password:"a"
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";"~";""]
+      http_syn)
+;;
+
+let t033 () =
+  (* It is not possible to put reserved characters into the URL *)
+  let http_syn = Hashtbl.find common_url_syntax "http" in
+
+  let make_password c =
+    make_url
+      ~encoded:true
+      ~scheme:"http"
+      ~user:"U"
+      ~password:(String.make 1 c)
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";"a";""]
+      http_syn
+  in
+  let make_path c =
+    make_url
+      ~encoded:true
+      ~scheme:"http"
+      ~user:"U"
+      ~password:"a"
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";String.make 1 c;""]
+      http_syn
+  in
+  let make_query c =
+    make_url
+      ~encoded:true
+      ~scheme:"http"
+      ~user:"U"
+      ~password:"a"
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";"a";""]
+      ~query:(String.make 1 c)
+      http_syn
+  in
+
+  (* Note: There is a difference between RFC 1738 and RFC 1808 regarding
+   * which characters are reserved. RFC 1808 defines a fixed set of characters
+   * as reserved while RFC 1738 defines the reserved characters depending
+   * on the scheme.
+   * This implementation of URLs follows RFC 1738 (for practical
+   * reasons).
+   *)
+
+  works (fun () -> make_password 'a') &&                   (* reference *)
+  works (fun () -> make_path 'a') &&
+  works (fun () -> make_query 'a') &&
+
+  expect_malformed_url (fun () -> make_password ':') && 
+  expect_malformed_url (fun () -> make_password '@') && 
+  expect_malformed_url (fun () -> make_password '/') && 
+  works                (fun () -> make_password ';') &&
+  works                (fun () -> make_password '?') &&
+  works                (fun () -> make_password '=') &&
+  works                (fun () -> make_password '&') &&
+
+  (* Note: ';' is allowed in path and query because parameters are not
+   * recognized in HTTP syntax.
+   *)
+
+  expect_malformed_url (fun () -> make_path '/') && 
+  expect_malformed_url (fun () -> make_path '?') && 
+  works                (fun () -> make_path ':') && 
+  works                (fun () -> make_path '@') && 
+  works                (fun () -> make_path ';') && 
+  works                (fun () -> make_path '=') && 
+  works                (fun () -> make_path '&') && 
+
+  expect_malformed_url (fun () -> make_query '?') && 
+  works                (fun () -> make_query '/') && 
+  works                (fun () -> make_query ':') && 
+  works                (fun () -> make_query '@') && 
+  works                (fun () -> make_query ';') && 
+  works                (fun () -> make_query '=') && 
+  works                (fun () -> make_query '&')
+;;
+
+
+let t034 () =
+  (* It is not possible to create a URL with a password but without a user,
+   * nor a URL with a port but without a host,
+   * nor a URL with a user but without a host.
+   *)
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~scheme:"http"
+        ~password:"a"
+        ~host:"a.b.c"
+        ~path:["";"a";""]
+        ip_url_syntax) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~scheme:"http"
+        ~user:"U"
+        ~path:["";"a";""]
+        ip_url_syntax) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~scheme:"http"
+        ~port:81
+        ~path:["";"a";""]
+        ip_url_syntax)
+;;
+
+
+let t035 () =
+  (* It is not possible to create a URL with an illegal scheme prefix *)
+  
+  (* reference: *)
+  works
+    (fun () ->
+       make_url
+        ~scheme:"a"
+        ip_url_syntax) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~scheme:":"
+        ip_url_syntax) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~scheme:"a=b"
+        ip_url_syntax) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~scheme:"a%62b"
+        ip_url_syntax) &&
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~scheme:"a&b"
+        ip_url_syntax)
+;;
+
+
+let t036 () =
+  (* It is not possible to have a path with double slashes *)
+  
+  (* reference: *)
+  works
+    (fun () ->
+       make_url
+        ~path:["";"a";""]
+        ip_url_syntax) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~path:["";""]
+        ip_url_syntax) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~path:["a";"";""]
+        ip_url_syntax) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~path:["";"";"a"]
+        ip_url_syntax) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~path:["a";"";"a"]
+        ip_url_syntax)
+;;
+
+
+let t037 () =
+  (* It is not possible to have port numbers outside 0..65535 *)
+  
+  (* reference: *)
+  works
+    (fun () ->
+       make_url
+        ~host:"a"
+        ~port:1
+        ip_url_syntax) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~host:"a"
+        ~port:(-1)
+        ip_url_syntax) &&
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~host:"a"
+        ~port:65536
+        ip_url_syntax)
+;;
+
+
+let t038 () =
+  (* Several cases which are not allowed. *)
+  
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~host:"a"
+        ~path:["a"]
+        ip_url_syntax
+    ) &&                       (* illegal: host + relative path *)
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~host:"a"
+        ~path:[]
+        ~param:["x"]
+        ip_url_syntax
+    ) &&                       (* illegal: host + no path + params *)
+
+  expect_malformed_url
+    (fun () ->
+       make_url
+        ~host:"a"
+        ~path:[]
+        ~query:"x"
+        ip_url_syntax
+    )                          (* illegal: host + no path + query *)
+;;
+
+(**********************************************************************)
+(* url_of_string                                                      *)
+(**********************************************************************)
+
+let t050 () =
+  (* absolute URLs with ip_url_syntax *)
+  let identical s =
+    string_of_url (url_of_string ip_url_syntax s) = s in
+
+  let fails s =
+    try ignore(url_of_string ip_url_syntax s); false 
+    with Malformed_URL -> true
+  in
+
+  identical "http:" &&
+
+  identical "http://host" &&
+  identical "http://user@host" &&
+  identical "http://user:password@host" &&
+  identical "http://user@host:99" &&
+  identical "http://user:password@host:99" &&
+
+  identical "http://host/" &&
+  identical "http://user@host/" &&
+  identical "http://user:password@host/" &&
+  identical "http://user@host:99/" &&
+  identical "http://user:password@host:99/" &&
+
+  identical "http://host/a/b" &&
+  identical "http://user@host/a/b" &&
+  identical "http://user:password@host/a/b" &&
+  identical "http://user@host:99/a/b" &&
+  identical "http://user:password@host:99/a/b" &&
+
+  identical "http://host/a/b/" &&
+  identical "http://user@host/a/b/" &&
+  identical "http://user:password@host/a/b/" &&
+  identical "http://user@host:99/a/b/" &&
+  identical "http://user:password@host:99/a/b/" &&
+
+  identical "http://host/?a=b&c=d" &&
+  identical "http://user@host/?a=b&c=d" &&
+  identical "http://user:password@host/?a=b&c=d" &&
+  identical "http://user@host:99/?a=b&c=d" &&
+  identical "http://user:password@host:99/?a=b&c=d" &&
+
+  fails "http://host?a=b&c=d" &&
+  fails "http://user@host?a=b&c=d" &&
+  fails "http://user:password@host?a=b&c=d" &&
+  fails "http://user@host:99?a=b&c=d" &&
+  fails "http://user:password@host:99?a=b&c=d" &&
+
+  identical "http://host/?a=/&c=/" &&
+  identical "http://user@host/?a=/&c=/" &&
+  identical "http://user:password@host/?a=/&c=/" &&
+  identical "http://user@host:99/?a=/&c=/" &&
+  identical "http://user:password@host:99/?a=/&c=/" &&
+
+  identical "http://host/;a;b" &&
+  identical "http://user@host/;a;b" &&
+  identical "http://user:password@host/;a;b" &&
+  identical "http://user@host:99/;a;b" &&
+  identical "http://user:password@host:99/;a;b" &&
+
+  fails "http://host;a;b" &&
+  fails "http://user@host;a;b" &&
+  fails "http://user:password@host;a;b" &&
+  fails "http://user@host:99;a;b" &&
+  fails "http://user:password@host:99;a;b" &&
+
+  identical "http://host/;a;b?a=b&c=d" &&
+  identical "http://user@host/;a;b?a=b&c=d" &&
+  identical "http://user:password@host/;a;b?a=b&c=d" &&
+  identical "http://user@host:99/;a;b?a=b&c=d" &&
+  identical "http://user:password@host:99/;a;b?a=b&c=d" &&
+
+  identical "http:#f" &&
+
+  identical "http://host#f" &&
+  identical "http://user@host#f" &&
+  identical "http://user:password@host#f" &&
+  identical "http://user@host:99#f" &&
+  identical "http://user:password@host:99#f" &&
+
+  identical "http://host/;a;b?a=b&c=d#f" &&
+  identical "http://user@host/;a;b?a=b&c=d#f" &&
+  identical "http://user:password@host/;a;b?a=b&c=d#f" &&
+  identical "http://user@host:99/;a;b?a=b&c=d#f" &&
+  identical "http://user:password@host:99/;a;b?a=b&c=d#f" &&
+
+  true
+;;
+
+
+let t051 () =
+  (* relative URLs with ip_url_syntax *)
+  let identical s =
+    string_of_url (url_of_string ip_url_syntax s) = s in
+
+  let fails s =
+    try ignore(url_of_string ip_url_syntax s); false 
+    with Malformed_URL -> true
+  in
+
+  identical "//host" &&
+  identical "//user@host" &&
+  identical "//user:password@host" &&
+  identical "//user@host:99" &&
+  identical "//user:password@host:99" &&
+
+  identical "//host/" &&
+  identical "//user@host/" &&
+  identical "//user:password@host/" &&
+  identical "//user@host:99/" &&
+  identical "//user:password@host:99/" &&
+
+  identical "//host#f" &&
+  identical "//user@host#f" &&
+  identical "//user:password@host#f" &&
+  identical "//user@host:99#f" &&
+  identical "//user:password@host:99#f" &&
+
+  identical "/" &&
+  identical "/a" &&
+  identical "/a/" &&
+  identical "/a/a" &&
+
+  identical "/;a;b" &&
+  identical "/a;a;b" &&
+  identical "/a/;a;b" &&
+  identical "/a/a;a;b" &&
+
+  identical "/?a=b&c=d" &&
+  identical "/a?a=b&c=d" &&
+  identical "/a/?a=b&c=d" &&
+  identical "/a/a?a=b&c=d" &&
+
+  identical "/;a;b?a=b&c=d" &&
+  identical "/a;a;b?a=b&c=d" &&
+  identical "/a/;a;b?a=b&c=d" &&
+  identical "/a/a;a;b?a=b&c=d" &&
+
+  identical "/#f" &&
+  identical "/a#f" &&
+  identical "/a/#f" &&
+  identical "/a/a#f" &&
+
+  identical "/;a;b#f" &&
+  identical "/a;a;b#f" &&
+  identical "/a/;a;b#f" &&
+  identical "/a/a;a;b#f" &&
+
+  identical "/;a;b?a=b&c=d#f" &&
+  identical "/a;a;b?a=b&c=d#f" &&
+  identical "/a/;a;b?a=b&c=d#f" &&
+  identical "/a/a;a;b?a=b&c=d#f" &&
+
+  identical "" &&
+  identical "a" &&
+  identical "a/" &&
+  identical "a/a" &&
+
+  identical ";a;b" &&
+  identical "a;a;b" &&
+  identical "a/;a;b" &&
+  identical "a/a;a;b" &&
+
+  identical "?a=b&c=d" &&
+  identical "a?a=b&c=d" &&
+  identical "a/?a=b&c=d" &&
+  identical "a/a?a=b&c=d" &&
+
+  identical ";a;b?a=b&c=d" &&
+  identical "a;a;b?a=b&c=d" &&
+  identical "a/;a;b?a=b&c=d" &&
+  identical "a/a;a;b?a=b&c=d" &&
+
+  identical "#f" &&
+  identical "a#f" &&
+  identical "a/#f" &&
+  identical "a/a#f" &&
+
+  identical ";a;b#f" &&
+  identical "a;a;b#f" &&
+  identical "a/;a;b#f" &&
+  identical "a/a;a;b#f" &&
+
+  identical ";a;b?a=b&c=d#f" &&
+  identical "a;a;b?a=b&c=d#f" &&
+  identical "a/;a;b?a=b&c=d#f" &&
+  identical "a/a;a;b?a=b&c=d#f" &&
+
+  identical "." &&
+  identical "./" &&
+  identical "./a" &&
+
+  identical ".;a;b" &&
+  identical "./;a;b" &&
+  identical "./a;a;b" &&
+
+  identical ".?a=b&c=d" &&
+  identical "./?a=b&c=d" &&
+  identical "./a?a=b&c=d" &&
+
+  identical ".;a;b?a=b&c=d" &&
+  identical "./;a;b?a=b&c=d" &&
+  identical "./a;a;b?a=b&c=d" &&
+
+  identical ".#f" &&
+  identical "./#f" &&
+  identical "./a#f" &&
+
+  identical ".;a;b#f" &&
+  identical "./;a;b#f" &&
+  identical "./a;a;b#f" &&
+
+  identical ".;a;b?a=b&c=d#f" &&
+  identical "./;a;b?a=b&c=d#f" &&
+  identical "./a;a;b?a=b&c=d#f" &&
+
+  identical ".." &&
+  identical "../" &&
+  identical "../a" &&
+
+  identical "..;a;b" &&
+  identical "../;a;b" &&
+  identical "../a;a;b" &&
+
+  identical "..?a=b&c=d" &&
+  identical "../?a=b&c=d" &&
+  identical "../a?a=b&c=d" &&
+
+  identical "..;a;b?a=b&c=d" &&
+  identical "../;a;b?a=b&c=d" &&
+  identical "../a;a;b?a=b&c=d" &&
+
+  identical "..#f" &&
+  identical "../#f" &&
+  identical "../a#f" &&
+
+  identical "..;a;b#f" &&
+  identical "../;a;b#f" &&
+  identical "../a;a;b#f" &&
+
+  identical "..;a;b?a=b&c=d#f" &&
+  identical "../;a;b?a=b&c=d#f" &&
+  identical "../a;a;b?a=b&c=d#f" &&
+
+  string_of_url
+    (make_url ~path:["a:b"] ip_url_syntax) = "a%3Ab" &&
+
+  string_of_url
+    (make_url ~encoded:true ~path:["a:b"] ip_url_syntax) = "./a:b" &&
+
+  true
+;;
+
+
+let t052 () =
+  (* mailto: URLs *)
+  let mailto_syn = Hashtbl.find common_url_syntax "mailto" in
+
+  let identical s =
+    string_of_url (url_of_string mailto_syn s) = s in
+
+  let fails s =
+    try ignore(url_of_string mailto_syn s); false 
+    with Malformed_URL -> true
+  in
+
+  identical "mailto:user@host" &&
+  identical "mailto:user@host;?;?" &&
+  fails     "mailto:user@host#f"
+;;
+
+(**********************************************************************)
+(* split_path/join_path/norm_path:                                    *)
+(**********************************************************************)
+
+let t060 () =
+  (split_path "" = []) &&
+  (split_path "/" = [ "" ]) &&
+  (split_path "/a" = [ ""; "a" ]) &&
+  (split_path "a" = [ "a" ]) &&
+  (split_path "a/" = [ "a"; "" ]) &&
+  (split_path "/a/" = [ ""; "a"; "" ]) &&
+  (split_path "/a/b" = [ ""; "a"; "b" ]) &&
+  (split_path "/a/b/" = [ ""; "a"; "b"; "" ]) &&
+  (split_path "/a/b/c" = [ ""; "a"; "b"; "c" ]) &&
+
+  (join_path [] = "") &&
+  (join_path [ "" ] = "/") &&
+  (join_path [ ""; "a" ] = "/a") &&
+  (join_path [ "a" ] = "a") &&
+  (join_path [ "a"; "" ] = "a/") &&
+  (join_path [ ""; "a"; "" ] = "/a/") &&
+  (join_path [ ""; "a"; "b" ] = "/a/b") &&
+  (join_path [ ""; "a"; "b"; "" ] = "/a/b/") &&
+  (join_path [ ""; "a"; "b"; "c" ] = "/a/b/c") &&
+
+  true
+;;
+
+
+let t061 () =
+  (norm_path ["."] = []) &&
+  (norm_path ["."; ""] = []) &&
+  (norm_path ["a"; "."] = ["a"; ""]) &&
+  (norm_path ["a"; "b"; "."] = ["a"; "b"; ""]) &&
+  (norm_path ["a"; "b"; ".."] = ["a"; ""]) &&
+  (norm_path ["a"; "."; "b"; "."] = ["a"; "b"; ""]) &&
+  (norm_path [".."] = [".."; ""]) &&
+  (norm_path [".."; ""] = [".."; ""]) &&
+  (norm_path ["a"; "b"; ".."; "c" ] = ["a"; "c"]) &&
+  (norm_path ["a"; "b"; ".."; "c"; ""] = ["a"; "c"; ""]) &&
+  (norm_path ["";"";"a";"";"b"] = [""; "a"; "b"]) &&
+  (norm_path ["a"; "b"; ""; ".."; "c"; ""] = ["a"; "c"; ""]) &&
+  (norm_path ["a"; ".."] = []) &&
+  (norm_path ["";""] = [""]) &&
+  (norm_path [""] = [""]) &&
+  (norm_path [] = []) &&
+
+  true
+;;
+                 
+(**********************************************************************)
+(* apply_relative_url:                                                *)
+(**********************************************************************)
+
+let t070() =
+  (* Examples taken from RFC 1808 *)
+  let url = url_of_string ip_url_syntax in
+  let base = url "http://a/b/c/d;p?q#f" in
+  let aru = apply_relative_url base in
+
+  (aru (url "g:h")     = url "g:h") &&
+  (aru (url "g")       = url "http://a/b/c/g") &&
+  (aru (url "./g")     = url "http://a/b/c/g") &&
+  (aru (url "g/")      = url "http://a/b/c/g/") &&
+  (aru (url "/g")      = url "http://a/g") &&
+  (aru (url "//g")     = url "http://g") &&
+  (aru (url "?y")      = url "http://a/b/c/d;p?y") &&
+  (aru (url "g?y")     = url "http://a/b/c/g?y") &&
+  (aru (url "g?y/./x") = url "http://a/b/c/g?y/./x") &&
+  (aru (url "#s")      = url "http://a/b/c/d;p?q#s") &&
+  (aru (url "g#s")     = url "http://a/b/c/g#s") &&
+  (aru (url "g#s/./x") = url "http://a/b/c/g#s/./x") &&
+  (aru (url "g?y#s")   = url "http://a/b/c/g?y#s") &&
+  (aru (url ";x")      = url "http://a/b/c/d;x") &&
+  (aru (url "g;x")     = url "http://a/b/c/g;x") &&
+  (aru (url "g;x?y#s") = url "http://a/b/c/g;x?y#s") &&
+  (aru (url ".")       = url "http://a/b/c/") &&
+  (aru (url "./")      = url "http://a/b/c/") &&
+  (aru (url "..")      = url "http://a/b/") &&
+  (aru (url "../")     = url "http://a/b/") &&
+  (aru (url "../g")    = url "http://a/b/g") &&
+  (aru (url "../..")   = url "http://a/") &&
+  (aru (url "../../")  = url "http://a/") &&
+  (aru (url "../../g") = url "http://a/g") &&
+
+  (aru (url "")              = url "http://a/b/c/d;p?q#f") &&
+  (aru (url "../../../g")    = url "http://a/../g") &&
+  (aru (url "../../../../g") = url "http://a/../../g") &&
+  (aru (url "/./g")          = url "http://a/./g") &&
+  (aru (url "/../g")         = url "http://a/../g") &&
+  (aru (url "g.")            = url "http://a/b/c/g.") &&
+  (aru (url ".g")            = url "http://a/b/c/.g") &&
+  (aru (url "g..")           = url "http://a/b/c/g..") &&
+  (aru (url "..g")           = url "http://a/b/c/..g") &&
+  (aru (url "./../g")        = url "http://a/b/g") &&
+  (aru (url "./g/.")         = url "http://a/b/c/g/") &&
+  (aru (url "g/./h")         = url "http://a/b/c/g/h") &&
+  (aru (url "g/../h")        = url "http://a/b/c/h") &&
+  (aru (url "http:g")        = url "http:g") &&
+  (aru (url "http:")         = url "http:") &&
+
+  true
+;;
+  
+
+(**********************************************************************)
+
+let test f n =
+  if f() then
+    print_endline ("Test " ^ n ^ " ok")
+  else 
+    print_endline ("Test " ^ n ^ " FAILED!!!!");
+  flush stdout
+;;
+
+test t001 "001";
+test t002 "002";
+
+test t010 "010";
+test t011 "011";
+test t012 "012";
+
+test t020 "020";
+test t021 "021";
+
+test t030 "030";
+test t031 "031";
+test t032 "032";
+test t033 "033";
+test t034 "034";
+test t035 "035";
+test t036 "036";
+test t037 "037";
+test t038 "038";
+
+test t050 "050";
+test t051 "051";
+test t052 "052";
+
+test t060 "060";
+test t061 "061";
+
+test t070 "070";
+()
+;;
diff --git a/helm/DEVEL/pxp/netstring/tests/test_recode.ml b/helm/DEVEL/pxp/netstring/tests/test_recode.ml
new file mode 100644 (file)
index 0000000..64a04ca
--- /dev/null
@@ -0,0 +1,169 @@
+
+
+let make_iso enc =
+  let s = ref "" in
+  for i = 0 to 255 do
+    let u = try Netconversion.makechar (enc :> Netconversion.encoding) i 
+            with Not_found -> "" in
+    s := !s ^ u
+  done;
+  !s
+;;
+
+let make_ucs2 start stop =
+  let s = String.create ((stop - start) * 2) in
+  for i = 0 to stop-start-1 do
+    let k = 2 * i in
+    let c = i + start in
+    s.[k]   <- Char.chr(c lsr 8);
+    s.[k+1] <- Char.chr(c land 0xff);
+  done;
+  s
+;;
+
+let make_ucs4 start stop =
+  let s = String.create ((stop - start) * 4) in
+  for i = 0 to stop-start-1 do
+    let k = 4 * i in
+    let c = i + start in
+    s.[k]   <- Char.chr(c lsr 24);
+    s.[k+1] <- Char.chr((c lsr 16) land 0xff);
+    s.[k+2] <- Char.chr((c lsr 8) land 0xff);
+    s.[k+3] <- Char.chr(c land 0xff);
+  done;
+  s
+;;
+
+let name_of_encoding enc =
+  match enc with
+      `Enc_iso88591 -> "ISO_8859-1"
+    | `Enc_iso88592 -> "ISO_8859-2"
+    | `Enc_iso88593 -> "ISO_8859-3"
+    | `Enc_iso88594 -> "ISO_8859-4"
+    | `Enc_iso88595 -> "ISO_8859-5"
+    | `Enc_iso88596 -> "ISO_8859-6"
+    | `Enc_iso88597 -> "ISO_8859-7"
+    | `Enc_iso88598 -> "ISO_8859-8"
+    | `Enc_iso88599 -> "ISO_8859-9"
+    | `Enc_iso885910 -> "ISO_8859-10"
+    | `Enc_iso885913 -> "ISO_8859-13"
+    | `Enc_iso885914 -> "ISO_8859-14"
+    | `Enc_iso885915 -> "ISO_8859-15"
+    | `Enc_utf8     -> "UTF-8"
+    | `Enc_ucs4     -> "UCS-4"
+    | `Enc_ucs2     -> "UCS-2"
+    | `Enc_utf16    -> "UTF-16"
+
+  (* Note: GNU-iconv assumes big endian byte order *)
+;;
+
+let iconv_recode_string in_enc out_enc in_s =
+  let in_enc_name  = name_of_encoding in_enc in
+  let out_enc_name = name_of_encoding out_enc in
+  let out_s = ref "" in
+
+  let out_ch,in_ch = Unix.open_process ("iconv -f " ^ in_enc_name ^ " -t " ^ 
+                                       out_enc_name) in
+  (* Write in_s to in_ch in a new thread: *)
+  ignore
+    (Thread.create
+       (fun () ->
+         output_string in_ch in_s;
+         close_out in_ch;
+       )
+       ()
+    );
+  (* Read the result in the current thread: *)
+  let buf = String.create 1024 in
+  let n = ref 1 in
+  while !n <> 0 do
+    let n' = input out_ch buf 0 1024 in
+    out_s := !out_s ^ String.sub buf 0 n';
+    n := n'
+  done;
+  ignore(Unix.close_process (out_ch,in_ch));
+  !out_s
+;;
+
+let test_iso_and_utf8 enc  =
+  let name = name_of_encoding enc in
+  print_string ("Recode: " ^ name ^ " and UTF-8... "); flush stdout;
+  let s = make_iso enc in
+  let s1' = Netconversion.recode_string (enc :> Netconversion.encoding) 
+                                        `Enc_utf8 s in
+  let s2' = iconv_recode_string         enc `Enc_utf8 s in
+  assert(s1' = s2');
+  let s1  = Netconversion.recode_string `Enc_utf8 
+                                       (enc :> Netconversion.encoding) s1' in
+  let s2  = iconv_recode_string         `Enc_utf8 enc s1' in
+  assert(s1 = s2 && s1 = s);
+  print_endline "OK"; flush stdout
+;;
+
+let test_utf16_and_utf8_0000_d7ff () =
+  print_string "Recode: UTF-16-BE and UTF-8, #0000-#D7FF... "; 
+  flush stdout;
+  let s = make_ucs2 0 0xd800 in
+  let s1' = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 s in
+  let s2' = iconv_recode_string        `Enc_utf16    `Enc_utf8 s in
+  assert(s1' = s2');
+  let s1  = Netconversion.recode_string `Enc_utf8 `Enc_utf16_be s1' in
+  let s2  = iconv_recode_string        `Enc_utf8 `Enc_utf16 s1' in
+  assert(s1 = s2 && s1 = s);
+  print_endline "OK"; flush stdout
+;;
+
+let test_utf16_and_utf8_e000_fffd () =
+  print_string "Recode: UTF-16-BE and UTF-8, #E000-#FFFD... "; 
+  flush stdout;
+  let s = make_ucs2 0xe000 0xfffe in
+  let s1' = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 s in
+  let s2' = iconv_recode_string        `Enc_utf16    `Enc_utf8 s in
+  assert(s1' = s2');
+  let s1  = Netconversion.recode_string `Enc_utf8 `Enc_utf16_be s1' in
+  let s2  = iconv_recode_string        `Enc_utf8 `Enc_utf16 s1' in
+  assert(s1 = s2 && s1 = s);
+  print_endline "OK"; flush stdout
+;;
+
+let test_utf16_and_utf8_10000_10FFFF () =
+  print_string "Recode: UTF-16-BE and UTF-8, #10000-#10FFFF... "; 
+  flush stdout;
+  for i = 1 to 16 do
+    let s0  = make_ucs4 (i * 0x10000) (i * 0x10000 + 0x10000) in
+    let s   = iconv_recode_string        `Enc_ucs4     `Enc_utf16 s0 in
+    let s1' = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 s in
+    let s2' = iconv_recode_string        `Enc_utf16    `Enc_utf8 s in
+    assert(s1' = s2');
+    let s1  = Netconversion.recode_string `Enc_utf8 `Enc_utf16_be s1' in
+    let s2  = iconv_recode_string        `Enc_utf8 `Enc_utf16 s1' in
+    assert(s1 = s2 && s1 = s);
+    print_string "+"; flush stdout;
+  done;
+  print_endline "OK"; flush stdout
+;;
+
+
+print_endline "Warning: You need the command 'iconv' to run this test!";
+flush stdout;
+test_iso_and_utf8 `Enc_iso88591;
+test_iso_and_utf8 `Enc_iso88592;
+test_iso_and_utf8 `Enc_iso88593;
+test_iso_and_utf8 `Enc_iso88594;
+test_iso_and_utf8 `Enc_iso88595;
+test_iso_and_utf8 `Enc_iso88596;
+test_iso_and_utf8 `Enc_iso88597;
+(* test_iso_and_utf8 `Enc_iso88598; *)
+test_iso_and_utf8 `Enc_iso88599;
+test_iso_and_utf8 `Enc_iso885910;
+(* test_iso_and_utf8 `Enc_iso885913; *)
+(* test_iso_and_utf8 `Enc_iso885914; *)
+(* test_iso_and_utf8 `Enc_iso885915; *)
+test_utf16_and_utf8_0000_d7ff();
+test_utf16_and_utf8_e000_fffd();
+(* This test does not work because iconv does not support the surrogate
+ * representation of UTF-16:
+ * test_utf16_and_utf8_10000_10FFFF();
+ *)
+()
+;;
diff --git a/helm/DEVEL/pxp/netstring/tools/Makefile b/helm/DEVEL/pxp/netstring/tools/Makefile
new file mode 100644 (file)
index 0000000..b3c148d
--- /dev/null
@@ -0,0 +1,10 @@
+all:
+       $(MAKE) -C unimap_to_ocaml
+
+clean:
+
+CLEAN: clean
+       $(MAKE) -C unimap_to_ocaml CLEAN
+
+distclean: clean
+       $(MAKE) -C unimap_to_ocaml distclean
diff --git a/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/.cvsignore b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/.cvsignore
new file mode 100644 (file)
index 0000000..c1fcbc4
--- /dev/null
@@ -0,0 +1,7 @@
+*.cmo
+*.cmx
+*.cmi
+
+*.o
+*.a
+
diff --git a/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/Makefile b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/Makefile
new file mode 100644 (file)
index 0000000..ed42773
--- /dev/null
@@ -0,0 +1,15 @@
+all: unimap_to_ocaml
+
+unimap_to_ocaml: unimap_to_ocaml.ml
+       ocamlfind ocamlc -g -package str -linkpkg -custom \
+               -o unimap_to_ocaml \
+               unimap_to_ocaml.ml
+
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+CLEAN: clean
+
+distclean: clean
+       rm -f *~ unimap_to_ocaml
+
diff --git a/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/unimap_to_ocaml.ml b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/unimap_to_ocaml.ml
new file mode 100644 (file)
index 0000000..14a89e9
--- /dev/null
@@ -0,0 +1,201 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Printf;;
+
+let comment_re = Str.regexp "#.*$";;
+let space_re = Str.regexp "[ \t\r\n]+";;
+
+let read_unimap_format_a fname f =
+  (* Reads a Unicode mapping in format A, which maps a "local" code to Unicode.
+   * Returns a list of pairs (localcode, unicode).
+   *)
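+  (* For illustration (hypothetical input, not taken from a real map file),
+   * a format A line such as
+   *     0x41 0x0041   # LATIN CAPITAL LETTER A
+   * is reduced to "0x41 0x0041" by stripping the comment, and yields the
+   * pair (0x41, 0x0041), i.e. (65, 65), via int_of_string.
+   *)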
+  
+  let read_unimap_line() =
+    let s = input_line f in    (* may raise End_of_file *)
+    let s' = Str.global_replace comment_re "" s in
+    let words = Str.split space_re s' in
+    match words with
+       [] -> raise Not_found
+      | [ localcode; unicode ] ->
+         int_of_string localcode, int_of_string unicode
+      | _ ->
+         failwith ("File " ^ fname ^ ": Do not know what to do with:\n" ^ s')
+  in
+
+  let rec read_following_lines() =
+    try
+      let localcode, unicode = read_unimap_line() in 
+                               (* may raise End_of_file, Not_found *)
+      (localcode, unicode) :: read_following_lines()
+    with
+       Not_found -> read_following_lines()
+      | End_of_file -> []
+  in
+
+  read_following_lines()
+;;
+
+
+type from_uni_list =
+    U_nil
+  | U_single of (int * int)
+  | U_list of (int * int) list
+
+type from_unicode =
+    from_uni_list array;;
+  (* A hashtable with fixed size (256). A pair (unicode, localcode) is
+   * stored at the position unicode mod 256 in the array.
+   *)
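+
+(* Illustration: a hypothetical reverse lookup over such a table (the helper
+ * [lookup_local] is not used anywhere in this tool) might be sketched as
+ * follows.
+ *)
+let lookup_local (tbl : from_unicode) unicode =
+  match tbl.(unicode land 255) with
+      U_nil            -> raise Not_found
+    | U_single (u, lc) -> if u = unicode then lc else raise Not_found
+    | U_list pairs     -> List.assoc unicode pairs
+;;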
+
+
+let make_bijection unimap =
+  (* unimap: a list of pairs (localcode, unicode)
+   * returns a pair of arrays (m_to_unicode, m_from_unicode) with:
+   *   - m_to_unicode.(localcode) = Some unicode, 
+   *                                 if the pair (localcode, unicode) exists
+   *     m_to_unicode.(x) = None otherwise
+   *   - m_from_unicode.(unicode land 255) = [ ...; (unicode,localcode); ... ]
+   *)
+
+  let m_to_unicode   = Array.create 256 None in
+  let m_from_unicode = Array.create 256 [] in
+
+  List.iter
+    (fun (localcode, unicode) ->
+       assert(localcode < 256);
+
+       (* Update m_to_unicode: *)
+       if m_to_unicode.(localcode) <> None then
+        failwith ("Local code point " ^ string_of_int localcode ^ 
+                  " mapped twice");
+       m_to_unicode.(localcode) <- Some unicode;
+
+       (* Update m_from_unicode: *)
+       let unilow = unicode land 255 in
+       if List.mem_assoc unicode (m_from_unicode.(unilow)) then
+        failwith ("Unicode code point " ^ string_of_int unicode ^ 
+                  " mapped twice");
+       m_from_unicode.(unilow) <- 
+         m_from_unicode.(unilow) @ [unicode,localcode];
+    )
+    unimap;
+
+  m_to_unicode, m_from_unicode
+;;
+
+
+let to_unimap_as_string to_unimap =
+  let make_repr x =
+    match x with
+       None -> -1
+      | Some u -> u
+  in
+  Marshal.to_string (Array.map make_repr to_unimap) [ Marshal.No_sharing ]
+;;
+
+
+let from_unimap_as_string from_unimap =
+  let make_repr l =
+    match l with
+       []    -> U_nil
+      | [u,l] -> U_single(u,l)
+      | _     -> U_list l
+  in
+  let m = Array.map make_repr from_unimap in
+  Marshal.to_string m [ Marshal.No_sharing ]
+;;
+
+
+let print_bijection f name m_to_unicode m_from_unicode =
+  (* Prints the following O'Caml code to the file f:
+   * let <name>_to_unicode = ...
+   * let <name>_from_unicode = ...
+   *)
+  fprintf f "let %s_to_unicode = lazy (Marshal.from_string \"%s\" 0 : int array);;\n" 
+    name 
+    (String.escaped (to_unimap_as_string m_to_unicode));
+
+  fprintf f "let %s_from_unicode = lazy (Marshal.from_string \"%s\" 0 : Netmappings.from_uni_list array);;\n "
+    name
+    (String.escaped (from_unimap_as_string m_from_unicode));
+;;
+
+
+let main() =
+  let files = ref [] in
+  let outch = ref (lazy stdout) in
+  Arg.parse
+      [ "-o", Arg.String (fun s -> outch := lazy (open_out s)),
+           " <file>   Write result to this file"]
+      (fun s -> files := !files @ [s])
+      "usage: unimap_to_ocaml file.unimap ...";
+  
+  (* First read in all unimaps: *)
+  let unimaps =
+    List.map
+      (fun filename ->
+        let mapname = Str.replace_first (Str.regexp "\.unimap$") "" 
+                                        (Filename.basename filename) in
+        let f = open_in filename in
+        prerr_endline ("Reading " ^ filename);
+        let unimap = read_unimap_format_a filename f in
+        close_in f;
+        mapname, unimap
+      )
+      !files
+  in
+
+  (* Second compute all bijections: *)
+  let bijections =
+    List.map
+      (fun (mapname, unimap) ->
+        prerr_endline ("Processing " ^ mapname);
+        let to_unicode, from_unicode = make_bijection unimap in
+        mapname, to_unicode, from_unicode
+      )
+      unimaps
+  in
+
+  let out = Lazy.force !outch in
+  (* Third output all results: *)
+  output_string out "(* WARNING! This is a generated file! *)\n";
+
+  List.iter
+    (fun (mapname, to_unicode, from_unicode) ->
+       print_bijection out mapname to_unicode from_unicode)
+    bijections;
+  List.iter
+    (fun (mapname, _, _) ->
+       fprintf out "Hashtbl.add Netmappings.to_unicode `Enc_%s %s_to_unicode;\n" 
+                  mapname mapname;
+       fprintf out "Hashtbl.add Netmappings.from_unicode `Enc_%s %s_from_unicode;\n" 
+                  mapname mapname;
+    )
+    (List.rev bijections);
+  fprintf out "();;\n";
+
+  close_out out
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/08/29 00:48:52  gerd
+ *     Conversion tables are now stored in marshalled form.
+ *     New type for the conversion table Unicode to 8bit.
+ *
+ * Revision 1.2  2000/08/12 23:54:56  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/.cvsignore b/helm/DEVEL/pxp/pxp/.cvsignore
new file mode 100644 (file)
index 0000000..deb5b7f
--- /dev/null
@@ -0,0 +1,4 @@
+*.cmo
+*.cmx
+*.cmi
+
diff --git a/helm/DEVEL/pxp/pxp/LICENSE b/helm/DEVEL/pxp/pxp/LICENSE
new file mode 100644 (file)
index 0000000..55182a7
--- /dev/null
@@ -0,0 +1,22 @@
+Copyright 1999 by Gerd Stolpmann
+
+The package "markup" is copyright by Gerd Stolpmann. 
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this document and the "markup" software (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+The Software is provided ``as is'', without warranty of any kind, express
+or implied, including but not limited to the warranties of
+merchantability, fitness for a particular purpose and noninfringement.
+In no event shall Gerd Stolpmann be liable for any claim, damages or
+other liability, whether in an action of contract, tort or otherwise,
+arising from, out of or in connection with the Software or the use or
+other dealings in the software.
diff --git a/helm/DEVEL/pxp/pxp/META b/helm/DEVEL/pxp/pxp/META
new file mode 100644 (file)
index 0000000..020128a
--- /dev/null
@@ -0,0 +1,20 @@
+version = "1.0"
+requires = "netstring"
+description = "Validating parser for XML-1.0"
+archive(byte) = "pxp_types.cma 
+                 pxp_lex_iso88591.cma 
+                 pxp_lex_utf8.cma 
+                 pxp_engine.cma 
+                 pxp_utf8.cmo"
+archive(byte, pxp_without_utf8) = "pxp_types.cma 
+                 pxp_lex_iso88591.cma 
+                 pxp_engine.cma"
+archive(native) = "pxp_types.cmxa 
+                   pxp_lex_iso88591.cmxa 
+                   pxp_lex_utf8.cmxa 
+                   pxp_engine.cmxa 
+                   pxp_utf8.cmx"
+archive(native, pxp_without_utf8) = "pxp_types.cmxa 
+                 pxp_lex_iso88591.cmxa 
+                 pxp_engine.cmxa"
+
diff --git a/helm/DEVEL/pxp/pxp/Makefile b/helm/DEVEL/pxp/pxp/Makefile
new file mode 100644 (file)
index 0000000..f08eab9
--- /dev/null
@@ -0,0 +1,105 @@
+# make all:            make bytecode archive
+# make opt:            make native archive
+# make install:        install bytecode archive, and if present, native archive
+# make uninstall:      uninstall package
+# make clean:          remove intermediate files (in this directory)
+# make CLEAN:           remove intermediate files (recursively)
+# make distclean:      remove any superfluous files (recursively)
+# make release:        cleanup, create archive, tag CVS module 
+#                      (for developers)
+
+#----------------------------------------------------------------------
+
+include Makefile.conf
+
+.PHONY: all
+all: 
+       $(MAKE) -C m2parsergen all
+       $(MAKE) -C tools/ucs2_to_utf8 all
+       $(MAKE) -f Makefile.code all
+       $(MAKE) -C compatibility all
+
+.PHONY: opt
+opt: 
+       $(MAKE) -C m2parsergen all
+       $(MAKE) -C tools/ucs2_to_utf8 all
+       $(MAKE) -f Makefile.code opt
+       $(MAKE) -C compatibility opt
+
+.PHONY: install
+install: all tmp/pxp_entity.mli
+       files=`tools/collect_files *.cmi *.cma *.cmxa *.a \
+               pxp_utf8.cmo pxp_utf8.cmx pxp_utf8.o` && \
+       ocamlfind install $(NAME) $(MLI) tmp/pxp_entity.mli $$files META
+
+.PHONY: uninstall
+uninstall:
+       ocamlfind remove $(NAME)
+
+.PHONY: markup-install
+markup-install:
+       $(MAKE) -C compatibility install
+
+.PHONY: markup-uninstall
+markup-uninstall:
+       $(MAKE) -C compatibility uninstall
+
+tmp/pxp_entity.mli: pxp_entity.ml
+       mkdir -p tmp
+       rm -f tmp/pxp_entity.*
+       cp pxp_entity.ml tmp
+       echo '(* Sorry, this is currently undocumented *)' >tmp/mli
+       ocamlc -i -c tmp/pxp_entity.ml >>tmp/mli
+       mv tmp/mli tmp/pxp_entity.mli
+
+.PHONY: clean
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa *.new *.old
+       rm -f pxp_yacc.ml
+       touch lexers/objects_iso88591 lexers/objects_utf8 lexers/depend
+       $(MAKE) -C lexers clean
+       $(MAKE) -C compatibility clean
+
+.PHONY: CLEAN
+CLEAN: clean
+       $(MAKE) -C doc CLEAN
+       $(MAKE) -C examples CLEAN
+       $(MAKE) -C rtests CLEAN
+       $(MAKE) -C m2parsergen CLEAN
+       touch tools/ucs2_to_utf8/depend
+       $(MAKE) -C tools/ucs2_to_utf8 clean
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~ depend depend.pkg
+       $(MAKE) -C doc distclean
+       $(MAKE) -C examples distclean
+       $(MAKE) -C rtests distclean
+       $(MAKE) -C m2parsergen distclean
+       touch tools/ucs2_to_utf8/depend
+       $(MAKE) -C tools/ucs2_to_utf8 clean
+       $(MAKE) -C compatibility distclean
+
+RELEASE: META
+       awk '/version/ { print substr($$3,2,length($$3)-2) }' META >RELEASE
+
+.PHONY: dist
+dist: RELEASE
+       r=`head -1 RELEASE`; cd ..; gtar czf $(NAME)-$$r.tar.gz --exclude='*/CVS*' --exclude="*~" --exclude="*/depend.pkg" --exclude="*/depend" --exclude="*/oo_questions*" --exclude="*/testsamples*" --exclude="*/tmp/*" --exclude="*reptil*" --exclude="*/doc/common.xml" --exclude="*/doc/config.xml" --exclude="*.fig.bak" --exclude="*/ps/pic*" --exclude="*/examples/panel*" --exclude="*/examples/xmlforms_gtk*" --exclude="*/Mail*" $(NAME)/*
+
+.PHONY: tag-release
+tag-release: RELEASE
+       r=`head -1 RELEASE | sed -e s/\\\./-/g`; cd ..; cvs tag -F $(NAME)-$$r markup
+
+.PHONY: release
+release: distclean
+       $(MAKE) tag-release
+       $(MAKE) dist
+
+.PHONY: dev
+dev:
+       $(MAKE) all
+       -$(MAKE) uninstall
+       $(MAKE) install
+       $(MAKE) -C examples/validate distclean
+       $(MAKE) -C examples/validate validate
diff --git a/helm/DEVEL/pxp/pxp/Makefile.code b/helm/DEVEL/pxp/pxp/Makefile.code
new file mode 100644 (file)
index 0000000..3afed39
--- /dev/null
@@ -0,0 +1,96 @@
+# make all:            make bytecode archives
+# make opt:            make native archives
+#----------------------------------------------------------------------
+
+include Makefile.conf
+
+all: 
+       $(MAKE) -f Makefile.code pxp_types.cma
+       $(MAKE) -f Makefile.code pxp_lex_iso88591.cma
+       if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_lex_utf8.cma; else rm -f pxp_lex_utf8.cma; fi
+       $(MAKE) -f Makefile.code pxp_engine.cma
+       if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_utf8.cmo; else rm -f pxp_utf8.cmo; fi
+
+opt:
+       $(MAKE) -f Makefile.code pxp_types.cmxa
+       $(MAKE) -f Makefile.code pxp_lex_iso88591.cmxa
+       if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_lex_utf8.cmxa; else rm -f pxp_lex_utf8.cmxa; fi
+       $(MAKE) -f Makefile.code pxp_engine.cmxa
+       if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_utf8.cmx; else rm -f pxp_utf8.cmx; fi
+
+#----------------------------------------------------------------------
+
+pxp_types.cma: $(OBJECTS_types)
+       $(OCAMLC) -a -o pxp_types.cma $(OBJECTS_types)
+
+pxp_types.cmxa: $(XOBJECTS_types)
+       $(OCAMLOPT) -a -o pxp_types.cmxa $(XOBJECTS_types)
+
+pxp_engine.cma: $(OBJECTS_engine)
+       $(OCAMLC) -a -o pxp_engine.cma $(OBJECTS_engine)
+
+pxp_engine.cmxa: $(XOBJECTS_engine)
+       $(OCAMLOPT) -a -o pxp_engine.cmxa $(XOBJECTS_engine)
+
+
+# The following rules are "phony" to force 'make' to go into the
+# "lexers" subdirectory.
+
+.PHONY: pxp_lex_iso88591.cma
+pxp_lex_iso88591.cma: $(CMI_types)
+       $(MAKE) -C lexers all_iso88591
+       cp lexers/pxp_lex_iso88591.cma .
+
+.PHONY: pxp_lex_iso88591.cmxa
+pxp_lex_iso88591.cmxa: $(CMI_types)
+       $(MAKE) -C lexers opt_iso88591
+       cp lexers/pxp_lex_iso88591.cmxa lexers/pxp_lex_iso88591.a .
+
+.PHONY: pxp_lex_utf8.cma
+pxp_lex_utf8.cma: $(CMI_types)
+       $(MAKE) -C lexers all_utf8
+       cp lexers/pxp_lex_utf8.cma .
+
+.PHONY: pxp_lex_utf8.cmxa
+pxp_lex_utf8.cmxa: $(CMI_types)
+       $(MAKE) -C lexers opt_utf8
+       cp lexers/pxp_lex_utf8.cmxa lexers/pxp_lex_utf8.a .
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS   =
+OCAMLC    = $(OCAMLFIND) ocamlc -package "$(PACKAGES)" \
+              -g -I lexers $(OPTIONS) $(ROPTIONS)
+OCAMLOPT  = $(OCAMLFIND) ocamlopt -package "$(PACKAGES)" \
+              -p -I lexers $(OPTIONS) $(ROPTIONS)
+OCAMLDEP  = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+depend: *.ml *.mli pxp_yacc.ml
+       $(OCAMLDEP) *.ml *.mli >depend
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .m2y
+
+.ml.cmx:
+       $(OCAMLOPT) -c $<
+
+.ml.cmo:
+       $(OCAMLC) -c $<
+
+.mli.cmi:
+       $(OCAMLC) -c $<
+
+.mll.ml:
+       ocamllex $<
+
+.m2y.ml:
+       ./m2parsergen/m2parsergen < $< >`basename $< .m2y`.ml || { rm -f `basename $< .m2y`.ml; false; }
+
+*.mli:
+
+
+# Generated dependencies:
+
+include depend
+
diff --git a/helm/DEVEL/pxp/pxp/Makefile.conf b/helm/DEVEL/pxp/pxp/Makefile.conf
new file mode 100644 (file)
index 0000000..749c702
--- /dev/null
@@ -0,0 +1,37 @@
+# User-configurable section:
+
+# yes or no: Should the parser support UTF-8 strings as its internal
+# representation? "yes" is recommended, but it makes the parser
+# considerably bigger.
+UTF8_SUPPORT = yes
+
+# --- End of User-configurable section.
+
+# Settings.
+
+NAME = pxp
+PACKAGES = netstring
+
+# Caml objects that are needed by the lexers:
+OBJECTS_types = \
+           pxp_types.cmo pxp_lexer_types.cmo
+
+CMI_types = $(OBJECTS_types:.cmo=.cmi)
+
+# Caml objects that depend on the lexers:
+OBJECTS_engine = \
+          pxp_lexers.cmo \
+           pxp_dfa.cmo \
+          pxp_aux.cmo pxp_reader.cmo \
+           pxp_entity.cmo pxp_dtd.cmo pxp_document.cmo \
+          pxp_yacc.cmo pxp_codewriter.cmo
+
+# Same as native objects:
+XOBJECTS_types  = $(OBJECTS_types:.cmo=.cmx)
+XOBJECTS_engine = $(OBJECTS_engine:.cmo=.cmx)
+
+# .mli files to install:
+
+MLI = pxp_document.mli pxp_dtd.mli \
+      pxp_types.mli pxp_yacc.mli \
+      pxp_codewriter.mli pxp_dfa.mli
diff --git a/helm/DEVEL/pxp/pxp/RELEASE b/helm/DEVEL/pxp/pxp/RELEASE
new file mode 100644 (file)
index 0000000..d3827e7
--- /dev/null
@@ -0,0 +1 @@
+1.0
diff --git a/helm/DEVEL/pxp/pxp/compatibility/.cvsignore b/helm/DEVEL/pxp/pxp/compatibility/.cvsignore
new file mode 100644 (file)
index 0000000..deb5b7f
--- /dev/null
@@ -0,0 +1,4 @@
+*.cmo
+*.cmx
+*.cmi
+
diff --git a/helm/DEVEL/pxp/pxp/compatibility/META b/helm/DEVEL/pxp/pxp/compatibility/META
new file mode 100644 (file)
index 0000000..441e30a
--- /dev/null
@@ -0,0 +1,6 @@
+version = "PXP-emulator"
+requires = "pxp"
+description = "Validating parser for XML-1.0"
+archive(byte) = "markup.cma"
+archive(native) = "markup.cmxa"
+
diff --git a/helm/DEVEL/pxp/pxp/compatibility/Makefile b/helm/DEVEL/pxp/pxp/compatibility/Makefile
new file mode 100644 (file)
index 0000000..187116c
--- /dev/null
@@ -0,0 +1,40 @@
+# make all:            make bytecode archive
+# make opt:            make native archive
+# make install:        install bytecode archive, and if present, native archive
+# make uninstall:      uninstall package
+# make clean:          remove intermediate files (in this directory)
+# make CLEAN:           remove intermediate files (recursively)
+# make distclean:      remove any superfluous files (recursively)
+
+#----------------------------------------------------------------------
+
+include Makefile.conf
+
+.PHONY: all
+all: 
+       $(MAKE) -f Makefile.code all
+
+.PHONY: opt
+opt: 
+       $(MAKE) -f Makefile.code opt
+
+.PHONY: install
+install: all 
+       files=`../tools/collect_files *.cmi *.cma *.cmxa *.a` && \
+       ocamlfind install $(NAME) $(MLI) $$files META
+
+.PHONY: uninstall
+uninstall:
+       ocamlfind remove $(NAME)
+
+.PHONY: clean
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa *.new *.old
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~ depend depend.pkg
+
diff --git a/helm/DEVEL/pxp/pxp/compatibility/Makefile.code b/helm/DEVEL/pxp/pxp/compatibility/Makefile.code
new file mode 100644 (file)
index 0000000..2733faa
--- /dev/null
@@ -0,0 +1,50 @@
+# make all:            make bytecode archives
+# make opt:            make native archives
+#----------------------------------------------------------------------
+
+include Makefile.conf
+
+.PHONY: all
+all: markup.cma
+
+.PHONY: opt
+opt: markup.cmxa
+
+#----------------------------------------------------------------------
+
+markup.cma: $(OBJECTS)
+       $(OCAMLC) -a -o markup.cma $(OBJECTS)
+
+markup.cmxa: $(XOBJECTS)
+       $(OCAMLOPT) -a -o markup.cmxa $(XOBJECTS)
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS   =
+OCAMLC    = ocamlfind ocamlc -g -I .. -package netstring $(OPTIONS) $(ROPTIONS)
+OCAMLOPT  = ocamlfind ocamlopt -p -I .. -package netstring $(OPTIONS) $(ROPTIONS)
+OCAMLDEP  = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+depend: *.ml *.mli
+       $(OCAMLDEP) *.ml *.mli >depend
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli
+
+.ml.cmx:
+       $(OCAMLOPT) -c $<
+
+.ml.cmo:
+       $(OCAMLC) -c $<
+
+.mli.cmi:
+       $(OCAMLC) -c $<
+
+*.mli:
+
+
+# Generated dependencies:
+
+include depend
+
diff --git a/helm/DEVEL/pxp/pxp/compatibility/Makefile.conf b/helm/DEVEL/pxp/pxp/compatibility/Makefile.conf
new file mode 100644 (file)
index 0000000..061d0ca
--- /dev/null
@@ -0,0 +1,9 @@
+NAME = markup
+
+OBJECTS = markup_types.cmo markup_dtd.cmo markup_reader.cmo \
+         markup_document.cmo markup_yacc.cmo
+XOBJECTS = $(OBJECTS:.cmo=.cmx)
+
+MLI = markup_document.mli markup_dtd.mli \
+      markup_types.mli markup_yacc.mli markup_reader.mli
+
diff --git a/helm/DEVEL/pxp/pxp/compatibility/README b/helm/DEVEL/pxp/pxp/compatibility/README
new file mode 100644 (file)
index 0000000..5008673
--- /dev/null
@@ -0,0 +1,21 @@
+This directory contains the modules for Markup-0.2.10
+compatibility. The modules consist mainly of wrapper classes for the
+new PXP classes, and translate the old methods to the new ones.
+
+Please note that the compatibility is not perfect. Sometimes there are
+new methods which do not exist in Markup-0.2.10, and sometimes even
+existing methods changed their signature. I have tried to avoid that,
+but there are some ugly cases which are hard to solve without such
+modifications. 
+
+Translating old methods into new methods costs time and
+memory. Because of this, it is best to consider the compatibility
+modules as a migration path to PXP: you can test whether PXP parses your
+input files, and you can compare the old API with the new API
+directly. (However, it is hard to test new features of PXP with the
+compatibility modules; the old API does not reflect the new features.)
+
+The compatibility modules are currently maintained, but that will stop
+once PXP has been established.
+
+(Gerd)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_document.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_document.ml
new file mode 100644 (file)
index 0000000..bbc4979
--- /dev/null
@@ -0,0 +1,374 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+type node_type = 
+    T_element of string
+  | T_data
+
+class type [ 'node ] extension = [ 'node ] Pxp_document.extension
+
+class type [ 'ext, 'node ] pxp_extension_type =
+object ('self)
+    method clone : 'self
+    method node : 'self Pxp_document.node
+    method set_node : 'self Pxp_document.node -> unit
+
+    method markup_node : 'node
+    method set_markup_node : 'node -> unit
+
+    method set_index : 'self Pxp_yacc.index -> unit
+    method index : 'self Pxp_yacc.index
+  end
+;;
+
+
+class type [ 'ext ] node = 
+  object ('self)
+    constraint 'ext = 'ext node #extension
+    method pxp_node : (('ext, 'ext node) pxp_extension_type) Pxp_document.node
+
+    method extension : 'ext
+    method delete : unit
+    method parent : 'ext node
+    method root : 'ext node
+    method orphaned_clone : 'ext node
+    method orphaned_flat_clone : 'ext node
+    method add_node : 'ext node -> unit
+    method add_pinstr : Markup_dtd.proc_instruction -> unit
+    method pinstr : string -> Markup_dtd.proc_instruction list
+    method pinstr_names : string list
+    method sub_nodes : 'ext node list
+    method iter_nodes : ('ext node -> unit) -> unit
+    method iter_nodes_sibl :
+      ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+    method set_nodes : 'ext node list -> unit
+    method data : string
+    method node_type : node_type
+    method attribute : string -> Markup_types.att_value
+    method attribute_names : string list
+    method attribute_type : string -> Markup_types.att_type
+    method attributes : (string * Markup_types.att_value) list
+    method required_string_attribute : string -> string
+    method required_list_attribute : string -> string list
+    method optional_string_attribute : string -> string option
+    method optional_list_attribute : string -> string list
+    method quick_set_attributes : (string * Markup_types.att_value) list -> unit
+    method find : string -> 'ext node
+    method reset_finder : unit
+    method dtd : Markup_dtd.dtd
+    method create_element :
+      Markup_dtd.dtd -> node_type -> (string * string) list -> 'ext node
+    method create_data : Markup_dtd.dtd -> string -> 'ext node
+    method local_validate : unit
+    method keep_always_whitespace_mode : unit
+    method write_compact_as_latin1 : Markup_types.output_stream -> unit
+    method internal_adopt : 'ext node option -> unit
+    method internal_delete : 'ext node -> unit
+    method internal_init : Markup_dtd.dtd -> string -> (string * string) list -> unit
+  end
+;;
+
+
+class [ 'ext ] pxp_extension init_markup_node =
+  (object (self : 'self)
+    (* constraint 'ext = 'ext node #extension *)
+    val mutable pxp_node = (None : 
+                             'self Pxp_document.node option)
+    (* 'ext pxp_extension Pxp_document.node option *)
+    val mutable markup_node = (init_markup_node : 'ext node)
+
+    val mutable index = (None : 'self Pxp_yacc.index option)
+
+    method clone =
+      {< >}
+
+    method node =
+      match pxp_node with
+          None ->
+            assert false
+        | Some n -> n
+
+    method set_node n =
+      pxp_node <- Some n
+
+    method markup_node = markup_node
+
+    method set_markup_node n = markup_node <- n
+
+    method set_index ix =
+      index <- Some ix
+
+    method index = 
+      match index with
+         None -> assert false
+       | Some x -> x
+
+   end
+     : ['ext, 'ext node] pxp_extension_type )
+;;
+
+
+class [ 'ext ] emulate_markup_node init_ext init_pxp_node = 
+  object (self)
+    constraint 'ext = 'ext node #extension
+    val mutable pxp_node = (init_pxp_node : 
+                             ('ext, 'ext #node) 
+                             pxp_extension_type Pxp_document.node option)
+    val mutable extension = (init_ext : 'ext)
+
+    method pxp_node =
+      match pxp_node with
+         None   -> assert false
+       | Some n -> n 
+
+    method extension = extension
+    method delete = self # pxp_node # delete
+    method parent = self # pxp_node # parent # extension # markup_node
+    method root   = self # pxp_node # root # extension # markup_node
+
+    method orphaned_clone =
+      let ext' = extension # clone in
+      let pxp' = self # pxp_node # orphaned_clone in
+      let n = new emulate_markup_node ext' (Some pxp') in
+      ext' # set_node (n : 'ext #node  :> 'ext node);
+      pxp' # extension # set_markup_node n;
+      n
+
+     method orphaned_flat_clone =
+      let ext' = extension # clone in
+      let pxp' = self # pxp_node # orphaned_flat_clone in
+      let n = new emulate_markup_node ext' (Some pxp') in
+      ext' # set_node (n : 'ext #node  :> 'ext node);
+      pxp' # extension # set_markup_node n;
+      n
+
+     method dtd = self # pxp_node # dtd
+
+     method add_node (n : 'ext node) =
+       let n_pxp = n # pxp_node in
+       self # pxp_node # add_node n_pxp
+
+     method add_pinstr pi =
+       self # pxp_node # add_pinstr pi
+
+     method sub_nodes =
+       let l = self # pxp_node # sub_nodes in
+       List.map (fun n_pxp -> n_pxp # extension # markup_node) l
+
+     method pinstr name =
+       self # pxp_node # pinstr name
+
+     method pinstr_names =
+       self # pxp_node # pinstr_names
+
+     method iter_nodes f =
+       self # pxp_node # iter_nodes
+        (fun n_pxp -> f (n_pxp # extension # markup_node))
+
+     method iter_nodes_sibl f =
+       self # pxp_node # iter_nodes_sibl
+        (fun left_pxp node_pxp right_pxp ->
+           let left =
+             match left_pxp with 
+                 None       -> None
+               | Some n_pxp -> Some (n_pxp # extension # markup_node) in
+           let right =
+             match right_pxp with 
+                 None       -> None
+               | Some n_pxp -> Some (n_pxp # extension # markup_node) in
+           let node =
+             node_pxp # extension # markup_node in
+           f left node right
+        )
+
+     method set_nodes (l : 'ext node list) =
+       let l_pxp = List.map (fun n -> n # pxp_node) l in
+       self # pxp_node # set_nodes l_pxp
+
+     method data = self # pxp_node # data
+
+     method node_type =
+       match self # pxp_node # node_type with
+          Pxp_document.T_data         -> T_data
+        | Pxp_document.T_element name -> T_element name
+        | Pxp_document.T_super_root   -> T_element "-vr"
+        | Pxp_document.T_pinstr _     -> T_element "-pi"
+        | _ -> assert false
+
+     method attribute name =
+       self # pxp_node # attribute name
+
+     method attribute_names =
+       self # pxp_node # attribute_names
+
+     method attribute_type name =
+       self # pxp_node # attribute_type name
+
+     method attributes =
+       self # pxp_node # attributes
+
+     method required_string_attribute name =
+       self # pxp_node # required_string_attribute name
+
+     method required_list_attribute name =
+       self # pxp_node # required_list_attribute name
+
+     method optional_string_attribute name =
+       self # pxp_node # optional_string_attribute name
+
+     method optional_list_attribute name =
+       self # pxp_node # optional_list_attribute name
+
+     method quick_set_attributes l =
+       self # pxp_node # quick_set_attributes l
+
+     method find (name : string) =
+       let index = self # root # pxp_node # extension # index in
+       let n = index # find name in (* may raise Not_found *)
+       n # extension # markup_node
+
+     method reset_finder = ()
+
+     method create_element dtd nt atts =
+       let nt_pxp =
+        match nt with
+            T_data -> Pxp_document.T_data
+          | T_element name -> Pxp_document.T_element name in
+       let node_pxp =
+        self # pxp_node # create_element dtd nt_pxp atts in
+       let ext' = extension # clone in
+       let n = new emulate_markup_node ext' (Some node_pxp) in
+       ext' # set_node (n : 'ext #node  :> 'ext node);
+       node_pxp # extension # set_markup_node n;
+       n
+
+     method create_data dtd s =
+       let node_pxp =
+        self # pxp_node # create_data dtd s in
+       let ext' = extension # clone in
+       let n = new emulate_markup_node ext' (Some node_pxp) in
+       ext' # set_node (n : 'ext #node  :> 'ext node);
+       node_pxp # extension # set_markup_node n;
+       n
+
+     method keep_always_whitespace_mode =
+       self # pxp_node # keep_always_whitespace_mode
+
+     method write_compact_as_latin1 out =
+       self # pxp_node # write_compact_as_latin1 out
+
+     method local_validate =
+       self # pxp_node # local_validate()
+
+     method internal_adopt (p:'ext node option) =
+       assert false;
+       ()
+
+     method internal_delete (n:'ext node) =
+       assert false;
+       ()
+     method internal_init (d:Markup_dtd.dtd) (s:string) (atts:(string*string)list) =
+       assert false;
+       ()
+  end
+;;
+
+class [ 'ext ] data_impl ext data =
+  object (self)
+    inherit [ 'ext ] emulate_markup_node ext None
+    constraint 'ext = 'ext node #extension
+    initializer
+      if data <> "" then
+       failwith "Emulation of Markup_document: Cannot instantiate data node with non-empty string";
+      let self' = (self : 'ext #node :> 'ext node ) in
+      pxp_node <- Some (new Pxp_document.data_impl (new pxp_extension self'))
+
+  end
+;;
+
+class [ 'ext ] element_impl ext =
+  object (self)
+    inherit [ 'ext ] emulate_markup_node ext None
+    initializer
+      let self' = (self : 'ext #node :> 'ext node ) in
+      pxp_node <- Some (new Pxp_document.element_impl (new pxp_extension self'))
+  end
+;;
+
+
+class [ 'ext ] document w =
+  object (self)
+    val pxp_doc = new Pxp_document.document 
+                   (w : Markup_types.collect_warnings :> Pxp_types.collect_warnings)
+
+    val mutable standalone_flag = false
+
+    method init_xml_version v =
+      pxp_doc # init_xml_version v
+
+    method xml_version =
+      pxp_doc # xml_version
+
+    method init_xml_standalone b =
+      standalone_flag <- b
+
+    method xml_standalone = standalone_flag
+
+    method init_root (r : 'ext node) =
+      pxp_doc # init_root (r # pxp_node);
+      self # dtd # set_standalone_declaration standalone_flag
+        (* questionable *)
+
+    method root =
+      let pxp_root = pxp_doc # root in
+      pxp_root # extension # markup_node
+
+    method dtd =
+      pxp_doc # dtd
+
+    method add_pinstr pi =
+      pxp_doc # add_pinstr pi
+
+    method pinstr name =
+      pxp_doc # pinstr name
+
+    method pinstr_names =
+      pxp_doc # pinstr_names
+
+    method write_compact_as_latin1 out =
+      pxp_doc # write_compact_as_latin1 out
+
+  end
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:30  lpadovan
+ * Initial revision
+ *
+ * Revision 1.6  2000/08/18 20:19:00  gerd
+ *     Changed the emulation: there are now wrapper objects for nodes.
+ * This was necessary because node_type changed in PXP such that it became
+ * incompatible with Markup's node_type.
+ *
+ * Revision 1.5  2000/07/14 21:35:35  gerd
+ *     Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.4  2000/07/08 17:40:50  gerd
+ *     Updated the simulation.
+ *
+ * Revision 1.3  2000/06/14 22:19:27  gerd
+ *     Update because of additional 'encoding' methods.
+ *
+ * Revision 1.2  2000/05/30 00:08:40  gerd
+ *     Bugfix.
+ *
+ * Revision 1.1  2000/05/29 23:43:51  gerd
+ *     Initial compatibility revision.
+ *
+ *)
+
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_document.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_document.mli
new file mode 100644 (file)
index 0000000..2e37f0f
--- /dev/null
@@ -0,0 +1,420 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_document.mli.
+ * It corresponds to revision 1.13 of markup_document.mli.
+ *)
+
+(**********************************************************************)
+(*                                                                    *)
+(* Markup_document:                                                   *)
+(*     Object model of the document/element instances                 *)
+(*                                                                    *)
+(**********************************************************************)
+
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class type node ............. The common class type of the nodes of
+ *                               the element tree. Nodes are either
+ *                               elements (inner nodes) or data nodes
+ *                               (leaves)
+ * class type extension ........ The minimal properties of the so-called
+ *                               extensions of the nodes: Nodes can be
+ *                               customized by applying a class parameter
+ *                               that adds methods/values to nodes.
+ * class data_impl : node ...... Implements data nodes.
+ * class element_impl : node ... Implements element nodes
+ * class document .............. A document is an element with some additional
+ *                               properties
+ *
+ * ======================================================================
+ *
+ * THE STRUCTURE OF NODE TREES:
+ *
+ * Every node except the root node has a parent node. The parent node is
+ * always an element, because data nodes never contain other nodes.
+ * In the other direction, element nodes may have children; both elements
+ * and data nodes are possible as children.
+ * Every node knows its parent (if any) and all its children (if any);
+ * the linkage is maintained in both directions. A node without a parent
+ * is called a root.
+ * It is not possible that a node is the child of two nodes (two different nodes
+ * or a multiple child of the same node).
+ * You can break the connection between a node and its parent; the method
+ * "delete" performs this operations and deletes the node from the parent's
+ * list of children. The node is now a root, for itself and for all
+ * subordinate nodes. In this context, the node is also called an orphan,
+ * because it has lost its parent (this is a bit misleading because the
+ * parent is not always the creator of a node).
+ * In order to simplify complex operations, you can also set the list of
+ * children of an element. Nodes that have been children before are unchanged;
+ * new nodes are added (and the linkage is set up), and nodes that no longer
+ * occur in the list are handled as if they had been deleted.
+ * If you try to add a node that is not a root (either by an "add" or by a
+ * "set" operation) the operation fails.
+ *
+ * CREATION OF NODES
+ *
+ * The class interface supports creation of nodes by cloning a so-called
+ * exemplar. The idea is that it is sometimes useful to implement different
+ * element types by different classes, and to implement this by looking up
+ * exemplars.
+ * Imagine you have three element types A, B, and C, and three classes
+ * a, b, and c implementing the node interface (for example, by providing
+ * different extensions, see below). The XML parser can be configured to
+ * have a lookup table
+ *   { A --> a0,  B --> b0, C --> c0 }
+ * where a0, b0, c0 are exemplars of the classes a, b, and c, i.e. empty
+ * objects belonging to these classes. If the parser finds an instance of
+ * A, it looks up the exemplar a0 of A and clones it (actually, the method
+ * "create_element" performs this for elements, and "create_data" for data
+ * nodes). Clones belong to the same class as the original nodes, so the
+ * instances of the elements have the same classes as the configured
+ * exemplars.
+ * Note: This technique assumes that the interface of all exemplars is the
+ * same!
+ *
+ * THE EXTENSION
+ *
+ * The class type node and all its implementations have a class parameter
+ * 'ext which must at least fulfil the properties of the class type "extension".
+ * The idea is that you can add properties, for example:
+ *
+ * class my_extension =
+ *   object
+ *     (* minimal properties required by class type "extension": *)
+ *     method clone = ...
+ *     method node = ...
+ *     method set_node n = ...
+ *     (* here my own methods: *)
+ *     method do_this_and_that ...
+ *   end
+ *
+ * class my_element_impl = [ my_extension ] element_impl
+ * class my_data_impl    = [ my_extension ] data_impl
+ *
+ * The whole XML parser is parameterized with 'ext, so your extension is
+ * visible everywhere (this is the reason why extensibility is solved by
+ * parametric polymorphism and not by inclusive polymorphism (subtyping)).
+ *
+ *
+ * SOME COMPLICATED TYPE EXPRESSIONS
+ *
+ * Sometimes the following type expressions turn out to be necessary:
+ *
+ * 'a node extension as 'a
+ *      This is the type of an extension that belongs to a node that
+ *      has an extension that is the same as we started with.
+ *
+ * 'a extension node as 'a
+ *      This is the type of a node that has an extension that belongs to a
+ *      node of the type we started with.
+ *
+ *
+ * DOCUMENTS
+ * ...
+ *
+ * ======================================================================
+ *
+ * SIMPLE USAGE: ...
+ *)
+
+
+open Markup_dtd
+
+
+type node_type = 
+    T_element of string
+  | T_data
+
+
+
+class type [ 'node ] extension =
+  object ('self)
+    method clone : 'self
+      (* "clone" should return an exact deep copy of the object. *)
+    method node : 'node
+      (* "node" returns the corresponding node of this extension. This method
+       * is intended to return exactly what has previously been set by "set_node".
+       *)
+    method set_node : 'node -> unit
+      (* "set_node" is invoked once the extension is associated to a new
+       * node object.
+       *)
+  end
+;;
+
+class type [ 'ext, 'node ] pxp_extension_type =
+object ('self)
+    method clone : 'self
+    method node : 'self Pxp_document.node
+    method set_node : 'self Pxp_document.node -> unit
+
+    method markup_node : 'node
+    method set_markup_node : 'node -> unit
+
+    method set_index : 'self Pxp_yacc.index -> unit
+    method index : 'self Pxp_yacc.index
+  end
+;;
+
+class type [ 'ext ] node =
+  object ('self)
+    constraint 'ext = 'ext node #extension
+    method pxp_node : (('ext, 'ext node) pxp_extension_type) Pxp_document.node
+
+    method extension : 'ext
+      (* Return the extension of this node: *)
+
+    method delete : unit
+      (* Delete this node from the parent's list of sub nodes. This node gets
+       * orphaned.
+       * 'delete' does nothing if this node does not have a parent.
+       *)
+
+    method parent : 'ext node
+      (* Get the parent, or raise Not_found if this node is an orphan. *)
+
+    method root : 'ext node
+      (* Get the direct or indirect parent that does not have a parent itself,
+       * i.e. the root of the tree.
+       *)
+
+    method orphaned_clone : 'ext node
+      (* return an exact clone of this element and all sub nodes (deep copy)
+       * except string values which are shared by this node and the clone.
+       * The other exception is that the clone has no parent (i.e. it is now
+       * a root).
+       *)
+
+    method orphaned_flat_clone : 'ext node
+      (* return a clone of this element where all subnodes are omitted.
+       * The type of the node, and the attributes are the same as in the
+       * original node.
+       * The clone has no parent.
+       *)
+
+    method add_node : 'ext node -> unit
+      (* Append new sub nodes -- mainly used by the parser itself, but
+       * of course open for everybody. If an element is added, it must be
+       * an orphan (i.e. does not have a parent node); and after addition
+       * *this* node is the new parent.
+       *)
+
+    method add_pinstr : proc_instruction -> unit
+      (* Add a processing instruction to the set of processing instructions of
+       * this node. Usually only elements contain processing instructions.
+       *)
+
+    method pinstr : string -> proc_instruction list
+      (* Get all processing instructions with the passed name *)
+
+    method pinstr_names : string list
+      (* Get a list of all names of processing instructions *)
+
+    method sub_nodes : 'ext node list
+      (* Get the list of sub nodes *)
+
+    method iter_nodes : ('ext node -> unit) -> unit
+      (* iterate over the sub nodes *)
+
+    method iter_nodes_sibl :
+      ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+      (* Here every iteration step can also access the previous and the
+       * following node, if present:
+       *)
+
+    method find : string -> 'ext node
+      (* Get the node that has an ID attribute with this value, or raise
+       * Not_found.
+       * "find" may also cause a Validation_error if something is wrong
+       * with the IDs.
+       *)
+
+    method reset_finder : unit
+      (* ensures that newly added nodes will also be found *)
+
+    method set_nodes : 'ext node list -> unit
+      (* Set the list of sub nodes. Elements that are no longer sub nodes get
+       * orphaned, and all new elements that previously were not sub nodes
+       * must already be orphans.
+       *)
+
+    method data : string
+      (* Get the data string of this node. For data nodes, this string is just
+       * the content. For elements, this string is the concatenation of all
+       * subordinate data nodes.
+       *)
+
+    method node_type : node_type
+      (* Get the name of the element type. *)
+
+    method attribute : string -> Markup_types.att_value
+    method attribute_names : string list
+    method attribute_type : string -> Markup_types.att_type
+    method attributes : (string * Markup_types.att_value) list
+      (* Get a specific attribute; get the names of all attributes; get the
+       * type of a specific attribute; get names and values of all attributes.
+       * Only elements have attributes.
+       * Note: If the DTD allows arbitrary attributes for this element, "attribute_type"
+       * raises Undeclared.
+       *)
+
+    method required_string_attribute : string -> string
+    method required_list_attribute : string -> string list
+      (* Return the attribute or fail if the attribute is not present:
+       * The first version passes the value always as string back;
+       * the second version always as list.
+       *)
+
+    method optional_string_attribute : string -> string option
+    method optional_list_attribute : string -> string list
+      (* Return some attribute value or return None if the attribute is not
+       *  present:
+       * The first version passes the value always as string back;
+       * the second version always as list.
+       *)
+
+    method quick_set_attributes : (string * Markup_types.att_value) list -> unit
+      (* Sets the attributes but does not check whether they match the DTD.
+       *)
+
+     method dtd : dtd
+       (* Get the DTD *)
+
+    method create_element : dtd -> node_type -> (string * string) list -> 'ext node
+      (* create an "empty copy" of this element:
+       * - new DTD
+       * - new node type
+       * - new attribute list
+       * - empty list of nodes
+       *)
+
+    method create_data : dtd -> string -> 'ext node
+      (* create an "empty copy" of this data node: *)
+
+    method local_validate : unit
+      (* Check that this element conforms to the DTD: *)
+
+    method keep_always_whitespace_mode : unit
+      (* Normally, add_node does not accept data nodes if the DTD does not
+       * allow data nodes here, or if the data is whitespace only
+       * ("ignorable whitespace").
+       * Once you have invoked this method, ignorable whitespace is forced
+       * to be included into the document.
+       *)
+
+    method write_compact_as_latin1 : Markup_types.output_stream -> unit
+      (* Write the contents of this node and the subtrees to the passed
+       * output stream; the character set ISO-8859-1 is used. The format
+       * is compact (the opposite of "pretty printing").
+       *)
+
+    (* ---------------------------------------- *)
+    (* internal methods: *)
+    method internal_adopt : 'ext node option -> unit
+    method internal_delete : 'ext node -> unit
+    method internal_init : dtd -> string -> (string * string) list -> unit
+  end
+;;
+
+class [ 'ext ] data_impl : 'ext -> string -> [ 'ext ] node
+
+class [ 'ext ] element_impl : 'ext -> [ 'ext ] node
+
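
The overview above describes building trees by cloning exemplars that carry a user-supplied extension. Below is a minimal sketch of such an extension, written against only the signatures declared in this interface; the class and value names are invented for the example, and the exemplar lines assume Markup_document itself is linked in:

  class ['node] null_extension =
    object (self)
      (* back pointer to the node this extension decorates *)
      val mutable the_node = (None : 'node option)
      method clone = {< >}
      method node =
        match the_node with
            None   -> assert false   (* set_node is called before node is used *)
          | Some n -> n
      method set_node n = the_node <- Some n
    end
  ;;

  (* Exemplars that a parser configuration could clone via create_element
   * and create_data: *)
  let element_exemplar = new element_impl (new null_extension);;
  let data_exemplar    = new data_impl    (new null_extension) "";;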
+class [ 'ext ] document :
+  Markup_types.collect_warnings -> 
+  object
+    method init_xml_version : string -> unit
+    method init_xml_standalone : bool -> unit
+    method init_root : 'ext node -> unit
+
+    method xml_version : string
+    method xml_standalone : bool
+    method dtd : dtd
+    method root : 'ext node
+
+    method add_pinstr : proc_instruction -> unit
+    method pinstr : string -> proc_instruction list
+    method pinstr_names : string list
+
+    method write_compact_as_latin1 : Markup_types.output_stream -> unit
+      (* Write the document to the passed
+       * output stream; the character set ISO-8859-1 is used. The format
+       * is compact (the opposite of "pretty printing").
+       * If a DTD is present, the DTD is included into the internal subset.
+       *)
+
+  end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:30  lpadovan
+ * Initial revision
+ *
+ * Revision 1.4  2000/08/18 20:19:16  gerd
+ *     Updates in the emulation because of PXP changes.
+ *
+ * Revision 1.3  2000/07/16 16:35:06  gerd
+ *     Update because PXP interface contains now the method 'write'.
+ *
+ * Revision 1.2  2000/06/14 22:19:27  gerd
+ *     Update because of additional 'encoding' methods.
+ *
+ * Revision 1.1  2000/05/29 23:43:51  gerd
+ *     Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.13  2000/05/27 19:15:08  gerd
+ *     Removed the method init_xml_standalone.
+ *
+ * Revision 1.12  2000/05/01 20:42:34  gerd
+ *         New method write_compact_as_latin1.
+ *
+ * Revision 1.11  2000/04/30 18:15:57  gerd
+ *     Beautifications.
+ *     New method keep_always_whitespace_mode.
+ *
+ * Revision 1.10  2000/03/11 22:58:15  gerd
+ *     Updated to support Markup_codewriter.
+ *
+ * Revision 1.9  2000/01/27 21:51:56  gerd
+ *     Added method 'attributes'.
+ *
+ * Revision 1.8  2000/01/27 21:19:07  gerd
+ *     Added further methods.
+ *
+ * Revision 1.7  1999/11/09 22:20:14  gerd
+ *     Removed method init_dtd from class "document". The DTD is
+ * implicitly passed to the document by the root element.
+ *
+ * Revision 1.6  1999/09/01 22:51:40  gerd
+ *     Added methods to store processing instructions.
+ *
+ * Revision 1.5  1999/09/01 16:19:57  gerd
+ *     The "document" class has now a "warner" as class argument.
+ *
+ * Revision 1.4  1999/08/19 21:59:13  gerd
+ *     Added method "reset_finder".
+ *
+ * Revision 1.3  1999/08/19 01:08:29  gerd
+ *     Added method "find".
+ *
+ * Revision 1.2  1999/08/15 02:19:41  gerd
+ *     Some new explanations: That unknown elements are not rejected
+ * if the DTD allows them.
+ *
+ * Revision 1.1  1999/08/10 00:35:51  gerd
+ *     Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.ml
new file mode 100644 (file)
index 0000000..7df5e29
--- /dev/null
@@ -0,0 +1,36 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+class dtd w = 
+  Pxp_dtd.dtd 
+    (w : Markup_types.collect_warnings :> Pxp_types.collect_warnings) 
+    `Enc_iso88591;;
+
+class dtd_element dtd name = 
+  Pxp_dtd.dtd_element dtd name;;
+
+class dtd_notation name id = 
+  Pxp_dtd.dtd_notation name id `Enc_iso88591;;
+
+class proc_instruction target value = 
+  Pxp_dtd.proc_instruction target value `Enc_iso88591;;
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:30  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/07/14 21:35:35  gerd
+ *     Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.2  2000/06/14 22:19:27  gerd
+ *     Update because of additional 'encoding' methods.
+ *
+ * Revision 1.1  2000/05/29 23:43:51  gerd
+ *     Initial compatibility revision.
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.mli
new file mode 100644 (file)
index 0000000..660b35a
--- /dev/null
@@ -0,0 +1,108 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_dtd.mli.
+ * It corresponds to revision 1.11 of markup_dtd.mli.
+ *)
+
+(**********************************************************************)
+(*                                                                    *)
+(* Markup_dtd:                                                        *)
+(*     Object model of document type declarations                     *)
+(*                                                                    *)
+(**********************************************************************)
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class dtd ............... represents the whole DTD, including element
+ *                           declarations, entity declarations, notation
+ *                           declarations, and processing instructions
+ * class dtd_element ....... represents an element declaration consisting
+ *                           of a content model and an attribute list
+ *                           declaration
+ * class dtd_notation ...... represents a notation declaration
+ * class proc_instruction .. represents a processing instruction
+ * ======================================================================
+ *
+ *)
+
+
+class dtd :
+  Markup_types.collect_warnings -> 
+  Pxp_dtd.dtd
+    (* Incompatibilities:
+     * add_gen_entity, gen_entity
+     *)
+
+class dtd_element : dtd -> string -> Pxp_dtd.dtd_element
+  (* Incompatibilities:
+   * set_content_model, add_attribute
+   *)
+
+class dtd_notation : string -> Markup_types.ext_id -> Pxp_dtd.dtd_notation
+
+class proc_instruction : string -> string -> Pxp_dtd.proc_instruction
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:30  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/05/29 23:43:51  gerd
+ *     Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.11  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.10  2000/05/27 19:20:38  gerd
+ *     Changed the interfaces for the standalone check: New
+ * methods: standalone_declaration, set_standalone_declaration,
+ * externally_declared, attribute_violates_standalone_declaration.
+ *     The method set_content_model has been renamed to
+ * set_cm_and_extdecl; it now initializes also whether the element
+ * has been declared in an external entity.
+ *     Methods add_gen_entity and gen_entity pass an additional
+ * boolean argument containing whether the declaration of the
+ * general entity happened in an external entity.
+ *     Method add_attribute expects this argument, too, which
+ * states whether the declaration of the attribute happened in an
+ * external entity.
+ *
+ * Revision 1.9  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.8  2000/05/06 23:10:26  gerd
+ *     allow_arbitrary for elements, too.
+ *
+ * Revision 1.7  2000/05/01 20:42:52  gerd
+ *         New method write_compact_as_latin1.
+ *
+ * Revision 1.6  2000/03/11 22:58:15  gerd
+ *     Updated to support Markup_codewriter.
+ *
+ * Revision 1.5  2000/02/22 02:32:02  gerd
+ *     Updated.
+ *
+ * Revision 1.4  1999/11/09 22:15:41  gerd
+ *     Added method "arbitrary_allowed".
+ *
+ * Revision 1.3  1999/09/01 16:21:56  gerd
+ *     "dtd" classes have now an argument that passes a "warner".
+ *
+ * Revision 1.2  1999/08/15 02:20:23  gerd
+ *         New feature: a DTD can allow arbitrary elements.
+ *
+ * Revision 1.1  1999/08/10 00:35:51  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_reader.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_reader.ml
new file mode 100644 (file)
index 0000000..a196c22
--- /dev/null
@@ -0,0 +1,119 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+open Markup_types;;
+
+class type resolver =
+  object
+    method open_in : ext_id -> Lexing.lexbuf
+    method close_in : unit
+    method change_encoding : string -> unit
+    method clone : resolver
+  end
+;;
+
+(* General note: close_in is simulated by close_all. Of course, this is
+ * wrong, but it should not matter 
+ *)
+
+
+class resolve_read_channel ch the_warner =
+  object (self)
+    val pxp_resolver = 
+           new Pxp_reader.resolve_read_this_channel 
+             ~auto_close:false
+             ch
+    val warner = the_warner
+
+    initializer
+      pxp_resolver # init_warner 
+       (warner : Markup_types.collect_warnings :> Pxp_types.collect_warnings);
+      pxp_resolver # init_rep_encoding `Enc_iso88591;
+
+    method open_in xid =
+      pxp_resolver # open_in xid
+
+    method close_in =
+      pxp_resolver # close_all   (* sic! *)
+
+    method change_encoding enc =
+      pxp_resolver # change_encoding enc
+
+    method clone =
+      ( {< pxp_resolver = pxp_resolver # clone >} : #resolver :> resolver )
+
+  end
+;;
+
+
+class resolve_read_string str =
+  object (self)
+    val pxp_resolver = 
+           new Pxp_reader.resolve_read_this_string str
+    val warner = new Pxp_types.drop_warnings
+
+    initializer
+      pxp_resolver # init_warner warner;
+      pxp_resolver # init_rep_encoding `Enc_iso88591;
+
+    method open_in xid =
+      pxp_resolver # open_in xid
+
+    method close_in =
+      pxp_resolver # close_all   (* sic! *)
+
+    method change_encoding enc =
+      pxp_resolver # change_encoding enc
+
+    method clone =
+      ( {< pxp_resolver = pxp_resolver # clone >} : #resolver :> resolver )
+  end
+;;
+
+
+class resolve_as_file the_warner =
+  object (self)
+    val pxp_resolver = 
+           new Pxp_reader.resolve_as_file
+             ~system_encoding:`Enc_iso88591
+             ()
+    val warner = the_warner
+
+    initializer
+      pxp_resolver # init_warner
+       (warner : Markup_types.collect_warnings :> Pxp_types.collect_warnings);
+      pxp_resolver # init_rep_encoding `Enc_iso88591;
+
+    method open_in xid =
+      pxp_resolver # open_in xid
+
+    method close_in =
+      pxp_resolver # close_all   (* sic! *)
+
+    method change_encoding enc =
+      pxp_resolver # change_encoding enc
+
+    method clone =
+      ( {< pxp_resolver = pxp_resolver # clone >} : #resolver :> resolver )
+  end
+;;
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:30  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/07/14 21:35:35  gerd
+ *     Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.2  2000/07/08 17:40:50  gerd
+ *     Updated the simulation.
+ *
+ * Revision 1.1  2000/05/29 23:43:51  gerd
+ *     Initial compatibility revision.
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_reader.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_reader.mli
new file mode 100644 (file)
index 0000000..8e5e2c8
--- /dev/null
@@ -0,0 +1,141 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_reader.mli.
+ * It corresponds to revision 1.3 of markup_reader.mli.
+ *)
+
+open Markup_types;;
+
+
+(* The class type resolver is the official type of all "resolvers". 
+ * Resolvers get file names (or better, external identifiers) and 
+ * return lexbufs, scanning the file for tokens. Resolvers may be
+ * cloned, and clones can interpret relative file names relative to
+ * their creator. 
+ *)
+
+class type resolver =
+  object
+    (* A resolver can open a character source, and returns this source as
+     * Lexing.lexbuf.
+     * The resolver should recode the source into ISO-8859-1. By default,
+     * a resolver should assume UTF-8 or UTF-16 encoding. Before
+     * 'change_encoding' is invoked, the resolver should only return
+     * lexbufs with one character. After 'change_encoding' has been invoked,
+     * there is no character limit anymore.
+     * 'change_encoding' can only be invoked once. This method is usually
+     * called after the <? ... ?> prolog of the entity has been read.
+     * If this method is not called, it is up to the resolver to find out
+     * if UTF-8 or UTF-16 is used. It is recommended to invoke this method
+     * with an empty string to indicate this situation.
+     *)
+    method open_in : ext_id -> Lexing.lexbuf
+    method close_in : unit
+    method change_encoding : string -> unit
+
+
+    (* Every resolver can be cloned. The clone does not inherit the connection
+     * with the external object, i.e. it is closed.
+     *)
+    method clone : resolver
+
+  end
+;;
+
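
As an illustrative sketch only (not part of this interface): a trivial resolver over an in-memory string, assuming the types of this module are in scope. It presumes the text is already ISO-8859-1, so change_encoding is a no-op, and it skips the one-character-lexbuf refinement described above; the clone idiom mirrors the one used by the real resolvers in markup_reader.ml:

  class resolve_fixed_string (s : string) =
    object (self)
      method open_in (_ : ext_id) = Lexing.from_string s
      method close_in = ()
      method change_encoding (_ : string) = ()
      method clone = ( {< >} : #resolver :> resolver )
    end
  ;;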
+
+(* The following class is the current main implementation of resolvers.
+ * It fetches strings from an arbitrary source (by calling init_in, and
+ * then repeatedly next_string), recodes them to ISO-8859-1, and creates
+ * lexbufs for them.
+ * It is not complete, as the source is missing.
+ *
+ * Note that 'resolve_general' may change in future revisions; it is ugly.
+ *)
+
+(* -- This API simulation does not provide 'resolve_general' any longer
+
+class virtual resolve_general :
+  collect_warnings ->
+  object 
+    val mutable encoding : string
+    val mutable encoding_requested : bool
+    val warner : collect_warnings
+
+    method clone : resolver
+
+    method private warn : int -> unit
+    method private autodetect : string -> unit
+
+    method private virtual next_string : string -> int -> int -> int
+    method private virtual init_in : ext_id -> unit
+    method virtual close_in : unit
+
+    method open_in : ext_id -> Lexing.lexbuf
+
+    method change_encoding : string -> unit
+  end
+*)
+
+
+(* The next classes are resolvers for concrete input sources. *)
+
+class resolve_read_channel : 
+  in_channel -> collect_warnings -> resolver;;
+
+  (* Reads from the passed channel (it may even be a pipe). Note that this
+   * resolver cannot handle file inclusions, as it is pre-bound to a
+   * specific channel and is not able to interpret file names.
+   * That means that if there is an entity reference (something like &name;
+   * or %name;) to parse, and the definition points to another file, the
+   * resolver will fail.
+   *)
+
+
+class resolve_read_string : 
+  string -> resolver;;
+
+  (* Reads from the passed string. Like 'resolve_read_channel', this
+   * resolver cannot handle file inclusions.
+   *)
+
+
+class resolve_as_file :
+  collect_warnings -> resolver;;
+
+  (* Reads from the local file system. Every file name is interpreted as
+   * a file name of the local file system, and the referenced file is read.
+   * This resolver can handle file inclusions as long as they do not
+   * exceed the scope of the local file system (i.e. no URLs).
+   *)
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:30  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/07/08 17:40:50  gerd
+ *     Updated the simulation.
+ *
+ * Revision 1.1  2000/05/29 23:43:51  gerd
+ *     Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.3  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1  2000/03/13 23:41:54  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
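
As an illustration of the resolver interface above, here is a minimal, hypothetical sketch (not part of the distribution): a resolver that always serves the same ISO-8859-1 string and ignores the external identifier. It assumes Markup_types and the class type above are in scope, and it does not implement the one-character-per-lexbuf rule that applies before change_encoding:

    (* Hypothetical sketch: a resolver serving one fixed ISO-8859-1 string. *)
    class resolve_fixed_string (data : string) =
      object
        method open_in (_ : ext_id) = Lexing.from_string data
        method close_in = ()
        method change_encoding (_ : string) = ()
        method clone = ( {< >} : #resolver :> resolver )
      end
    ;;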
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_types.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_types.ml
new file mode 100644 (file)
index 0000000..a0c0c27
--- /dev/null
@@ -0,0 +1,103 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+
+type ext_id = Pxp_types.ext_id =
+    System of string
+  | Public of (string * string)
+  | Anonymous
+type dtd_id = Pxp_types.dtd_id=
+    External of ext_id
+  | Derived of ext_id
+  | Internal
+type content_model_type = Pxp_types.content_model_type =
+    Unspecified
+  | Empty
+  | Any
+  | Mixed of mixed_spec list
+  | Regexp of regexp_spec
+and mixed_spec = Pxp_types.mixed_spec =
+    MPCDATA
+  | MChild of string
+and regexp_spec = Pxp_types.regexp_spec =
+    Optional of regexp_spec
+  | Repeated of regexp_spec
+  | Repeated1 of regexp_spec
+  | Alt of regexp_spec list
+  | Seq of regexp_spec list
+  | Child of string
+type att_type = Pxp_types.att_type =
+    A_cdata
+  | A_id
+  | A_idref
+  | A_idrefs
+  | A_entity
+  | A_entities
+  | A_nmtoken
+  | A_nmtokens
+  | A_notation of string list
+  | A_enum of string list
+type att_default = Pxp_types.att_default =
+    D_required
+  | D_implied
+  | D_default of string
+  | D_fixed of string
+type att_value = Pxp_types.att_value =
+    Value of string
+  | Valuelist of string list
+  | Implied_value
+
+class collect_warnings = 
+object
+  val mutable w = Buffer.create 100
+  method print_warnings =
+    Buffer.contents w
+  method reset =
+    Buffer.clear w
+  method warn s =
+    Buffer.add_string w ("WARNING: " ^ s ^ "\n")
+end
+
+exception Illegal_character of int
+exception Validation_error = Pxp_types.Validation_error
+exception WF_error = Pxp_types.WF_error
+exception Character_not_supported = Pxp_types.Character_not_supported
+exception Bad_character_stream = Netconversion.Malformed_code
+exception At = Pxp_types.At
+exception Undeclared = Pxp_types.Undeclared
+
+let string_of_exn = Pxp_types.string_of_exn
+
+type output_stream = Pxp_types.output_stream =
+    Out_buffer of Buffer.t
+  | Out_channel of out_channel
+  | Out_function of (string -> int -> int -> unit)
+
+let write = Pxp_types.write
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:30  lpadovan
+ * Initial revision
+ *
+ * Revision 1.5  2000/08/18 20:19:16  gerd
+ *     Updates in the emulation because of PXP changes.
+ *
+ * Revision 1.4  2000/07/16 18:30:15  gerd
+ *     Updated because PXP does no longer have the exception
+ * Illegal_character.
+ *
+ * Revision 1.3  2000/07/14 21:35:35  gerd
+ *     Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.2  2000/07/08 17:40:50  gerd
+ *     Updated the simulation.
+ *
+ * Revision 1.1  2000/05/29 23:43:51  gerd
+ *     Initial compatibility revision.
+ *
+ *)
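
For illustration, here is a small, hypothetical usage sketch of the collect_warnings class defined above (the warning text is made up; it assumes this module is open):

    (* Hypothetical sketch: collect a warning and print the collected text. *)
    let () =
      let w = new collect_warnings in
      w # warn "character cannot be represented in ISO-8859-1";
      print_string (w # print_warnings);
      w # reset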
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_types.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_types.mli
new file mode 100644 (file)
index 0000000..b33bb30
--- /dev/null
@@ -0,0 +1,125 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_types.mli.
+ * It corresponds to revision 1.7 of markup_types.mli.
+ *)
+
+
+type ext_id = Pxp_types.ext_id =
+    System of string
+  | Public of (string * string)
+  | Anonymous
+type dtd_id = Pxp_types.dtd_id =
+    External of ext_id
+  | Derived of ext_id
+  | Internal
+type content_model_type = Pxp_types.content_model_type =
+    Unspecified
+  | Empty
+  | Any
+  | Mixed of mixed_spec list
+  | Regexp of regexp_spec
+and mixed_spec = Pxp_types.mixed_spec =
+    MPCDATA
+  | MChild of string
+and regexp_spec = Pxp_types.regexp_spec =
+    Optional of regexp_spec
+  | Repeated of regexp_spec
+  | Repeated1 of regexp_spec
+  | Alt of regexp_spec list
+  | Seq of regexp_spec list
+  | Child of string
+type att_type = Pxp_types.att_type =
+    A_cdata
+  | A_id
+  | A_idref
+  | A_idrefs
+  | A_entity
+  | A_entities
+  | A_nmtoken
+  | A_nmtokens
+  | A_notation of string list
+  | A_enum of string list
+type att_default = Pxp_types.att_default =
+    D_required
+  | D_implied
+  | D_default of string
+  | D_fixed of string
+type att_value = Pxp_types.att_value =
+    Value of string
+  | Valuelist of string list
+  | Implied_value
+
+class collect_warnings :
+  object 
+    method warn : string -> unit
+    method print_warnings : string
+    method reset : unit
+  end
+;;
+
+
+exception Illegal_character of int
+exception Validation_error of string
+exception WF_error of string
+exception Character_not_supported
+exception Bad_character_stream
+exception At of (string * exn)
+exception Undeclared
+
+val string_of_exn : exn -> string
+  (* Converts a Markup exception into a readable string *)
+
+
+type output_stream = Pxp_types.output_stream =
+    Out_buffer of Buffer.t
+  | Out_channel of out_channel
+  | Out_function of (string -> int -> int -> unit)
+
+val write : output_stream -> string -> int -> int -> unit
+  (* write os s pos len: Writes the substring of s that starts at position
+   * pos and has length len to the buffer/channel/stream *)
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:30  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/07/08 17:40:50  gerd
+ *     Updated the simulation.
+ *
+ * Revision 1.1  2000/05/29 23:43:51  gerd
+ *     Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.7  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.6  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.5  2000/05/01 20:43:25  gerd
+ *         New type output_stream; new function 'write'.
+ *
+ * Revision 1.4  1999/09/01 16:25:35  gerd
+ *     Dropped Illegal_token and Content_not_allowed_here. WF_error can
+ * be used instead.
+ *
+ * Revision 1.3  1999/08/15 02:22:40  gerd
+ *         Added exception Undeclared.
+ *
+ * Revision 1.2  1999/08/14 22:15:17  gerd
+ *         New class "collect_warnings".
+ *
+ * Revision 1.1  1999/08/10 00:35:52  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
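
A hypothetical usage sketch of the output_stream type and the write function declared above (the string is made up; it assumes this module is open):

    (* Hypothetical sketch: write a string into a Buffer.t via Out_buffer. *)
    let () =
      let buf = Buffer.create 16 in
      let s = "<hello/>" in
      write (Out_buffer buf) s 0 (String.length s);
      print_string (Buffer.contents buf)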
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.ml
new file mode 100644 (file)
index 0000000..26c40de
--- /dev/null
@@ -0,0 +1,245 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+open Markup_types
+open Markup_dtd
+open Markup_document
+
+type config =
+    { warner : collect_warnings;
+      errors_with_line_numbers : bool;
+      processing_instructions_inline : bool;
+      virtual_root : bool;
+      debugging_mode : bool;
+    }
+
+
+type source =
+    Entity of ((dtd -> Pxp_entity.entity) * Markup_reader.resolver)
+  | Channel of in_channel
+  | File of string
+  | Latin1 of string
+  | ExtID of (ext_id * Markup_reader.resolver)
+
+type 'ext domspec =
+    { map : (node_type, 'ext node) Hashtbl.t;
+      default_element : 'ext node;
+    }
+
+
+class default_ext =
+  object(self)
+    val mutable node = (None : ('a extension node as 'a) option)
+    method clone = {< >}
+    method node =
+      match node with
+          None ->
+            assert false
+        | Some n -> n
+    method set_node n =
+      node <- Some n
+  end
+;;
+
+
+let default_extension = new default_ext;;
+
+let default_config = 
+  { warner = new collect_warnings;
+    errors_with_line_numbers = true;
+    processing_instructions_inline = false;
+    virtual_root = false;
+    debugging_mode = false;
+  }
+
+
+let default_dom =
+  let d = Hashtbl.create 2 in
+  Hashtbl.add d T_data (new data_impl default_extension "");
+  { map = d;
+    default_element = new element_impl default_extension
+  }
+;;
+
+
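+(* Translate a compatibility 'config' value into the corresponding
+ * Pxp_yacc configuration. *)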
+let pxp_config cfg =
+  { Pxp_yacc.default_config with
+       Pxp_yacc.warner = (cfg.warner :> Pxp_types.collect_warnings);
+       Pxp_yacc.errors_with_line_numbers = cfg.errors_with_line_numbers;
+       Pxp_yacc.enable_pinstr_nodes = cfg.processing_instructions_inline;
+       Pxp_yacc.enable_super_root_node = cfg.virtual_root;
+       Pxp_yacc.encoding = `Enc_iso88591;
+       Pxp_yacc.recognize_standalone_declaration = false;
+       Pxp_yacc.debugging_mode = cfg.debugging_mode;
+  }
+;;
+
+
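+(* Adapter that wraps a Markup resolver so that it can be passed to PXP
+ * as a Pxp_reader.resolver. *)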
+class pxp_resolver r =
+  object (self)
+    val markup_resolver = r
+
+    method init_rep_encoding enc =
+      assert (enc = `Enc_iso88591 )
+  
+    method init_warner w =
+      ()
+
+    method rep_encoding = `Enc_iso88591
+
+    method open_in xid = 
+      markup_resolver # open_in xid
+
+    method close_in =
+      markup_resolver # close_in
+
+    method close_all =
+      markup_resolver # close_in
+
+    method change_encoding enc =
+      markup_resolver # change_encoding enc
+
+    method clone =
+      ( {< markup_resolver = markup_resolver # clone >} 
+       : #Pxp_reader.resolver :> Pxp_reader.resolver )
+  end
+;;
+
+
+let pxp_source src =
+  match src with
+      Entity (mkent, res) -> Pxp_yacc.Entity(mkent, new pxp_resolver res)
+    | ExtID (id, res)     -> Pxp_yacc.ExtID(id, new pxp_resolver res)
+    | Channel ch          -> Pxp_yacc.from_channel 
+                              ~system_encoding:`Enc_iso88591 ch
+    | File f              -> Pxp_yacc.from_file 
+                              ~system_encoding:`Enc_iso88591 f
+    | Latin1 s            -> Pxp_yacc.from_string ~fixenc:`Enc_iso88591 s
+;;
+
+
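+(* Convert a 'domspec' into a Pxp_document specification by extracting the
+ * exemplars for data nodes, elements, and the special "-vr"/"-pi" names. *)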
+let pxp_dom dom =
+  let dex =
+    try Hashtbl.find dom.map T_data 
+    with Not_found -> assert false
+  in
+  let eex = dom.default_element in
+  let m = Hashtbl.create 100 in
+  Hashtbl.iter
+    (fun nt ex ->
+       match nt with
+          T_element name when name <> "-vr" && name <> "-pi" -> 
+            let pxp_ex = ex # pxp_node in
+            Hashtbl.add m name pxp_ex
+        | _              -> ()
+    )
+    dom.map;
+  let srex =
+    try
+      Some ((Hashtbl.find dom.map (T_element "-vr")) # pxp_node)
+    with
+       Not_found -> None
+  in
+  let piex =
+    try
+      Some ((Hashtbl.find dom.map (T_element "-pi")) # pxp_node)
+    with
+       Not_found -> None
+  in
+  Pxp_document.make_spec_from_mapping
+    ?super_root_exemplar:srex
+    ?default_pinstr_exemplar:piex
+    ~data_exemplar:(dex # pxp_node)
+    ~default_element_exemplar:(eex # pxp_node)
+    ~element_mapping:m
+    ()
+;;
+
+
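+(* Re-package a parsed PXP document as a Markup_document.document, copying
+ * the XML version, the standalone flag, the root node, and the processing
+ * instructions. *)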
+let markup_document w index doc =
+  let mdoc = new document w in
+  mdoc # init_xml_version (doc # xml_version);
+  mdoc # init_xml_standalone (doc # xml_standalone);
+  let r = doc # root # extension in
+  r # set_index index;
+  mdoc # init_root (r # markup_node);
+  List.iter
+    (fun piname ->
+       let l = doc # pinstr piname in
+       List.iter 
+        (fun pi -> mdoc # add_pinstr pi)
+        l)
+    (doc # pinstr_names);
+  mdoc
+;;
+
+
+
+let parse_dtd_entity cfg src =
+  Pxp_yacc.parse_dtd_entity
+    (pxp_config cfg)
+    (pxp_source src)
+;;
+
+
+let parse_document_entity cfg src dom =
+  let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in
+  markup_document
+    cfg.warner
+    index
+    (Pxp_yacc.parse_document_entity 
+        ~id_index:index
+       (pxp_config cfg)
+       (pxp_source src)
+       (pxp_dom dom))
+;;
+
+
+let parse_content_entity cfg src dtd dom =
+  let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in
+  let n = 
+    (Pxp_yacc.parse_content_entity
+     ~id_index:index
+       (pxp_config cfg)
+       (pxp_source src)
+       dtd
+       (pxp_dom dom)) # extension in
+  n # set_index index;
+  n # markup_node
+;;
+
+
+let parse_wf_entity cfg src dom =
+  let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in
+  (* Restriction: index is not filled! *)
+  markup_document
+    cfg.warner
+    index 
+    (Pxp_yacc.parse_wfdocument_entity
+       (pxp_config cfg)
+       (pxp_source src)
+       (pxp_dom dom))
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:30  lpadovan
+ * Initial revision
+ *
+ * Revision 1.4  2000/08/18 20:19:16  gerd
+ *     Updates in the emulation because of PXP changes.
+ *
+ * Revision 1.3  2000/07/14 21:35:35  gerd
+ *     Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.2  2000/07/08 17:40:50  gerd
+ *     Updated the simulation.
+ *
+ * Revision 1.1  2000/05/29 23:43:51  gerd
+ *     Initial compatibility revision.
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.mli
new file mode 100644 (file)
index 0000000..daccad4
--- /dev/null
@@ -0,0 +1,233 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_yacc.mli.
+ * It corresponds to revision 1.4 of markup_yacc.mli.
+ *)
+
+
+(*$ markup-yacc.mli *)
+
+open Markup_types
+open Markup_dtd
+open Markup_document
+
+type config =
+    { warner : collect_warnings;
+         (* An object that collects warnings. *)
+
+      errors_with_line_numbers : bool;
+         (* Whether error messages contain line numbers or not. The parser
+         * is 10 to 20 per cent faster if line numbers are turned off;
+         * you get only character positions in this case.
+         *)
+
+      processing_instructions_inline : bool;
+         (* true: turns a special mode for processing instructions on. Normally,
+         * you cannot determine the exact location of a PI; you only know
+         * in which element the PI occurs. The "inline" mode makes it possible
+         * to find the exact location out: Every PI is artificially wrapped
+         * by a special element with name "-pi". For example, if the XML text
+         * is <a><?x?><?y?></a>, the parser normally produces only an element
+         * object for "a", and puts the PIs "x" and "y" into it (without
+         * order). In inline mode, the object "a" will contain two objects
+         * with name "-pi", and the first object will contain "x", and the
+         * second "y".
+         * Notes:
+         * (1) The name "-pi" is reserved. You cannot use it for your own
+         *     tags because tag names must not begin with '-'.
+         * (2) You need not add a declaration for "-pi" to the DTD. These
+         *     elements are handled separately.
+         * (3) Of course, the "-pi" objects are created from exemplars of
+         *     your DOM map.
+         *)
+
+      virtual_root : bool;
+         (* true: the topmost element of the XML tree is not the root element,
+         * but the so-called virtual root. The root element is a son of the
+         * virtual root. The virtual root is an ordinary element with name
+         * "-vr".
+         * The following behaviour changes, too:
+         * - PIs occurring outside the root element and outside the DTD are
+         *   added to the virtual root instead of the document object
+         * - If processing_instructions_inline is also turned on, these PIs
+         *   are added inline to the virtual root
+         * Notes:
+         * (1) The name "-vr" is reserved. You cannot use it for your own
+         *     tags because tag names must not begin with '-'.
+         * (2) You need not add a declaration for "-vr" to the DTD. These
+         *     elements are handled separately.
+         * (3) Of course, the "-vr" objects are created from exemplars of
+         *     your DOM map.
+         *)
+
+      (* The following options are not implemented, or only for internal
+       * use.
+       *)
+
+      debugging_mode : bool;
+    }
+
+
+type source =
+    Entity of ((dtd -> Pxp_entity.entity) * Markup_reader.resolver)
+  | Channel of in_channel
+  | File of string
+  | Latin1 of string
+  | ExtID of (ext_id * Markup_reader.resolver)
+
+(* Note on sources:
+ *
+ * The sources do not all have the same capabilities. Here are the differences:
+ *
+ * - File: A File source reads from a file by name. This has the advantage
+ *   that references to external entities can be resolved. - The problem
+ *   with SYSTEM references is that they usually contain relative file
+ *   names; more exactly, a file name relative to the document containing it.
+ *   It is only possible to convert such names to absolute file names if the
+ *   name of the document containing such references is known; and File
+ *   denotes this name.
+ *
+ * - Channel, Latin1: These sources read from documents given as channels or
+ *   (Latin 1-encoded) strings. There is no file name, and because of this
+ *   the documents must not contain references to external files (even
+ *   if the file names are given as absolute names).
+ *
+ * - ExtID(x,r): The identifier x (either the SYSTEM or the PUBLIC name) of the
+ *   entity to read from is passed to the resolver r as-is.
+ *   The intention of this option is to allow customized
+ *   resolvers to interpret external identifiers without any restriction.
+ *   For example, you can assign the PUBLIC identifiers a meaning (they
+ *   currently do not have any), or you can extend the "namespace" of
+ *   identifiers.
+ *   ExtID is the interface of choice for your own resolver extensions.
+ *
+ * - Entity(m,r): You can implement any behaviour by using a customized
+ *   entity class. Once the DTD object d is known that will be used during
+ *   parsing, the entity  e = m d  is determined and used together with the
+ *   resolver r.
+ *   This is only for hackers.
+ *)
+
+
+type 'ext domspec =
+    { map : (node_type, 'ext node) Hashtbl.t;
+      default_element : 'ext node;
+    }
+  (* Specifies which node to use as exemplar for which node type. See the
+   * manual for explanations.
+   *)
+
+val default_config : config
+  (* - The resolver is able to read from files by name
+   * - Warnings are thrown away
+   * - Error messages will contain line numbers
+   * - The internal encoding is ISO-8859-1
+   * - standalone declaration is checked
+   *)
+
+val default_extension : ('a node extension) as 'a
+  (* A "null" extension; an extension that does not extend the functionality *)
+
+val default_dom : ('a node extension as 'a) domspec
+  (* Specifies that you do not want to use extensions. *)
+
+val parse_dtd_entity      : config -> source -> dtd
+  (* Parse an entity containing a DTD, and return this DTD. *)
+
+val parse_document_entity : config -> source -> 'ext domspec -> 'ext document
+  (* Parse a closed document, i.e. a document beginning with <!DOCTYPE...>,
+   * and validate the contents of the document against the DTD contained
+   * and/or referenced in the document.
+   *)
+
+val parse_content_entity  : config ->
+                            source ->
+                           dtd ->
+                           'ext domspec ->
+                             'ext node
+  (* Parse a file representing a well-formed fragment of a document. The
+   * fragment must be a single element (i.e. something like <a>...</a>;
+   * not a sequence like <a>...</a><b>...</b>). The element is validated
+   * against the passed DTD, but it is not checked whether the element is
+   * the root element specified in the DTD.
+   * Note that you can create DTDs that specify not to validate at all
+   * (invoke method allow_arbitrary on the DTD).
+   *)
+
+val parse_wf_entity : config -> source -> 'ext domspec -> 'ext document
+  (* Parse a closed document (see parse_document_entity), but do not
+   * validate it. Only checks on well-formedness are performed.
+   *)
+
+(*$-*)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:30  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/05/29 23:43:51  gerd
+ *     Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.4  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.3  2000/05/27 19:24:01  gerd
+ *     New option: recognize_standalone_declaration.
+ *
+ * Revision 1.2  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1  2000/05/06 23:21:49  gerd
+ *     Initial revision.
+ *
+ * Revision 1.9  2000/04/30 18:23:38  gerd
+ *     New config options 'processing_instructions_inline' and
+ * 'virtual_root'.
+ *
+ * Revision 1.8  2000/03/13 23:46:46  gerd
+ *     Change: The 'resolver' component of the 'config' type has
+ * disappeared. Instead, there is a new resolver component in the Entity
+ * and ExtID values of 'source'. I hope that this makes clearer that the
+ * resolver has only an effect if used together with Entity and ExtID
+ * sources.
+ *     Change: The Entity value can now return the entity dependent
+ * on the DTD that is going to be used.
+ *
+ * Revision 1.7  2000/02/22 02:32:02  gerd
+ *     Updated.
+ *
+ * Revision 1.6  2000/02/22 01:52:45  gerd
+ *     Added documentation.
+ *
+ * Revision 1.5  2000/01/20 20:54:43  gerd
+ *     New config.errors_with_line_numbers.
+ *
+ * Revision 1.4  1999/09/01 23:09:10  gerd
+ *     New function parse_wf_entity that simulates a well-formedness
+ * parser.
+ *
+ * Revision 1.3  1999/09/01 16:26:36  gerd
+ *     Added an empty line. This is *really* a big change.
+ *
+ * Revision 1.2  1999/08/14 22:20:27  gerd
+ *         The "config" slot has now a component "warner"which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ *         Furthermore, there is a new component "debugging_mode".
+ *
+ * Revision 1.1  1999/08/10 00:35:52  gerd
+ *     Initial revision.
+ *
+ *
+ *)
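
To illustrate the functions above, here is a hypothetical calling sketch (the file name and the chosen option are made up): it parses and validates a document read from a file, using a slightly customized configuration and the default DOM specification:

    (* Hypothetical sketch: parse a document file with PI-inline mode on. *)
    let parse_file name =
      let config =
        { Markup_yacc.default_config with
            Markup_yacc.processing_instructions_inline = true } in
      Markup_yacc.parse_document_entity
        config
        (Markup_yacc.File name)
        Markup_yacc.default_dom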
diff --git a/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB b/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB
new file mode 100644 (file)
index 0000000..d942e27
--- /dev/null
@@ -0,0 +1,52 @@
+******************************************************************************
+ABOUT-FINDLIB - Package manager for O'Caml
+******************************************************************************
+
+
+==============================================================================
+Abstract
+==============================================================================
+
+The findlib library provides a scheme to manage reusable software components 
+(packages), and includes tools that support this scheme. Packages are 
+collections of OCaml modules for which metainformation can be stored. The 
+packages are kept in the filesystem hierarchy, but with strict directory 
+structure. The library contains functions to look the directory up that stores 
+a package, to query metainformation about a package, and to retrieve dependency 
+information about multiple packages. There is also a tool that allows the user 
+to enter queries on the command-line. In order to simplify compilation and 
+linkage, there are new frontends of the various OCaml compilers that can 
+directly deal with packages. 
+
+Metainformation is stored together with the packages. This includes a version 
+string, the archives the package consists of, and additional linker options. 
+Packages can also depend on other packages. There is a query which finds 
+out all predecessors of a list of packages and sorts them topologically. The 
+new compiler frontends do this implicitly. 
+
+Metainformation can be conditional, i.e. depend on a set of predicates. This is 
+mainly used to react to certain properties of the environment, such as whether 
+the bytecode or the native compiler is invoked, whether the application is 
+multi-threaded, and a few more. If the new compiler frontends are used, most 
+predicates are determined automatically. 
+
+There is special support for scripts. A new directive, "#require", loads 
+packages into scripts. Of course, this works only with newly created toploops 
+which include the findlib library. 
+
+==============================================================================
+Where to get findlib
+==============================================================================
+
+The manual of findlib is available online [1]. You can download findlib here 
+[2]. 
+
+
+--------------------------
+
+[1]   see http://www.ocaml-programming.de/packages/documentation/findlib/
+
+[2]   see http://www.ocaml-programming.de/packages/findlib-0.3.1.tar.gz
+
+
+
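As a small, hypothetical illustration of the "#require" directive mentioned above (it only works in a toploop that includes the findlib library; "pxp" is the package name installed by this distribution):

    #require "pxp";;
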
diff --git a/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB.xml b/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB.xml
new file mode 100644 (file)
index 0000000..d1dc5b0
--- /dev/null
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!ENTITY f "<em>findlib</em>">
+<!ENTITY F "<em>Findlib</em>">
+
+]>
+
+<readme title="ABOUT-FINDLIB - Package manager for O'Caml">
+  <sect1>
+    <title>Abstract</title>
+<p>
+The &f; library provides a scheme to manage reusable software
+components (packages), and includes tools that support this
+scheme. Packages are collections of OCaml modules for which
+metainformation can be stored. The packages are kept in the filesystem
+hierarchy, but with a strict directory structure. The library contains
+functions to look up the directory that stores a package, to query
+metainformation about a package, and to retrieve dependency
+information about multiple packages. There is also a tool that allows
+the user to enter queries on the command-line. In order to simplify
+compilation and linkage, there are new frontends of the various OCaml
+compilers that can directly deal with packages.
+</p>
+
+<p>
+Metainformation is stored together with the packages. This includes a
+version string, the archives the package consists of, and additional
+linker options. Packages can also depend on other
+packages. There is a query which finds out all predecessors of a list
+of packages and sorts them topologically. The new compiler frontends
+do this implicitly.
+</p>
+
+<p>
+Metainformation can be conditional, i.e. depend on a set of
+predicates. This is mainly used to react to certain
+properties of the environment, such as whether the bytecode or the native
+compiler is invoked, whether the application is multi-threaded, and a few
+more. If the new compiler frontends are used, most predicates are
+determined automatically.
+</p>
+
+<p>
+There is special support for scripts. A new directive, "#require",
+loads packages into scripts. Of course, this works only with newly
+created toploops which include the &f; library.
+</p>
+
+  </sect1>
+
+  <sect1><title>Where to get findlib</title>
+    <p>
+The manual of &f; is available <a href="&url.findlib-project;">online</a>.
+You can download &f; <a href="&url.findlib-download;">here</a>.
+</p>
+  </sect1>
+</readme>
diff --git a/helm/DEVEL/pxp/pxp/doc/EXTENSIONS b/helm/DEVEL/pxp/pxp/doc/EXTENSIONS
new file mode 100644 (file)
index 0000000..a956839
--- /dev/null
@@ -0,0 +1,50 @@
+******************************************************************************
+Extensions of the XML specification
+******************************************************************************
+
+
+==============================================================================
+This document
+==============================================================================
+
+This parser has some options extending the XML specification. Here, the options 
+are explained. 
+
+==============================================================================
+Optional declarations instead of mandatory declarations
+==============================================================================
+
+The XML spec demands that elements, notations, and attributes must be declared. 
+However, there are sometimes situations where a different rule would be better: 
+If there is a declaration, the actual instance of the element type, notation 
+reference or attribute must match the pattern of the declaration; but if the 
+declaration is missing, a reasonable default declaration should be assumed.
+
+I have an example that seems to be typical: The inclusion of HTML into a meta 
+language. Imagine you have defined some type of "generator" or other tool 
+working with HTML fragments, and your document contains two types of elements: 
+The generating elements (with a name like "gen:xxx"), and the object elements 
+which are HTML. As HTML is still evolving, you do not want to declare the HTML 
+elements; the HTML fragments should be treated as well-formed XML fragments. In 
+contrast to this, the elements of the generator should be declared and 
+validated because you can more easily detect errors.
+
+The following two processing instructions can be included into the DTD:
+
+-  
+   <?pxp:dtd optional-element-and-notation-declarations?>
+   
+   References to unknown element types and notations no longer cause an error. 
+   The element may contain anything, but it must still be well-formed. It may 
+   have arbitrary attributes, and every attribute is treated as an #IMPLIED 
+   CDATA attribute.
+   
+-  
+   <?pxp:dtd optional-attribute-declarations elements="x y ..."?>
+   
+   References to unknown attributes inside one of the enumerated elements no 
+   longer cause an error. Such an attribute is treated as an #IMPLIED CDATA 
+   attribute. 
+   If there are several "optional-attribute-declarations" PIs, they are all 
+   interpreted (implicitly merged).
+   
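As a hypothetical sketch (the element names are made up), the first processing instruction can simply be placed at the top of a DTD that is then parsed as usual; only the gen:* elements are declared, and everything else is merely checked for well-formedness:

    (* Hypothetical sketch: parse a DTD that relaxes the declaration rules. *)
    let dtd =
      Pxp_yacc.parse_dtd_entity
        Pxp_yacc.default_config
        (Pxp_yacc.from_string
           "<?pxp:dtd optional-element-and-notation-declarations?>\n\
            <!ELEMENT gen:list (gen:item)*>\n\
            <!ELEMENT gen:item (#PCDATA)>")
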
diff --git a/helm/DEVEL/pxp/pxp/doc/EXTENSIONS.xml b/helm/DEVEL/pxp/pxp/doc/EXTENSIONS.xml
new file mode 100644 (file)
index 0000000..e64d061
--- /dev/null
@@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!-- Special HTML config: -->
+<!ENTITY % readme:html:up '<a href="../..">up</a>'>
+
+<!ENTITY % config SYSTEM "config.xml">
+%config;
+
+]>
+
+<readme title="Extensions of the XML specification">
+
+  <sect1>
+    <title>This document</title>
+    <p>This parser has some options extending the XML specification. Here, the 
+options are explained.
+</p>
+  </sect1>
+
+  <sect1>
+    <title>Optional declarations instead of mandatory declarations</title>
+
+<p>The XML spec demands that elements, notations, and attributes must be
+declared. However, there are sometimes situations where a different rule would
+be better: <em>If</em> there is a declaration, the actual instance of the
+element type, notation reference or attribute must match the pattern of the
+declaration; but if the declaration is missing, a reasonable default declaration
+should be assumed.</p> 
+
+<p>I have an example that seems to be typical: The inclusion of HTML into a
+meta language. Imagine you have defined some type of "generator" or other tool
+working with HTML fragments, and your document contains two types of elements:
+The generating elements (with a name like "gen:xxx"), and the object elements
+which are HTML. As HTML is still evolving, you do not want to declare the HTML
+elements; the HTML fragments should be treated as well-formed XML fragments. In
+contrast to this, the elements of the generator should be declared and
+validated because you can more easily detect errors.</p> 
+
+<p>The following two processing instructions can be included into the DTD:</p>
+    <ul>
+      <li><p><code><![CDATA[<?pxp:dtd optional-element-and-notation-declarations?>]]></code>
+       References to unknown element types and notations no longer cause an
+       error. The element may contain anything, but it must still be
+       well-formed. It may have arbitrary attributes, and every attribute is
+       treated as an #IMPLIED CDATA attribute.</p>
+      </li>
+      <li><p><code><![CDATA[<?pxp:dtd optional-attribute-declarations elements="x y ..."?>]]></code>
+        References to unknown attributes inside one of the enumerated elements
+        no longer cause an error. Such an attribute is treated as an #IMPLIED
+        CDATA attribute.
+</p>
+
+<p>If there are several "optional-attribute-declarations" PIs, they are all
+interpreted (implicitly merged).</p>
+      </li>
+    </ul>
+  </sect1>
+</readme>
diff --git a/helm/DEVEL/pxp/pxp/doc/INSTALL b/helm/DEVEL/pxp/pxp/doc/INSTALL
new file mode 100644 (file)
index 0000000..9a49a22
--- /dev/null
@@ -0,0 +1,154 @@
+******************************************************************************
+INSTALL - PXP, the XML parser for O'Caml
+******************************************************************************
+
+
+==============================================================================
+The "pxp" package
+==============================================================================
+
+------------------------------------------------------------------------------
+Prerequisites
+------------------------------------------------------------------------------
+
+PXP requires that the netstring package  [1] is already installed. PXP works 
+only with O'Caml 3.00 (the support for 2.04 has been dropped). The installation 
+procedure defined in the Makefile requires findlib [2] to work [3]. 
+
+------------------------------------------------------------------------------
+Configuration
+------------------------------------------------------------------------------
+
+It is not necessary to configure PXP; but you can switch off the UTF-8 support 
+by setting the variable 
+
+UTF8_SUPPORT = no
+
+in Makefile.conf. In this case, the UTF-8 modules are not even compiled. - By 
+default, the UTF-8 support is enabled. 
+
+Note: Compiling the UTF-8 modules takes 10 minutes on my 400 MHz Pentium II; if 
+this is too long, you can set UTF8_SUPPORT to "no".
+
+------------------------------------------------------------------------------
+Compilation
+------------------------------------------------------------------------------
+
+The Makefile defines the following goals: 
+
+-  make all
+   compiles with the bytecode compiler and creates the files pxp_types.cma, 
+   pxp_lex_iso88591.cma, pxp_lex_utf8.cma (*), pxp_engine.cma, and pxp_utf8.cmo 
+   (*). The (*) files are not built if the UTF-8 support is switched off.
+   
+-  make opt
+   compiles with the native compiler and creates the files pxp_types.cmxa, 
+   pxp_lex_iso88591.cmxa, pxp_lex_utf8.cmxa (*), pxp_engine.cmxa, and 
+   pxp_utf8.cmx (*). The (*) files are not built if the UTF-8 support is 
+   switched off.
+   
+------------------------------------------------------------------------------
+Installation
+------------------------------------------------------------------------------
+
+The Makefile defines the following goals:
+
+-  make install
+   installs the bytecode archives, the interface definitions, and if present, 
+   the native archives in the default location of findlib as package "pxp" 
+   
+-  make uninstall
+   removes the package "pxp"
+   
+-  make markup-install
+   installs the Markup compatibility API as package "markup"
+   
+-  make markup-uninstall
+   removes the package "markup"
+   
+------------------------------------------------------------------------------
+Usage with the help of "findlib"
+------------------------------------------------------------------------------
+
+You can refer to the parser as the findlib package "pxp": 
+
+ocamlfind ocamlc -package pxp ...
+
+By default, the UTF-8 support modules will be linked in. If you do not need 
+them, you may define the predicate "pxp_without_utf8", which prevents the 
+UTF-8-relevant parts from being linked into your program; the difference in 
+size is about 1 MB: 
+
+ocamlfind ocamlc -package pxp -predicates pxp_without_utf8 ...
+
+Note that you can also reduce the size of the resulting executable by 
+specifying Netstring-related predicates (e.g. netstring_only_iso); see the 
+documentation of Netstring. 
+
+------------------------------------------------------------------------------
+Linking with the archives directly
+------------------------------------------------------------------------------
+
+If you need UTF-8 support, you must link your program as follows: 
+
+ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_lex_utf8.cma 
+           pxp_engine.cma pxp_utf8.cmo ...
+
+If you do not need UTF-8, the following suffices: 
+
+ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_engine.cma ...
+
+
+
+==============================================================================
+The examples
+==============================================================================
+
+In the "examples" directory you find several applications of PXP. They require 
+that PXP has been installed using findlib. See the Makefiles in the directories 
+for descriptions of "make" goals. 
+
+==============================================================================
+Troubleshooting
+==============================================================================
+
+------------------------------------------------------------------------------
+Solaris
+------------------------------------------------------------------------------
+
+The "make" utility of Solaris does not work properly; it has a bug that 
+prevents the so-called suffix rules from being recognized. There are 
+two solutions:
+
+-  Install GNU make and use it instead of Solaris make. This is the recommended 
+   way to solve the problem, as GNU make can process almost every Makefile from 
+   open source projects, and you will never have problems with building 
+   software again.
+   
+-  Add the following lines to Makefile.code: 
+   
+   %.cmx: %.ml
+           $(OCAMLOPT) -c $<
+   
+   %.cmo: %.ml
+           $(OCAMLC) -c $<
+   
+   %.cmi: %.mli
+           $(OCAMLC) -c $<
+   
+   %.ml: %.mll
+           ocamllex $<
+   
+   
+   
+
+--------------------------
+
+[1]   see http://www.ocaml-programming.de/packages/documentation/netstring
+
+[2]   see http://www.ocaml-programming.de/packages/documentation/findlib/
+
+[3]   Findlib is a package manager, see the file ABOUT-FINDLIB.
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/INSTALL.xml b/helm/DEVEL/pxp/pxp/doc/INSTALL.xml
new file mode 100644 (file)
index 0000000..ac7832d
--- /dev/null
@@ -0,0 +1,171 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!ENTITY m "<em>PXP</em>">
+
+]>
+
+<readme title="INSTALL - PXP, the XML parser for O'Caml">
+  <sect1><title>The "pxp" package</title>
+    <sect2><title>Prerequisites</title>
+      <p>
+&m; requires that the <a href="&url.netstring-project;">netstring package
+</a> is already installed. &m; works
+only with O'Caml 3.00 (the support for 2.04 has been dropped).
+The installation
+procedure defined in the Makefile requires <a
+href="&url.findlib-project;">findlib</a> to work<footnote><em>Findlib</em> is a
+package manager, see the file ABOUT-FINDLIB.</footnote>.
+</p>
+    </sect2>
+
+    <sect2><title>Configuration</title>
+      <p>
+It is not necessary to configure PXP; but you can switch off the UTF-8
+support by setting the variable
+
+<code>
+UTF8_SUPPORT = no
+</code>
+
+in Makefile.conf. In this case, the UTF-8 modules are not even compiled.
+- By default, the UTF-8 support is enabled.
+</p>
+
+      <p>
+Note: Compiling the UTF-8 modules takes 10 minutes on my 400 MHz Pentium II;
+if this is too long, you can set UTF8_SUPPORT to "no".</p>
+    </sect2>
+
+    <sect2><title>Compilation</title>
+      <p>
+The Makefile defines the following goals:
+</p>
+      <ul>
+       <li>
+         <p>make all</p>
+         <p>compiles with the bytecode compiler and creates the files
+pxp_types.cma, pxp_lex_iso88591.cma, pxp_lex_utf8.cma (*), pxp_engine.cma,
+and pxp_utf8.cmo (*). The (*) files are not built if the UTF-8 support
+is switched off.</p>
+       </li>
+       <li>
+         <p>make opt</p>
+         <p>compiles with the native compiler and creates  the files
+pxp_types.cmxa, pxp_lex_iso88591.cmxa, pxp_lex_utf8.cmxa (*), pxp_engine.cmxa,
+and pxp_utf8.cmx (*). The (*) files are not built if the UTF-8 support
+is switched off.</p>
+       </li>
+      </ul>
+    </sect2>
+
+    <sect2><title>Installation</title>
+      <p>
+The Makefile defines the following goals:</p>
+      <ul>
+       <li>
+         <p>make install</p>
+         <p>installs the bytecode archives, the interface definitions, and if
+present, the native archives in the default location of <em>findlib</em> as
+package "pxp"
+</p>
+       </li>
+       <li>
+         <p>make uninstall</p>
+         <p>removes the package "pxp"</p>
+       </li>
+       <li>
+         <p>make markup-install</p>
+         <p>installs the Markup compatibility API as package "markup"</p>
+       </li>
+       <li>
+         <p>make markup-uninstall</p>
+         <p>removes the package "markup"</p>
+       </li>
+      </ul>
+    </sect2>
+
+    <sect2>
+      <title>Usage with the help of "findlib"</title>
+      <p>You can refer to the parser as the findlib package "pxp":
+
+<code>
+ocamlfind ocamlc -package pxp ...
+</code>
+
+By default, the UTF-8 support modules will be linked in. If you do not need
+them, you may define the predicate "pxp_without_utf8", which prevents the
+UTF-8-relevant parts from being linked into your program; the difference in
+size is about 1 MB:
+
+<code>
+ocamlfind ocamlc -package pxp -predicates pxp_without_utf8 ...
+</code>
+
+Note that you can also reduce the size of the resulting executable by
+specifying Netstring-related predicates (e.g. netstring_only_iso); see the
+documentation of Netstring.
+</p>
+    </sect2>
+
+    <sect2>
+      <title>Linking with the archives directly</title>
+      <p>If you need UTF-8 support, you must link your program as follows:
+
+<code>
+ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_lex_utf8.cma 
+           pxp_engine.cma pxp_utf8.cmo ...
+</code>
+
+If you do not need UTF-8, the following suffices:
+
+<code>
+ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_engine.cma ...
+</code>
+
+</p>
+    </sect2>
+
+  </sect1>
+
+  <sect1><title>The examples</title>
+    <p>
+In the "examples" directory you find several applications of &m;. They require
+that &m; has been installed using <em>findlib</em>. See the Makefiles in the
+directories for descriptions of "make" goals.
+</p>
+  </sect1>
+
+    <sect2><title>Troubleshooting</title>
+    <sect2><title>Solaris</title>
+      <p>
+The "make" utility of Solaris does not work properly; it has a bug
+that prevents the so-called suffix rules from being recognized. There
+are two solutions:</p>
+      <ul>
+       <li><p>Install GNU make and use it instead of Solaris make. This is
+the recommended way to solve the problem, as GNU make can process almost
+every Makefile from open source projects, and you will never have problems
+with building software again.</p></li>
+       <li><p>Add the following lines to Makefile.code:
+         <code>
+%.cmx: %.ml
+        $(OCAMLOPT) -c $&lt;
+
+%.cmo: %.ml
+        $(OCAMLC) -c $&lt;
+
+%.cmi: %.mli
+        $(OCAMLC) -c $&lt;
+
+%.ml: %.mll
+        ocamllex $&lt;
+</code>
+</p></li>
+      </ul>
+    </sect2>
+  </sect1>
+</readme>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/Makefile b/helm/DEVEL/pxp/pxp/doc/Makefile
new file mode 100644 (file)
index 0000000..0ed1274
--- /dev/null
@@ -0,0 +1,43 @@
+.PHONY: all
+all: README INSTALL ABOUT-FINDLIB SPEC PRERELEASE EXTENSIONS
+
+README: README.xml common.xml config.xml
+       readme -text README.xml >README
+
+INSTALL: INSTALL.xml common.xml config.xml
+       readme -text INSTALL.xml >INSTALL
+
+ABOUT-FINDLIB: ABOUT-FINDLIB.xml common.xml config.xml
+       readme -text ABOUT-FINDLIB.xml >ABOUT-FINDLIB
+
+SPEC: SPEC.xml common.xml config.xml
+       readme -text SPEC.xml >SPEC
+
+EXTENSIONS: EXTENSIONS.xml common.xml config.xml
+       readme -text EXTENSIONS.xml >EXTENSIONS
+
+PRERELEASE: PRERELEASE.xml common.xml config.xml
+       readme -text PRERELEASE.xml >PRERELEASE
+
+config.xml:
+       touch config.xml
+
+common.xml:
+       ln -s dist-common.xml common.xml 
+
+.PHONY: clean
+clean:
+
+.PHONY: CLEAN
+CLEAN: clean
+       $(MAKE) -C manual CLEAN
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~
+       $(MAKE) -C manual distclean
+
+.PHONY: symlinks
+symlinks:
+       ln -s ../examples/readme/readme.dtd .
+
diff --git a/helm/DEVEL/pxp/pxp/doc/PRERELEASE b/helm/DEVEL/pxp/pxp/doc/PRERELEASE
new file mode 100644 (file)
index 0000000..bc46cd0
--- /dev/null
@@ -0,0 +1,103 @@
+******************************************************************************
+README - PXP, the XML parser for O'Caml
+******************************************************************************
+
+
+==============================================================================
+Pre-release of PXP, the XML parser for O'Caml
+==============================================================================
+
+PXP is the new, completely revised and partly rewritten validating XML parser 
+for O'Caml; the old name, "Markup", has been dropped. The current version of 
+PXP is still a bit experimental because it is not fully tested; however, it is 
+now stable enough to be used in experimental applications. 
+
+PXP will retain most parts of Markup's API; the name PXP emphasizes the 
+strengths of the API: it is the Polymorphic XML Parser. The document objects 
+representing the parsed file have an interesting polymorphism which allows that 
+the user of the parser can control which kind of objects are actually created. 
+The current API supports the element type as criterion for object/class 
+selection; future APIs will extend this concept such that arbitrary criterions 
+are possible (e.g. you may want to have different classes for different 
+namespaces). 
+
+The current development goals of PXP are:
+
+-  Full XML-1.0 conformance: The current pre-release is now very close to 
+   strict XML-1.0 conformance. The only major difference from the standard is 
+   that PXP sometimes accepts DTDs as legal while the standard forbids them 
+   (non-deterministic content models).
+   One of the more important improvements since 0.2.10 is the possibility to 
+   represent XML documents internally as UTF-8 strings, not only as ISO-8859-1 
+   strings. Thanks to Claudio Sacerdoti Coen who contributed a special lexer 
+   preprocessor hiding the details of the UTF-8 encoding in the lexer 
+   definitions. 
+   
+-  Correctness of validation: The well-formedness and validity constraints 
+   must be implemented as correctly as possible. The last stable release 
+   already had a regression test covering many aspects of XML. The test suite 
+   will be extended.
+   
+-  Parsing performance: It should be possible to process large amounts of data 
+   in a reasonable period of time. The last stable release had many stages of 
+   processing that wasted time.
+   The current pre-release is already 30 per cent faster than 0.2.10.
+   
+-  Simplicity of usage: Unlike parsers based on imperative languages and DOM, 
+   the usage of PXP should be simple, even for complex tasks. The current 
+   parser API has already many advantages over DOM; especially it is well 
+   integrated into the functional and object-oriented language O'Caml. You do 
+   not have to deal with artificial representations like "node lists" while the 
+   programming environment already provides good support for list structures. 
+   The fact that O'Caml allows a functional programming style is interesting 
+   for programs transforming XML trees.
+   
+==============================================================================
+Download the PXP pre-release
+==============================================================================
+
+The current pre-release is available under 
+http://www.ocaml-programming.de/packages/pxp-pre-0.99.8.tar.gz [1]. There is 
+currently no documentation for this version of the software; it is recommended 
+to use the Markup manual [2] and compare it with the current module interfaces.
+
+Please note that this is work in progress; it may still contain bugs and 
+irregularities.
+
+The parser works only with OCaml-3. The parser needs the netstring package [3], 
+at least version 0.9.1. 
+
+I am very interested in your opinion of PXP; please contact me [4].
+
+==============================================================================
+Author, Credits, Copying
+==============================================================================
+
+PXP has been written by Gerd Stolpmann [5]; it contains contributions by 
+Claudio Sacerdoti Coen. You may copy it as you like, you may use it even for 
+commercial purposes as long as the license conditions are respected, see the 
+file LICENSE coming with the distribution. It allows almost everything. 
+
+==============================================================================
+Where to find the stable release
+==============================================================================
+
+Here. [6]
+
+
+--------------------------
+
+[1]   see http://www.ocaml-programming.de/packages/pxp-pre-0.99.8.tar.gz
+
+[2]   see http://www.ocaml-programming.de/packages/documentation/markup/manual
+
+[3]   see http://www.ocaml-programming.de/packages/documentation/netstring
+
+[4]   see mailto:gerd@gerd-stolpmann.de
+
+[5]   see mailto:gerd@gerd-stolpmann.de
+
+[6]   see http://www.ocaml-programming.de/packages/documentation/markup
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/PRERELEASE.xml b/helm/DEVEL/pxp/pxp/doc/PRERELEASE.xml
new file mode 100644 (file)
index 0000000..f155abd
--- /dev/null
@@ -0,0 +1,116 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!-- Special HTML config: -->
+<!ENTITY % readme:html:up '<a href="../..">up</a>'>
+
+<!ENTITY % config SYSTEM "config.xml">
+%config;
+
+]>
+
+<readme title="README - PXP, the XML parser for O'Caml">
+  <sect1>
+    <title>Pre-release of PXP, the XML parser for O'Caml</title>
+
+    <p>PXP is the new, completely revised and partly rewritten 
+validating XML parser
+for O'Caml; the old name, "Markup", has been dropped. The current version
+of PXP is still a bit experimental because it is not fully tested; however,
+it is now stable enough to be used in experimental applications.
+</p>
+
+    <p>PXP will retain most parts of Markup's API; the name PXP 
+emphasizes the strengths of the API: it is the Polymorphic XML Parser.
+The document objects representing the parsed file have an interesting
+polymorphism which allows the user of the parser to control
+which kind of objects are actually created. The current API supports
+the element type as criterion for object/class selection; future APIs will
+extend this concept such that arbitrary criteria are possible
+(e.g. you may want to have different classes for different namespaces).
+</p>
+
+    <p>The current development goals of PXP are:</p>
+
+    <ul>
+      <li><p><em>Full XML-1.0 conformance:</em> The current pre-release
+is now very close to strict XML-1.0 conformance. The only major 
+difference from the standard is that PXP sometimes accepts DTDs as legal
+while the standard forbids them (non-deterministic content models).</p>
+
+<p>One of the more important improvements since 0.2.10 is the possibility to
+represent XML documents internally as UTF-8 strings, not only as ISO-8859-1
+strings. Thanks to Claudio Sacerdoti Coen who contributed a special lexer
+preprocessor hiding the details of the UTF-8 encoding in the lexer definitions.
+</p>
+      </li>
+
+      <li><p><em>Correctness of validation:</em> The well-formedness
+and validity constraints must be implemented as correctly as possible.
+The last stable release already had a regression test covering many
+aspects of XML. The test suite will be extended.</p>
+      </li>
+
+      <li><p><em>Parsing performance:</em> It should be possible to
+process large amounts of data in a reasonable period of time. The last
+stable release had many stages of processing that wasted time.</p>
+
+       <p>The current pre-release is already 30 per cent faster than
+0.2.10.</p>
+      </li>
+
+      <li><p><em>Simplicity of usage:</em> Unlike parsers based on
+imperative languages and DOM, the usage of PXP should be simple, even
+for complex tasks. The current parser API has already many advantages
+over DOM; especially it is well integrated into the functional and
+object-oriented language O'Caml. You do not have to deal with
+artificial representations like "node lists" while the programming
+environment already provides good support for list structures. The
+fact that O'Caml allows a functional programming style is interesting
+for programs transforming XML trees.</p>
+      </li>
+    </ul>
+  </sect1>
+
+  <sect1>
+    <title>Download the PXP pre-release</title>
+
+    <p>The current pre-release is available under 
+<a href="&url.gps-ocaml-download;/pxp-pre-0.99.8.tar.gz">
+&url.gps-ocaml-download;/pxp-pre-0.99.8.tar.gz</a>. There is currently no
+documentation for this version of the software; it is recommended to use the <a
+href="&url.markup-manual;">Markup manual</a> and compare it with the current
+module interfaces.</p>
+
+    <p>Please note that this is work in progress; it may still contain bugs
+and irregularities.</p>
+
+    <p>The parser works only with OCaml-3. The parser needs the <a
+href="&url.netstring-project;">netstring package</a>, at least version 0.9.1.
+</p>
+
+    <p>I am very interested in your opinion of PXP; please <a
+href="mailto:&person.gps.mail;">contact me</a>.</p>
+  </sect1>
+
+  <sect1>
+    <title>Author, Credits, Copying</title>
+    <p>
+<em>PXP</em> has been written by &person.gps;; it contains contributions by
+Claudio Sacerdoti Coen. You may copy it as you like,
+you may use it even for commercial purposes as long as the license conditions
+are respected, see the file LICENSE coming with the distribution. It allows
+almost everything. 
+</p>
+  </sect1>
+
+  <sect1>
+    <title>Where to find the stable release</title>
+    <p><a href="&url.markup-project;">Here.</a></p>
+  </sect1>
+
+</readme>
+
diff --git a/helm/DEVEL/pxp/pxp/doc/README b/helm/DEVEL/pxp/pxp/doc/README
new file mode 100644 (file)
index 0000000..b7ad5de
--- /dev/null
@@ -0,0 +1,247 @@
+******************************************************************************
+README - PXP, the XML parser for O'Caml
+******************************************************************************
+
+
+==============================================================================
+Abstract
+==============================================================================
+
+PXP is a validating parser for XML-1.0 which has been written entirely in 
+Objective Caml. 
+
+PXP is the new name of the parser formerly known as "Markup". PXP means 
+"Polymorphic XML parser" and emphasizes its most useful property: that the API 
+is polymorphic and can be configured such that different objects are used to 
+store different types of elements.
+
+==============================================================================
+Download
+==============================================================================
+
+You can download PXP as a gzip'ed tarball [1]. The parser needs the Netstring [2] 
+package (0.9.3). Note that PXP requires O'Caml 3.00. 
+
+==============================================================================
+User's Manual
+==============================================================================
+
+The manual is included in the distribution both as a Postscript document and 
+as a bunch of HTML files. An online version can be found here [3]. 
+
+==============================================================================
+Author, Credits, Copying
+==============================================================================
+
+PXP has been written by Gerd Stolpmann [4]; it contains contributions by 
+Claudio Sacerdoti Coen. You may copy it as you like, and you may even use it 
+for commercial purposes as long as the license conditions are respected; see 
+the file LICENSE that comes with the distribution. It allows almost everything. 
+
+Thanks also to Alain Frisch and Haruo Hosoya for discussions and bug reports.
+
+==============================================================================
+Description
+==============================================================================
+
+PXP is a validating XML parser for O'Caml [5]. It strictly complies with the 
+XML-1.0 [6] standard. 
+
+The parser is simple to call: usually a single function call is 
+sufficient to parse an XML document and represent it as an object tree.
+
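+A minimal sketch of such a call, assuming the Pxp_yacc entry points described 
+in the manual (parse_document_entity, default_config, from_file, default_spec; 
+exact names and signatures may differ in your version): 
+
+   open Pxp_yacc
+
+   let doc =
+     parse_document_entity
+       default_config          (* default parser settings *)
+       (from_file "doc.xml")   (* where the XML text comes from *)
+       default_spec            (* the standard node classes *)
+
+   (* doc # root is the root element of the resulting object tree *)
+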
+Once the document is parsed, it can be accessed using a class interface. The 
+interface allows arbitrary access including transformations. One of the 
+features of the document representation is its polymorphic nature; it is simple 
+to add custom methods to the document classes. Furthermore, the parser can be 
+configured such that different XML elements are represented by objects created 
+from different classes. This is a very powerful feature, because it simplifies 
+the structure of programs processing XML documents. 
+
+Note that the class interface does not comply with the DOM standard. It was not 
+a development goal to realize a standard API (industrial developers can do this 
+much better than I); however, the API is powerful enough to be considered 
+equivalent to DOM. More importantly, the interface is compatible with the XML 
+information model required by many XML-related standards. 
+
+------------------------------------------------------------------------------
+Detailed feature list
+------------------------------------------------------------------------------
+
+-  The XML instance is validated against the DTD; any violation of a validation 
+   constraint leads to the rejection of the instance. The validator has been 
+   carefully implemented, and conforms strictly to the standard. If needed, it 
+   is also possible to run the parser in a well-formedness mode.
+   
+-  If possible, the validator applies a deterministic finite automaton to 
+   validate the content models. This ensures that validation can always be 
+   performed in linear time. However, in the case that the content models are 
+   not deterministic, the parser uses a backtracking algorithm which can be 
+   much slower. - It is also possible to reject non-deterministic content 
+   models.
+   
+-  In particular, the validator also checks the complicated rules governing whether 
+   parentheses are properly nested with respect to entities, and whether the 
+   standalone declaration is satisfied. On demand, it is checked whether the 
+   IDREF attributes only refer to existing nodes.
+   
+-  Entity references are automatically resolved while the XML text is being 
+   scanned. It is not possible to recognize in the object tree where a 
+   referenced entity begins or ends; the object tree only represents the 
+   logical structure.
+   
+-  External entities are loaded using a configurable resolver infrastructure. 
+   It is possible to connect the parser with an arbitrary XML source.
+   
+-  The parser can read XML text encoded in a variety of character sets. 
+   Independent of this, it is possible to choose the encoding of the internal 
+   representation of the tree nodes; the parser automatically converts the 
+   input text to this encoding. Currently, the parser supports UTF-8 and 
+   ISO-8859-1 as internal encodings.
+   
+-  The interface of the parser has been designed such that it is best 
+   integrated into the language O'Caml. The first goal was simplicity of usage 
+   which is achieved by many convenience methods and functions, and by allowing 
+   the user to select which parts of the XML text are actually represented in 
+   the tree. For example, it is possible to store processing instructions as 
+   tree nodes, but the parser can also be configured such that these 
+   instructions are put into hashtables. The information model is compatible 
+   with the requirements of XML-related standards such as XPath.
+   
+-  In particular, the node tree can optionally contain or leave out processing 
+   instructions and comments. It is also possible to generate a "super root" 
+   object which is the parent of the root element. The attributes of elements 
+   are normally not stored as nodes, but it is possible to get them wrapped 
+   into nodes.
+   
+-  There is also an interface for DTDs; you can parse and access sequences of 
+   declarations. The declarations are fully represented as recursive O'Caml 
+   values. 
+   
+------------------------------------------------------------------------------
+Code examples
+------------------------------------------------------------------------------
+
+This distribution contains several examples:
+
+-  validate: simply parses a document and prints all error messages 
+   
+-  readme: Defines a DTD for simple "README"-like documents, and offers 
+   conversion to HTML and text files [7]. 
+   
+-  xmlforms: This is already a sophisticated application that uses XML as style 
+   sheet language and data storage format. It shows how a Tk user interface can 
+   be configured by an XML style, and how data records can be stored using XML. 
+   
+------------------------------------------------------------------------------
+Restrictions and missing features
+------------------------------------------------------------------------------
+
+The following restrictions apply that are not violations of the standard: 
+
+-  The attributes "xml:space", and "xml:lang" are not supported specially. (The 
+   application can do this.)
+   
+-  The built-in support for SYSTEM and PUBLIC identifiers is limited to local 
+   file access. There is no support for catalogs. The parser offers a hook to 
+   add missing features.
+   
+-  It is currently not possible to check for interoperability with SGML. 
+   
+The following features are also missing:
+
+-  There is no special support for namespaces. (Perhaps in the next release?)
+   
+-  There is no support for XPATH or XSLT.
+   
+However, I hope that these features will be implemented soon, either by myself 
+or by contributors (who are invited to do so).
+
+------------------------------------------------------------------------------
+Recent Changes
+------------------------------------------------------------------------------
+
+-  Changed in 1.0:
+   Support for document order.
+   
+-  Changed in 0.99.8:
+   Several fixes of bugs reported by Haruo Hosoya and Alain Frisch.
+   The class type "node" has been extended: you can go directly to the next and 
+   previous nodes in the list; you can refer to nodes by position.
+   There are now some iterators for nodes: find, find_all, find_element, 
+   find_all_elements, map_tree, iter_tree.
+   Experimental support for viewing attributes as nodes; I hope that helps 
+   Alain write his XPath evaluator.
+   The user's manual has been revised and is almost up to date.
+   
+-  Changed in 0.99.7:
+   There are now additional node types T_super_root, T_pinstr and T_comment, 
+   and the parser is able to create the corresponding nodes.
+   The functions for character set conversion have been moved to the Netstring 
+   package; they are not specific for XML.
+   
+-  Changed in 0.99.6:
+   Implemented a check on deterministic content models. Added an alternate 
+   validator based on a DFA. - This means that now all mandatory features for 
+   an XML-1.0 parser are implemented! The parser is now substantially complete.
+   
+-  Changed in 0.99.5:
+   The handling of ID and IDREF attributes has changed. The index of nodes 
+   containing an ID attribute is now separated from the document. Optionally 
+   the parser now checks whether the IDREF attributes refer to existing 
+   elements.
+   The element nodes can optionally store the location in the source XML code.
+   The method 'write' writes the XML tree in every supported encoding. 
+   (Successor of 'write_compact_as_latin1'.)
+   Several smaller changes and fixes.
+   
+-  Changed in 0.99.4:
+   The module Pxp_reader has been modernized. The resolver classes are simpler 
+   to use. There is now support for URLs.
+   The interface of Pxp_yacc has been improved: The type 'source' is now 
+   simpler. The type 'domspec' has gone; the new 'spec' is opaque and performs 
+   better. There are some new parsing modes.
+   Many smaller changes.
+   
+-  Changed in 0.99.3:
+   The markup_* modules have been renamed to pxp_*. There is a new 
+   compatibility API that tries to be compatible with markup-0.2.10.
+   The type "encoding" is now a polymorphic variant.
+   
+-  Changed in 0.99.2:
+   Added checks for the constraints about the standalone declaration.
+   Added regression tests about attribute normalization, attribute checks, 
+   standalone checks.
+   Fixed some minor errors of the attribute normalization function.
+   The bytecode/native archives are now separated into a general part, an 
+   ISO-8859-1-relevant part, and a UTF-8-relevant part. The parser can again be 
+   compiled with ocamlopt.
+   
+-  Changed in 0.99.1:
+   In general, this release is an early pre-release of the next stable version 
+   1.00. I do not recommend using it for serious work; it is still very 
+   experimental!
+   The core of the parser has been rewritten using a self-written parser 
+   generator.
+   The lexer has been restructured, and can now handle UTF-8 encoded files.
+   Numerous other changes.
+   
+
+--------------------------
+
+[1]   see http://www.ocaml-programming.de/packages/pxp-1.0.tar.gz
+
+[2]   see http://www.ocaml-programming.de/packages/documentation/netstring
+
+[3]   see http://www.ocaml-programming.de/packages/documentation/pxp/manual
+
+[4]   see mailto:gerd@gerd-stolpmann.de
+
+[5]   see http://caml.inria.fr/
+
+[6]   see http://www.w3.org/TR/1998/REC-xml-19980210.html
+
+[7]   This particular document is an example of this DTD!
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/README.xml b/helm/DEVEL/pxp/pxp/doc/README.xml
new file mode 100644 (file)
index 0000000..34c7726
--- /dev/null
@@ -0,0 +1,423 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!--
+<!ENTITY url.ocaml           "http://caml.inria.fr/">
+<!ENTITY url.xml-spec        "http://www.w3.org/TR/1998/REC-xml-19980210.html">
+<!ENTITY url.jclark-xmltdata "ftp://ftp.jclark.com/pub/xml/xmltest.zip">
+<!ENTITY url.gps-ocaml-download "http://people.darmstadt.netsurf.de/ocaml">
+<!ENTITY url.markup-download    "&url.gps-ocaml-download;/markup-0.1.tar.gz">
+<!ENTITY person.gps             '<a
+  href="mailto:Gerd.Stolpmann@darmstadt.netsurf.de">Gerd Stolpmann</a>'>
+-->
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!-- Special HTML config: -->
+<!ENTITY % readme:html:up '<a href="../..">up</a>'>
+
+<!ENTITY % config SYSTEM "config.xml">
+%config;
+
+]>
+
+<readme title="README - PXP, the XML parser for O'Caml">
+  <sect1>
+    <title>Abstract</title>
+    <p>
+<em>PXP</em> is a validating parser for XML-1.0 which has been written
+entirely in Objective Caml.
+</p>
+
+    <p>PXP is the new name of the parser formerly known as "Markup". 
+PXP means "Polymorphic XML parser" and emphasizes its most useful
+property: that the API is polymorphic and can be configured such that
+different objects are used to store different types of elements.</p>
+  </sect1>
+
+  <sect1>
+    <title>Download</title>
+    <p>
+You can download <em>PXP</em> as a gzip'ed <a
+href="&url.pxp-download;">tarball</a>. The parser needs the <a
+href="&url.netstring-project;">Netstring</a> package (0.9.3). Note that PXP
+requires O'Caml 3.00.
+</p>
+  </sect1>
+
+  <sect1>
+    <title>User's Manual</title>
+    <p>
+The manual is included in the distribution both as a Postscript document and
+as a bunch of HTML files. An online version can be found <a
+href="&url.pxp-manual;">here</a>.
+</p>
+  </sect1>
+
+  <sect1>
+    <title>Author, Credits, Copying</title>
+    <p>
+<em>PXP</em> has been written by &person.gps;; it contains contributions by
+Claudio Sacerdoti Coen. You may copy it as you like, and you may even use it
+for commercial purposes as long as the license conditions are respected; see
+the file LICENSE that comes with the distribution. It allows almost everything.
+</p>
+
+    <p>Thanks also to Alain Frisch and Haruo Hosoya for discussions and bug
+reports.</p>
+  </sect1>
+
+  <sect1>
+    <title>Description</title>
+    <p>
+<em>PXP</em> is a validating XML parser for <a
+href="&url.ocaml;">O'Caml</a>. It strictly complies to the 
+<a href="&url.xml-spec;">XML-1.0</a> standard.
+</p>
+
+    <p>The parser is simple to call: usually a single function
+call is sufficient to parse an XML document and represent it as an object
+tree.</p>
+
+    <p>
+Once the document is parsed, it can be accessed using a class interface.
+The interface allows arbitrary access including transformations. One of
+the features of the document representation is its polymorphic nature;
+it is simple to add custom methods to the document classes. Furthermore,
+the parser can be configured such that different XML elements are represented
+by objects created from different classes. This is a very powerful feature,
+because it simplifies the structure of programs processing XML documents.
+</p>
+
+    <p>
+Note that the class interface does not comply with the DOM standard. It was not
+a development goal to realize a standard API (industrial developers can do this
+much better than I); however, the API is powerful enough to be considered
+equivalent to DOM. More importantly, the interface is compatible with the
+XML information model required by many XML-related standards.
+</p>
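+
+    <p>A minimal sketch of such an access (method names as in the manual:
+<em>root</em>, <em>node_type</em>, <em>data</em>, <em>sub_nodes</em>; the value
+<em>doc</em> is assumed to be the result of a successful parser call). It
+prints all character data contained in a document:
+
+<code>
+let rec print_texts n =
+  match n # node_type with
+  | Pxp_document.T_data -&gt; print_string (n # data)
+  | _                   -&gt; List.iter print_texts (n # sub_nodes)
+
+let print_all_texts doc = print_texts (doc # root)
+</code>
+</p>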
+
+    <sect2>
+      <title>Detailed feature list</title>
+
+      <ul>
+       <li><p>The XML instance is validated against the DTD; any violation of
+a validation constraint leads to the rejection of the instance. The validator
+has been carefully implemented, and conforms strictly to the standard. If
+needed, it is also possible to run the parser in a well-formedness mode.</p>
+       </li>
+       <li><p>If possible, the validator applies a deterministic finite
+automaton to validate the content models. This ensures that validation can
+always be performed in linear time. However, in the case that the content
+models are not deterministic, the parser uses a backtracking algorithm which
+can be much slower. - It is also possible to reject non-deterministic content
+models.</p>
+       </li>
+       <li><p>In particular, the validator also checks the complicated rules
+governing whether parentheses are properly nested with respect to entities, and whether
+the standalone declaration is satisfied. On demand, it is checked whether the
+IDREF attributes only refer to existing nodes.</p>
+       </li>
+       <li><p>Entity references are automatically resolved while the XML text
+is being scanned. It is not possible to recognize in the object tree where a
+referenced entity begins or ends; the object tree only represents the logical structure.</p>
+       </li>
+       <li><p>External entities are loaded using a configurable resolver
+infrastructure. It is possible to connect the parser with an arbitrary XML source.</p>
+       </li>
+       <li><p>The parser can read XML text encoded in a variety of character
+sets. Independent of this, it is possible to choose the encoding of the
+internal representation of the tree nodes; the parser automatically converts
+the input text to this encoding. Currently, the parser supports UTF-8 and
+ISO-8859-1 as internal encodings.</p>
+       </li>
+       <li><p>The interface of the parser has been designed such that it is
+best integrated into the language O'Caml. The first goal was simplicity of
+usage which is achieved by many convenience methods and functions, and by
+allowing the user to select which parts of the XML text are actually
+represented in the tree. For example, it is possible to store processing
+instructions as tree nodes, but the parser can also be configured such that
+these instructions are put into hashtables. The information model is compatible
+with the requirements of XML-related standards such as XPath.</p>
+       </li>
+       <li><p>In particular, the node tree can optionally contain or leave out
+processing instructions and comments. It is also possible to generate a "super
+root" object which is the parent of the root element. The attributes of
+elements are normally not stored as nodes, but it is possible to get them
+wrapped into nodes.</p>
+       </li>
+       <li><p>There is also an interface for DTDs; you can parse and access
+sequences of declarations. The declarations are fully represented as recursive
+O'Caml values.
+</p>
+       </li>
+      </ul>
+    </sect2>
+
+
+    <sect2>
+      <title>Code examples</title>
+      <p>
+This distribution contains several examples:</p>
+      <ul>
+       <li><p>
+<em>validate:</em> simply parses a
+document and prints all error messages
+</p></li>
+
+       <li><p>
+<em>readme:</em> Defines a DTD for simple "README"-like documents, and offers
+conversion to HTML and text files<footnote>This particular document is an
+example of this DTD!</footnote>.
+</p></li>
+
+       <li><p>
+<em>xmlforms:</em> This is already a
+sophisticated application that uses XML as style sheet language and data
+storage format. It shows how a Tk user interface can be configured by an
+XML style, and how data records can be stored using XML.
+</p></li>
+      </ul>
+    </sect2>
+
+    <sect2>
+      <title>Restrictions and missing features</title>
+      <p>
+The following restrictions apply that are not violations of the standard:
+</p>
+      <ul>
+       <li><p>
+The attributes "xml:space", and "xml:lang" are not supported specially.
+  (The application can do this.)</p></li>
+
+       <li><p>
+The built-in support for SYSTEM and PUBLIC identifiers is limited to
+  local file access. There is no support for catalogs. The parser offers
+  a hook to add missing features.</p></li>
+
+       <li><p>
+It is currently not possible to check for interoperability with SGML.
+</p></li>
+      </ul>
+
+<p>The following features are also missing:</p>
+      <ul>
+       <li><p>There is no special support for namespaces. (Perhaps in the next release?)</p>
+       </li>
+       <li><p>There is no support for XPATH or XSLT.</p>
+       </li>
+      </ul>
+<p>However, I hope that these features will be implemented soon, either by
+myself or by contributors (who are invited to do so).</p>
+    </sect2>
+
+    <sect2>
+      <title>Recent Changes</title>
+      <ul>
+       <li>
+         <p>Changed in 1.0:</p>
+         <p>Support for document order.</p>
+       </li>
+       <li>
+         <p>Changed in 0.99.8:</p>
+         <p>Several fixes of bugs reported by Haruo Hosoya and Alain
+Frisch.</p>
+         <p>The class type "node" has been extended: you can go directly to
+the next and previous nodes in the list; you can refer to nodes by
+position.</p>
+         <p>There are now some iterators for nodes: find, find_all,
+find_element, find_all_elements, map_tree, iter_tree.</p>
+         <p>Experimental support for viewing attributes as nodes; I hope that
+helps Alain write his XPath evaluator.</p>
+         <p>The user's manual has been revised and is almost up to date.</p>
+       </li>
+       <li>
+         <p>Changed in 0.99.7:</p>
+         <p>There are now additional node types T_super_root, T_pinstr and
+T_comment, and the parser is able to create the corresponding nodes.</p>
+         <p>The functions for character set conversion have been moved to
+the Netstring package; they are not specific for XML.</p>
+       </li>
+       <li>
+         <p>Changed in 0.99.6:</p>
+         <p>Implemented a check on deterministic content models. Added
+an alternate validator based on a DFA. - This means that now all mandatory
+features for an XML-1.0 parser are implemented! The parser is now substantially
+complete.</p>
+       </li>
+       <li>
+         <p>Changed in 0.99.5:</p>
+         <p>The handling of ID and IDREF attributes has changed. The
+index of nodes containing an ID attribute is now separated from the document.
+Optionally the parser now checks whether the IDREF attributes refer to
+existing elements.</p>
+         <p>The element nodes can optionally store the location in the
+source XML code.</p>
+         <p>The method 'write' writes the XML tree in every supported
+encoding. (Successor of 'write_compact_as_latin1'.)</p>
+         <p>Several smaller changes and fixes.</p>
+       </li>
+       <li>
+         <p>Changed in 0.99.4:</p>
+         <p>The module Pxp_reader has been modernized. The resolver classes
+are simpler to use. There is now support for URLs.</p>
+         <p>The interface of Pxp_yacc has been improved: The type 'source'
+is now simpler. The type 'domspec' has gone; the new 'spec' is opaque and
+performs better. There are some new parsing modes.</p>
+         <p>Many smaller changes.</p>
+       </li>
+       <li>
+         <p>Changed in 0.99.3:</p>
+         <p>The markup_* modules have been renamed to pxp_*. There is a new
+compatibility API that tries to be compatible with markup-0.2.10.</p>
+         <p>The type "encoding" is now a polymorphic variant.</p>
+       </li>
+        <li>
+         <p>Changed in 0.99.2:</p>
+         <p>Added checks for the constraints about the standalone
+declaration.</p>
+         <p>Added regression tests about attribute normalization, 
+attribute checks, standalone checks.</p>
+         <p>Fixed some minor errors of the attribute normalization
+function.</p>
+         <p>The bytecode/native archives are now separated into
+a general part, an ISO-8859-1-relevant part, and a UTF-8-relevant
+part. The parser can again be compiled with ocamlopt.</p>
+       </li>
+        <li>
+         <p>Changed in 0.99.1:</p>
+         <p>In general, this release is an early pre-release of the
+next stable version 1.00. I do not recommend using it for serious
+work; it is still very experimental!</p>
+         <p>The core of the parser has been rewritten using a self-written
+parser generator.</p>
+         <p>The lexer has been restructured, and can now handle UTF-8
+encoded files.</p>
+         <p>Numerous other changes.</p>
+       </li>
+
+<!--
+       <li>
+         <p>Changed in 0.2.10:</p>
+         <p>Bugfix: in the "allow_undeclared_attributes" feature.</p>
+         <p>Bugfix: in the methods write_compact_as_latin1.</p>
+         <p>Improvement: The code produced by the codewriter module can be
+faster compiled and with less memory usage.</p>
+       </li>
+
+       <li>
+         <p>Changed in 0.2.9:</p>
+         <p>New: The module Markup_codewriter generates for a given XML
+tree O'Caml code that creates the same XML tree. This is useful for
+applications which use large, constant XML trees.</p>
+         <p>New: Documents and DTDs have a method write_compact_as_latin1
+that writes an XML tree to a buffer or to a channel. (But it is not a pretty
+printer...)</p>
+         <p>Enhancement: If a DTD contains the processing instruction
+<code>
+&lt;?xml:allow_undeclared_attributes x?&gt;</code> 
+where "x" is the name of an already declared element it is allowed that
+instances of this element type have attributes that have not been declared.
+</p>
+         <p>New function Markup_types.string_of_exn that converts an
+exception from Markup into a readable string.</p>
+         <p>Change: The module Markup_reader contains all resolvers.
+The resolver API is now stable.</p>
+         <p>New parser modes processing_instructions_inline and
+virtual_root that help locating processing instructions exactly (if needed).
+</p>
+         <p>Many bugs regarding CRLF handling have been fixed.</p>
+         <p>The distributed tarball contains now the regression test suite.
+</p>
+         <p>The manual has been extended (but it is still incomplete and
+still behind the code).</p>
+       </li>
+       <li>
+         <p>Changed in 0.2.8:</p>
+         <p>A bit more documentation (Markup_yacc).</p>
+         <p>Bugfix: In previous versions, the second trial to refer to
+an entity caused a Bad_character_stream exception. The reason was improper
+re-initialization of the resolver object.</p>
+       </li>
+       <li>
+         <p>Changed in 0.2.7:</p>
+         <p>Added some methods in Markup_document.</p>
+         <p>Bugfix: in method orphaned_clone</p>
+       </li>
+       <li>
+         <p>Changed in 0.2.6:</p>
+         <p>Enhancement: The config parameter has a new component
+"errors_with_line_numbers". If "true", error exceptions come with line numbers 
+(the default; and the only option in the previous versions); if "false"
+the line numbers are left out (only character positions). The parser is 10 to
+20 percent faster if the lines are not tracked.</p>
+         <p>Enhancement: If a DTD contains the processing instruction
+<code>
+&lt;?xml:allow_undeclared_elements_and_notations?&gt;</code> 
+it is allowed that
+elements and notations are undeclared. However, the elements for which
+declarations exist are still validated. The main effect is that the
+keyword ALL in element declarations means that also undeclared elements
+are permitted at this location.</p>
+         <p>Bugfix in method "set_nodes" of class Markup_document.node_impl.
+</p>
+       </li>
+       <li>
+         <p>Changed in 0.2.5:</p>
+         <p>If the XML source is a string (i.e. Latin1 some_string is passed
+to the parser functions as source), resolving did not work properly in 
+previous releases. This is now fixed.
+</p>
+       </li>
+       <li>
+         <p>Changed in 0.2.4:</p>
+         <p>A problem with some kind of DTD that does not specify the name
+of the root element was fixed. As a result, the "xmlforms" application works
+again. Again thanks to Haruo.</p>
+         <p>Due to the XML specs it is forbidden that parameter entities are
+referenced within the internal subset if the referenced text is not a 
+complete declaration itself. This is checked, but the check was too hard;
+even in external entities referenced from the internal subset this rule
+was enforced. This has been corrected; in external entities it is now possible
+to use parameter entities in an unrestricted way.
+</p>
+       </li>
+       <li>
+         <p>Changed in 0.2.3:</p>
+         <p>A fix for a problem when installing Markup on Solaris. 
+Haruo detected the problem.</p>
+       </li>
+       <li>
+         <p>Changed in 0.2.2:</p>
+         <p>A single bugfix: The parser did not reject documents where the
+root element was not the element declared as root element. Again thanks
+to Claudio.</p>
+       </li>
+       <li>
+         <p>Changed in 0.2.1:</p>
+         <p>A single bugfix which reduces the number of warnings. Thanks
+to Claudio for detecting the bug.</p>
+       </li>
+       <li>
+         <p>Changed in 0.2:</p>
+         <p>
+Much more constraints are checked in the 0.2 release than in 0.1. Especially
+that entities are properly nested is now guaranteed; parsed entities now always
+match the corresponding production of the grammar.</p>
+         <p>
+Many weak checks have been turned into strong checks. For example, it is now
+detected if the "version", "encoding", and "standalone" attributes of an XML
+declaration are ordered in the right way.
+</p>
+         <p>
+The error messages have been improved. 
+</p>
+       </li>
+-->
+      </ul>
+    </sect2>
+  </sect1>
+</readme>
+
diff --git a/helm/DEVEL/pxp/pxp/doc/SPEC b/helm/DEVEL/pxp/pxp/doc/SPEC
new file mode 100644 (file)
index 0000000..28e6914
--- /dev/null
@@ -0,0 +1,185 @@
+******************************************************************************
+Notes on the XML specification
+******************************************************************************
+
+
+==============================================================================
+This document
+==============================================================================
+
+There are some points in the XML specification which are ambiguous. The 
+following notes discuss these points, and describe how this parser behaves.
+
+==============================================================================
+Conditional sections and the token ]]>
+==============================================================================
+
+It is unclear what happens if an ignored section contains the token ]]> at 
+places where it is normally allowed, i.e. within string literals and comments, 
+e.g. 
+
+<![IGNORE[ <!-- ]]> --> ]]>
+
+On the one hand, the production rule of the XML grammar does not treat such 
+tokens specially. Following the grammar, the first ]]> already ends the 
+conditional section 
+
+<![IGNORE[ <!-- ]]>
+
+and the other tokens are included in the DTD.
+
+On the other hand, we can read: "Like the internal and external DTD subsets, a 
+conditional section may contain one or more complete declarations, comments, 
+processing instructions, or nested conditional sections, intermingled with 
+white space" (XML 1.0 spec, section 3.4). Complete declarations and comments 
+may contain ]]>, so this is contradictory to the grammar.
+
+The intention of conditional sections is to include or exclude the section 
+depending on the current replacement text of a parameter entity. Almost always 
+such sections are used as in 
+
+<!ENTITY % want.a.feature.or.not "INCLUDE">   (or "IGNORE")
+<![ %want.a.feature.or.not; [ ... ]]>
+
+This means that if it is possible to include a section it must also be legal to 
+ignore the same section. This is a strong indication that the token ]]> must 
+not count as section terminator if it occurs in a string literal or comment.
+
+This parser implements the latter.
+
+==============================================================================
+Conditional sections and the inclusion of parameter entities
+==============================================================================
+
+It is unclear what happens if an ignored section contains a reference to a 
+parameter entity. In most cases, this is not problematic because nesting of 
+parameter entities must respect declaration braces. The replacement text of 
+parameter entities must either contain a whole number of declarations or only 
+inner material of one declaration. Almost always it does not matter whether 
+these references are resolved or not (the section is ignored).
+
+But there is one case which is not explicitly specified: Is it allowed that the 
+replacement text of an entity contains the end marker ]]> of an ignored 
+conditional section? Example: 
+
+<!ENTITY % end "]]>">
+<![ IGNORE [ %end;
+
+We do not find the statement in the XML spec that the ]]> must be contained in 
+the same entity as the corresponding <![ (as for the tokens <! and > of 
+declarations). So it is possible to conclude that ]]> may be in another entity.
+
+Of course, there are many arguments not to allow such constructs: The resulting 
+code is incomprehensible, and parsing takes longer (especially if the entities 
+are external). I think the best argument against this kind of XML is that the 
+XML spec is not detailed enough, as it contains no rules where entity 
+references should be recognized and where not. For example: 
+
+<!ENTITY % y "]]>">
+<!ENTITY % x "<!ENTITY z '<![CDATA[some text%y;'>">
+<![ IGNORE [ %x; ]]>
+
+Which token ]]> counts? From a logical point of view, the ]]> in the third line 
+ends the conditional section. As already pointed out, the XML spec permits the 
+interpretation that ]]> is recognized even in string literals, and this may be 
+also true if it is "imported" from a separate entity; and so the first ]]> 
+denotes the end of the section.
+
+As a practical solution, this parser does not expand parameter entities in 
+ignored sections. Furthermore, it is also not allowed that the ending ]]> of 
+ignored or included sections is contained in a different entity than the 
+starting <![ token.
+
+==============================================================================
+Standalone documents and attribute normalization
+==============================================================================
+
+If a document is declared as stand-alone, a restriction on the effect of 
+attribute normalization takes effect for attributes declared in external 
+entities. Normally, the parser knows the type of the attribute from the ATTLIST 
+declaration, and it can normalize attribute values depending on their types. 
+For example, an NMTOKEN attribute can be written with leading or trailing 
+spaces, but the parser returns always the nmtoken without such added spaces; in 
+contrast to this, a CDATA attribute is not normalized in this way. For 
+stand-alone documents the type information is not available if the ATTLIST 
+declaration is located in an external entity. Because of this, the XML spec 
+demands that attribute values must be written in their normal form in this 
+case, i.e. without additional spaces. 
+
+This parser interprets this restriction as follows. Obviously, the substitution 
+of character and entity references is not considered as a "change of the value" 
+as a result of the normalization, because these operations will be performed 
+identically if the ATTLIST declaration is not available. The same applies to 
+the substitution of TABs, CRs, and LFs by space characters. Only the removal of 
+spaces depending on the type of the attribute changes the value if the ATTLIST 
+is not available. 
+
+This means in detail: CDATA attributes never violate the stand-alone status. 
+ID, IDREF, NMTOKEN, ENTITY, NOTATION and enumerator attributes must not be 
+written with leading and/or trailing spaces. IDREFS, ENTITIES, and NMTOKENS 
+attributes must not be written with extra spaces at the beginning or at the end 
+of the value, or between the tokens of the list. 
+
+The whole check is dubious, because the attribute type expresses also a 
+semantical constraint, not only a syntactical one. At least this parser 
+distinguishes strictly between single-value and list types, and returns the 
+attribute values differently; the former are represented as Value s (where s is 
+a string), the latter are represented as Valuelist [s1; s2; ...; sN]. The 
+internal representation of the value is dependent on the attribute type, too, 
+such that even normalized values are processed differently depending on whether 
+the attribute has list type or not. For this parser, it still makes a 
+difference whether a value is normalized and processed as if it were CDATA, or 
+whether the value is processed according to its declared type. 
+
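+A hedged sketch of how the two representations show up in application code 
+(att_value as defined in Pxp_types; module paths and details may differ): 
+
+   let describe_attribute v =
+     match v with
+     | Pxp_types.Value s       -> "single-value type: " ^ s
+     | Pxp_types.Valuelist l   -> "list type: " ^ String.concat " " l
+     | Pxp_types.Implied_value -> "no value (the attribute was omitted)"
+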
+The stand-alone check is included to be able to state whether other parsers 
+that only check well-formedness can process the document. Of course, these parsers 
+always process attributes as CDATA, and the stand-alone check guarantees that 
+these parsers will always see the normalized values. 
+
+==============================================================================
+Standalone documents and the restrictions on entity
+references
+==============================================================================
+
+Stand-alone documents must not refer to entities which are declared in an 
+external entity. This parser applies this rule only: to general and NDATA 
+entities when they occur in the document body (i.e. not in the DTD); and to 
+general and NDATA entities occurring in default attribute values declared in the 
+internal subset of the DTD. 
+
+Parameter entities are not an issue for the stand-alone property. If there 
+is a parameter entity reference in the internal subset which was declared in an 
+external entity, it is unavailable in the same way as the external entity that 
+contains its declaration is unavailable. Because of this "equivalence", 
+parameter entity references are not checked for violations of the 
+stand-alone declaration. It simply does not matter. - Illustration: 
+
+Main document: 
+
+<!ENTITY % ext SYSTEM "ext">
+%ext;
+%ent;
+
+"ext" contains: 
+
+<!ENTITY % ent "<!ELEMENT el (other*)>">
+
+
+
+Here, the reference %ent; would be illegal if the standalone declaration is 
+strictly interpreted. This parser handles the references %ent; and %ext; 
+equivalently which means that %ent; is allowed, but the element type "el" is 
+treated as externally declared. 
+
+General entities can occur within the DTD, but they can only be contained in 
+the default value of attributes, or in the definition of other general 
+entities. The latter can be ignored, because the check will be repeated when 
+the entities are expanded. However, general entities occurring in default 
+attribute values are actually checked at the moment when the default is used in 
+an element instance. 
+
+General entities occurring in the document body are always checked.
+
+NDATA entities can occur in ENTITY attribute values; either in the element 
+instance or in the default declaration. Both cases are checked. 
+
diff --git a/helm/DEVEL/pxp/pxp/doc/SPEC.xml b/helm/DEVEL/pxp/pxp/doc/SPEC.xml
new file mode 100644 (file)
index 0000000..906f45a
--- /dev/null
@@ -0,0 +1,226 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!-- Special HTML config: -->
+<!ENTITY % readme:html:up '<a href="../..">up</a>'>
+
+<!ENTITY % config SYSTEM "config.xml">
+%config;
+
+]>
+
+<readme title="Notes on the XML specification">
+
+  <sect1>
+    <title>This document</title>
+    <p>There are some points in the XML specification which are ambiguous.
+The following notes discuss these points, and describe how this parser
+behaves.</p>
+  </sect1>
+
+  <sect1>
+    <title>Conditional sections and the token ]]&gt;</title>
+
+    <p>It is unclear what happens if an ignored section contains the
+token ]]&gt; at places where it is normally allowed, i.e. within string
+literals and comments, e.g.
+
+<code>
+&lt;![IGNORE[ &lt;!-- ]]&gt; --&gt; ]]&gt;
+</code>
+
+On the one hand, the production rule of the XML grammar does not treat such 
+tokens specially. Following the grammar, the first ]]&gt; already ends
+the conditional section
+
+<code>
+&lt;![IGNORE[ &lt;!-- ]]&gt;
+</code>
+
+and the other tokens are included in the DTD.</p>
+
+<p>On the other hand, we can read: "Like the internal and external DTD subsets,
+a conditional section may contain one or more complete declarations, comments,
+processing instructions, or nested conditional sections, intermingled with
+white space" (XML 1.0 spec, section 3.4). Complete declarations and comments
+may contain ]]&gt;, so this is contradictory to the grammar.</p>
+
+<p>The intention of conditional sections is to include or exclude the section 
+depending on the current replacement text of a parameter entity. Almost
+always such sections are used as in
+
+<code>
+&lt;!ENTITY % want.a.feature.or.not "INCLUDE"&gt;   (or "IGNORE")
+&lt;![ %want.a.feature.or.not; [ ... ]]&gt;
+</code>
+
+This means that if it is possible to include a section it must also be
+legal to ignore the same section. This is a strong indication that 
+the token ]]&gt; must not count as section terminator if it occurs
+in a string literal or comment.</p>
+
+<p>This parser implements the latter.</p>
+
+  </sect1>
+
+  <sect1>
+    <title>Conditional sections and the inclusion of parameter entities</title>
+
+    <p>It is unclear what happens if an ignored section contains a reference
+to a parameter entity. In most cases, this is not problematic because 
+nesting of parameter entities must respect declaration braces. The
+replacement text of parameter entities must either contain a <em>whole</em>
+number of declarations or only inner material of one declaration. Almost always
+it does not matter whether these references are resolved or not
+(the section is ignored).</p>
+
+    <p>But there is one case which is not explicitly specified: Is it allowed
+that the replacement text of an entity contains the end marker ]]&gt; 
+of an ignored conditional section? Example:
+
+<code>
+&lt;!ENTITY % end "]]&gt;"&gt;
+&lt;![ IGNORE [ %end;
+</code>
+
+We do not find the statement in the XML spec that the ]]&gt; must be contained
+in the same entity as the corresponding &lt;![ (as for the tokens &lt;! and
+&gt; of declarations). So it is possible to conclude that ]]&gt; may be in
+another entity.</p>
+
+    <p>Of course, there are many arguments not to allow such constructs: The
+resulting code is incomprehensible, and parsing takes longer (especially if the
+entities are external). I think the best argument against this kind of XML
+is that the XML spec is not detailed enough, as it contains no rules where
+entity references should be recognized and where not. For example:
+
+<code>
+&lt;!ENTITY % y "]]&gt;"&gt;
+&lt;!ENTITY % x "&lt;!ENTITY z '&lt;![CDATA[some text%y;'&gt;"&gt;
+&lt;![ IGNORE [ %x; ]]&gt;
+</code>
+
+Which token ]]&gt; counts? From a logical point of view, the ]]&gt; in the
+third line ends the conditional section. As already pointed out, the XML spec
+permits the interpretation that ]]&gt; is recognized even in string literals,
+and this may also be true if it is "imported" from a separate entity; and so
+the first ]]&gt; denotes the end of the section.</p>
+
+    <p>As a practical solution, this parser does not expand parameter entities
+in ignored sections. Furthermore, it is also not allowed that the ending ]]&gt;
+of ignored or included sections is contained in a different entity than the
+starting &lt;![ token.</p>
+  </sect1>
+
+
+  <sect1>
+    <title>Standalone documents and attribute normalization</title>
+    
+    <p>
+If a document is declared as stand-alone, a restriction on the effect of
+attribute normalization takes effect for attributes declared in external
+entities. Normally, the parser knows the type of the attribute from
+the ATTLIST declaration, and it can normalize attribute values depending
+on their types. For example, an NMTOKEN attribute can be written with
+leading or trailing spaces, but the parser returns always the nmtoken
+without such added spaces; in contrast to this, a CDATA attribute is
+not normalized in this way. For stand-alone documents the type information is
+not available if the ATTLIST declaration is located in an external
+entity. Because of this, the XML spec demands that attribute values must
+be written in their normal form in this case, i.e. without additional
+spaces.
+</p>
+    <p>This parser interprets this restriction as follows. Obviously, 
+the substitution of character and entity references is not considered
+as a "change of the value" as a result of the normalization, because
+these operations will be performed identically if the ATTLIST declaration
+is not available. The same applies to the substitution of TABs, CRs, 
+and LFs by space characters. Only the removal of spaces depending on
+the type of the attribute changes the value if the ATTLIST is not
+available.
+</p>
+    <p>This means in detail: CDATA attributes never violate the
+stand-alone status. ID, IDREF, NMTOKEN, ENTITY, NOTATION and enumerator
+attributes must not be written with leading and/or trailing spaces. IDREFS,
+ENTITIES, and NMTOKENS attributes must not be written with extra spaces at the
+beginning or at the end of the value, or between the tokens of the list.
+</p>
+    <p>The whole check is dubious, because the attribute type expresses also a
+semantical constraint, not only a syntactical one. At least this parser
+distinguishes strictly between single-value and list types, and returns the
+attribute values differently; the former are represented as Value s (where s is
+a string), the latter are represented as Valuelist [s1; s2; ...; sN]. The
+internal representation of the value is dependent on the attribute type, too,
+such that even normalized values are processed differently depending on
+whether the attribute has list type or not. For this parser, it still makes a
+difference whether a value is normalized and processed as if it were CDATA, or
+whether the value is processed according to its declared type.
+</p>
+    <p>The stand-alone check is included to be able to state whether 
+other parsers that only check well-formedness can process the document. Of course,
+these parsers always process attributes as CDATA, and the stand-alone check
+guarantees that these parsers will always see the normalized values.
+</p>
+  </sect1>
+
+  <sect1>
+    <title>Standalone documents and the restrictions on entity
+references</title>
+    <p>
+Stand-alone documents must not refer to entities which are declared in an
+external entity. This parser applies this rule only: to general and NDATA
+entities when they occur in the document body (i.e. not in the DTD); and to
+general and NDATA entities occuring in default attribute values declared in the
+internal subset of the DTD.
+</p>
+    <p>
+Parameter entities are not an issue for the stand-alone property. If there
+is a parameter entity reference in the internal subset which was declared in an
+external entity, it is unavailable in the same way as the external entity that
+contains its declaration is unavailable. Because of this "equivalence",
+parameter entity references are not checked for violations of the
+stand-alone declaration. It simply does not matter. - Illustration:
+</p>
+
+    <p>
+Main document:
+
+    <code><![CDATA[
+<!ENTITY % ext SYSTEM "ext">
+%ext;
+%ent;
+]]></code>
+
+"ext" contains:
+
+    <code><![CDATA[
+<!ENTITY % ent "<!ELEMENT el (other*)>">
+]]></code>
+</p>
+
+    <p>Here, the reference %ent; would be illegal if the standalone
+declaration is strictly interpreted. This parser handles the references
+%ent; and %ext; equivalently which means that %ent; is allowed, but the
+element type "el" is treated as externally declared.
+</p>
+
+    <p>
+General entities can occur within the DTD, but they can only be contained in
+the default value of attributes, or in the definition of other general
+entities. The latter can be ignored, because the check will be repeated when
+the entities are expanded. However, general entities occurring in default
+attribute values are actually checked at the moment when the default is
+used in an element instance.
+</p>
+    <p>
+General entities occurring in the document body are always checked.</p>
+    <p>
+NDATA entities can occur in ENTITY attribute values; either in the element
+instance or in the default declaration. Both cases are checked.
+</p>
+  </sect1>
+
+</readme>
diff --git a/helm/DEVEL/pxp/pxp/doc/design.txt b/helm/DEVEL/pxp/pxp/doc/design.txt
new file mode 100644 (file)
index 0000000..bf75d06
--- /dev/null
@@ -0,0 +1,340 @@
+------------------------------------------------ -*- indented-text -*-
+Some Notes About the Design:
+----------------------------------------------------------------------
+
+----------------------------------------------------------------------
+Compilation
+----------------------------------------------------------------------
+
+Compilation is non-trivial because:
+
+ - The lexer and parser generators ocamllex and ocamlyacc normally
+   create code such that the parser module precedes the lexer module.
+   This design, however, requires that the lexer layer precedes the entity layer
+   which precedes the parser layer, because the parsing results modify
+   the behaviour of the lexer and entity layers. There is no way to get
+   around this because of the nature of XML.
+
+   So the dependency relation of the lexer and the parser is modified;
+   in particular the "token" type that is normally defined by the 
+   generated parser is moved to a common predecessor of both lexer
+   and parser.
+
+ - Another modification of the standard way of handling parsers is that
+   the parser is turned into an object. This is necessary because the
+   whole parser is polymorphic, i.e. there is a type parameter (the
+   type of the node extension).
+
+......................................................................
+
+First some modules are generated as illustrated by the following
+diagram:
+
+   markup_yacc.mly  --[ocamlyacc, 1]-->  markup_yacc.mli
+                                         markup_yacc.ml
+                                           (renamed into markup_yacc.ml0)
+
+   markup_yacc.mli  --[awk, 2]--------->  markup_yacc_token.mlf
+
+   markup_yacc.ml0  --[sed, 3]--------->  markup_yacc.ml
+
+   markup_yacc_token.mlf
+     + markup_lexer_types_shadow.mli  --[sed, 4]-->  markup_lexer_types.mli
+     + markup_lexer_types_shadow.ml   --[sed, 4]-->  markup_lexer_types.ml
+
+   markup_yacc_shadow.mli  --[replaces, 5]-->  markup_yacc.mli
+
+   markup_lexers.mll  --[ocamllex, 6]--------->  markup_lexers.ml
+
+
+Notes:
+
+       (1) ocamlyacc generates both a module and a module interface.
+           The module is postprocessed in step (3). The interface cannot
+           be used, but it contains the definition of the "token" type.
+           This definition is extracted in step (2). The interface is
+           completely replaced in step (5) by a different file.
+
+       (2) An "awk" script extracts the definition of the type "token".
+           "token" is created by ocamlyacc upon the %token directives
+           in markup_yacc.mly, and normally "token" is defined in
+           the module generated by ocamlyacc. This turned out not to be
+           useful as the module dependency must be that the lexer is
+           an antecedent of the parser and not vice versa (as usual),
+           so the "token" type is "moved" to the module Markup_lexer_types
+           which is an antecedent of both the lexer and the parser.
+
+       (3) A "sed" script turns the generated parser into an object.
+           This is rather simple; some "let" definitions must be rewritten
+           as "val" definitions, the other "let" definitions as
+           "method" definitions. The parser object is needed because
+           the whole parser has a polymorphic type parameter.
+
+       (4) The implementation and definition of Markup_lexer_types are
+           both generated by inserting the "token" type definition
+           (in markup_lexer_types.mlf) into two pattern files,
+           markup_lexer_types_shadow.ml and -.mli, respectively. The point of insertion
+           is marked by the string INCLUDE_HERE.
+
+       (5) The generated interface of the Markup_yacc module is replaced
+           by a hand-written file.
+
+       (6) ocamllex generates the lexer; this process is not patched in any
+           way.
+
+......................................................................
+
+After the additional modules have been generated, compilation proceeds
+in the usual manner.
+
+
+----------------------------------------------------------------------
+Hierarchy of parsing layers:
+----------------------------------------------------------------------
+
+From top to bottom:
+
+ - Parser: Markup_yacc
+   + gets input stream from the main entity object
+   + checks most of the grammar
+   + creates the DTD object as side-effect
+   + creates the element tree as side-effect
+   + creates further entity objects that are entered into the DTD
+ - Entity layer: Markup_entity
+   + gets input stream from the lexers, or another entity object
+   + handles entity references: if a reference is encountered the
+     input stream is redirected such that the tokens come from the
+     referenced entity object
+   + handles conditional sections
+ - Lexer layer: Markup_lexers
+   + gets input from lexbuffers created by resolvers
+   + different lexers for different lexical contexts
+   + a lexer returns pairs (token,lexid), where token is the scanned
+     token, and lexid is the name of the lexer that must be used for
+     the next token
+ - Resolver layer: Markup_entity
+   + a resolver creates the lexbuf from some character source
+   + a resolver recodes the input and handles the encoding scheme
+
+----------------------------------------------------------------------
+The YACC based parser
+----------------------------------------------------------------------
+
+ocamlyacc allows passing an arbitrary 'next_token' function to the
+parsing functions. We always use 'en # next_token()' where 'en' is the
+main entity object representing the main file to be parsed.
+
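+A sketch of the adapter this amounts to (generic OCaml; the entry point and
+the entity object are left abstract, only the 'next_token' method name is
+taken from this document):
+
+   (* An ocamlyacc entry point has type
+        (Lexing.lexbuf -> token) -> Lexing.lexbuf -> result.
+      A closure over the entity object is passed instead of a real lexer
+      function; the lexbuf argument is ignored. *)
+   let call_entry
+         (entry : (Lexing.lexbuf -> 'tok) -> Lexing.lexbuf -> 'res)
+         (en    : < next_token : unit -> 'tok; .. >) : 'res =
+     entry (fun _lexbuf -> en # next_token ()) (Lexing.from_string "")
+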
+The parser is not functional, but mainly uses side-effects to accumulate
+the structures that have been recognized. This is very important for the
+entity definitions, because once an entity definition has been found there
+may be a reference to it which is handled by the entity layer (which is
+below the yacc layer). This means that such a definition modifies the
+token source of the parser, and this can only be handled by side-effects
+(at least in a sensible manner; a purely functional parser would have to
+pass unresolved entity references to its caller, which would have to
+resolve the reference and to re-parse the whole document!).
+
+Note that element definitions also profit from the imperative style of
+the parser; an element instance can be validated directly once the end
+tag has been read in.
+
+----------------------------------------------------------------------
+The entity layer
+----------------------------------------------------------------------
+
+The parser gets the tokens from the main entity object. This object
+controls the underlying lexing mechanism (see below), and already
+interprets the following:
+
+- Conditional sections (if they are allowed in this entity):
+  The structures <![ INCLUDE [ ... ]]> and <![ IGNORE [ ... ]]> are
+  recognized and interpreted.
+
+  This would be hard to realize by the yacc parser, because:
+  - INCLUDE and IGNORE are not recognized as lexical keywords but as names.
+    This means that the parser cannot select different rules for them.
+  - The text after IGNORE requires a different lexical handling.
+
+- Entity references: &name; and %name;
+  The named entity is looked up and the input source is redirected to it, i.e.
+  if the main entity object gets the message 'next_token' this message is
+  forwarded to the referenced entity. (This entity may choose to forward the
+  message again to a third entity, and so on.)
+
+  There are some fine points:
+
+  - It is okay that redirection happens at token level, not at character level:
+    + General entities must always match the 'content' production, and because
+      of this they must always consist of a whole number of tokens.
+    + If parameter entities are resolved, the XML specification states that
+      a space character is inserted before and after the replacement text.
+      This also means that such entities always consist of a whole number
+      of tokens.
+
+  - There are some "nesting constraints":
+    + General entities must match the 'content' production. Because of this,
+      the special token Begin_entity is inserted before the first token of
+      the entity, and End_entity is inserted just before the Eof token. The
+      brace Begin_entity...End_entity is recognized by the yacc parser, but
+      only in the 'content' production.
+    + External parameter entities must match 'extSubsetDecl'. Again,
+      Begin_entity and End_entity tokens embrace the inner token stream.
+      The brace Begin_entity...End_entity is recognized by the yacc parser
+      at the appropriate position.
+      (As general and parameter entities are used in different contexts
+      (document vs. DTD), both kinds of entities can use the same brace
+      Begin_entity...End_entity.)
+    + TODO:
+      The constraints for internal parameter entities are not yet checked.
+
+  - Recursive references can be detected because entities must be opened
+    before the 'next_token' method can be invoked.
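+
+The redirection sketch mentioned above (hypothetical names; the token type is
+drastically reduced and the real entity objects carry much more state):
+
+  type token = Begin_entity | End_entity | Name of string | Eof
+
+  class entity (scan : unit -> token) =
+    object
+      val mutable redirect : (unit -> token) option = None
+      (* called when &name; or %name; has been recognized: from now on
+         the tokens are taken from the referenced entity *)
+      method follow_reference (get : unit -> token) = redirect <- Some get
+      method next_token () =
+        match redirect with
+        | Some get ->
+            let tok = get () in
+            if tok = End_entity then redirect <- None;
+            tok
+        | None -> scan ()
+    end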
+
+----------------------------------------------------------------------
+The lexer layer
+----------------------------------------------------------------------
+
+There are five main lexers, and a number of auxiliary lexers. The five
+main lexers are:
+
+- Document (function scan_document):
+  Scans an XML document outside the DTD and outside the element instance.
+
+- Content (function scan_content):
+  Scans an element instance, but not within tags.
+
+- Within_tag (function scan_within_tag):
+  Scans within <...>, i.e. a tag denoting an element instance.
+
+- Document_type (function scan_document_type):
+  Scans after <!DOCTYPE until the corresponding >.
+
+- Declaration (function scan_declaration):
+  Scans sequences of declarations.
+
+Why several lexers? Because there are different lexical rules in these
+five regions of an XML document.
+
+Every lexer produces not only tokens, but also the name of the next lexer
+to use. For example, if the Document lexer scans "<!DOCTYPE", it also
+outputs that the next token must be scanned by Document_type.
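+
+A minimal sketch of this dispatch (hypothetical types; the scan_* functions
+are the ones named above, and the returned lexer id is fed back into the
+next call):
+
+  type lexer_id = Document | Content | Within_tag | Document_type | Declaration
+  type lexer_state = { mutable current_lexid : lexer_id }
+
+  let scan lexid lexbuf =
+    match lexid with
+    | Document      -> scan_document lexbuf
+    | Content       -> scan_content lexbuf
+    | Within_tag    -> scan_within_tag lexbuf
+    | Document_type -> scan_document_type lexbuf
+    | Declaration   -> scan_declaration lexbuf
+
+  (* every scan_* function returns (token, lexer id for the next token) *)
+  let next_token state lexbuf =
+    let (tok, next_lexid) = scan state.current_lexid lexbuf in
+    state.current_lexid <- next_lexid;
+    tok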
+
+It is interesting that this really works. The beginning of every lexical
+context can be recognized by the lexer of the previous context, and there
+is always a token that unambiguously indicates that the context ends.
+
+----------------------------------------------------------------------
+The DTD object
+----------------------------------------------------------------------
+
+There is usually one object that collects DTD declarations. All kinds of
+declarations are entered here:
+
+- element and attribute list declarations
+- entity declarations
+- notation declarations
+
+Some properties are validated directly after a declaration has been added
+to the DTD, but most validation is done by a 'validate' method.
+
+The result of 'validate' is stored such that another invocation is cheap.
+A DTD becomes 'unchecked' again if another declaration is added.
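+
+A sketch of this caching (not the real interface):
+
+  class dtd_sketch =
+    object
+      val mutable decls : string list = []
+      val mutable checked = false
+      method add_declaration d =
+        decls <- d :: decls;
+        checked <- false              (* the DTD becomes 'unchecked' again *)
+      method validate =
+        if not checked then begin
+          (* ...perform the expensive global checks over 'decls' here... *)
+          checked <- true
+        end
+    end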
+
+TODO: We need a special DTD object that allows arbitrary content.
+
+The DTD object is known by more or less every other object, i.e. entities
+know the DTD, element declarations and instances know the DTD, and so on.
+
+TODO: We need a method that deletes all entity declarations once the DTD
+is complete (to free memory).
+
+----------------------------------------------------------------------
+Element and Document objects
+----------------------------------------------------------------------
+
+The 'element' objects form the tree of the element instances.
+
+The 'document' object is a derivative of 'element' where properties of the
+whole document can be stored.
+
+New element objects are NOT created by the "new class" mechanism, but
+instead by an exemplar/instance scheme: A new instance is the duplicate
+of an exemplar. This has the advantage that the user can provide their own
+classes for the element instances. A hashtable contains the exemplars
+for every element type (tag name), and there is a default exemplar.
+The user can configure this hashtable such that, for example, elements A
+are represented by objects of class element_a, elements B by objects of
+class element_b, and so on.
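+
+A reduced sketch of the exemplar scheme (hypothetical names; real nodes have
+a much richer interface):
+
+  class element_sketch (eltype : string) =
+    object
+      val element_type = eltype
+      method element_type = element_type
+      (* a new instance is a duplicate of the exemplar *)
+      method create_copy new_type = {< element_type = new_type >}
+    end
+
+  let exemplars : (string, element_sketch) Hashtbl.t = Hashtbl.create 100
+  let default_exemplar = new element_sketch "#default"
+
+  let create_element eltype =
+    let ex =
+      try Hashtbl.find exemplars eltype
+      with Not_found -> default_exemplar in
+    ex # create_copy eltype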
+
+The object for the root element must already be created before parsing
+starts, and the parser returns the (filled) root object. Because of this,
+the user determines the *static* type of the object without needing
+a back coercion, i.e. a downcast (which is not possible in OCaml).
+
+----------------------------------------------------------------------
+Newline normalization
+----------------------------------------------------------------------
+
+The XML spec states that all of \n, \r, and \r\n must be recognized
+as newline characters/character sequences. Notes:
+- The replacement text of entities always contains the original text,
+  i.e. \r and \r\n are NOT converted to \n.
+  It is unclear if this is a violation of the standard or not.
+- Content of elements: Newline characters are converted to \n.
+- Attribute values: Newline characters are converted to spaces.
+- Processing instructions: Newline characters are not converted.
+  It is unclear if this is a violation of the standard or not.
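+
+A sketch of the conversion applied to element content (\r and \r\n become \n):
+
+  let normalize_newlines s =
+    let b = Buffer.create (String.length s) in
+    let n = String.length s in
+    let i = ref 0 in
+    while !i < n do
+      (match s.[!i] with
+       | '\r' ->
+           Buffer.add_char b '\n';
+           (* skip the \n of a \r\n sequence *)
+           if !i + 1 < n && s.[!i + 1] = '\n' then incr i
+       | c -> Buffer.add_char b c);
+      incr i
+    done;
+    Buffer.contents b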
+
+----------------------------------------------------------------------
+Empty entities
+----------------------------------------------------------------------
+
+Many entities are artificially surrounded by a Begin_entity/End_entity pair.
+This is sometimes not done if the entity is empty:
+
+- External parameter entities are parsed entities, i.e. they must match
+  the markupdecl* production. If they are not empty, the Begin_entity/End_entity
+  trick guarantees that they match markupdecl+, and that they are only
+  referred to at positions where markupdecl+ is allowed.
+  If they are empty, they are allowed everywhere just like internal 
+  parameter entities. Because of this, the Begin_entity/End_entity pair
+  is dropped.
+
+- This does not apply to parameter entities (either external or internal)
+  which are referred to in the internal subset, nor to internal
+  parameter entities, nor to general entities:
+
+  + References in the internal subset are only allowed at positions where
+    markupdecl can occur, so Begin_entity/End_entity is added even if the
+    entity is empty.
+  + References to internal parameter entities are allowed anywhere, so
+    Begin_entity/End_entity is never added.
+  + References to general entities: An empty Begin_entity/End_entity pair
+    is recognized by the yacc parser, so special handling is not required.
+    Moreover, there is the situation that an empty entity is referred to
+    after the toplevel element:
+    <!DOCTYPE doc ...[
+    <!ENTITY empty "">
+    ]>
+    <doc></doc>&empty;
+    This is illegal, and the presence of an empty Begin_entity/End_entity pair
+    helps to recognize this.
diff --git a/helm/DEVEL/pxp/pxp/doc/dist-common.xml b/helm/DEVEL/pxp/pxp/doc/dist-common.xml
new file mode 100644 (file)
index 0000000..d18a150
--- /dev/null
@@ -0,0 +1,123 @@
+<?xml encoding="ISO-8859-1"?>
+
+<!-- ************************************************************ -->
+<!-- EXTERNAL URLs                                                -->
+<!-- ************************************************************ -->
+
+<!ENTITY url.ocaml           
+         "http://caml.inria.fr/">
+
+<!ENTITY url.ocaml.list
+         "http://caml.inria.fr/caml-list-eng.html">
+
+<!ENTITY url.ocaml.download
+         "ftp://ftp.inria.fr/lang/caml-light/">
+
+<!ENTITY url.ocaml.camlp4
+         "http://caml.inria.fr/camlp4/">
+
+<!ENTITY url.ocaml.hump
+         "http://caml.inria.fr/hump.html">
+
+<!ENTITY url.ocaml.mottl
+         "http://miss.wu-wien.ac.at/~mottl/ocaml_sources/intro.html">
+
+<!ENTITY url.ocaml.mottl.pcre
+         "http://miss.wu-wien.ac.at/~mottl/ocaml_sources/pcre_ocaml.tar.gz">
+
+<!ENTITY url.ocaml.lindig
+         "http://www.cs.tu-bs.de/softech/people/lindig/software/index.html">
+
+<!ENTITY url.ocaml.lindig.ocmarkup
+         "http://www.cs.tu-bs.de/softech/people/lindig/software/ocmarkup.html">
+
+<!ENTITY url.ocaml.lindig.tony
+         "http://www.cs.tu-bs.de/softech/people/lindig/software/tony.html">
+
+<!ENTITY url.ocaml.filliatre
+         "http://www.lri.fr/~filliatr/software.en.html">
+
+<!ENTITY url.ocaml.filliatre.cgi
+         "http://www.lri.fr/~filliatr/ftp/ocaml/cgi/">
+
+<!ENTITY url.xml-spec        
+         "http://www.w3.org/TR/1998/REC-xml-19980210.html">
+
+<!ENTITY url.xml.oasis
+         "http://www.oasis-open.org/cover/">
+
+<!ENTITY url.xml.w3c
+         "http://www.w3c.org/XML/">
+
+<!ENTITY url.jclark-xmltdata 
+         "ftp://ftp.jclark.com/pub/xml/xmltest.zip">
+
+<!ENTITY urlprefix.ietf.rfc
+         "http://www.ietf.org/rfc">
+   <!-- No trailing "/"! -->
+
+<!ENTITY url.apache
+         "http://www.apache.org/">
+
+
+<!-- ************************************************************ -->
+<!-- MY URLs                                                      -->
+<!-- ************************************************************ -->
+
+<!ENTITY url.linkdb
+         "http://www.npc.de/ocaml/linkdb">
+
+<!-- ************************************************************ -->
+<!-- HOMEPAGE URLs                                                -->
+<!-- ************************************************************ -->
+
+<!-- GENERIC -->
+
+<!ENTITY url.gps-ocaml-download 
+         "http://people.darmstadt.netsurf.de/Gerd.Stolpmann/ocaml">
+
+<!ENTITY url.gps-ocaml-projects
+         "http://people.darmstadt.netsurf.de/Gerd.Stolpmann/ocaml/projects">
+
+<!ENTITY url.gps-old-download
+         "http://people.darmstadt.netsurf.de/Gerd.Stolpmann/download">
+
+
+<!-- SPECIFIC -->
+
+<!ENTITY release.findlib
+         "SOME-VERSION">
+
+<!ENTITY url.findlib-download
+         "&url.gps-ocaml-download;/findlib-&release.findlib;.tar.gz">
+
+<!ENTITY url.findlib-project
+         "&url.gps-ocaml-projects;/findlib/">
+
+<!ENTITY url.findlib-manual
+         "&url.gps-ocaml-projects;/findlib/">
+
+
+
+<!ENTITY release.markup
+         "SOME-VERSION">
+
+<!ENTITY url.markup-download    
+         "&url.gps-ocaml-download;/markup-&release.markup;.tar.gz">
+
+<!ENTITY url.markup-project
+         "&url.gps-ocaml-projects;/markup">
+
+<!ENTITY url.markup-manual
+         "&url.gps-ocaml-projects;/markup/manual">
+
+
+<!-- ************************************************************ -->
+<!-- MAIL URLs                                                    -->
+<!-- ************************************************************ -->
+
+<!ENTITY person.gps '<a href="mailto:&person.gps.mail;">Gerd Stolpmann</a>'>
+
+<!ENTITY person.gps.mail
+         "Gerd.Stolpmann@darmstadt.netsurf.de">
+
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/Makefile b/helm/DEVEL/pxp/pxp/doc/manual/Makefile
new file mode 100644 (file)
index 0000000..5a3e1ff
--- /dev/null
@@ -0,0 +1,82 @@
+DOCBOOK_HTML = /usr/share/sgml/docbkdsl/html
+DOCBOOK_PRINT = /usr/share/sgml/docbkdsl/print
+SRC = $(PWD)/src
+
+.PHONY: html ps
+
+default: html ps
+
+html: html/book1.htm html/pic/done
+
+ps: ps/markup.ps ps/pic/done
+
+
+src/readme.ent: ../../examples/readme/to_html.ml
+       src/getcode.ml <../../examples/readme/to_html.ml >src/readme.ent
+
+src/yacc.mli.ent: ../../pxp_yacc.mli
+       src/getcode.ml <../../pxp_yacc.mli >src/yacc.mli.ent
+
+src/dtd.mli.ent: ../../pxp_dtd.mli
+       src/getcode.ml <../../pxp_dtd.mli >src/dtd.mli.ent
+
+html/book1.htm: src/*.sgml src/readme.ent src/yacc.mli.ent src/dtd.mli.ent
+       mkdir -p html
+       cp src/markup.css html; \
+       cd html; \
+       rm -f *.htm*; \
+       jade -t sgml -D$(DOCBOOK_HTML) -D$(SRC) -ihtml markup.sgml; \
+       true
+       touch html/TIMESTAMP
+
+html/pic/done: src/pic/*.fig
+       mkdir -p html/pic
+       l=`cd src/pic; echo *.fig`; \
+       for x in $$l; do fig2dev -L gif src/pic/$$x html/pic/`basename $$x .fig`.gif; done
+       touch html/pic/done
+
+#man: src/findlib_reference.xml
+#      mkdir -p man
+#      cd man; \
+#      rm -f *.[0-9]; \
+#      db2man <../src/findlib_reference.xml
+
+ps/markup.tex: src/*.sgml src/readme.ent src/yacc.mli.ent src/dtd.mli.ent
+       mkdir -p ps
+       cd ps; \
+       jade -t tex -D$(DOCBOOK_PRINT) -D$(SRC) markup.sgml; \
+       true
+
+ps/markup.dvi: ps/markup.tex ps/pic/done
+       cd ps; \
+       jadetex markup.tex; \
+       jadetex markup.tex; \
+       jadetex markup.tex
+
+ps/markup.ps: ps/markup.dvi
+       cd ps; \
+       dvips -f <markup.dvi >markup.ps
+
+ps/pic/done: src/pic/*.fig
+       mkdir -p ps/pic
+       l=`cd src/pic; echo *.fig`; \
+       for x in $$l; do fig2dev -L ps -m 0.8 src/pic/$$x ps/pic/`basename $$x .fig`.ps; done
+       touch ps/pic/done
+
+.SUFFIXES: .xml .sgml
+
+.sgml.xml: 
+       sx -xndata $< >$@; true
+
+
+
+clean:
+       rm -rf html man ps
+       rm -f src/readme.ent
+
+CLEAN: clean
+
+distclean:
+       rm -f src/*~
+       rm -f *~
+       rm -f ps/*.aux ps/*.dvi ps/*.log ps/*.tex
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/TIMESTAMP b/helm/DEVEL/pxp/pxp/doc/manual/html/TIMESTAMP
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/c1567.html b/helm/DEVEL/pxp/pxp/doc/manual/html/c1567.html
new file mode 100644 (file)
index 0000000..ab88e87
--- /dev/null
@@ -0,0 +1,434 @@
+<HTML
+><HEAD
+><TITLE
+>Configuring and calling the parser</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="User's guide"
+HREF="p34.html"><LINK
+REL="PREVIOUS"
+TITLE="Details of the mapping from XML text to the tree representation"
+HREF="x1496.html"><LINK
+REL="NEXT"
+TITLE="Resolvers and sources"
+HREF="x1629.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="CHAPTER"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x1496.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x1629.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="CHAPTER"
+><H1
+><A
+NAME="AEN1567"
+>Chapter 4. Configuring and calling the parser</A
+></H1
+><DIV
+CLASS="TOC"
+><DL
+><DT
+><B
+>Table of Contents</B
+></DT
+><DT
+>4.1. <A
+HREF="c1567.html#AEN1569"
+>Overview</A
+></DT
+><DT
+>4.2. <A
+HREF="x1629.html"
+>Resolvers and sources</A
+></DT
+><DT
+>4.3. <A
+HREF="x1812.html"
+>The DTD classes</A
+></DT
+><DT
+>4.4. <A
+HREF="x1818.html"
+>Invoking the parser</A
+></DT
+><DT
+>4.5. <A
+HREF="x1965.html"
+>Updates</A
+></DT
+></DL
+></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1569"
+>4.1. Overview</A
+></H1
+><P
+>There are the following main functions invoking the parser (in Pxp_yacc):
+
+          <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>parse_document_entity:</I
+> You want to
+parse a complete and closed document consisting of a DTD and the document body;
+the body is validated against the DTD. This mode is interesting if you have a
+file
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!DOCTYPE root ... [ ... ] &#62; &#60;root&#62; ... &#60;/root&#62;</PRE
+>
+
+and you can accept any DTD that is included in the file (e.g. because the file
+is under your control).</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>parse_wfdocument_entity:</I
+> You want to
+parse a complete and closed document consisting of a DTD and the document body;
+but the body is not validated, only checked for well-formedness. This mode is
+preferred if validation costs too much time or if the DTD is missing.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>parse_dtd_entity:</I
+> You want only to
+parse an entity (file) containing the external subset of a DTD. Sometimes it is
+interesting to read such a DTD, for example to compare it with the DTD included
+in a document, or to apply the next mode:</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>parse_content_entity:</I
+> You want only to
+parse an entity (file) containing a fragment of a document body; this fragment
+is validated against the DTD you pass to the function. In particular, the fragment
+must not have a <TT
+CLASS="LITERAL"
+> &lt;!DOCTYPE&gt;</TT
+> clause, and must directly
+begin with an element.  The element is validated against the DTD.  This mode is
+interesting if you want to check documents against a fixed, immutable DTD.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>parse_wfcontent_entity:</I
+> This function
+also parses a single element without DTD, but does not validate it.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>extract_dtd_from_document_entity:</I
+> This
+function extracts the DTD from a closed document consisting of a DTD and a
+document body. Both the internal and the external subsets are extracted.</P
+></LI
+></UL
+></P
+><P
+>In many cases, <TT
+CLASS="LITERAL"
+>parse_document_entity</TT
+> is the preferred mode
+to parse a document in a validating way, and
+<TT
+CLASS="LITERAL"
+>parse_wfdocument_entity</TT
+> is the mode of choice to parse a
+file while only checking for well-formedness.</P
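+>
+<P
+>For example, a validating parse of a file might be invoked like this (a rough
+sketch; see <TT
+CLASS="LITERAL"
+>pxp_yacc.mli</TT
+> for the exact signatures and default values):
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let doc =
+  Pxp_yacc.parse_document_entity
+    Pxp_yacc.default_config
+    (Pxp_yacc.from_file "doc.xml")
+    Pxp_yacc.default_spec</PRE
+></P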
+><P
+>There are a number of variations of these modes. One important application of a
+parser is to check documents of an untrusted source against a fixed DTD. One
+solution is to not allow the <TT
+CLASS="LITERAL"
+>&lt;!DOCTYPE&gt;</TT
+> clause in
+these documents, and treat the document like a fragment (using mode
+<I
+CLASS="EMPHASIS"
+>parse_content_entity</I
+>). This is very simple, but
+inflexible; users of such a system cannot even define additional entities to
+abbreviate frequent phrases of their text.</P
+><P
+>It may be necessary to have a more intelligent checker. For example, it is also
+possible to parse and check the document fully, i.e. with DTD, and to compare
+this DTD with the prescribed one. In order to fully parse the document, mode
+<I
+CLASS="EMPHASIS"
+>parse_document_entity</I
+> is applied, and to get the DTD to
+compare with mode <I
+CLASS="EMPHASIS"
+>parse_dtd_entity</I
+> can be used.</P
+><P
+>There is another very important configurable aspect of the parser: the
+so-called resolver. The task of the resolver is to locate the contents of an
+(external) entity for a given entity name, and to make the contents accessible
+as a character stream. (Furthermore, it also normalizes the character set;
+but this is a detail we can ignore here.) Suppose you have a file called
+<TT
+CLASS="LITERAL"
+>"main.xml"</TT
+> containing 
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ENTITY % sub SYSTEM "sub/sub.xml"&#62;
+%sub;</PRE
+>
+
+and a file stored in the subdirectory <TT
+CLASS="LITERAL"
+>"sub"</TT
+> with name
+<TT
+CLASS="LITERAL"
+>"sub.xml"</TT
+> containing
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ENTITY % subsub SYSTEM "subsub/subsub.xml"&#62;
+%subsub;</PRE
+>
+
+and a file stored in the subdirectory <TT
+CLASS="LITERAL"
+>"subsub"</TT
+> of
+<TT
+CLASS="LITERAL"
+>"sub"</TT
+> with name <TT
+CLASS="LITERAL"
+>"subsub.xml"</TT
+> (the
+contents of this file do not matter). Here, the resolver must track that
+the second entity <TT
+CLASS="LITERAL"
+>subsub</TT
+> is located in the directory
+<TT
+CLASS="LITERAL"
+>"sub/subsub"</TT
+>, i.e. the difficulty is to interpret the
+system (file) names of entities relative to the entities containing them,
+even if the entities are deeply nested.</P
+><P
+>There is no fixed resolver that already does everything right; resolving entity
+names is a task that highly depends on the environment. The XML specification
+only demands that <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> entities are interpreted like URLs
+(which is not very precise, as there are lots of URL schemes in use), hoping
+that this helps to overcome the local peculiarities of the environment; the idea
+is that if you do not know your environment you can refer to other entities by
+denoting URLs for them. I think that this interpretation of
+<TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> names may have some applications in the internet, but
+it is not the first choice in general. Because of this, the resolver is a
+separate module of the parser that can be exchanged by another one if
+necessary; more precisely, the parser already defines several resolvers.</P
+><P
+>The following resolvers do already exist:
+
+          <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+>Resolvers reading from arbitrary input channels. These
+can be configured such that a certain ID is associated with the channel; in
+this case inner references to external entities can be resolved. There is also
+a special resolver that interprets SYSTEM IDs as URLs; this resolver can
+process relative SYSTEM names and determine the corresponding absolute URL.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>A resolver that reads always from a given O'Caml
+string. This resolver is not able to resolve further names unless the string is
+not associated with any name, i.e. if the document contained in the string
+refers to an external entity, this reference cannot be followed in this
+case.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>A resolver for file names. The <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+>
+name is interpreted as a file URL with the slash "/" as separator for
+directories. This resolver is derived from the generic URL resolver.</P
+></LI
+></UL
+>
+
+The interface a resolver must have is documented, so it is possible to write
+your own resolver. For example, you could connect the parser with an HTTP
+client, and resolve URLs of the HTTP namespace. The resolver classes allow
+several independent resolvers to be combined into one more powerful resolver;
+thus it is possible to combine a self-written resolver with the already
+existing resolvers.</P
+><P
+>Note that the existing resolvers only interpret <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+>
+names, not <TT
+CLASS="LITERAL"
+>PUBLIC</TT
+> names. If it helps you, it is possible to
+define resolvers for <TT
+CLASS="LITERAL"
+>PUBLIC</TT
+> names, too; for example, such a
+resolver could look up the public name in a hash table, and map it to a system
+name which is passed over to the existing resolver for system names. It is
+relatively simple to provide such a resolver.</P
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x1496.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x1629.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Details of the mapping from XML text to the tree representation</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="p34.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Resolvers and sources</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/c36.html b/helm/DEVEL/pxp/pxp/doc/manual/html/c36.html
new file mode 100644 (file)
index 0000000..d74ecbb
--- /dev/null
@@ -0,0 +1,533 @@
+<HTML
+><HEAD
+><TITLE
+>What is XML?</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="User's guide"
+HREF="p34.html"><LINK
+REL="PREVIOUS"
+TITLE="User's guide"
+HREF="p34.html"><LINK
+REL="NEXT"
+TITLE="Highlights of XML"
+HREF="x107.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="CHAPTER"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="p34.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x107.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="CHAPTER"
+><H1
+><A
+NAME="AEN36"
+>Chapter 1. What is XML?</A
+></H1
+><DIV
+CLASS="TOC"
+><DL
+><DT
+><B
+>Table of Contents</B
+></DT
+><DT
+>1.1. <A
+HREF="c36.html#AEN38"
+>Introduction</A
+></DT
+><DT
+>1.2. <A
+HREF="x107.html"
+>Highlights of XML</A
+></DT
+><DT
+>1.3. <A
+HREF="x468.html"
+>A complete example: The <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD</A
+></DT
+></DL
+></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN38"
+>1.1. Introduction</A
+></H1
+><P
+>XML (short for <I
+CLASS="EMPHASIS"
+>Extensible Markup Language</I
+>)
+generalizes the idea that text documents are typically structured in sections,
+sub-sections, paragraphs, and so on. The format of the document is not fixed
+(as, for example, in HTML), but can be declared by a so-called DTD (document
+type definition). The DTD describes only the rules for how the document can be
+structured, but not how the document can be processed. For example, if you want
+to publish a book that uses XML markup, you will need a processor that converts
+the XML file into a printable format such as Postscript. On the one hand, the
+structure of XML documents is configurable; on the other hand, there is no
+longer a canonical interpretation of the elements of the document; for example
+one XML DTD might require that paragraphs are delimited by
+<TT
+CLASS="LITERAL"
+>para</TT
+> tags, and another DTD expects <TT
+CLASS="LITERAL"
+>p</TT
+> tags
+for the same purpose. As a result, for every DTD a new processor is required.</P
+><P
+>Although XML can be used to express structured text documents it is not limited
+to this kind of application. For example, XML can also be used to exchange
+structured data over a network, or to simply store structured data in
+files. Note that XML documents cannot contain arbitrary binary data because
+some characters are forbidden; for some applications you need to encode binary
+data as text (e.g. the base 64 encoding).</P
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN45"
+>1.1.1. The "hello world" example</A
+></H2
+><P
+>The following example shows a very simple DTD, and a corresponding document
+instance. The document is structured such that it consists of sections, and
+that sections consist of paragraphs, and that paragraphs contain plain text:</P
+><PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ELEMENT document (section)+&#62;
+&#60;!ELEMENT section (paragraph)+&#62;
+&#60;!ELEMENT paragraph (#PCDATA)&#62;</PRE
+><P
+>The following document is an instance of this DTD:</P
+><PRE
+CLASS="PROGRAMLISTING"
+>&#60;?xml version="1.0" encoding="ISO-8859-1"?&#62;
+&#60;!DOCTYPE document SYSTEM "simple.dtd"&#62;
+&#60;document&#62;
+  &#60;section&#62;
+    &#60;paragraph&#62;This is a paragraph of the first section.&#60;/paragraph&#62;
+    &#60;paragraph&#62;This is another paragraph of the first section.&#60;/paragraph&#62;
+  &#60;/section&#62;
+  &#60;section&#62;
+    &#60;paragraph&#62;This is the only paragraph of the second section.&#60;/paragraph&#62;
+  &#60;/section&#62;
+&#60;/document&#62;</PRE
+><P
+>As in HTML (and, of course, in grand-father SGML), the "pieces" of
+the document are delimited by element braces, i.e. such a piece begins with
+<TT
+CLASS="LITERAL"
+>&lt;name-of-the-type-of-the-piece&gt;</TT
+> and ends with
+<TT
+CLASS="LITERAL"
+>&lt;/name-of-the-type-of-the-piece&gt;</TT
+>, and the pieces are
+called <I
+CLASS="EMPHASIS"
+>elements</I
+>. Unlike HTML and SGML, both start tags and
+end tags (i.e. the delimiters written in angle brackets) can never be left
+out. For example, HTML calls the paragraphs simply <TT
+CLASS="LITERAL"
+>p</TT
+>, and
+because paragraphs never contain paragraphs, a sequence of several paragraphs
+can be written as:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;p&#62;First paragraph 
+&#60;p&#62;Second paragraph</PRE
+>
+
+This is not possible in XML; continuing our example above we must always write
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;paragraph&#62;First paragraph&#60;/paragraph&#62;
+&#60;paragraph&#62;Second paragraph&#60;/paragraph&#62;</PRE
+>
+
+The rationale behind that is to (1) simplify the development of XML parsers
+(you need not convert the DTD into a deterministic finite automaton which is
+required to detect omitted tags), and to (2) make it possible to parse the
+document independently of whether the DTD is known.</P
+><P
+>The first line of our sample document,
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;?xml version="1.0" encoding="ISO-8859-1"?&#62;</PRE
+>
+
+is the so-called <I
+CLASS="EMPHASIS"
+>XML declaration</I
+>. It expresses that the
+document follows the conventions of XML version 1.0, and that the document is
+encoded using characters from the ISO-8859-1 character set (often known as
+"Latin 1", mostly used in Western Europe). Although the XML declaration is not
+mandatory, it is good style to include it; everybody sees at the first glance
+that the document uses XML markup and not the similar-looking HTML and SGML
+markup languages. If you omit the XML declaration, the parser will assume
+that the document is encoded as UTF-8 or UTF-16 (there is a rule that makes
+it possible to distinguish between UTF-8 and UTF-16 automatically); these
+are encodings of Unicode's universal character set. (Note that <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>, unlike its
+predecessor "Markup", fully supports Unicode.)</P
+><P
+>The second line,
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!DOCTYPE document SYSTEM "simple.dtd"&#62;</PRE
+>
+
+names the DTD that is going to be used for the rest of the document. In
+general, it is possible that the DTD consists of two parts, the so-called
+external and the internal subset. "External" means that the DTD exists as a
+second file; "internal" means that the DTD is included in the same file. In
+this example, there is only an external subset, and the system identifier
+"simple.dtd" specifies where the DTD file can be found. System identifiers are
+interpreted as URLs; for instance this would be legal:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!DOCTYPE document SYSTEM "http://host/location/simple.dtd"&#62;</PRE
+>
+
+Please note that <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> cannot interpret HTTP identifiers by default, but it is
+possible to change the interpretation of system identifiers.</P
+><P
+>The word immediately following <TT
+CLASS="LITERAL"
+>DOCTYPE</TT
+> determines which of
+the declared element types (here "document", "section", and "paragraph") is
+used for the outermost element, the <I
+CLASS="EMPHASIS"
+>root element</I
+>. In this
+example it is <TT
+CLASS="LITERAL"
+>document</TT
+> because the outermost element is
+delimited by <TT
+CLASS="LITERAL"
+>&lt;document&gt;</TT
+> and
+<TT
+CLASS="LITERAL"
+>&lt;/document&gt;</TT
+>. </P
+><P
+>The DTD consists of three declarations for element types:
+<TT
+CLASS="LITERAL"
+>document</TT
+>, <TT
+CLASS="LITERAL"
+>section</TT
+>, and
+<TT
+CLASS="LITERAL"
+>paragraph</TT
+>. Such a declaration has two parts:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ELEMENT <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>content-model</I
+></TT
+>&gt;</PRE
+>
+
+The content model is a regular expression which describes the possible inner
+structure of the element. Here, <TT
+CLASS="LITERAL"
+>document</TT
+> contains one or
+more sections, and a <TT
+CLASS="LITERAL"
+>section</TT
+> contains one or more
+paragraphs. Note that these two element types are not allowed to contain
+arbitrary text. Only the <TT
+CLASS="LITERAL"
+>paragraph</TT
+> element type is declared
+such that parsed character data (indicated by the symbol
+<TT
+CLASS="LITERAL"
+>#PCDATA</TT
+>) is permitted.</P
+><P
+>See below for a detailed discussion of content models. </P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN84"
+>1.1.2. XML parsers and processors</A
+></H2
+><P
+>XML documents are human-readable, but this is not the main purpose of this
+language. XML has been designed such that documents can be read by a program
+called an <I
+CLASS="EMPHASIS"
+>XML parser</I
+>. The parser checks that the document
+is correct, and it represents the document as objects of the programming
+language. There are two aspects when checking the document: First, the document
+must follow some basic syntactic rules, such as that tags are written in angle
+brackets, that for every start tag there must be a corresponding end tag and so
+on. A document respecting these rules is
+<I
+CLASS="EMPHASIS"
+>well-formed</I
+>. Second, the document must match the DTD in
+which case the document is <I
+CLASS="EMPHASIS"
+>valid</I
+>. Many parsers check only
+on well-formedness and ignore the DTD; <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> is designed such that it can
+even validate the document.</P
+><P
+>A parser alone does not make a sensible application; it only reads XML
+documents. The whole application working with XML-formatted data is called an
+<I
+CLASS="EMPHASIS"
+>XML processor</I
+>. Often XML processors convert documents into
+another format, such as HTML or Postscript. Sometimes processors extract data
+from the documents and output the processed data again in XML format. The parser
+can help the application processing the document; for example it can provide
+means to access the document in a specific manner. <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> supports an
+object-oriented access layer specially.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN94"
+>1.1.3. Discussion</A
+></H2
+><P
+>As we have seen, there are two levels of description: On the one hand, XML can
+define rules about the format of a document (the DTD), on the other hand, XML
+expresses structured documents. There are a number of possible applications:</P
+><P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+>XML can be used to express structured texts. Unlike HTML, there is no canonical
+interpretation; one would have to write a backend for the DTD that translates
+the structured texts into a format that existing browsers, printers
+etc. understand. The advantage of a self-defined document format is that it is
+possible to design the format in a more problem-oriented way. For example, if
+the task is to extract reports from a database, one can use a DTD that reflects
+the structure of the report or the database. A possible approach would be to
+have an element type for every database table and for every column. Once the
+DTD has been designed, the report procedure can be split up into a part that
+selects the database rows and outputs them as an XML document according to the
+DTD, and in a part that translates the document into other formats. Of course,
+the latter part can be solved in a generic way, e.g. there may be configurable
+backends for all DTDs that follow the approach and have element types for
+tables and columns.</P
+><P
+>XML plays the role of a configurable intermediate format. The database
+extraction function can be written without having to know the details of
+typesetting; the backends can be written without having to know the details of
+the database.</P
+><P
+>Of course, there are traditional solutions. One can define an ad hoc
+intermediate text file format. The disadvantage is that there are no names for
+the pieces of the format, and that such formats usually lack documentation
+because of this. Another solution would be to have a binary representation,
+either as language-dependent or language-independent structure (example of the
+latter can be found in RPC implementations). The disadvantage is that it is
+harder to view such representations; one has to write pretty printers for this
+purpose. It is also more difficult to enter test data; XML is plain text that
+can be written using an arbitrary editor (Emacs has even a good XML mode,
+PSGML). All these alternatives suffer from a missing structure checker,
+i.e. the programs processing these formats usually do not check the input file
+or input object in detail; XML parsers check the syntax of the input (the
+so-called well-formedness check), and the advanced parsers like <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> even
+verify that the structure matches the DTD (the so-called validation).</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>XML can be used as configurable communication language. A fundamental problem
+of every communication is that sender and receiver must follow the same
+conventions about the language. For data exchange, the question is usually
+which data records and fields are available, how they are syntactically
+composed, and which values are possible for the various fields. Similar
+questions arise for text document exchange. XML does not answer these problems
+completely, but it reduces the number of ambiguities for such conventions: The
+outlines of the syntax are specified by the DTD (but not necessarily the
+details), and XML introduces canonical names for the components of documents
+such that it is simpler to describe the rest of the syntax and the semantics
+informally.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>XML is a data storage format. Currently, every software product tends to use
+its own way to store data; commercial software often does not describe such
+formats, and it is a pain to integrate such software into a bigger project. 
+XML can help to improve this situation when several applications share the same
+syntax of data files. DTDs are then neutral instances that check the format of
+data files independent of applications. </P
+></LI
+></UL
+></DIV
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="p34.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x107.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>User's guide</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="p34.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Highlights of XML</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/c533.html b/helm/DEVEL/pxp/pxp/doc/manual/html/c533.html
new file mode 100644 (file)
index 0000000..c58e6ff
--- /dev/null
@@ -0,0 +1,234 @@
+<HTML
+><HEAD
+><TITLE
+>Using PXP</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="User's guide"
+HREF="p34.html"><LINK
+REL="PREVIOUS"
+TITLE="A complete example: The readme DTD"
+HREF="x468.html"><LINK
+REL="NEXT"
+TITLE="How to parse a document from an application"
+HREF="x550.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="CHAPTER"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x468.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x550.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="CHAPTER"
+><H1
+><A
+NAME="AEN533"
+>Chapter 2. Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></A
+></H1
+><DIV
+CLASS="TOC"
+><DL
+><DT
+><B
+>Table of Contents</B
+></DT
+><DT
+>2.1. <A
+HREF="c533.html#AEN536"
+>Validation</A
+></DT
+><DT
+>2.2. <A
+HREF="x550.html"
+>How to parse a document from an application</A
+></DT
+><DT
+>2.3. <A
+HREF="x675.html"
+>Class-based processing of the node tree</A
+></DT
+><DT
+>2.4. <A
+HREF="x738.html"
+>Example: An HTML backend for the <I
+CLASS="EMPHASIS"
+>readme</I
+>
+DTD</A
+></DT
+></DL
+></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN536"
+>2.1. Validation</A
+></H1
+><P
+>The parser can be used to <I
+CLASS="EMPHASIS"
+>validate</I
+> a document. This means
+that all the constraints that must hold for a valid document are actually
+checked. Validation is the default mode of <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>, i.e. every document is
+validated while it is being parsed.</P
+><P
+>In the <TT
+CLASS="LITERAL"
+>examples</TT
+> directory of the distribution you find the
+<TT
+CLASS="LITERAL"
+>pxpvalidate</TT
+> application. It is invoked in the following way:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>pxpvalidate [ -wf ] <TT
+CLASS="REPLACEABLE"
+><I
+>file</I
+></TT
+>...</PRE
+>
+
+The files mentioned on the command line are validated, and every warning and
+every error message is printed to stderr.</P
+><P
+>The -wf switch modifies the behaviour such that a well-formedness parser is
+simulated. In this mode, the ELEMENT, ATTLIST, and NOTATION declarations of the
+DTD are ignored, and only the ENTITY declarations will take effect. This mode
+is intended for documents lacking a DTD. Please note that the parser still
+scans the DTD fully and will report all errors in the DTD; such checks are not
+required by a well-formedness parser.</P
+><P
+>The <TT
+CLASS="LITERAL"
+>pxpvalidate</TT
+> application is the simplest sensible program
+using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>, you may consider it as "hello world" program. </P
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x468.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x550.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>A complete example: The <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="p34.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>How to parse a document from an application</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/c893.html b/helm/DEVEL/pxp/pxp/doc/manual/html/c893.html
new file mode 100644 (file)
index 0000000..0e564fb
--- /dev/null
@@ -0,0 +1,349 @@
+<HTML
+><HEAD
+><TITLE
+>The objects representing the document</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="User's guide"
+HREF="p34.html"><LINK
+REL="PREVIOUS"
+TITLE="Example: An HTML backend for the readme
+DTD"
+HREF="x738.html"><LINK
+REL="NEXT"
+TITLE="The class type node"
+HREF="x939.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="CHAPTER"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x738.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x939.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="CHAPTER"
+><H1
+><A
+NAME="AEN893"
+>Chapter 3. The objects representing the document</A
+></H1
+><DIV
+CLASS="TOC"
+><DL
+><DT
+><B
+>Table of Contents</B
+></DT
+><DT
+>3.1. <A
+HREF="c893.html#AEN897"
+>The <TT
+CLASS="LITERAL"
+>document</TT
+> class</A
+></DT
+><DT
+>3.2. <A
+HREF="x939.html"
+>The class type <TT
+CLASS="LITERAL"
+>node</TT
+></A
+></DT
+><DT
+>3.3. <A
+HREF="x1439.html"
+>The class type <TT
+CLASS="LITERAL"
+>extension</TT
+></A
+></DT
+><DT
+>3.4. <A
+HREF="x1496.html"
+>Details of the mapping from XML text to the tree representation</A
+></DT
+></DL
+></DIV
+><P
+><I
+CLASS="EMPHASIS"
+>This description might be out-of-date. See the module interface files
+for updated information.</I
+></P
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN897"
+>3.1. The <TT
+CLASS="LITERAL"
+>document</TT
+> class</A
+></H1
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class [ 'ext ] document :
+  Pxp_types.collect_warnings -&#62; 
+  object
+    method init_xml_version : string -&#62; unit
+    method init_root : 'ext node -&#62; unit
+
+    method xml_version : string
+    method xml_standalone : bool
+    method dtd : dtd
+    method root : 'ext node
+
+    method encoding : Pxp_types.rep_encoding
+
+    method add_pinstr : proc_instruction -&#62; unit
+    method pinstr : string -&#62; proc_instruction list
+    method pinstr_names : string list
+
+    method write : Pxp_types.output_stream -&#62; Pxp_types.encoding -&#62; unit
+
+  end
+;;</PRE
+>
+
+The methods beginning with <TT
+CLASS="LITERAL"
+>init_</TT
+> are only for internal use
+of the parser.</P
+><P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>xml_version</TT
+>: returns the version string at the beginning of
+the document. For example, "1.0" is returned if the document begins with
+<TT
+CLASS="LITERAL"
+>&lt;?xml version="1.0"?&gt;</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>xml_standalone</TT
+>: returns the boolean value of
+<TT
+CLASS="LITERAL"
+>standalone</TT
+> declaration in the XML declaration. If the
+<TT
+CLASS="LITERAL"
+>standalone</TT
+> attribute is missing, <TT
+CLASS="LITERAL"
+>false</TT
+> is
+returned. </P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>dtd</TT
+>: returns a reference to the global DTD object.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>root</TT
+>: returns a reference to the root element.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>encoding</TT
+>: returns the internal encoding of the
+document. This means that all strings of which the document consists are
+encoded in this character set.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>pinstr</TT
+>: returns the processing instructions outside the DTD
+and outside the root element. The argument passed to the method names a
+<I
+CLASS="EMPHASIS"
+>target</I
+>, and the method returns all instructions with this
+target. The target is the first word inside <TT
+CLASS="LITERAL"
+>&lt;?</TT
+> and
+<TT
+CLASS="LITERAL"
+>?&gt;</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>pinstr_names</TT
+>: returns the names of the processing instructions</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>add_pinstr</TT
+>: adds another processing instruction. This method
+is used by the parser itself to enter the instructions returned by
+<TT
+CLASS="LITERAL"
+>pinstr</TT
+>, but you can also enter additional instructions.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>write</TT
+>: writes the document to the passed stream as XML
+text using the passed (external) encoding. The generated text is always valid
+XML and can be parsed by PXP; however, the text is badly formatted (this is not
+a pretty printer).</P
+></LI
+></UL
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x738.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x939.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Example: An HTML backend for the <I
+CLASS="EMPHASIS"
+>readme</I
+>
+DTD</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="p34.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>The class type <TT
+CLASS="LITERAL"
+>node</TT
+></TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/index.html b/helm/DEVEL/pxp/pxp/doc/manual/html/index.html
new file mode 100644 (file)
index 0000000..3c07ff2
--- /dev/null
@@ -0,0 +1,330 @@
+<HTML
+><HEAD
+><TITLE
+>The PXP user's guide</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="NEXT"
+TITLE="User's guide"
+HREF="p34.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="BOOK"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="BOOK"
+><A
+NAME="AEN1"
+></A
+><DIV
+CLASS="TITLEPAGE"
+><H1
+CLASS="TITLE"
+><A
+NAME="AEN1"
+>The PXP user's guide</A
+></H1
+><H3
+CLASS="AUTHOR"
+>Gerd Stolpmann</H3
+><P
+CLASS="COPYRIGHT"
+>Copyright &copy; 1999, 2000 by <SPAN
+CLASS="HOLDER"
+>Gerd Stolpmann</SPAN
+></P
+><DIV
+><DIV
+CLASS="ABSTRACT"
+><P
+></P
+><P
+><SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> is a validating parser for XML-1.0 which has been
+written entirely in Objective Caml.</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><H1
+CLASS="TITLE"
+><A
+NAME="AEN18"
+>Download <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>:</A
+></H1
+>The free <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> library can be downloaded at
+<A
+HREF="http://www.ocaml-programming.de/packages/"
+TARGET="_top"
+>http://www.ocaml-programming.de/packages/</A
+>. This user's guide is included.
+Newest releases of <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> will be announced in
+<A
+HREF="http://www.npc.de/ocaml/linkdb/"
+TARGET="_top"
+>The OCaml Link
+Database</A
+>.</P
+></DIV
+><P
+></P
+></DIV
+></DIV
+><DIV
+CLASS="LEGALNOTICE"
+><P
+><B
+>License</B
+></P
+><P
+>This document, and the described software, "<SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>", are copyright by
+Gerd Stolpmann. </P
+><P
+>Permission is hereby granted, free of charge, to any person obtaining
+a copy of this document and the "<SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>" software (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:</P
+><P
+>The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.</P
+><P
+>The Software is provided ``as is'', without warranty of any kind, express
+or implied, including but not limited to the warranties of
+merchantability, fitness for a particular purpose and noninfringement.
+In no event shall Gerd Stolpmann be liable for any claim, damages or
+other liability, whether in an action of contract, tort or otherwise,
+arising from, out of or in connection with the Software or the use or
+other dealings in the software.</P
+></DIV
+><HR></DIV
+><DIV
+CLASS="TOC"
+><DL
+><DT
+><B
+>Table of Contents</B
+></DT
+><DT
+>I. <A
+HREF="p34.html"
+>User's guide</A
+></DT
+><DD
+><DL
+><DT
+>1. <A
+HREF="c36.html"
+>What is XML?</A
+></DT
+><DD
+><DL
+><DT
+>1.1. <A
+HREF="c36.html#AEN38"
+>Introduction</A
+></DT
+><DT
+>1.2. <A
+HREF="x107.html"
+>Highlights of XML</A
+></DT
+><DT
+>1.3. <A
+HREF="x468.html"
+>A complete example: The <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD</A
+></DT
+></DL
+></DD
+><DT
+>2. <A
+HREF="c533.html"
+>Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></A
+></DT
+><DD
+><DL
+><DT
+>2.1. <A
+HREF="c533.html#AEN536"
+>Validation</A
+></DT
+><DT
+>2.2. <A
+HREF="x550.html"
+>How to parse a document from an application</A
+></DT
+><DT
+>2.3. <A
+HREF="x675.html"
+>Class-based processing of the node tree</A
+></DT
+><DT
+>2.4. <A
+HREF="x738.html"
+>Example: An HTML backend for the <I
+CLASS="EMPHASIS"
+>readme</I
+>
+DTD</A
+></DT
+></DL
+></DD
+><DT
+>3. <A
+HREF="c893.html"
+>The objects representing the document</A
+></DT
+><DD
+><DL
+><DT
+>3.1. <A
+HREF="c893.html#AEN897"
+>The <TT
+CLASS="LITERAL"
+>document</TT
+> class</A
+></DT
+><DT
+>3.2. <A
+HREF="x939.html"
+>The class type <TT
+CLASS="LITERAL"
+>node</TT
+></A
+></DT
+><DT
+>3.3. <A
+HREF="x1439.html"
+>The class type <TT
+CLASS="LITERAL"
+>extension</TT
+></A
+></DT
+><DT
+>3.4. <A
+HREF="x1496.html"
+>Details of the mapping from XML text to the tree representation</A
+></DT
+></DL
+></DD
+><DT
+>4. <A
+HREF="c1567.html"
+>Configuring and calling the parser</A
+></DT
+><DD
+><DL
+><DT
+>4.1. <A
+HREF="c1567.html#AEN1569"
+>Overview</A
+></DT
+><DT
+>4.2. <A
+HREF="x1629.html"
+>Resolvers and sources</A
+></DT
+><DT
+>4.3. <A
+HREF="x1812.html"
+>The DTD classes</A
+></DT
+><DT
+>4.4. <A
+HREF="x1818.html"
+>Invoking the parser</A
+></DT
+><DT
+>4.5. <A
+HREF="x1965.html"
+>Updates</A
+></DT
+></DL
+></DD
+></DL
+></DD
+></DL
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>&nbsp;</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+>&nbsp;</TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="p34.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>&nbsp;</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+>&nbsp;</TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>User's guide</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/markup.css b/helm/DEVEL/pxp/pxp/doc/manual/html/markup.css
new file mode 100644 (file)
index 0000000..67dfaec
--- /dev/null
@@ -0,0 +1,4 @@
+.acronym { 
+  font-weight: bold;
+  color: #c71585
+}
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/p34.html b/helm/DEVEL/pxp/pxp/doc/manual/html/p34.html
new file mode 100644 (file)
index 0000000..9db427d
--- /dev/null
@@ -0,0 +1,167 @@
+<HTML
+><HEAD
+><TITLE
+>User's guide</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="PREVIOUS"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="NEXT"
+TITLE="What is XML?"
+HREF="c36.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="PART"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="index.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="c36.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="PART"
+><A
+NAME="AEN34"
+></A
+><DIV
+CLASS="TITLEPAGE"
+><H1
+CLASS="TITLE"
+>I. User's guide</H1
+><DIV
+CLASS="TOC"
+><DL
+><DT
+><B
+>Table of Contents</B
+></DT
+><DT
+>1. <A
+HREF="c36.html"
+>What is XML?</A
+></DT
+><DT
+>2. <A
+HREF="c533.html"
+>Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></A
+></DT
+><DT
+>3. <A
+HREF="c893.html"
+>The objects representing the document</A
+></DT
+><DT
+>4. <A
+HREF="c1567.html"
+>Configuring and calling the parser</A
+></DT
+></DL
+></DIV
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="index.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="c36.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>The PXP user's guide</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+>&nbsp;</TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>What is XML?</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/done b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/done
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/extension_general.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/extension_general.gif
new file mode 100644 (file)
index 0000000..6cc260a
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/extension_general.gif differ
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_add.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_add.gif
new file mode 100644 (file)
index 0000000..0091db2
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_add.gif differ
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_clone.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_clone.gif
new file mode 100644 (file)
index 0000000..97cd363
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_clone.gif differ
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_delete.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_delete.gif
new file mode 100644 (file)
index 0000000..d521123
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_delete.gif differ
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_general.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_general.gif
new file mode 100644 (file)
index 0000000..5f6358c
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_general.gif differ
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_term.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_term.gif
new file mode 100644 (file)
index 0000000..5644c91
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_term.gif differ
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x107.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x107.html
new file mode 100644 (file)
index 0000000..102aba2
--- /dev/null
@@ -0,0 +1,1694 @@
+<HTML
+><HEAD
+><TITLE
+>Highlights of XML</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="What is XML?"
+HREF="c36.html"><LINK
+REL="PREVIOUS"
+TITLE="What is XML?"
+HREF="c36.html"><LINK
+REL="NEXT"
+TITLE="A complete example: The readme DTD"
+HREF="x468.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="c36.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 1. What is XML?</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x468.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN107"
+>1.2. Highlights of XML</A
+></H1
+><P
+>This section explains many of the features of XML, though not all of them, and
+some only briefly. For a complete description, see the <A
+HREF="http://www.w3.org/TR/1998/REC-xml-19980210.html"
+TARGET="_top"
+>XML
+specification</A
+>.</P
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN111"
+>1.2.1. The DTD and the instance</A
+></H2
+><P
+>The DTD contains various declarations; in general you can only use a feature if
+you have previously declared it. The document instance file may contain the
+full DTD, but it is also possible to split the DTD into an internal and an
+external subset. A document must begin as follows if the full DTD is included:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;?xml version="1.0" encoding="<TT
+CLASS="REPLACEABLE"
+><I
+>Your encoding</I
+></TT
+>"?&gt;
+&lt;!DOCTYPE <TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+> [
+  <TT
+CLASS="REPLACEABLE"
+><I
+>Declarations</I
+></TT
+>
+]&gt;</PRE
+>
+
+These declarations are called the <I
+CLASS="EMPHASIS"
+>internal subset</I
+>. Note
+that the usage of entities and conditional sections is restricted within the
+internal subset.</P
+><P
+>If the declarations are located in a different file, you can refer to this file
+as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;?xml version="1.0" encoding="<TT
+CLASS="REPLACEABLE"
+><I
+>Your encoding</I
+></TT
+>"?&gt;
+&lt;!DOCTYPE <TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+> SYSTEM "<TT
+CLASS="REPLACEABLE"
+><I
+>file name</I
+></TT
+>"&gt;</PRE
+>
+
+The declarations in the file are called the <I
+CLASS="EMPHASIS"
+>external
+subset</I
+>. The file name is called the <I
+CLASS="EMPHASIS"
+>system
+identifier</I
+>. 
+It is also possible to refer to the file by a so-called
+<I
+CLASS="EMPHASIS"
+>public identifier</I
+>, but most XML applications won't use
+this feature.</P
+><P
+>You can also specify both internal and external subsets. In this case, the
+declarations of both subsets are mixed, and if there are conflicts, the
+declarations of the internal subset override those of the external subset with
+the same name. This looks as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;?xml version="1.0" encoding="<TT
+CLASS="REPLACEABLE"
+><I
+>Your encoding</I
+></TT
+>"?&gt;
+&lt;!DOCTYPE <TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>  SYSTEM "<TT
+CLASS="REPLACEABLE"
+><I
+>file name</I
+></TT
+>" [
+  <TT
+CLASS="REPLACEABLE"
+><I
+>Declarations</I
+></TT
+>
+]&gt;</PRE
+></P
+><P
+>The XML declaration (the string beginning with <TT
+CLASS="LITERAL"
+>&lt;?xml</TT
+> and
+ending at <TT
+CLASS="LITERAL"
+>?&gt;</TT
+>) should specify the encoding of the
+file. Common values are UTF-8 and the ISO-8859 series of character sets. Note
+that every file parsed by the XML processor can begin with an XML declaration
+and that every file may have its own encoding.</P
+><P
+>The name of the root element must be mentioned directly after the
+<TT
+CLASS="LITERAL"
+>DOCTYPE</TT
+> string. This means that a full document instance
+looks like
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;?xml version="1.0" encoding="<TT
+CLASS="REPLACEABLE"
+><I
+>Your encoding</I
+></TT
+>"?&gt;
+&lt;!DOCTYPE <TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>  SYSTEM "<TT
+CLASS="REPLACEABLE"
+><I
+>file name</I
+></TT
+>" [
+  <TT
+CLASS="REPLACEABLE"
+><I
+>Declarations</I
+></TT
+>
+]&gt;
+
+&lt;<TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>&gt;
+  <TT
+CLASS="REPLACEABLE"
+><I
+>inner contents</I
+></TT
+>
+&lt;/<TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>&gt;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN146"
+>1.2.2. Reserved characters</A
+></H2
+><P
+>Some characters are generally reserved to indicate markup and therefore cannot
+be used directly for character data. These characters are &lt;, &gt;, and
+&amp;. Furthermore, single and double quotes are sometimes reserved. If you
+want to include such a character as character data, write it as follows:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>&amp;lt;</TT
+> instead of &lt;</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>&amp;gt;</TT
+> instead of &gt;</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>&amp;amp;</TT
+> instead of &amp;</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>&amp;apos;</TT
+> instead of '</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>&amp;quot;</TT
+> instead of "</P
+></LI
+></UL
+>
+
+All other characters are free in the document instance. It is possible to
+include a character by its code point in the Unicode character set: 
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&amp;#<TT
+CLASS="REPLACEABLE"
+><I
+>n</I
+></TT
+>;</PRE
+>
+
+where <TT
+CLASS="REPLACEABLE"
+><I
+>n</I
+></TT
+> is the decimal number of the
+character. Alternatively, you can specify the character by its hexadecimal
+number: 
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&amp;#x<TT
+CLASS="REPLACEABLE"
+><I
+>n</I
+></TT
+>;</PRE
+>
+
+In the scope of declarations, the character % is no longer free. To include it
+as character data, you must use the notations <TT
+CLASS="LITERAL"
+>&amp;#37;</TT
+> or
+<TT
+CLASS="LITERAL"
+>&amp;#x25;</TT
+>.</P
+><P
+>Note that besides &amp;lt;, &amp;gt;, &amp;amp;,
+&amp;apos;, and &amp;quot; there are no predefined character entities. This is
+different from HTML, which defines a list of characters that can be referenced
+by name (e.g. &amp;auml; for ä); however, if you prefer named characters, you
+can declare such entities yourself (see below).</P
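+><P
+>For readers who generate XML documents from programs (PXP itself is written in
+OCaml), the escaping rules above are easy to apply mechanically. The following
+helper is only an illustrative sketch; it is not part of PXP or of the XML
+standard:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>(* Replace the five reserved characters by their predefined entity
+ * references before embedding text in a document. *)
+let escape_xml s =
+  let buf = Buffer.create (String.length s) in
+  String.iter
+    (fun c -&gt;
+       match c with
+         | '&lt;'  -&gt; Buffer.add_string buf "&amp;lt;"
+         | '&gt;'  -&gt; Buffer.add_string buf "&amp;gt;"
+         | '&amp;'  -&gt; Buffer.add_string buf "&amp;amp;"
+         | '\'' -&gt; Buffer.add_string buf "&amp;apos;"
+         | '"'  -&gt; Buffer.add_string buf "&amp;quot;"
+         | _    -&gt; Buffer.add_char buf c)
+    s;
+  Buffer.contents buf</PRE
+></P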
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN173"
+>1.2.3. Elements and ELEMENT declarations</A
+></H2
+><P
+>Elements structure the document instance in a hierarchical way. There is a
+top-level element, the <I
+CLASS="EMPHASIS"
+>root element</I
+>, which contains a
+sequence of inner elements and character sections. The inner elements are
+structured in the same way. Every element has an <I
+CLASS="EMPHASIS"
+>element
+type</I
+>. The beginning of the element is indicated by a <I
+CLASS="EMPHASIS"
+>start
+tag</I
+>, written
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;<TT
+CLASS="REPLACEABLE"
+><I
+>element-type</I
+></TT
+>&gt;</PRE
+>
+
+and the element continues until the corresponding <I
+CLASS="EMPHASIS"
+>end tag</I
+>
+is reached:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;/<TT
+CLASS="REPLACEABLE"
+><I
+>element-type</I
+></TT
+>&gt;</PRE
+>
+
+In XML, it is not allowed to omit start or end tags, even if the DTD would
+permit this. Note that there are no special rules for interpreting spaces or
+newlines near start or end tags; all spaces and newlines count.</P
+><P
+>Every element type must be declared before it can be used. The declaration
+consists of two parts: the ELEMENT declaration describes the content model,
+i.e. which inner elements are allowed; the ATTLIST declaration describes the
+attributes of the element.</P
+><P
+>An element can simply allow everything as content. This is written:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ELEMENT <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> ANY&gt;</PRE
+>
+
+At the other extreme, an element can be forced to be empty; this is declared by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ELEMENT <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> EMPTY&gt;</PRE
+>
+
+Note that there is an abbreviated notation for empty element instances:
+<TT
+CLASS="LITERAL"
+>&lt;<TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+>/&gt;</TT
+>. </P
+><P
+>There are two more sophisticated forms of declarations: so-called
+<I
+CLASS="EMPHASIS"
+>mixed declarations</I
+>, and <I
+CLASS="EMPHASIS"
+>regular
+expressions</I
+>. An element with mixed content contains character data
+interspersed with inner elements, and the set of allowed inner elements can be
+specified. In contrast to this, a regular expression declaration does not allow
+character data, but the inner elements can be described by the more powerful
+means of regular expressions.</P
+><P
+>A declaration for mixed content looks as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ELEMENT <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> (#PCDATA | <TT
+CLASS="REPLACEABLE"
+><I
+>element<SUB
+>1</SUB
+></I
+></TT
+> | ... | <TT
+CLASS="REPLACEABLE"
+><I
+>element<SUB
+>n</SUB
+></I
+></TT
+> )*&gt;</PRE
+>
+
+or if you do not want to allow any inner element, simply
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ELEMENT <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> (#PCDATA)&gt;</PRE
+></P
+><BLOCKQUOTE
+CLASS="BLOCKQUOTE"
+><P
+><B
+>Example</B
+></P
+><P
+>If element type <TT
+CLASS="LITERAL"
+>q</TT
+> is declared as
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ELEMENT q (#PCDATA | r | s)*&#62;</PRE
+>
+
+this is a legal instance:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;q&#62;This is character data&#60;r&#62;&#60;/r&#62;with &#60;s&#62;&#60;/s&#62;inner elements&#60;/q&#62;</PRE
+>
+
+But this is illegal because <TT
+CLASS="LITERAL"
+>t</TT
+> has not been enumerated in the
+declaration:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;q&#62;This is character data&#60;r&#62;&#60;/r&#62;with &#60;t&#62;&#60;/t&#62;inner elements&#60;/q&#62;</PRE
+></P
+></BLOCKQUOTE
+><P
+>The other form uses a regular expression to describe the possible contents:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ELEMENT <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>regexp</I
+></TT
+>&gt;</PRE
+>
+
+The following well-known regexp operators are allowed:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>element-name</I
+></TT
+></TT
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>(<TT
+CLASS="REPLACEABLE"
+><I
+>subexpr<SUB
+>1</SUB
+></I
+></TT
+> ,</TT
+> ... <TT
+CLASS="LITERAL"
+>, <TT
+CLASS="REPLACEABLE"
+><I
+>subexpr<SUB
+>n</SUB
+></I
+></TT
+> )</TT
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>(<TT
+CLASS="REPLACEABLE"
+><I
+>subexpr<SUB
+>1</SUB
+></I
+></TT
+> |</TT
+> ... <TT
+CLASS="LITERAL"
+>| <TT
+CLASS="REPLACEABLE"
+><I
+>subexpr<SUB
+>n</SUB
+></I
+></TT
+> )</TT
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+>*</TT
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+>+</TT
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+>?</TT
+></P
+></LI
+></UL
+>
+
+The <TT
+CLASS="LITERAL"
+>,</TT
+> operator indicates a sequence of sub-models, the
+<TT
+CLASS="LITERAL"
+>|</TT
+> operator describes alternative sub-models. The
+<TT
+CLASS="LITERAL"
+>*</TT
+> indicates zero or more repetitions, and
+<TT
+CLASS="LITERAL"
+>+</TT
+> one or more repetitions. Finally, <TT
+CLASS="LITERAL"
+>?</TT
+> can
+be used for optional sub-models. As atoms the regexp can contain names of
+elements; note that it is not allowed to include <TT
+CLASS="LITERAL"
+>#PCDATA</TT
+>.</P
+><P
+>The exact syntax of the regular expressions is rather strange. This can be
+explained best by a list of constraints:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+>The outermost expression must not be
+<TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>element-name</I
+></TT
+></TT
+>. </P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+> 
+<TT
+CLASS="LITERAL"
+>&#60;!ELEMENT x y&#62;</TT
+>; this must be written as
+<TT
+CLASS="LITERAL"
+>&#60;!ELEMENT x (y)&#62;</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>For the unary operators <TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+>*</TT
+>,
+<TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+>+</TT
+>, and
+<TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+>?</TT
+>, the
+<TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+></TT
+> must not be again an
+unary operator.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+> 
+<TT
+CLASS="LITERAL"
+>&#60;!ELEMENT x y**&#62;</TT
+>; this must be written as
+<TT
+CLASS="LITERAL"
+>&#60;!ELEMENT x (y*)*&#62;</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>Between <TT
+CLASS="LITERAL"
+>)</TT
+> and one of the unary operators
+<TT
+CLASS="LITERAL"
+>*</TT
+>, <TT
+CLASS="LITERAL"
+>+</TT
+>, or <TT
+CLASS="LITERAL"
+>?</TT
+>, there must
+not be whitespace.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+> 
+<TT
+CLASS="LITERAL"
+>&#60;!ELEMENT x (y|z) *&#62;</TT
+>; this must be written as
+<TT
+CLASS="LITERAL"
+>&#60;!ELEMENT x (y|z)*&#62;</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>There is the additional constraint that the
+right parenthesis must be contained in the same entity as the left parenthesis;
+see the section about parsed entities below.</P
+></LI
+></UL
+>&#13;</P
+><P
+>Note that there is another restriction on regular expressions: they must be
+deterministic. This means that the parser must be able to see by looking at the
+next token which alternative is actually used, or whether the repetition
+stops. The reason for this is simply compatibility with SGML (there is no
+intrinsic reason for this rule; XML could live without this restriction).</P
+><BLOCKQUOTE
+CLASS="BLOCKQUOTE"
+><P
+><B
+>Example</B
+></P
+><P
+>The elements are declared as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ELEMENT q (r?, (s | t)+)&#62;
+&#60;!ELEMENT r (#PCDATA)&#62;
+&#60;!ELEMENT s EMPTY&#62;
+&#60;!ELEMENT t (q | r)&#62;</PRE
+>
+
+This is a legal instance:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;q&#62;&#60;r&#62;Some characters&#60;/r&#62;&#60;s/&#62;&#60;/q&#62;</PRE
+>
+
+(Note: <TT
+CLASS="LITERAL"
+>&lt;s/&gt;</TT
+> is an abbreviation for
+<TT
+CLASS="LITERAL"
+>&lt;s&gt;&lt;/s&gt;</TT
+>.)
+
+It would be illegal to leave <TT
+CLASS="LITERAL"
+>&#60;s/&#62;</TT
+> out because at
+least one instance of <TT
+CLASS="LITERAL"
+>s</TT
+> or <TT
+CLASS="LITERAL"
+>t</TT
+> must be
+present. It would be illegal, too, if characters existed outside the
+<TT
+CLASS="LITERAL"
+>r</TT
+> element; the only exception is white space. The following is
+legal, too:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;q&#62;&#60;s/&#62;&#60;t&#62;&#60;q&#62;&#60;s/&#62;&#60;/q&#62;&#60;/t&#62;&#60;/q&#62;</PRE
+></P
+></BLOCKQUOTE
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN304"
+>1.2.4. Attribute lists and ATTLIST declarations</A
+></H2
+><P
+>Elements may have attributes. These are put into the start tag of an element as
+follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;<TT
+CLASS="REPLACEABLE"
+><I
+>element-name</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>attribute<SUB
+>1</SUB
+></I
+></TT
+>="<TT
+CLASS="REPLACEABLE"
+><I
+>value<SUB
+>1</SUB
+></I
+></TT
+>" ... <TT
+CLASS="REPLACEABLE"
+><I
+>attribute<SUB
+>n</SUB
+></I
+></TT
+>="<TT
+CLASS="REPLACEABLE"
+><I
+>value<SUB
+>n</SUB
+></I
+></TT
+>"&gt;</PRE
+>
+
+Instead of
+<TT
+CLASS="LITERAL"
+>"<TT
+CLASS="REPLACEABLE"
+><I
+>value<SUB
+>k</SUB
+></I
+></TT
+>"</TT
+>
+it is also possible to use single quotes as in
+<TT
+CLASS="LITERAL"
+>'<TT
+CLASS="REPLACEABLE"
+><I
+>value<SUB
+>k</SUB
+></I
+></TT
+>'</TT
+>.
+Note that you cannot use double quotes literally within the value of the
+attribute if double quotes are the delimiters; the same applies to single
+quotes. You generally cannot use &lt; and &amp; as characters in attribute
+values. It is possible to include the paraphrases &amp;lt;, &amp;gt;,
+&amp;amp;, &amp;apos;, and &amp;quot; (and any other reference to a general
+entity as long as the entity is not defined by an external file) as well as
+&amp;#<TT
+CLASS="REPLACEABLE"
+><I
+>n</I
+></TT
+>;.</P
+><P
+>Before you can use an attribute you must declare it. An ATTLIST declaration
+looks as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ATTLIST <TT
+CLASS="REPLACEABLE"
+><I
+>element-name</I
+></TT
+> 
+          <TT
+CLASS="REPLACEABLE"
+><I
+>attribute-name</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>attribute-type</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>attribute-default</I
+></TT
+>
+          ...
+          <TT
+CLASS="REPLACEABLE"
+><I
+>attribute-name</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>attribute-type</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>attribute-default</I
+></TT
+>
+&gt;</PRE
+>
+
+There are a lot of types, but the most important ones are:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>CDATA</TT
+>: Every string is allowed as attribute value.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>NMTOKEN</TT
+>: Every nametoken is allowed as attribute
+value. Nametokens consist (mainly) of letters, digits, ., :, -, _ in arbitrary
+order.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>NMTOKENS</TT
+>: A space-separated list of nametokens is allowed as
+attribute value.</P
+></LI
+></UL
+>
+
+The most interesting default declarations are:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>#REQUIRED</TT
+>: The attribute must be specified.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>#IMPLIED</TT
+>: The attribute can be specified but also can be
+left out. The application can find out whether the attribute was present or
+not. </P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>"<TT
+CLASS="REPLACEABLE"
+><I
+>value</I
+></TT
+>"</TT
+> or
+<TT
+CLASS="LITERAL"
+>'<TT
+CLASS="REPLACEABLE"
+><I
+>value</I
+></TT
+>'</TT
+>: This particular value is
+used as default if the attribute is omitted in the element.</P
+></LI
+></UL
+></P
+><BLOCKQUOTE
+CLASS="BLOCKQUOTE"
+><P
+><B
+>Example</B
+></P
+><P
+>This is a valid attribute declaration for element type <TT
+CLASS="LITERAL"
+>r</TT
+>:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ATTLIST r 
+          x CDATA    #REQUIRED
+          y NMTOKEN  #IMPLIED
+          z NMTOKENS "one two three"&#62;</PRE
+>
+
+This means that <TT
+CLASS="LITERAL"
+>x</TT
+> is a required attribute that cannot be
+left out, while <TT
+CLASS="LITERAL"
+>y</TT
+> and <TT
+CLASS="LITERAL"
+>z</TT
+> are optional. The
+XML parser indicates to the application whether <TT
+CLASS="LITERAL"
+>y</TT
+> is present or
+not, but if <TT
+CLASS="LITERAL"
+>z</TT
+> is missing the default value
+"one two three" is returned automatically. </P
+><P
+>This is a valid example of these attributes:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;r x="He said: &#38;quot;I don't like quotes!&#38;quot;" y='1'&#62;</PRE
+></P
+></BLOCKQUOTE
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN368"
+>1.2.5. Parsed entities</A
+></H2
+><P
+>Elements describe the logical structure of the document, while
+<I
+CLASS="EMPHASIS"
+>entities</I
+> determine the physical structure. Entities are
+the pieces of text the parser operates on, mostly files and macros. Entities
+may be <I
+CLASS="EMPHASIS"
+>parsed</I
+> in which case the parser reads the text and
+interprets it as XML markup, or <I
+CLASS="EMPHASIS"
+>unparsed</I
+> which simply
+means that the data of the entity has a foreign format (e.g. a GIF icon).</P
+><P
+>If the parsed entity is going to be used as part of the DTD, it
+is called a <I
+CLASS="EMPHASIS"
+>parameter entity</I
+>. You can declare a parameter
+entity with a fixed text as content by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ENTITY % <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> "<TT
+CLASS="REPLACEABLE"
+><I
+>value</I
+></TT
+>"&gt;</PRE
+>
+
+Within the DTD, you can <I
+CLASS="EMPHASIS"
+>refer to</I
+> this entity, i.e. read
+the text of the entity, by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>%<TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+>;</PRE
+>
+
+Such entities behave like macros, i.e. when they are referred to, the
+macro text is inserted and read instead of the original text.
+
+<BLOCKQUOTE
+CLASS="BLOCKQUOTE"
+><P
+><B
+>Example</B
+></P
+><P
+>For example, you can declare two elements with the same content model by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ENTITY % model "a | b | c"&#62;
+&#60;!ELEMENT x (%model;)&#62;
+&#60;!ELEMENT y (%model;)&#62;</PRE
+>&#13;</P
+></BLOCKQUOTE
+>
+
+If the contents of the entity are given as a string constant, the entity is
+called an <I
+CLASS="EMPHASIS"
+>internal</I
+> entity. It is also possible to name a
+file to be used as content (an <I
+CLASS="EMPHASIS"
+>external</I
+> entity):
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ENTITY % <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> SYSTEM "<TT
+CLASS="REPLACEABLE"
+><I
+>file name</I
+></TT
+>"&gt;</PRE
+>
+
+There are some restrictions for parameter entities:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+>If the internal parameter entity contains the first token of a declaration
+(i.e. <TT
+CLASS="LITERAL"
+>&lt;!</TT
+>), it must also contain the last token of the
+declaration, i.e. the <TT
+CLASS="LITERAL"
+>&gt;</TT
+>. This means that the entity
+either contains a whole number of complete declarations, or some text from the
+middle of one declaration.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+>
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ENTITY % e "(a | b | c)&#62;"&#62;
+&#60;!ELEMENT x %e;</PRE
+> Because <TT
+CLASS="LITERAL"
+>&lt;!</TT
+> is contained in the main
+entity, and the corresponding <TT
+CLASS="LITERAL"
+>&gt;</TT
+> is contained in the
+entity <TT
+CLASS="LITERAL"
+>e</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If the internal parameter entity contains a left parenthesis, it must also
+contain the corresponding right parenthesis.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+>
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ENTITY % e "(a | b | c"&#62;
+&#60;!ELEMENT x %e;)&#62;</PRE
+> Because <TT
+CLASS="LITERAL"
+>(</TT
+> is contained in the entity 
+<TT
+CLASS="LITERAL"
+>e</TT
+>, and the corresponding <TT
+CLASS="LITERAL"
+>)</TT
+> is
+contained in the main entity.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>When reading text from an entity, the parser automatically inserts one space
+character before the entity text and one space character after the entity
+text. However, this rule is not applied within the definition of another
+entity.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Legal:</I
+>
+<PRE
+CLASS="PROGRAMLISTING"
+> 
+&#60;!ENTITY % suffix "gif"&#62; 
+&#60;!ENTITY iconfile 'icon.%suffix;'&#62;</PRE
+> Because <TT
+CLASS="LITERAL"
+>%suffix;</TT
+> is referenced within
+the definition text for <TT
+CLASS="LITERAL"
+>iconfile</TT
+>, no additional spaces are
+added.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+>
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ENTITY % suffix "test"&#62;
+&#60;!ELEMENT x.%suffix; ANY&#62;</PRE
+>
+Because <TT
+CLASS="LITERAL"
+>%suffix;</TT
+> is referenced outside the definition
+text of another entity, the parser replaces <TT
+CLASS="LITERAL"
+>%suffix;</TT
+> by
+<TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>space</I
+></TT
+>test<TT
+CLASS="REPLACEABLE"
+><I
+>space</I
+></TT
+></TT
+>. </P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+>
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ENTITY % e "(a | b | c)"&#62;
+&#60;!ELEMENT x %e;*&#62;</PRE
+> Because there is a whitespace between <TT
+CLASS="LITERAL"
+>)</TT
+>
+and <TT
+CLASS="LITERAL"
+>*</TT
+>, which is illegal.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>An external parameter entity must always consist of a whole number of complete
+declarations.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>In the internal subset of the DTD, a reference to a parameter entity (internal
+or external) is only allowed at positions where a new declaration can start.</P
+></LI
+></UL
+></P
+><P
+>If the parsed entity is going to be used in the document instance, it is called
+a <I
+CLASS="EMPHASIS"
+>general entity</I
+>. Such entities can be used as
+abbreviations for frequent phrases, or to include external files. Internal
+general entities are declared as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ENTITY <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> "<TT
+CLASS="REPLACEABLE"
+><I
+>value</I
+></TT
+>"&gt;</PRE
+>
+
+External general entities are declared this way:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ENTITY <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> SYSTEM "<TT
+CLASS="REPLACEABLE"
+><I
+>file name</I
+></TT
+>"&gt;</PRE
+>
+
+References to general entities are written as:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#38;<TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+>;</PRE
+>
+
+The main difference between parameter and general entities is that the former
+are only recognized in the DTD and that the latter are only recognized in the
+document instance. As the DTD is parsed before the document, the parameter
+entities are expanded first; for example it is possible to use the content of a
+parameter entity as the name of a general entity:
+<TT
+CLASS="LITERAL"
+>&amp;#38;%name;;</TT
+><A
+NAME="AEN445"
+HREF="#FTN.AEN445"
+>[1]</A
+>.</P
+><P
+>General entities must respect the element hierarchy. This means that there must
+be an end tag for every start tag in the entity value, and that end tags
+without corresponding start tags are not allowed.</P
+><BLOCKQUOTE
+CLASS="BLOCKQUOTE"
+><P
+><B
+>Example</B
+></P
+><P
+>If the author of a document changes from time to time, it is worthwhile to set up a
+general entity containing the names of the authors. If the author changes, you
+only need to change the definition of the entity, and do not need to check all
+occurrences of authors' names:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ENTITY authors "Gerd Stolpmann"&#62;</PRE
+>
+
+In the document text, you can now refer to the author names by writing
+<TT
+CLASS="LITERAL"
+>&amp;authors;</TT
+>.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+>
+The following two entities are illegal because the elements in the definition
+do not nest properly:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ENTITY lengthy-tag "&#60;section textcolor='white' background='graphic'&#62;"&#62;
+&#60;!ENTITY nonsense    "&#60;a&#62;&#60;/b&#62;"&#62;</PRE
+></P
+></BLOCKQUOTE
+><P
+>Earlier in this introduction we explained that there are substitutes for
+reserved characters: &amp;lt;, &amp;gt;, &amp;amp;, &amp;apos;, and
+&amp;quot;. These are simply predefined general entities; note that they are
+the only predefined entities. It is allowed to define these entities again
+as long as the meaning is unchanged.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN463"
+>1.2.6. Notations and unparsed entities</A
+></H2
+><P
+>Unparsed entities have a foreign format and thus cannot be read by the XML
+parser. Unparsed entities are always external. The format of an unparsed entity
+must have been declared; such a format is called a
+<I
+CLASS="EMPHASIS"
+>notation</I
+>. The entity can then be declared by referring to
+this notation. As unparsed entities do not contain XML text, it is not possible
+to include them directly into the document; you can only declare attributes
+such that names of unparsed entities are acceptable values.</P
+><P
+>As you can see, unparsed entities are too complicated to serve any real
+purpose. It is almost always better to simply pass the name of the data file as a
+normal attribute value, and let the application recognize and process the
+foreign format. </P
+></DIV
+></DIV
+><H3
+CLASS="FOOTNOTES"
+>Notes</H3
+><TABLE
+BORDER="0"
+CLASS="FOOTNOTES"
+WIDTH="100%"
+><TR
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="5%"
+><A
+NAME="FTN.AEN445"
+HREF="x107.html#AEN445"
+>[1]</A
+></TD
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="95%"
+><P
+>This construct is only
+allowed within the definition of another entity; otherwise extra spaces would
+be added (as explained above). Such indirection is not recommended.</P
+><P
+>Complete example:
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ENTITY % variant "a"&#62;      &#60;!-- or "b" --&#62;
+&#60;!ENTITY text-a "This is text A."&#62;
+&#60;!ENTITY text-b "This is text B."&#62;
+&#60;!ENTITY text "&#38;#38;text-%variant;;"&#62;</PRE
+>
+You can now write <TT
+CLASS="LITERAL"
+>&amp;text;</TT
+> in the document instance, and
+depending on the value of <TT
+CLASS="LITERAL"
+>variant</TT
+> either
+<TT
+CLASS="LITERAL"
+>text-a</TT
+> or <TT
+CLASS="LITERAL"
+>text-b</TT
+> is inserted.</P
+></TD
+></TR
+></TABLE
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="c36.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x468.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>What is XML?</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c36.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>A complete example: The <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1439.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1439.html
new file mode 100644 (file)
index 0000000..2677305
--- /dev/null
@@ -0,0 +1,464 @@
+<HTML
+><HEAD
+><TITLE
+>The class type extension</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="The objects representing the document"
+HREF="c893.html"><LINK
+REL="PREVIOUS"
+TITLE="The class type node"
+HREF="x939.html"><LINK
+REL="NEXT"
+TITLE="Details of the mapping from XML text to the tree representation"
+HREF="x1496.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x939.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 3. The objects representing the document</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x1496.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1439"
+>3.3. The class type <TT
+CLASS="LITERAL"
+>extension</TT
+></A
+></H1
+><P
+>&#13;<PRE
+CLASS="PROGRAMLISTING"
+>class type [ 'node ] extension =
+  object ('self)
+    method clone : 'self
+      (* "clone" should return an exact deep copy of the object. *)
+    method node : 'node
+      (* "node" returns the corresponding node of this extension. This method
+       * is intended to return exactly what has previously been set by "set_node".
+       *)
+    method set_node : 'node -&#62; unit
+      (* "set_node" is invoked once the extension is associated to a new
+       * node object.
+       *)
+  end</PRE
+>
+
+This is the type of classes used for node extensions. For every node of the
+document tree, there is not only the <TT
+CLASS="LITERAL"
+>node</TT
+> object, but also
+an <TT
+CLASS="LITERAL"
+>extension</TT
+> object. The latter has minimal
+functionality; it has only the necessary methods to be attached to the node
+object containing the details of the node instance. The extension object is
+called extension because its purpose is extensibility.</P
+><P
+>For various reasons, it is impossible to derive the
+<TT
+CLASS="LITERAL"
+>node</TT
+> classes (i.e. <TT
+CLASS="LITERAL"
+>element_impl</TT
+> and
+<TT
+CLASS="LITERAL"
+>data_impl</TT
+>) such that the subclasses can be extended by
+new methods. But
+subclassing nodes is a great feature, because it allows the user to provide
+different classes for different types of nodes. The extension objects are a
+workaround that is as powerful as direct subclassing; the cost is
+some notational overhead.</P
+><DIV
+CLASS="FIGURE"
+><A
+NAME="EXTENSION-GENERAL"
+></A
+><P
+><B
+>Figure 3-6. The structure of nodes and extensions</B
+></P
+><P
+><IMG
+SRC="pic/extension_general.gif"></P
+></DIV
+><P
+>The picture shows how the nodes and extensions are linked
+together. Every node has a reference to its extension, and every extension has
+a reference to its node. The methods <TT
+CLASS="LITERAL"
+>extension</TT
+> and
+<TT
+CLASS="LITERAL"
+>node</TT
+> follow these references; a typical phrase is 
+
+<PRE
+CLASS="PROGRAMLISTING"
+>self # node # attribute "xy"</PRE
+>
+
+to get the value of an attribute from a method defined in the extension object;
+or 
+
+<PRE
+CLASS="PROGRAMLISTING"
+>self # node # iter
+  (fun n -&gt; n # extension # my_method ...)</PRE
+>
+
+to iterate over the subnodes and to call <TT
+CLASS="LITERAL"
+>my_method</TT
+> of the
+corresponding extension objects.</P
+><P
+>Note that extension objects do not have references to subnodes
+(or "subextensions") themselves; in order to get one of the children of an
+extension you must first go to the node object, then get the child node, and
+finally reach the extension that is logically the child of the extension you
+started with.</P
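+><P
+>As a small illustration of this navigation (a sketch only; the function name
+<TT
+CLASS="LITERAL"
+>child_extensions</TT
+> is invented here and is not part of PXP), the extensions that are logically
+the children of an extension can be collected like this:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>(* Collect the extension objects of the children of ext's node, using
+ * only the navigation methods shown above. *)
+let child_extensions ext =
+  let children = ref [] in
+  ext # node # iter
+    (fun n -&gt; children := (n # extension) :: !children);
+  List.rev !children</PRE
+></P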
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1460"
+>3.3.1. How to define an extension class</A
+></H2
+><P
+>At minimum, you must define the methods
+<TT
+CLASS="LITERAL"
+>clone</TT
+>, <TT
+CLASS="LITERAL"
+>node</TT
+>, and
+<TT
+CLASS="LITERAL"
+>set_node</TT
+> such that your class is compatible with the type
+<TT
+CLASS="LITERAL"
+>extension</TT
+>. The method <TT
+CLASS="LITERAL"
+>set_node</TT
+> is called
+during the initialization of the node, or after a node has been cloned; the
+node object invokes <TT
+CLASS="LITERAL"
+>set_node</TT
+> on the extension object to tell
+it that this node is now the object the extension is linked to. The extension
+must return the node object passed as argument of <TT
+CLASS="LITERAL"
+>set_node</TT
+>
+when the <TT
+CLASS="LITERAL"
+>node</TT
+> method is called.</P
+><P
+>The <TT
+CLASS="LITERAL"
+>clone</TT
+> method must return a copy of the
+extension object; at least the object itself must be duplicated, but if
+required, the copy should deeply duplicate all objects and values that are
+referred to by the extension, too. Whether this is required depends on the
+application; <TT
+CLASS="LITERAL"
+>clone</TT
+> is invoked by the node object when one of
+its cloning methods is called.</P
+><P
+>A good starting point for an extension class:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class custom_extension =
+  object (self)
+
+    val mutable node = (None : custom_extension node option)
+
+    method clone = {&#60; &#62;} 
+
+    method node =
+      match node with
+          None -&#62;
+            assert false
+        | Some n -&#62; n
+
+    method set_node n =
+      node &#60;- Some n
+
+  end</PRE
+>
+
+This class is compatible with <TT
+CLASS="LITERAL"
+>extension</TT
+>. The purpose of
+defining such a class is, of course, adding further methods; and you can do it
+without restriction. </P
+><P
+>Often, you want more than one extension class. In this case,
+the simplest approach is to give all your classes (for one kind of document)
+the same type (with respect to the interface; i.e. it does not matter if your
+classes differ in the defined private methods and instance variables, but
+public methods count). This approach avoids lots of coercions and problems with
+type incompatibilities. It is simple to implement:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class custom_extension =
+  object (self)
+    val mutable node = (None : custom_extension node option)
+
+    method clone = ...      (* see above *)
+    method node = ...       (* see above *)
+    method set_node n = ... (* see above *)
+
+    method virtual my_method1 : ...
+    method virtual my_method2 : ...
+    ... (* etc. *)
+  end
+
+class custom_extension_kind_A =
+  object (self)
+    inherit custom_extension
+
+    method my_method1 = ...
+    method my_method2 = ...
+  end
+
+class custom_extension_kind_B =
+  object (self)
+    inherit custom_extension
+
+    method my_method1 = ...
+    method my_method2 = ...
+  end</PRE
+>
+
+If a class does not need a method (e.g. because it does not make sense, or it
+would violate some important condition), it is possible to define the method
+and to always raise an exception when the method is invoked
+(e.g. <TT
+CLASS="LITERAL"
+>assert false</TT
+>).</P
+><P
+>This leads to a strong recommendation: do not try to further
+specialize the types of extension objects. It is difficult, sometimes even
+impossible, and almost never worthwhile.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1481"
+>3.3.2. How to bind extension classes to element types</A
+></H2
+><P
+>Once you have defined your extension classes, you can bind them
+to element types. The simplest case is that you have only one class and that
+this class is to be always used. The parsing functions in the module
+<TT
+CLASS="LITERAL"
+>Pxp_yacc</TT
+> take a <TT
+CLASS="LITERAL"
+>spec</TT
+> argument which
+can be customized. If your single class has the name <TT
+CLASS="LITERAL"
+>c</TT
+>,
+this argument should be 
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let spec =
+  make_spec_from_alist
+    ~data_exemplar:            (new data_impl c)
+    ~default_element_exemplar: (new element_impl c)
+    ~element_alist:            []
+    ()</PRE
+>
+
+This means that data nodes will be created from the exemplar passed by
+~data_exemplar and that all element nodes will be made from the exemplar
+specified by ~default_element_exemplar. In ~element_alist, you can 
+specify that different exemplars are to be used for different element types; but
+this is an optional feature. If you do not need it, pass the empty list.</P
+><P
+>Remember that an exemplar is a (node, extension) pair that serves as pattern
+when new nodes (and the corresponding extension objects) are added to the
+document tree. In this case, the exemplar contains <TT
+CLASS="LITERAL"
+>c</TT
+> as
+extension, and when nodes are created, the exemplar is cloned, and cloning
+makes also a copy of <TT
+CLASS="LITERAL"
+>c</TT
+> such that all nodes of the document
+tree will have a copy of <TT
+CLASS="LITERAL"
+>c</TT
+> as extension.</P
+><P
+>The <TT
+CLASS="LITERAL"
+>~element_alist</TT
+> argument can bind
+specific element types to specific exemplars; as exemplars may be instances of
+different classes it is effectively possible to bind element types to
+classes. For example, if the element type "p" is implemented by class "c_p",
+and "q" is realized by "c_q", you can pass the following value:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let spec =
+  make_spec_from_alist
+    ~data_exemplar:            (new data_impl c)
+    ~default_element_exemplar: (new element_impl c)
+    ~element_alist:            
+      [ "p", new element_impl c_p;
+        "q", new element_impl c_q;
+      ]
+    ()</PRE
+>
+
+The extension object <TT
+CLASS="LITERAL"
+>c</TT
+> is still used for all data nodes and
+for all other element types.</P
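+><P
+>Note that <TT
+CLASS="LITERAL"
+>c</TT
+>, <TT
+CLASS="LITERAL"
+>c_p</TT
+>, and <TT
+CLASS="LITERAL"
+>c_q</TT
+> in these listings are extension <I
+CLASS="EMPHASIS"
+>objects</I
+>, not classes. A hedged sketch of how they might be created, assuming the
+classes sketched in the previous section:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>(* One exemplar extension object per class; purely illustrative. *)
+let c   = new custom_extension_kind_A  (* default exemplar, also for data nodes *)
+let c_p = new custom_extension_kind_A  (* exemplar for "p" elements *)
+let c_q = new custom_extension_kind_B  (* exemplar for "q" elements *)</PRE
+></P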
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x939.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x1496.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>The class type <TT
+CLASS="LITERAL"
+>node</TT
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c893.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Details of the mapping from XML text to the tree representation</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1496.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1496.html
new file mode 100644 (file)
index 0000000..faea39f
--- /dev/null
@@ -0,0 +1,442 @@
+<HTML
+><HEAD
+><TITLE
+>Details of the mapping from XML text to the tree representation</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="The objects representing the document"
+HREF="c893.html"><LINK
+REL="PREVIOUS"
+TITLE="The class type extension"
+HREF="x1439.html"><LINK
+REL="NEXT"
+TITLE="Configuring and calling the parser"
+HREF="c1567.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x1439.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 3. The objects representing the document</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="c1567.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1496"
+>3.4. Details of the mapping from XML text to the tree representation</A
+></H1
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1498"
+>3.4.1. The representation of character-free elements</A
+></H2
+><P
+>If an element declaration does not allow the element to 
+contain character data, the following rules apply.</P
+><P
+>If the element must be empty, i.e. it is declared with the
+keyword <TT
+CLASS="LITERAL"
+>EMPTY</TT
+>, the element instance must be effectively
+empty (it must not even contain whitespace characters). The parser guarantees
+that a declared <TT
+CLASS="LITERAL"
+>EMPTY</TT
+> element never contains a data
+node, even if the data node represents the empty string.</P
+><P
+>If the element declaration only permits other elements to occur
+within that element but not character data, it is still possible to insert
+whitespace characters between the subelements. The parser ignores these
+characters, too, and does not create data nodes for them.</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Example. </B
+>Consider the following element types:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ELEMENT x ( #PCDATA | z )* &#62;
+&#60;!ELEMENT y ( z )* &#62;
+&#60;!ELEMENT z EMPTY&#62;</PRE
+>
+
+Only <TT
+CLASS="LITERAL"
+>x</TT
+> may contain character data, the keyword
+<TT
+CLASS="LITERAL"
+>#PCDATA</TT
+> indicates this. The other types are character-free. </P
+></DIV
+><P
+>The XML term
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;x&#62;&#60;z/&#62; &#60;z/&#62;&#60;/x&#62;</PRE
+>
+
+will be internally represented by an element node for <TT
+CLASS="LITERAL"
+>x</TT
+> 
+with three subnodes: the first <TT
+CLASS="LITERAL"
+>z</TT
+> element, a data node
+containing the space character, and the second <TT
+CLASS="LITERAL"
+>z</TT
+> element. 
+In contrast to this, the term
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;y&#62;&#60;z/&#62; &#60;z/&#62;&#60;/y&#62;</PRE
+>
+
+is represented by an  element node for <TT
+CLASS="LITERAL"
+>y</TT
+> with only
+<I
+CLASS="EMPHASIS"
+>two</I
+> subnodes, the two <TT
+CLASS="LITERAL"
+>z</TT
+> elements. There
+is no data node for the space character because spaces are ignored in the
+character-free element <TT
+CLASS="LITERAL"
+>y</TT
+>.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1521"
+>3.4.2. The representation of character data</A
+></H2
+><P
+>The XML specification allows all Unicode characters in XML
+texts. This parser can be configured such that UTF-8 is used to represent the
+characters internally; however, the default character encoding is
+ISO-8859-1. (Currently, no other encodings are possible for the internal string
+representation; the type <TT
+CLASS="LITERAL"
+>Pxp_types.rep_encoding</TT
+> enumerates
+the possible encodings. In principle, the parser could use any encoding that is
+ASCII-compatible, but there are currently only lexical analyzers for UTF-8 and
+ISO-8859-1. It is currently impossible to use UTF-16 or UCS-4 as internal
+encodings (or other multibyte encodings which are not ASCII-compatible) unless
+major parts of the parser are rewritten, which is unlikely.)</P
+><P
+>The internal encoding may be different from the external encoding (specified
+in the XML declaration <TT
+CLASS="LITERAL"
+>&lt;?xml ... encoding="..."?&gt;</TT
+>); in
+this case the strings are automatically converted to the internal encoding.</P
+><P
+>If the internal encoding is ISO-8859-1, it is possible that there are
+characters that cannot be represented. In this case, the parser ignores such
+characters and prints a warning (to the <TT
+CLASS="LITERAL"
+>collect_warning</TT
+>
+object that must be passed when the parser is called).</P
+><P
+>The XML specification allows lines to be separated by single LF
+characters, by CR LF character sequences, or by single CR
+characters. Internally, these separators are always converted to single LF
+characters.</P
+><P
+>The parser guarantees that there are never two adjacent data
+nodes; if necessary, data material that would otherwise be represented by
+several nodes is collapsed into one node. Note that you can still create node
+trees with adjacent data nodes; however, the parser does not return such trees.</P
+><P
+>Note that CDATA sections are not represented specially; such
+sections are added to the current data material that is being collected for the
+next data node.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1532"
+>3.4.3. The representation of entities within documents</A
+></H2
+><P
+><I
+CLASS="EMPHASIS"
+>Entities are not represented within
+documents!</I
+> If the parser finds an entity reference in the document
+content, the reference is immediately expanded, and the parser reads the
+expansion text instead of the reference.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1536"
+>3.4.4. The representation of attributes</A
+></H2
+><P
+>As attribute
+values are composed of Unicode characters, too, the same problems with the
+character encoding arise as for character material. Attribute values are
+converted to the internal encoding, too; and if there are characters that
+cannot be represented, these are dropped, and a warning is printed.</P
+><P
+>Attribute values are normalized before they are returned by
+methods like <TT
+CLASS="LITERAL"
+>attribute</TT
+>. First, any remaining entity
+references are expanded; if necessary, expansion is performed recursively.
+Second, newline characters (any of LF, CR LF, or CR characters) are converted
+to single space characters. Note that especially the latter action is
+prescribed by the XML standard (but <TT
+CLASS="LITERAL"
+>&amp;#10;</TT
+> is not converted,
+so that it is still possible to include line feeds into attribute values).</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1542"
+>3.4.5. The representation of processing instructions</A
+></H2
+><P
+>Processing instructions are parsed to some extent: The first word of the
+PI is called the target, and it is stored separately from the rest of the PI:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;?target rest?&#62;</PRE
+>
+
+The exact location where a PI occurs is not represented (by default). The
+parser puts the PI into the object that represents the embracing construct (an
+element, a DTD, or the whole document); that means you can find out which PIs
+occur in a certain element, in the DTD, or in the whole document, but you
+cannot look up the exact position within the construct.</P
+><P
+>If you require the exact location of PIs, it is possible to
+create extra nodes for them. This mode is controlled by the option
+<TT
+CLASS="LITERAL"
+>enable_pinstr_nodes</TT
+>. The additional nodes have the node type
+<TT
+CLASS="LITERAL"
+>T_pinstr <TT
+CLASS="REPLACEABLE"
+><I
+>target</I
+></TT
+></TT
+>, and are created
+from special exemplars contained in the <TT
+CLASS="LITERAL"
+>spec</TT
+> (see
+pxp_document.mli).</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1551"
+>3.4.6. The representation of comments</A
+></H2
+><P
+>Normally, comments are not represented; they are dropped by
+default. However, if you require them, it is possible to create
+<TT
+CLASS="LITERAL"
+>T_comment</TT
+> nodes for them. This mode can be specified by the
+option <TT
+CLASS="LITERAL"
+>enable_comment_nodes</TT
+>. Comment nodes are created from
+special exemplars contained in the <TT
+CLASS="LITERAL"
+>spec</TT
+> (see
+pxp_document.mli). You can access the contents of comments through the 
+method <TT
+CLASS="LITERAL"
+>comment</TT
+>.</P
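+><P
+>A hedged sketch of how these two options might be switched on before parsing;
+the names <TT
+CLASS="LITERAL"
+>default_config</TT
+> and <TT
+CLASS="LITERAL"
+>parse_document_entity</TT
+> are assumptions based on the <TT
+CLASS="LITERAL"
+>Pxp_yacc</TT
+> interface described in the next chapter (check pxp_yacc.mli for the exact
+names in your version of PXP):
+
+<PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_yacc
+
+(* A copy of the default configuration with both options enabled. *)
+let config =
+  { default_config with
+      enable_pinstr_nodes  = true;
+      enable_comment_nodes = true }
+
+(* "doc.xml" is a placeholder; "spec" is the specification built with
+ * make_spec_from_alist as shown in the previous chapter. *)
+let doc =
+  parse_document_entity config (from_file "doc.xml") spec</PRE
+></P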
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1558"
+>3.4.7. The attributes <TT
+CLASS="LITERAL"
+>xml:lang</TT
+> and
+<TT
+CLASS="LITERAL"
+>xml:space</TT
+></A
+></H2
+><P
+>These attributes are not supported specially; they are handled
+like any other attribute.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1563"
+>3.4.8. And what about namespaces?</A
+></H2
+><P
+>Currently, there is no special support for namespaces.
+However, the parser allows the colon to occur in names, so that it is
+possible to implement namespaces on top of the current API.</P
+><P
+>Some future release of PXP will support namespaces as a built-in
+feature...</P
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x1439.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="c1567.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>The class type <TT
+CLASS="LITERAL"
+>extension</TT
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c893.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Configuring and calling the parser</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1629.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1629.html
new file mode 100644 (file)
index 0000000..06b1e60
--- /dev/null
@@ -0,0 +1,895 @@
+<HTML
+><HEAD
+><TITLE
+>Resolvers and sources</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Configuring and calling the parser"
+HREF="c1567.html"><LINK
+REL="PREVIOUS"
+TITLE="Configuring and calling the parser"
+HREF="c1567.html"><LINK
+REL="NEXT"
+TITLE="The DTD classes"
+HREF="x1812.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="c1567.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 4. Configuring and calling the parser</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x1812.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1629"
+>4.2. Resolvers and sources</A
+></H1
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1631"
+>4.2.1. Using the built-in resolvers (called sources)</A
+></H2
+><P
+>The type <TT
+CLASS="LITERAL"
+>source</TT
+> enumerates the two
+possible ways of specifying where the document to parse comes from.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>type source =
+    Entity of ((dtd -&gt; Pxp_entity.entity) * Pxp_reader.resolver)
+  | ExtID of (ext_id * Pxp_reader.resolver)</PRE
+>
+
+You normally need not worry about this type, as there are convenience
+functions that create <TT
+CLASS="LITERAL"
+>source</TT
+> values (a combined usage sketch follows this list):
+
+
+            <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>from_file s</TT
+>: The document is read from
+file <TT
+CLASS="LITERAL"
+>s</TT
+>; you may specify absolute or relative path names.
+The file name must be encoded as a UTF-8 string.</P
+><P
+>There is an optional argument <TT
+CLASS="LITERAL"
+>~system_encoding</TT
+>
+specifying the character encoding used for file names in the file
+system. For example, if this encoding is ISO-8859-1 and <TT
+CLASS="LITERAL"
+>s</TT
+> is
+also a ISO-8859-1 string, you can form the source:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let s_utf8  =  recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s in
+from_file ~system_encoding:`Enc_iso88591 s_utf8</PRE
+></P
+><P
+>This <TT
+CLASS="LITERAL"
+>source</TT
+> has the advantage that
+it is able to resolve inner external entities; i.e. if your document includes
+data from another file (using the <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> attribute), this
+mode will find that file. However, this mode cannot resolve
+<TT
+CLASS="LITERAL"
+>PUBLIC</TT
+> identifiers nor <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> identifiers
+other than "file:".</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>from_channel ch</TT
+>: The document is read
+from the channel <TT
+CLASS="LITERAL"
+>ch</TT
+>. In general, this source also supports
+file URLs found in the document; however, by default only absolute URLs are
+understood. It is possible to associate an ID with the channel such that the
+resolver knows how to interpret relative URLs:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>from_channel ~id:(System "file:///dir/dir1/") ch</PRE
+>
+
+There is also a ~system_encoding argument specifying how file names are
+encoded. - The example from above can also be written as follows (but it is no
+longer possible to interpret relative URLs because there is no ~id argument,
+and computing this argument is relatively complicated because it must
+be a valid URL):
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let ch = open_in s in
+let src = from_channel ~system_encoding:`Enc_iso88591 ch in
+...;
+close_in ch</PRE
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>from_string s</TT
+>: The string
+<TT
+CLASS="LITERAL"
+>s</TT
+> is the document to parse. This mode is not able to
+interpret file names of <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> clauses, nor can it look up
+<TT
+CLASS="LITERAL"
+>PUBLIC</TT
+> identifiers. </P
+><P
+>Normally, the encoding of the string is detected as usual
+by analyzing the XML declaration, if any. However, it is also possible to
+specify the encoding directly:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let src = from_string ~fixenc:`Enc_iso88592 s</PRE
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>ExtID (id, r)</TT
+>: The document to parse
+is denoted by the identifier <TT
+CLASS="LITERAL"
+>id</TT
+> (either a
+<TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> or <TT
+CLASS="LITERAL"
+>PUBLIC</TT
+> clause), and this
+identifier is interpreted by the resolver <TT
+CLASS="LITERAL"
+>r</TT
+>. Use this mode
+if you have written your own resolver.</P
+><P
+>Which character sets are possible depends on the passed
+resolver <TT
+CLASS="LITERAL"
+>r</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>Entity (get_entity, r)</TT
+>: The document
+to parse is returned by the function invocation <TT
+CLASS="LITERAL"
+>get_entity
+dtd</TT
+>, where <TT
+CLASS="LITERAL"
+>dtd</TT
+> is the DTD object to use (it may be
+empty). Inner external references occurring in this entity are resolved using
+the resolver <TT
+CLASS="LITERAL"
+>r</TT
+>.</P
+><P
+>Which character sets are possible depends on the passed
+resolver <TT
+CLASS="LITERAL"
+>r</TT
+>.</P
+></LI
+></UL
+></P
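+><P
+>For illustration, here is a minimal usage sketch. It assumes that the
+convenience functions and the parsing functions described in this chapter
+(<TT
+CLASS="LITERAL"
+>from_file</TT
+>, <TT
+CLASS="LITERAL"
+>parse_document_entity</TT
+>, and so on) live in Pxp_yacc; the file name is only a placeholder:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_yacc
+
+(* parse and validate a closed document stored in a file ... *)
+let doc1 =
+  parse_document_entity default_config (from_file "doc.xml") default_spec
+
+(* ... or check a literal string only for well-formedness *)
+let doc2 =
+  parse_wfdocument_entity default_config
+    (from_string "&#60;x&#62;some character data&#60;/x&#62;") default_spec</PRE
+></P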
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1682"
+>4.2.2. The resolver API</A
+></H2
+><P
+>A resolver is an object that can be opened like a file; however,
+instead of a file name you pass the XML identifier of the entity
+to read from (either a <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> or <TT
+CLASS="LITERAL"
+>PUBLIC</TT
+>
+clause). When opened, the resolver must return the
+<TT
+CLASS="LITERAL"
+>Lexing.lexbuf</TT
+> that reads the characters.  The resolver can
+be closed, and it can be cloned. Furthermore, it is possible to tell the
+resolver which character set it should assume. - The following is taken from Pxp_reader:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>exception Not_competent
+exception Not_resolvable of exn
+
+class type resolver =
+  object
+    method init_rep_encoding : rep_encoding -&#62; unit
+    method init_warner : collect_warnings -&#62; unit
+    method rep_encoding : rep_encoding
+    method open_in : ext_id -&#62; Lexing.lexbuf
+    method close_in : unit
+    method change_encoding : string -&#62; unit
+    method clone : resolver
+    method close_all : unit
+  end</PRE
+>
+
+The resolver object must work as follows:</P
+><P
+>            <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+>When the parser is called, it tells the resolver the
+warner object and the internal encoding by invoking
+<TT
+CLASS="LITERAL"
+>init_warner</TT
+> and <TT
+CLASS="LITERAL"
+>init_rep_encoding</TT
+>. The
+resolver should store these values. The method <TT
+CLASS="LITERAL"
+>rep_encoding</TT
+>
+should return the internal encoding.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If the parser wants to read from the resolver, it invokes
+the method <TT
+CLASS="LITERAL"
+>open_in</TT
+>. Either the resolver succeeds, in which
+case the <TT
+CLASS="LITERAL"
+>Lexing.lexbuf</TT
+> reading from the file or stream must
+be returned, or opening fails. In the latter case the method implementation
+should raise an exception (see below).</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If the parser finishes reading, it calls the
+<TT
+CLASS="LITERAL"
+>close_in</TT
+> method.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If the parser finds a reference to another external
+entity in the input stream, it calls <TT
+CLASS="LITERAL"
+>clone</TT
+> to get a second
+resolver which must be initially closed (not yet connected with an input
+stream).  The parser then invokes <TT
+CLASS="LITERAL"
+>open_in</TT
+> and the other
+methods as described.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If you already know the character set of the input
+stream, you should recode it to the internal encoding, and define the method
+<TT
+CLASS="LITERAL"
+>change_encoding</TT
+> as an empty method.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If you want to support multiple external character sets,
+the object must follow a much more complicated protocol. Directly after
+<TT
+CLASS="LITERAL"
+>open_in</TT
+> has been called, the resolver must return a lexical
+buffer that only reads one byte at a time. This is only possible if you create
+the lexical buffer with <TT
+CLASS="LITERAL"
+>Lexing.from_function</TT
+>; the function
+must then always return 1 if the EOF is not yet reached, and 0 if EOF is
+reached. Once the parser has read the first line of the document, it will invoke
+<TT
+CLASS="LITERAL"
+>change_encoding</TT
+> to tell the resolver which character set to
+assume. From this moment, the object can return more than one byte at once. The
+argument of <TT
+CLASS="LITERAL"
+>change_encoding</TT
+> is either the parameter of the
+"encoding" attribute of the XML declaration, or the empty string if there is
+not any XML declaration or if the declaration does not contain an encoding
+attribute. </P
+><P
+>At the beginning the resolver must only return one
+character every time something is read from the lexical buffer. The reason
+is that otherwise you would not know exactly at which position in the
+input stream the character set changes.</P
+><P
+>If you want automatic recognition of the character set,
+it is up to the resolver object to implement this.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If an error occurs, the parser calls the method
+<TT
+CLASS="LITERAL"
+>close_all</TT
+> for the top-level resolver; this method should
+close itself (if not already done) and all clones.</P
+></LI
+></UL
+></P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Exceptions. </B
+>It is possible to chain resolvers such that when the first resolver is not able
+to open the entity, the other resolvers of the chain are tried in turn. The
+method <TT
+CLASS="LITERAL"
+>open_in</TT
+> should raise the exception
+<TT
+CLASS="LITERAL"
+>Not_competent</TT
+> to indicate that the next resolver should try
+to open the entity. If the resolver is able to handle the ID, but some other
+error occurs, the exception <TT
+CLASS="LITERAL"
+>Not_resolvable</TT
+> should be raised
+to force the chain to break.
+         </P
+></DIV
+><P
+>Example: How to define a resolver that is equivalent to
+from_string. A rough sketch follows:</P
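+><P
+>The following is only a rough sketch, not the library's own implementation:
+it ignores warnings, accepts every ID, and assumes that the string is already
+in the internal encoding (so <TT
+CLASS="LITERAL"
+>change_encoding</TT
+> is empty). The module and type names (Pxp_types, Pxp_reader) are assumed
+as in the interface shown above. Such a resolver could then be passed to the
+parser inside an ExtID source:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_types
+open Pxp_reader
+
+(* a resolver that always delivers the fixed string s *)
+let rec string_resolver s : resolver =
+  object
+    val mutable enc = (`Enc_iso88591 : rep_encoding)
+    method init_rep_encoding e = enc &lt;- e
+    method init_warner (_ : collect_warnings) = ()
+    method rep_encoding = enc
+    method open_in (_ : ext_id) = Lexing.from_string s
+    method close_in = ()
+    method change_encoding (_ : string) = ()
+    method clone = string_resolver s   (* a fresh, still closed resolver *)
+    method close_all = ()
+  end</PRE
+></P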
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1728"
+>4.2.3. Predefined resolver components</A
+></H2
+><P
+>There are some classes in Pxp_reader that define common resolver behaviour.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class resolve_read_this_channel : 
+    ?id:ext_id -&#62; 
+    ?fixenc:encoding -&#62; 
+    ?auto_close:bool -&#62; 
+    in_channel -&#62; 
+        resolver</PRE
+>
+
+Reads from the passed channel (it may even be a pipe). If the
+<TT
+CLASS="LITERAL"
+>~id</TT
+> argument is passed to the object, the created resolver
+accepts only this ID. Otherwise all IDs are accepted.  - Once the resolver has
+been cloned, the clone does not accept any ID at all. This means that this resolver cannot
+handle inner references to external entities. Note that you can combine this
+resolver with another resolver that can handle inner references (such as
+resolve_as_file); see class 'combine' below.  - If you pass the
+<TT
+CLASS="LITERAL"
+>~fixenc</TT
+> argument, the encoding of the channel is set to the
+passed value, regardless of any auto-recognition or any XML declaration. - If
+<TT
+CLASS="LITERAL"
+>~auto_close = true</TT
+> (which is the default), the channel is
+closed after use. If <TT
+CLASS="LITERAL"
+>~auto_close = false</TT
+>, the channel is
+left open.
+ </P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class resolve_read_any_channel : 
+    ?auto_close:bool -&#62; 
+    channel_of_id:(ext_id -&#62; (in_channel * encoding option)) -&#62; 
+        resolver</PRE
+>
+
+This resolver calls the function <TT
+CLASS="LITERAL"
+>~channel_of_id</TT
+> to open a
+new channel for the passed <TT
+CLASS="LITERAL"
+>ext_id</TT
+>. This function must either
+return the channel and the encoding, or it must fail with Not_competent.  The
+function must return <TT
+CLASS="LITERAL"
+>None</TT
+> as encoding if the default
+mechanism to recognize the encoding should be used. It must return
+<TT
+CLASS="LITERAL"
+>Some e</TT
+> if it is already known that the encoding of the
+channel is <TT
+CLASS="LITERAL"
+>e</TT
+>.  If <TT
+CLASS="LITERAL"
+>~auto_close = true</TT
+>
+(which is the default), the channel is closed after use. If
+<TT
+CLASS="LITERAL"
+>~auto_close = false</TT
+>, the channel is left open.</P
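+><P
+>For example, a ~channel_of_id function might open SYSTEM identifiers
+directly as local files and leave everything else to other resolvers in a
+chain by raising Not_competent. This is only a sketch; it assumes the
+ext_id constructors (System, ...) from Pxp_types and treats the SYSTEM
+identifier naively as a plain file name:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_types
+open Pxp_reader
+
+let channel_of_id = function
+    System sysid -&gt; (open_in sysid, None)   (* None: detect encoding as usual *)
+  | _            -&gt; raise Not_competent
+
+let r = new resolve_read_any_channel ~channel_of_id</PRE
+></P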
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class resolve_read_url_channel :
+    ?base_url:Neturl.url -&#62;
+    ?auto_close:bool -&#62; 
+    url_of_id:(ext_id -&#62; Neturl.url) -&#62; 
+    channel_of_url:(Neturl.url -&#62; (in_channel * encoding option)) -&#62; 
+        resolver</PRE
+>
+
+When this resolver gets an ID to read from, it calls the function
+<TT
+CLASS="LITERAL"
+>~url_of_id</TT
+> to get the corresponding URL. This URL may be a
+relative URL; however, a URL scheme must be used which contains a path.  The
+resolver converts the URL to an absolute URL if necessary.  The second
+function, <TT
+CLASS="LITERAL"
+>~channel_of_url</TT
+>, is fed with the absolute URL as
+input. This function opens the resource to read from, and returns the channel
+and the encoding of the resource.</P
+><P
+>Both functions, <TT
+CLASS="LITERAL"
+>~url_of_id</TT
+> and
+<TT
+CLASS="LITERAL"
+>~channel_of_url</TT
+>, can raise Not_competent to indicate that
+the object is not able to read from the specified resource. However, there is a
+difference: A Not_competent from <TT
+CLASS="LITERAL"
+>~url_of_id</TT
+> is left as it
+is, but a Not_competent from <TT
+CLASS="LITERAL"
+>~channel_of_url</TT
+> is converted to
+Not_resolvable. So only <TT
+CLASS="LITERAL"
+>~url_of_id</TT
+> decides which URLs are
+accepted by the resolver and which are not.</P
+><P
+>The function <TT
+CLASS="LITERAL"
+>~channel_of_url</TT
+> must return
+<TT
+CLASS="LITERAL"
+>None</TT
+> as encoding if the default mechanism to recognize the
+encoding should be used. It must return <TT
+CLASS="LITERAL"
+>Some e</TT
+> if it is
+already known that the encoding of the channel is <TT
+CLASS="LITERAL"
+>e</TT
+>.</P
+><P
+>If <TT
+CLASS="LITERAL"
+>~auto_close = true</TT
+> (which is the default), the channel is
+closed after use. If <TT
+CLASS="LITERAL"
+>~auto_close = false</TT
+>, the channel is
+left open.</P
+><P
+>Objects of this class contain a base URL relative to which relative URLs are
+interpreted. When creating a new object, you can specify the base URL by
+passing it as <TT
+CLASS="LITERAL"
+>~base_url</TT
+> argument. When an existing object is
+cloned, the base URL of the clone is the URL of the original object. - Note
+that the term "base URL" has a strict definition in RFC 1808.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class resolve_read_this_string : 
+    ?id:ext_id -&#62; 
+    ?fixenc:encoding -&#62; 
+    string -&#62; 
+        resolver</PRE
+>
+
+Reads from the passed string. If the <TT
+CLASS="LITERAL"
+>~id</TT
+> argument is passed
+to the object, the created resolver accepts only this ID. Otherwise all IDs are
+accepted. - Once the resolver has been cloned, the clone does not accept any ID at all. This
+means that this resolver cannot handle inner references to external
+entities. Note that you can combine this resolver with another resolver that
+can handle inner references (such as resolve_as_file); see class 'combine'
+below. - If you pass the <TT
+CLASS="LITERAL"
+>~fixenc</TT
+> argument, the encoding of
+the string is set to the passed value, regardless of any auto-recognition or
+any XML declaration.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class resolve_read_any_string : 
+    string_of_id:(ext_id -&#62; (string * encoding option)) -&#62; 
+        resolver</PRE
+>
+
+This resolver calls the function <TT
+CLASS="LITERAL"
+>~string_of_id</TT
+> to get the
+string for the passed <TT
+CLASS="LITERAL"
+>ext_id</TT
+>. This function must either
+return the string and the encoding, or it must fail with Not_competent.  The
+function must return <TT
+CLASS="LITERAL"
+>None</TT
+> as encoding if the default
+mechanism to recognize the encoding should be used. It must return
+<TT
+CLASS="LITERAL"
+>Some e</TT
+> if it is already known that the encoding of the
+string is <TT
+CLASS="LITERAL"
+>e</TT
+>.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class resolve_as_file :
+    ?file_prefix:[ `Not_recognized | `Allowed | `Required ] -&#62;
+    ?host_prefix:[ `Not_recognized | `Allowed | `Required ] -&#62;
+    ?system_encoding:encoding -&#62;
+    ?url_of_id:(ext_id -&#62; Neturl.url) -&#62; 
+    ?channel_of_url: (Neturl.url -&#62; (in_channel * encoding option)) -&#62;
+    unit -&#62; 
+        resolver</PRE
+>
+Reads from the local file system. Every file name is interpreted as
+a file name of the local file system, and the referenced file is read.</P
+><P
+>The full form of a file URL is: file://host/path, where
+'host' specifies the host system where the file identified by 'path'
+resides. host = "" or host = "localhost" are accepted; other values
+will raise Not_competent. The standard for file URLs is 
+defined in RFC 1738.</P
+><P
+>Option <TT
+CLASS="LITERAL"
+>~file_prefix</TT
+>: Specifies how the "file:" prefix of
+file names is handled:
+            <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>`Not_recognized:</TT
+> The prefix is not
+recognized.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>`Allowed:</TT
+> The prefix is allowed but
+not required (the default).</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>`Required:</TT
+> The prefix is
+required.</P
+></LI
+></UL
+></P
+><P
+>Option <TT
+CLASS="LITERAL"
+>~host_prefix:</TT
+> Specifies how the "//host" phrase of
+file names is handled:
+            <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>`Not_recognized:</TT
+> The prefix is not
+recognized.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>`Allowed:</TT
+> The prefix is allowed but
+not required (the default).</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>`Required:</TT
+> The prefix is
+required.</P
+></LI
+></UL
+></P
+><P
+>Option <TT
+CLASS="LITERAL"
+>~system_encoding:</TT
+> Specifies the encoding of file
+names of the local file system. Default: UTF-8.</P
+><P
+>Options <TT
+CLASS="LITERAL"
+>~url_of_id</TT
+>, <TT
+CLASS="LITERAL"
+>~channel_of_url</TT
+>: Not
+for the casual user!</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class combine : 
+    ?prefer:resolver -&#62; 
+    resolver list -&#62; 
+        resolver</PRE
+>
+
+Combines several resolver objects. If a concrete entity with an
+<TT
+CLASS="LITERAL"
+>ext_id</TT
+> is to be opened, the combined resolver tries the
+contained resolvers in turn until a resolver accepts opening the entity
+(i.e. it does not raise Not_competent on open_in).</P
+><P
+>Clones: If the 'clone' method is invoked before 'open_in', all contained
+resolvers are cloned separately and again combined. If the 'clone' method is 
+invoked after 'open_in' (i.e. while the resolver is open), additionally the
+clone of the active resolver is flagged as being preferred, i.e. it is tried
+first. </P
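+><P
+>A small sketch of a typical combination (file names are placeholders): the
+toplevel document is read from an already opened channel, while inner
+external entities are resolved as files of the local file system. The
+combined resolver can then be used in an ExtID source, assuming the source
+type shown earlier is defined in Pxp_yacc:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_reader
+
+let ch = open_in "doc.xml"
+
+let r =
+  new combine
+    [ new resolve_read_this_channel ch;
+      new resolve_as_file () ]
+
+let src = Pxp_yacc.ExtID (Pxp_types.System "doc.xml", r)</PRE
+></P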
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="c1567.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x1812.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Configuring and calling the parser</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c1567.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>The DTD classes</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1812.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1812.html
new file mode 100644 (file)
index 0000000..34f09c2
--- /dev/null
@@ -0,0 +1,517 @@
+<HTML
+><HEAD
+><TITLE
+>The DTD classes</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Configuring and calling the parser"
+HREF="c1567.html"><LINK
+REL="PREVIOUS"
+TITLE="Resolvers and sources"
+HREF="x1629.html"><LINK
+REL="NEXT"
+TITLE="Invoking the parser"
+HREF="x1818.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x1629.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 4. Configuring and calling the parser</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x1818.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1812"
+>4.3. The DTD classes</A
+></H1
+><P
+><I
+CLASS="EMPHASIS"
+>Sorry, not yet
+written. Perhaps the interface definition of Pxp_dtd expresses the same:</I
+></P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>&#13;(**********************************************************************)
+(*                                                                    *)
+(* Pxp_dtd:                                                           *)
+(*     Object model of document type declarations                     *)
+(*                                                                    *)
+(**********************************************************************)
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class dtd ............... represents the whole DTD, including element
+ *                           declarations, entity declarations, notation
+ *                           declarations, and processing instructions
+ * class dtd_element ....... represents an element declaration consisting
+ *                           of a content model and an attribute list
+ *                           declaration
+ * class dtd_notation ...... represents a notation declaration
+ * class proc_instruction .. represents a processing instruction
+ * ======================================================================
+ *
+ *)
+
+
+class dtd :
+  (* Creation:
+   *   new dtd
+   * creates a new, empty DTD object without any declaration, without a root
+   * element, without an ID.
+   *)
+  Pxp_types.collect_warnings -&gt; 
+  Pxp_types.rep_encoding -&gt;
+  object
+    method root : string option
+      (* get the name of the root element if present *)
+
+    method set_root : string -&gt; unit
+      (* set the name of the root element. This method can be invoked 
+       * only once
+       *)
+
+    method id : Pxp_types.dtd_id option
+      (* get the identifier for this DTD *)
+
+    method set_id : Pxp_types.dtd_id -&gt; unit
+      (* set the identifier. This method can be invoked only once *)
+
+    method encoding : Pxp_types.rep_encoding
+      (* returns the encoding used for character representation *)
+
+
+    method allow_arbitrary : unit
+      (* After this method has been invoked, the object changes its behaviour:
+       * - elements and notations that have not been added may be used in an
+       *   arbitrary way; the methods "element" and "notation" indicate this
+       *   by raising Undeclared instead of Validation_error.
+       *)
+
+    method disallow_arbitrary : unit
+
+    method arbitrary_allowed : bool
+      (* Returns whether arbitrary contents are allowed or not. *)
+
+    method standalone_declaration : bool
+      (* Whether there is a 'standalone' declaration or not. Strictly 
+       * speaking, this declaration is not part of the DTD, but it is
+       * included here because of practical reasons. 
+       * If not set, this property defaults to 'false'.
+       *)
+
+    method set_standalone_declaration : bool -&gt; unit
+      (* Sets the 'standalone' declaration. *)
+
+
+    method add_element : dtd_element -&gt; unit
+      (* add the given element declaration to this DTD. Raises Not_found
+       * if there is already an element declaration with the same name.
+       *)
+
+    method add_gen_entity : Pxp_entity.entity -&gt; bool -&gt; unit
+      (* add_gen_entity e extdecl:
+       * add the entity 'e' as general entity to this DTD (general entities
+       * are those represented by &amp;name;). If there is already a declaration
+       * with the same name, the second definition is ignored; as an exception to
+       * this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
+       * may only be redeclared with a definition that is equivalent to the
+       * standard definition; otherwise a Validation_error is raised.
+       *
+       * 'extdecl': 'true' indicates that the entity declaration occurs in
+       * an external entity. (Used for the standalone check.)
+       *)
+
+    method add_par_entity : Pxp_entity.entity -&gt; unit
+      (* add the given entity as parameter entity to this DTD (parameter
+       * entities are those represented by %name;). If there is already a 
+       * declaration with the same name, the second definition is ignored.
+       *)
+
+    method add_notation : dtd_notation -&gt; unit
+      (* add the given notation to this DTD. If there is already a declaration
+       * with the same name, a Validation_error is raised.
+       *)
+
+    method add_pinstr : proc_instruction -&gt; unit
+      (* add the given processing instruction to this DTD. *)
+
+    method element : string -&gt; dtd_element
+      (* looks up the element declaration with the given name. Raises 
+       * Validation_error if the element cannot be found. (If "allow_arbitrary"
+       * has been invoked before, Undeclared is raised instead.)
+       *)
+
+    method element_names : string list
+      (* returns the list of the names of all element declarations. *)
+
+    method gen_entity : string -&gt; (Pxp_entity.entity * bool)
+      (* let e, extdecl = obj # gen_entity n:
+       * looks up the general entity 'e' with the name 'n'. Raises
+       * WF_error if the entity cannot be found.
+       * 'extdecl': indicates whether the entity declaration occurred in an 
+       * external entity.
+       *)
+
+    method gen_entity_names : string list
+      (* returns the list of all general entity names *)
+
+    method par_entity : string -&gt; Pxp_entity.entity
+      (* looks up the parameter entity with the given name. Raises
+       * WF_error if the entity cannot be found.
+       *)
+
+    method par_entity_names : string list
+      (* returns the list of all parameter entity names *)
+
+    method notation : string -&gt; dtd_notation
+      (* looks up the notation declaration with the given name. Raises
+       * Validation_error if the notation cannot be found. (If "allow_arbitrary"
+       * has been invoked before, Undeclared is raised instead.)
+       *)
+
+    method notation_names : string list
+      (* Returns the list of the names of all added notations *)
+
+    method pinstr : string -&gt; proc_instruction list
+      (* looks up all processing instructions with the given target.
+       * The "target" is the identifier following "&lt;?".
+       * Note: It is not possible to find out the exact position of the
+       * processing instruction.
+       *)
+
+    method pinstr_names : string list
+      (* Returns the list of the names (targets) of all added pinstrs *)
+
+    method validate : unit
+      (* ensures that the DTD is valid. This method is optimized such that
+       * actual validation is only performed if DTD has changed.
+       * If the DTD is invalid, mostly a Validation_error is raised,
+       * but other exceptions are possible, too.
+       *)
+
+    method only_deterministic_models : unit
+      (* Succeeds if all regexp content models are deterministic. 
+       * Otherwise Validation_error.
+       *)
+
+    method write : Pxp_types.output_stream -&gt; Pxp_types.encoding -&gt; bool -&gt; unit
+      (* write os enc doctype:
+       * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a 
+       * DTD like &lt;!DOCTYPE root [ ... ]&gt; is written. If 'not doctype',
+       * only the declarations are written (the material within the
+       * square brackets).
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -&gt; bool -&gt; unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+
+    (*----------------------------------------*)
+    method invalidate : unit
+      (* INTERNAL METHOD *)
+    method warner : Pxp_types.collect_warnings
+      (* INTERNAL METHOD *)
+  end
+
+
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_element : dtd -&gt; string -&gt; 
+  (* Creation:
+   *   new dtd_element init_dtd init_name:
+   * creates a new dtd_element object for init_dtd with init_name.
+   * The strings are represented in the same encoding as init_dtd.
+   *)
+  object
+
+    method name : string
+      (* returns the name of the declared element *)
+
+    method externally_declared : bool
+      (* returns whether the element declaration occurs in an external
+       * entity.
+       *)
+
+    method content_model : Pxp_types.content_model_type
+      (* get the content model of this element declaration, or Unspecified *)
+
+    method content_dfa : Pxp_dfa.dfa_definition option
+      (* return the DFA of the content model if there is a DFA, or None.
+       * A DFA exists only for regexp style content models which are
+       * deterministic.
+       *)
+
+    method set_cm_and_extdecl : Pxp_types.content_model_type -&gt; bool -&gt; unit
+      (* set_cm_and_extdecl cm extdecl:
+       * set the content model to 'cm'. Once the content model is not 
+       * Unspecified, it cannot be set to a different value again.
+       * Furthermore, it is set whether the element occurs in an external
+       * entity ('extdecl').
+       *)
+
+    method encoding : Pxp_types.rep_encoding
+      (* Return the encoding of the strings *)
+
+    method allow_arbitrary : unit
+      (* After this method has been invoked, the object changes its behaviour:
+       * - attributes that have not been added may be used in an
+       *   arbitrary way; the method "attribute" indicates this
+       *   by raising Undeclared instead of Validation_error.
+       *)
+
+    method disallow_arbitrary : unit
+
+    method arbitrary_allowed : bool
+      (* Returns whether arbitrary attributes are allowed or not. *)
+
+    method attribute : string -&gt; 
+                         Pxp_types.att_type * Pxp_types.att_default
+      (* get the type and default value of a declared attribute, or raise
+       * Validation_error if the attribute does not exist.
+       * If 'arbitrary_allowed', the exception Undeclared is raised instead
+       * of Validation_error.
+       *)
+
+    method attribute_violates_standalone_declaration : 
+               string -&gt; string option -&gt; bool
+      (* attribute_violates_standalone_declaration name v:
+       * Checks whether the attribute 'name' violates the "standalone"
+       * declaration if it has value 'v'.
+       * The method returns true if:
+       * - The attribute declaration occurs in an external entity, 
+       * and if one of the two conditions holds:
+       * - v = None, and there is a default for the attribute value
+       * - v = Some s, and the type of the attribute is not CDATA,
+       *   and s changes if normalized according to the rules of the
+       *   attribute type.
+       *
+       * The method raises Validation_error if the attribute does not exist.
+       * If 'arbitrary_allowed', the exception Undeclared is raised instead
+       * of Validation_error.
+       *)
+
+    method attribute_names : string list
+      (* get the list of all declared attributes *)
+
+    method names_of_required_attributes : string list
+      (* get the list of all attributes that are specified as required 
+       * attributes
+       *)
+
+    method id_attribute_name : string option
+      (* Returns the name of the attribute with type ID, or None. *)
+
+    method idref_attribute_names : string list
+      (* Returns the names of the attributes with type IDREF or IDREFS. *)
+
+    method add_attribute : string -&gt; 
+                           Pxp_types.att_type -&gt; 
+                          Pxp_types.att_default -&gt; 
+                          bool -&gt;
+                            unit
+      (* add_attribute name type default extdecl:
+       * add an attribute declaration for an attribute with the given name,
+       * type, and default value. If there is more than one declaration for
+       * an attribute name, the first declaration counts; the other declarations
+       * are ignored.
+       * 'extdecl': if true, the attribute declaration occurs in an external
+       * entity. This property is used to check the "standalone" attribute.
+       *)
+
+    method validate : unit
+      (* checks whether this element declaration (i.e. the content model and
+       * all attribute declarations) is valid for the associated DTD.
+       * Raises mostly Validation_error if the validation fails.
+       *)
+
+    method write : Pxp_types.output_stream -&gt; Pxp_types.encoding -&gt; unit
+      (* write os enc:
+       * Writes the &lt;!ELEMENT ... &gt; declaration to 'os' as 'enc'-encoded string.
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -&gt; unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+  end
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_notation : string -&gt; Pxp_types.ext_id -&gt; Pxp_types.rep_encoding -&gt;
+  (* Creation:
+   *    new dtd_notation a_name an_external_ID init_encoding
+   * creates a new dtd_notation object with the given name and the given
+   * external ID.
+   *)
+  object
+    method name : string
+    method ext_id : Pxp_types.ext_id
+    method encoding : Pxp_types.rep_encoding
+
+    method write : Pxp_types.output_stream -&gt; Pxp_types.encoding -&gt; unit
+      (* write os enc:
+       * Writes the &lt;!NOTATION ... &gt; declaration to 'os' as 'enc'-encoded 
+       * string.
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -&gt; unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+  end
+
+(* ---------------------------------------------------------------------- *)
+
+and proc_instruction : string -&gt; string -&gt; Pxp_types.rep_encoding -&gt;
+  (* Creation:
+   *   new proc_instruction a_target a_value
+   * creates a new proc_instruction object with the given target string and
+   * the given value string. 
+   * Note: A processing instruction is written as &lt;?target value?&gt;. 
+   *)
+  object
+    method target : string
+    method value : string
+    method encoding : Pxp_types.rep_encoding
+
+    method write : Pxp_types.output_stream -&gt; Pxp_types.encoding -&gt; unit
+      (* write os enc:
+       * Writes the &lt;?...?&gt; PI to 'os' as 'enc'-encoded string.
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -&gt; unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+    method parse_pxp_option : (string * string * (string * string) list)
+      (* Parses a PI containing a PXP option. Such PIs are formed like:
+       *   &lt;?target option-name option-att="value" option-att="value" ... ?&gt;
+       * The method returns a triple
+       *   (target, option-name, [option-att, value; ...])
+       * or raises Error.
+       *)
+
+  end
+
+;;&#13;</PRE
+></P
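+><P
+>As a small, hedged usage sketch (the file name is a placeholder), a DTD
+object obtained from a document entity (see the parsing functions in the
+next section) can be inspected with the methods listed above:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_yacc
+
+let dtd =
+  extract_dtd_from_document_entity default_config (from_file "doc.xml")
+
+let () =
+  (match dtd # root with
+     Some r -&gt; print_endline ("Root element: " ^ r)
+   | None   -&gt; print_endline "No root element declared");
+  List.iter print_endline (dtd # element_names)</PRE
+></P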
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x1629.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x1818.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Resolvers and sources</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c1567.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Invoking the parser</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1818.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1818.html
new file mode 100644 (file)
index 0000000..b289a36
--- /dev/null
@@ -0,0 +1,779 @@
+<HTML
+><HEAD
+><TITLE
+>Invoking the parser</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Configuring and calling the parser"
+HREF="c1567.html"><LINK
+REL="PREVIOUS"
+TITLE="The DTD classes"
+HREF="x1812.html"><LINK
+REL="NEXT"
+TITLE="Updates"
+HREF="x1965.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x1812.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 4. Configuring and calling the parser</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x1965.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1818"
+>4.4. Invoking the parser</A
+></H1
+><P
+>Here is a description of Pxp_yacc.</P
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1821"
+>4.4.1. Defaults</A
+></H2
+><P
+>The following defaults are available:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>val default_config : config
+val default_extension : ('a node extension) as 'a
+val default_spec : ('a node extension as 'a) spec</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1825"
+>4.4.2. Parsing functions</A
+></H2
+><P
+>In the following, the term "closed document" refers to
+an XML structure like
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!DOCTYPE ... [ <TT
+CLASS="REPLACEABLE"
+><I
+>declarations</I
+></TT
+> ] &gt;
+&lt;<TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>&gt;
+...
+&lt;/<TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>&gt;</PRE
+>
+
+The term "fragment" refers to an XML structure like
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;<TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>&gt;
+...
+&lt;/<TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>&gt;</PRE
+>
+
+i.e. to a single, isolated element instance.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>val parse_dtd_entity : config -&#62; source -&#62; dtd</PRE
+>
+
+Parses the declarations which are contained in the entity, and returns them as
+<TT
+CLASS="LITERAL"
+>dtd</TT
+> object.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>val extract_dtd_from_document_entity : config -&#62; source -&#62; dtd</PRE
+>
+
+Extracts the DTD from a closed document. Both the internal and the external
+subsets are extracted and combined to one <TT
+CLASS="LITERAL"
+>dtd</TT
+> object. This
+function does not parse the whole document, but only the parts that are
+necessary to extract the DTD.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>val parse_document_entity : 
+    ?transform_dtd:(dtd -&#62; dtd) -&#62;
+    ?id_index:('ext index) -&#62;
+    config -&#62; 
+    source -&#62; 
+    'ext spec -&#62; 
+        'ext document</PRE
+>
+
+Parses a closed document and validates it against the DTD that is contained in
+the document (internal and external subsets). The option
+<TT
+CLASS="LITERAL"
+>~transform_dtd</TT
+> can be used to transform the DTD in the
+document, and to use the transformed DTD for validation. If
+<TT
+CLASS="LITERAL"
+>~id_index</TT
+> is specified, an index of all ID attributes is
+created.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>val parse_wfdocument_entity : 
+    config -&#62; 
+    source -&#62; 
+    'ext spec -&#62; 
+        'ext document</PRE
+>
+
+Parses a closed document, but checks it only for well-formedness.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>val parse_content_entity  : 
+    ?id_index:('ext index) -&#62;
+    config -&#62;  
+    source -&#62; 
+    dtd -&#62; 
+    'ext spec -&#62; 
+        'ext node</PRE
+>
+
+Parses a fragment, and validates the element.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>val parse_wfcontent_entity : 
+    config -&#62; 
+    source -&#62; 
+    'ext spec -&#62; 
+        'ext node</PRE
+>
+
+Parses a fragment, but checks it only for well-formedness.</P
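+><P
+>A short sketch that combines two of these functions (file names are
+placeholders): an external DTD subset is parsed first, and an isolated
+fragment is then validated against it:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_yacc
+
+let dtd =
+  parse_dtd_entity default_config (from_file "doc.dtd")
+
+let tree =
+  parse_content_entity default_config (from_file "fragment.xml")
+    dtd default_spec</PRE
+></P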
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1851"
+>4.4.3. Configuration options</A
+></H2
+><P
+>&#13;<PRE
+CLASS="PROGRAMLISTING"
+>type config =
+    { warner : collect_warnings;
+      errors_with_line_numbers : bool;
+      enable_pinstr_nodes : bool;
+      enable_super_root_node : bool;
+      enable_comment_nodes : bool;
+      encoding : rep_encoding;
+      recognize_standalone_declaration : bool;
+      store_element_positions : bool;
+      idref_pass : bool;
+      validate_by_dfa : bool;
+      accept_only_deterministic_models : bool;
+      ...
+    }</PRE
+>
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>warner:</TT
+> The parser prints
+warnings by invoking the method <TT
+CLASS="LITERAL"
+>warn</TT
+> for this warner
+object. (Default: all warnings are dropped)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>errors_with_line_numbers:</TT
+> If
+true, errors contain line numbers; if false, errors contain only byte
+positions. The latter mode is faster. (Default: true)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>enable_pinstr_nodes:</TT
+> If true,
+the parser creates extra nodes for processing instructions. If false,
+processing instructions are simply added to the element or document surrounding
+the instructions. (Default: false)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>enable_super_root_node:</TT
+> If
+true, the parser creates an extra node which is the parent of the root of the
+document tree. This node is called super root; it is an element with type
+<TT
+CLASS="LITERAL"
+>T_super_root</TT
+>. - If there are processing instructions outside
+the root element and outside the DTD, they are added to the super root instead
+of the document. - If false, the super root node is not created. (Default:
+false)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>enable_comment_nodes:</TT
+> If true,
+the parser creates nodes for comments with type <TT
+CLASS="LITERAL"
+>T_comment</TT
+>;
+if false, such nodes are not created. (Default: false)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>encoding:</TT
+> Specifies the
+internal encoding of the parser. Most strings are then represented according to
+this encoding; however there are some exceptions (especially
+<TT
+CLASS="LITERAL"
+>ext_id</TT
+> values which are always UTF-8 encoded).
+(Default: `Enc_iso88591)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>recognize_standalone_declaration:</TT
+> If true and if the parser is
+validating, the <TT
+CLASS="LITERAL"
+>standalone="yes"</TT
+> declaration forces the parser
+to check whether the document is a standalone document. - If false, or if the
+parser is in well-formedness mode, such declarations are ignored.
+(Default: true)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>store_element_positions:</TT
+> If
+true, for every non-data node the source position is stored. If false, the
+position information is lost. If available, you can get the positions of nodes
+by invoking the <TT
+CLASS="LITERAL"
+>position</TT
+> method.
+(Default: true)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>idref_pass:</TT
+> If true and if
+there is an ID index, the parser checks whether every IDREF or IDREFS attribute
+refers to an existing node; this requires that the parser traverses the whole
+document tree. If false, this check is left out. (Default: false)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>validate_by_dfa:</TT
+> If true and if
+the content model for an element type is deterministic, a deterministic finite
+automaton is used to validate whether the element contents match the content
+model of the type. If false, or if a DFA is not available, a backtracking
+algorithm is used for validation. (Default: true)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>accept_only_deterministic_models:</TT
+> If true, only deterministic content
+models are accepted; if false, any syntactically correct content models can be
+processed. (Default: true)</P
+></LI
+></UL
+></P
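+><P
+>A configuration is usually derived from the defaults by record update (see
+also the next section). For example, a sketch that enables comment nodes and
+switches the internal encoding to UTF-8:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_yacc
+
+let config =
+  { default_config with
+      enable_comment_nodes = true;
+      encoding = `Enc_utf8
+  }</PRE
+></P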
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1895"
+>4.4.4. Which configuration should I use?</A
+></H2
+><P
+>First, I recommend varying the default configuration instead of
+creating a new configuration record. For instance, to set
+<TT
+CLASS="LITERAL"
+>idref_pass</TT
+> to <TT
+CLASS="LITERAL"
+>true</TT
+>, change the default
+as in:
+<PRE
+CLASS="PROGRAMLISTING"
+>let config = { default_config with idref_pass = true }</PRE
+>
+The background is that I can add more options to the record in future versions
+of the parser without breaking your programs.</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Do I need extra nodes for processing instructions? </B
+>By default, such nodes are not created. This does not mean that the
+processing instructions are lost; however, you cannot find out the exact
+location where they occur. For example, the following XML text
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;x&#62;&#60;?pi1?&#62;&#60;y/&#62;&#60;?pi2?&#62;&#60;/x&#62; </PRE
+> 
+
+will normally create one element node for <TT
+CLASS="LITERAL"
+>x</TT
+> containing
+<I
+CLASS="EMPHASIS"
+>one</I
+> subnode for <TT
+CLASS="LITERAL"
+>y</TT
+>. The processing
+instructions are attached to <TT
+CLASS="LITERAL"
+>x</TT
+> in a separate hash table; you
+can access them using <TT
+CLASS="LITERAL"
+>x # pinstr "pi1"</TT
+> and <TT
+CLASS="LITERAL"
+>x #
+pinstr "pi2"</TT
+>, respectively. The information is lost where the
+instructions occur within <TT
+CLASS="LITERAL"
+>x</TT
+>.</P
+></DIV
+><P
+>If the option <TT
+CLASS="LITERAL"
+>enable_pinstr_nodes</TT
+> is
+turned on, the parser creates extra nodes <TT
+CLASS="LITERAL"
+>pi1</TT
+> and
+<TT
+CLASS="LITERAL"
+>pi2</TT
+> such that the subnodes of <TT
+CLASS="LITERAL"
+>x</TT
+> are now: 
+
+<PRE
+CLASS="PROGRAMLISTING"
+>x # sub_nodes = [ pi1; y; pi2 ]</PRE
+>
+
+The extra nodes contain the processing instructions in the usual way, i.e. you
+can access them using <TT
+CLASS="LITERAL"
+>pi1 # pinstr "pi1"</TT
+> and <TT
+CLASS="LITERAL"
+>pi2 #
+pinstr "pi2"</TT
+>, respectively.</P
+><P
+>Note that you will need an exemplar for the PI nodes (see
+<TT
+CLASS="LITERAL"
+>make_spec_from_alist</TT
+>).</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Do I need a super root node? </B
+>By default, there is no super root node. The
+<TT
+CLASS="LITERAL"
+>document</TT
+> object refers directly to the node representing the
+root element of the document, i.e.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>doc # root = r</PRE
+>
+
+if <TT
+CLASS="LITERAL"
+>r</TT
+> is the root node. This is sometimes inconvenient: (1)
+Some algorithms become simpler if every node has a parent, even the root
+node. (2) Some standards such as XPath call the "root node" the node whose
+child represents the root of the document. (3) The super root node can serve
+as a container for processing instructions outside the root element. For these
+reasons, it is possible to create an extra super root node, whose child
+is the root node:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>doc # root = sr         &#38;&#38;
+sr # sub_nodes = [ r ]</PRE
+>
+
+When extra nodes are also created for processing instructions, these nodes can
+be added to the super root node if they occur outside the root element (reason
+(3)), and the order reflects the order in the source text.</P
+></DIV
+><P
+>Note that you will need an exemplar for the super root node
+(see <TT
+CLASS="LITERAL"
+>make_spec_from_alist</TT
+>).</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>What is the effect of the UTF-8 encoding? </B
+>By default, the parser represents strings (with a few
+exceptions) as ISO-8859-1 strings. These are well-known, and there are tools
+and fonts for this encoding.</P
+></DIV
+><P
+>However, internationalization may require that you switch over
+to UTF-8 encoding. In most environments, the immediate effect will be that you
+cannot read strings with character codes &#62;= 160 any longer; your terminal will
+only show funny glyph combinations. It is strongly recommended to install
+Unicode fonts (<A
+HREF="http://czyborra.com/unifont/"
+TARGET="_top"
+>GNU Unifont</A
+>, 
+<A
+HREF="http://www.cl.cam.ac.uk/~mgk25/download/ucs-fonts.tar.gz"
+TARGET="_top"
+>Markus Kuhn's fonts</A
+>) and <A
+HREF="http://myweb.clark.net/pub/dickey/xterm/xterm.html"
+TARGET="_top"
+>terminal emulators
+that can handle UTF-8 byte sequences</A
+>. Furthermore, a Unicode editor may
+be helpful (such as <A
+HREF="ftp://metalab.unc.edu/pub/Linux/apps/editors/X/"
+TARGET="_top"
+>Yudit</A
+>). There is
+also a <A
+HREF="http://www.cl.cam.ac.uk/~mgk25/unicode.html"
+TARGET="_top"
+>FAQ</A
+> by
+Markus Kuhn.</P
+><P
+>By setting <TT
+CLASS="LITERAL"
+>encoding</TT
+> to
+<TT
+CLASS="LITERAL"
+>`Enc_utf8</TT
+> all strings originating from the parsed XML
+document are represented as UTF-8 strings. This includes not only character
+data and attribute values but also element names, attribute names and so on, as
+it is possible to use any Unicode letter to form such names.  Strictly
+speaking, PXP is only XML-compliant if the UTF-8 mode is used; otherwise it
+will have difficulties when validating documents containing
+non-ISO-8859-1 names.</P
+><P
+>This mode does not have any impact on the external
+representation of documents. The character set assumed when reading a document
+is set in the XML declaration, and the character set used when writing a document must
+be passed to the <TT
+CLASS="LITERAL"
+>write</TT
+> method.</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>How do I check that nodes exist which are referred to by IDREF attributes? </B
+>First, you must create an index of all occurring ID
+attributes:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let index = new hash_index</PRE
+>
+
+This index must be passed to the parsing function:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>parse_document_entity
+  ~id_index:(index :&#62; index)
+  config source spec</PRE
+>
+
+Next, you must turn on the <TT
+CLASS="LITERAL"
+>idref_pass</TT
+> mode:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let config = { default_config with idref_pass = true }</PRE
+>
+
+Note that now the whole document tree will be traversed, and every node will be
+checked for IDREF and IDREFS attributes. If the tree is big, this may take some
+time.</P
+></DIV
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>What are deterministic content models? </B
+>These types of models can speed up the validation checks;
+furthermore they ensure SGML-compatibility. In particular, a content model is
+deterministic if the parser can determine the actually used alternative by
+inspecting only the current token. For example, this element has
+non-deterministic contents:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ELEMENT x ((u,v) | (u,y+) | v)&#62;</PRE
+>
+
+If the first element in <TT
+CLASS="LITERAL"
+>x</TT
+> is <TT
+CLASS="LITERAL"
+>u</TT
+>, the
+parser does not know which of the alternatives <TT
+CLASS="LITERAL"
+>(u,v)</TT
+> or
+<TT
+CLASS="LITERAL"
+>(u,y+)</TT
+> will work; the parser must also inspect the second
+element to be able to distinguish between the alternatives. Because such
+look-ahead (or "guessing") is required, this example is
+non-deterministic.</P
+></DIV
+><P
+>The XML standard demands that content models be
+deterministic. It is therefore recommended to turn the option
+<TT
+CLASS="LITERAL"
+>accept_only_deterministic_models</TT
+> on; however, PXP can also
+process non-deterministic models using a backtracking algorithm.</P
+><P
+>Deterministic models ensure that validation can be performed in
+linear time. In order to get the maximum benefits, PXP also implements a
+special validator that profits from deterministic models; this is the
+deterministic finite automaton (DFA). This validator is enabled per element
+type if the element type has a deterministic model and if the option
+<TT
+CLASS="LITERAL"
+>validate_by_dfa</TT
+> is turned on.</P
+><P
+>In general, I expect that the DFA method is faster than the
+backtracking method; especially in the worst case the DFA takes only linear
+time. However, if the content model has only a few alternatives and the
+alternatives do not nest, the backtracking algorithm may be better.</P
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x1812.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x1965.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>The DTD classes</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c1567.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Updates</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1965.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1965.html
new file mode 100644 (file)
index 0000000..8fc8562
--- /dev/null
@@ -0,0 +1,152 @@
+<HTML
+><HEAD
+><TITLE
+>Updates</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Configuring and calling the parser"
+HREF="c1567.html"><LINK
+REL="PREVIOUS"
+TITLE="Invoking the parser"
+HREF="x1818.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x1818.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 4. Configuring and calling the parser</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+>&nbsp;</TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1965"
+>4.5. Updates</A
+></H1
+><P
+><I
+CLASS="EMPHASIS"
+>Some features (often added later) that are not otherwise
+explained in the manual but are worth mentioning.</I
+></P
+><P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+>Methods node_position, node_path, nth_node,
+previous_node, next_node for nodes: See pxp_document.mli</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>Functions to determine the document order of nodes:
+compare, create_ord_index, ord_number, ord_compare: See pxp_document.mli</P
+></LI
+></UL
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x1818.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>&nbsp;</TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Invoking the parser</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c1567.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>&nbsp;</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x468.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x468.html
new file mode 100644 (file)
index 0000000..dc9cc1e
--- /dev/null
@@ -0,0 +1,474 @@
+<HTML
+><HEAD
+><TITLE
+>A complete example: The readme DTD</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="What is XML?"
+HREF="c36.html"><LINK
+REL="PREVIOUS"
+TITLE="Highlights of XML"
+HREF="x107.html"><LINK
+REL="NEXT"
+TITLE="Using PXP"
+HREF="c533.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x107.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 1. What is XML?</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="c533.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="SECT.README.DTD"
+>1.3. A complete example: The <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD</A
+></H1
+><P
+>The reason for <I
+CLASS="EMPHASIS"
+>readme</I
+> was that I often wrote two versions
+of files such as README and INSTALL which explain aspects of a distributed
+software archive; one version was ASCII-formatted, the other was written in
+HTML. Maintaining both versions means twice the amount of work, and changes
+made to one version may be forgotten in the other. To improve this situation
+I invented the <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD which allows me to maintain only
+one source written as XML document, and to generate the ASCII and the HTML
+version from it.</P
+><P
+>In this section, I explain only the DTD. The <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD is
+contained in the <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> distribution together with the two converters to
+produce ASCII and HTML. Another <A
+HREF="x738.html"
+>section</A
+> of this manual describes the HTML
+converter.</P
+><P
+>The documents have a simple structure: There are up to three levels of nested
+sections, paragraphs, item lists, footnotes, hyperlinks, and text emphasis. The
+outermost element usually has the type <TT
+CLASS="LITERAL"
+>readme</TT
+>, it is
+declared by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ELEMENT readme (sect1+)&#62;
+&#60;!ATTLIST readme
+          title CDATA #REQUIRED&#62;</PRE
+>
+
+This means that this element contains one or more sections of the first level
+(element type <TT
+CLASS="LITERAL"
+>sect1</TT
+>), and that the element has a required
+attribute <TT
+CLASS="LITERAL"
+>title</TT
+> containing character data (CDATA). Note that
+<TT
+CLASS="LITERAL"
+>readme</TT
+> elements must not contain text data.</P
+><P
+>The three levels of sections are declared as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ELEMENT sect1 (title,(sect2|p|ul)+)&#62;
+
+&#60;!ELEMENT sect2 (title,(sect3|p|ul)+)&#62;
+
+&#60;!ELEMENT sect3 (title,(p|ul)+)&#62;</PRE
+>
+
+Every section has a <TT
+CLASS="LITERAL"
+>title</TT
+> element as its first subelement. After
+the title an arbitrary but non-empty sequence of inner sections, paragraphs and
+item lists follows. Note that the inner sections must belong to the next higher
+section level; <TT
+CLASS="LITERAL"
+>sect3</TT
+> elements must not contain inner
+sections because there is no next higher level.</P
+><P
+>Obviously, all three declarations allow paragraphs (<TT
+CLASS="LITERAL"
+>p</TT
+>) and
+item lists (<TT
+CLASS="LITERAL"
+>ul</TT
+>). The definition can be simplified at this
+point by using a parameter entity:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ENTITY % p.like "p|ul"&#62;
+
+&#60;!ELEMENT sect1 (title,(sect2|%p.like;)+)&#62;
+
+&#60;!ELEMENT sect2 (title,(sect3|%p.like;)+)&#62;
+
+&#60;!ELEMENT sect3 (title,(%p.like;)+)&#62;</PRE
+>
+
+Here, the entity <TT
+CLASS="LITERAL"
+>p.like</TT
+> is nothing but a macro abbreviating
+the same sequence of declarations; if new elements on the same level as
+<TT
+CLASS="LITERAL"
+>p</TT
+> and <TT
+CLASS="LITERAL"
+>ul</TT
+> are later added, it is
+sufficient to change only the entity definition. Note that there are some
+restrictions on the usage of entities in this context; most importantly, entities
+containing a left parenthesis must also contain the corresponding right
+parenthesis. </P
+><P
+>Note that the entity <TT
+CLASS="LITERAL"
+>p.like</TT
+> is a
+<I
+CLASS="EMPHASIS"
+>parameter</I
+> entity, i.e. the ENTITY declaration contains a
+percent sign, and the entity is referred to by
+<TT
+CLASS="LITERAL"
+>%p.like;</TT
+>. This kind of entity must be used to abbreviate
+parts of the DTD; the <I
+CLASS="EMPHASIS"
+>general</I
+> entities declared without
+percent sign and referred to as <TT
+CLASS="LITERAL"
+>&amp;name;</TT
+> are not allowed
+in this context.</P
+><P
+>The <TT
+CLASS="LITERAL"
+>title</TT
+> element specifies the title of the section in
+which it occurs. The title is given as character data, optionally interspersed
+with line breaks (<TT
+CLASS="LITERAL"
+>br</TT
+>):
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ELEMENT title (#PCDATA|br)*&#62;</PRE
+>
+
+Compared with the <TT
+CLASS="LITERAL"
+>title</TT
+> <I
+CLASS="EMPHASIS"
+>attribute</I
+> of
+the <TT
+CLASS="LITERAL"
+>readme</TT
+> element, this element allows inner markup
+(i.e. <TT
+CLASS="LITERAL"
+>br</TT
+>) while attribute values do not: It is an error if
+an attribute value literally contains the left angle bracket &lt;, so it
+is impossible to include inner elements. </P
+><P
+>The paragraph element <TT
+CLASS="LITERAL"
+>p</TT
+> has a structure similar to
+<TT
+CLASS="LITERAL"
+>title</TT
+>, but it allows more inner elements:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ENTITY % text "br|code|em|footnote|a"&#62;
+
+&#60;!ELEMENT p (#PCDATA|%text;)*&#62;</PRE
+>
+
+Line breaks do not have inner structure, so they are declared as being empty:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ELEMENT br EMPTY&#62;</PRE
+>
+
+This means that really nothing is allowed within <TT
+CLASS="LITERAL"
+>br</TT
+>; you
+must always write <TT
+CLASS="LITERAL"
+>&#60;br&#62;&#60;/br&#62;</TT
+> or abbreviated
+<TT
+CLASS="LITERAL"
+>&#60;br/&#62;</TT
+>.</P
+><P
+>Code samples should be marked up by the <TT
+CLASS="LITERAL"
+>code</TT
+> tag; emphasized
+text can be indicated by <TT
+CLASS="LITERAL"
+>em</TT
+>:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ELEMENT code (#PCDATA)&#62;
+
+&#60;!ELEMENT em (#PCDATA|%text;)*&#62;</PRE
+>
+
+That <TT
+CLASS="LITERAL"
+>code</TT
+> elements are not allowed to contain further markup
+while <TT
+CLASS="LITERAL"
+>em</TT
+> elements do is a design decision by the author of
+the DTD.</P
+><P
+>Unordered lists simply consist of one or more list items, and a list item may
+contain paragraph-level material:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ELEMENT ul (li+)&#62;
+
+&#60;!ELEMENT li (%p.like;)*&#62;</PRE
+>
+
+Footnotes are described by the text of the note; this text may contain
+text-level markup. There is no mechanism to describe the numbering scheme of
+footnotes, or to specify how footnote references are printed.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ELEMENT footnote (#PCDATA|%text;)*&#62;</PRE
+>
+
+Hyperlinks are written as in HTML. The anchor tag contains the text describing
+where the link points to, and the <TT
+CLASS="LITERAL"
+>href</TT
+> attribute is the
+pointer (as URL). There is no way to describe locations of "hash marks". If the
+link refers to another <I
+CLASS="EMPHASIS"
+>readme</I
+> document, the attribute
+<TT
+CLASS="LITERAL"
+>readmeref</TT
+> should be used instead of <TT
+CLASS="LITERAL"
+>href</TT
+>.
+The reason is that the converted document usually has a different system
+identifier (file name), and the link to a converted document must be
+converted, too.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ELEMENT a (#PCDATA)*&#62;
+&#60;!ATTLIST a 
+          href      CDATA #IMPLIED
+          readmeref CDATA #IMPLIED
+&#62;</PRE
+>
+
+Note that although it is only sensible to specify one of the two attributes,
+the DTD has no means to express this restriction.</P
+><P
+>So much for the DTD. Finally, here is a sample document for it:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;?xml version="1.0" encoding="ISO-8859-1"?&#62;
+&#60;!DOCTYPE readme SYSTEM "readme.dtd"&#62;
+&#60;readme title="How to use the readme converters"&#62;
+&#60;sect1&#62;
+  &#60;title&#62;Usage&#60;/title&#62;
+  &#60;p&#62;
+    The &#60;em&#62;readme&#60;/em&#62; converter is invoked on the command line by:
+  &#60;/p&#62;
+  &#60;p&#62;
+    &#60;code&#62;readme [ -text | -html ] input.xml&#60;/code&#62;
+  &#60;/p&#62;
+  &#60;p&#62;
+    Here a list of options:
+  &#60;/p&#62;
+  &#60;ul&#62;
+    &#60;li&#62;
+      &#60;p&#62;&#60;code&#62;-text&#60;/code&#62;: specifies that ASCII output should be produced&#60;/p&#62;
+    &#60;/li&#62;
+    &#60;li&#62;
+      &#60;p&#62;&#60;code&#62;-html&#60;/code&#62;: specifies that HTML output should be produced&#60;/p&#62;
+    &#60;/li&#62;
+  &#60;/ul&#62;
+  &#60;p&#62;
+    The input file must be given on the command line. The converted output is
+    printed to &#60;em&#62;stdout&#60;/em&#62;.
+  &#60;/p&#62;
+&#60;/sect1&#62;
+&#60;sect1&#62;
+  &#60;title&#62;Author&#60;/title&#62;
+  &#60;p&#62;
+    The program has been written by
+    &#60;a href="mailto:Gerd.Stolpmann@darmstadt.netsurf.de"&#62;Gerd Stolpmann&#60;/a&#62;.
+  &#60;/p&#62;
+&#60;/sect1&#62;
+&#60;/readme&#62;</PRE
+>&#13;</P
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x107.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="c533.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Highlights of XML</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c36.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x550.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x550.html
new file mode 100644 (file)
index 0000000..f2dcdd7
--- /dev/null
@@ -0,0 +1,765 @@
+<HTML
+><HEAD
+><TITLE
+>How to parse a document from an application</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Using PXP"
+HREF="c533.html"><LINK
+REL="PREVIOUS"
+TITLE="Using PXP"
+HREF="c533.html"><LINK
+REL="NEXT"
+TITLE="Class-based processing of the node tree"
+HREF="x675.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="c533.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 2. Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x675.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN550"
+>2.2. How to parse a document from an application</A
+></H1
+><P
+>Let me first give a rough overview of the object model of the parser. The
+following items are represented by objects:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>Documents:</I
+> The document representation is more or less the
+anchor for the application; all accesses to the parsed entities start here. It
+is described by the class <TT
+CLASS="LITERAL"
+>document</TT
+> contained in the module
+<TT
+CLASS="LITERAL"
+>Pxp_document</TT
+>. You can get some global information, such
+as the XML declaration the document begins with, the DTD of the document,
+global processing instructions, and, most importantly, the document tree. </P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>The contents of documents:</I
+> The contents have the structure
+of a tree: Elements contain other elements and text<A
+NAME="AEN562"
+HREF="#FTN.AEN562"
+>[1]</A
+>. 
+
+The common type to represent both kinds of content is <TT
+CLASS="LITERAL"
+>node</TT
+>
+which is a class type that unifies the properties of elements and character
+data. Every node has a list of children (which is empty if the element is empty
+or the node represents text); nodes may have attributes; nodes have always text
+contents. There are two implementations of <TT
+CLASS="LITERAL"
+>node</TT
+>, the class
+<TT
+CLASS="LITERAL"
+>element_impl</TT
+> for elements, and the class
+<TT
+CLASS="LITERAL"
+>data_impl</TT
+> for text data. You find these classes and class
+types in the module <TT
+CLASS="LITERAL"
+>Pxp_document</TT
+>, too.</P
+><P
+>Note that attribute lists are represented by non-class values.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>The node extension:</I
+> For advanced usage, every node of the
+document may have an associated <I
+CLASS="EMPHASIS"
+>extension</I
+> which is simply
+a second object. This object must have the three methods
+<TT
+CLASS="LITERAL"
+>clone</TT
+>, <TT
+CLASS="LITERAL"
+>node</TT
+>, and
+<TT
+CLASS="LITERAL"
+>set_node</TT
+> as a bare minimum, but you are free to add methods as
+you want. This is the preferred way to add functionality to the document
+tree<A
+NAME="AEN582"
+HREF="#FTN.AEN582"
+>[2]</A
+>. The class type <TT
+CLASS="LITERAL"
+>extension</TT
+> is
+defined in <TT
+CLASS="LITERAL"
+>Pxp_document</TT
+>, too.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>The DTD:</I
+> Sometimes it is necessary to access the DTD of a
+document; the average application does not need this feature. The class
+<TT
+CLASS="LITERAL"
+>dtd</TT
+> describes DTDs, and makes it possible to get
+representations of element, entity, and notation declarations as well as
+processing instructions contained in the DTD. This class, and
+<TT
+CLASS="LITERAL"
+>dtd_element</TT
+>, <TT
+CLASS="LITERAL"
+>dtd_notation</TT
+>, and
+<TT
+CLASS="LITERAL"
+>proc_instruction</TT
+> can be found in the module
+<TT
+CLASS="LITERAL"
+>Pxp_dtd</TT
+>. There are a couple of classes representing
+different kinds of entities; these can be found in the module
+<TT
+CLASS="LITERAL"
+>Pxp_entity</TT
+>. </P
+></LI
+></UL
+>
+
+Additionally, the following modules play a role:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>Pxp_yacc:</I
+> Here the main parsing functions such as
+<TT
+CLASS="LITERAL"
+>parse_document_entity</TT
+> are located. Some additional types and
+functions allow the parser to be configured in a non-standard way.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>Pxp_types:</I
+> This is a collection of basic types and
+exceptions. </P
+></LI
+></UL
+>
+
+There are some further modules that are needed internally but are not part of
+the API.</P
+><P
+>Let the document to be parsed be stored in a file called
+<TT
+CLASS="LITERAL"
+>doc.xml</TT
+>. The parsing process is started by calling the
+function
+
+<PRE
+CLASS="PROGRAMLISTING"
+>val parse_document_entity : config -&#62; source -&#62; 'ext spec -&#62; 'ext document</PRE
+>
+
+defined in the module <TT
+CLASS="LITERAL"
+>Pxp_yacc</TT
+>. The first argument
+specifies some global properties of the parser; it is recommended to start with
+the <TT
+CLASS="LITERAL"
+>default_config</TT
+>. The second argument determines where the
+document to be parsed comes from; this may be a file, a channel, or an entity
+ID. To parse <TT
+CLASS="LITERAL"
+>doc.xml</TT
+>, it is sufficient to pass
+<TT
+CLASS="LITERAL"
+>from_file "doc.xml"</TT
+>. </P
+><P
+>The third argument passes the object specification to use. Roughly
+speaking, it determines which classes implement the node objects of which
+element types, and which extensions are to be used. The <TT
+CLASS="LITERAL"
+>'ext</TT
+>
+type variable is the type of the extension. For the moment, let us
+simply pass <TT
+CLASS="LITERAL"
+>default_spec</TT
+> as this argument, and ignore it.</P
+><P
+>So the following expression parses <TT
+CLASS="LITERAL"
+>doc.xml</TT
+>:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_yacc
+let d = parse_document_entity default_config (from_file "doc.xml") default_spec</PRE
+>
+
+Note that <TT
+CLASS="LITERAL"
+>default_config</TT
+> implies that warnings are collected
+but not printed. Errors raise one of the exceptions defined in
+<TT
+CLASS="LITERAL"
+>Pxp_types</TT
+>; to get readable errors and warnings, catch the
+exceptions as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class warner =
+  object 
+    method warn w =
+      print_endline ("WARNING: " ^ w)
+  end
+;;
+
+try
+  let config = { default_config with warner = new warner } in
+  let d = parse_document_entity config (from_file "doc.xml") default_spec
+  in
+    ...
+with
+   e -&#62;
+     print_endline (Pxp_types.string_of_exn e)</PRE
+>
+
+Now <TT
+CLASS="LITERAL"
+>d</TT
+> is an object of the <TT
+CLASS="LITERAL"
+>document</TT
+>
+class. If you want the node tree, you can get the root element by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let root = d # root</PRE
+>
+
+and if you would rather access the DTD, you can get it by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let dtd = d # dtd</PRE
+>
+
+As it is more interesting, let us investigate the node tree now. Given the root
+element, it is possible to recursively traverse the whole tree. The children of
+a node <TT
+CLASS="LITERAL"
+>n</TT
+> are returned by the method
+<TT
+CLASS="LITERAL"
+>sub_nodes</TT
+>, and the type of a node is returned by
+<TT
+CLASS="LITERAL"
+>node_type</TT
+>. This function traverses the tree, and prints the
+type of each node:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let rec print_structure n =
+  let ntype = n # node_type in
+  match ntype with
+    T_element name -&#62;
+      print_endline ("Element of type " ^ name);
+      let children = n # sub_nodes in
+      List.iter print_structure children
+  | T_data -&#62;
+      print_endline "Data"
+  | _ -&#62;
+      (* Other node types are not possible unless the parser is configured
+         differently.
+       *)
+      assert false</PRE
+>
+
+You can call this function by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>print_structure root</PRE
+>
+
+The type returned by <TT
+CLASS="LITERAL"
+>node_type</TT
+> is either <TT
+CLASS="LITERAL"
+>T_element
+name</TT
+> or <TT
+CLASS="LITERAL"
+>T_data</TT
+>. The <TT
+CLASS="LITERAL"
+>name</TT
+> of the
+element type is the string included in the angle brackets. Note that only
+elements have children; data nodes are always leaves of the tree.</P
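+><P
+>The same recursion scheme can also be used to compute a value instead of
+printing one. The following function is only a sketch (it is not part of PXP
+and uses nothing beyond the node_type and sub_nodes methods shown above); it
+counts the element nodes of a tree:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>(* Sketch: count the element nodes of a (sub)tree. An element counts
+   itself plus the counts of its children; data nodes count as 0. *)
+let rec count_elements n =
+  match n # node_type with
+    T_element _ -&#62;
+      List.fold_left
+        (fun acc child -&#62; acc + count_elements child)
+        1
+        (n # sub_nodes)
+  | _ -&#62; 0</PRE
+>
+
+For the document parsed above, count_elements root would return the total
+number of element nodes in the tree.</P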
+><P
+>There are some more methods for accessing a parsed node tree:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>n # parent</TT
+>: Returns the parent node, or raises
+<TT
+CLASS="LITERAL"
+>Not_found</TT
+> if the node is already the root</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>n # root</TT
+>: Returns the root of the node tree. </P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>n # attribute a</TT
+>: Returns the value of the attribute with
+name <TT
+CLASS="LITERAL"
+>a</TT
+>. The method returns a value for every
+<I
+CLASS="EMPHASIS"
+>declared</I
+> attribute, regardless of whether the attribute
+instance is defined or not. If the attribute is not declared,
+<TT
+CLASS="LITERAL"
+>Not_found</TT
+> will be raised. (In well-formedness mode, every
+attribute is considered as being implicitly declared with type
+<TT
+CLASS="LITERAL"
+>CDATA</TT
+>.) </P
+><P
+>The following return values are possible: <TT
+CLASS="LITERAL"
+>Value s</TT
+>, 
+<TT
+CLASS="LITERAL"
+>Valuelist sl</TT
+> , and <TT
+CLASS="LITERAL"
+>Implied_value</TT
+>. 
+The first two value types indicate that the attribute value is available,
+either because there is a definition
+<TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>a</I
+></TT
+>="<TT
+CLASS="REPLACEABLE"
+><I
+>value</I
+></TT
+>"</TT
+>
+in the XML text, or because there is a default value (declared in the
+DTD). Only if both the instance definition and the default declaration are
+missing is the latter value <TT
+CLASS="LITERAL"
+>Implied_value</TT
+> returned.</P
+><P
+>In the DTD, every attribute is typed. There are single-value types (CDATA, ID,
+IDREF, ENTITY, NMTOKEN, enumerations), in which case the method passes
+<TT
+CLASS="LITERAL"
+>Value s</TT
+> back, where <TT
+CLASS="LITERAL"
+>s</TT
+> is the normalized
+string value of the attribute. The other types (IDREFS, ENTITIES, NMTOKENS)
+represent list values, and the parser splits the XML literal into several
+tokens and returns these tokens as <TT
+CLASS="LITERAL"
+>Valuelist sl</TT
+>.</P
+><P
+>Normalization means that entity references (the
+<TT
+CLASS="LITERAL"
+>&amp;<TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+>;</TT
+> tokens) and
+character references
+(<TT
+CLASS="LITERAL"
+>&amp;#<TT
+CLASS="REPLACEABLE"
+><I
+>number</I
+></TT
+>;</TT
+>) are replaced
+by the text they represent, and that white space characters are converted into
+plain spaces. (A small helper for handling these value variants is sketched
+just after this list.)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>n # data</TT
+>: Returns the character data contained in the
+node. For data nodes, the meaning is obvious as this is the main content of
+data nodes. For element nodes, this method returns the concatenated contents of
+all inner data nodes.</P
+><P
+>Note that entity references included in the text are resolved while they are
+being parsed; for example the text "a &#38;lt;&#38;gt; b" will be returned
+as "a &#60;&#62; b" by this method. Spaces of data nodes are always
+preserved. Newlines are preserved, but always converted to \n characters even
+if newlines are encoded as \r\n or \r. Normally you will never see two adjacent
+data nodes because the parser collapses all data material at one location into
+one node. (However, if you create your own tree or transform the parsed tree,
+it is possible to have adjacent data nodes.)</P
+><P
+>Note that elements that do <I
+CLASS="EMPHASIS"
+>not</I
+> allow #PCDATA as content
+will not have data nodes as children. This means that spaces and newlines, the
+only character material allowed for such elements, are silently dropped.</P
+></LI
+></UL
+>
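+
+The attribute values returned by the attribute method above can be handled
+uniformly by a small helper function. The following is only a sketch (it is
+not part of PXP); as in the examples above, it assumes that the constructors
+Value, Valuelist, and Implied_value from Pxp_types are in scope:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>(* Sketch: turn any attribute value into a list of strings.  Single-value
+   types yield a one-element list, list types yield their tokens, and a
+   missing value yields the empty list. *)
+let tokens_of_attribute n name =
+  match n # attribute name with
+    Value s       -&#62; [ s ]
+  | Valuelist sl  -&#62; sl
+  | Implied_value -&#62; []</PRE
+>
+
+With such a helper, single-value and list-valued attribute types can be
+processed by the same list functions.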
+
+For example, if the task is to print all contents of elements with type
+"valuable" whose attribute "priority" is "1", this function can help:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let rec print_valuable_prio1 n =
+  let ntype = n # node_type in
+  match ntype with
+    T_element "valuable" when n # attribute "priority" = Value "1" -&#62;
+      print_endline "Valuable node with priority 1 found:";
+      print_endline (n # data)
+  | (T_element _ | T_data) -&#62;
+      let children = n # sub_nodes in
+      List.iter print_valuable_prio1 children
+  | _ -&#62;
+      assert false</PRE
+>
+
+You can call this function by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>print_valuable_prio1 root</PRE
+>
+
+If you like a DSSSL-like style, you can make the function
+<TT
+CLASS="LITERAL"
+>process_children</TT
+> explicit:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let rec print_valuable_prio1 n =
+
+  let process_children n =
+    let children = n # sub_nodes in
+    List.iter print_valuable_prio1 children 
+  in
+
+  let ntype = n # node_type in
+  match ntype with
+    T_element "valuable" when n # attribute "priority" = Value "1" -&#62;
+      print_endline "Valuable node with priority 1 found:";
+      print_endline (n # data)
+  | (T_element _ | T_data) -&#62;
+      process_children n
+  | _ -&#62;
+      assert false</PRE
+>
+
+Used in this way, O'Caml becomes a simple "style-sheet language": You can form a big
+"match" expression to distinguish between all significant cases, and provide
+different reactions to different conditions. But this technique has
+limitations; the "match" expression tends to get larger and larger, and it is
+difficult to store intermediate values as there is only one big
+recursion. Alternatively, it is also possible to represent the various cases as
+classes, and to use dynamic method lookup to find the appropriate class. The
+next section explains this technique in detail.&#13;</P
+></DIV
+><H3
+CLASS="FOOTNOTES"
+>Notes</H3
+><TABLE
+BORDER="0"
+CLASS="FOOTNOTES"
+WIDTH="100%"
+><TR
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="5%"
+><A
+NAME="FTN.AEN562"
+HREF="x550.html#AEN562"
+>[1]</A
+></TD
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="95%"
+><P
+>Elements may
+also contain processing instructions. Unlike other document models, <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>
+separates processing instructions from the rest of the text and provides a
+second interface to access them (method <TT
+CLASS="LITERAL"
+>pinstr</TT
+>). However,
+there is a parser option (<TT
+CLASS="LITERAL"
+>enable_pinstr_nodes</TT
+>) which changes
+the behaviour of the parser such that extra nodes for processing instructions
+are included into the tree.</P
+><P
+>Furthermore, the tree does not normally contain nodes for XML comments;
+they are ignored by default. Again, there is an option
+(<TT
+CLASS="LITERAL"
+>enable_comment_nodes</TT
+>) changing this.</P
+></TD
+></TR
+><TR
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="5%"
+><A
+NAME="FTN.AEN582"
+HREF="x550.html#AEN582"
+>[2]</A
+></TD
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="95%"
+><P
+>Due to the typing system it is more or less impossible to
+derive recursive classes in O'Caml. To get around this, it is common practice
+to put the modifiable or extensible part of recursive objects into parallel
+objects.</P
+></TD
+></TR
+></TABLE
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="c533.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x675.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c533.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Class-based processing of the node tree</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x675.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x675.html
new file mode 100644 (file)
index 0000000..cf3f473
--- /dev/null
@@ -0,0 +1,538 @@
+<HTML
+><HEAD
+><TITLE
+>Class-based processing of the node tree</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Using PXP"
+HREF="c533.html"><LINK
+REL="PREVIOUS"
+TITLE="How to parse a document from an application"
+HREF="x550.html"><LINK
+REL="NEXT"
+TITLE="Example: An HTML backend for the readme
+DTD"
+HREF="x738.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x550.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 2. Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x738.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN675"
+>2.3. Class-based processing of the node tree</A
+></H1
+><P
+>By default, the parsed node tree consists of objects of the same class; this is
+a good design as long as you only want to access selected parts of the
+document. For complex transformations, it may be better to use different
+classes for objects describing different element types.</P
+><P
+>For example, if the DTD declares the element types <TT
+CLASS="LITERAL"
+>a</TT
+>,
+<TT
+CLASS="LITERAL"
+>b</TT
+>, and <TT
+CLASS="LITERAL"
+>c</TT
+>, and if the task is to convert
+an arbitrary document into a printable format, the idea is to define for every
+element type a separate class that has a method <TT
+CLASS="LITERAL"
+>print</TT
+>. The
+classes are <TT
+CLASS="LITERAL"
+>eltype_a</TT
+>, <TT
+CLASS="LITERAL"
+>eltype_b</TT
+>, and
+<TT
+CLASS="LITERAL"
+>eltype_c</TT
+>, and every class implements
+<TT
+CLASS="LITERAL"
+>print</TT
+> such that elements of the type corresponding to the
+class are converted to the output format.</P
+><P
+>The parser supports such a design directly. As it is impossible to derive
+recursive classes in O'Caml<A
+NAME="AEN688"
+HREF="#FTN.AEN688"
+>[1]</A
+>, the specialized element classes cannot be formed by
+simply inheriting from the built-in classes of the parser and adding methods
+for customized functionality. To get around this limitation, every node of the
+document tree is represented by <I
+CLASS="EMPHASIS"
+>two</I
+> objects, one called
+"the node" and containing the recursive definition of the tree, the other called "the
+extension". Every node object has a reference to the extension, and the
+extension has a reference to the node. The advantage of this model is that it
+is now possible to customize the extension without affecting the typing
+constraints of the recursive node definition.</P
+><P
+>Every extension must have the three methods <TT
+CLASS="LITERAL"
+>clone</TT
+>,
+<TT
+CLASS="LITERAL"
+>node</TT
+>, and <TT
+CLASS="LITERAL"
+>set_node</TT
+>. The method
+<TT
+CLASS="LITERAL"
+>clone</TT
+> creates a deep copy of the extension object and
+returns it; <TT
+CLASS="LITERAL"
+>node</TT
+> returns the node object for this extension
+object; and <TT
+CLASS="LITERAL"
+>set_node</TT
+> is used to tell the extension object
+which node is associated with it, this method is automatically called when the
+node tree is initialized. The following definition is a good starting point
+for these methods; usually <TT
+CLASS="LITERAL"
+>clone</TT
+> must be further refined
+when instance variables are added to the class:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class custom_extension =
+  object (self)
+
+    val mutable node = (None : custom_extension node option)
+
+    method clone = {&#60; &#62;} 
+    method node =
+      match node with
+          None -&#62;
+            assert false
+        | Some n -&#62; n
+    method set_node n =
+      node &#60;- Some n
+
+  end</PRE
+>
+
+This part of the extension is usually the same for all classes, so it is a good
+idea to consider <TT
+CLASS="LITERAL"
+>custom_extension</TT
+> as the super-class of the
+further class definitions. Continuing the example from above, we can define the
+element type classes as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class virtual custom_extension =
+  object (self)
+    ... clone, node, set_node defined as above ...
+
+    method virtual print : out_channel -&#62; unit
+  end
+
+class eltype_a =
+  object (self)
+    inherit custom_extension
+    method print ch = ...
+  end
+
+class eltype_b =
+  object (self)
+    inherit custom_extension
+    method print ch = ...
+  end
+
+class eltype_c =
+  object (self)
+    inherit custom_extension
+    method print ch = ...
+  end</PRE
+>
+
+The method <TT
+CLASS="LITERAL"
+>print</TT
+> can now be implemented for every element
+type separately. Note that you get the associated node by invoking
+
+<PRE
+CLASS="PROGRAMLISTING"
+>self # node</PRE
+>
+
+and you get the extension object of a node <TT
+CLASS="LITERAL"
+>n</TT
+> by writing 
+
+<PRE
+CLASS="PROGRAMLISTING"
+>n # extension</PRE
+>
+
+It is guaranteed that 
+
+<PRE
+CLASS="PROGRAMLISTING"
+>self # node # extension == self</PRE
+>
+
+always holds.</P
+><P
+>Here are sample definitions of the <TT
+CLASS="LITERAL"
+>print</TT
+>
+methods:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class eltype_a =
+  object (self)
+    inherit custom_extension
+    method print ch = 
+      (* Nodes &#60;a&#62;...&#60;/a&#62; are only containers: *)
+      output_string ch "(";
+      List.iter
+        (fun n -&#62; n # extension # print ch)
+        (self # node # sub_nodes);
+      output_string ch ")";
+  end
+
+class eltype_b =
+  object (self)
+    inherit custom_extension
+    method print ch =
+      (* Print the value of the CDATA attribute "print": *)
+      match self # node # attribute "print" with
+        Value s       -&#62; output_string ch s
+      | Implied_value -&#62; output_string ch "&#60;missing&#62;"
+      | Valuelist l   -&#62; assert false   
+                         (* not possible because the att is CDATA *)
+  end
+
+class eltype_c =
+  object (self)
+    inherit custom_extension
+    method print ch = 
+      (* Print the contents of this element: *)
+      output_string ch (self # node # data)
+  end
+
+class null_extension =
+  object (self)
+    inherit custom_extension
+    method print ch = assert false
+  end</PRE
+></P
+><P
+>The remaining task is to configure the parser such that these extension classes
+are actually used. Here another problem arises: It is not possible to
+dynamically select the class of an object to be created. As a workaround,
+<SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> allows the user to specify <I
+CLASS="EMPHASIS"
+>exemplar objects</I
+> for
+the various element types; instead of creating the nodes of the tree by
+applying the <TT
+CLASS="LITERAL"
+>new</TT
+> operator, the nodes are produced by
+duplicating the exemplars. As object duplication preserves the class of the
+object, one can create fresh objects of every class for which previously an
+exemplar has been registered.</P
+><P
+>Exemplars are meant as objects without contents; the only interesting thing is
+that exemplars are instances of a certain class. The creation of an exemplar
+for an element node can be done by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let element_exemplar = new element_impl extension_exemplar</PRE
+>
+
+And a data node exemplar is created by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let data_exemplar = new data_impl extension_exemplar</PRE
+>
+
+The classes <TT
+CLASS="LITERAL"
+>element_impl</TT
+> and <TT
+CLASS="LITERAL"
+>data_impl</TT
+>
+are defined in the module <TT
+CLASS="LITERAL"
+>Pxp_document</TT
+>. The constructors
+initialize the fresh objects as empty objects, i.e. without children, without
+data contents, and so on. The <TT
+CLASS="LITERAL"
+>extension_exemplar</TT
+> is the
+initial extension object the exemplars are associated with. </P
+><P
+>Once the exemplars are created and stored somewhere (e.g. in a hash table), you
+can take an exemplar and create a concrete instance (with contents) by
+duplicating it. As a user of the parser you are normally not concerned with this,
+as it is part of the internal logic of the parser, but as background knowledge
+it is worthwhile to mention that the two methods
+<TT
+CLASS="LITERAL"
+>create_element</TT
+> and <TT
+CLASS="LITERAL"
+>create_data</TT
+> actually
+perform the duplication of the exemplar for which they are invoked,
+additionally apply modifications to the clone, and finally return the new
+object. Moreover, the extension object is copied, too, and the new node object
+is associated with the fresh extension object. Note that this is the reason why
+every extension object must have a <TT
+CLASS="LITERAL"
+>clone</TT
+> method.</P
+><P
+>The configuration of the set of exemplars is passed to the
+<TT
+CLASS="LITERAL"
+>parse_document_entity</TT
+> function as the third argument. In our
+example, this argument can be set up as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let spec =
+  make_spec_from_alist
+    ~data_exemplar:            (new data_impl (new null_extension))
+    ~default_element_exemplar: (new element_impl (new null_extension))
+    ~element_alist:
+       [ "a",  new element_impl (new eltype_a);
+         "b",  new element_impl (new eltype_b);
+         "c",  new element_impl (new eltype_c);
+       ]
+    ()</PRE
+>
+
+The <TT
+CLASS="LITERAL"
+>~element_alist</TT
+> function argument defines the mapping
+from element types to exemplars as an associative list. The argument
+<TT
+CLASS="LITERAL"
+>~data_exemplar</TT
+> specifies the exemplar for data nodes, and
+the <TT
+CLASS="LITERAL"
+>~default_element_exemplar</TT
+> is used whenever the parser
+finds an element type for which the associative list does not define an
+exemplar. </P
+><P
+>The configuration is now complete. You can still use the same parsing
+functions, only the initialization is a bit different. For example, call the
+parser by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let d = parse_document_entity default_config (from_file "doc.xml") spec</PRE
+>
+
+Note that the resulting document <TT
+CLASS="LITERAL"
+>d</TT
+> has a usable type;
+in particular, the <TT
+CLASS="LITERAL"
+>print</TT
+> method we added is visible. So you can
+print your document by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>d # root # extension # print stdout</PRE
+></P
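+><P
+>Putting the pieces of this section together, a complete program could look
+like the following sketch. It assumes the extension classes and the value
+spec defined above, and an input file doc.xml; it is only one possible way to
+wire things up:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>(* Sketch of a main program: parse doc.xml with the exemplar-based
+   spec from above, then print the tree via the extension objects. *)
+open Pxp_yacc
+
+let () =
+  try
+    let d = parse_document_entity default_config (from_file "doc.xml") spec in
+    d # root # extension # print stdout
+  with
+    e -&#62; print_endline (Pxp_types.string_of_exn e)</PRE
+>
+
+Because the printing logic lives in the extension classes, this main program
+does not need to know anything about the individual element types.</P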
+><P
+>This object-oriented approach looks rather complicated; this is mostly caused
+by working around some problems of the strict typing system of O'Caml. Some
+auxiliary concepts such as extensions were needed, but the practical
+consequences are low. In the next section, one of the examples of the
+distribution is explained, a converter from <I
+CLASS="EMPHASIS"
+>readme</I
+>
+documents to HTML.</P
+></DIV
+><H3
+CLASS="FOOTNOTES"
+>Notes</H3
+><TABLE
+BORDER="0"
+CLASS="FOOTNOTES"
+WIDTH="100%"
+><TR
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="5%"
+><A
+NAME="FTN.AEN688"
+HREF="x675.html#AEN688"
+>[1]</A
+></TD
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="95%"
+><P
+>The problem is that the subclass is
+usually not a subtype in this case because O'Caml has a contravariant subtyping
+rule. </P
+></TD
+></TR
+></TABLE
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x550.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x738.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>How to parse a document from an application</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c533.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Example: An HTML backend for the <I
+CLASS="EMPHASIS"
+>readme</I
+>
+DTD</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x738.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x738.html
new file mode 100644 (file)
index 0000000..6741801
--- /dev/null
@@ -0,0 +1,1036 @@
+<HTML
+><HEAD
+><TITLE
+>Example: An HTML backend for the readme
+DTD</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Using PXP"
+HREF="c533.html"><LINK
+REL="PREVIOUS"
+TITLE="Class-based processing of the node tree"
+HREF="x675.html"><LINK
+REL="NEXT"
+TITLE="The objects representing the document"
+HREF="c893.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x675.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 2. Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="c893.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="SECT.README.TO-HTML"
+>2.4. Example: An HTML backend for the <I
+CLASS="EMPHASIS"
+>readme</I
+>
+DTD</A
+></H1
+><P
+>The converter from <I
+CLASS="EMPHASIS"
+>readme</I
+> documents to HTML
+documents strictly follows the approach of defining one class per element
+type. The HTML code is similar to the <I
+CLASS="EMPHASIS"
+>readme</I
+> source;
+because of this, most elements can be converted in the following way: Given the
+input element 
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;e&#62;content&#60;/e&#62;</PRE
+>
+
+the conversion text is the concatenation of a computed prefix, the recursively
+converted content, and a computed suffix. </P
+><P
+>Only one element type cannot be handled by this scheme:
+<TT
+CLASS="LITERAL"
+>footnote</TT
+>. Footnotes are collected as they are found in
+the input text, and they are printed after the main text has been converted and
+printed. </P
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN747"
+>2.4.1. Header</A
+></H2
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_types
+open Pxp_document</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN751"
+>2.4.2. Type declarations</A
+></H2
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class type footnote_printer =
+  object
+    method footnote_to_html : store_type -&gt; out_channel -&gt; unit
+  end
+
+and store_type =
+  object
+    method alloc_footnote : footnote_printer -&gt; int
+    method print_footnotes : out_channel -&gt; unit
+  end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN755"
+>2.4.3. Class <TT
+CLASS="LITERAL"
+>store</TT
+></A
+></H2
+><P
+>The <TT
+CLASS="LITERAL"
+>store</TT
+> is a container for footnotes. You can add a
+footnote by invoking <TT
+CLASS="LITERAL"
+>alloc_footnote</TT
+>; the argument is an
+object of the class <TT
+CLASS="LITERAL"
+>footnote_printer</TT
+>, and the method returns the
+number of the footnote. The interesting property of a footnote is that it can
+be converted to HTML, so a <TT
+CLASS="LITERAL"
+>footnote_printer</TT
+> is an object
+with a method <TT
+CLASS="LITERAL"
+>footnote_to_html</TT
+>. The class
+<TT
+CLASS="LITERAL"
+>footnote</TT
+> which is defined below has a compatible method
+<TT
+CLASS="LITERAL"
+>footnote_to_html</TT
+> such that objects created from it can be
+used as <TT
+CLASS="LITERAL"
+>footnote_printer</TT
+>s.</P
+><P
+>The other method, <TT
+CLASS="LITERAL"
+>print_footnotes</TT
+>, prints the footnotes as a
+definition list, and is typically invoked after the main material of the page
+has already been printed. Every item of the list is printed by
+<TT
+CLASS="LITERAL"
+>footnote_to_html</TT
+>.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class store =
+  object (self)
+
+    val mutable footnotes = ( [] : (int * footnote_printer) list )
+    val mutable next_footnote_number = 1
+
+    method alloc_footnote n =
+      let number = next_footnote_number in
+      next_footnote_number &lt;- number+1;
+      footnotes &lt;- footnotes @ [ number, n ];
+      number
+
+    method print_footnotes ch =
+      if footnotes &lt;&gt; [] then begin
+       output_string ch "&lt;hr align=left noshade=noshade width=\"30%\"&gt;\n";
+       output_string ch "&lt;dl&gt;\n";
+       List.iter
+         (fun (_,n) -&gt; 
+            n # footnote_to_html (self : #store_type :&gt; store_type) ch)
+         footnotes;
+       output_string ch "&lt;/dl&gt;\n";
+      end
+
+  end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN772"
+>2.4.4. Function <TT
+CLASS="LITERAL"
+>escape_html</TT
+></A
+></H2
+><P
+>This function converts the characters &lt;, &gt;, &amp;, and " to their HTML
+representation. For example, 
+<TT
+CLASS="LITERAL"
+>escape_html "&lt;&gt;" = "&amp;lt;&amp;gt;"</TT
+>. Other
+characters are left unchanged.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let escape_html s =
+  Str.global_substitute
+    (Str.regexp "&lt;\\|&gt;\\|&amp;\\|\"")
+    (fun s -&gt;
+      match Str.matched_string s with
+        "&lt;" -&gt; "&amp;lt;"
+      | "&gt;" -&gt; "&amp;gt;"
+      | "&amp;" -&gt; "&amp;amp;"
+      | "\"" -&gt; "&amp;quot;"
+      | _ -&gt; assert false)
+    s
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN778"
+>2.4.5. Virtual class <TT
+CLASS="LITERAL"
+>shared</TT
+></A
+></H2
+><P
+>This virtual class is the abstract superclass of the extension classes shown
+below. It defines the standard methods <TT
+CLASS="LITERAL"
+>clone</TT
+>,
+<TT
+CLASS="LITERAL"
+>node</TT
+>, and <TT
+CLASS="LITERAL"
+>set_node</TT
+>, and declares the type
+of the virtual method <TT
+CLASS="LITERAL"
+>to_html</TT
+>. This method recursively
+traverses the whole element tree, and prints the converted HTML code to the
+output channel passed as second argument. The first argument is the reference
+to the global <TT
+CLASS="LITERAL"
+>store</TT
+> object which collects the footnotes.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class virtual shared =
+  object (self)
+
+    (* --- default_ext --- *)
+
+    val mutable node = (None : shared node option)
+
+    method clone = {&lt; &gt;} 
+    method node =
+      match node with
+          None -&gt;
+            assert false
+        | Some n -&gt; n
+    method set_node n =
+      node &lt;- Some n
+
+    (* --- virtual --- *)
+
+    method virtual to_html : store -&gt; out_channel -&gt; unit
+
+  end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN788"
+>2.4.6. Class <TT
+CLASS="LITERAL"
+>only_data</TT
+></A
+></H2
+><P
+>This class defines <TT
+CLASS="LITERAL"
+>to_html</TT
+> such that the character data of
+the current node is converted to HTML. Note that <TT
+CLASS="LITERAL"
+>self</TT
+> is an
+extension object, <TT
+CLASS="LITERAL"
+>self # node</TT
+> is the node object, and
+<TT
+CLASS="LITERAL"
+>self # node # data</TT
+> returns the character data of the node. 
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class only_data =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      output_string ch (escape_html (self # node # data))
+  end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN797"
+>2.4.7. Class <TT
+CLASS="LITERAL"
+>readme</TT
+></A
+></H2
+><P
+>This class converts elements of type <TT
+CLASS="LITERAL"
+>readme</TT
+> to HTML. Such an
+element is (by definition) always the root element of the document. First, the
+HTML header is printed; the <TT
+CLASS="LITERAL"
+>title</TT
+> attribute of the element
+determines the title of the HTML page. Some aspects of the HTML page can be
+configured by setting certain parameter entities, for example the background
+color, the text color, and link colors. After the header, the
+<TT
+CLASS="LITERAL"
+>body</TT
+> tag, and the headline have been printed, the contents
+of the page are converted by invoking <TT
+CLASS="LITERAL"
+>to_html</TT
+> on all
+children of the current node (which is the root node). Then, the footnotes are
+appended to this by telling the global <TT
+CLASS="LITERAL"
+>store</TT
+> object to print
+the footnotes. Finally, the end tags of the HTML page are printed.</P
+><P
+>This class is an example of how to access the value of an attribute: The value is
+determined by invoking <TT
+CLASS="LITERAL"
+>self # node # attribute "title"</TT
+>. As
+this attribute has been declared as CDATA and as being required, the value has
+always the form <TT
+CLASS="LITERAL"
+>Value s</TT
+> where <TT
+CLASS="LITERAL"
+>s</TT
+> is the
+string value of the attribute. </P
+><P
+>You can also see how entity contents can be accessed. A parameter entity object
+can be looked up by <TT
+CLASS="LITERAL"
+>self # node # dtd # par_entity "name"</TT
+>,
+and by invoking <TT
+CLASS="LITERAL"
+>replacement_text</TT
+> the value of the entity
+is returned after inner parameter and character entities have been
+processed. Note that you must use <TT
+CLASS="LITERAL"
+>gen_entity</TT
+> instead of
+<TT
+CLASS="LITERAL"
+>par_entity</TT
+> to access general entities.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class readme =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      (* output header *)
+      output_string 
+       ch "&lt;!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"&gt;";
+      output_string
+       ch "&lt;!-- WARNING! This is a generated file, do not edit! --&gt;\n";
+      let title = 
+       match self # node # attribute "title" with
+           Value s -&gt; s
+         | _ -&gt; assert false
+      in
+      let html_header, _ =
+       try (self # node # dtd # par_entity "readme:html:header") 
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+      let html_trailer, _ =
+       try (self # node # dtd # par_entity "readme:html:trailer")
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+      let html_bgcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:bgcolor")
+            # replacement_text
+       with WF_error _ -&gt; "white", false in
+      let html_textcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:textcolor")
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+      let html_alinkcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:alinkcolor")
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+      let html_vlinkcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:vlinkcolor")
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+      let html_linkcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:linkcolor")
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+      let html_background, _ =
+       try (self # node # dtd # par_entity "readme:html:background")
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+
+      output_string ch "&lt;html&gt;&lt;header&gt;&lt;title&gt;\n";
+      output_string ch (escape_html title);
+      output_string ch "&lt;/title&gt;&lt;/header&gt;\n";
+      output_string ch "&lt;body ";
+      List.iter
+       (fun (name,value) -&gt;
+          if value &lt;&gt; "" then 
+            output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
+       [ "bgcolor",    html_bgcolor;
+         "text",       html_textcolor;
+         "link",       html_linkcolor;
+         "alink",      html_alinkcolor;
+         "vlink",      html_vlinkcolor;
+       ];
+      output_string ch "&gt;\n";
+      output_string ch html_header;
+      output_string ch "&lt;h1&gt;";
+      output_string ch (escape_html title);
+      output_string ch "&lt;/h1&gt;\n";
+      (* process main content: *)
+      List.iter
+       (fun n -&gt; n # extension # to_html store ch)
+       (self # node # sub_nodes);
+      (* now process footnotes *)
+      store # print_footnotes ch;
+      (* trailer *)
+      output_string ch html_trailer;
+      output_string ch "&lt;/html&gt;\n";
+
+  end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN817"
+>2.4.8. Classes <TT
+CLASS="LITERAL"
+>section</TT
+>, <TT
+CLASS="LITERAL"
+>sect1</TT
+>,
+<TT
+CLASS="LITERAL"
+>sect2</TT
+>, and <TT
+CLASS="LITERAL"
+>sect3</TT
+></A
+></H2
+><P
+>As the conversion process is very similar, the conversion classes of the three
+section levels are derived from the more general <TT
+CLASS="LITERAL"
+>section</TT
+>
+class. The HTML code of the section levels differs only in the type of the
+headline, and because of this the classes describing the section levels can be
+computed by replacing the class argument <TT
+CLASS="LITERAL"
+>the_tag</TT
+> of
+<TT
+CLASS="LITERAL"
+>section</TT
+> by the HTML name of the headline tag.</P
+><P
+>Section elements are converted to HTML by printing a headline and then
+converting the contents of the element recursively. More precisely, the first
+sub-element is always a <TT
+CLASS="LITERAL"
+>title</TT
+> element, and the other
+elements are the contents of the section. This structure is declared in the
+DTD, and it is guaranteed that the document matches the DTD. Because of this
+the title node can be separated from the rest without any checks.</P
+><P
+>Both the title node, and the body nodes are then converted to HTML by calling
+<TT
+CLASS="LITERAL"
+>to_html</TT
+> on them.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class section the_tag =
+  object (self)
+    inherit shared
+
+    val tag = the_tag
+
+    method to_html store ch =
+      let sub_nodes = self # node # sub_nodes in
+      match sub_nodes with
+         title_node :: rest -&gt;
+           output_string ch ("&lt;" ^ tag ^ "&gt;\n");
+           title_node # extension # to_html store ch;
+           output_string ch ("\n&lt;/" ^ tag ^ "&gt;");
+           List.iter
+             (fun n -&gt; n # extension # to_html store ch)
+             rest
+       | _ -&gt;
+           assert false
+  end
+;;
+
+class sect1 = section "h1";;
+class sect2 = section "h3";;
+class sect3 = section "h4";;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN833"
+>2.4.9. Classes <TT
+CLASS="LITERAL"
+>map_tag</TT
+>, <TT
+CLASS="LITERAL"
+>p</TT
+>,
+<TT
+CLASS="LITERAL"
+>em</TT
+>, <TT
+CLASS="LITERAL"
+>ul</TT
+>, <TT
+CLASS="LITERAL"
+>li</TT
+></A
+></H2
+><P
+>Several element types are converted to HTML by simply mapping them to
+corresponding HTML element types. The class <TT
+CLASS="LITERAL"
+>map_tag</TT
+>
+implements this, and the class argument <TT
+CLASS="LITERAL"
+>the_target_tag</TT
+>
+determines the tag name to map to. The output consists of the start tag, the
+recursively converted inner elements, and the end tag.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class map_tag the_target_tag =
+  object (self)
+    inherit shared
+
+    val target_tag = the_target_tag
+
+    method to_html store ch =
+      output_string ch ("&lt;" ^ target_tag ^ "&gt;\n");
+      List.iter
+       (fun n -&gt; n # extension # to_html store ch)
+       (self # node # sub_nodes);
+      output_string ch ("\n&lt;/" ^ target_tag ^ "&gt;");
+  end
+;;
+
+class p = map_tag "p";;
+class em = map_tag "b";;
+class ul = map_tag "ul";;
+class li = map_tag "li";;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN844"
+>2.4.10. Class <TT
+CLASS="LITERAL"
+>br</TT
+></A
+></H2
+><P
+>Elements of type <TT
+CLASS="LITERAL"
+>br</TT
+> are mapped to the same HTML type. Note
+that HTML forbids the end tag of <TT
+CLASS="LITERAL"
+>br</TT
+>.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class br =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      output_string ch "&lt;br&gt;\n";
+      List.iter
+       (fun n -&gt; n # extension # to_html store ch)
+       (self # node # sub_nodes);
+  end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN851"
+>2.4.11. Class <TT
+CLASS="LITERAL"
+>code</TT
+></A
+></H2
+><P
+>The <TT
+CLASS="LITERAL"
+>code</TT
+> type is converted to a <TT
+CLASS="LITERAL"
+>pre</TT
+>
+section (preformatted text). As the meaning of tabs is unspecified in HTML,
+tabs are expanded to spaces.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class code =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      let data = self # node # data in
+      (* convert tabs *)
+      let l = String.length data in
+      let rec preprocess i column =
+       (* this is very ineffective but comprehensive: *)
+       if i &lt; l then
+         match data.[i] with
+             '\t' -&gt;
+               let n = 8 - (column mod 8) in
+               String.make n ' ' ^ preprocess (i+1) (column + n)
+           | '\n' -&gt;
+               "\n" ^ preprocess (i+1) 0
+           | c -&gt;
+               String.make 1 c ^ preprocess (i+1) (column + 1)
+       else
+         ""
+      in
+      output_string ch "&lt;p&gt;&lt;pre&gt;";
+      output_string ch (escape_html (preprocess 0 0));
+      output_string ch "&lt;/pre&gt;&lt;/p&gt;";
+
+  end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN858"
+>2.4.12. Class <TT
+CLASS="LITERAL"
+>a</TT
+></A
+></H2
+><P
+>Hyperlinks, expressed by the <TT
+CLASS="LITERAL"
+>a</TT
+> element type, are converted
+to the HTML <TT
+CLASS="LITERAL"
+>a</TT
+> type. If the target of the hyperlink is given
+by <TT
+CLASS="LITERAL"
+>href</TT
+>, the URL of this attribute can be used
+directly. Alternatively, the target can be given by
+<TT
+CLASS="LITERAL"
+>readmeref</TT
+>, in which case the ".html" suffix must be added to
+the file name. </P
+><P
+>Note that within <TT
+CLASS="LITERAL"
+>a</TT
+> only #PCDATA is allowed, so the contents
+can be converted directly by applying <TT
+CLASS="LITERAL"
+>escape_html</TT
+> to the
+character data contents.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class a =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      output_string ch "&lt;a ";
+      let href =
+       match self # node # attribute "href" with
+           Value v -&gt; escape_html v
+         | Valuelist _ -&gt; assert false
+         | Implied_value -&gt;
+             begin match self # node # attribute "readmeref" with
+                 Value v -&gt; escape_html v ^ ".html"
+               | Valuelist _ -&gt; assert false
+               | Implied_value -&gt;
+                   ""
+             end
+      in
+      if href &lt;&gt; "" then
+       output_string ch ("href=\""  ^ href ^ "\"");
+      output_string ch "&gt;";
+      output_string ch (escape_html (self # node # data));
+      output_string ch "&lt;/a&gt;";
+       
+  end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN870"
+>2.4.13. Class <TT
+CLASS="LITERAL"
+>footnote</TT
+></A
+></H2
+><P
+>The <TT
+CLASS="LITERAL"
+>footnote</TT
+> class has two methods:
+<TT
+CLASS="LITERAL"
+>to_html</TT
+> to convert the footnote reference to HTML, and
+<TT
+CLASS="LITERAL"
+>footnote_to_html</TT
+> to convert the footnote text itself.</P
+><P
+>The footnote reference is converted to a local hyperlink; more precisely, to
+two anchor tags which are connected with each other. The text anchor points to
+the footnote anchor, and the footnote anchor points to the text anchor.</P
+><P
+>The footnote must be allocated in the <TT
+CLASS="LITERAL"
+>store</TT
+> object. Allocating
+the footnote yields its number, and the text of
+the footnote is stored until the end of the HTML page is reached, at which
+point the footnotes can be printed. The <TT
+CLASS="LITERAL"
+>to_html</TT
+> method simply stores
+the object itself, so that the <TT
+CLASS="LITERAL"
+>footnote_to_html</TT
+> method is
+invoked on the same object that encountered the footnote.</P
+><P
+>The <TT
+CLASS="LITERAL"
+>to_html</TT
+> method only allocates the footnote and prints the
+reference anchor; it neither prints nor converts the contents of the
+note. This is deferred until the footnotes are actually printed, i.e. the
+recursive call of <TT
+CLASS="LITERAL"
+>to_html</TT
+> on the sub nodes is done by
+<TT
+CLASS="LITERAL"
+>footnote_to_html</TT
+>. </P
+><P
+>Note that this technique does not work if you make another footnote within a
+footnote; the second footnote gets allocated but not printed.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class footnote =
+  object (self)
+    inherit shared
+
+    val mutable footnote_number = 0
+
+    method to_html store ch =
+      let number = 
+       store # alloc_footnote (self : #shared :&gt; footnote_printer) in
+      let foot_anchor = 
+       "footnote" ^ string_of_int number in
+      let text_anchor =
+       "textnote" ^ string_of_int number in
+      footnote_number &lt;- number;
+      output_string ch ( "&lt;a name=\"" ^ text_anchor ^ "\" href=\"#" ^ 
+                        foot_anchor ^ "\"&gt;[" ^ string_of_int number ^ 
+                        "]&lt;/a&gt;" )
+
+    method footnote_to_html store ch =
+      (* prerequisite: we are in a definition list &lt;dl&gt;...&lt;/dl&gt; *)
+      let foot_anchor = 
+       "footnote" ^ string_of_int footnote_number in
+      let text_anchor =
+       "textnote" ^ string_of_int footnote_number in
+      output_string ch ("&lt;dt&gt;&lt;a name=\"" ^ foot_anchor ^ "\" href=\"#" ^ 
+                       text_anchor ^ "\"&gt;[" ^ string_of_int footnote_number ^ 
+                       "]&lt;/a&gt;&lt;/dt&gt;\n&lt;dd&gt;");
+      List.iter
+       (fun n -&gt; n # extension # to_html store ch)
+       (self # node # sub_nodes);
+      output_string ch ("\n&lt;/dd&gt;")
+  end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN889"
+>2.4.14. The specification of the document model</A
+></H2
+><P
+>This code sets up the lookup table that connects element types with the exemplars
+of the extension classes that convert the elements to HTML.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_yacc
+
+let tag_map =
+  make_spec_from_alist
+    ~data_exemplar:(new data_impl (new only_data))
+    ~default_element_exemplar:(new element_impl (new no_markup))
+    ~element_alist:
+      [ "readme", (new element_impl (new readme));
+       "sect1",  (new element_impl (new sect1));
+       "sect2",  (new element_impl (new sect2));
+       "sect3",  (new element_impl (new sect3));
+       "title",  (new element_impl (new no_markup));
+       "p",      (new element_impl (new p));
+       "br",     (new element_impl (new br));
+       "code",   (new element_impl (new code));
+       "em",     (new element_impl (new em));
+       "ul",     (new element_impl (new ul));
+       "li",     (new element_impl (new li));
+       "footnote", (new element_impl (new footnote : #shared :&gt; shared));
+       "a",      (new element_impl (new a));
+      ]
+    ()
+;;</PRE
+></P
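+><P
+>The following lines are only a sketch of how this specification might be used,
+assuming the <TT
+CLASS="LITERAL"
+>Pxp_yacc</TT
+> entry points <TT
+CLASS="LITERAL"
+>default_config</TT
+>, <TT
+CLASS="LITERAL"
+>from_file</TT
+> and <TT
+CLASS="LITERAL"
+>parse_document_entity</TT
+>, and an instance of the <TT
+CLASS="LITERAL"
+>store</TT
+> class introduced earlier in this chapter; the input file name is made up.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let () =
+  (* parse a (hypothetical) input file; the node tree is built from the
+     exemplars registered in tag_map *)
+  let doc =
+    parse_document_entity default_config (from_file "doc.readme") tag_map in
+  (* "store" is assumed to be the footnote store class used above;
+     conversion starts at the root element and writes to stdout *)
+  let store = new store in
+  doc # root # extension # to_html store stdout
+;;</PRE
+></P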
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x675.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="c893.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Class-based processing of the node tree</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c533.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>The objects representing the document</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x939.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x939.html
new file mode 100644 (file)
index 0000000..cf177f8
--- /dev/null
@@ -0,0 +1,2337 @@
+<HTML
+><HEAD
+><TITLE
+>The class type node</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="The objects representing the document"
+HREF="c893.html"><LINK
+REL="PREVIOUS"
+TITLE="The objects representing the document"
+HREF="c893.html"><LINK
+REL="NEXT"
+TITLE="The class type extension"
+HREF="x1439.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="c893.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 3. The objects representing the document</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x1439.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN939"
+>3.2. The class type <TT
+CLASS="LITERAL"
+>node</TT
+></A
+></H1
+><P
+>&#13;From <TT
+CLASS="LITERAL"
+>Pxp_document</TT
+>:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>type node_type =
+  T_data
+| T_element of string
+| T_super_root
+| T_pinstr of string
+| T_comment
+<TT
+CLASS="REPLACEABLE"
+><I
+>and some other, reserved types</I
+></TT
+>
+;;
+
+class type [ 'ext ] node =
+  object ('self)
+    constraint 'ext = 'ext node #extension
+
+    <A
+NAME="TYPE-NODE-GENERAL.SIG"
+></A
+>(* <A
+HREF="x939.html#TYPE-NODE-GENERAL"
+><I
+><I
+>General observers</I
+></I
+></A
+> *)
+
+    method extension : 'ext
+    method dtd : dtd
+    method parent : 'ext node
+    method root : 'ext node
+    method sub_nodes : 'ext node list
+    method iter_nodes : ('ext node -&gt; unit) -&gt; unit
+    method iter_nodes_sibl : 
+           ('ext node option -&gt; 'ext node -&gt; 'ext node option -&gt; unit) -&gt; unit
+    method node_type : node_type
+    method encoding : Pxp_types.rep_encoding
+    method data : string
+    method position : (string * int * int)
+    method comment : string option
+    method pinstr : string -&gt; proc_instruction list
+    method pinstr_names : string list
+    method write : Pxp_types.output_stream -&#62; Pxp_types.encoding -&#62; unit
+
+    <A
+NAME="TYPE-NODE-ATTS.SIG"
+></A
+>(* <A
+HREF="x939.html#TYPE-NODE-ATTS"
+><I
+><I
+>Attribute observers</I
+></I
+></A
+> *)
+
+    method attribute : string -&gt; Pxp_types.att_value
+    method required_string_attribute : string -&gt; string
+    method optional_string_attribute : string -&gt; string option
+    method required_list_attribute : string -&gt; string list
+    method optional_list_attribute : string -&gt; string list
+    method attribute_names : string list
+    method attribute_type : string -&gt; Pxp_types.att_type
+    method attributes : (string * Pxp_types.att_value) list
+    method id_attribute_name : string
+    method id_attribute_value : string
+    method idref_attribute_names : string list
+
+    <A
+NAME="TYPE-NODE-MODS.SIG"
+></A
+>(* <A
+HREF="x939.html#TYPE-NODE-MODS"
+><I
+><I
+>Modifying methods</I
+></I
+></A
+> *)
+
+    method add_node : ?force:bool -&gt; 'ext node -&gt; unit
+    method add_pinstr : proc_instruction -&gt; unit
+    method delete : unit
+    method set_nodes : 'ext node list -&gt; unit
+    method quick_set_attributes : (string * Pxp_types.att_value) list -&gt; unit
+    method set_comment : string option -&gt; unit
+
+    <A
+NAME="TYPE-NODE-CLONING.SIG"
+></A
+>(* <A
+HREF="x939.html#TYPE-NODE-CLONING"
+><I
+><I
+>Cloning methods</I
+></I
+></A
+> *)
+
+    method orphaned_clone : 'self
+    method orphaned_flat_clone : 'self
+    method create_element : 
+              ?position:(string * int * int) -&gt;
+              dtd -&gt; node_type -&gt; (string * string) list -&gt;
+                  'ext node
+    method create_data : dtd -&gt; string -&gt; 'ext node
+    method keep_always_whitespace_mode : unit
+
+    <A
+NAME="TYPE-NODE-WEIRD.SIG"
+></A
+>(* <A
+HREF="x939.html#TYPE-NODE-WEIRD"
+><I
+><I
+>Validating methods</I
+></I
+></A
+> *)
+
+    method local_validate : ?use_dfa:bool -&#62; unit -&#62; unit
+
+    (* ... Internal methods are undocumented. *)
+
+  end
+;;</PRE
+>
+
+In the module <TT
+CLASS="LITERAL"
+>Pxp_types</TT
+> you can find another type
+definition that is important in this context:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>type Pxp_types.att_value =
+    Value     of string
+  | Valuelist of string list
+  | Implied_value
+;;</PRE
+></P
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN958"
+>3.2.1. The structure of document trees</A
+></H2
+><P
+>A node represents either an element or a character data section. There are two
+classes implementing these two kinds of nodes: <TT
+CLASS="LITERAL"
+>element_impl</TT
+>
+and <TT
+CLASS="LITERAL"
+>data_impl</TT
+>. The latter class does not implement all
+methods because some methods do not make sense for data nodes.</P
+><P
+>(Note: PXP also supports a mode in which processing instructions and
+comments are represented as nodes of the document tree. However, these nodes
+are instances of <TT
+CLASS="LITERAL"
+>element_impl</TT
+> with node types
+<TT
+CLASS="LITERAL"
+>T_pinstr</TT
+> and <TT
+CLASS="LITERAL"
+>T_comment</TT
+>,
+respectively. This mode must be explicitly configured; the basic representation
+knows only element and data nodes.)</P
+><P
+>The following figure 
+(<A
+HREF="x939.html#NODE-TERM"
+><I
+><I
+>A tree with element nodes, data nodes, and attributes</I
+><I
+></I
+></I
+></A
+>) shows an example of how
+a tree is constructed from element and data nodes. The circular areas 
+represent element nodes whereas the ovals denote data nodes. Only elements
+may have subnodes; data nodes are always leaves of the tree. The subnodes
+of an element can be either element or data nodes; in both cases the O'Caml
+objects storing the nodes have the class type <TT
+CLASS="LITERAL"
+>node</TT
+>.</P
+><P
+>Attributes (the clouds in the picture) are not directly
+integrated into the tree; there is always an extra link to the attribute
+list. This is also true for processing instructions (not shown in the
+picture). This means that there are separate access methods for attributes and
+processing instructions.</P
+><DIV
+CLASS="FIGURE"
+><A
+NAME="NODE-TERM"
+></A
+><P
+><B
+>Figure 3-1. A tree with element nodes, data nodes, and attributes</B
+></P
+><P
+><IMG
+SRC="pic/node_term.gif"></P
+></DIV
+><P
+>Only elements, data sections, attributes and processing
+instructions (and comments, if configured) can, directly or indirectly, occur
+in the document tree. It is impossible to add entity references to the tree; if
+the parser finds such a reference, not the reference as such but the referenced
+text (i.e. the tree representing the structured text) is included in the
+tree.</P
+><P
+>Note that the parser collapses as much data material into one
+data node as possible, so that there are normally no two adjacent data
+nodes. This invariant is enforced even if data material is included by entity
+references or CDATA sections, or if a data sequence is interrupted by
+comments. So <TT
+CLASS="LITERAL"
+>a &amp;amp; b &lt;-- comment --&gt; c &lt;![CDATA[
+&lt;&gt; d]]&gt;</TT
+> is represented by only one data node, for
+instance. However, you can create document trees manually which break this
+invariant; it only describes how the parser itself builds the tree.</P
+><DIV
+CLASS="FIGURE"
+><A
+NAME="NODE-GENERAL"
+></A
+><P
+><B
+>Figure 3-2. Nodes are doubly linked trees</B
+></P
+><P
+><IMG
+SRC="pic/node_general.gif"></P
+></DIV
+><P
+>The node tree has links in both directions: Every node has a link to its parent
+(if any), and it has links to the subnodes (see 
+figure <A
+HREF="x939.html#NODE-GENERAL"
+><I
+><I
+>Nodes are doubly linked trees</I
+><I
+></I
+></I
+></A
+>). Obviously,
+this doubly-linked structure simplifies the navigation in the tree, but it
+also has some consequences for the possible operations on trees.</P
+><P
+>Because every node must have at most <I
+CLASS="EMPHASIS"
+>one</I
+> parent node,
+operations are illegal if they violate this condition. The following figure
+(<A
+HREF="x939.html#NODE-ADD"
+><I
+><I
+>A node can only be added if it is a root</I
+><I
+></I
+></I
+></A
+>) shows on the left side
+that node <TT
+CLASS="LITERAL"
+>y</TT
+> is added to <TT
+CLASS="LITERAL"
+>x</TT
+> as new subnode
+which is allowed because <TT
+CLASS="LITERAL"
+>y</TT
+> does not have a parent yet. The
+right side of the picture illustrates what would happen if <TT
+CLASS="LITERAL"
+>y</TT
+>
+had a parent node; this is illegal because <TT
+CLASS="LITERAL"
+>y</TT
+> would have two
+parents after the operation.</P
+><DIV
+CLASS="FIGURE"
+><A
+NAME="NODE-ADD"
+></A
+><P
+><B
+>Figure 3-3. A node can only be added if it is a root</B
+></P
+><P
+><IMG
+SRC="pic/node_add.gif"></P
+></DIV
+><P
+>The "delete" operation simply removes the links between two nodes. In the
+picture (<A
+HREF="x939.html#NODE-DELETE"
+><I
+><I
+>A deleted node becomes the root of the subtree</I
+><I
+></I
+></I
+></A
+>) the node
+<TT
+CLASS="LITERAL"
+>x</TT
+> is deleted from the list of subnodes of
+<TT
+CLASS="LITERAL"
+>y</TT
+>. After that, <TT
+CLASS="LITERAL"
+>x</TT
+> becomes the root of the
+subtree starting at this node.</P
+><DIV
+CLASS="FIGURE"
+><A
+NAME="NODE-DELETE"
+></A
+><P
+><B
+>Figure 3-4. A deleted node becomes the root of the subtree</B
+></P
+><P
+><IMG
+SRC="pic/node_delete.gif"></P
+></DIV
+><P
+>It is also possible to make a clone of a subtree, as illustrated in 
+<A
+HREF="x939.html#NODE-CLONE"
+><I
+><I
+>The clone of a subtree</I
+><I
+></I
+></I
+></A
+>. In this case, the
+clone is a copy of the original subtree except that it is no longer a
+subnode. Because cloning never keeps the connection to the parent, the clones
+are called <I
+CLASS="EMPHASIS"
+>orphaned</I
+>.</P
+><DIV
+CLASS="FIGURE"
+><A
+NAME="NODE-CLONE"
+></A
+><P
+><B
+>Figure 3-5. The clone of a subtree</B
+></P
+><P
+><IMG
+SRC="pic/node_clone.gif"></P
+></DIV
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1007"
+>3.2.2. The methods of the class type <TT
+CLASS="LITERAL"
+>node</TT
+></A
+></H2
+><A
+NAME="TYPE-NODE-GENERAL"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>              <A
+HREF="x939.html#TYPE-NODE-GENERAL.SIG"
+>General observers</A
+>
+            . </B
+>            <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>extension</TT
+>: The reference to the extension object which
+belongs to this node (see ...).</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>dtd</TT
+>: Returns a reference to the global DTD. All nodes
+of a tree must share the same DTD.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>parent</TT
+>: Gets the parent node. Raises
+<TT
+CLASS="LITERAL"
+>Not_found</TT
+> if the node does not have a
+parent, i.e. the node is the root.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>root</TT
+>: Gets the reference to the root node of the tree.
+Every node is contained in a tree with a root, so this method always 
+succeeds. Note that this method <I
+CLASS="EMPHASIS"
+>searches</I
+> the root,
+which costs time proportional to the length of the path to the root.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>sub_nodes</TT
+>: Returns references to the children. The returned
+list reflects the order of the children. For data nodes, this method returns
+the empty list.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>iter_nodes f</TT
+>: Iterates over the children, and calls
+<TT
+CLASS="LITERAL"
+>f</TT
+> for every child in turn. </P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>iter_nodes_sibl f</TT
+>: Iterates over the children, and calls
+<TT
+CLASS="LITERAL"
+>f</TT
+> for every child in turn. <TT
+CLASS="LITERAL"
+>f</TT
+> gets as
+arguments the previous node, the current node, and the next node.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>node_type</TT
+>: Returns either <TT
+CLASS="LITERAL"
+>T_data</TT
+> which
+means that the node is a data node, or <TT
+CLASS="LITERAL"
+>T_element n</TT
+>
+which means that the node is an element of type <TT
+CLASS="LITERAL"
+>n</TT
+>. 
+If configured, possible node types are also <TT
+CLASS="LITERAL"
+>T_pinstr t</TT
+>
+indicating that the node represents a processing instruction with target
+<TT
+CLASS="LITERAL"
+>t</TT
+>, and <TT
+CLASS="LITERAL"
+>T_comment</TT
+> in which case the node
+is a comment.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>encoding</TT
+>: Returns the encoding of the strings.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>data</TT
+>: Returns the character data of this node and all
+children, concatenated as one string. The encoding of the string is what
+the method <TT
+CLASS="LITERAL"
+>encoding</TT
+> returns.
+- For data nodes, this method simply returns the represented characters.
+For elements, the meaning of the method has been extended such that it
+returns something useful, i.e. the effectively contained characters, without
+markup. (For <TT
+CLASS="LITERAL"
+>T_pinstr</TT
+> and <TT
+CLASS="LITERAL"
+>T_comment</TT
+>
+nodes, the method returns the empty string.)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>position</TT
+>: If configured, this method returns the position of
+the element as triple (entity, line, byteposition). For data nodes, the
+position is not stored. If the position is not available the triple
+<TT
+CLASS="LITERAL"
+>"?", 0, 0</TT
+> is returned.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>comment</TT
+>: Returns <TT
+CLASS="LITERAL"
+>Some text</TT
+> for comment
+nodes, and <TT
+CLASS="LITERAL"
+>None</TT
+> for other nodes. The <TT
+CLASS="LITERAL"
+>text</TT
+>
+is everything between the comment delimiters <TT
+CLASS="LITERAL"
+>&lt;!--</TT
+> and
+<TT
+CLASS="LITERAL"
+>--&gt;</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>pinstr n</TT
+>: Returns all processing instructions that are
+directly contained in this element and that have a <I
+CLASS="EMPHASIS"
+>target</I
+>
+specification of <TT
+CLASS="LITERAL"
+>n</TT
+>. The target is the first word after
+the <TT
+CLASS="LITERAL"
+>&lt;?</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>pinstr_names</TT
+>: Returns the list of all targets of processing
+instructions directly contained in this element.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>write s enc</TT
+>: Prints the node and all subnodes to the passed
+output stream as valid XML text, using the passed external encoding.</P
+></LI
+></UL
+>
+            </P
+></DIV
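+><P
+>The following lines are a small sketch (not taken from the PXP distribution)
+that combines several of the observers listed above: the tree is walked via <TT
+CLASS="LITERAL"
+>sub_nodes</TT
+>, and <TT
+CLASS="LITERAL"
+>node_type</TT
+> and <TT
+CLASS="LITERAL"
+>data</TT
+> decide what is printed for each node.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>(* print an indented outline of a tree *)
+let rec print_outline indent n =
+  ( match n # node_type with
+        T_element name -&gt; print_endline (indent ^ "element " ^ name)
+      | T_data         -&gt; print_endline (indent ^ "data: " ^ n # data)
+      | _              -&gt; ()   (* T_comment, T_pinstr, ... if configured *)
+  );
+  List.iter (print_outline (indent ^ "  ")) (n # sub_nodes)
+;;</PRE
+></P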
+><A
+NAME="TYPE-NODE-ATTS"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>              <A
+HREF="x939.html#TYPE-NODE-ATTS.SIG"
+>Attribute observers</A
+>
+            . </B
+>            <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>attribute n</TT
+>: Returns the value of the attribute with name
+<TT
+CLASS="LITERAL"
+>n</TT
+>. This method returns a value for every declared 
+attribute, and it raises <TT
+CLASS="LITERAL"
+>Not_found</TT
+> for any undeclared
+attribute. Note that it even returns a value if the attribute is actually
+missing but is declared as <TT
+CLASS="LITERAL"
+>#IMPLIED</TT
+> or has a default
+value. - Possible values are:
+                  <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>Implied_value</TT
+>: The attribute has been declared with the
+keyword <TT
+CLASS="LITERAL"
+>#IMPLIED</TT
+>, and the attribute is missing in the
+attribute list of this element.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>Value s</TT
+>: The attribute has been declared as type
+<TT
+CLASS="LITERAL"
+>CDATA</TT
+>, as <TT
+CLASS="LITERAL"
+>ID</TT
+>, as
+<TT
+CLASS="LITERAL"
+>IDREF</TT
+>, as <TT
+CLASS="LITERAL"
+>ENTITY</TT
+>, or as
+<TT
+CLASS="LITERAL"
+>NMTOKEN</TT
+>, or as enumeration or notation, and one of the two
+conditions holds: (1) The attribute value is present in the attribute list in
+which case the value is returned in the string <TT
+CLASS="LITERAL"
+>s</TT
+>. (2) The
+attribute has been omitted, and the DTD declared the attribute with a default
+value. The default value is returned in <TT
+CLASS="LITERAL"
+>s</TT
+>. 
+- Summarized, <TT
+CLASS="LITERAL"
+>Value s</TT
+> is returned for non-implied, non-list 
+attribute values.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>Valuelist l</TT
+>: The attribute has been declared as type
+<TT
+CLASS="LITERAL"
+>IDREFS</TT
+>, as <TT
+CLASS="LITERAL"
+>ENTITIES</TT
+>, or
+as <TT
+CLASS="LITERAL"
+>NMTOKENS</TT
+>, and one of the two conditions holds: (1) The
+attribute value is present in the attribute list in which case the
+space-separated tokens of the value are returned in the string list
+<TT
+CLASS="LITERAL"
+>l</TT
+>. (2) The attribute has been omitted, and the DTD declared
+the attribute with a default value. The default value is returned in
+<TT
+CLASS="LITERAL"
+>l</TT
+>. 
+- Summarized, <TT
+CLASS="LITERAL"
+>Valuelist l</TT
+> is returned for all list-type
+attribute values.</P
+></LI
+></UL
+>
+
+Note that before the attribute value is returned, the value is normalized. This
+means that newlines are converted to spaces, and that references to character
+entities (i.e. <TT
+CLASS="LITERAL"
+>&amp;#<TT
+CLASS="REPLACEABLE"
+><I
+>n</I
+></TT
+>;</TT
+>) and
+general entities
+(i.e. <TT
+CLASS="LITERAL"
+>&amp;<TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+>;</TT
+>) are expanded;
+if necessary, expansion is performed recursively.</P
+><P
+>In well-formedness mode, there is no DTD which could declare an
+attribute. Because of this, every occurring attribute is treated as a CDATA
+attribute.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>required_string_attribute n</TT
+>: returns the Value attribute
+called n, or the Valuelist attribute as a string where the list elements
+are separated by spaces. If the attribute value is implied, or if the
+attribute does not exist, the method will fail. - This method is convenient
+if you expect a non-implied and non-list attribute value.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>optional_string_attribute n</TT
+>: returns the Value attribute
+called n, or the Valuelist attribute as a string where the list elements
+are separated by spaces. If the attribute value is implied, or if the
+attribute does not exist, the method returns None. - This method is 
+convenient if you expect a non-list attribute value including the implied
+value.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>required_list_attribute n</TT
+>: returns the Valuelist attribute
+called n, or the Value attribute as a list with a single element.
+If the attribute value is implied, or if the
+attribute does not exist, the method will fail. - This method is 
+convenient if you expect a list attribute value.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>optional_list_attribute n</TT
+>: returns the Valuelist attribute
+called n, or the Value attribute as a list with a single element.
+If the attribute value is implied, or if the
+attribute does not exist, an empty list will be returned. - This method
+is convenient if you expect a list attribute value or the implied value.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>attribute_names</TT
+>: returns the list of all attribute names of
+this element. As this is a validating parser, this list is equal to the
+list of declared attributes.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>attribute_type n</TT
+>: returns the type of the attribute called
+<TT
+CLASS="LITERAL"
+>n</TT
+>. See the module <TT
+CLASS="LITERAL"
+>Pxp_types</TT
+> for a
+description of the encoding of the types.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>attributes</TT
+>: returns the list of pairs of names and values
+for all attributes of
+this element.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>id_attribute_name</TT
+>: returns the name of the attribute that is
+declared with type ID. There is at most one such attribute. The method raises
+<TT
+CLASS="LITERAL"
+>Not_found</TT
+> if there is no declared ID attribute for the
+element type.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>id_attribute_value</TT
+>: returns the value of the attribute that
+is declared with type ID. There is at most one such attribute. The method raises
+<TT
+CLASS="LITERAL"
+>Not_found</TT
+> if there is no declared ID attribute for the
+element type.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>idref_attribute_names</TT
+>: returns the list of attribute names
+that are declared as IDREF or IDREFS.</P
+></LI
+></UL
+>
+          </P
+></DIV
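+><P
+>As a further illustration (again only a sketch), the following function prints
+every attribute of an element together with its value, combining <TT
+CLASS="LITERAL"
+>attribute_names</TT
+> and <TT
+CLASS="LITERAL"
+>attribute</TT
+>; the constructors of <TT
+CLASS="LITERAL"
+>att_value</TT
+> are assumed to be in scope (e.g. by opening <TT
+CLASS="LITERAL"
+>Pxp_types</TT
+>).
+
+<PRE
+CLASS="PROGRAMLISTING"
+>(* dump all (declared) attributes of an element node *)
+let dump_attributes n =
+  List.iter
+    (fun name -&gt;
+       match n # attribute name with
+           Value s       -&gt; Printf.printf "%s = %s\n" name s
+         | Valuelist l   -&gt; Printf.printf "%s = %s\n" name (String.concat " " l)
+         | Implied_value -&gt; Printf.printf "%s is implied\n" name)
+    (n # attribute_names)
+;;</PRE
+></P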
+><A
+NAME="TYPE-NODE-MODS"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>              <A
+HREF="x939.html#TYPE-NODE-MODS.SIG"
+>Modifying methods</A
+>
+            . </B
+>The following methods are only defined for element nodes (more exactly:
+the methods are defined for data nodes, too, but they always fail).
+
+             <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>add_node sn</TT
+>: Adds sub node <TT
+CLASS="LITERAL"
+>sn</TT
+> to the list
+of children. This operation is illustrated in the picture 
+<A
+HREF="x939.html#NODE-ADD"
+><I
+><I
+>A node can only be added if it is a root</I
+><I
+></I
+></I
+></A
+>. This method expects that
+<TT
+CLASS="LITERAL"
+>sn</TT
+> is a root, and it requires that <TT
+CLASS="LITERAL"
+>sn</TT
+> and
+the current object share the same DTD.</P
+><P
+>Because <TT
+CLASS="LITERAL"
+>add_node</TT
+> is the method the parser itself uses
+to add new nodes to the tree, it performs by default some simple validation
+checks: If the content model is a regular expression, it is not allowed to add
+data nodes to this node unless the new nodes consist only of whitespace. In
+this case, the new data nodes are silently dropped (you can change this by
+invoking <TT
+CLASS="LITERAL"
+>keep_always_whitespace_mode</TT
+>).</P
+><P
+>If the document is flagged as stand-alone, such whitespace-only data
+nodes are even forbidden if the element declaration is
+contained in an external entity. This case is detected and rejected.</P
+><P
+>If the content model is <TT
+CLASS="LITERAL"
+>EMPTY</TT
+>, it is not allowed to
+add any data node unless the data node is empty. In this case, the new data
+node is silently dropped.</P
+><P
+>These checks only apply if there is a DTD. In well-formedness mode, it is
+assumed that every element is declared with content model
+<TT
+CLASS="LITERAL"
+>ANY</TT
+> which prohibits any validation check. Furthermore, you
+turn these checks off by passing <TT
+CLASS="LITERAL"
+>~force:true</TT
+> as first
+argument.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>add_pinstr pi</TT
+>: Adds the processing instruction
+<TT
+CLASS="LITERAL"
+>pi</TT
+> to the list of processing instructions.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>delete</TT
+>: Deletes this node from the tree. After this
+operation, this node is no longer a child of its former parent node, and the
+connection to the parent is removed as well. This operation is illustrated
+by the figure <A
+HREF="x939.html#NODE-DELETE"
+><I
+><I
+>A deleted node becomes the root of the subtree</I
+><I
+></I
+></I
+></A
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>set_nodes nl</TT
+>: Sets the list of children to
+<TT
+CLASS="LITERAL"
+>nl</TT
+>. It is required that every member of <TT
+CLASS="LITERAL"
+>nl</TT
+>
+is a root, and that all members and the current object share the same DTD.
+Unlike <TT
+CLASS="LITERAL"
+>add_node</TT
+>, no validation checks are performed.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>quick_set_attributes atts</TT
+>: sets the attributes of this
+element to <TT
+CLASS="LITERAL"
+>atts</TT
+>. It is <I
+CLASS="EMPHASIS"
+>not</I
+> checked
+whether <TT
+CLASS="LITERAL"
+>atts</TT
+> matches the DTD or not; it is up to the
+caller of this method to ensure this. (This method may be useful to transform
+the attribute values, i.e. apply a mapping to every attribute.)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>set_comment text</TT
+>: This method is only applicable to
+<TT
+CLASS="LITERAL"
+>T_comment</TT
+> nodes; it sets the comment text contained by such
+nodes. </P
+></LI
+></UL
+></P
+></DIV
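+><P
+>The following lines sketch the attribute transformation mentioned in the <TT
+CLASS="LITERAL"
+>quick_set_attributes</TT
+> item above (only a sketch; whether the transformed values still conform to
+the DTD remains the caller's responsibility):
+
+<PRE
+CLASS="PROGRAMLISTING"
+>(* apply a string transformation f to every attribute value of n and
+   write the result back with quick_set_attributes *)
+let map_attribute_values f n =
+  let atts =
+    List.map
+      (fun (name, v) -&gt;
+         match v with
+             Value s       -&gt; (name, Value (f s))
+           | Valuelist l   -&gt; (name, Valuelist (List.map f l))
+           | Implied_value -&gt; (name, Implied_value))
+      (n # attributes)
+  in
+  n # quick_set_attributes atts
+;;</PRE
+></P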
+><A
+NAME="TYPE-NODE-CLONING"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>              <A
+HREF="x939.html#TYPE-NODE-CLONING.SIG"
+>Cloning methods</A
+>
+            . </B
+>            <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>orphaned_clone</TT
+>: Returns a clone of the node and the complete
+tree below this node (deep clone). The clone does not have a parent (i.e. the
+reference to the parent node is <I
+CLASS="EMPHASIS"
+>not</I
+> cloned). While
+copying the subtree, strings are not copied; it is likely that the original tree
+and the clone share strings. Extension objects are cloned by invoking
+the <TT
+CLASS="LITERAL"
+>clone</TT
+> method on the original objects; how much of
+the extension objects is cloned depends on the implementation of this method.</P
+><P
+>This operation is illustrated by the figure 
+<A
+HREF="x939.html#NODE-CLONE"
+><I
+><I
+>The clone of a subtree</I
+><I
+></I
+></I
+></A
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>orphaned_flat_clone</TT
+>: Returns a clone of the node,
+but sets the list of sub nodes to [], i.e. the sub nodes are not cloned.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><A
+NAME="TYPE-NODE-METH-CREATE-ELEMENT"
+></A
+>
+<TT
+CLASS="LITERAL"
+>create_element dtd nt al</TT
+>: Returns a flat copy of this node
+(which must be an element) with the following modifications: The DTD is set to
+<TT
+CLASS="LITERAL"
+>dtd</TT
+>; the node type is set to <TT
+CLASS="LITERAL"
+>nt</TT
+>, and the
+new attribute list is set to <TT
+CLASS="LITERAL"
+>al</TT
+> (given as list of
+(name,value) pairs). The copy has neither children nor a parent. It does not
+contain processing instructions. See 
+<A
+HREF="x939.html#TYPE-NODE-EX-CREATE-ELEMENT"
+>the example below</A
+>.</P
+><P
+>Note that you can specify the position of the new node
+by the optional argument <TT
+CLASS="LITERAL"
+>~position</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><A
+NAME="TYPE-NODE-METH-CREATE-DATA"
+></A
+>
+<TT
+CLASS="LITERAL"
+>create_data dtd cdata</TT
+>: Returns a flat copy of this node
+(which must be a data node) with the following modifications: The DTD is set to
+<TT
+CLASS="LITERAL"
+>dtd</TT
+>; the node type is set to <TT
+CLASS="LITERAL"
+>T_data</TT
+>; the
+attribute list is empty (data nodes never have attributes); the list of
+children and PIs is empty, too (same reason). The new node does not have a
+parent. The value <TT
+CLASS="LITERAL"
+>cdata</TT
+> is the new character content of the
+node. See 
+<A
+HREF="x939.html#TYPE-NODE-EX-CREATE-DATA"
+>the example below</A
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>keep_always_whitespace_mode</TT
+>: Even data nodes which are
+normally dropped because they only contain ignorable whitespace can be added to
+this node once this mode is turned on. (This mode is useful to produce
+canonical XML.)</P
+></LI
+></UL
+></P
+></DIV
+><A
+NAME="TYPE-NODE-WEIRD"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>              <A
+HREF="x939.html#TYPE-NODE-WEIRD.SIG"
+>Validating methods</A
+>
+            . </B
+>There is one method which locally validates the node, i.e. checks whether the
+subnodes match the content model of this node.
+
+             <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>local_validate</TT
+>: Checks that this node conforms to the
+DTD by comparing the type of the subnodes with the content model for this
+node. (Applications need not call this method unless they add new nodes
+themselves to the tree.)</P
+></LI
+></UL
+></P
+></DIV
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1252"
+>3.2.3. The class <TT
+CLASS="LITERAL"
+>element_impl</TT
+></A
+></H2
+><P
+>This class is an implementation of <TT
+CLASS="LITERAL"
+>node</TT
+> which
+realizes element nodes:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class [ 'ext ] element_impl : 'ext -&#62; [ 'ext ] node</PRE
+>&#13;</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Constructor. </B
+>You can create a new instance by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>new element_impl <TT
+CLASS="REPLACEABLE"
+><I
+>extension_object</I
+></TT
+></PRE
+>
+
+which creates a special form of element which already contains a
+reference to the <TT
+CLASS="REPLACEABLE"
+><I
+>extension_object</I
+></TT
+>, but is
+otherwise empty. This special form is called an
+<I
+CLASS="EMPHASIS"
+>exemplar</I
+>. The purpose of exemplars is that they serve as
+patterns that can be duplicated and filled with data. The method
+<A
+HREF="x939.html#TYPE-NODE-METH-CREATE-ELEMENT"
+><TT
+CLASS="LITERAL"
+>create_element</TT
+></A
+> is designed to perform this action.</P
+></DIV
+><A
+NAME="TYPE-NODE-EX-CREATE-ELEMENT"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Example. </B
+>First, create an exemplar by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let exemplar_ext = ... in
+let exemplar     = new element_impl exemplar_ext in</PRE
+>
+
+The <TT
+CLASS="LITERAL"
+>exemplar</TT
+> is not used in node trees, but only as
+a pattern when the element nodes are created:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let element = exemplar # <A
+HREF="x939.html#TYPE-NODE-METH-CREATE-ELEMENT"
+>create_element</A
+> dtd (T_element name) attlist </PRE
+>
+
+The <TT
+CLASS="LITERAL"
+>element</TT
+> is a copy of <TT
+CLASS="LITERAL"
+>exemplar</TT
+>
+(even the extension <TT
+CLASS="LITERAL"
+>exemplar_ext</TT
+> has been copied)
+which ensures that <TT
+CLASS="LITERAL"
+>element</TT
+> and its extension are objects
+of the same class as the exemplars; note that you do not need to pass a 
+class name or other meta information. The copy is initially connected 
+with the <TT
+CLASS="LITERAL"
+>dtd</TT
+>, it gets a node type, and the attribute list
+is filled. The <TT
+CLASS="LITERAL"
+>element</TT
+> is now fully functional; it can
+be added to another element as child, and it can contain references to
+subnodes.</P
+></DIV
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1281"
+>3.2.4. The class <TT
+CLASS="LITERAL"
+>data_impl</TT
+></A
+></H2
+><P
+>This class is an implementation of <TT
+CLASS="LITERAL"
+>node</TT
+> which
+should be used for all character data nodes:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class [ 'ext ] data_impl : 'ext -&#62; [ 'ext ] node</PRE
+>&#13;</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Constructor. </B
+>You can create a new instance by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>new data_impl <TT
+CLASS="REPLACEABLE"
+><I
+>extension_object</I
+></TT
+></PRE
+>
+
+which creates an empty exemplar node which is connected to
+<TT
+CLASS="REPLACEABLE"
+><I
+>extension_object</I
+></TT
+>. The node does not contain a
+reference to any DTD, and because of this it cannot be added to node trees.</P
+></DIV
+><P
+>To get a fully working data node, apply the method
+<A
+HREF="x939.html#TYPE-NODE-METH-CREATE-DATA"
+><TT
+CLASS="LITERAL"
+>create_data</TT
+></A
+> to the exemplar (see example).</P
+><A
+NAME="TYPE-NODE-EX-CREATE-DATA"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Example. </B
+>First, create an exemplar by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let exemplar_ext = ... in
+let exemplar     = new data_impl    exemplar_ext in</PRE
+>
+
+The <TT
+CLASS="LITERAL"
+>exemplar</TT
+> is not used in node trees, but only as
+a pattern when the data nodes are created:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let data_node = exemplar # <A
+HREF="x939.html#TYPE-NODE-METH-CREATE-DATA"
+>create_data</A
+> dtd "The characters contained in the data node" </PRE
+>
+
+The <TT
+CLASS="LITERAL"
+>data_node</TT
+> is a copy of <TT
+CLASS="LITERAL"
+>exemplar</TT
+>.
+The copy is initially connected 
+with the <TT
+CLASS="LITERAL"
+>dtd</TT
+>, and it is filled with character material.
+The <TT
+CLASS="LITERAL"
+>data_node</TT
+> is now fully functional; it can
+be added to an element as child.</P
+></DIV
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1308"
+>3.2.5. The type <TT
+CLASS="LITERAL"
+>spec</TT
+></A
+></H2
+><P
+>The type <TT
+CLASS="LITERAL"
+>spec</TT
+> defines a way to handle the details of
+creating nodes from exemplars.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>type 'ext spec
+constraint 'ext = 'ext node #extension
+
+val make_spec_from_mapping :
+      ?super_root_exemplar : 'ext node -&#62;
+      ?comment_exemplar : 'ext node -&#62;
+      ?default_pinstr_exemplar : 'ext node -&#62;
+      ?pinstr_mapping : (string, 'ext node) Hashtbl.t -&#62;
+      data_exemplar: 'ext node -&#62;
+      default_element_exemplar: 'ext node -&#62;
+      element_mapping: (string, 'ext node) Hashtbl.t -&#62; 
+      unit -&#62; 
+        'ext spec
+
+val make_spec_from_alist :
+      ?super_root_exemplar : 'ext node -&#62;
+      ?comment_exemplar : 'ext node -&#62;
+      ?default_pinstr_exemplar : 'ext node -&#62;
+      ?pinstr_alist : (string * 'ext node) list -&#62;
+      data_exemplar: 'ext node -&#62;
+      default_element_exemplar: 'ext node -&#62;
+      element_alist: (string * 'ext node) list -&#62; 
+      unit -&#62; 
+        'ext spec</PRE
+>
+
+The two functions <TT
+CLASS="LITERAL"
+>make_spec_from_mapping</TT
+> and
+<TT
+CLASS="LITERAL"
+>make_spec_from_alist</TT
+> create <TT
+CLASS="LITERAL"
+>spec</TT
+>
+values. Both functions are equivalent; the only difference is
+that the first takes hashtables and the second takes associative lists to
+describe the mappings from names to exemplars.</P
+><P
+>You can specify exemplars for the various kinds of nodes that need to be
+generated when an XML document is parsed:
+             
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~super_root_exemplar</TT
+>: This exemplar
+is used to create the super root. This special node is only created if the
+corresponding configuration option has been selected; it is the parent node of
+the root node which may be convenient if every working node must have a parent.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~comment_exemplar</TT
+>: This exemplar is
+used when a comment node must be created. Note that such nodes are only created
+if the corresponding configuration option is "on".</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~default_pinstr_exemplar</TT
+>: If a node
+for a processing instruction must be created, and the instruction is not listed
+in the table passed by <TT
+CLASS="LITERAL"
+>~pinstr_mapping</TT
+> or
+<TT
+CLASS="LITERAL"
+>~pinstr_alist</TT
+>, this exemplar is used.
+Again the configuration option must be "on" in order to create such nodes at
+all. </P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~pinstr_mapping</TT
+> or
+<TT
+CLASS="LITERAL"
+>~pinstr_alist</TT
+>: Map the target names of processing
+instructions to exemplars. These mappings are only used when nodes for
+processing instructions are created.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~data_exemplar</TT
+>: The exemplar for
+ordinary data nodes.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~default_element_exemplar</TT
+>: This
+exemplar is used if an element node must be created, but the element type
+cannot be found in the tables <TT
+CLASS="LITERAL"
+>element_mapping</TT
+> or
+<TT
+CLASS="LITERAL"
+>element_alist</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~element_mapping</TT
+> or
+<TT
+CLASS="LITERAL"
+>~element_alist</TT
+>: Map the element types to exemplars. These
+mappings are used to create element nodes.</P
+></LI
+></UL
+>
+
+In most cases, you only want to create <TT
+CLASS="LITERAL"
+>spec</TT
+> values to pass
+them to the parser functions found in <TT
+CLASS="LITERAL"
+>Pxp_yacc</TT
+>. However, it
+might be useful to apply <TT
+CLASS="LITERAL"
+>spec</TT
+> values directly.</P
+><P
+>The following functions create various types of nodes by selecting the
+corresponding exemplar from the passed <TT
+CLASS="LITERAL"
+>spec</TT
+> value, and by
+calling <TT
+CLASS="LITERAL"
+>create_element</TT
+> or <TT
+CLASS="LITERAL"
+>create_data</TT
+> on
+the exemplar.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>val create_data_node : 
+      'ext spec -&#62; 
+      dtd -&#62; 
+      (* data material: *) string -&#62; 
+          'ext node
+
+val create_element_node : 
+      ?position:(string * int * int) -&#62;
+      'ext spec -&#62; 
+      dtd -&#62; 
+      (* element type: *) string -&#62; 
+      (* attributes: *) (string * string) list -&#62; 
+          'ext node
+
+val create_super_root_node :
+      ?position:(string * int * int) -&#62;
+      'ext spec -&#62; 
+       dtd -&#62; 
+           'ext node
+
+val create_comment_node :
+      ?position:(string * int * int) -&#62;
+      'ext spec -&#62; 
+      dtd -&#62; 
+      (* comment text: *) string -&#62; 
+          'ext node
+
+val create_pinstr_node :
+      ?position:(string * int * int) -&#62;
+      'ext spec -&#62; 
+      dtd -&#62; 
+      proc_instruction -&#62; 
+          'ext node</PRE
+></P
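+><P
+>For illustration, here is a small sketch (not taken from the distribution) that
+builds a two-node tree directly from a <TT
+CLASS="LITERAL"
+>spec</TT
+> value; the element type "p" and the DTD are assumptions of the example.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>(* the exemplars are selected and copied by create_element_node and
+   create_data_node *)
+let make_greeting spec dtd =
+  let el  = create_element_node spec dtd "p" [] in
+  let txt = create_data_node spec dtd "Hello world" in
+  el # add_node txt;      (* see the validation notes on add_node above *)
+  el
+;;</PRE
+></P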
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1354"
+>3.2.6. Examples</A
+></H2
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Building trees. </B
+>Here is the piece of code that creates the tree of
+the figure <A
+HREF="x939.html#NODE-TERM"
+><I
+><I
+>A tree with element nodes, data nodes, and attributes</I
+><I
+></I
+></I
+></A
+>. The extension
+object and the DTD are beyond the scope of this example.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let exemplar_ext = ... (* some extension *) in
+let dtd = ... (* some DTD *) in
+
+let element_exemplar = new element_impl exemplar_ext in
+let data_exemplar    = new data_impl    exemplar_ext in
+
+let a1 = element_exemplar # create_element dtd (T_element "a") ["att", "apple"]
+and b1 = element_exemplar # create_element dtd (T_element "b") []
+and c1 = element_exemplar # create_element dtd (T_element "c") []
+and a2 = element_exemplar # create_element dtd (T_element "a") ["att", "orange"]
+in
+
+let cherries = data_exemplar # create_data dtd "Cherries" in
+let orange   = data_exemplar # create_data dtd "An orange" in
+
+a1 # add_node b1;
+a1 # add_node c1;
+b1 # add_node a2;
+b1 # add_node cherries;
+a2 # add_node orange;</PRE
+>
+
+Alternatively, the last block of statements could also be written as:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>a1 # set_nodes [b1; c1];
+b1 # set_nodes [a2; cherries];
+a2 # set_nodes [orange];</PRE
+>
+
+The root of the tree is <TT
+CLASS="LITERAL"
+>a1</TT
+>, i.e. it is true that
+
+<PRE
+CLASS="PROGRAMLISTING"
+>x # root == a1</PRE
+>
+
+for every x from { <TT
+CLASS="LITERAL"
+>a1</TT
+>, <TT
+CLASS="LITERAL"
+>a2</TT
+>,
+<TT
+CLASS="LITERAL"
+>b1</TT
+>, <TT
+CLASS="LITERAL"
+>c1</TT
+>, <TT
+CLASS="LITERAL"
+>cherries</TT
+>,
+<TT
+CLASS="LITERAL"
+>orange</TT
+> }.</P
+></DIV
+><P
+>Furthermore, the following properties hold:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>  a1 # attribute "att" = Value "apple"
+&#38; a2 # attribute "att" = Value "orange"
+
+&#38; cherries # data = "Cherries"
+&#38;   orange # data = "An orange"
+&#38;       a1 # data = "An orangeCherries"
+
+&#38;       a1 # node_type = T_element "a"
+&#38;       a2 # node_type = T_element "a"
+&#38;       b1 # node_type = T_element "b"
+&#38;       c1 # node_type = T_element "c"
+&#38; cherries # node_type = T_data
+&#38;   orange # node_type = T_data
+
+&#38;       a1 # sub_nodes = [ b1; c1 ]
+&#38;       a2 # sub_nodes = [ orange ]
+&#38;       b1 # sub_nodes = [ a2; cherries ]
+&#38;       c1 # sub_nodes = []
+&#38; cherries # sub_nodes = []
+&#38;   orange # sub_nodes = []
+
+&#38;       a2 # parent == b1
+&#38;       b1 # parent == a1
+&#38;       c1 # parent == a1
+&#38; cherries # parent == b1
+&#38;   orange # parent == a2</PRE
+></P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Searching nodes. </B
+>The following function searches all nodes of a tree 
+for which a certain condition holds:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let rec search p t =
+  if p t then
+    t :: search_list p (t # sub_nodes)
+  else
+    search_list p (t # sub_nodes)
+
+and search_list p l =
+  match l with
+    []      -&gt; []
+  | t :: l' -&gt; (search p t) @ (search_list p l')
+;;</PRE
+></P
+></DIV
+><P
+>For example, if you want to search all elements of a certain
+type <TT
+CLASS="LITERAL"
+>et</TT
+>, the function <TT
+CLASS="LITERAL"
+>search</TT
+> can be
+applied as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let search_element_type et t =
+  search (fun x -&gt; x # node_type = T_element et) t
+;;</PRE
+></P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Getting attribute values. </B
+>Suppose we have the declaration:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ATTLIST e a CDATA #REQUIRED
+            b CDATA #IMPLIED
+            c CDATA "12345"&#62;</PRE
+>
+
+In this case, every element <TT
+CLASS="LITERAL"
+>e</TT
+> must have an attribute 
+<TT
+CLASS="LITERAL"
+>a</TT
+>, otherwise the parser would indicate an error. If
+the O'Caml variable <TT
+CLASS="LITERAL"
+>n</TT
+> holds the node of the tree 
+corresponding to the element, you can get the value of the attribute
+<TT
+CLASS="LITERAL"
+>a</TT
+> by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_a = n # required_string_attribute "a"</PRE
+>
+
+which is more or less an abbreviation for 
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_a = 
+  match n # attribute "a" with
+    Value s -&#62; s
+  | _       -&#62; assert false</PRE
+>
+
+- as the attribute is required, the <TT
+CLASS="LITERAL"
+>attribute</TT
+> method always
+returns a <TT
+CLASS="LITERAL"
+>Value</TT
+>.</P
+></DIV
+><P
+>In contrast to this, the attribute <TT
+CLASS="LITERAL"
+>b</TT
+> can be
+omitted. In this case, the method <TT
+CLASS="LITERAL"
+>required_string_attribute</TT
+>
+only works if the attribute is present, and it fails if the attribute
+is missing. To get the value, you can apply the method
+<TT
+CLASS="LITERAL"
+>optional_string_attribute</TT
+>:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_b = n # optional_string_attribute "b"</PRE
+>
+
+Now, <TT
+CLASS="LITERAL"
+>value_of_b</TT
+> is of type <TT
+CLASS="LITERAL"
+>string option</TT
+>,
+and <TT
+CLASS="LITERAL"
+>None</TT
+> represents the omitted attribute. Alternatively, 
+you could also use <TT
+CLASS="LITERAL"
+>attribute</TT
+>:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_b = 
+  match n # attribute "b" with
+    Value s       -&#62; Some s
+  | Implied_value -&#62; None
+  | _             -&#62; assert false</PRE
+></P
+><P
+>The attribute <TT
+CLASS="LITERAL"
+>c</TT
+> behaves much like
+<TT
+CLASS="LITERAL"
+>a</TT
+>, because it always has a value. If the attribute is
+omitted, the default, here "12345", will be returned instead. Because of this,
+you can again use <TT
+CLASS="LITERAL"
+>required_string_attribute</TT
+> to get the
+value.</P
+><P
+>The type <TT
+CLASS="LITERAL"
+>CDATA</TT
+> is the most general string
+type. The types <TT
+CLASS="LITERAL"
+>NMTOKEN</TT
+>, <TT
+CLASS="LITERAL"
+>ID</TT
+>,
+<TT
+CLASS="LITERAL"
+>IDREF</TT
+>, <TT
+CLASS="LITERAL"
+>ENTITY</TT
+>, and all enumerators and
+notations are special forms of string types that restrict the possible
+values. From O'Caml, they behave like <TT
+CLASS="LITERAL"
+>CDATA</TT
+>, i.e. you can
+use the methods <TT
+CLASS="LITERAL"
+>required_string_attribute</TT
+> and
+<TT
+CLASS="LITERAL"
+>optional_string_attribute</TT
+>, too.</P
+><P
+>In contrast to this, the types <TT
+CLASS="LITERAL"
+>NMTOKENS</TT
+>,
+<TT
+CLASS="LITERAL"
+>IDREFS</TT
+>, and <TT
+CLASS="LITERAL"
+>ENTITIES</TT
+> mean lists of
+strings. Suppose we have the declaration:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#60;!ATTLIST f d NMTOKENS #REQUIRED
+            e NMTOKENS #IMPLIED&#62;</PRE
+>
+
+The type <TT
+CLASS="LITERAL"
+>NMTOKENS</TT
+> stands for lists of space-separated
+tokens; for example the value <TT
+CLASS="LITERAL"
+>"1 abc 23ef"</TT
+> means the list
+<TT
+CLASS="LITERAL"
+>["1"; "abc"; "23ef"]</TT
+>. (Again, <TT
+CLASS="LITERAL"
+>IDREFS</TT
+>
+and <TT
+CLASS="LITERAL"
+>ENTITIES</TT
+> have more restricted values.) To get the
+value of attribute <TT
+CLASS="LITERAL"
+>d</TT
+>, one can use
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_d = n # required_list_attribute "d"</PRE
+>
+
+or
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_d = 
+  match n # attribute "d" with
+    Valuelist l -&#62; l
+  | _           -&#62; assert false</PRE
+>
+As <TT
+CLASS="LITERAL"
+>d</TT
+> is required, the attribute cannot be omitted, and 
+the <TT
+CLASS="LITERAL"
+>attribute</TT
+> method returns always a
+<TT
+CLASS="LITERAL"
+>Valuelist</TT
+>. </P
+><P
+>For optional attributes like <TT
+CLASS="LITERAL"
+>e</TT
+>, apply
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_e = n # optional_list_attribute "e"</PRE
+>
+
+or
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_e = 
+  match n # attribute "e" with
+    Valuelist l   -&#62; l
+  | Implied_value -&#62; []
+  | _             -&#62; assert false</PRE
+>
+
+Here, a missing attribute counts as the empty list.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1435"
+>3.2.7. Iterators</A
+></H2
+><P
+>There are also several iterators in Pxp_document; please see
+the mli file for details. You can find examples of their use in the
+"simple_transformation" directory.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>val find : ?deeply:bool -&#62; 
+           f:('ext node -&#62; bool) -&#62; 'ext node -&#62; 'ext node
+
+val find_all : ?deeply:bool -&#62;
+               f:('ext node -&#62; bool) -&#62; 'ext node -&#62; 'ext node list
+
+val find_element : ?deeply:bool -&#62;
+                   string -&#62; 'ext node -&#62; 'ext node
+
+val find_all_elements : ?deeply:bool -&#62;
+                        string -&#62; 'ext node -&#62; 'ext node list
+
+exception Skip
+val map_tree :  pre:('exta node -&#62; 'extb node) -&#62;
+               ?post:('extb node -&#62; 'extb node) -&#62;
+               'exta node -&#62; 
+                   'extb node
+
+
+val map_tree_sibl : 
+        pre: ('exta node option -&#62; 'exta node -&#62; 'exta node option -&#62; 
+                  'extb node) -&#62;
+       ?post:('extb node option -&#62; 'extb node -&#62; 'extb node option -&#62; 
+                  'extb node) -&#62;
+       'exta node -&#62; 
+           'extb node
+
+val iter_tree : ?pre:('ext node -&#62; unit) -&#62;
+                ?post:('ext node -&#62; unit) -&#62;
+                'ext node -&#62; 
+                    unit
+
+val iter_tree_sibl :
+       ?pre: ('ext node option -&#62; 'ext node -&#62; 'ext node option -&#62; unit) -&#62;
+       ?post:('ext node option -&#62; 'ext node -&#62; 'ext node option -&#62; unit) -&#62;
+       'ext node -&#62; 
+           unit</PRE
+></P
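+><P
+>A small sketch (not taken from the distribution) of how two of these iterators
+might be used; the element type "a" is only an example:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>(* collect all hyperlink elements of the subtree below root and print
+   their character data *)
+let print_links root =
+  List.iter
+    (fun n -&gt; print_endline (n # data))
+    (find_all_elements ~deeply:true "a" root)
+;;
+
+(* count the data nodes of a tree with iter_tree *)
+let count_data_nodes root =
+  let count = ref 0 in
+  iter_tree ~pre:(fun n -&gt; if n # node_type = T_data then incr count) root;
+  !count
+;;</PRE
+></P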
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="c893.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x1439.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>The objects representing the document</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c893.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>The class type <TT
+CLASS="LITERAL"
+>extension</TT
+></TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/ps/markup.ps b/helm/DEVEL/pxp/pxp/doc/manual/ps/markup.ps
new file mode 100644 (file)
index 0000000..3a98c79
--- /dev/null
@@ -0,0 +1,8866 @@
+%!PS-Adobe-2.0
+%%Creator: dvips(k) 5.86 Copyright 1999 Radical Eye Software
+%%Pages: 96
+%%PageOrder: Ascend
+%%BoundingBox: 0 0 596 842
+%%DocumentFonts: Helvetica-Bold Times-Roman Times-Bold Times-Italic
+%%+ Courier Courier-Oblique Helvetica-BoldOblique Courier-Bold
+%%DocumentPaperSizes: a4
+%%EndComments
+%DVIPSWebPage: (www.radicaleye.com)
+%DVIPSCommandLine: dvips -f
+%DVIPSParameters: dpi=600, compressed
+%DVIPSSource:  TeX output 2000.08.30:1757
+%%BeginProcSet: texc.pro
+%!
+/TeXDict 300 dict def TeXDict begin/N{def}def/B{bind def}N/S{exch}N/X{S
+N}B/A{dup}B/TR{translate}N/isls false N/vsize 11 72 mul N/hsize 8.5 72
+mul N/landplus90{false}def/@rigin{isls{[0 landplus90{1 -1}{-1 1}ifelse 0
+0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{
+landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize
+mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[
+matrix currentmatrix{A A round sub abs 0.00001 lt{round}if}forall round
+exch round exch]setmatrix}N/@landscape{/isls true N}B/@manualfeed{
+statusdict/manualfeed true put}B/@copies{/#copies X}B/FMat[1 0 0 -1 0 0]
+N/FBB[0 0 0 0]N/nn 0 N/IEn 0 N/ctr 0 N/df-tail{/nn 8 dict N nn begin
+/FontType 3 N/FontMatrix fntrx N/FontBBox FBB N string/base X array
+/BitMaps X/BuildChar{CharBuilder}N/Encoding IEn N end A{/foo setfont}2
+array copy cvx N load 0 nn put/ctr 0 N[}B/sf 0 N/df{/sf 1 N/fntrx FMat N
+df-tail}B/dfs{div/sf X/fntrx[sf 0 0 sf neg 0 0]N df-tail}B/E{pop nn A
+definefont setfont}B/Cw{Cd A length 5 sub get}B/Ch{Cd A length 4 sub get
+}B/Cx{128 Cd A length 3 sub get sub}B/Cy{Cd A length 2 sub get 127 sub}
+B/Cdx{Cd A length 1 sub get}B/Ci{Cd A type/stringtype ne{ctr get/ctr ctr
+1 add N}if}B/id 0 N/rw 0 N/rc 0 N/gp 0 N/cp 0 N/G 0 N/CharBuilder{save 3
+1 roll S A/base get 2 index get S/BitMaps get S get/Cd X pop/ctr 0 N Cdx
+0 Cx Cy Ch sub Cx Cw add Cy setcachedevice Cw Ch true[1 0 0 -1 -.1 Cx
+sub Cy .1 sub]/id Ci N/rw Cw 7 add 8 idiv string N/rc 0 N/gp 0 N/cp 0 N{
+rc 0 ne{rc 1 sub/rc X rw}{G}ifelse}imagemask restore}B/G{{id gp get/gp
+gp 1 add N A 18 mod S 18 idiv pl S get exec}loop}B/adv{cp add/cp X}B
+/chg{rw cp id gp 4 index getinterval putinterval A gp add/gp X adv}B/nd{
+/cp 0 N rw exit}B/lsh{rw cp 2 copy get A 0 eq{pop 1}{A 255 eq{pop 254}{
+A A add 255 and S 1 and or}ifelse}ifelse put 1 adv}B/rsh{rw cp 2 copy
+get A 0 eq{pop 128}{A 255 eq{pop 127}{A 2 idiv S 128 and or}ifelse}
+ifelse put 1 adv}B/clr{rw cp 2 index string putinterval adv}B/set{rw cp
+fillstr 0 4 index getinterval putinterval adv}B/fillstr 18 string 0 1 17
+{2 copy 255 put pop}for N/pl[{adv 1 chg}{adv 1 chg nd}{1 add chg}{1 add
+chg nd}{adv lsh}{adv lsh nd}{adv rsh}{adv rsh nd}{1 add adv}{/rc X nd}{
+1 add set}{1 add clr}{adv 2 chg}{adv 2 chg nd}{pop nd}]A{bind pop}
+forall N/D{/cc X A type/stringtype ne{]}if nn/base get cc ctr put nn
+/BitMaps get S ctr S sf 1 ne{A A length 1 sub A 2 index S get sf div put
+}if put/ctr ctr 1 add N}B/I{cc 1 add D}B/bop{userdict/bop-hook known{
+bop-hook}if/SI save N @rigin 0 0 moveto/V matrix currentmatrix A 1 get A
+mul exch 0 get A mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N/eop{
+SI restore userdict/eop-hook known{eop-hook}if showpage}N/@start{
+userdict/start-hook known{start-hook}if pop/VResolution X/Resolution X
+1000 div/DVImag X/IEn 256 array N 2 string 0 1 255{IEn S A 360 add 36 4
+index cvrs cvn put}for pop 65781.76 div/vsize X 65781.76 div/hsize X}N
+/p{show}N/RMat[1 0 0 -1 0 0]N/BDot 260 string N/Rx 0 N/Ry 0 N/V{}B/RV/v{
+/Ry X/Rx X V}B statusdict begin/product where{pop false[(Display)(NeXT)
+(LaserWriter 16/600)]{A length product length le{A length product exch 0
+exch getinterval eq{pop true exit}if}{pop}ifelse}forall}{false}ifelse
+end{{gsave TR -.1 .1 TR 1 1 scale Rx Ry false RMat{BDot}imagemask
+grestore}}{{gsave TR -.1 .1 TR Rx Ry scale 1 1 false RMat{BDot}
+imagemask grestore}}ifelse B/QV{gsave newpath transform round exch round
+exch itransform moveto Rx 0 rlineto 0 Ry neg rlineto Rx neg 0 rlineto
+fill grestore}B/a{moveto}B/delta 0 N/tail{A/delta X 0 rmoveto}B/M{S p
+delta add tail}B/b{S p tail}B/c{-4 M}B/d{-3 M}B/e{-2 M}B/f{-1 M}B/g{0 M}
+B/h{1 M}B/i{2 M}B/j{3 M}B/k{4 M}B/w{0 rmoveto}B/l{p -4 w}B/m{p -3 w}B/n{
+p -2 w}B/o{p -1 w}B/q{p 1 w}B/r{p 2 w}B/s{p 3 w}B/t{p 4 w}B/x{0 S
+rmoveto}B/y{3 2 roll p a}B/bos{/SS save N}B/eos{SS restore}B end
+
+%%EndProcSet
+%%BeginProcSet: 8r.enc
+% @@psencodingfile@{
+%   author = "S. Rahtz, P. MacKay, Alan Jeffrey, B. Horn, K. Berry",
+%   version = "0.6",
+%   date = "1 July 1998",
+%   filename = "8r.enc",
+%   email = "tex-fonts@@tug.org",
+%   docstring = "Encoding for TrueType or Type 1 fonts
+%                to be used with TeX."
+% @}
+% 
+% Idea is to have all the characters normally included in Type 1 fonts
+% available for typesetting. This is effectively the characters in Adobe
+% Standard Encoding + ISO Latin 1 + extra characters from Lucida.
+% 
+% Character code assignments were made as follows:
+% 
+% (1) the Windows ANSI characters are almost all in their Windows ANSI
+% positions, because some Windows users cannot easily reencode the
+% fonts, and it makes no difference on other systems. The only Windows
+% ANSI characters not available are those that make no sense for
+% typesetting -- rubout (127 decimal), nobreakspace (160), softhyphen
+% (173). quotesingle and grave are moved just because it's such an
+% irritation not having them in TeX positions.
+% 
+% (2) Remaining characters are assigned arbitrarily to the lower part
+% of the range, avoiding 0, 10 and 13 in case we meet dumb software.
+% 
+% (3) Y&Y Lucida Bright includes some extra text characters; in the
+% hopes that other PostScript fonts, perhaps created for public
+% consumption, will include them, they are included starting at 0x12.
+% 
+% (4) Remaining positions left undefined are for use in (hopefully)
+% upward-compatible revisions, if someday more characters are generally
+% available.
+% 
+% (5) hyphen appears twice for compatibility with both 
+% ASCII and Windows.
+% 
+/TeXBase1Encoding [
+% 0x00 (encoded characters from Adobe Standard not in Windows 3.1)
+  /.notdef /dotaccent /fi /fl
+  /fraction /hungarumlaut /Lslash /lslash
+  /ogonek /ring /.notdef
+  /breve /minus /.notdef 
+% These are the only two remaining unencoded characters, so may as
+% well include them.
+  /Zcaron /zcaron 
+% 0x10
+ /caron /dotlessi 
+% (unusual TeX characters available in, e.g., Lucida Bright)
+ /dotlessj /ff /ffi /ffl 
+ /.notdef /.notdef /.notdef /.notdef
+ /.notdef /.notdef /.notdef /.notdef
+ % very contentious; it's so painful not having quoteleft and quoteright
+ % at 96 and 145 that we move the things normally found there to here.
+ /grave /quotesingle 
+% 0x20 (ASCII begins)
+ /space /exclam /quotedbl /numbersign
+ /dollar /percent /ampersand /quoteright
+ /parenleft /parenright /asterisk /plus /comma /hyphen /period /slash
+% 0x30
+ /zero /one /two /three /four /five /six /seven
+ /eight /nine /colon /semicolon /less /equal /greater /question
+% 0x40
+ /at /A /B /C /D /E /F /G /H /I /J /K /L /M /N /O
+% 0x50
+ /P /Q /R /S /T /U /V /W
+ /X /Y /Z /bracketleft /backslash /bracketright /asciicircum /underscore
+% 0x60
+ /quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o
+% 0x70
+ /p /q /r /s /t /u /v /w
+ /x /y /z /braceleft /bar /braceright /asciitilde
+ /.notdef % rubout; ASCII ends
+% 0x80
+ /.notdef /.notdef /quotesinglbase /florin
+ /quotedblbase /ellipsis /dagger /daggerdbl
+ /circumflex /perthousand /Scaron /guilsinglleft
+ /OE /.notdef /.notdef /.notdef
+% 0x90
+ /.notdef /.notdef /.notdef /quotedblleft
+ /quotedblright /bullet /endash /emdash
+ /tilde /trademark /scaron /guilsinglright
+ /oe /.notdef /.notdef /Ydieresis
+% 0xA0
+ /.notdef % nobreakspace
+ /exclamdown /cent /sterling
+ /currency /yen /brokenbar /section
+ /dieresis /copyright /ordfeminine /guillemotleft
+ /logicalnot
+ /hyphen % Y&Y (also at 45); Windows' softhyphen
+ /registered
+ /macron
+% 0xD0
+ /degree /plusminus /twosuperior /threesuperior
+ /acute /mu /paragraph /periodcentered
+ /cedilla /onesuperior /ordmasculine /guillemotright
+ /onequarter /onehalf /threequarters /questiondown
+% 0xC0
+ /Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /AE /Ccedilla
+ /Egrave /Eacute /Ecircumflex /Edieresis
+ /Igrave /Iacute /Icircumflex /Idieresis
+% 0xD0
+ /Eth /Ntilde /Ograve /Oacute
+ /Ocircumflex /Otilde /Odieresis /multiply
+ /Oslash /Ugrave /Uacute /Ucircumflex
+ /Udieresis /Yacute /Thorn /germandbls
+% 0xE0
+ /agrave /aacute /acircumflex /atilde
+ /adieresis /aring /ae /ccedilla
+ /egrave /eacute /ecircumflex /edieresis
+ /igrave /iacute /icircumflex /idieresis
+% 0xF0
+ /eth /ntilde /ograve /oacute
+ /ocircumflex /otilde /odieresis /divide
+ /oslash /ugrave /uacute /ucircumflex
+ /udieresis /yacute /thorn /ydieresis
+] def
+
+%%EndProcSet
+%%BeginProcSet: texps.pro
+%!
+TeXDict begin/rf{findfont dup length 1 add dict begin{1 index/FID ne 2
+index/UniqueID ne and{def}{pop pop}ifelse}forall[1 index 0 6 -1 roll
+exec 0 exch 5 -1 roll VResolution Resolution div mul neg 0 0]/Metrics
+exch def dict begin Encoding{exch dup type/integertype ne{pop pop 1 sub
+dup 0 le{pop}{[}ifelse}{FontMatrix 0 get div Metrics 0 get div def}
+ifelse}forall Metrics/Metrics currentdict end def[2 index currentdict
+end definefont 3 -1 roll makefont/setfont cvx]cvx def}def/ObliqueSlant{
+dup sin S cos div neg}B/SlantFont{4 index mul add}def/ExtendFont{3 -1
+roll mul exch}def/ReEncodeFont{CharStrings rcheck{/Encoding false def
+dup[exch{dup CharStrings exch known not{pop/.notdef/Encoding true def}
+if}forall Encoding{]exch pop}{cleartomark}ifelse}if/Encoding exch def}
+def end
+
+%%EndProcSet
+%%BeginProcSet: special.pro
+%!
+TeXDict begin/SDict 200 dict N SDict begin/@SpecialDefaults{/hs 612 N
+/vs 792 N/ho 0 N/vo 0 N/hsc 1 N/vsc 1 N/ang 0 N/CLIP 0 N/rwiSeen false N
+/rhiSeen false N/letter{}N/note{}N/a4{}N/legal{}N}B/@scaleunit 100 N
+/@hscale{@scaleunit div/hsc X}B/@vscale{@scaleunit div/vsc X}B/@hsize{
+/hs X/CLIP 1 N}B/@vsize{/vs X/CLIP 1 N}B/@clip{/CLIP 2 N}B/@hoffset{/ho
+X}B/@voffset{/vo X}B/@angle{/ang X}B/@rwi{10 div/rwi X/rwiSeen true N}B
+/@rhi{10 div/rhi X/rhiSeen true N}B/@llx{/llx X}B/@lly{/lly X}B/@urx{
+/urx X}B/@ury{/ury X}B/magscale true def end/@MacSetUp{userdict/md known
+{userdict/md get type/dicttype eq{userdict begin md length 10 add md
+maxlength ge{/md md dup length 20 add dict copy def}if end md begin
+/letter{}N/note{}N/legal{}N/od{txpose 1 0 mtx defaultmatrix dtransform S
+atan/pa X newpath clippath mark{transform{itransform moveto}}{transform{
+itransform lineto}}{6 -2 roll transform 6 -2 roll transform 6 -2 roll
+transform{itransform 6 2 roll itransform 6 2 roll itransform 6 2 roll
+curveto}}{{closepath}}pathforall newpath counttomark array astore/gc xdf
+pop ct 39 0 put 10 fz 0 fs 2 F/|______Courier fnt invertflag{PaintBlack}
+if}N/txpose{pxs pys scale ppr aload pop por{noflips{pop S neg S TR pop 1
+-1 scale}if xflip yflip and{pop S neg S TR 180 rotate 1 -1 scale ppr 3
+get ppr 1 get neg sub neg ppr 2 get ppr 0 get neg sub neg TR}if xflip
+yflip not and{pop S neg S TR pop 180 rotate ppr 3 get ppr 1 get neg sub
+neg 0 TR}if yflip xflip not and{ppr 1 get neg ppr 0 get neg TR}if}{
+noflips{TR pop pop 270 rotate 1 -1 scale}if xflip yflip and{TR pop pop
+90 rotate 1 -1 scale ppr 3 get ppr 1 get neg sub neg ppr 2 get ppr 0 get
+neg sub neg TR}if xflip yflip not and{TR pop pop 90 rotate ppr 3 get ppr
+1 get neg sub neg 0 TR}if yflip xflip not and{TR pop pop 270 rotate ppr
+2 get ppr 0 get neg sub neg 0 S TR}if}ifelse scaleby96{ppr aload pop 4
+-1 roll add 2 div 3 1 roll add 2 div 2 copy TR .96 dup scale neg S neg S
+TR}if}N/cp{pop pop showpage pm restore}N end}if}if}N/normalscale{
+Resolution 72 div VResolution 72 div neg scale magscale{DVImag dup scale
+}if 0 setgray}N/psfts{S 65781.76 div N}N/startTexFig{/psf$SavedState
+save N userdict maxlength dict begin/magscale true def normalscale
+currentpoint TR/psf$ury psfts/psf$urx psfts/psf$lly psfts/psf$llx psfts
+/psf$y psfts/psf$x psfts currentpoint/psf$cy X/psf$cx X/psf$sx psf$x
+psf$urx psf$llx sub div N/psf$sy psf$y psf$ury psf$lly sub div N psf$sx
+psf$sy scale psf$cx psf$sx div psf$llx sub psf$cy psf$sy div psf$ury sub
+TR/showpage{}N/erasepage{}N/copypage{}N/p 3 def @MacSetUp}N/doclip{
+psf$llx psf$lly psf$urx psf$ury currentpoint 6 2 roll newpath 4 copy 4 2
+roll moveto 6 -1 roll S lineto S lineto S lineto closepath clip newpath
+moveto}N/endTexFig{end psf$SavedState restore}N/@beginspecial{SDict
+begin/SpecialSave save N gsave normalscale currentpoint TR
+@SpecialDefaults count/ocount X/dcount countdictstack N}N/@setspecial{
+CLIP 1 eq{newpath 0 0 moveto hs 0 rlineto 0 vs rlineto hs neg 0 rlineto
+closepath clip}if ho vo TR hsc vsc scale ang rotate rwiSeen{rwi urx llx
+sub div rhiSeen{rhi ury lly sub div}{dup}ifelse scale llx neg lly neg TR
+}{rhiSeen{rhi ury lly sub div dup scale llx neg lly neg TR}if}ifelse
+CLIP 2 eq{newpath llx lly moveto urx lly lineto urx ury lineto llx ury
+lineto closepath clip}if/showpage{}N/erasepage{}N/copypage{}N newpath}N
+/@endspecial{count ocount sub{pop}repeat countdictstack dcount sub{end}
+repeat grestore SpecialSave restore end}N/@defspecial{SDict begin}N
+/@fedspecial{end}B/li{lineto}B/rl{rlineto}B/rc{rcurveto}B/np{/SaveX
+currentpoint/SaveY X N 1 setlinecap newpath}N/st{stroke SaveX SaveY
+moveto}N/fil{fill SaveX SaveY moveto}N/ellipse{/endangle X/startangle X
+/yrad X/xrad X/savematrix matrix currentmatrix N TR xrad yrad scale 0 0
+1 startangle endangle arc savematrix setmatrix}N end
+
+%%EndProcSet
+%%BeginProcSet: color.pro
+%!
+TeXDict begin/setcmykcolor where{pop}{/setcmykcolor{dup 10 eq{pop
+setrgbcolor}{1 sub 4 1 roll 3{3 index add neg dup 0 lt{pop 0}if 3 1 roll
+}repeat setrgbcolor pop}ifelse}B}ifelse/TeXcolorcmyk{setcmykcolor}def
+/TeXcolorrgb{setrgbcolor}def/TeXcolorgrey{setgray}def/TeXcolorgray{
+setgray}def/TeXcolorhsb{sethsbcolor}def/currentcmykcolor where{pop}{
+/currentcmykcolor{currentrgbcolor 10}B}ifelse/DC{exch dup userdict exch
+known{pop pop}{X}ifelse}B/GreenYellow{0.15 0 0.69 0 setcmykcolor}DC
+/Yellow{0 0 1 0 setcmykcolor}DC/Goldenrod{0 0.10 0.84 0 setcmykcolor}DC
+/Dandelion{0 0.29 0.84 0 setcmykcolor}DC/Apricot{0 0.32 0.52 0
+setcmykcolor}DC/Peach{0 0.50 0.70 0 setcmykcolor}DC/Melon{0 0.46 0.50 0
+setcmykcolor}DC/YellowOrange{0 0.42 1 0 setcmykcolor}DC/Orange{0 0.61
+0.87 0 setcmykcolor}DC/BurntOrange{0 0.51 1 0 setcmykcolor}DC
+/Bittersweet{0 0.75 1 0.24 setcmykcolor}DC/RedOrange{0 0.77 0.87 0
+setcmykcolor}DC/Mahogany{0 0.85 0.87 0.35 setcmykcolor}DC/Maroon{0 0.87
+0.68 0.32 setcmykcolor}DC/BrickRed{0 0.89 0.94 0.28 setcmykcolor}DC/Red{
+0 1 1 0 setcmykcolor}DC/OrangeRed{0 1 0.50 0 setcmykcolor}DC/RubineRed{
+0 1 0.13 0 setcmykcolor}DC/WildStrawberry{0 0.96 0.39 0 setcmykcolor}DC
+/Salmon{0 0.53 0.38 0 setcmykcolor}DC/CarnationPink{0 0.63 0 0
+setcmykcolor}DC/Magenta{0 1 0 0 setcmykcolor}DC/VioletRed{0 0.81 0 0
+setcmykcolor}DC/Rhodamine{0 0.82 0 0 setcmykcolor}DC/Mulberry{0.34 0.90
+0 0.02 setcmykcolor}DC/RedViolet{0.07 0.90 0 0.34 setcmykcolor}DC
+/Fuchsia{0.47 0.91 0 0.08 setcmykcolor}DC/Lavender{0 0.48 0 0
+setcmykcolor}DC/Thistle{0.12 0.59 0 0 setcmykcolor}DC/Orchid{0.32 0.64 0
+0 setcmykcolor}DC/DarkOrchid{0.40 0.80 0.20 0 setcmykcolor}DC/Purple{
+0.45 0.86 0 0 setcmykcolor}DC/Plum{0.50 1 0 0 setcmykcolor}DC/Violet{
+0.79 0.88 0 0 setcmykcolor}DC/RoyalPurple{0.75 0.90 0 0 setcmykcolor}DC
+/BlueViolet{0.86 0.91 0 0.04 setcmykcolor}DC/Periwinkle{0.57 0.55 0 0
+setcmykcolor}DC/CadetBlue{0.62 0.57 0.23 0 setcmykcolor}DC
+/CornflowerBlue{0.65 0.13 0 0 setcmykcolor}DC/MidnightBlue{0.98 0.13 0
+0.43 setcmykcolor}DC/NavyBlue{0.94 0.54 0 0 setcmykcolor}DC/RoyalBlue{1
+0.50 0 0 setcmykcolor}DC/Blue{1 1 0 0 setcmykcolor}DC/Cerulean{0.94 0.11
+0 0 setcmykcolor}DC/Cyan{1 0 0 0 setcmykcolor}DC/ProcessBlue{0.96 0 0 0
+setcmykcolor}DC/SkyBlue{0.62 0 0.12 0 setcmykcolor}DC/Turquoise{0.85 0
+0.20 0 setcmykcolor}DC/TealBlue{0.86 0 0.34 0.02 setcmykcolor}DC
+/Aquamarine{0.82 0 0.30 0 setcmykcolor}DC/BlueGreen{0.85 0 0.33 0
+setcmykcolor}DC/Emerald{1 0 0.50 0 setcmykcolor}DC/JungleGreen{0.99 0
+0.52 0 setcmykcolor}DC/SeaGreen{0.69 0 0.50 0 setcmykcolor}DC/Green{1 0
+1 0 setcmykcolor}DC/ForestGreen{0.91 0 0.88 0.12 setcmykcolor}DC
+/PineGreen{0.92 0 0.59 0.25 setcmykcolor}DC/LimeGreen{0.50 0 1 0
+setcmykcolor}DC/YellowGreen{0.44 0 0.74 0 setcmykcolor}DC/SpringGreen{
+0.26 0 0.76 0 setcmykcolor}DC/OliveGreen{0.64 0 0.95 0.40 setcmykcolor}
+DC/RawSienna{0 0.72 1 0.45 setcmykcolor}DC/Sepia{0 0.83 1 0.70
+setcmykcolor}DC/Brown{0 0.81 1 0.60 setcmykcolor}DC/Tan{0.14 0.42 0.56 0
+setcmykcolor}DC/Gray{0 0 0 0.50 setcmykcolor}DC/Black{0 0 0 1
+setcmykcolor}DC/White{0 0 0 0 setcmykcolor}DC end
+
+%%EndProcSet
+TeXDict begin 39158280 55380996 1000 600 600 () @start
+/Fa 106[21 149[{TeXBase1Encoding ReEncodeFont}1 59.7758
+/Times-Roman rf /Fb 135[77 2[77 77 77 3[77 77 77 3[77
+3[77 77 77 99[{TeXBase1Encoding ReEncodeFont}11 129.116
+/Courier-Bold rf /Fc 134[65 65 2[65 65 65 65 1[65 65
+65 65 65 2[65 65 65 65 65 65 65 65 65 1[65 36[65 6[65
+65 65 49[{TeXBase1Encoding ReEncodeFont}25 107.597 /Courier-Bold
+rf /Fd 141[56 4[128 7[80 88 2[80 97[{TeXBase1Encoding ReEncodeFont}5
+143.462 /Helvetica-BoldOblique rf /Fe 147[21 4[37 1[33
+3[37 23[25 14[25 58[{TeXBase1Encoding ReEncodeFont}6
+74.7198 /Times-Italic rf /Ff 204[25 25 25 49[{
+TeXBase1Encoding ReEncodeFont}3 49.8132 /Times-Roman
+rf
+%DVIPSBitmapFont: Fg cmmi8 8 2
+/Fg 2 63 df<EE01C01607161FEE7F00ED01FCED07F0ED1FC0037FC7FCEC01FCEC07F0EC
+0FC0023FC8FC14FCEB03F8EB0FE0EB3F8001FEC9FCEA03F8EA0FE0EA3F8000FECAFC12F8
+12FEEA3F80EA0FE0EA03F8EA00FEEB3F80EB0FE0EB03F8EB00FC143FEC0FC0EC07F0EC01
+FCEC007FED1FC0ED07F0ED01FCED007FEE1FC0160716012A2B7AA537>60
+D<12E012F812FEEA3F80EA0FE0EA03F8EA00FEEB3F80EB0FE0EB03F8EB00FC143FEC0FC0
+EC07F0EC01FCEC007FED1FC0ED07F0ED01FCED007FEE1FC01607161FEE7F00ED01FCED07
+F0ED1FC0037FC7FCEC01FCEC07F0EC0FC0023FC8FC14FCEB03F8EB0FE0EB3F8001FEC9FC
+EA03F8EA0FE0EA3F8000FECAFC12F812E02A2B7AA537>62 D E
+%EndDVIPSBitmapFont
+/Fh 131[40 1[40 40 40 40 40 40 40 40 40 40 40 40 40 40
+40 40 1[40 40 40 1[40 40 40 40 40 1[40 5[40 3[40 40 40
+40 40 40 40 40 40 40 40 1[40 40 40 1[40 40 40 40 40 1[40
+40 40 40 40 40 1[40 4[40 1[40 1[40 40 40 40 40 40 40
+40 40 40 40 1[40 40 40 33[{TeXBase1Encoding ReEncodeFont}69
+67.2479 /Courier rf /Fi 105[37 28[37 37 54 37 37 21 29
+25 37 37 37 37 58 21 37 1[21 37 37 25 33 37 33 37 33
+7[54 54 3[46 5[54 66 46 2[25 2[42 2[50 50 54 5[21 21
+11[19 1[19 2[25 25 25 4[30 31[42 2[{TeXBase1Encoding ReEncodeFont}45
+74.7198 /Times-Roman rf /Fj 135[55 7[61 2[89 28 6[55
+3[55 27[66 69[{TeXBase1Encoding ReEncodeFont}7 99.6264
+/Helvetica-Bold rf /Fk 145[27 2[27 57[27 49[{
+TeXBase1Encoding ReEncodeFont}3 44.8318 /Courier-Oblique
+rf /Fl 135[50 3[50 50 3[50 50 3[50 50 3[50 1[50 50 2[50
+95[{TeXBase1Encoding ReEncodeFont}11 83.022 /Courier-Oblique
+rf
+%DVIPSBitmapFont: Fm cmmi10 10 2
+/Fm 2 63 df<EF0380EF0FC0173FEFFF80933803FE00EE0FF8EE3FE0EEFF80DB03FEC7FC
+ED0FF8ED3FE0EDFF80DA03FEC8FCEC0FF8EC3FE0ECFF80D903FEC9FCEB0FF8EB3FE0EBFF
+80D803FECAFCEA0FF8EA3FE0EA7F8000FECBFCA2EA7F80EA3FE0EA0FF8EA03FEC66C7EEB
+3FE0EB0FF8EB03FE903800FF80EC3FE0EC0FF8EC03FE913800FF80ED3FE0ED0FF8ED03FE
+923800FF80EE3FE0EE0FF8EE03FE933800FF80EF3FC0170FEF0380323279AD41>60
+D<126012FCB4FCEA7FC0EA1FF0EA07FCEA01FF38007FC0EB1FF0EB07FCEB01FF9038007F
+C0EC1FF0EC07FCEC01FF9138007FC0ED1FF0ED07FCED01FF9238007FC0EE1FF0EE07FCEE
+01FF9338007F80EF1FC0A2EF7F80933801FF00EE07FCEE1FF0EE7FC04B48C7FCED07FCED
+1FF0ED7FC04A48C8FCEC07FCEC1FF0EC7FC04948C9FCEB07FCEB1FF0EB7FC04848CAFCEA
+07FCEA3FF0EA7FC048CBFC12FC1270323279AD41>62 D E
+%EndDVIPSBitmapFont
+/Fn 134[45 45 1[45 45 45 45 45 1[45 45 45 45 45 1[45
+45 45 45 45 45 45 45 45 45 1[45 5[45 2[45 8[45 5[45 2[45
+45 1[45 19[45 45 44[{TeXBase1Encoding ReEncodeFont}32
+74.7198 /Courier-Oblique rf
+%DVIPSBitmapFont: Fo cmmi9 9 2
+/Fo 2 63 df<171C177EEE01FEEE07FCEE1FF0EE7FC0923801FF00ED07FCED1FF0ED7FC0
+4A48C7FCEC07FCEC1FF0EC7FC04948C8FCEB07FCEB1FF0EB7FC04848C9FCEA07FCEA1FF0
+EA7FC048CAFCA2EA7FC0EA1FF0EA07FCEA01FF38007FC0EB1FF0EB07FCEB01FF9038007F
+C0EC1FF0EC07FCEC01FF9138007FC0ED1FF0ED07FCED01FF9238007FC0EE1FF0EE07FCEE
+01FEEE007E171C2F2E7AA93C>60 D<127012FCB4FCEA7FC0EA1FF0EA07FCEA01FF38007F
+C0EB1FF0EB07FCEB01FF9038007FC0EC1FF0EC07FCEC01FF9138007FC0ED1FF0ED07FCED
+01FF9238007FC0EE1FF0EE07FCEE01FEA2EE07FCEE1FF0EE7FC0923801FF00ED07FCED1F
+F0ED7FC04A48C7FCEC07FCEC1FF0EC7FC04948C8FCEB07FCEB1FF0EB7FC04848C9FCEA07
+FCEA1FF0EA7FC048CAFC12FC12702F2E7AA93C>62 D E
+%EndDVIPSBitmapFont
+/Fp 134[66 66 93 66 73 40 66 47 1[73 73 73 106 33 2[33
+73 73 40 66 73 66 73 66 8[80 113 80 86 73 80 86 1[80
+1[86 100 73 2[33 86 1[73 80 86 86 1[86 1[73 5[66 66 66
+66 66 66 66 66 66 66 1[33 40 33 2[40 40 5[57 31[73 2[{
+TeXBase1Encoding ReEncodeFont}58 119.552 /Helvetica-Bold
+rf /Fq 129[45 45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 45 45 1[45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 1[45 45 45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 45 45 1[45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 45 45 1[45 45 45 33[{TeXBase1Encoding ReEncodeFont}90
+74.7198 /Courier rf /Fr 134[37 37 55 37 42 23 32 32 1[42
+42 42 60 23 37 23 23 42 42 23 37 42 37 42 42 1[42 6[51
+69 1[60 46 42 2[51 1[55 69 46 2[28 3[51 60 55 1[51 1[42
+4[28 42 42 42 42 42 42 42 42 42 42 1[21 28 21 2[28 28
+6[28 30[42 2[{TeXBase1Encoding ReEncodeFont}58 83.022
+/Times-Italic rf /Fs 138[105 57 96 67 1[105 105 105 153
+48 1[48 48 105 105 57 96 105 96 105 96 8[115 163 1[124
+105 3[115 2[143 105 5[105 2[124 3[105 10[96 96 96 96
+2[48 43[105 2[{TeXBase1Encoding ReEncodeFont}35 172.154
+/Helvetica-Bold rf /Ft 106[23 29 29 25[33 33 48 33 33
+18 26 22 1[33 33 33 52 18 33 18 18 33 33 22 29 33 29
+33 29 8[48 3[41 37 2[37 6[22 1[48 12[18 10[18 17 1[17
+2[22 22 5[27 31[37 2[{TeXBase1Encoding ReEncodeFont}41
+66.4176 /Times-Roman rf /Fu 134[42 42 60 42 46 28 32
+37 1[46 42 46 69 23 46 1[23 46 42 28 37 46 37 46 42 9[83
+60 60 55 46 60 3[60 78 55 2[32 65 65 51 55 60 60 55 60
+1[42 6[42 1[42 42 42 42 42 42 2[21 28 21 4[28 39[{
+TeXBase1Encoding ReEncodeFont}53 83.022 /Times-Bold rf
+/Fv 27[37 58[63 42[45 40 1[40 37 42 42 60 42 42 23 32
+28 42 42 42 42 65 23 42 23 23 42 42 28 37 42 37 42 37
+28 42 1[28 23 28 1[60 60 78 60 60 51 46 55 60 46 60 60
+74 51 60 1[28 60 60 46 51 60 55 55 60 1[37 47 47 47 23
+23 42 42 42 42 42 42 42 42 42 42 23 21 28 21 2[28 28
+28 65 69 1[42 34 28 29[46 46 2[{TeXBase1Encoding ReEncodeFont}90
+83.022 /Times-Roman rf /Fw 136[65 1[51 1[46 32 2[51 51
+1[23 2[23 51 51 1[46 51 2[46 8[55 3[51 3[55 11[60 9[28
+18[23 39[{TeXBase1Encoding ReEncodeFont}19 83.022 /Helvetica-Bold
+rf /Fx 134[80 80 112 80 88 48 80 56 1[88 88 88 128 40
+80 1[40 88 88 48 80 88 80 88 80 8[96 1[96 104 88 96 104
+2[112 104 120 88 2[40 104 112 1[96 104 104 1[104 6[48
+4[80 80 80 80 80 2[40 48 45[{TeXBase1Encoding ReEncodeFont}48
+143.462 /Helvetica-Bold rf /Fy 138[126 1[115 80 8[57
+126 126 1[115 126 11[138 2[149 126 3[138 6[57 26[57 6[57
+39[{TeXBase1Encoding ReEncodeFont}15 206.584 /Helvetica-Bold
+rf end
+%%EndProlog
+%%BeginSetup
+%%Feature: *Resolution 600dpi
+TeXDict begin
+%%BeginPaperSize: a4
+a4
+%%EndPaperSize
+
+%%EndSetup
+%%Page: 1 1
+1 0 bop Black Black 890 647 a Fy(The)58 b(PXP)f(user')-12
+b(s)58 b(guide)1384 2594 y Fx(Ger)m(d)39 b(Stolpmann)p
+Black Black eop
+%%Page: 2 2
+2 1 bop Black Black -2 579 a Fw(The)22 b(PXP)j(user')-5
+b(s)23 b(guide)-2 687 y Fv(by)d(Gerd)f(Stolpmann)-2 903
+y(Cop)o(yright)f(\251)j(1999,)e(2000)g(by)g(Gerd)h(Stolpmann)-2
+1135 y(PXP)h(is)g(a)g(v)n(alidating)d(parser)i(for)f(XML-1.0)g(which)h
+(has)g(been)g(written)g(entirely)f(in)h(Objecti)n(v)o(e)g(Caml.)-2
+1285 y Fw(Do)o(wnload)h(PXP:)j Fv(The)c(free)g(PXP)h(library)e(can)h
+(be)g(do)n(wnloaded)d(at)k(http://www)-5 b(.ocaml-programming)o(.de)o
+(/pack)o(age)o(s/.)15 b(This)-2 1393 y(user')-5 b(s)20
+b(guide)f(is)j(included.)c(Ne)n(west)j(releases)f(of)g(PXP)h(will)g(be)
+f(announced)e(in)i(The)g(OCaml)g(Link)g(Database)-2 1500
+y(\(http://www)-5 b(.npc.de/ocaml/linkdb)o(/\).)-2 1899
+y Fu(License)-2 2090 y Ft(This)16 b(document,)j(and)e(the)h(described)h
+(softw)o(are,)f("PXP",)e(are)i(cop)o(yright)i(by)d(Gerd)g(Stolpmann.)-2
+2198 y(Permission)h(is)e(hereby)j(granted,)f(free)g(of)f(char)o(ge,)h
+(to)f(an)o(y)h(person)f(obtaining)j(a)d(cop)o(y)h(of)f(this)h(document)
+g(and)g(the)f("PXP")g(softw)o(are)i(\(the)f("Softw)o(are"\),)g(to)f
+(deal)i(in)-2 2306 y(the)f(Softw)o(are)g(without)h(restriction,)g
+(including)h(without)e(limitation)i(the)e(rights)g(to)f(use,)g(cop)o(y)
+l(,)g(modify)l(,)g(mer)o(ge,)g(publish,)h(distrib)o(ute,)h(sublicense,)
+g(and/or)f(sell)-2 2414 y(copies)g(of)f(the)h(Softw)o(are,)g(and)g(to)f
+(permit)h(persons)f(to)h(whom)e(the)i(Softw)o(are)h(is)e(furnished)h
+(to)f(do)g(so,)g(subject)h(to)g(the)f(follo)n(wing)j(conditions:)-2
+2522 y(The)d(abo)o(v)o(e)h(cop)o(yright)h(notice)g(and)f(this)f
+(permission)h(notice)h(shall)f(be)g(included)h(in)e(all)h(copies)h(or)e
+(substantial)i(portions)g(of)e(the)g(Softw)o(are.)-2
+2630 y(The)g(Softw)o(are)h(is)f(pro)o(vided)i(\223as)e(is\224,)g
+(without)i(w)o(arranty)g(of)e(an)o(y)g(kind,)h(e)o(xpress)f(or)g
+(implied,)i(including)g(b)o(ut)e(not)h(limited)h(to)e(the)h(w)o
+(arranties)h(of)e(merchantability)l(,)-2 2737 y(\002tness)g(for)g(a)g
+(particular)j(purpose)e(and)g(noninfringement.)i(In)d(no)g(e)n(v)o(ent)
+h(shall)h(Gerd)e(Stolpmann)h(be)g(liable)h(for)e(an)o(y)g(claim,)h
+(damages)g(or)f(other)h(liability)l(,)i(whether)-2 2845
+y(in)d(an)g(action)i(of)e(contract,)i(tort)f(or)f(otherwise,)i(arising)
+f(from,)e(out)i(of)f(or)g(in)g(connection)j(with)e(the)f(Softw)o(are)i
+(or)e(the)h(use)f(or)g(other)h(dealings)h(in)e(the)h(softw)o(are.)p
+Black Black eop
+%%Page: 3 3
+3 2 bop Black Black -2 621 a Fs(T)-14 b(ab)n(le)48 b(of)g(Contents)396
+815 y Fu(I.)21 b(User')m(s)g(guide)p Black 4 w(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black 4 w(6)596
+943 y Fv(1.)f(What)g(is)h(XML?)p Black 4 w(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black 4 w(7)795 1051
+y(1.1.)e(Introduction)p Black 14 w(.)p Black Black -1
+w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black 4 w(7)994 1159
+y(1.1.1.)g(The)g("hello)h(w)o(orld")g(e)o(xample)p Black
+13 w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black 4 w(7)994 1267 y(1.1.2.)f(XML)h(parsers)g(and)f
+(processors)p Black 3 w(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black 4 w(9)994 1375 y(1.1.3.)g(Discussion)p
+Black 9 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+4 w(9)795 1483 y(1.2.)g(Highlights)g(of)h(XML)p Black
+10 w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(11)994
+1591 y(1.2.1.)f(The)g(DTD)i(and)e(the)i(instance)p Black
+15 w(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(11)994 1699 y(1.2.2.)e(Reserv)o(ed)g(characters)p
+Black 19 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(12)994 1807 y(1.2.3.)g(Elements)g(and)h
+(ELEMENT)f(declarations)p Black 7 w(.)p Black Black -2
+w(.)p Black Black(.)p Black Black(.)p Black Black -1
+w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(13)994
+1915 y(1.2.4.)g(Attrib)n(ute)g(lists)j(and)e(A)-9 b(TTLIST)19
+b(declarations)p Black 6 w(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(15)994 2023 y(1.2.5.)g(P)o(arsed)g(entities)p
+Black 18 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black 4 w(16)994 2131 y(1.2.6.)g(Notations)g(and)h
+(unparsed)e(entities)p Black 14 w(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(19)795 2238 y(1.3.)h(A)i(complete)e(e)o(xample:)g
+(The)h Fr(r)m(eadme)f Fv(DTD)p Black 3 w(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(20)596 2346 y(2.)h(Using)g(PXP)p Black
+6 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(24)795 2454 y(2.1.)f(V)-9 b(alidation)p
+Black 3 w(.)p Black Black -2 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(24)795
+2562 y(2.2.)19 b(Ho)n(w)h(to)g(parse)g(a)h(document)d(from)h(an)h
+(application)p Black 10 w(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+[Table of contents of the bundled PXP manual, continued:]
+  2.3. Class-based processing of the node tree ... 29
+  2.4. Example: An HTML backend for the readme DTD ... 33
+    2.4.1. Header ... 33
+    2.4.2. Type declarations ... 33
+    2.4.3. Class store ... 34
+    2.4.4. Function escape_html ... 35
+    2.4.5. Virtual class shared ... 35
+    2.4.6. Class only_data ... 36
+    2.4.7. Class readme ... 36
+    2.4.8. Classes section, sect1, sect2, and sect3 ... 39
+    2.4.9. Classes map_tag, p, em, ul, li ... 39
+    2.4.10. Class br ... 40
+    2.4.11. Class code ... 40
+    2.4.12. Class a ... 41
+    2.4.13. Class footnote ... 42
+    2.4.14. The specification of the document model ... 43
+3. The objects representing the document ... 46
+  3.1. The document class ... 46
+  3.2. The class type node ... 47
+    3.2.1. The structure of document trees ... 49
+    3.2.2. The methods of the class type node ... 52
+    3.2.3. The class element_impl ... 56
+    3.2.4. The class data_impl ... 57
+    3.2.5. The type spec ... 58
+    3.2.6. Examples ... 60
+    3.2.7. Iterators ... 64
+  3.3. The class type extension ... 65
+    3.3.1. How to define an extension class ... 66
+    3.3.2. How to bind extension classes to element types ... 68
+  3.4. Details of the mapping from XML text to the tree representation ... 69
+    3.4.1. The representation of character-free elements ... 69
+    3.4.2. The representation of character data ... 70
+    3.4.3. The representation of entities within documents ... 70
+    3.4.4. The representation of attributes ... 71
+    3.4.5. The representation of processing instructions ... 71
+    3.4.6. The representation of comments ... 71
+    3.4.7. The attributes xml:lang and xml:space ... 72
+    3.4.8. And what about namespaces? ... 72
+4. Configuring and calling the parser ... 73
+  4.1. Overview ... 73
+  4.2. Resolvers and sources ... 75
+    4.2.1. Using the built-in resolvers (called sources) ...
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(75)994 2845 y(4.2.2.)g(The)g(resolv)o(er)g(API)p
+Black 11 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(76)994 2953 y(4.2.3.)g(Prede\002ned)f(resolv)o(er)h
+(components)p Black 13 w(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black 4 w(78)795 3061
+y(4.3.)g(The)h(DTD)g(classes)p Black 1 w(.)p Black Black
+1 w(.)p Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(81)795
+3169 y(4.4.)f(In)m(v)n(oking)f(the)i(parser)p Black 14
+w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(89)994
+3277 y(4.4.1.)f(Def)o(aults)p Black 10 w(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(89)994 3385 y(4.4.2.)g(P)o(arsing)g(functions)p
+Black 4 w(.)p Black Black -3 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(90)994 3493 y(4.4.3.)g(Con\002guration)f(options)p
+Black 19 w(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(91)994 3601 y(4.4.4.)h(Which)h
+(con\002guration)d(should)i(I)i(use?)p Black 18 w(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(93)795 3709 y(4.5.)e(Updates)p Black 10 w(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(95)p Black 3842 5278 a
List of Figures

3-1. A tree with element nodes, data nodes, and attributes
3-2. Nodes are doubly linked trees
3-3. A node can only be added if it is a root
3-4. A deleted node becomes the root of the subtree
3-5. The clone of a subtree
3-6. The structure of nodes and extensions
I. User's guide
Chapter 1. What is XML?

1.1. Introduction

XML (short for Extensible Markup Language) generalizes the idea that text documents are typically structured in sections, sub-sections, paragraphs, and so on. The format of the document is not fixed (as, for example, in HTML), but can be declared by a so-called DTD (document type definition). The DTD describes only the rules for how the document can be structured, not how the document is processed. For example, if you want to publish a book that uses XML markup, you will need a processor that converts the XML file into a printable format such as Postscript. On the one hand, the structure of XML documents is configurable; on the other hand, there is no longer a canonical interpretation of the elements of the document: one XML DTD might require that paragraphs are delimited by para tags, while another DTD expects p tags for the same purpose. As a result, a new processor is required for every DTD.

Although XML can be used to express structured text documents, it is not limited to this kind of application. For example, XML can also be used to exchange structured data over a network, or simply to store structured data in files. Note that XML documents cannot contain arbitrary binary data because some characters are forbidden; for some applications you need to encode binary data as text (e.g. the base 64 encoding).

1.1.1. The "hello world" example

The following example shows a very simple DTD, and a corresponding document instance. The document is structured such that it consists of sections, that sections consist of paragraphs, and that paragraphs contain plain text:

    <!ELEMENT document (section)+>
    <!ELEMENT section (paragraph)+>
    <!ELEMENT paragraph (#PCDATA)>

The following document is an instance of this DTD:

    <?xml version="1.0" encoding="ISO-8859-1"?>
    <!DOCTYPE document SYSTEM "simple.dtd">
    <document>
      <section>
        <paragraph>This is a paragraph of the first section.</paragraph>
        <paragraph>This is another paragraph of the first section.</paragraph>
      </section>
      <section>
        <paragraph>This is the only paragraph of the second section.</paragraph>
      </section>
    </document>

As in HTML (and, of course, in grandfather SGML), the "pieces" of the document are delimited by element braces, i.e. such a piece begins with <name-of-the-type-of-the-piece> and ends with </name-of-the-type-of-the-piece>, and the pieces are called elements. Unlike HTML and SGML, start tags and end tags (i.e. the delimiters written in angle brackets) can never be left out. For example, HTML calls the paragraphs simply p, and because paragraphs never contain paragraphs, a sequence of several paragraphs can be written as:

    <p>First paragraph
    <p>Second paragraph

This is not possible in XML; continuing our example above, we must always write

    <paragraph>First paragraph</paragraph>
    <paragraph>Second paragraph</paragraph>

The rationale behind this is (1) to simplify the development of XML parsers (you need not convert the DTD into the deterministic finite automaton that would be required to detect omitted tags), and (2) to make it possible to parse the document independently of whether the DTD is known or not.

The first line of our sample document,

    <?xml version="1.0" encoding="ISO-8859-1"?>

is the so-called XML declaration. It expresses that the document follows the conventions of XML version 1.0, and that the document is encoded using characters from the ISO-8859-1 character set (often known as "Latin 1", mostly used in Western Europe). Although the XML declaration is not mandatory, it is good style to include it; everybody sees at first glance that the document uses XML markup and not the similar-looking HTML and SGML markup languages. If you omit the XML declaration, the parser will assume that the document is encoded as UTF-8 or UTF-16 (there is a rule that makes it possible to distinguish between UTF-8 and UTF-16 automatically); these are encodings of Unicode's universal character set. (Note that PXP, unlike its predecessor "Markup", fully supports Unicode.)

The second line,

    <!DOCTYPE document SYSTEM "simple.dtd">

names the DTD that is going to be used for the rest of the document. In general, the DTD can consist of two parts, the so-called external and internal subsets. "External" means that the DTD exists as a second file; "internal" means that the DTD is included in the same file. In this example, there is only an external subset, and the system identifier "simple.dtd" specifies where the DTD file can be found. System identifiers are interpreted as URLs; for instance, this would be legal:

    <!DOCTYPE document SYSTEM "http://host/location/simple.dtd">

Please note that PXP cannot interpret HTTP identifiers by default, but it is possible to change the interpretation of system identifiers.

The word immediately following DOCTYPE determines which of the declared element types (here "document", "section", and "paragraph") is used for the outermost element, the root element. In this example it is document, because the outermost element is delimited by <document> and </document>.

The DTD consists of three declarations for element types: document, section, and paragraph. Such a declaration has two parts:

    <!ELEMENT name content-model>

The content model is a regular expression which describes the possible inner structure of the element. Here, document contains one or more sections, and a section contains one or more paragraphs. Note that these two element types are not allowed to contain arbitrary text. Only the paragraph element type is declared such that parsed character data (indicated by the symbol #PCDATA) is permitted.

See below for a detailed discussion of content models.

1.1.2. XML parsers and processors

XML documents are human-readable, but this is not the main purpose of this language. XML has been designed such that documents can be read by a program called an XML parser. The parser checks that the document is correctly formatted, and it represents the document as objects of the programming language. There are two aspects to checking the document: First, the document must follow some basic syntactic rules, such as that tags are written in angle brackets, and that for every start tag there must be a corresponding end tag, and so on. A document respecting these rules is well-formed. Second, the document must match the DTD, in which case the document is valid. Many parsers check only well-formedness and ignore the DTD; PXP is designed such that it can even validate the document.
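As an added illustration (this example is not part of the original text), the distinction can be seen with the simple.dtd of the "hello world" example: the following document is well-formed, but it is not valid, because the DTD declares section as (paragraph)+ and therefore does not allow character data directly inside a section. A parser that only checks well-formedness accepts it; a validating parser such as PXP rejects it.

    <?xml version="1.0" encoding="ISO-8859-1"?>
    <!DOCTYPE document SYSTEM "simple.dtd">
    <document>
      <section>Text written directly inside the section.</section>
    </document>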
A parser alone does not make a sensible application; it only reads XML documents. The whole application working with XML-formatted data is called an XML processor. Often XML processors convert documents into another format, such as HTML or Postscript. Sometimes processors extract data from the documents and output the processed data again in XML format. The parser can help the application process the document; for example, it can provide means to access the document in a specific manner. PXP supports this with a special object-oriented access layer.
1.1.3. Discussion

As we have seen, there are two levels of description: on the one hand, XML can define rules about the format of a document (the DTD); on the other hand, XML expresses structured documents. There are a number of possible applications:

- XML can be used to express structured texts. Unlike HTML, there is no canonical interpretation; one would have to write a backend for the DTD that translates the structured texts into a format that existing browsers, printers etc. understand. The advantage of a self-defined document format is that it is possible to design the format in a more problem-oriented way. For example, if the task is to extract reports from a database, one can use a DTD that reflects the structure of the report or the database. A possible approach would be to have an element type for every database table and for every column. Once the DTD has been designed, the report procedure can be split up into a part that selects the database rows and outputs them as an XML document according to the DTD, and a part that translates the document into other formats. Of course, the latter part can be solved in a generic way, e.g. there may be configurable backends for all DTDs that follow this approach and have element types for tables and columns.

  XML plays the role of a configurable intermediate format. The database extraction function can be written without having to know the details of typesetting; the backends can be written without having to know the details of the database.

  Of course, there are traditional solutions. One can define an ad hoc intermediate text file format. The disadvantage is that there are no names for the pieces of the format, and that such formats usually lack documentation because of this. Another solution would be a binary representation, either as a language-dependent or a language-independent structure (examples of the latter can be found in RPC implementations). The disadvantage is that it is harder to view such representations; one has to write pretty printers for this purpose. It is also more difficult to enter test data; XML is plain text that can be written using an arbitrary editor (Emacs even has a good XML mode, PSGML). All these alternatives suffer from a missing structure checker, i.e. the programs processing these formats usually do not check the input file or input object in detail; XML parsers check the syntax of the input (the so-called well-formedness check), and advanced parsers like PXP even verify that the structure matches the DTD (the so-called validation).

- XML can be used as a configurable communication language. A fundamental problem of every communication is that sender and receiver must follow the same conventions about the language. For data exchange, the question is usually which data records and fields are available, how they are syntactically composed, and which values are possible for the various fields. Similar questions arise for text document exchange. XML does not solve these problems completely, but it reduces the number of ambiguities in such conventions: the outlines of the syntax are specified by the DTD (but not necessarily the details), and XML introduces canonical names for the components of documents, such that it is simpler to describe the rest of the syntax and the semantics informally.

- XML is a data storage format. Currently, every software product tends to use its own way to store data; commercial software often does not describe such formats, and it is a pain to integrate such software into a bigger project. XML can help to improve this situation when several applications share the same syntax of data files. DTDs are then neutral instances that check the format of data files independently of applications.

1.2. Highlights of XML

This section explains many of the features of XML, but not all of them, and some features are not covered in detail. For a complete description, see the XML specification (http://www.w3.org/TR/1998/REC-xml-19980210.html).

1.2.1. The DTD and the instance

The DTD contains various declarations; in general you can only use a feature if you have previously declared it. The document instance file may contain the full DTD, but it is also possible to split the DTD into an internal and an external subset. A document must begin as follows if the full DTD is included:

    <?xml version="1.0" encoding="Your encoding"?>
    <!DOCTYPE root [
      Declarations
    ]>

These declarations are called the internal subset. Note that the usage of entities and conditional sections is restricted within the internal subset.

If the declarations are located in a different file, you can refer to this file as follows:

    <?xml version="1.0" encoding="Your encoding"?>
    <!DOCTYPE root SYSTEM "file name">

The declarations in the file are called the external subset. The file name is called the system identifier. It is also possible to refer to the file by a so-called public identifier, but most XML applications won't use this feature.

You can also specify both an internal and an external subset. In this case, the declarations of both subsets are mixed, and if there are conflicts, a declaration in the internal subset overrides the declaration of the same name in the external subset. This looks as follows:

    <?xml version="1.0" encoding="Your encoding"?>
    <!DOCTYPE root SYSTEM "file name" [
      Declarations
    ]>
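As an added illustration (not part of the original text; the file name and entity name are invented for the example), the override rule is easiest to see with entity declarations, because the internal subset is read before the external subset and the first declaration of an entity is the one that counts. Suppose the external subset colors.dtd contains

    <!ELEMENT root (#PCDATA)>
    <!ENTITY default-color "red">

and the document instance redefines the entity in its internal subset:

    <?xml version="1.0" encoding="ISO-8859-1"?>
    <!DOCTYPE root SYSTEM "colors.dtd" [
      <!ENTITY default-color "blue">
    ]>
    <root>&default-color;</root>

Then &default-color; expands to "blue", not "red".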
The XML declaration (the string beginning with <?xml and ending at ?>) should specify the encoding of the file. Common values are UTF-8 and the ISO-8859 series of character sets. Note that every file parsed by the XML processor can begin with an XML declaration, and that every file may have its own encoding.

The name of the root element must be mentioned directly after the DOCTYPE string. This means that a full document instance looks like

    <?xml version="1.0" encoding="Your encoding"?>
    <!DOCTYPE root SYSTEM "file name" [
      Declarations
    ]>
    <root>
      inner contents
    </root>

1.2.2. Reserved characters

Some characters are generally reserved to indicate markup, such that they cannot be used for character data. These characters are <, >, and &. Furthermore, single and double quotes are sometimes reserved. If you want to include such a character as character data, write it as follows:

- &lt; instead of <
- &gt; instead of >
- &amp; instead of &
- &apos; instead of '
- &quot; instead of "
+e(instance.)i(It)g(is)i(possible)d(to)i(include)e(a)i(character)e(by)g
+(its)j(position)396 4162 y(in)f(the)f(Unicode)f(alphabet:)396
+4342 y Fq(&#)p Fn(n)p Fq(;)396 4533 y Fv(where)h Fl(n)g
+Fv(is)i(the)e(decimal)f(number)g(of)h(the)g(character)-5
+b(.)19 b(Alternati)n(v)o(ely)-5 b(,)18 b(you)h(can)h(specify)g(the)g
+(character)f(by)h(its)396 4641 y(he)o(xadecimal)e(number:)396
+4822 y Fq(&#x)p Fn(n)p Fq(;)p Black 3800 5278 a Fr(12)p
+Black eop
+%%Page: 13 13
+13 12 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
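For example (an added illustration), the letter ä has Unicode position 228 (hexadecimal E4), so both of the following character references denote it:

    &#228;
    &#xE4;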
In the scope of declarations, the character % is no longer free. To include it as a character, you must use the notations &#37; or &#x25;.

Note that besides &lt;, &gt;, &amp;, &apos;, and &quot; there are no predefined character entities. This is different from HTML, which defines a list of characters that can be referenced by name (e.g. &auml; for ä); however, if you prefer named characters, you can declare such entities yourself (see below).
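An added sketch of such a declaration (not part of the original text; entity declarations are discussed in more detail below), reusing simple.dtd from the "hello world" example: the internal subset declares the HTML-like name, which can then be used in the instance.

    <?xml version="1.0" encoding="ISO-8859-1"?>
    <!DOCTYPE document SYSTEM "simple.dtd" [
      <!ENTITY auml "&#228;">
    ]>
    <document>
      <section>
        <paragraph>B&auml;ume</paragraph>
      </section>
    </document>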
1.2.3. Elements and ELEMENT declarations

Elements structure the document instance in a hierarchical way. There is a top-level element, the root element, which contains a sequence of inner elements and character sections. The inner elements are structured in the same way. Every element has an element type. The beginning of the element is indicated by a start tag, written

    <element-type>

and the element continues until the corresponding end tag is reached:

    </element-type>

In XML, it is not allowed to omit start or end tags, even if the DTD would permit this. Note that there are no special rules for how to interpret spaces or newlines near start or end tags; all spaces and newlines count.

Every element type must be declared before it can be used. The declaration consists of two parts: the ELEMENT declaration describes the content model, i.e. which inner elements are allowed; the ATTLIST declaration describes the attributes of the element.
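ATTLIST declarations are covered later; as an added preview (not from the original text; the attribute name is invented for illustration), such a declaration looks like this:

    <!ATTLIST paragraph
              align CDATA #IMPLIED>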
An element can simply allow everything as content. This is written:

    <!ELEMENT name ANY>

At the other extreme, an element can be forced to be empty; this is declared by:

    <!ELEMENT name EMPTY>

Note that there is an abbreviated notation for empty element instances: <name/>.

There are two more sophisticated forms of declarations: so-called mixed declarations, and regular expressions. An element with mixed content contains character data interspersed with inner elements, and the set of allowed inner elements can be specified. In contrast to this, a regular expression declaration does not allow character data, but the inner elements can be described by the more powerful means of regular expressions.

A declaration for mixed content looks as follows:

    <!ELEMENT name (#PCDATA | element1 | ... | elementn)*>

or, if you do not want to allow any inner element, simply

    <!ELEMENT name (#PCDATA)>

Example: If element type q is declared as

    <!ELEMENT q (#PCDATA | r | s)*>

this is a legal instance:

    <q>This is character data<r></r>with <s></s>inner elements</q>

But this is illegal, because t has not been enumerated in the declaration:

    <q>This is character data<r></r>with <t></t>inner elements</q>

The other form uses a regular expression to describe the possible contents:

    <!ELEMENT name regexp>

The following well-known regexp operators are allowed:

- element-name
- (subexpr1, ..., subexprn)
- (subexpr1 | ... | subexprn)
- subexpr*
- subexpr+
- subexpr?

The , operator indicates a sequence of sub-models, and the | operator describes alternative sub-models. The * indicates zero or more repetitions, and + one or more repetitions. Finally, ? can be used for optional sub-models. As atoms, the regexp can contain names of elements; note that it is not allowed to include #PCDATA.
+(syntax)f(of)h(the)g(re)o(gular)f(e)o(xpressions)g(is)i(rather)e
+(strange.)h(This)g(can)g(be)g(e)o(xplained)f(best)h(by)g(a)g(list)i(of)
+396 4570 y(constraints:)p Black 396 4802 a Ft(\225)p
+Black 60 w Fv(The)e(outermost)f(e)o(xpression)g(must)h(not)g(be)g
+Fn(element-name)p Fv(.)p Black 3800 5278 a Fr(14)p Black
+eop
  Illegal: <!ELEMENT x y>; this must be written as <!ELEMENT x (y)>.

• For the unary operators subexpr*, subexpr+, and subexpr?, the subexpr must not itself be an application of a unary operator.

  Illegal: <!ELEMENT x y**>; this must be written as <!ELEMENT x (y*)*>.

• Between ) and one of the unary operators *, +, or ?, there must not be whitespace.

  Illegal: <!ELEMENT x (y|z) *>; this must be written as <!ELEMENT x (y|z)*>.

• There is the additional constraint that the right parenthesis must be contained in the same entity as the left parenthesis; see the section about parsed entities below.

Note that there is another restriction: the regular expressions must be deterministic. This means that the parser must be able to see, by looking at the next token, which alternative is actually used, or whether the repetition stops. The reason for this is simply compatibility with SGML (there is no intrinsic reason for this rule; XML could live without this restriction).
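An added illustration of the determinism requirement (not from the original text): while matching an a element against the first declaration below, the parser cannot tell which of the two branches the a belongs to, so the model is rejected; the equivalent second form is accepted:

   <!ELEMENT x ((a, b) | (a, c))>    <!-- illegal: non-deterministic -->
   <!ELEMENT x (a, (b | c))>         <!-- legal: deterministic equivalent -->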
Example

The elements are declared as follows:

   <!ELEMENT q (r?, (s | t)+)>
   <!ELEMENT r (#PCDATA)>
   <!ELEMENT s EMPTY>
   <!ELEMENT t (q | r)>

This is a legal instance:

   <q><r>Some characters</r><s/></q>

(Note: <s/> is an abbreviation for <s></s>.) It would be illegal to leave <s/> out because at least one instance of s or t must be present. It would be illegal, too, if characters existed outside the r element; the only exception is white space. And this is legal, too:

   <q><s/><t><q><s/></q></t></q>
1.2.4. Attribute lists and ATTLIST declarations

Elements may have attributes. These are put into the start tag of an element as follows:

   <element-name attribute_1="value_1" ... attribute_n="value_n">

Instead of "value_k" it is also possible to use single quotes, as in 'value_k'. Note that you cannot use double quotes literally within the value of the attribute if double quotes are the delimiters; the same applies to single quotes. You can generally not use < and & as characters in attribute values. It is possible to include the paraphrases &lt;, &gt;, &amp;, &apos;, and &quot; (and any other reference to a general entity, as long as the entity is not defined by an external file) as well as &#n;.

Before you can use an attribute you must declare it. An ATTLIST declaration looks as follows:

   <!ATTLIST element-name
             attribute-name attribute-type attribute-default
             ...
             attribute-name attribute-type attribute-default
   >

There are a lot of types, but the most important are:

• CDATA: Every string is allowed as attribute value.

• NMTOKEN: Every nametoken is allowed as attribute value. Nametokens consist (mainly) of letters, digits, ., :, -, _ in arbitrary order.

• NMTOKENS: A space-separated list of nametokens is allowed as attribute value.

The most interesting default declarations are:

• #REQUIRED: The attribute must be specified.

• #IMPLIED: The attribute can be specified but can also be left out. The application can find out whether the attribute was present or not.

• "value" or 'value': This particular value is used as default if the attribute is omitted in the element.

Example

This is a valid attribute declaration for element type r:

   <!ATTLIST r
             x CDATA    #REQUIRED
             y NMTOKEN  #IMPLIED
             z NMTOKENS "one two three">

This means that x is a required attribute that cannot be left out, while y and z are optional. The XML parser indicates to the application whether y is present or not, but if z is missing the default value "one two three" is returned automatically.

This is a valid example of these attributes:

   <r x="He said: &quot;I don't like quotes!&quot;" y='1'>
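A further added illustration (not part of the original text): the NMTOKENS attribute z may also be set explicitly, which overrides the declared default list; if it is omitted, as in the instance above, the parser reports the default "one two three" instead:

   <r x="ok" z="alpha beta gamma">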
1.2.5. Parsed entities

Elements describe the logical structure of the document, while entities determine the physical structure. Entities are the pieces of text the parser operates on, mostly files and macros. Entities may be parsed, in which case the parser reads the text and interprets it as XML markup, or unparsed, which simply means that the data of the entity has a foreign format (e.g. a GIF icon).

If the parsed entity is going to be used as part of the DTD, it is called a parameter entity. You can declare a parameter entity with a fixed text as content by:

   <!ENTITY % name "value">

Within the DTD, you can refer to this entity, i.e. read the text of the entity, by:

   %name;

Such entities behave like macros, i.e. when they are referred to, the macro text is inserted and read instead of the original text.

Example

For example, you can declare two elements with the same content model by:

   <!ENTITY % model "a | b | c">
   <!ELEMENT x (%model;)>
   <!ELEMENT y (%model;)>

If the contents of the entity are given as a string constant, the entity is called an internal entity. It is also possible to name a file to be used as content (an external entity):

   <!ENTITY % name SYSTEM "file name">

There are some restrictions for parameter entities:

• If the internal parameter entity contains the first token of a declaration (i.e. <!), it must also contain the last token of the declaration, i.e. the >. This means that the entity either contains a whole number of complete declarations, or some text from the middle of one declaration.

  Illegal:

     <!ENTITY % e "(a | b | c)>">
     <!ELEMENT x %e;

  Because <! is contained in the main entity, and the corresponding > is contained in the entity e.

• If the internal parameter entity contains a left parenthesis, it must also contain the corresponding right parenthesis.

  Illegal:

     <!ENTITY % e "(a | b | c">
     <!ELEMENT x %e;)>

  Because ( is contained in the entity e, and the corresponding ) is contained in the main entity.

• When reading text from an entity, the parser automatically inserts one space character before the entity text and one space character after the entity text. However, this rule is not applied within the definition of another entity.

  Legal:

     <!ENTITY % suffix "gif">
     <!ENTITY iconfile 'icon.%suffix;'>

  Because %suffix; is referenced within the definition text for iconfile, no additional spaces are added.

  Illegal:

     <!ENTITY % suffix "test">
     <!ELEMENT x.%suffix; ANY>

  Because %suffix; is referenced outside the definition text of another entity, the parser replaces %suffix; by space test space.

  Illegal:

     <!ENTITY % e "(a | b | c)">
     <!ELEMENT x %e;*>

  Because there is whitespace between ) and *, which is illegal.

• An external parameter entity must always consist of a whole number of complete declarations.

• In the internal subset of the DTD, a reference to a parameter entity (internal or external) is only allowed at positions where a new declaration can start.

If the parsed entity is going to be used in the document instance, it is called a general entity. Such entities can be used as abbreviations for frequent phrases, or to include external files. Internal general entities are declared as follows:

   <!ENTITY name "value">

External general entities are declared this way:

   <!ENTITY name SYSTEM "file name">
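For instance (an added example with a hypothetical file name), a legal notice kept in its own file could be declared as an external general entity:

   <!ENTITY legal SYSTEM "legal-notice.xml">

How such an entity is referenced from the document instance is described next.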
References to general entities are written as:

   &name;

The main difference between parameter and general entities is that the former are only recognized in the DTD and that the latter are only recognized in the document instance. As the DTD is parsed before the document, the parameter entities are expanded first; for example, it is possible to use the content of a parameter entity as the name of a general entity: &#38;%name;; (see note 1 at the end of this chapter).

General entities must respect the element hierarchy. This means that there must be an end tag for every start tag in the entity value, and that end tags without corresponding start tags are not allowed.

Example

If the author of a document changes sometimes, it is worthwhile to set up a general entity containing the names of the authors. If the author changes, you only need to change the definition of the entity, and do not need to check all occurrences of the authors' names:

   <!ENTITY authors "Gerd Stolpmann">

In the document text, you can now refer to the author names by writing &authors;.

Illegal: The following two entities are illegal because the elements in the definition do not nest properly:

   <!ENTITY lengthy-tag "<section textcolor='white' background='graphic'>">
   <!ENTITY nonsense    "<a></b>">

Earlier in this introduction we explained that there are substitutes for reserved characters: &lt;, &gt;, &amp;, &apos;, and &quot;. These are simply predefined general entities; note that they are the only predefined entities. It is allowed to define these entities again as long as the meaning is unchanged.

1.2.6. Notations and unparsed entities

Unparsed entities have a foreign format and can thus not be read by the XML parser. Unparsed entities are always external. The format of an unparsed entity must have been declared; such a format is called a notation. The entity can then be declared by referring to this notation. As unparsed entities do not contain XML text, it is not possible to include them directly into the document; you can only declare attributes such that names of unparsed entities are acceptable values.

As you can see, unparsed entities are too complicated to serve much purpose. It is almost always better to simply pass the name of the data file as a normal attribute value, and let the application recognize and process the foreign format.
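To illustrate the mechanism anyway, here is an added sketch (the names and identifiers are made up, not from the original text): a notation for the GIF format, an unparsed entity referring to an icon file, and an attribute whose value must be the name of an unparsed entity:

   <!NOTATION gif SYSTEM "image/gif">
   <!ENTITY   icon SYSTEM "icon.gif" NDATA gif>
   <!ELEMENT  logo EMPTY>
   <!ATTLIST  logo picture ENTITY #REQUIRED>

In the instance one would then write <logo picture="icon"/>, and the application has to look up the entity and process the GIF data itself.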
1.3. A complete example: The readme DTD

The reason for readme was that I often wrote two versions of files such as README and INSTALL which explain aspects of a distributed software archive; one version was ASCII-formatted, the other was written in HTML. Maintaining both versions means doing double the work, and changes to one version may be forgotten in the other version. To improve this situation I invented the readme DTD, which allows me to maintain only one source written as an XML document, and to generate the ASCII and the HTML version from it.

In this section, I explain only the DTD. The readme DTD is contained in the PXP distribution together with the two converters to produce ASCII and HTML. Another section of this manual describes the HTML converter.

The documents have a simple structure: there are up to three levels of nested sections, paragraphs, item lists, footnotes, hyperlinks, and text emphasis. The outermost element usually has the type readme; it is declared by

   <!ELEMENT readme (sect1+)>
   <!ATTLIST readme
             title CDATA #REQUIRED>

This means that this element contains one or more sections of the first level (element type sect1), and that the element has a required attribute title containing character data (CDATA). Note that readme elements must not contain text data.

The three levels of sections are declared as follows:

   <!ELEMENT sect1 (title,(sect2|p|ul)+)>
   <!ELEMENT sect2 (title,(sect3|p|ul)+)>
   <!ELEMENT sect3 (title,(p|ul)+)>

Every section has a title element as its first subelement. After the title, an arbitrary but non-empty sequence of inner sections, paragraphs and item lists follows. Note that the inner sections must belong to the next higher section level; sect3 elements must not contain inner sections because there is no next higher level.

Obviously, all three declarations allow paragraphs (p) and item lists (ul). The definition can be simplified at this point by using a parameter entity:

   <!ENTITY % p.like "p|ul">

   <!ELEMENT sect1 (title,(sect2|%p.like;)+)>
   <!ELEMENT sect2 (title,(sect3|%p.like;)+)>
   <!ELEMENT sect3 (title,(%p.like;)+)>

Here, the entity p.like is nothing but a macro abbreviating the same sequence of declarations; if new elements on the same level as p and ul are added later, it is sufficient to change only the entity definition. Note that there are some restrictions on the usage of entities in this context; most importantly, entities containing a left parenthesis must also contain the corresponding right parenthesis.

Note that the entity p.like is a parameter entity, i.e. the ENTITY declaration contains a percent sign, and the entity is referred to by %p.like;. This kind of entity must be used to abbreviate parts of the DTD; the general entities declared without a percent sign and referred to as &name; are not allowed in this context.

The title element specifies the title of the section in which it occurs. The title is given as character data, optionally interspersed with line breaks (br):

   <!ELEMENT title (#PCDATA|br)*>

Compared with the title attribute of the readme element, this element allows inner markup (i.e. br) while attribute values do not: it is an error if an attribute value contains the left angle bracket < literally, so it is impossible to include inner elements.

The paragraph element p has a structure similar to title, but it allows more inner elements:

   <!ENTITY % text "br|code|em|footnote|a">

   <!ELEMENT p (#PCDATA|%text;)*>

Line breaks do not have inner structure, so they are declared as being empty:

   <!ELEMENT br EMPTY>

This means that really nothing is allowed within br; you must always write <br></br> or, abbreviated, <br/>.

Code samples should be marked up with the code tag; emphasized text can be indicated by em:

   <!ELEMENT code (#PCDATA)>

   <!ELEMENT em (#PCDATA|%text;)*>

That code elements are not allowed to contain further markup while em elements are is a design decision by the author of the DTD.

Unordered lists simply consist of one or more list items, and a list item may contain paragraph-level material:
   <!ELEMENT ul (li+)>
   <!ELEMENT li (%p.like;)*>

Footnotes are described by the text of the note; this text may contain text-level markup. There is no mechanism to describe the numbering scheme of footnotes, or to specify how footnote references are printed.

   <!ELEMENT footnote (#PCDATA|%text;)*>

Hyperlinks are written as in HTML. The anchor tag contains the text describing where the link points to, and the href attribute is the pointer (as a URL). There is no way to describe locations of "hash marks". If the link refers to another readme document, the attribute readmeref should be used instead of href. The reason is that the converted document usually has a different system identifier (file name), and the link to a converted document must be converted, too.

   <!ELEMENT a (#PCDATA)*>
   <!ATTLIST a
             href      CDATA #IMPLIED
             readmeref CDATA #IMPLIED
   >

Note that although it is only sensible to specify one of the two attributes, the DTD has no means to express this restriction.

So far the DTD. Finally, here is a document for it:

   <?xml version="1.0" encoding="ISO-8859-1"?>
   <!DOCTYPE readme SYSTEM "readme.dtd">
   <readme title="How to use the readme converters">
   <sect1>
     <title>Usage</title>
     <p>
       The <em>readme</em> converter is invoked on the command line by:
     </p>
     <p>
       <code>readme [ -text | -html ] input.xml</code>
     </p>
     <p>
       Here a list of options:
     </p>
     <ul>
       <li>
         <p><code>-text</code>: specifies that ASCII output should be produced</p>
       </li>
       <li>
         <p><code>-html</code>: specifies that HTML output should be produced</p>
       </li>
     </ul>
     <p>
       The input file must be given on the command line. The converted output is
       printed to <em>stdout</em>.
     </p>
   </sect1>
   <sect1>
     <title>Author</title>
     <p>
       The program has been written by
       <a href="mailto:Gerd.Stolpmann@darmstadt.netsurf.de">Gerd Stolpmann</a>.
     </p>
   </sect1>
   </readme>

Notes

1. This construct is only allowed within the definition of another entity; otherwise extra spaces would be added (as explained above). Such indirection is not recommended.

   Complete example:

      <!ENTITY % variant "a">     <!-- or "b" -->
      <!ENTITY text-a "This is text A.">
      <!ENTITY text-b "This is text B.">
      <!ENTITY text "&#38;text-%variant;;">

   You can now write &text; in the document instance, and depending on the value of variant either text-a or text-b is inserted.
Chapter 2. Using PXP

2.1. Validation

The parser can be used to validate a document. This means that all the constraints that must hold for a valid document are actually checked. Validation is the default mode of PXP, i.e. every document is validated while it is being parsed.

In the examples directory of the distribution you find the pxpvalidate application. It is invoked in the following way:

   pxpvalidate [ -wf ] file...

The files mentioned on the command line are validated, and every warning and every error message is printed to stderr.

The -wf switch modifies the behaviour such that a well-formedness parser is simulated. In this mode, the ELEMENT, ATTLIST, and NOTATION declarations of the DTD are ignored, and only the ENTITY declarations take effect. This mode is intended for documents lacking a DTD. Please note that the parser still scans the DTD fully and will report all errors in the DTD; such checks are not required by a well-formedness parser.

The pxpvalidate application is the simplest sensible program using PXP; you may consider it the "hello world" program.

2.2. How to parse a document from an application

Let me first give a rough overview of the object model of the parser. The following items are represented by objects:

• Documents: The document representation is more or less the anchor for the application; all accesses to the parsed entities start here. It is described by the class document contained in the module Pxp_document. You can get some global information, such as the XML declaration the document begins with, the DTD of the document, global processing instructions, and, most important, the document tree.

• The contents of documents: The contents have the structure of a tree: elements contain other elements and text (note 1). The common type to represent both kinds of content is node, which is a class type that unifies the properties of elements and character data. Every node has a list of children (which is empty if the element is empty or the node represents text); nodes may have attributes; nodes always have text contents. There are two implementations of node, the class element_impl for elements, and the class data_impl for text data. You find these classes and class types in the module Pxp_document, too. Note that attribute lists are represented by non-class values.

• The node extension: For advanced usage, every node of the document may have an associated extension, which is simply a second object. This object must have the three methods clone, node, and set_node as a bare minimum, but you are free to add methods as you want. This is the preferred way to add functionality to the document tree (note 2). The class type extension is defined in Pxp_document, too.

• The DTD: Sometimes it is necessary to access the DTD of a document; the average application does not need this feature. The class dtd describes DTDs, and makes it possible to get representations of element, entity, and notation declarations as well as processing instructions contained in the DTD. This class, and dtd_element, dtd_notation, and proc_instruction, can be found in the module Pxp_dtd. There are a couple of classes representing different kinds of entities; these can be found in the module Pxp_entity.

Additionally, the following modules play a role:

• Pxp_yacc: Here the main parsing functions such as parse_document_entity are located. Some additional types and functions allow the parser to be configured in a non-standard way.

• Pxp_types: This is a collection of basic types and exceptions.

There are some further modules that are needed internally but are not part of the API.

Let the document to be parsed be stored in a file called doc.xml. The parsing process is started by calling the function

   val parse_document_entity : config -> source -> 'ext spec -> 'ext document

defined in the module Pxp_yacc. The first argument specifies some global properties of the parser; it is recommended to start with default_config. The second argument determines where the document to be parsed comes from; this may be a file, a channel, or an entity ID. To parse doc.xml, it is sufficient to pass from_file "doc.xml".

The third argument passes the object specification to use. Roughly speaking, it determines which classes implement the node objects of which element types, and which extensions are to be used. The 'ext polymorphic variable is the type of the extension. For the moment, let us simply pass default_spec as this argument, and ignore it.

So the following expression parses doc.xml:

   open Pxp_yacc
   let d = parse_document_entity default_config (from_file "doc.xml") default_spec

Note that default_config implies that warnings are collected but not printed. Errors raise one of the exceptions defined in Pxp_types; to get readable errors and warnings, catch the exceptions as follows:

   class warner =
     object
       method warn w =
         print_endline ("WARNING: " ^ w)
     end
   ;;

   try
     let config = { default_config with warner = new warner } in
     let d = parse_document_entity config (from_file "doc.xml") default_spec
     in
       ...
   with
     e ->
       print_endline (Pxp_types.string_of_exn e)

Now d is an object of the document class. If you want the node tree, you can get the root element by

   let root = d # root

and if you would rather like to access the DTD, determine it by

   let dtd = d # dtd

As it is more interesting, let us investigate the node tree now. Given the root element, it is possible to recursively traverse the whole tree. The children of a node n are returned by the method sub_nodes, and the type of a node is returned by node_type. This function traverses the tree, and prints the type of each node:

   let rec print_structure n =
     let ntype = n # node_type in
     match ntype with
       T_element name ->
         print_endline ("Element of type " ^ name);
         let children = n # sub_nodes in
         List.iter print_structure children
     | T_data ->
         print_endline "Data"
     | _ ->
         (* Other node types are not possible unless the parser is configured
            differently.
          *)
         assert false

You can call this function by

   print_structure root

The type returned by node_type is either T_element name or T_data. The name of the element type is the string included in the angle brackets. Note that only elements have children; data nodes are always leaves of the tree.

There are some more methods in order to access a parsed node tree:

• n # parent: Returns the parent node, or raises Not_found if the node is already the root.

• n # root: Returns the root of the node tree.

• n # attribute a: Returns the value of the attribute with name a. The method returns a value for every declared attribute, independently of whether the attribute instance is defined or not. If the attribute is not declared, Not_found will be raised. (In well-formedness mode, every attribute is considered as being implicitly declared with type CDATA.)

  The following return values are possible: Value s, Valuelist sl, and Implied_value. The first two value types indicate that the attribute value is available, either because there is a definition a="value" in the XML text, or because there is a default value (declared in the DTD). Only if both the instance definition and the default declaration are missing, the latter value Implied_value will be returned.

  In the DTD, every attribute is typed. There are single-value types (CDATA, ID, IDREF, ENTITY, NMTOKEN, enumerations), in which case the method passes Value s back, where s is the normalized string value of the attribute. The other types (IDREFS, ENTITIES, NMTOKENS) represent list values, and the parser splits the XML literal into several tokens and returns these tokens as Valuelist sl.

  Normalization means that entity references (the &name; tokens) and character references (&#number;) are replaced by the text they represent, and that white space characters are converted into plain spaces.

• n # data: Returns the character data contained in the node. For data nodes, the meaning is obvious, as this is the main content of data nodes. For element nodes, this method returns the concatenated contents of all inner data nodes.

  Note that entity references included in the text are resolved while they are being parsed; for example the text "a &lt;&gt; b" will be returned as "a <> b" by this method. Spaces of data nodes are always preserved. Newlines are preserved, but always converted to \n characters even if newlines are encoded as \r\n or \r. Normally you will never see two adjacent data nodes because the parser collapses all data material at one location into one node. (However, if you create your own tree or transform the parsed tree, it is possible to have adjacent data nodes.)
  Note that elements that do not allow #PCDATA as content will not have data nodes as children. This means that spaces and newlines, the only character material allowed for such elements, are silently dropped.

For example, if the task is to print all contents of elements with type "valuable" whose attribute "priority" is "1", this function can help:

   let rec print_valuable_prio1 n =
     let ntype = n # node_type in
     match ntype with
       T_element "valuable" when n # attribute "priority" = Value "1" ->
         print_endline "Valuable node with priority 1 found:";
         print_endline (n # data)
     | (T_element _ | T_data) ->
         let children = n # sub_nodes in
         List.iter print_valuable_prio1 children
     | _ ->
         assert false

You can call this function by:

   print_valuable_prio1 root

If you like a DSSSL-like style, you can make the function process_children explicit:

   let rec print_valuable_prio1 n =

     let process_children n =
       let children = n # sub_nodes in
       List.iter print_valuable_prio1 children
     in

     let ntype = n # node_type in
     match ntype with
       T_element "valuable" when n # attribute "priority" = Value "1" ->
         print_endline "Valuable node with priority 1 found:";
         print_endline (n # data)
     | (T_element _ | T_data) ->
         process_children n
     | _ ->
         assert false

So far, O'Caml is now a simple "style-sheet language": you can form a big "match" expression to distinguish between all significant cases, and provide different reactions on different conditions. But this technique has limitations; the "match" expression tends to get larger and larger, and it is difficult to store
intermediate values as there is only one big recursion. Alternatively, it is also possible to represent the various cases as classes, and to use dynamic method lookup to find the appropriate class. The next section explains this technique in detail.

2.3. Class-based processing of the node tree

By default, the parsed node tree consists of objects of the same class; this is a good design as long as you only want to access selected parts of the document. For complex transformations, it may be better to use different classes for objects describing different element types.

For example, if the DTD declares the element types a, b, and c, and if the task is to convert an arbitrary document into a printable format, the idea is to define for every element type a separate class that has a method print. The classes are eltype_a, eltype_b, and eltype_c, and every class implements print such that elements of the type corresponding to the class are converted to the output format.

The parser supports such a design directly. As it is impossible to derive recursive classes in O'Caml (note 3), the specialized element classes cannot be formed by simply inheriting from the built-in classes of the parser and adding methods for customized functionality. To get around this limitation, every node of the document tree is represented by two objects, one called "the node" and containing the recursive definition of the tree, one called "the extension". Every node object has a reference to the extension, and the extension has a reference to the node. The advantage of this model is that it is now possible to customize the extension without affecting the typing constraints of the recursive node definition.

Every extension must have the three methods clone, node, and set_node. The method clone creates a deep copy of the extension object and returns it; node returns the node object for this extension object; and set_node is used to tell the extension object which node is associated with it; this method is automatically called when the node tree is initialized. The following definition is a good starting point for these methods; usually clone must be further refined when instance variables are added to the class:

   class custom_extension =
     object (self)

       val mutable node = (None : custom_extension node option)

       method clone = {< >}
       method node =
         match node with
             None ->
               assert false
           | Some n -> n
       method set_node n =
         node <- Some n
     end

This part of the extension is usually the same for all classes, so it is a good idea to consider custom_extension as the super-class of the further class definitions. Continuing the example from above, we can define the element type classes as follows:

   class virtual custom_extension =
     object (self)
       ... clone, node, set_node defined as above ...

       method virtual print : out_channel -> unit
     end

   class eltype_a =
     object (self)
       inherit custom_extension
       method print ch = ...
     end

   class eltype_b =
     object (self)
       inherit custom_extension
       method print ch = ...
     end

   class eltype_c =
     object (self)
       inherit custom_extension
       method print ch = ...
     end

The method print can now be implemented for every element type separately. Note that you get the associated node by invoking

   self # node

and you get the extension object of a node n by writing

   n # extension

It is guaranteed that

   self # node # extension == self
always holds.

Here are sample definitions of the print methods:

   class eltype_a =
     object (self)
       inherit custom_extension
       method print ch =
         (* Nodes <a>...</a> are only containers: *)
         output_string ch "(";
         List.iter
           (fun n -> n # extension # print ch)
           (self # node # sub_nodes);
         output_string ch ")";
     end

   class eltype_b =
     object (self)
       inherit custom_extension
       method print ch =
         (* Print the value of the CDATA attribute "print": *)
         match self # node # attribute "print" with
           Value s         -> output_string ch s
         | Implied_value   -> output_string ch "<missing>"
         | Valuelist l     -> assert false
                              (* not possible because the att is CDATA *)
     end

   class eltype_c =
     object (self)
       inherit custom_extension
       method print ch =
         (* Print the contents of this element: *)
         output_string ch (self # node # data)
     end

   class null_extension =
     object (self)
       inherit custom_extension
       method print ch = assert false
     end

The remaining task is to configure the parser such that these extension classes are actually used. Here another problem arises: it is not possible to dynamically select the class of an object to be created. As a workaround, PXP allows the user to specify exemplar objects for the various element types; instead of
Exemplars are meant as objects without contents; the only interesting thing is that exemplars are instances of a certain class. The creation of an exemplar for an element node can be done by:

let element_exemplar = new element_impl extension_exemplar

And a data node exemplar is created by:

let data_exemplar = new data_impl extension_exemplar

The classes element_impl and data_impl are defined in the module Pxp_document. The constructors initialize the fresh objects as empty objects, i.e. without children, without data contents, and so on. The extension_exemplar is the initial extension object the exemplars are associated with.

Once the exemplars are created and stored somewhere (e.g. in a hash table), you can take an exemplar and create a concrete instance (with contents) by duplicating it. As user of the parser you are normally not concerned with this, as it is part of the internal logic of the parser; but as background knowledge it is worthwhile to mention that the two methods create_element and create_data actually perform the duplication of the exemplar for which they are invoked, additionally apply modifications to the clone, and finally return the new object. Moreover, the extension object is copied, too, and the new node object is associated with the fresh extension object. Note that this is the reason why every extension object must have a clone method.

The configuration of the set of exemplars is passed to the parse_document_entity function as third argument. In our example, this argument can be set up as follows:

let spec =
  make_spec_from_alist
    ~data_exemplar:            (new data_impl (new null_extension))
    ~default_element_exemplar: (new element_impl (new null_extension))
    ~element_alist:
      [ "a",  new element_impl (new eltype_a);
        "b",  new element_impl (new eltype_b);
        "c",  new element_impl (new eltype_c);
      ]
    ()

The ~element_alist function argument defines the mapping from element types to exemplars as an associative list. The argument ~data_exemplar specifies the exemplar for data nodes, and the ~default_element_exemplar is used whenever the parser finds an element type for which the associative list does not define an exemplar.
The configuration is now complete. You can still use the same parsing functions, only the initialization is a bit different. For example, call the parser by:

let d = parse_document_entity default_config (from_file "doc.xml") spec

Note that the resulting document d has a usable type; especially the print method we added is visible. So you can print your document by

d # root # extension # print stdout
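As a hedged sketch (added here, not part of the original text), the pieces above can be combined into a tiny program. It assumes the spec value defined above is in scope and that Pxp_yacc — which provides parse_document_entity, default_config and from_file as used in this section — has been opened; the generic exception handling via Printexc is only illustrative.

(* Minimal sketch: parse "doc.xml" with the spec above and print it
   via the extension objects attached to the nodes. *)
let () =
  try
    let d = parse_document_entity default_config (from_file "doc.xml") spec in
    d # root # extension # print stdout
  with
    e ->
      prerr_endline ("Parsing failed: " ^ Printexc.to_string e);
      exit 1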
This object-oriented approach looks rather complicated; this is mostly caused by working around some problems of the strict typing system of O'Caml. Some auxiliary concepts such as extensions were needed, but the practical consequences are low. In the next section, one of the examples of the distribution is explained, a converter from readme documents to HTML.

2.4. Example: An HTML backend for the readme DTD

The converter from readme documents to HTML documents follows strictly the approach to define one class per element type. The HTML code is similar to the readme source; because of this, most elements can be converted in the following way: Given the input element

<e>content</e>

the conversion text is the concatenation of a computed prefix, the recursively converted content, and a computed suffix.

Only one element type cannot be handled by this scheme: footnote. Footnotes are collected while they are found in the input text, and they are printed after the main text has been converted and printed.

2.4.1. Header

open Pxp_types
open Pxp_document

2.4.2. Type declarations
class type footnote_printer =
  object
    method footnote_to_html : store_type -> out_channel -> unit
  end

and store_type =
  object
    method alloc_footnote : footnote_printer -> int
    method print_footnotes : out_channel -> unit
  end
;;

2.4.3. Class store

The store is a container for footnotes. You can add a footnote by invoking alloc_footnote; the argument is an object of the class footnote_printer, and the method returns the number of the footnote. The interesting property of a footnote is that it can be converted to HTML, so a footnote_printer is an object with a method footnote_to_html. The class footnote which is defined below has a compatible method footnote_to_html, such that objects created from it can be used as footnote_printers.

The other method, print_footnotes, prints the footnotes as a definition list, and is typically invoked after the main material of the page has already been printed. Every item of the list is printed by footnote_to_html.

class store =
  object (self)

    val mutable footnotes = ( [] : (int * footnote_printer) list )
    val mutable next_footnote_number = 1

    method alloc_footnote n =
      let number = next_footnote_number in
      next_footnote_number <- number+1;
      footnotes <- footnotes @ [ number, n ];
      number

    method print_footnotes ch =
      if footnotes <> [] then begin
        output_string ch "<hr align=left noshade=noshade width=\"30%\">\n";
        output_string ch "<dl>\n";
        List.iter
          (fun (_,n) ->
             n # footnote_to_html (self : #store_type :> store_type) ch)
          footnotes;
        output_string ch "</dl>\n";
      end

  end
;;
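To make the allocation protocol concrete, here is a small, hedged exercise of the store class (added here, not from the original text). The trivial footnote_printer below is only for illustration and assumes the type declarations of section 2.4.2 are in scope.

let _store_demo () =
  let trivial =
    object
      method footnote_to_html (_ : store_type) ch =
        output_string ch "footnote text"
    end
  in
  let s = new store in
  let n1 = s # alloc_footnote trivial in
  let n2 = s # alloc_footnote trivial in
  (* footnotes are numbered consecutively, starting at 1 *)
  print_endline
    ("allocated footnotes " ^ string_of_int n1 ^ " and " ^ string_of_int n2);
  s # print_footnotes stdout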
2.4.4. Function escape_html

This function converts the characters <, >, &, and " to their HTML representation. For example, escape_html "<>" = "&lt;&gt;". Other characters are left unchanged.

let escape_html s =
  Str.global_substitute
    (Str.regexp "<\\|>\\|&\\|\"")
    (fun s ->
       match Str.matched_string s with
           "<"  -> "&lt;"
         | ">"  -> "&gt;"
         | "&"  -> "&amp;"
         | "\"" -> "&quot;"
         | _    -> assert false)
    s
;;

2.4.5. Virtual class shared

This virtual class is the abstract superclass of the extension classes shown below. It defines the standard methods clone, node, and set_node, and declares the type of the virtual method to_html. This method recursively traverses the whole element tree, and prints the converted HTML code to the output channel passed as second argument. The first argument is the reference to the global store object which collects the footnotes.

class virtual shared =
  object (self)

    (* -- default_ext -- *)

    val mutable node = (None : shared node option)

    method clone = {< >}
    method node =
      match node with
          None ->
            assert false
        | Some n -> n
    method set_node n =
      node <- Some n

    (* -- virtual -- *)

    method virtual to_html : store -> out_channel -> unit

  end
;;
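A brief aside on the {< >} notation used by clone (an illustration added here, not part of the manual): {< >} is O'Caml's functional object update, and with no overridden fields it yields a copy of the object that keeps its run-time class — exactly the property the exemplar mechanism relies on. The counter class below is a hypothetical, self-contained example of the notation.

(* Hedged aside: functional object update in O'Caml. *)
class counter =
  object
    val n = 0
    method value = n
    method incr = {< n = n + 1 >}   (* copy with one field changed *)
    method copy = {< >}             (* plain copy of the same class *)
  end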
2.4.6. Class only_data

This class defines to_html such that the character data of the current node is converted to HTML. Note that self is an extension object, self # node is the node object, and self # node # data returns the character data of the node.

class only_data =
  object (self)
    inherit shared

    method to_html store ch =
      output_string ch (escape_html (self # node # data))
  end
;;

2.4.7. Class readme

This class converts elements of type readme to HTML. Such an element is (by definition) always the root element of the document. First, the HTML header is printed; the title attribute of the element determines the title of the HTML page. Some aspects of the HTML page can be configured by setting certain parameter entities, for example the background color, the text color, and the link colors. After the header, the body tag, and the headline have been printed, the contents of the page are converted by invoking to_html on all children of the current node (which is the root node). Then, the footnotes are appended to this by telling the global store object to print the footnotes. Finally, the end tags of the HTML page are printed.
This class is an example how to access the value of an attribute: The value is determined by invoking self # node # attribute "title". As this attribute has been declared as CDATA and as being required, the value always has the form Value s, where s is the string value of the attribute.

You can also see how entity contents can be accessed. A parameter entity object can be looked up by self # node # dtd # par_entity "name", and by invoking replacement_text the value of the entity is returned after inner parameter and character entities have been processed. Note that you must use gen_entity instead of par_entity to access general entities.

class readme =
  object (self)
    inherit shared

    method to_html store ch =
      (* output header *)
      output_string
        ch "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">";
      output_string
        ch "<!- WARNING! This is a generated file, do not edit! ->\n";
      let title =
        match self # node # attribute "title" with
            Value s -> s
          | _ -> assert false
      in
      let html_header, _ =
        try (self # node # dtd # par_entity "readme:html:header")
              # replacement_text
        with WF_error _ -> "", false in
      let html_trailer, _ =
        try (self # node # dtd # par_entity "readme:html:trailer")
              # replacement_text
        with WF_error _ -> "", false in
      let html_bgcolor, _ =
        try (self # node # dtd # par_entity "readme:html:bgcolor")
              # replacement_text
        with WF_error _ -> "white", false in
      let html_textcolor, _ =
        try (self # node # dtd # par_entity "readme:html:textcolor")
              # replacement_text
        with WF_error _ -> "", false in
      let html_alinkcolor, _ =
        try (self # node # dtd # par_entity "readme:html:alinkcolor")
              # replacement_text
        with WF_error _ -> "", false in
      let html_vlinkcolor, _ =
        try (self # node # dtd # par_entity "readme:html:vlinkcolor")
              # replacement_text
        with WF_error _ -> "", false in
      let html_linkcolor, _ =
        try (self # node # dtd # par_entity "readme:html:linkcolor")
              # replacement_text
        with WF_error _ -> "", false in
      let html_background, _ =
        try (self # node # dtd # par_entity "readme:html:background")
              # replacement_text
        with WF_error _ -> "", false in

      output_string ch "<html><header><title>\n";
      output_string ch (escape_html title);
      output_string ch "</title></header>\n";
      output_string ch "<body ";
      List.iter
        (fun (name,value) ->
           if value <> "" then
             output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
        [ "bgcolor",   html_bgcolor;
          "text",      html_textcolor;
          "link",      html_linkcolor;
          "alink",     html_alinkcolor;
          "vlink",     html_vlinkcolor;
        ];
      output_string ch ">\n";
      output_string ch html_header;
      output_string ch "<h1>";
      output_string ch (escape_html title);
      output_string ch "</h1>\n";
      (* process main content: *)
      List.iter
        (fun n -> n # extension # to_html store ch)
        (self # node # sub_nodes);
      (* now process footnotes *)
      store # print_footnotes ch;
      (* trailer *)
      output_string ch html_trailer;
      output_string ch "</html>\n";
  end
;;

2.4.8. Classes section, sect1, sect2, and sect3

As the conversion process is very similar, the conversion classes of the three section levels are derived from the more general section class. The HTML code of the section levels only differs in the type of the headline, and because of this the classes describing the section levels can be obtained by replacing the class argument the_tag of section by the HTML name of the headline tag.

Section elements are converted to HTML by printing a headline and then converting the contents of the element recursively. More precisely, the first sub-element is always a title element, and the other elements are the contents of the section. This structure is declared in the DTD, and it is guaranteed that the document matches the DTD. Because of this the title node can be separated from the rest without any checks.

Both the title node and the body nodes are then converted to HTML by calling to_html on them.

class section the_tag =
  object (self)
    inherit shared

    val tag = the_tag

    method to_html store ch =
      let sub_nodes = self # node # sub_nodes in
      match sub_nodes with
          title_node :: rest ->
            output_string ch ("<" ^ tag ^ ">\n");
            title_node # extension # to_html store ch;
            output_string ch ("\n</" ^ tag ^ ">");
            List.iter
              (fun n -> n # extension # to_html store ch)
              rest
        | _ ->
            assert false
  end
;;

class sect1 = section "h1";;
class sect2 = section "h3";;
class sect3 = section "h4";;
2.4.9. Classes map_tag, p, em, ul, li

Several element types are converted to HTML by simply mapping them to corresponding HTML element types. The class map_tag implements this, and the class argument the_target_tag determines the tag name to map to. The output consists of the start tag, the recursively converted inner elements, and the end tag.

class map_tag the_target_tag =
  object (self)
    inherit shared

    val target_tag = the_target_tag

    method to_html store ch =
      output_string ch ("<" ^ target_tag ^ ">\n");
      List.iter
        (fun n -> n # extension # to_html store ch)
        (self # node # sub_nodes);
      output_string ch ("\n</" ^ target_tag ^ ">");
  end
;;

class p  = map_tag "p";;
class em = map_tag "b";;
class ul = map_tag "ul";;
class li = map_tag "li";;

2.4.10. Class br

Elements of type br are mapped to the same HTML type. Note that HTML forbids the end tag of br.

class br =
  object (self)
    inherit shared

    method to_html store ch =
      output_string ch "<br>\n";
      List.iter
        (fun n -> n # extension # to_html store ch)
        (self # node # sub_nodes);
  end
;;
2.4.11. Class code

The code type is converted to a pre section (preformatted text). As the meaning of tabs is unspecified in HTML, tabs are expanded to spaces.

class code =
  object (self)
    inherit shared

    method to_html store ch =
      let data = self # node # data in
      (* convert tabs *)
      let l = String.length data in
      let rec preprocess i column =
        (* this is very ineffective but comprehensive: *)
        if i < l then
          match data.[i] with
              '\t' ->
                let n = 8 - (column mod 8) in
                String.make n ' ' ^ preprocess (i+1) (column + n)
            | '\n' ->
                "\n" ^ preprocess (i+1) 0
            | c ->
                String.make 1 c ^ preprocess (i+1) (column + 1)
        else
          ""
      in
      output_string ch "<p><pre>";
      output_string ch (escape_html (preprocess 0 0));
      output_string ch "</pre></p>";

  end
;;

2.4.12. Class a

Hyperlinks, expressed by the a element type, are converted to the HTML a type. If the target of the hyperlink is given by href, the URL of this attribute can be used directly. Alternatively, the target can be given by readmeref in which case the ".html" suffix must be added to the file name.

Note that within a only #PCDATA is allowed, so the contents can be converted directly by applying escape_html to the character data contents.
class a =
  object (self)
    inherit shared

    method to_html store ch =
      output_string ch "<a ";
      let href =
        match self # node # attribute "href" with
            Value v -> escape_html v
          | Valuelist _ -> assert false
          | Implied_value ->
              begin match self # node # attribute "readmeref" with
                  Value v -> escape_html v ^ ".html"
                | Valuelist _ -> assert false
                | Implied_value ->
                    ""
              end
      in
      if href <> "" then
        output_string ch ("href=\"" ^ href ^ "\"");
      output_string ch ">";
      output_string ch (escape_html (self # node # data));
      output_string ch "</a>";
  end
;;

2.4.13. Class footnote

The footnote class has two methods: to_html to convert the footnote reference to HTML, and footnote_to_html to convert the footnote text itself.

The footnote reference is converted to a local hyperlink; more precisely, to two anchor tags which are connected with each other. The text anchor points to the footnote anchor, and the footnote anchor points to the text anchor.

The footnote must be allocated in the store object. By allocating the footnote, you get the number of the footnote, and the text of the footnote is stored until the end of the HTML page is reached, when the footnotes can be printed. The to_html method simply stores the object itself, such that the footnote_to_html method is invoked on the same object that encountered the footnote.
The to_html method only allocates the footnote and prints the reference anchor, but it does not print or convert the contents of the note. This is deferred until the footnotes actually get printed, i.e. the recursive call of to_html on the sub nodes is done by footnote_to_html.

Note that this technique does not work if you make another footnote within a footnote; the second footnote gets allocated but not printed.

class footnote =
  object (self)
    inherit shared

    val mutable footnote_number = 0

    method to_html store ch =
      let number =
        store # alloc_footnote (self : #shared :> footnote_printer) in
      let foot_anchor =
        "footnote" ^ string_of_int number in
      let text_anchor =
        "textnote" ^ string_of_int number in
      footnote_number <- number;
      output_string ch ( "<a name=\"" ^ text_anchor ^ "\" href=\"#" ^
                         foot_anchor ^ "\">[" ^ string_of_int number ^
                         "]</a>" )

    method footnote_to_html store ch =
      (* prerequisite: we are in a definition list <dl>...</dl> *)
      let foot_anchor =
        "footnote" ^ string_of_int footnote_number in
      let text_anchor =
        "textnote" ^ string_of_int footnote_number in
      output_string ch ("<dt><a name=\"" ^ foot_anchor ^ "\" href=\"#" ^
                        text_anchor ^ "\">[" ^ string_of_int footnote_number ^
                        "]</a></dt>\n<dd>");
      List.iter
        (fun n -> n # extension # to_html store ch)
        (self # node # sub_nodes);
      output_string ch ("\n</dd>")

  end
;;
2.4.14. The specification of the document model

This code sets up the hash table that connects element types with the exemplars of the extension classes that convert the elements to HTML.

open Pxp_yacc

let tag_map =
  make_spec_from_alist
    ~data_exemplar:(new data_impl (new only_data))
    ~default_element_exemplar:(new element_impl (new no_markup))
    ~element_alist:
      [ "readme",   (new element_impl (new readme));
        "sect1",    (new element_impl (new sect1));
        "sect2",    (new element_impl (new sect2));
        "sect3",    (new element_impl (new sect3));
        "title",    (new element_impl (new no_markup));
        "p",        (new element_impl (new p));
        "br",       (new element_impl (new br));
        "code",     (new element_impl (new code));
        "em",       (new element_impl (new em));
        "ul",       (new element_impl (new ul));
        "li",       (new element_impl (new li));
        "footnote", (new element_impl (new footnote : #shared :> shared));
        "a",        (new element_impl (new a));
      ]
    ()
;;

Notes

1. Elements may also contain processing instructions. Unlike other document models, PXP separates processing instructions from the rest of the text and provides a second interface to access them (method pinstr). However, there is a parser option (enable_pinstr_nodes) which changes the behaviour of the parser such that extra nodes for processing instructions are included into the tree.

   Furthermore, the tree does normally not contain nodes for XML comments; they are ignored by default. Again, there is an option (enable_comment_nodes) changing this.

2. Due to the typing system it is more or less impossible to derive recursive classes in O'Caml. To get around this, it is common practice to put the modifiable or extensible part of recursive objects into parallel objects.
3. The problem is that the subclass is usually not a subtype in this case because O'Caml has a contravariant subtyping rule.
Chapter 3. The objects representing the document

This description might be out-of-date. See the module interface files for updated information.

3.1. The document class

class [ 'ext ] document :
  Pxp_types.collect_warnings ->
  object
    method init_xml_version : string -> unit
    method init_root : 'ext node -> unit

    method xml_version : string
    method xml_standalone : bool
    method dtd : dtd
    method root : 'ext node

    method encoding : Pxp_types.rep_encoding

    method add_pinstr : proc_instruction -> unit
    method pinstr : string -> proc_instruction list
    method pinstr_names : string list

    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
  end
;;

The methods beginning with init_ are only for internal use of the parser.

• xml_version: returns the version string at the beginning of the document. For example, "1.0" is returned if the document begins with <?xml version="1.0"?>.

• xml_standalone: returns the boolean value of the standalone declaration in the XML declaration. If the standalone attribute is missing, false is returned.

• dtd: returns a reference to the global DTD object.

• root: returns a reference to the root element.

• encoding: returns the internal encoding of the document. This means that all strings of which the document consists are encoded in this character set.
• pinstr: returns the processing instructions outside the DTD and outside the root element. The argument passed to the method names a target, and the method returns all instructions with this target. The target is the first word inside <? and ?>.

• pinstr_names: returns the names of the processing instructions.

• add_pinstr: adds another processing instruction. This method is used by the parser itself to enter the instructions returned by pinstr, but you can also enter additional instructions.

• write: writes the document to the passed stream as XML text using the passed (external) encoding. The generated text is always valid XML and can be parsed by PXP; however, the text is badly formatted (this is not a pretty printer).
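As a hedged illustration (added here, not in the original text), the observer methods above can be combined as follows on a parsed document d; only methods listed in this section are used, and the helper name is hypothetical.

(* Sketch: summarize a parsed document using the observers above. *)
let describe_document d =
  print_endline ("XML version: " ^ d # xml_version);
  print_endline ("Standalone:  " ^ string_of_bool (d # xml_standalone));
  List.iter
    (fun target -> print_endline ("PI target: " ^ target))
    (d # pinstr_names)

A document obtained from one of the parsing functions can be passed directly; none of these methods modifies the document.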
3.2. The class type node

From Pxp_document:

type node_type =
    T_data
  | T_element of string
  | T_super_root
  | T_pinstr of string
  | T_comment
  (* and some other, reserved types *)
;;

class type [ 'ext ] node =
  object ('self)
    constraint 'ext = 'ext node #extension

    (* General observers *)

    method extension : 'ext
    method dtd : dtd
    method parent : 'ext node
    method root : 'ext node
    method sub_nodes : 'ext node list
    method iter_nodes : ('ext node -> unit) -> unit
    method iter_nodes_sibl :
      ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
    method node_type : node_type
    method encoding : Pxp_types.rep_encoding
    method data : string
    method position : (string * int * int)
    method comment : string option
    method pinstr : string -> proc_instruction list
    method pinstr_names : string list
    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit

    (* Attribute observers *)

    method attribute : string -> Pxp_types.att_value
    method required_string_attribute : string -> string
    method optional_string_attribute : string -> string option
    method required_list_attribute : string -> string list
    method optional_list_attribute : string -> string list
    method attribute_names : string list
    method attribute_type : string -> Pxp_types.att_type
    method attributes : (string * Pxp_types.att_value) list
    method id_attribute_name : string
    method id_attribute_value : string
    method idref_attribute_names : string

    (* Modifying methods *)

    method add_node : ?force:bool -> 'ext node -> unit
    method add_pinstr : proc_instruction -> unit
    method delete : unit
    method set_nodes : 'ext node list -> unit
    method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
    method set_comment : string option -> unit

    (* Cloning methods *)

    method orphaned_clone : 'self
    method orphaned_flat_clone : 'self
    method create_element :
             ?position:(string * int * int) ->
             dtd -> node_type -> (string * string) list ->
               'ext node
    method create_data : dtd -> string -> 'ext node
    method keep_always_whitespace_mode : unit

    (* Validating methods *)

    method local_validate : ?use_dfa:bool -> unit -> unit

    (* ... Internal methods are undocumented. *)
  end
;;

In the module Pxp_types you can find another type definition that is important in this context:

type Pxp_types.att_value =
    Value of string
  | Valuelist of string list
  | Implied_value
;;
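As a hedged illustration of the three cases (added here, not in the original text), a helper that flattens any attribute value into a string could look as follows; for most purposes the required_string_attribute and optional_string_attribute methods listed above are the more convenient interface. The helper name is hypothetical.

(* Sketch: collapse an att_value into a plain string. *)
let attribute_as_string n name =
  match n # attribute name with
      Value s       -> s
    | Valuelist l   -> String.concat " " l
    | Implied_value -> ""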
3.2.1. The structure of document trees

A node represents either an element or a character data section. There are two classes implementing the two aspects of nodes: element_impl and data_impl. The latter class does not implement all methods, because some methods do not make sense for data nodes.

(Note: PXP also supports a mode which forces that processing instructions and comments are represented as nodes of the document tree. However, these nodes are instances of element_impl with node types T_pinstr and T_comment, respectively. This mode must be explicitly configured; the basic representation knows only element and data nodes.)

The following figure (A tree with element nodes, data nodes, and attributes) shows an example of how a tree is constructed from element and data nodes. The circular areas represent element nodes, whereas the ovals denote data nodes. Only elements may have subnodes; data nodes are always leaves of the tree. The subnodes of an element can be either element or data nodes; in both cases the O'Caml objects storing the nodes have the class type node.

Attributes (the clouds in the picture) are not directly integrated into the tree; there is always an extra link to the attribute list. This is also true for processing instructions (not shown in the picture). This means that there are separate access methods for attributes and processing instructions.
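The following hedged sketch (added here, not part of the manual) walks such a tree using only the observer methods of section 3.2, printing one line per node; it assumes Pxp_document is open so that the node_type constructors are visible.

(* Sketch: print an indented outline of a document tree. *)
let rec print_outline indent n =
  (match n # node_type with
       T_element name -> print_endline (indent ^ "element " ^ name)
     | T_data         -> print_endline (indent ^ "data " ^ String.escaped (n # data))
     | _              -> print_endline (indent ^ "other node type"));
  List.iter (print_outline (indent ^ "  ")) (n # sub_nodes)

Calling print_outline "" (doc # root) on a parsed document doc prints its element structure.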
Figure 3-1. A tree with element nodes, data nodes, and attributes

[The figure is an embedded Encapsulated PostScript drawing (pic/node_term.ps, generated with fig2dev) and is omitted here. It shows a small document tree: element nodes drawn as circles, data nodes as ovals forming the leaves, and the attribute lists attached to the element nodes by separate links outside the tree itself.]
+ 3872 1634 l 3852 1631 l 3833 1628 l 3815 1626 l 3797 1623 l
+ 3780 1620 l 3761 1617 l 3743 1614 l 3725 1611 l 3708 1608 l
+ 3692 1605 l 3675 1602 l 3659 1600 l 3643 1597 l 3627 1594 l
+ 3612 1591 l 3597 1587 l 3582 1584 l 3568 1580 l 3555 1575 l
+ 3541 1569 l 3527 1563 l 3514 1556 l 3501 1550 l 3489 1543 l
+ 3477 1536 l 3465 1529 l 3454 1522 l 3444 1514 l 3435 1505 l
+ 3427 1496 l 3420 1485 l 3415 1473 l 3412 1460 l 3411 1445 l
+ 3410 1430 l 3411 1414 l 3412 1397 l 3413 1380 l 3414 1364 l
+ 3416 1348 l 3417 1333 l 3418 1318 l 3420 1305 l 3423 1290 l
+ 3425 1275 l 3428 1261 l 3431 1247 l 3434 1233 l 3437 1220 l
+ 3442 1207 l 3447 1194 l 3455 1182 l 3465 1170 l 3474 1162 l
+ 3483 1155 l 3493 1148 l 3504 1141 l 3515 1134 l 3526 1127 l
+ 3538 1121 l 3550 1114 l 3563 1108 l 3577 1102 l 3591 1096 l
+ 3607 1090 l 3625 1085 l cp gs col0 s gr 
+% Polyline
+n 2475 1215 m 2477 1217 l 2482 1221 l 2491 1229 l 2503 1239 l 2517 1252 l
+ 2534 1267 l 2552 1282 l 2570 1296 l 2588 1310 l 2605 1322 l
+ 2621 1332 l 2638 1342 l 2655 1350 l 2669 1356 l 2684 1362 l
+ 2700 1368 l 2717 1374 l 2734 1380 l 2752 1386 l 2770 1392 l
+ 2789 1398 l 2808 1403 l 2827 1409 l 2846 1415 l 2865 1420 l
+ 2884 1425 l 2902 1429 l 2920 1433 l 2937 1436 l 2954 1438 l
+ 2970 1440 l 2988 1441 l 3006 1441 l 3024 1440 l 3041 1439 l
+ 3059 1437 l 3076 1434 l 3094 1431 l 3111 1428 l 3129 1425 l
+ 3146 1421 l 3162 1417 l 3179 1414 l 3195 1409 l 3211 1405 l
+ 3226 1400 l 3240 1395 l 3256 1388 l 3271 1380 l 3287 1370 l
+ 3304 1358 l 3322 1344 l 3340 1329 l 3359 1314 l 3376 1299 l
+ 3391 1286 l 3404 1275 l 3412 1267 l 3418 1262 l 3420 1260 l gs col0 s gr 
+% Polyline
+n 1125 3060 m 1126 3063 l 1127 3068 l 1129 3078 l 1132 3093 l 1136 3112 l
+ 1141 3135 l 1146 3162 l 1153 3190 l 1159 3219 l 1166 3248 l
+ 1173 3275 l 1180 3301 l 1187 3324 l 1193 3345 l 1200 3364 l
+ 1207 3381 l 1215 3397 l 1224 3414 l 1234 3429 l 1245 3444 l
+ 1256 3459 l 1267 3473 l 1279 3486 l 1291 3499 l 1304 3512 l
+ 1316 3525 l 1329 3537 l 1342 3550 l 1355 3562 l 1368 3574 l
+ 1382 3585 l 1396 3596 l 1410 3607 l 1425 3617 l 1441 3626 l
+ 1457 3635 l 1473 3644 l 1490 3653 l 1507 3661 l 1524 3669 l
+ 1542 3677 l 1559 3685 l 1577 3692 l 1595 3700 l 1613 3706 l
+ 1631 3713 l 1649 3718 l 1668 3723 l 1687 3727 l 1704 3730 l
+ 1723 3732 l 1743 3733 l 1764 3734 l 1788 3734 l 1814 3733 l
+ 1841 3732 l 1869 3731 l 1898 3729 l 1926 3727 l 1952 3725 l
+ 1975 3724 l 1993 3722 l 2008 3721 l 2017 3721 l 2022 3720 l
+ 2025 3720 l gs col0 s gr 
+/Helvetica-iso ff 180.00 scf sf
+3600 1260 m
+gs 1 -1 sc (attributes:) col0 sh gr
+/Helvetica-iso ff 180.00 scf sf
+3600 1485 m
+gs 1 -1 sc ("att" -> Value "apple") col0 sh gr
+/Helvetica-iso ff 180.00 scf sf
+2250 3780 m
+gs 1 -1 sc (attributes:) col0 sh gr
+/Helvetica-Oblique-iso ff 180.00 scf sf
+390 4725 m
+gs 1 -1 sc (<a att="apple"><b><a att="orange">An orange</a>Cherries</b><c/></a>) col0 sh gr
+/Helvetica-iso ff 180.00 scf sf
+2250 4005 m
+gs 1 -1 sc ("att" -> Value "orange") col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+1815 3015 m
+gs 1 -1 sc ("Cherries") col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+375 4125 m
+gs 1 -1 sc ("An orange") col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+750 2985 m
+gs 1 -1 sc (<a>) col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+1410 2085 m
+gs 1 -1 sc (<b>) col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+2790 2070 m
+gs 1 -1 sc (<c>) col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+2100 1200 m
+gs 1 -1 sc (<a>) col0 sh gr
+$F2psEnd
+rs
+
+%%EndDocument
+ @endspecial 396 2578 a
+ currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch
+neg exch translate
Only elements, data sections, attributes and processing instructions (and comments, if configured) can, directly or indirectly, occur in the document tree. It is impossible to add entity references to the tree; if the parser finds such a reference, it is not the reference as such that is included in the tree, but the referenced text (i.e. the tree representing the structured text).

Note that the parser collapses as much data material as possible into one data node, so that there are normally never two adjacent data nodes. This invariant is enforced even if data material is included by entity references or CDATA sections, or if a data sequence is interrupted by comments. For instance, a &amp; b <!-- comment --> c <![CDATA[ <> d]]> is represented by a single data node. You can, however, create document trees manually which break this invariant; it only describes how the parser itself forms the tree.
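The following minimal sketch makes the invariant checkable. It uses only node methods described in this chapter (node_type and sub_nodes); how the tree is obtained from the parser, and the module layout (Pxp_document is assumed to provide the node class type and the node_type constructors), are assumptions of the sketch, not statements of this manual.

    open Pxp_document   (* assumed home of the node class type and T_data *)

    (* Count the data children of an element; with the invariant above, a
       mixed run of text, entity references, CDATA sections and comments
       still yields at most one data child per gap between child elements. *)
    let count_data_children (e : 'ext node) : int =
      List.length
        (List.filter
           (fun child ->
              match child # node_type with T_data -> true | _ -> false)
           (e # sub_nodes))

For an element whose content is the example above, count_data_children would be expected to return 1.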
Figure 3-2. Nodes are doubly linked trees
[Figure (pic/node_general.fig): a parent node and its three children, drawn as circles inside a box; downward arrows labelled sub_nodes lead from the parent to its children, upward arrows labelled parent lead from each child back to the parent.]
The node tree has links in both directions: every node has a link to its parent (if any), and it has links to its subnodes (see figure Nodes are doubly linked trees). Obviously, this doubly-linked structure simplifies navigation in the tree, but it also has some consequences for the operations that are possible on trees.

Because every node must have at most one parent node, operations are illegal if they would violate this condition. The following figure (A node can only be added if it is a root) shows on the left side that node y is added to x as a new subnode; this is allowed because y does not have a parent yet. The right side of the picture illustrates what would happen if y already had a parent node: this is illegal, because y would have two parents after the operation. (A small sketch after the figure shows how to check the condition before calling add_node.)

Figure 3-3. A node can only be added if it is a root
[Figure (pic/node_add.fig): two boxed trees, each annotated with the expression x # add_node y. Left: y is a root of its own, so attaching it below x is allowed. Right: y already hangs in another tree, so the same call is illegal.]
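Here is a minimal sketch of that rule, written against the methods named in this chapter (parent raising Not_found for roots, and the add_node call shown in the figure). The guard and the error message are ours; the sketch does not claim to reproduce what the library itself reports when the rule is violated.

    open Pxp_document   (* assumed home of the node class type *)

    (* Attach y below x only if y is still a root, i.e. has no parent yet. *)
    let add_if_root (x : 'ext node) (y : 'ext node) : unit =
      let y_is_root =
        try ignore (y # parent); false with Not_found -> true in
      if y_is_root then
        x # add_node y    (* legal: y becomes a subnode of x *)
      else
        failwith "add_if_root: y already has a parent"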
The "delete" operation simply removes the links between two nodes. In the picture (A deleted node becomes the root of the subtree) the node x is deleted from the list of subnodes of y. After that, x becomes the root of the subtree starting at this node. (A small sketch after the figure shows the effect on x.)

Figure 3-4. A deleted node becomes the root of the subtree
[Figure (pic/node_delete.fig): annotated with x # delete. Left: x hangs below y, with the link crossed out. Right: afterwards the tree of y no longer contains x, and x is the root of the detached subtree.]
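A minimal sketch of that effect, using only methods described in this chapter; the assertion is ours and only restates what the figure shows.

    open Pxp_document   (* assumed home of the node class type *)

    (* Detach x from its parent; afterwards x is the root of its own subtree. *)
    let detach (x : 'ext node) =
      x # delete;
      assert (try ignore (x # parent); false with Not_found -> true);
      x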
It is also possible to make a clone of a subtree, as illustrated in The clone of a subtree. In this case the clone is a copy of the original subtree, except that it is no longer a subnode. Because cloning never keeps the connection to the parent, the clones are called orphaned. (A small sketch after the figure demonstrates this.)

Figure 3-5. The clone of a subtree
[Figure (pic/node_clone.fig): annotated with let x' = x # orphaned_clone. Left: x is a subnode of y. Right: the tree below y is unchanged, and x' is a parentless copy of the subtree that was rooted at x.]
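A sketch of the cloning behaviour, again limited to the methods named in this chapter; the two assertions only restate what the text and the figure say (same contents, no parent link).

    open Pxp_document   (* assumed home of the node class type *)

    (* Copy the subtree rooted at x; the copy has no parent ("orphaned"). *)
    let copy_subtree (x : 'ext node) =
      let x' = x # orphaned_clone in
      (* same character data as the original ... *)
      assert (x' # data = x # data);
      (* ... but no parent link any more *)
      assert (try ignore (x' # parent); false with Not_found -> true);
      x'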
3.2.2. The methods of the class type node
General observers.

• extension: The reference to the extension object which belongs to this node (see ...).

• dtd: Returns a reference to the global DTD. All nodes of a tree must share the same DTD.

• parent: Gets the father node. Raises Not_found if the node does not have a parent, i.e. if the node is the root.

• root: Gets the reference to the root node of the tree. Every node is contained in a tree with a root, so this method always succeeds. Note that this method searches the root, which costs time proportional to the length of the path to the root.

• sub_nodes: Returns references to the children. The returned list reflects the order of the children. For data nodes, this method returns the empty list.

• iter_nodes f: Iterates over the children, and calls f for every child in turn.

• iter_nodes_sibl f: Iterates over the children, and calls f for every child in turn. f gets as arguments the previous node, the current node, and the next node.

• node_type: Returns either T_data, which means that the node is a data node, or T_element n, which means that the node is an element of type n. If configured, possible node types are also T_pinstr t, indicating that the node represents a processing instruction with target t, and T_comment, in which case the node is a comment. (The sketch after this list shows node_type, data and iter_nodes in combination.)

• encoding: Returns the encoding of the strings.

• data: Returns the character data of this node and all children, concatenated as one string. The encoding of the string is what the method encoding returns. For data nodes, this method simply returns the represented characters. For elements, the meaning of the method has been extended such that it returns something useful, i.e. the effectively contained characters, without markup. (For T_pinstr and T_comment nodes, the method returns the empty string.)

• position: If configured, this method returns the position of the element as a triple (entity, line, byteposition). For data nodes, the position is not stored. If the position is not available, the triple ("?", 0, 0) is returned.

• comment: Returns Some text for comment nodes, and None for other nodes. The text is everything between the comment delimiters <!-- and -->.

• pinstr n: Returns all processing instructions that are directly contained in this element and that have a target specification of n. The target is the first word after the <?.

• pinstr_names: Returns the list of all targets of processing instructions directly contained in this element.

• write s enc: Prints the node and all subnodes to the passed output stream as valid XML text, using the passed external encoding.
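The following sketch combines several of these observers into a recursive dump of a tree. The node_type constructors and method names are those listed above; the module name Pxp_document, the output format, and the restriction to the four node types listed are assumptions of the sketch.

    open Pxp_document

    (* Print an indented outline of the tree below [n]. *)
    let rec dump ?(indent = 0) (n : 'ext node) : unit =
      let pad = String.make indent ' ' in
      (match n # node_type with
       | T_element name -> Printf.printf "%selement %s\n" pad name
       | T_data         -> Printf.printf "%sdata %S\n" pad (n # data)
       | T_pinstr t     -> Printf.printf "%sprocessing instruction, target %s\n" pad t
       | T_comment      -> Printf.printf "%scomment\n" pad);
      n # iter_nodes (fun child -> dump ~indent:(indent + 2) child)

Called as dump root on the root of a tree, this visits every node exactly once, in document order.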
Attribute observers.
• attribute n: Returns the value of the attribute with name n. This method returns a value for every declared attribute, and it raises Not_found for any undeclared attribute. Note that it even returns a value if the attribute is actually missing but is declared as #IMPLIED or has a default value. Possible values are:
+479 687 y(v)n(alues)f(are:)p Black 479 919 a Fa(\225)p
+Black 62 w Fq(Implied_value)p Fv(:)f(The)h(attrib)n(ute)g(has)g(been)g
+(declared)e(with)j(the)f(k)o(e)o(yw)o(ord)e Fq(#IMPLIED)p
+Fv(,)i(and)f(the)h(attrib)n(ute)g(is)562 1027 y(missing)g(in)h(the)f
+(attrib)n(ute)g(list)h(of)f(this)h(element.)p Black 479
+1135 a Fa(\225)p Black 62 w Fq(Value)44 b(s)p Fv(:)21
+b(The)f(attrib)n(ute)g(has)g(been)g(declared)e(as)j(type)f
+Fq(CDATA)p Fv(,)g(as)h Fq(ID)p Fv(,)f(as)h Fq(IDREF)p
+Fv(,)e(as)i Fq(ENTITY)p Fv(,)f(or)g(as)562 1243 y Fq(NMTOKEN)p
+Fv(,)g(or)g(as)g(enumeration)e(or)i(notation,)f(and)g(one)h(of)g(the)g
+(tw)o(o)h(conditions)d(holds:)i(\(1\))g(The)g(attrib)n(ute)562
+1351 y(v)n(alue)g(is)h(present)e(in)i(the)f(attrib)n(ute)g(list)h(in)f
+(which)g(case)h(the)f(v)n(alue)f(is)j(returned)c(in)i(the)h(string)e
+Fq(s)p Fv(.)i(\(2\))e(The)562 1459 y(attrib)n(ute)h(has)h(been)e
+(omitted,)g(and)h(the)g(DTD)g(declared)f(the)i(attrib)n(ute)e(with)i(a)
+f(def)o(ault)g(v)n(alue.)f(The)h(def)o(ault)562 1567
+y(v)n(alue)f(is)i(returned)d(in)i Fq(s)p Fv(.)g(-)g(Summarized,)d
+Fq(Value)44 b(s)20 b Fv(is)h(returned)d(for)h(non-implied,)e(non-list)i
+(attrib)n(ute)g(v)n(alues.)p Black 479 1675 a Fa(\225)p
+Black 62 w Fq(Valuelist)44 b(l)p Fv(:)20 b(The)g(attrib)n(ute)g(has)g
+(been)g(declared)f(as)i(type)e Fq(IDREFS)p Fv(,)h(as)h
+Fq(ENTITIES)p Fv(,)e(or)h(as)h Fq(NMTOKENS)p Fv(,)562
+1783 y(and)f(one)g(of)f(the)i(tw)o(o)f(conditions)f(holds:)h(\(1\))f
+(The)h(attrib)n(ute)g(v)n(alue)f(is)i(present)f(in)g(the)h(attrib)n
+(ute)e(list)j(in)e(which)562 1891 y(case)h(the)f(space-separated)e(tok)
+o(ens)i(of)g(the)g(v)n(alue)g(are)g(returned)e(in)j(the)f(string)g
+(list)h Fq(l)p Fv(.)f(\(2\))g(The)g(attrib)n(ute)g(has)562
+1999 y(been)g(omitted,)f(and)h(the)g(DTD)g(declared)f(the)h(attrib)n
+(ute)g(with)h(a)f(def)o(ault)g(v)n(alue.)f(The)h(def)o(ault)f(v)n(alue)
+h(is)h(returned)562 2107 y(in)g Fq(l)p Fv(.)f(-)g(Summarized,)f
+Fq(Valuelist)43 b(l)20 b Fv(is)i(returned)c(for)i(all)g(list-type)g
+(attrib)n(ute)g(v)n(alues.)396 2256 y(Note)g(that)h(before)d(the)j
+(attrib)n(ute)f(v)n(alue)f(is)i(returned,)d(the)i(v)n(alue)g(is)h
+(normalized.)d(This)j(means)e(that)i(ne)n(wlines)e(are)479
+2364 y(con)m(v)o(erted)f(to)i(spaces,)g(and)g(that)g(references)f(to)h
+(character)f(entities)i(\(i.e.)f Fq(&#)p Fn(n)p Fq(;)p
+Fv(\))g(and)f(general)g(entities)i(\(i.e.)479 2472 y
+Fq(&)p Fn(name)p Fq(;)p Fv(\))f(are)g(e)o(xpanded;)e(if)i(necessary)-5
+b(,)19 b(e)o(xpansion)f(is)j(performed)d(recursi)n(v)o(ely)-5
+b(.)479 2621 y(In)20 b(well-formedness)e(mode,)h(there)h(is)h(no)f(DTD)
+g(which)g(could)f(declare)h(an)g(attrib)n(ute.)f(Because)i(of)f(this,)g
+(e)n(v)o(ery)479 2729 y(occuring)f(attrib)n(ute)g(is)i(considered)e(as)
+i(a)f(CD)m(A)-9 b(T)h(A)21 b(attrib)n(ute.)p Black 396
+2879 a Ft(\225)p Black 60 w Fq(required_string_attribute)41
+b(n)p Fv(:)21 b(returns)e(the)h(V)-9 b(alue)20 b(attrib)n(ute)g(called)
+g(n,)g(or)g(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)g(as)h(a)479
+2987 y(string)f(where)g(the)g(list)h(elements)f(are)g(separated)f(by)h
+(spaces.)g(If)h(the)f(attrib)n(ute)g(v)n(alue)f(is)i(implied,)e(or)h
+(if)h(the)479 3094 y(attrib)n(ute)f(does)g(not)g(e)o(xists,)g(the)g
+(method)f(will)i(f)o(ail.)g(-)f(This)g(method)f(is)i(con)m(v)o(enient)d
+(if)i(you)g(e)o(xpect)f(a)h(non-implied)479 3202 y(and)g(non-list)f
+(attrib)n(ute)h(v)n(alue.)p Black 396 3310 a Ft(\225)p
+Black 60 w Fq(optional_string_attribute)41 b(n)p Fv(:)21
+b(returns)e(the)h(V)-9 b(alue)20 b(attrib)n(ute)g(called)g(n,)g(or)g
+(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)g(as)h(a)479
+3418 y(string)f(where)g(the)g(list)h(elements)f(are)g(separated)f(by)h
+(spaces.)g(If)h(the)f(attrib)n(ute)g(v)n(alue)f(is)i(implied,)e(or)h
+(if)h(the)479 3526 y(attrib)n(ute)f(does)g(not)g(e)o(xists,)g(the)g
+(method)f(returns)h(None.)f(-)h(This)h(method)e(is)i(con)m(v)o(enient)c
+(if)k(you)e(e)o(xpect)g(a)i(non-list)479 3634 y(attrib)n(ute)f(v)n
+(alue)g(including)e(the)i(implied)g(v)n(alue.)p Black
+396 3742 a Ft(\225)p Black 60 w Fq(required_list_attribute)41
+b(n)p Fv(:)20 b(returns)f(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)f
+(called)g(n,)g(or)g(the)h(V)-9 b(alue)19 b(attrib)n(ute)g(as)h(a)g
+(list)479 3850 y(with)h(a)f(single)g(element.)g(If)g(the)g(attrib)n
+(ute)g(v)n(alue)f(is)i(implied,)f(or)g(if)g(the)g(attrib)n(ute)g(does)g
+(not)g(e)o(xists,)g(the)g(method)479 3958 y(will)h(f)o(ail.)g(-)f(This)
+g(method)f(is)i(con)m(v)o(enient)d(if)i(you)g(e)o(xpect)f(a)h(list)i
+(attrib)n(ute)d(v)n(alue.)p Black 396 4066 a Ft(\225)p
+Black 60 w Fq(optional_list_attribute)41 b(n)p Fv(:)20
+b(returns)f(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)f(called)g(n,)g(or)
+g(the)h(V)-9 b(alue)19 b(attrib)n(ute)g(as)h(a)g(list)479
+4174 y(with)h(a)f(single)g(element.)g(If)g(the)g(attrib)n(ute)g(v)n
+(alue)f(is)i(implied,)f(or)g(if)g(the)g(attrib)n(ute)g(does)g(not)g(e)o
+(xists,)g(an)g(empty)g(list)479 4282 y(will)h(be)f(returned.)e(-)j
+(This)f(method)f(is)i(con)m(v)o(enient)d(if)i(you)f(e)o(xpect)h(a)g
+(list)i(attrib)n(ute)d(v)n(alue)h(or)g(the)g(implied)f(v)n(alue.)p
+Black 396 4390 a Ft(\225)p Black 60 w Fq(attribute_names)p
+Fv(:)g(returns)g(the)h(list)h(of)f(all)h(attrib)n(ute)f(names)g(of)g
+(this)g(element.)g(As)h(this)f(is)i(a)e(v)n(alidating)479
+4498 y(parser)m(,)f(this)i(list)g(is)g(equal)f(to)g(the)h(list)g(of)f
+(declared)f(attrib)n(utes.)p Black 396 4605 a Ft(\225)p
+Black 60 w Fq(attribute_type)43 b(n)p Fv(:)20 b(returns)g(the)g(type)g
+(of)g(the)g(attrib)n(ute)g(called)g Fq(n)p Fv(.)g(See)h(the)f(module)f
+Fq(Pxp_types)g Fv(for)g(a)479 4713 y(description)g(of)h(the)g(encoding)
+e(of)i(the)g(types.)p Black 396 4821 a Ft(\225)p Black
+60 w Fq(attributes)p Fv(:)f(returns)h(the)g(list)h(of)f(pairs)g(of)g
+(names)g(and)g(v)n(alues)g(for)f(all)i(attrib)n(utes)f(of)g(this)h
+(element.)p Black 3800 5278 a Fr(54)p Black eop
• id_attribute_name: returns the name of the attribute that is declared with type ID. There is at most one such attribute. The method raises Not_found if there is no declared ID attribute for the element type.

• id_attribute_value: returns the value of the attribute that is declared with type ID. There is at most one such attribute. The method raises Not_found if there is no declared ID attribute for the element type.

• idref_attribute_names: returns the list of attribute names that are declared as IDREF or IDREFS. (A small usage sketch of the observer methods follows this list.)
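The observer methods above combine in ordinary O'Caml code. The following is a small illustrative sketch, not taken from the parser distribution; it assumes an element node n and that the Pxp_document and Pxp_types modules are opened so that the attribute value constructors are visible.

  (* Sketch only: print every declared attribute of an element node n,
     using the observer methods described above. *)
  let print_attributes n =
    List.iter
      (fun name ->
         match n # attribute name with
           Value s       -> Printf.printf "%s=%s\n" name s
         | Valuelist l   -> Printf.printf "%s=%s\n" name (String.concat " " l)
         | Implied_value -> Printf.printf "%s is implied\n" name)
      (n # attribute_names)

Because the parser is validating, attribute_names lists every declared attribute, so implied attributes also show up here.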
Modifying methods. The following methods are only defined for element nodes (more exactly: the methods are defined for data nodes, too, but always fail).

• add_node sn: Adds the sub node sn to the list of children. This operation is illustrated in the picture A node can only be added if it is a root. This method expects that sn is a root, and it requires that sn and the current object share the same DTD.

  Because add_node is the method the parser itself uses to add new nodes to the tree, it performs by default some simple validation checks: If the content model is a regular expression, it is not allowed to add data nodes to this node unless the new nodes consist only of whitespace. In this case, the new data nodes are silently dropped (you can change this by invoking keep_always_whitespace_mode).

  If the document is flagged as stand-alone, these data nodes containing only whitespace are even forbidden if the element declaration is contained in an external entity. This case is detected and rejected.

  If the content model is EMPTY, it is not allowed to add any data node unless the data node is empty. In this case, the new data node is silently dropped.

  These checks only apply if there is a DTD. In well-formedness mode, it is assumed that every element is declared with content model ANY, which prohibits any validation check. Furthermore, you can turn these checks off by passing ~force:true as first argument.

• add_pinstr pi: Adds the processing instruction pi to the list of processing instructions.

• delete: Deletes this node from the tree. After this operation, this node is no longer the child of the former father node; and the node loses the connection to the father as well. This operation is illustrated by the figure A deleted node becomes the root of the subtree.

• set_nodes nl: Sets the list of children to nl. It is required that every member of nl is a root, and that all members and the current object share the same DTD. Unlike add_node, no validation checks are performed.

• quick_set_attributes atts: sets the attributes of this element to atts. It is not checked whether atts matches the DTD or not; it is up to the caller of this method to ensure this. (This method may be useful to transform the attribute values, i.e. to apply a mapping to every attribute; see the sketch after this list.)

• set_comment text: This method is only applicable to T_comment nodes; it sets the comment text contained by such nodes.
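As an illustration of the modifying methods, here is a hedged sketch (not part of the manual) that rewrites every attribute value of an element with quick_set_attributes. It assumes an element node n, opened Pxp_document and Pxp_types modules, and that the (name, value) pairs accepted by quick_set_attributes use the same value representation that the attributes method returns; since quick_set_attributes bypasses validation, the caller must keep the new values consistent with the DTD.

  (* Sketch only: uppercase every string-valued attribute of element node n.
     The representation of atts is assumed to match what "attributes" returns. *)
  let uppercase_attributes n =
    let atts' =
      List.map
        (fun (name, v) ->
           match v with
             Value s -> (name, Value (String.uppercase s))
           | other   -> (name, other))
        (n # attributes)
    in
    n # quick_set_attributes atts'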
Cloning methods.

• orphaned_clone: Returns a clone of the node and the complete tree below this node (deep clone). The clone does not have a parent (i.e. the reference to the parent node is not cloned). While copying the subtree, strings are skipped; it is likely that the original tree and the copy tree share strings. Extension objects are cloned by invoking the clone method on the original objects; how much of the extension objects is cloned depends on the implementation of this method.

  This operation is illustrated by the figure The clone of a subtree.

• orphaned_flat_clone: Returns a clone of the node, but sets the list of sub nodes to [], i.e. the sub nodes are not cloned.

• create_element dtd nt al: Returns a flat copy of this node (which must be an element) with the following modifications: The DTD is set to dtd; the node type is set to nt, and the new attribute list is set to al (given as a list of (name, value) pairs). The copy has neither children nor a parent. It does not contain processing instructions. See the example below.

  Note that you can specify the position of the new node by the optional argument ~position.

• create_data dtd cdata: Returns a flat copy of this node (which must be a data node) with the following modifications: The DTD is set to dtd; the node type is set to T_data; the attribute list is empty (data nodes never have attributes); the list of children and PIs is empty, too (same reason). The new node does not have a parent. The value cdata is the new character content of the node. See the example below.

• keep_always_whitespace_mode: Even data nodes which are normally dropped because they only contain ignorable whitespace can be added to this node once this mode is turned on. (This mode is useful to produce canonical XML.)

Validating methods. There is one method which locally validates the node, i.e. checks whether the subnodes match the content model of this node.

• local_validate: Checks that this node conforms to the DTD by comparing the type of the subnodes with the content model for this node. (Applications need not call this method unless they add new nodes themselves to the tree.)

3.2.3. The class element_impl

This class is an implementation of node which realizes element nodes:

  class [ 'ext ] element_impl : 'ext -> [ 'ext ] node

Constructor. You can create a new instance by
  new element_impl extension_object

which creates a special form of empty element which already contains a reference to the extension_object, but is otherwise empty. This special form is called an exemplar. The purpose of exemplars is that they serve as patterns that can be duplicated and filled with data. The method create_element is designed to perform this action.

Example. First, create an exemplar by

  let exemplar_ext = ... in
  let exemplar     = new element_impl exemplar_ext in

The exemplar is not used in node trees, but only as a pattern when the element nodes are created:

  let element = exemplar # create_element dtd (T_element name) attlist

The element is a copy of exemplar (even the extension exemplar_ext has been copied), which ensures that element and its extension are objects of the same class as the exemplars; note that you need not pass a class name or other meta information. The copy is initially connected with the dtd, it gets a node type, and the attribute list is filled. The element is now fully functional; it can be added to another element as a child, and it can contain references to subnodes.

3.2.4. The class data_impl

This class is an implementation of node which should be used for all character data nodes:

  class [ 'ext ] data_impl : 'ext -> [ 'ext ] node

Constructor. You can create a new instance by

  new data_impl extension_object

which creates an empty exemplar node which is connected to extension_object. The node does not contain a reference to any DTD, and because of this it cannot be added to node trees.

To get a fully working data node, apply the method create_data to the exemplar (see the example).

Example. First, create an exemplar by

  let exemplar_ext = ... in
  let exemplar     = new data_impl exemplar_ext in

The exemplar is not used in node trees, but only as a pattern when the data nodes are created:
  let data_node = exemplar # create_data dtd "The characters contained in the data node"

The data_node is a copy of exemplar. The copy is initially connected with the dtd, and it is filled with character material. The data_node is now fully functional; it can be added to an element as a child.

3.2.5. The type spec

The type spec defines a way to handle the details of creating nodes from exemplars.

  type 'ext spec
  constraint 'ext = 'ext node #extension

  val make_spec_from_mapping :
        ?super_root_exemplar : 'ext node ->
        ?comment_exemplar : 'ext node ->
        ?default_pinstr_exemplar : 'ext node ->
        ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
        data_exemplar: 'ext node ->
        default_element_exemplar: 'ext node ->
        element_mapping: (string, 'ext node) Hashtbl.t ->
        unit ->
          'ext spec

  val make_spec_from_alist :
        ?super_root_exemplar : 'ext node ->
        ?comment_exemplar : 'ext node ->
        ?default_pinstr_exemplar : 'ext node ->
        ?pinstr_alist : (string * 'ext node) list ->
        data_exemplar: 'ext node ->
        default_element_exemplar: 'ext node ->
        element_alist: (string * 'ext node) list ->
        unit ->
          'ext spec

The two functions make_spec_from_mapping and make_spec_from_alist create spec values. Both functions are functionally equivalent, and the only difference is that the first function prefers hashtables and the latter associative lists to describe mappings from names to exemplars.

You can specify exemplars for the various kinds of nodes that need to be generated when an XML document is parsed:
• ~super_root_exemplar: This exemplar is used to create the super root. This special node is only created if the corresponding configuration option has been selected; it is the parent node of the root node, which may be convenient if every working node must have a parent.

• ~comment_exemplar: This exemplar is used when a comment node must be created. Note that such nodes are only created if the corresponding configuration option is "on".

• ~default_pinstr_exemplar: If a node for a processing instruction must be created, and the instruction is not listed in the table passed by ~pinstr_mapping or ~pinstr_alist, this exemplar is used. Again, the configuration option must be "on" in order to create such nodes at all.

• ~pinstr_mapping or ~pinstr_alist: Map the target names of processing instructions to exemplars. These mappings are only used when nodes for processing instructions are created.

• ~data_exemplar: The exemplar for ordinary data nodes.

• ~default_element_exemplar: This exemplar is used if an element node must be created, but the element type cannot be found in the tables element_mapping or element_alist.

• ~element_mapping or ~element_alist: Map the element types to exemplars. These mappings are used to create element nodes.

In most cases, you only want to create spec values to pass them to the parser functions found in Pxp_yacc. However, it might be useful to apply spec values directly.

The following functions create various types of nodes by selecting the corresponding exemplar from the passed spec value, and by calling create_element or create_data on the exemplar.

  val create_data_node :
        'ext spec ->
        dtd ->
        (* data material: *) string ->
          'ext node

  val create_element_node :
        ?position:(string * int * int) ->
        'ext spec ->
        dtd ->
        (* element type: *) string ->
        (* attributes: *) (string * string) list ->
          'ext node

  val create_super_root_node :
        ?position:(string * int * int) ->
        'ext spec ->
        dtd ->
          'ext node

  val create_comment_node :
        ?position:(string * int * int) ->
        'ext spec ->
        dtd ->
        (* comment text: *) string ->
          'ext node

  val create_pinstr_node :
        ?position:(string * int * int) ->
        'ext spec ->
        dtd ->
        proc_instruction ->
          'ext node
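To show how these pieces fit together, here is a hedged sketch (not from the manual): it builds a spec with make_spec_from_alist and then creates an element node and a data node directly with the functions above. The exemplar extension and the DTD are placeholders written with "...", following the convention used by the manual's own examples.

  (* Sketch only: build a spec and use it to create nodes directly.
     exemplar_ext and dtd are placeholders, as in the other examples. *)
  let exemplar_ext = ... (* some extension *) in
  let dtd          = ... (* some DTD *) in
  let spec =
    make_spec_from_alist
      ~data_exemplar:            (new data_impl exemplar_ext)
      ~default_element_exemplar: (new element_impl exemplar_ext)
      ~element_alist:            []
      ()
  in
  let p    = create_element_node spec dtd "p" ["att", "value"] in
  let text = create_data_node spec dtd "Some character data" in
  p # add_node text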
3.2.6. Examples

Building trees. Here is the piece of code that creates the tree of the figure A tree with element nodes, data nodes, and attributes. The extension object and the DTD are beyond the scope of this example.

  let exemplar_ext = ... (* some extension *) in
  let dtd = ... (* some DTD *) in

  let element_exemplar = new element_impl exemplar_ext in
  let data_exemplar    = new data_impl    exemplar_ext in

  let a1 = element_exemplar # create_element dtd (T_element "a") ["att", "apple"]
  and b1 = element_exemplar # create_element dtd (T_element "b") []
  and c1 = element_exemplar # create_element dtd (T_element "c") []
  and a2 = element_exemplar # create_element dtd (T_element "a") ["att", "orange"]
  in

  let cherries = data_exemplar # create_data dtd "Cherries" in
  let orange   = data_exemplar # create_data dtd "An orange" in

  a1 # add_node b1;
  a1 # add_node c1;
  b1 # add_node a2;
  b1 # add_node cherries;
  a2 # add_node orange;

Alternatively, the last block of statements could also be written as:

  a1 # set_nodes [b1; c1];
+([orange];)396 867 y Fv(The)20 b(root)g(of)g(the)g(tree)g(is)h
+Fq(a1)p Fv(,)f(i.e.)g(it)h(is)g(true)f(that)396 1047
+y Fq(x)45 b(#)g(root)f(==)g(a1)396 1238 y Fv(for)20 b(e)n(v)o(ery)f(x)h
+(from)f({)i Fq(a1)p Fv(,)f Fq(a2)p Fv(,)g Fq(b1)p Fv(,)g
+Fq(c1)p Fv(,)g Fq(cherries)p Fv(,)g Fq(orange)f Fv(}.)396
+1388 y(Furthermore,)f(the)i(follo)n(wing)f(properties)f(hold:)486
+1568 y Fq(a1)44 b(#)h(attribute)e("att")h(=)h(Value)f("apple")396
+1665 y(&)h(a2)f(#)h(attribute)e("att")h(=)h(Value)f("orange")396
+1859 y(&)h(cherries)e(#)i(data)f(=)h("Cherries")396 1956
+y(&)135 b(orange)43 b(#)i(data)f(=)h("An)f(orange")396
+2053 y(&)314 b(a1)44 b(#)h(data)f(=)h("CherriesAn)e(orange")396
+2248 y(&)314 b(a1)44 b(#)h(node_type)e(=)i(T_element)e("a")396
+2345 y(&)314 b(a2)44 b(#)h(node_type)e(=)i(T_element)e("a")396
+2442 y(&)314 b(b1)44 b(#)h(node_type)e(=)i(T_element)e("b")396
+2539 y(&)314 b(c1)44 b(#)h(node_type)e(=)i(T_element)e("c")396
+2636 y(&)i(cherries)e(#)i(node_type)e(=)i(T_data)396
+2733 y(&)135 b(orange)43 b(#)i(node_type)e(=)i(T_data)396
+2928 y(&)314 b(a1)44 b(#)h(sub_nodes)e(=)i([)g(b1;)f(c1)h(])396
+3025 y(&)314 b(a2)44 b(#)h(sub_nodes)e(=)i([)g(orange)f(])396
+3122 y(&)314 b(b1)44 b(#)h(sub_nodes)e(=)i([)g(a2;)f(cherries)g(])396
+3219 y(&)314 b(c1)44 b(#)h(sub_nodes)e(=)i([])396 3316
+y(&)g(cherries)e(#)i(sub_nodes)e(=)i([])396 3413 y(&)135
+b(orange)43 b(#)i(sub_nodes)e(=)i([])396 3608 y(&)314
+b(a2)44 b(#)h(parent)f(==)g(a1)396 3705 y(&)314 b(b1)44
+b(#)h(parent)f(==)g(b1)396 3802 y(&)314 b(c1)44 b(#)h(parent)f(==)g(a1)
+396 3899 y(&)h(cherries)e(#)i(parent)f(==)g(b1)396 3996
+y(&)135 b(orange)43 b(#)i(parent)f(==)g(a2)396 4229 y
+Fu(Sear)o(ching)19 b(nodes.)g Fv(The)g(follo)n(wing)e(function)h
+(searches)h(all)g(nodes)g(of)g(a)g(tree)h(for)e(which)h(a)g(certain)g
+(condition)e(holds:)396 4409 y Fq(let)45 b(rec)f(search)g(p)g(t)h(=)486
+4506 y(if)f(p)h(t)g(then)576 4603 y(t)f(::)h(search_list)e(p)h(\(t)h(#)
+g(sub_nodes\))486 4700 y(else)576 4797 y(search_list)e(p)h(\(t)h(#)f
+(sub_nodes\))p Black 3800 5278 a Fr(61)p Black eop
+%%Page: 62 62
+62 61 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 676 a Fq(and)45
+b(search_list)e(p)h(l)h(=)486 773 y(match)f(l)h(with)576
+870 y([])268 b(-)p Fo(>)45 b Fq([])486 967 y(|)g(t)f(::)h(l')f(-)p
+Fo(>)h Fq(\(search)e(p)i(t\))f(@)h(\(search_list)e(p)i(l'\))396
+1065 y(;;)396 1297 y Fv(F)o(or)20 b(e)o(xample,)f(if)h(you)f(w)o(ant)i
+(to)f(search)g(all)h(elements)f(of)f(a)i(certain)f(type)f
+Fq(et)p Fv(,)i(the)f(function)e Fq(search)i Fv(can)g(be)g(applied)396
+1405 y(as)h(follo)n(ws:)396 1585 y Fq(let)45 b(search_element_type)c
+(et)k(t)f(=)486 1682 y(search)g(\(fun)g(x)h(-)p Fo(>)f
+Fq(x)h(#)f(node_type)g(=)g(T_element)g(et\))g(t)396 1779
+y(;;)396 2012 y Fu(Getting)20 b(attrib)n(ute)f(v)o(alues.)h
+Fv(Suppose)f(we)i(ha)n(v)o(e)f(the)g(declaration:)396
+2192 y Fq(<!ATTLIST)44 b(e)g(a)h(CDATA)f(#REQUIRED)934
+2289 y(b)h(CDATA)f(#IMPLIED)934 2386 y(c)h(CDATA)f("12345">)396
+2577 y Fv(In)20 b(this)h(case,)f(e)n(v)o(ery)f(element)h
+Fq(e)g Fv(must)h(ha)n(v)o(e)e(an)h(attrib)n(ute)g Fq(a)p
+Fv(,)g(otherwise)g(the)g(parser)g(w)o(ould)f(indicate)h(an)g(error)-5
+b(.)19 b(If)h(the)396 2685 y(O'Caml)h(v)n(ariable)e Fq(n)h
+Fv(holds)g(the)g(node)f(of)h(the)g(tree)h(corresponding)16
+b(to)21 b(the)f(element,)f(you)g(can)h(get)h(the)f(v)n(alue)f(of)h(the)
+396 2793 y(attrib)n(ute)g Fq(a)h Fv(by)396 2973 y Fq(let)45
+b(value_of_a)e(=)h(n)h(#)g(required_string_attribute)40
+b("a")396 3164 y Fv(which)20 b(is)h(more)e(or)h(less)i(an)e(abbre)n
+(viation)d(for)396 3344 y Fq(let)45 b(value_of_a)e(=)486
+3442 y(match)h(n)h(#)f(attribute)g("a")g(with)576 3539
+y(Value)g(s)g(->)h(s)486 3636 y(|)g(_)313 b(->)45 b(assert)f(false)396
+3827 y Fv(-)21 b(as)g(the)f(attrib)n(ute)g(is)h(required,)d(the)i
+Fq(attribute)f Fv(method)g(al)o(w)o(ays)i(returns)e(a)i
+Fq(Value)p Fv(.)396 3976 y(In)f(contrast)g(to)g(this,)h(the)f(attrib)n
+(ute)g Fq(b)g Fv(can)g(be)g(omitted.)g(In)f(this)i(case,)g(the)f
+(method)396 4084 y Fq(required_string_attribute)d Fv(w)o(orks)j(only)f
+(if)h(the)h(attrib)n(ute)f(is)h(there,)e(and)h(the)g(method)f(will)i(f)
+o(ail)f(if)h(the)396 4192 y(attrib)n(ute)f(is)h(missing.)f(T)-7
+b(o)20 b(get)h(the)f(v)n(alue,)f(you)g(can)h(apply)g(the)g(method)f
+Fq(optional_string_attribute)p Fv(:)396 4372 y Fq(let)45
+b(value_of_b)e(=)h(n)h(#)g(optional_string_attribute)40
+b("b")396 4563 y Fv(No)n(w)-5 b(,)20 b Fq(value_of_b)f
+Fv(is)i(of)f(type)g Fq(string)43 b(option)p Fv(,)20 b(and)f
+Fq(None)i Fv(represents)e(the)h(omitted)g(attrib)n(ute.)f(Alternati)n
+(v)o(ely)-5 b(,)396 4671 y(you)20 b(could)f(also)h(use)h
+Fq(attribute)p Fv(:)396 4851 y Fq(let)45 b(value_of_b)e(=)p
+Black 3800 5278 a Fr(62)p Black eop
+%%Page: 63 63
+63 62 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 486 579 a Fq(match)44
+b(n)h(#)f(attribute)g("b")g(with)576 676 y(Value)g(s)313
+b(->)45 b(Some)f(s)486 773 y(|)h(Implied_value)d(->)j(None)486
+870 y(|)g(_)582 b(->)45 b(assert)f(false)396 1103 y Fv(The)20
+b(attrib)n(ute)g Fq(c)h Fv(beha)n(v)o(es)e(much)g(lik)o(e)h
+Fq(a)p Fv(,)h(because)e(it)i(has)g(al)o(w)o(ays)f(a)h(v)n(alue.)e(If)h
+(the)g(attrib)n(ute)g(is)h(omitted,)f(the)g(def)o(ault,)396
+1211 y(here)g("12345",)e(will)j(be)f(returned)e(instead.)i(Because)g
+(of)g(this,)h(you)e(can)h(again)f(use)396 1319 y Fq
+(required_string_attribute)e Fv(to)j(get)g(the)h(v)n(alue.)396
+1468 y(The)f(type)g Fq(CDATA)g Fv(is)h(the)f(most)g(general)f(string)h
+(type.)g(The)g(types)g Fq(NMTOKEN)p Fv(,)f Fq(ID)p Fv(,)h
+Fq(IDREF)p Fv(,)g Fq(ENTITY)p Fv(,)f(and)h(all)396 1576
+y(enumerators)e(and)i(notations)f(are)h(special)h(forms)e(of)h(string)g
+(types)g(that)g(restrict)g(the)h(possible)f(v)n(alues.)f(From)396
+1684 y(O'Caml,)h(the)o(y)g(beha)n(v)o(e)f(lik)o(e)h Fq(CDATA)p
+Fv(,)g(i.e.)g(you)f(can)h(use)h(the)f(methods)f Fq
+(required_string_attribute)e Fv(and)396 1792 y Fq
+(optional_string_attribute)p Fv(,)g(too.)396 1941 y(In)j(contrast)g(to)
+g(this,)h(the)f(types)g Fq(NMTOKENS)p Fv(,)f Fq(IDREFS)p
+Fv(,)g(and)h Fq(ENTITIES)g Fv(mean)f(lists)j(of)e(strings.)g(Suppose)f
+(we)h(ha)n(v)o(e)396 2049 y(the)g(declaration:)396 2229
+y Fq(<!ATTLIST)44 b(f)g(d)h(NMTOKENS)e(#REQUIRED)934
+2327 y(e)i(NMTOKENS)e(#IMPLIED>)396 2517 y Fv(The)20
+b(type)g Fq(NMTOKENS)f Fv(stands)i(for)e(lists)j(of)e(space-separated)e
+(tok)o(ens;)i(for)f(e)o(xample)g(the)h(v)n(alue)g Fq("1)44
+b(abc)h(23ef")396 2625 y Fv(means)20 b(the)g(list)i Fq(["1";)44
+b("abc";)f("23ef"])p Fv(.)20 b(\(Again,)e Fq(IDREFS)i
+Fv(and)g Fq(ENTITIES)f Fv(ha)n(v)o(e)h(more)f(restricted)h(v)n
+(alues.\))396 2733 y(T)-7 b(o)21 b(get)f(the)g(v)n(alue)g(of)f(attrib)n
+(ute)h Fq(d)p Fv(,)h(one)e(can)h(use)396 2913 y Fq(let)45
+b(value_of_d)e(=)h(n)h(#)g(required_list_attribute)c("d")396
+3104 y Fv(or)396 3285 y Fq(let)k(value_of_d)e(=)486 3382
+y(match)h(n)h(#)f(attribute)g("d")g(with)576 3479 y(Valuelist)f(l)i(->)
+f(l)486 3576 y(|)h(_)493 b(->)44 b(assert)g(false)396
+3767 y Fv(As)21 b Fq(d)g Fv(is)g(required,)d(the)i(attrib)n(ute)g
+(cannot)f(be)h(omitted,)g(and)f(the)h Fq(attribute)g
+Fv(method)e(returns)i(al)o(w)o(ays)g(a)396 3875 y Fq(Valuelist)p
+Fv(.)396 4024 y(F)o(or)g(optional)f(attrib)n(utes)h(lik)o(e)h
+Fq(e)p Fv(,)f(apply)396 4204 y Fq(let)45 b(value_of_e)e(=)h(n)h(#)g
+(optional_list_attribute)c("e")396 4395 y Fv(or)396 4576
+y Fq(let)k(value_of_e)e(=)486 4673 y(match)h(n)h(#)f(attribute)g("e")g
+(with)576 4770 y(Valuelist)f(l)134 b(->)45 b(l)486 4867
+y(|)g(Implied_value)d(->)j([])p Black 3800 5278 a Fr(63)p
+Black eop
+%%Page: 64 64
+64 63 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 486 579 a Fq(|)45
+b(_)582 b(->)45 b(assert)f(false)396 770 y Fv(Here,)20
+b(the)g(case)h(that)f(the)g(attrib)n(ute)g(is)h(missing)f(counts)g(lik)
+o(e)g(the)h(empty)e(list.)-2 1139 y Fp(3.2.7.)35 b(Iterator)n(s)396
+1307 y Fv(There)20 b(are)g(also)g(se)n(v)o(eral)g(iterators)g(in)g
+(Pxp_document;)d(please)j(see)h(the)f(mli)h(\002le)f(for)g(details.)g
+(Y)-9 b(ou)20 b(can)g(\002nd)396 1415 y(e)o(xamples)f(for)h(them)g(in)g
+(the)g("simple_transformation")d(directory)-5 b(.)396
+1595 y Fq(val)45 b(find)f(:)g(?deeply:bool)f(->)889 1692
+y(f:\('ext)h(node)g(->)h(bool\))f(->)g('ext)g(node)h(->)f('ext)g(node)
+396 1887 y(val)h(find_all)e(:)i(?deeply:bool)e(->)1069
+1984 y(f:\('ext)g(node)i(->)f(bool\))g(->)h('ext)f(node)g(->)g('ext)h
+(node)f(list)396 2178 y(val)h(find_element)d(:)j(?deeply:bool)e(->)1248
+2275 y(string)h(->)g('ext)h(node)f(->)g('ext)g(node)396
+2469 y(val)h(find_all_elements)d(:)i(?deeply:bool)f(->)1472
+2567 y(string)h(->)h('ext)f(node)g(->)g('ext)h(node)f(list)396
+2761 y(exception)g(Skip)396 2858 y(val)h(map_tree)e(:)90
+b(pre:\('exta)43 b(node)h(->)g('extb)g(node\))g(->)1069
+2955 y(?post:\('extb)f(node)h(->)g('extb)g(node\))g(->)1069
+3052 y('exta)g(node)g(->)1248 3149 y('extb)g(node)396
+3441 y(val)h(map_tree_sibl)d(:)755 3538 y(pre:)i(\('exta)g(node)g
+(option)g(->)g('exta)g(node)h(->)f('exta)g(node)g(option)g(->)1203
+3635 y('extb)g(node\))g(->)710 3732 y(?post:\('extb)f(node)h(option)g
+(->)g('extb)g(node)h(->)f('extb)g(node)g(option)g(->)1203
+3829 y('extb)g(node\))g(->)710 3927 y('exta)g(node)g(->)889
+4024 y('extb)g(node)396 4218 y(val)h(iter_tree)e(:)i(?pre:\('ext)e
+(node)h(->)g(unit\))g(->)1114 4315 y(?post:\('ext)f(node)h(->)g(unit\))
+g(->)1114 4412 y('ext)g(node)g(->)1293 4509 y(unit)396
+4704 y(val)h(iter_tree_sibl)d(:)710 4801 y(?pre:)i(\('ext)g(node)g
+(option)g(->)h('ext)f(node)g(->)g('ext)h(node)f(option)g(->)g(unit\))g
+(->)p Black 3800 5278 a Fr(64)p Black eop
+%%Page: 65 65
+65 64 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 710 579 a Fq(?post:\('ext)43
+b(node)h(option)g(->)h('ext)f(node)g(->)g('ext)h(node)f(option)g(->)g
+(unit\))g(->)710 676 y('ext)g(node)g(->)889 773 y(unit)-2
+1358 y Fx(3.3.)39 b(The)g(c)m(lass)g(type)g Fb(extension)396
+1610 y Fq(class)44 b(type)g([)h('node)f(])h(extension)e(=)486
+1707 y(object)h(\('self\))576 1804 y(method)f(clone)h(:)h('self)665
+1901 y(\(*)g("clone")e(should)h(return)g(an)h(exact)f(deep)g(copy)g(of)
+g(the)h(object.)e(*\))576 1998 y(method)g(node)i(:)f('node)665
+2095 y(\(*)h("node")f(returns)f(the)i(corresponding)d(node)i(of)h(this)
+f(extension.)f(This)h(method)710 2193 y(*)h(intended)e(to)i(return)f
+(exactly)f(what)h(previ-)396 2290 y(ously)g(has)h(been)f(set)g(by)h
+("set_node".)710 2387 y(*\))576 2484 y(method)e(set_node)h(:)h('node)f
+(->)g(unit)665 2581 y(\(*)h("set_node")e(is)h(invoked)g(once)g(the)h
+(extension)e(is)h(associated)g(to)g(a)h(new)710 2678
+y(*)g(node)f(object.)710 2775 y(*\))486 2873 y(end)396
+3063 y Fv(This)21 b(is)g(the)f(type)g(of)g(classes)h(used)f(for)f(node)
+h(e)o(xtensions.)e(F)o(or)i(e)n(v)o(ery)f(node)g(of)h(the)g(document)e
+(tree,)i(there)g(is)h(not)396 3171 y(only)f(the)g Fq(node)g
+Fv(object,)f(b)n(ut)h(also)g(an)g Fq(extension)f Fv(object.)h(The)f
+(latter)i(has)f(minimal)f(functionality;)f(it)j(has)f(only)g(the)396
+3279 y(necessary)g(methods)f(to)h(be)g(attached)g(to)g(the)g(node)f
+(object)h(containing)e(the)j(details)f(of)g(the)g(node)f(instance.)h
+(The)396 3387 y(e)o(xtension)f(object)h(is)h(called)f(e)o(xtension)f
+(because)g(its)i(purpose)e(is)i(e)o(xtensibility)-5 b(.)396
+3537 y(F)o(or)20 b(some)g(reasons,)g(it)h(is)g(impossible)e(to)i(deri)n
+(v)o(e)d(the)j Fq(node)f Fv(classes)h(\(i.e.)f Fq(element_impl)f
+Fv(and)g Fq(data_impl)p Fv(\))g(such)396 3645 y(that)i(the)f
+(subclasses)g(can)g(be)g(e)o(xtended)f(by)g(ne)n(w)h(ne)n(w)g(methods.)
+f(But)i(subclassing)f(nodes)f(is)i(a)g(great)f(feature,)396
+3753 y(because)g(it)h(allo)n(ws)f(the)g(user)g(to)h(pro)o(vide)d(dif)n
+(ferent)g(classes)k(for)d(dif)n(ferent)g(types)h(of)g(nodes.)f(The)h(e)
+o(xtension)f(objects)396 3860 y(are)h(a)h(w)o(orkaround)c(that)j(is)i
+(as)e(po)n(werful)f(as)i(direct)f(subclassing,)f(the)h(costs)h(are)f
+(some)g(notation)f(o)o(v)o(erhead.)p Black 3800 5278
+a Fr(65)p Black eop
Figure 3-6. The structure of nodes and extensions

[Embedded figure pic/extension_general.ps: a node tree on the left and the corresponding extension objects on the right; dashed arrows labelled "n # extension" and "x # node" connect each node n with its extension x.]
The picture shows how the nodes and extensions are linked together. Every node has a reference to its extension, and every extension has a reference to its node. The methods extension and node follow these references; a typical phrase is

  self # node # attribute "xy"

to get the value of an attribute from a method defined in the extension object; or

  self # node # iter
    (fun n -> n # extension # my_method ...)

to iterate over the subnodes and to call my_method of the corresponding extension objects.

Note that extension objects do not have references to subnodes (or "subextensions") themselves; in order to get one of the children of an extension you must first go to the node object, then get the child node, and finally reach the extension that is logically the child of the extension you started with.

3.3.1. How to define an extension class

At minimum, you must define the methods clone, node, and set_node such that your class is compatible with the type extension. The method set_node is called during the initialization of the node, or after a node has been cloned; the node object invokes set_node on the extension object to tell it that this node is now the object the extension is linked to. The extension must return the node object passed as argument of set_node when the node method is called.
The clone method must return a copy of the extension object; at least the object itself must be duplicated, but if required, the copy should deeply duplicate all objects and values that are referred to by the extension, too. Whether this is required depends on the application; clone is invoked by the node object when one of its cloning methods is called.

A good starting point for an extension class:

  class custom_extension =
    object (self)

      val mutable node = (None : custom_extension node option)

      method clone = {< >}

      method node =
        match node with
          None ->
            assert false
        | Some n -> n

      method set_node n =
        node <- Some n

    end

This class is compatible with extension. The purpose of defining such a class is, of course, adding further methods; and you can do it without restriction.

Often, you want not only one extension class. In this case, it is the simplest way that all your classes (for one kind of document) have the same type (with respect to the interface; i.e. it does not matter if your classes differ in the defined private methods and instance variables, but public methods count). This approach avoids lots of coercions and problems with type incompatibilities. It is simple to implement:

  class custom_extension =
    object (self)
      val mutable node = (None : custom_extension node option)

      method clone = ...          (* see above *)
      method node = ...           (* see above *)
      method set_node n = ...     (* see above *)

      method virtual my_method1 : ...
      method virtual my_method2 : ...
      ... (* etc. *)
    end
  class custom_extension_kind_A =
    object (self)
      inherit custom_extension

      method my_method1 = ...
      method my_method2 = ...
    end

  class custom_extension_kind_B =
    object (self)
      inherit custom_extension

      method my_method1 = ...
      method my_method2 = ...
    end
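For concreteness, here is a hedged sketch of one way the custom_extension_kind_A skeleton above might be filled in. The method bodies are invented for this example and only use node methods described earlier in this chapter; they are not part of the parser distribution.

  (* Sketch only: one possible concrete subclass.  The bodies are made up
     for illustration and assume the complete custom_extension base class
     shown on the previous page. *)
  class custom_extension_kind_A =
    object (self)
      inherit custom_extension

      (* e.g. the concatenated character data below the attached node *)
      method my_method1 = self # node # data

      (* e.g. the number of direct children of the attached node *)
      method my_method2 = List.length (self # node # sub_nodes)
    end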
If a class does not need a method (e.g. because it does not make sense, or it would violate some important condition), it is possible to define the method and to always raise an exception when the method is invoked (e.g. assert false).

The latter is a strong recommendation: do not try to further specialize the types of extension objects. It is difficult, sometimes even impossible, and almost never worthwhile.

3.3.2. How to bind extension classes to element types

Once you have defined your extension classes, you can bind them to element types. The simplest case is that you have only one class and that this class is to be always used. The parsing functions in the module Pxp_yacc take a spec argument which can be customized. If your single class has the name c, this argument should be

  let spec =
    make_spec_from_alist
      ~data_exemplar:            (new data_impl c)
      ~default_element_exemplar: (new element_impl c)
      ~element_alist:            []
      ()

This means that data nodes will be created from the exemplar passed by ~data_exemplar and that all element nodes will be made from the exemplar specified by ~default_element_exemplar. In ~element_alist, you can pass that different exemplars are to be used for different element types; but this is an optional feature. If you do not need it, pass the empty list.
Remember that an exemplar is a (node, extension) pair that serves as a pattern when new nodes (and the corresponding extension objects) are added to the document tree. In this case, the exemplar contains c as extension, and when nodes are created, the exemplar is cloned, and cloning also makes a copy of c such that all nodes of the document tree will have a copy of c as extension.

The ~element_alist argument can bind specific element types to specific exemplars; as exemplars may be instances of different classes, it is effectively possible to bind element types to classes. For example, if the element type "p" is implemented by class "c_p", and "q" is realized by "c_q", you can pass the following value:

  let spec =
    make_spec_from_alist
      ~data_exemplar:            (new data_impl c)
      ~default_element_exemplar: (new element_impl c)
      ~element_alist:
        [ "p", new element_impl c_p;
          "q", new element_impl c_q;
        ]
      ()

The extension object c is still used for all data nodes and for all other element types.
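Such a spec value is normally handed to one of the parsing functions of Pxp_yacc mentioned above. The following hedged sketch shows one plausible invocation; the names parse_document_entity, default_config and from_file, as well as the file name, are assumptions about the Pxp_yacc interface and should be checked against its mli file rather than taken from this chapter.

  (* Sketch only: parse a file into a tree whose nodes carry the extension
     objects selected by spec.  The Pxp_yacc names used here are assumptions. *)
  let doc =
    Pxp_yacc.parse_document_entity
      Pxp_yacc.default_config
      (Pxp_yacc.from_file "sample.xml")
      spec
  in
  print_endline (doc # root # data)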
3.4. Details of the mapping from XML text to the tree representation

3.4.1. The representation of character-free elements

If an element declaration does not allow the element to contain character data, the following rules apply.

If the element must be empty, i.e. it is declared with the keyword EMPTY, the element instance must be effectively empty (it must not even contain whitespace characters). The parser guarantees that a declared EMPTY element never contains a data node, even if the data node represents the empty string.

If the element declaration only permits other elements to occur within that element but not character data, it is still possible to insert whitespace characters between the subelements. The parser ignores these characters, too, and does not create data nodes for them.

Example. Consider the following element types:

  <!ELEMENT x ( #PCDATA | z )* >
  <!ELEMENT y ( z )* >
<!ELEMENT z EMPTY>

Only x may contain character data; the keyword #PCDATA indicates this. The other types are character-free.

The XML term

<x><z/> <z/></x>

will be internally represented by an element node for x with three subnodes: the first z element, a data node containing the space character, and the second z element. In contrast to this, the term

<y><z/> <z/></y>

is represented by an element node for y with only two subnodes, the two z elements. There is no data node for the space character because spaces are ignored in the character-free element y.

3.4.2. The representation of character data

The XML specification allows all Unicode characters in XML texts. This parser can be configured such that UTF-8 is used to represent the characters internally; however, the default character encoding is ISO-8859-1. (Currently, no other encodings are possible for the internal string representation; the type Pxp_types.rep_encoding enumerates the possible encodings. In principle, the parser could use any encoding that is ASCII-compatible, but there are currently only lexical analyzers for UTF-8 and ISO-8859-1. It is currently impossible to use UTF-16 or UCS-4 as internal encodings (or other multibyte encodings which are not ASCII-compatible) unless major parts of the parser are rewritten - unlikely...)

The internal encoding may be different from the external encoding (specified in the XML declaration <?xml ... encoding="..."?>); in this case the strings are automatically converted to the internal encoding.

If the internal encoding is ISO-8859-1, it is possible that there are characters that cannot be represented. In this case, the parser ignores such characters and prints a warning (to the collect_warnings object that must be passed when the parser is called).

The XML specification allows lines to be separated by single LF characters, by CR LF character sequences, or by single CR characters. Internally, these separators are always converted to single LF characters.

The parser guarantees that there are never two adjacent data nodes; if necessary, data material that would otherwise be represented by several nodes is collapsed into one node. Note that you can still create node trees with adjacent data nodes; however, the parser does not return such trees.
Note that CDATA sections are not represented specially; such sections are added to the current data material that is being collected for the next data node.

3.4.3. The representation of entities within documents

Entities are not represented within documents! If the parser finds an entity reference in the document content, the reference is immediately expanded, and the parser reads the expansion text instead of the reference.

3.4.4. The representation of attributes

As attribute values are composed of Unicode characters, too, the same problems with the character encoding arise as for character material. Attribute values are converted to the internal encoding, too; and if there are characters that cannot be represented, these are dropped, and a warning is printed.

Attribute values are normalized before they are returned by methods like attribute. First, any remaining entity references are expanded; if necessary, expansion is performed recursively. Second, newline characters (any of LF, CR LF, or CR) are converted to single space characters. Note that especially the latter action is prescribed by the XML standard (but the character reference for LF is not converted, such that it is still possible to include line feeds in attributes).

3.4.5. The representation of processing instructions

Processing instructions are parsed to some extent: the first word of the PI is called the target, and it is stored separately from the rest of the PI:

<?target rest?>

The exact location where a PI occurs is not represented (by default). The parser puts the PI into the object that represents the embracing construct (an element, a DTD, or the whole document); that means you can find out which PIs occur in a certain element, in the DTD, or in the whole document, but you cannot look up the exact position within the construct.

If you require the exact location of PIs, it is possible to create extra nodes for them. This mode is controlled by the option enable_pinstr_nodes. The additional nodes have the node type T_pinstr target, and are created from special exemplars contained in the spec (see pxp_document.mli).
3.4.6. The representation of comments

Normally, comments are not represented; they are dropped by default. However, if you require them, it is possible to create T_comment nodes for them. This mode can be enabled by the option enable_comment_nodes. Comment nodes are created from special exemplars contained in the spec (see pxp_document.mli). You can access the contents of comments through the method comment.

3.4.7. The attributes xml:lang and xml:space

These attributes are not supported specially; they are handled like any other attribute.

3.4.8. And what about namespaces?

Currently, there is no special support for namespaces. However, the parser allows the colon to occur in names, so that it is possible to implement namespaces on top of the current API.

Some future release of PXP will support namespaces as a built-in feature...
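The representation options described in sections 3.4.2, 3.4.5 and 3.4.6 are chosen when the parser is configured. The following is only a hedged sketch: the option names enable_pinstr_nodes and enable_comment_nodes come from the text above, but that Pxp_yacc exposes them together with the internal encoding as fields of a config record with a default_config value is an assumption to be checked against Pxp_yacc.mli:

open Pxp_yacc

let config =
  { default_config with
      encoding = `Enc_utf8;          (* use UTF-8 internally instead of ISO-8859-1 *)
      enable_pinstr_nodes = true;    (* create T_pinstr nodes at the exact PI positions *)
      enable_comment_nodes = true;   (* keep comments as T_comment nodes *)
  }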
Chapter 4. Configuring and calling the parser

4.1. Overview

There are the following main functions invoking the parser (in Pxp_yacc); a short usage sketch follows the list:

- parse_document_entity: You want to parse a complete and closed document consisting of a DTD and the document body; the body is validated against the DTD. This mode is interesting if you have a file

  <!DOCTYPE root ... [ ... ] > <root> ... </root>

  and you can accept any DTD that is included in the file (e.g. because the file is under your control).

- parse_wfdocument_entity: You want to parse a complete and closed document consisting of a DTD and the document body; but the body is not validated, only checked for well-formedness. This mode is preferred if validation costs too much time or if the DTD is missing.

- parse_dtd_entity: You want only to parse an entity (file) containing the external subset of a DTD. Sometimes it is interesting to read such a DTD, for example to compare it with the DTD included in a document, or to apply the next mode:

- parse_content_entity: You want only to parse an entity (file) containing a fragment of a document body; this fragment is validated against the DTD you pass to the function. In particular, the fragment must not have a <!DOCTYPE> clause, and must directly begin with an element. The element is validated against the DTD. This mode is interesting if you want to check documents against a fixed, immutable DTD.

- parse_wfcontent_entity: This function also parses a single element without DTD, but does not validate it.

- extract_dtd_from_document_entity: This function extracts the DTD from a closed document consisting of a DTD and a document body. Both the internal and the external subsets are extracted.
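Here is the promised usage sketch for the first of these functions. It assumes the conventional Pxp_yacc signature parse_document_entity : config -> source -> 'ext spec -> 'ext document together with the values default_config and default_spec (assumptions, not spelled out in this chapter); from_file is described in section 4.2.1 below, and the file name is hypothetical:

open Pxp_yacc

let () =
  try
    let doc =
      parse_document_entity default_config (from_file "doc.xml") default_spec in
    ignore (doc # root)        (* the root element node of the parsed document *)
  with
    e -> prerr_endline (Printexc.to_string e)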
In many cases, parse_document_entity is the preferred mode to parse a document in a validating way, and parse_wfdocument_entity is the mode of choice to parse a file while only checking for well-formedness.

There are a number of variations of these modes. One important application of a parser is to check documents from an untrusted source against a fixed DTD. One solution is to not allow the <!DOCTYPE> clause in these documents, and to treat the document like a fragment (using mode parse_content_entity). This is very simple, but inflexible; users of such a system cannot even define additional entities to abbreviate frequent phrases of their text.

It may be necessary to have a more intelligent checker. For example, it is also possible to parse the document fully, i.e. with DTD, and to compare this DTD with the prescribed one. In order to
fully parse the document, mode parse_document_entity is applied, and to get the DTD to compare with, mode parse_dtd_entity can be used.

There is another very important configurable aspect of the parser: the so-called resolver. The task of the resolver is to locate the contents of an (external) entity for a given entity name, and to make the contents accessible as a character stream. (Furthermore, it also normalizes the character set; but this is a detail we can ignore here.) Consider you have a file called "main.xml" containing

<!ENTITY % sub SYSTEM "sub/sub.xml">
%sub;

and a file stored in the subdirectory "sub" with name "sub.xml" containing

<!ENTITY % subsub SYSTEM "subsub/subsub.xml">
%subsub;

and a file stored in the subdirectory "subsub" of "sub" with name "subsub.xml" (the contents of this file do not matter). Here, the resolver must track that the second entity subsub is located in the directory "sub/subsub", i.e. the difficulty is to interpret the system (file) names of entities relative to the entities containing them, even if the entities are deeply nested.

There is no fixed resolver that already does everything right - resolving entity names is a task that highly depends on the environment. The XML specification only demands that SYSTEM entities are interpreted like URLs (which is not very precise, as there are lots of URL schemes in use), hoping that this helps overcome the local peculiarities of the environment; the idea is that if you do not know your environment, you can refer to other entities by denoting URLs for them. I think that this interpretation of SYSTEM names may have some applications on the internet, but it is not the first choice in general. Because of this, the resolver is a separate module of the parser that can be exchanged for another one if necessary; more precisely, the parser already defines several resolvers.

The following resolvers already exist:

- Resolvers reading from arbitrary input channels. These can be configured such that a certain ID is associated with the channel; in this case inner references to external entities can be resolved. There is also a special resolver that interprets SYSTEM IDs as URLs; this resolver can process relative SYSTEM names and determine the corresponding absolute URL.

- A resolver that always reads from a given O'Caml string. This resolver is not able to resolve further names, as the string is not associated with any name; i.e. if the document contained in the string refers to an external entity, this reference cannot be followed.

- A resolver for file names. The SYSTEM name is interpreted as a file URL with the slash "/" as separator for directories. This resolver is derived from the generic URL resolver.

The interface a resolver must have is documented, so it is possible to write your own resolver. For example, you could connect the parser with an HTTP client, and resolve URLs of the HTTP namespace.
The resolver classes support combining several independent resolvers into one more powerful resolver; thus it is possible to combine a self-written resolver with the already existing resolvers.

Note that the existing resolvers only interpret SYSTEM names, not PUBLIC names. If it helps you, it is possible to define resolvers for PUBLIC names, too; for example, such a resolver could look up the public name in a hash table, and map it to a system name which is passed over to the existing resolver for system names. It is relatively simple to provide such a resolver.

4.2. Resolvers and sources

4.2.1. Using the built-in resolvers (called sources)

The type source enumerates the two possibilities where the document to parse comes from.

type source =
    Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
  | ExtID of (ext_id * Pxp_reader.resolver)

You normally need not worry about this type, as there are convenience functions that create source values:

- from_file s: The document is read from file s; you may specify absolute or relative path names. The file name must be encoded as a UTF-8 string.

  There is an optional argument ~system_encoding specifying the character encoding which is used for the names of the file system. For example, if this encoding is ISO-8859-1 and s is also an ISO-8859-1 string, you can form the source:

  let s_utf8 = recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s in
  from_file ~system_encoding:`Enc_iso88591 s_utf8

  This source has the advantage that it is able to resolve inner external entities; i.e. if your document includes data from another file (using the SYSTEM attribute), this mode will find that file. However, this mode cannot resolve PUBLIC identifiers nor SYSTEM identifiers other than "file:".

- from_channel ch: The document is read from the channel ch. In general, this source also supports file URLs found in the document; however, by default only absolute URLs are understood. It is possible to associate an ID with the channel such that the resolver knows how to interpret relative URLs:

  from_channel ~id:(System "file:///dir/dir1/") ch
  There is also the ~system_encoding argument specifying how file names are encoded. The example from above can also be written as follows (but it is no longer possible to interpret relative URLs because there is no ~id argument, and computing this argument is relatively complicated because it must be a valid URL):

  let ch = open_in s in
  let src = from_channel ~system_encoding:`Enc_iso88591 ch in
  ...;
  close_in ch

- from_string s: The string s is the document to parse. This mode is not able to interpret file names of SYSTEM clauses, nor can it look up PUBLIC identifiers.

  Normally, the encoding of the string is detected as usual by analyzing the XML declaration, if any. However, it is also possible to specify the encoding directly:

  let src = from_string ~fixenc:`Enc_iso88592 s

- ExtID (id, r): The document to parse is denoted by the identifier id (either a SYSTEM or PUBLIC clause), and this identifier is interpreted by the resolver r. Use this mode if you have written your own resolver.

  Which character sets are possible depends on the passed resolver r.

- Entity (get_entity, r): The document to parse is returned by the function invocation get_entity dtd, where dtd is the DTD object to use (it may be empty). Inner external references occurring in this entity are resolved using the resolver r.

  Which character sets are possible depends on the passed resolver r.

4.2.2. The resolver API

A resolver is an object that can be opened like a file; however, you do not pass the file name to the resolver, but the XML identifier of the entity to read from (either a SYSTEM or PUBLIC clause). When opened, the resolver must return the Lexing.lexbuf that reads the characters. The resolver can be closed, and it can be cloned. Furthermore, it is possible to tell the resolver which character set it should assume. The following is from Pxp_reader:

exception Not_competent
exception Not_resolvable of exn

class type resolver =
  object
    method init_rep_encoding : rep_encoding -> unit
    method init_warner : collect_warnings -> unit
    method rep_encoding : rep_encoding
    method open_in : ext_id -> Lexing.lexbuf
    method close_in : unit
    method change_encoding : string -> unit
    method clone : resolver
    method close_all : unit
  end

The resolver object must work as follows:

- When the parser is called, it tells the resolver the warner object and the internal encoding by invoking init_warner and init_rep_encoding. The resolver should store these values. The method rep_encoding should return the internal encoding.

- If the parser wants to read from the resolver, it invokes the method open_in. Either the resolver succeeds, in which case the Lexing.lexbuf reading from the file or stream must be returned, or opening fails. In the latter case the method implementation should raise an exception (see below).

- When the parser finishes reading, it calls the close_in method.

- If the parser finds a reference to another external entity in the input stream, it calls clone to get a second resolver which must be initially closed (not yet connected with an input stream). The parser then invokes open_in and the other methods as described.

- If you already know the character set of the input stream, you should recode it to the internal encoding, and define the method change_encoding as an empty method.

- If you want to support multiple external character sets, the object must follow a much more complicated protocol. Directly after open_in has been called, the resolver must return a lexical buffer that only reads one byte at a time. This is only possible if you create the lexical buffer with Lexing.from_function; the function must then always return 1 if EOF is not yet reached, and 0 if EOF is reached. Once the parser has read the first line of the document, it will invoke change_encoding to tell the resolver which character set to assume. From this moment on, the object can return more than one byte at once. The argument of change_encoding is either the parameter of the "encoding" attribute of the XML declaration, or the empty string if there is no XML declaration or if the declaration does not contain an encoding attribute.

  At the beginning the resolver must only return one character every time something is read from the lexical buffer. The reason for this is that you would otherwise not know exactly at which position in the input stream the character set changes.

  If you want automatic recognition of the character set, it is up to the resolver object to implement this.

- If an error occurs, the parser calls the method close_all for the top-level resolver; this method should close itself (if not already done) and all clones.

Exceptions. It is possible to chain resolvers such that when the first resolver is not able to open the entity, the other resolvers of the chain are tried in turn. The method open_in should raise the exception
Not_competent to indicate that the next resolver should try to open the entity. If the resolver is able to handle the ID, but some other error occurs, the exception Not_resolvable should be raised to force the chain to break.

Example: How to define a resolver that is equivalent to from_string: ...
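The example itself is left open in this version of the text. As a hedged stand-in (not the author's elided example), note that a resolver serving a fixed string can already be obtained from the resolve_read_this_string class documented in the next section; the document text below is hypothetical:

open Pxp_reader

(* a resolver that always delivers the given string,
   comparable to what the from_string source provides *)
let string_resolver =
  new resolve_read_this_string "<?xml version='1.0'?><root/>"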
4.2.3. Predefined resolver components

There are some classes in Pxp_reader that define common resolver behaviour.

class resolve_read_this_channel :
  ?id:ext_id ->
  ?fixenc:encoding ->
  ?auto_close:bool ->
  in_channel ->
    resolver

Reads from the passed channel (it may even be a pipe). If the ~id argument is passed to the object, the created resolver accepts only this ID; otherwise all IDs are accepted. Once the resolver has been cloned, it does not accept any ID. This means that this resolver cannot handle inner references to external entities. Note that you can combine this resolver with another resolver that can handle inner references (such as resolve_as_file); see class 'combine' below. If you pass the ~fixenc argument, the encoding of the channel is set to the passed value, regardless of any auto-recognition or any XML declaration. If ~auto_close = true (which is the default), the channel is closed after use; if ~auto_close = false, the channel is left open.

class resolve_read_any_channel :
  ?auto_close:bool ->
  channel_of_id:(ext_id -> (in_channel * encoding option)) ->
    resolver

This resolver calls the function ~channel_of_id to open a new channel for the passed ext_id. This function must either return the channel and the encoding, or it must fail with Not_competent. The function must return None as encoding if the default mechanism to recognize the encoding should be used. It must return Some e if it is already known that the encoding of the channel is e. If ~auto_close = true (which is the default), the channel is closed after use; if ~auto_close = false, the channel is left open.

class resolve_read_url_channel :
  ?base_url:Neturl.url ->
  ?auto_close:bool ->
  url_of_id:(ext_id -> Neturl.url) ->
  channel_of_url:(Neturl.url -> (in_channel * encoding option)) ->
    resolver

When this resolver gets an ID to read from, it calls the function ~url_of_id to get the corresponding URL. This URL may be a relative URL; however, a URL scheme must be used which contains a path. The resolver converts the URL to an absolute URL if necessary. The second function, ~channel_of_url, is fed with the absolute URL as input. This function opens the resource to read from, and returns the channel and the encoding of the resource.

Both functions, ~url_of_id and ~channel_of_url, can raise Not_competent to indicate that the object is not able to read from the specified resource. However, there is a difference: a Not_competent from ~url_of_id is left as it is, but a Not_competent from ~channel_of_url is converted to Not_resolvable. So only ~url_of_id decides which URLs are accepted by the resolver and which are not.

The function ~channel_of_url must return None as encoding if the default mechanism to recognize the encoding should be used. It must return Some e if it is already known that the encoding of the channel is e.

If ~auto_close = true (which is the default), the channel is closed after use; if ~auto_close = false, the channel is left open.

Objects of this class contain a base URL relative to which relative URLs are interpreted. When creating a new object, you can specify the base URL by passing it as the ~base_url argument. When an existing object is cloned, the base URL of the clone is the URL of the original object. Note that the term "base URL" has a strict definition in RFC 1808.

class resolve_read_this_string :
  ?id:ext_id ->
  ?fixenc:encoding ->
  string ->
    resolver

Reads from the passed string. If the ~id argument is passed to the object, the created resolver accepts only this ID; otherwise all IDs are accepted. Once the resolver has been cloned, it does not accept any ID. This means that this resolver cannot handle inner references to external entities. Note that you can combine this resolver with another resolver that can handle inner references (such as resolve_as_file); see class 'combine' below. If you pass the ~fixenc argument, the encoding of the string is set to the passed value, regardless of any auto-recognition or any XML declaration.

class resolve_read_any_string :
  string_of_id:(ext_id -> (string * encoding option)) ->
    resolver
This resolver calls the function ~string_of_id to get the string for the passed ext_id. This function must either return the string and the encoding, or it must fail with Not_competent. The function must return None as encoding if the default mechanism to recognize the encoding should be used. It must return Some e if it is already known that the encoding of the string is e.

class resolve_as_file :
  ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
  ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
  ?system_encoding:encoding ->
  ?url_of_id:(ext_id -> Neturl.url) ->
  ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
  unit ->
    resolver

Reads from the local file system. Every file name is interpreted as a file name of the local file system, and the referred file is read.

The full form of a file URL is: file://host/path, where 'host' specifies the host system where the file identified by 'path' resides. host = "" or host = "localhost" are accepted; other values will raise Not_competent. The standard for file URLs is defined in RFC 1738.

Option ~file_prefix: specifies how the "file:" prefix of file names is handled:

- `Not_recognized: The prefix is not recognized.
- `Allowed: The prefix is allowed but not required (the default).
- `Required: The prefix is required.

Option ~host_prefix: specifies how the "//host" phrase of file names is handled:

- `Not_recognized: The prefix is not recognized.
- `Allowed: The prefix is allowed but not required (the default).
- `Required: The prefix is required.

Option ~system_encoding: specifies the encoding of file names of the local file system. Default: UTF-8.

Options ~url_of_id, ~channel_of_url: not for the casual user!

class combine :
  ?prefer:resolver ->
  resolver list ->
    resolver
Combines several resolver objects. If a concrete entity with an ext_id is to be opened, the combined resolver tries the contained resolvers in turn until a resolver accepts opening the entity (i.e. it does not raise Not_competent on open_in).

Clones: if the 'clone' method is invoked before 'open_in', all contained resolvers are cloned separately and combined again. If the 'clone' method is invoked after 'open_in' (i.e. while the resolver is open), the clone of the active resolver is additionally flagged as being preferred, i.e. it is tried first.
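A short hedged sketch of combining these classes (the SYSTEM id and the inline document are hypothetical; that the ext_id constructor System lives in Pxp_types is an assumption):

open Pxp_reader

(* try the fixed string first; fall back to the local file system
   for every other id *)
let my_resolver =
  new combine
    [ new resolve_read_this_string
        ~id:(Pxp_types.System "file:///data/inline.xml")
        "<?xml version='1.0'?><inline/>";
      new resolve_as_file ();
    ]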
4.3. The DTD classes

Sorry, not yet written. Perhaps the interface definition of Pxp_dtd expresses the same:

(**********************************************************************)
(*                                                                    *)
(* Pxp_dtd:                                                           *)
(*   Object model of document type declarations                       *)
(*                                                                    *)
(**********************************************************************)

(* ======================================================================
 * OVERVIEW
 *
 * class dtd ............... represents the whole DTD, including element
 *                           declarations, entity declarations, notation
 *                           declarations, and processing instructions
 * class dtd_element ....... represents an element declaration consisting
 *                           of a content model and an attribute list
 *                           declaration
 * class dtd_notation ...... represents a notation declaration
 * class proc_instruction .. represents a processing instruction
 * ======================================================================
 *
 *)

class dtd :
  (* Creation:
   *   new dtd
   * creates a new, empty DTD object without any declaration, without a root
   * element, without an ID.
   *)
  Pxp_types.collect_warnings ->
  Pxp_types.rep_encoding ->
  object
    method root : string option
      (* get the name of the root element if present *)

    method set_root : string -> unit
      (* set the name of the root element. This method can be invoked
       * only once
       *)

    method id : Pxp_types.dtd_id option
      (* get the identifier for this DTD *)

    method set_id : Pxp_types.dtd_id -> unit
      (* set the identifier. This method can be invoked only once *)

    method encoding : Pxp_types.rep_encoding
      (* returns the encoding used for character representation *)

    method allow_arbitrary : unit
      (* After this method has been invoked, the object changes its behaviour:
       * - elements and notations that have not been added may be used in an
       *   arbitrary way; the methods "element" and "notation" indicate this
       *   by raising Undeclared instead of Validation_error.
       *)

    method disallow_arbitrary : unit

    method arbitrary_allowed : bool
      (* Returns whether arbitrary contents are allowed or not. *)

    method standalone_declaration : bool
      (* Whether there is a 'standalone' declaration or not. Strictly
       * speaking, this declaration is not part of the DTD, but it is
       * included here because of practical reasons.
       * If not set, this property defaults to 'false'.
       *)

    method set_standalone_declaration : bool -> unit
      (* Sets the 'standalone' declaration. *)
    method add_element : dtd_element -> unit
      (* add the given element declaration to this DTD. Raises Not_found
       * if there is already an element declaration with the same name.
       *)

    method add_gen_entity : Pxp_entity.entity -> bool -> unit
      (* add_gen_entity e extdecl:
       * add the entity 'e' as general entity to this DTD (general entities
       * are those represented by &name;). If there is already a declaration
       * with the same name, the second definition is ignored; as an exception
       * to this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
       * may only be redeclared with a definition that is equivalent to the
       * standard definition; otherwise a Validation_error is raised.
       *
       * 'extdecl': 'true' indicates that the entity declaration occurs in
       * an external entity. (Used for the standalone check.)
       *)

    method add_par_entity : Pxp_entity.entity -> unit
      (* add the given entity as parameter entity to this DTD (parameter
       * entities are those represented by %name;). If there is already a
       * declaration with the same name, the second definition is ignored.
       *)

    method add_notation : dtd_notation -> unit
      (* add the given notation to this DTD. If there is already a declaration
       * with the same name, a Validation_error is raised.
       *)

    method add_pinstr : proc_instruction -> unit
      (* add the given processing instruction to this DTD. *)

    method element : string -> dtd_element
      (* looks up the element declaration with the given name. Raises
       * Validation_error if the element cannot be found. (If "allow_arbitrary"
       * has been invoked before, Unrestricted is raised instead.)
       *)

    method element_names : string list
      (* returns the list of the names of all element declarations. *)

    method gen_entity : string -> (Pxp_entity.entity * bool)
      (* let e, extdecl = obj # gen_entity n:
       * looks up the general entity 'e' with the name 'n'. Raises
       * WF_error if the entity cannot be found.
       * 'extdecl': indicates whether the entity declaration occurred in an
       * external entity.
       *)

    method gen_entity_names : string list
      (* returns the list of all general entity names *)

    method par_entity : string -> Pxp_entity.entity
      (* looks up the parameter entity with the given name. Raises
       * WF_error if the entity cannot be found.
       *)

    method par_entity_names : string list
      (* returns the list of all parameter entity names *)

    method notation : string -> dtd_notation
      (* looks up the notation declaration with the given name. Raises
       * Validation_error if the notation cannot be found. (If "allow_arbitrary"
       * has been invoked before, Unrestricted is raised instead.)
       *)

    method notation_names : string list
      (* Returns the list of the names of all added notations *)

    method pinstr : string -> proc_instruction list
      (* looks up all processing instructions with the given target.
       * The "target" is the identifier following "<?".
       * Note: It is not possible to find out the exact position of the
       * processing instruction.
       *)

    method pinstr_names : string list
      (* Returns the list of the names (targets) of all added pinstrs *)

    method validate : unit
      (* ensures that the DTD is valid. This method is optimized such that
       * actual validation is only performed if the DTD has changed.
       * If the DTD is invalid, mostly a Validation_error is raised,
       * but other exceptions are possible, too.
       *)
    method only_deterministic_models : unit
      (* Succeeds if all regexp content models are deterministic.
       * Otherwise Validation_error.
       *)

    method write : Pxp_types.output_stream -> Pxp_types.encoding -> bool -> unit
      (* write os enc doctype:
       * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a
       * DTD like <!DOCTYPE root [ ... ]> is written. If 'not doctype',
       * only the declarations are written (the material within the
       * square brackets).
       *)

    method write_compact_as_latin1 : Pxp_types.output_stream -> bool -> unit
      (* DEPRECATED METHOD; included only to keep compatibility with
       * older versions of the parser
       *)

    (*---------------------------*)
    method invalidate : unit
      (* INTERNAL METHOD *)
    method warner : Pxp_types.collect_warnings
      (* INTERNAL METHOD *)
  end

(* ------------------------------------------------- *)

and dtd_element : dtd -> string ->
  (* Creation:
   *   new dtd_element init_dtd init_name:
   * creates a new dtd_element object for init_dtd with init_name.
   * The strings are represented in the same encoding as init_dtd.
   *)
  object
    method name : string
      (* returns the name of the declared element *)

    method externally_declared : bool
      (* returns whether the element declaration occurs in an external
       * entity.
       *)

    method content_model : Pxp_types.content_model_type
      (* get the content model of this element declaration, or Unspecified *)

    method content_dfa : Pxp_dfa.dfa_definition option
      (* return the DFA of the content model if there is a DFA, or None.
       * A DFA exists only for regexp style content models which are
       * deterministic.
       *)

    method set_cm_and_extdecl : Pxp_types.content_model_type -> bool -> unit
      (* set_cm_and_extdecl cm extdecl:
       * set the content model to 'cm'. Once the content model is not
       * Unspecified, it cannot be set to a different value again.
       * Furthermore, it is set whether the element occurs in an external
       * entity ('extdecl').
       *)

    method encoding : Pxp_types.rep_encoding
      (* Return the encoding of the strings *)

    method allow_arbitrary : unit
      (* After this method has been invoked, the object changes its behaviour:
       * - attributes that have not been added may be used in an
       *   arbitrary way; the method "attribute" indicates this
       *   by raising Undeclared instead of Validation_error.
       *)

    method disallow_arbitrary : unit

    method arbitrary_allowed : bool
      (* Returns whether arbitrary attributes are allowed or not. *)

    method attribute : string ->
                         Pxp_types.att_type * Pxp_types.att_default
      (* get the type and default value of a declared attribute, or raise
       * Validation_error if the attribute does not exist.
       * If 'arbitrary_allowed', the exception Undeclared is raised instead
       * of Validation_error.
       *)

    method attribute_violates_standalone_declaration :
             string -> string option -> bool
+%%Page: 87 87
+87 86 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 665 579 a Fq(\(*)45
+b(attribute_violates_standalone_declarat)o(ion)39 b(name)44
+b(v:)710 676 y(*)h(Checks)f(whether)f(the)i(attribute)e('name')h
+(violates)f(the)i("standalone")710 773 y(*)g(declaration)e(if)h(it)h
+(has)f(value)g('v'.)710 870 y(*)h(The)f(method)g(returns)g(true)g(if:)
+710 967 y(*)h(-)f(The)h(attribute)e(declaration)g(occurs)h(in)g(an)h
+(external)e(entity,)710 1065 y(*)i(and)f(if)h(one)f(of)g(the)h(two)f
+(conditions)f(holds:)710 1162 y(*)i(-)f(v)h(=)g(None,)f(and)g(there)g
+(is)h(a)f(default)g(for)g(the)h(attribute)e(value)710
+1259 y(*)i(-)f(v)h(=)g(Some)f(s,)g(and)h(the)f(type)g(of)h(the)f
+(attribute)f(is)i(not)f(CDATA,)710 1356 y(*)134 b(and)45
+b(s)f(changes)g(if)h(normalized)e(according)g(to)i(the)f(rules)g(of)g
+(the)710 1453 y(*)134 b(attribute)44 b(type.)710 1550
+y(*)710 1647 y(*)h(The)f(method)g(raises)g(Validation_error)e(if)i(the)
+h(attribute)e(does)h(not)g(exist.)710 1745 y(*)h(If)f
+('arbitrary_allowed',)e(the)i(exception)f(Undeclared)h(is)g(raised)g
+(instead)710 1842 y(*)h(of)f(Validation_error.)710 1939
+y(*\))576 2133 y(method)f(attribute_names)g(:)h(string)g(list)665
+2230 y(\(*)h(get)f(the)g(list)h(of)f(all)g(declared)g(attributes)f(*\))
+576 2424 y(method)g(names_of_required_attributes)e(:)j(string)g(list)
+665 2522 y(\(*)h(get)f(the)g(list)h(of)f(all)g(attributes)g(that)g(are)
+g(specified)f(as)i(required)710 2619 y(*)g(attributes)710
+2716 y(*\))576 2910 y(method)e(id_attribute_name)f(:)j(string)f(option)
+665 3007 y(\(*)h(Returns)e(the)i(name)f(of)g(the)h(attribute)e(with)h
+(type)g(ID,)h(or)f(None.)g(*\))576 3202 y(method)f
+(idref_attribute_names)f(:)i(string)g(list)665 3299 y(\(*)h(Returns)e
+(the)i(names)f(of)g(the)h(attributes)e(with)h(type)g(IDREF)g(or)h
+(IDREFS.)e(*\))576 3493 y(method)g(add_attribute)g(:)i(string)f(-)p
+Fo(>)1607 3590 y Fq(Pxp_types.att_type)e(-)p Fo(>)531
+3687 y Fq(Pxp_types.att_default)f(-)p Fo(>)531 3784 y
+Fq(bool)j(-)p Fo(>)620 3882 y Fq(unit)665 3979 y(\(*)h(add_attribute)d
+(name)j(type)f(default)f(extdecl:)710 4076 y(*)i(add)f(an)h(attribute)e
+(declaration)g(for)h(an)h(attribute)e(with)h(the)h(given)e(name,)710
+4173 y(*)i(type,)f(and)g(default)g(value.)g(If)g(there)g(is)h(more)f
+(than)g(one)g(declaration)f(for)710 4270 y(*)i(an)f(attribute)g(name,)g
+(the)g(first)g(declara-)396 4367 y(tion)g(counts;)g(the)g(other)g
+(declarations)710 4464 y(*)h(are)f(ignored.)710 4561
+y(*)h('extdecl':)e(if)h(true,)g(the)h(attribute)e(declaration)g(occurs)
+h(in)g(an)h(external)710 4659 y(*)g(entity.)e(This)i(property)e(is)i
+(used)f(to)g(check)g(the)h("standalone")d(attribute.)710
+4756 y(*\))p Black 3797 5278 a Fr(87)p Black eop
+%%Page: 88 88
+88 87 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 576 579 a Fq(method)43
+b(validate)h(:)h(unit)665 676 y(\(*)g(checks)f(whether)f(this)h
+(element)g(declaration)f(\(i.e.)h(the)g(content)g(model)g(and)710
+773 y(*)h(all)f(attribute)f(declarations\))g(is)i(valid)f(for)g(the)g
+(associated)f(DTD.)710 870 y(*)i(Raises)f(mostly)f(Validation_error)g
+(if)h(the)g(validation)g(fails.)710 967 y(*\))576 1162
+y(method)f(write)h(:)h(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(Pxp_types.encoding)e(-)p Fo(>)j Fq(unit)665
+1259 y(\(*)g(write_compact_as_latin1)c(os)j(enc:)710
+1356 y(*)h(Writes)f(the)g Fo(<)p Fq(!ELEMENT)f(...)h
+Fo(>)h Fq(declaration)e(to)h('os')h(as)f('enc'-)396 1453
+y(encoded)g(string.)710 1550 y(*\))576 1745 y(method)f
+(write_compact_as_latin1)e(:)k(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(unit)665 1842 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only)
+h(to)h(keep)f(compatibility)f(with)710 1939 y(*)i(older)f(versions)f
+(of)i(the)f(parser)710 2036 y(*\))486 2133 y(end)396
+2327 y(\(*)h(--------------------------------------)o(------)o(---)39
+b(*\))396 2522 y(and)45 b(dtd_notation)d(:)j(string)f(-)p
+Fo(>)g Fq(Pxp_types.ext_id)e(-)p Fo(>)j Fq(Pxp_types.rep_encoding)c(-)p
+Fo(>)486 2619 y Fq(\(*)j(Creation:)531 2716 y(*)179 b(new)44
+b(dtd_notation)f(a_name)h(an_external_ID)e(init_encoding)531
+2813 y(*)i(creates)g(a)h(new)f(dtd_notation)f(object)h(with)g(the)g
+(given)g(name)g(and)h(the)f(given)531 2910 y(*)g(external)g(ID.)531
+3007 y(*\))486 3104 y(object)576 3202 y(method)f(name)i(:)f(string)576
+3299 y(method)f(ext_id)h(:)h(Pxp_types.ext_id)576 3396
+y(method)e(encoding)h(:)h(Pxp_types.rep_encoding)576
+3590 y(method)e(write)h(:)h(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(Pxp_types.encoding)e(-)p Fo(>)j Fq(unit)665
+3687 y(\(*)g(write_compact_as_latin1)c(os)j(enc:)710
+3784 y(*)h(Writes)f(the)g Fo(<)p Fq(!NOTATION)f(...)h
+Fo(>)h Fq(declaration)e(to)h('os')g(as)h('enc'-encoded)710
+3882 y(*)g(string.)710 3979 y(*\))576 4173 y(method)e
+(write_compact_as_latin1)e(:)k(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(unit)665 4270 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only)
+h(to)h(keep)f(compatibility)f(with)710 4367 y(*)i(older)f(versions)f
+(of)i(the)f(parser)710 4464 y(*\))486 4659 y(end)396
+4853 y(\(*)h(--------------------------------------)o(------)o(---)39
+b(*\))p Black 3800 5278 a Fr(88)p Black eop
+%%Page: 89 89
+89 88 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 676 a Fq(and)45
+b(proc_instruction)d(:)i(string)g(-)p Fo(>)h Fq(string)e(-)p
+Fo(>)i Fq(Pxp_types.rep_encoding)c(-)p Fo(>)486 773 y
+Fq(\(*)j(Creation:)531 870 y(*)134 b(new)44 b(proc_instruction)f
+(a_target)g(a_value)531 967 y(*)h(creates)g(a)h(new)f(proc_instruction)
+e(object)i(with)g(the)h(given)f(target)f(string)h(and)531
+1065 y(*)g(the)h(given)f(value)g(string.)531 1162 y(*)g(Note:)g(A)h
+(processing)e(instruction)g(is)i(written)e(as)i Fo(<)p
+Fq(?target)e(value?)p Fo(>)p Fq(.)531 1259 y(*\))486
+1356 y(object)576 1453 y(method)g(target)h(:)h(string)576
+1550 y(method)e(value)h(:)h(string)576 1647 y(method)e(encoding)h(:)h
+(Pxp_types.rep_encoding)576 1842 y(method)e(write)h(:)h
+(Pxp_types.output_stream)c(-)p Fo(>)j Fq(Pxp_types.encoding)e(-)p
+Fo(>)j Fq(unit)665 1939 y(\(*)g(write)f(os)g(enc:)710
+2036 y(*)h(Writes)f(the)g Fo(<)p Fq(?...?)p Fo(>)f Fq(PI)i(to)f('os')h
+(as)f('enc'-encoded)f(string.)710 2133 y(*\))576 2327
+y(method)g(write_compact_as_latin1)e(:)k(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(unit)665 2424 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only)
+h(to)h(keep)f(compatibility)f(with)710 2522 y(*)i(older)f(versions)f
+(of)i(the)f(parser)710 2619 y(*\))576 2813 y(method)f(parse_pxp_option)
+g(:)h(\(string)g(*)h(string)e(*)i(\(string)f(*)g(string\))g(list\))665
+2910 y(\(*)h(Parses)f(a)g(PI)h(containing)e(a)i(PXP)f(option.)g(Such)g
+(PIs)g(are)g(formed)g(like:)710 3007 y(*)134 b Fo(<)p
+Fq(?target)44 b(option-name)f(option-att="value")f(option-att="value")f
+(...)k(?)p Fo(>)710 3104 y Fq(*)g(The)f(method)g(returns)g(a)g(triple)
+710 3202 y(*)134 b(\(target,)44 b(option-name,)f([option-att,)g(value;)
+g(...]\))710 3299 y(*)i(or)f(raises)g(Error.)710 3396
+y(*\))486 3590 y(end)396 3784 y(;;)-2 4286 y Fx(4.4.)39
+b(In)-6 b(v)l(oking)38 b(the)h(par)n(ser)396 4466 y Fv(Here)20
+b(a)h(description)e(of)h(Pxp_yacc.)-2 4794 y Fp(4.4.1.)35
+b(Defaults)p Black 3800 5278 a Fr(89)p Black eop
+%%Page: 90 90
+90 89 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(The)g(follo)n(wing)f
+(def)o(aults)g(are)i(a)n(v)n(ailable:)396 759 y Fq(val)45
+b(default_config)d(:)j(config)396 856 y(val)g(default_extension)d(:)i
+(\('a)h(node)f(extension\))f(as)h('a)396 953 y(val)h(default_spec)d(:)j
+(\('a)f(node)h(extension)e(as)h('a\))h(spec)-2 1406 y
+Fp(4.4.2.)35 b(P)l(ar)n(sing)f(functions)396 1574 y Fv(In)20
+b(the)g(follo)n(wing,)f(the)h(term)g("closed)g(document")e(refers)h(to)
+i(an)f(XML)g(structure)f(lik)o(e)396 1754 y Fo(<)p Fq(!DOCTYPE)43
+b(...)i([)f Fn(declarations)f Fq(])i Fo(>)396 1851 y(<)p
+Fn(root)p Fo(>)396 1948 y Fq(...)396 2045 y Fo(<)p Fq(/)p
+Fn(root)p Fo(>)396 2236 y Fv(The)20 b(term)g("fragment")e(refers)i(to)g
+(an)g(XML)h(structure)e(lik)o(e)396 2416 y Fo(<)p Fn(root)p
+Fo(>)396 2513 y Fq(...)396 2611 y Fo(<)p Fq(/)p Fn(root)p
+Fo(>)396 2802 y Fv(i.e.)h(only)g(to)g(one)g(isolated)g(element)f
+(instance.)396 3023 y Fq(val)45 b(parse_dtd_entity)d(:)i(config)g(->)h
+(source)f(->)g(dtd)396 3214 y Fv(P)o(arses)21 b(the)f(declarations)f
+(which)h(are)g(contained)e(in)j(the)f(entity)-5 b(,)19
+b(and)h(returns)f(them)h(as)h Fq(dtd)f Fv(object.)396
+3436 y Fq(val)45 b(extract_dtd_from_document_entity)39
+b(:)45 b(config)f(->)g(source)g(->)g(dtd)396 3627 y Fv(Extracts)20
+b(the)g(DTD)h(from)e(a)h(closed)g(document.)e(Both)i(the)h(internal)e
+(and)h(the)g(e)o(xternal)f(subsets)h(are)h(e)o(xtracted)d(and)396
+3735 y(combined)g(to)i(one)f Fq(dtd)h Fv(object.)f(This)h(function)e
+(does)h(not)h(parse)f(the)h(whole)f(document,)f(b)n(ut)i(only)e(the)i
+(parts)g(that)g(are)396 3843 y(necessary)g(to)g(e)o(xtract)f(the)i
+(DTD.)396 4064 y Fq(val)45 b(parse_document_entity)c(:)576
+4161 y(?transform_dtd:\(dtd)g(->)k(dtd\))f(->)576 4259
+y(?id_index:\('ext)e(index\))i(->)576 4356 y(config)f(->)576
+4453 y(source)g(->)576 4550 y('ext)h(spec)g(->)755 4647
+y('ext)g(document)p Black 3800 5278 a Fr(90)p Black eop
+%%Page: 91 91
+91 90 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(P)o(arses)h(a)g(closed)
+e(document)g(and)g(v)n(alidates)h(it)h(against)e(the)i(DTD)f(that)g(is)
+h(contained)e(in)h(the)h(document)d(\(internal)396 687
+y(and)i(e)o(xternal)f(subsets\).)h(The)g(option)f Fq(~transform_dtd)f
+Fv(can)i(be)g(used)g(to)g(transform)f(the)h(DTD)h(in)f(the)g(document,)
+396 795 y(and)g(to)g(use)h(the)f(transformed)e(DTD)i(for)g(v)n
+(alidation.)e(If)i Fq(~id_index)g Fv(is)h(speci\002ed,)e(an)h(inde)o(x)
+f(of)h(all)h(ID)f(attrib)n(utes)h(is)396 903 y(created.)396
+1124 y Fq(val)45 b(parse_wfdocument_entity)c(:)576 1222
+y(config)i(->)576 1319 y(source)g(->)576 1416 y('ext)h(spec)g(->)755
+1513 y('ext)g(document)396 1704 y Fv(P)o(arses)21 b(a)g(closed)e
+(document,)f(b)n(ut)j(checks)e(it)i(only)e(on)h(well-formedness.)396
+1926 y Fq(val)45 b(parse_content_entity)86 b(:)576 2023
+y(?id_index:\('ext)42 b(index\))i(->)576 2120 y(config)f(->)576
+2217 y(source)g(->)576 2314 y(dtd)h(->)576 2411 y('ext)g(spec)g(->)755
+2508 y('ext)g(node)396 2699 y Fv(P)o(arses)21 b(a)g(fragment,)d(and)h
+(v)n(alidates)h(the)g(element.)396 2921 y Fq(val)45 b
+(parse_wfcontent_entity)c(:)576 3018 y(config)i(->)576
+3115 y(source)g(->)576 3212 y('ext)h(spec)g(->)755 3310
+y('ext)g(node)396 3500 y Fv(P)o(arses)21 b(a)g(fragment,)d(b)n(ut)i
+(checks)g(it)g(only)g(on)g(well-formedness.)-2 3870 y
+Fp(4.4.3.)35 b(Con\002guration)f(options)396 4110 y Fq(type)44
+b(config)g(=)576 4207 y({)g(warner)g(:)h(collect_warnings;)665
+4304 y(errors_with_line_numbers)c(:)k(bool;)665 4401
+y(enable_pinstr_nodes)d(:)j(bool;)665 4499 y(enable_super_root_node)c
+(:)k(bool;)665 4596 y(enable_comment_nodes)d(:)i(bool;)665
+4693 y(encoding)g(:)g(rep_encoding;)665 4790 y
+(recognize_standalone_declaration)c(:)k(bool;)p Black
+3800 5278 a Fr(91)p Black eop
+%%Page: 92 92
+92 91 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 665 579 a Fq
+(store_element_positions)41 b(:)k(bool;)665 676 y(idref_pass)e(:)i
+(bool;)665 773 y(validate_by_dfa)e(:)h(bool;)665 870
+y(accept_only_deterministic_models)c(:)k(bool;)665 967
+y(...)576 1065 y(})p Black 396 1422 a Ft(\225)p Black
+60 w Fq(warner:)p Fv(The)19 b(parser)h(prints)f(w)o(arnings)h(by)f(in)m
+(v)n(oking)f(the)j(method)d Fq(warn)j Fv(for)e(this)i(w)o(arner)e
+(object.)h(\(Def)o(ault:)f(all)479 1530 y(w)o(arnings)h(are)g
+(dropped\))p Black 396 1637 a Ft(\225)p Black 60 w Fq
+(errors_with_line_numbers:)p Fv(If)c(true,)k(errors)f(contain)g(line)i
+(numbers;)d(if)j(f)o(alse,)f(errors)g(contain)f(only)g(byte)479
+1745 y(positions.)h(The)g(latter)g(mode)f(is)i(f)o(aster)-5
+b(.)21 b(\(Def)o(ault:)e(true\))p Black 396 1853 a Ft(\225)p
+Black 60 w Fq(enable_pinstr_nodes:)p Fv(If)e(true,)j(the)g(parser)f
+(creates)i(e)o(xtra)e(nodes)g(for)h(processing)f(instructions.)g(If)h
+(f)o(alse,)479 1961 y(processing)f(instructions)g(are)h(simply)g(added)
+f(to)i(the)f(element)f(or)h(document)f(surrounding)e(the)j
+(instructions.)479 2069 y(\(Def)o(ault:)g(f)o(alse\))p
+Black 396 2177 a Ft(\225)p Black 60 w Fq(enable_super_root_node:)p
+Fv(If)c(true,)k(the)g(parser)g(creates)g(an)g(e)o(xtra)g(node)f(which)g
+(is)j(the)e(parent)f(of)h(the)g(root)479 2285 y(of)g(the)g(document)f
+(tree.)h(This)g(node)f(is)i(called)f(super)g(root;)f(it)i(is)g(an)g
+(element)e(with)i(type)e Fq(T_super_root)p Fv(.)g(-)h(If)479
+2393 y(there)g(are)g(processing)f(instructions)g(outside)h(the)g(root)f
+(element)h(and)g(outside)f(the)i(DTD,)f(the)o(y)f(are)h(added)f(to)i
+(the)479 2501 y(super)f(root)f(instead)h(of)g(the)g(document.)e(-)j(If)
+f(f)o(alse,)g(the)g(super)g(root)g(node)f(is)i(not)f(created.)f(\(Def)o
+(ault:)h(f)o(alse\))p Black 396 2609 a Ft(\225)p Black
+60 w Fq(enable_comment_nodes:)p Fv(If)d(true,)i(the)i(parser)e(creates)
+h(nodes)g(for)f(comments)g(with)i(type)f Fq(T_comment)p
+Fv(;)f(if)479 2717 y(f)o(alse,)i(such)f(nodes)f(are)h(not)g(created.)f
+(\(Def)o(ault:)h(f)o(alse\))p Black 396 2825 a Ft(\225)p
+Black 60 w Fq(encoding:)p Fv(Speci\002es)f(the)i(internal)e(encoding)f
+(of)i(the)g(parser)-5 b(.)20 b(Most)g(strings)h(are)f(then)f
+(represented)g(according)479 2933 y(to)i(this)f(encoding;)f(ho)n(we)n
+(v)o(er)f(there)h(are)i(some)f(e)o(xceptions)e(\(especially)i
+Fq(ext_id)f Fv(v)n(alues)h(which)g(are)g(al)o(w)o(ays)479
+3041 y(UTF-8)g(encoded\).)e(\(Def)o(ault:)h(`Enc_iso88591\))p
+Black 396 3148 a Ft(\225)p Black 60 w Fq
+(recognize_standalone_declaration:)c Fv(If)21 b(true)e(and)h(if)h(the)f
+(parser)f(is)i(v)n(alidating,)e(the)479 3256 y Fq(standalone="yes")f
+Fv(declaration)h(forces)h(that)g(it)h(is)g(check)o(ed)e(whether)g(the)h
+(document)e(is)j(a)g(standalone)479 3364 y(document.)d(-)j(If)f(f)o
+(alse,)g(or)g(if)g(the)h(parser)e(is)i(in)g(well-formedness)d(mode,)h
+(such)h(declarations)f(are)h(ignored.)479 3472 y(\(Def)o(ault:)g
+(true\))p Black 396 3580 a Ft(\225)p Black 60 w Fq
+(store_element_positions:)d Fv(If)j(true,)g(for)f(e)n(v)o(ery)g
+(non-data)f(node)h(the)i(source)e(position)g(is)j(stored.)d(If)h(f)o
+(alse,)479 3688 y(the)g(position)g(information)e(is)j(lost.)f(If)g(a)n
+(v)n(ailable,)g(you)f(can)h(get)g(the)g(positions)g(of)g(nodes)f(by)h
+(in)m(v)n(oking)e(the)479 3796 y Fq(position)i Fv(method.)e(\(Def)o
+(ault:)i(true\))p Black 396 3904 a Ft(\225)p Black 60
+w Fq(idref_pass:)p Fv(If)e(true)i(and)g(if)g(there)g(is)h(an)f(ID)h
+(inde)o(x,)e(the)h(parser)f(checks)h(whether)f(e)n(v)o(ery)g(IDREF)i
+(or)e(IDREFS)479 4012 y(attrib)n(ute)h(refer)g(to)g(an)g(e)o(xisting)f
+(node;)h(this)g(requires)g(that)g(the)g(parser)g(tra)n(v)o(erses)g(the)
+g(whole)f(doument)g(tree.)h(If)479 4120 y(f)o(alse,)h(this)f(check)g
+(is)h(left)f(out.)g(\(Def)o(ault:)g(f)o(alse\))p Black
+396 4228 a Ft(\225)p Black 60 w Fq(validate_by_dfa:)p
+Fv(If)e(true)h(and)h(if)h(the)f(content)f(model)g(for)h(an)g(element)g
+(type)f(is)i(deterministic,)e(a)479 4336 y(deterministic)h(\002nite)g
+(automaton)e(is)j(used)f(to)h(v)n(alidate)e(whether)g(the)i(element)e
+(contents)h(match)f(the)i(content)479 4444 y(model)e(of)h(the)g(type.)g
+(If)g(f)o(alse,)g(or)g(if)g(a)g(DF)-6 b(A)21 b(is)g(not)f(a)n(v)n
+(ailable,)f(a)h(backtracking)e(algorithm)g(is)j(used)f(for)f(v)n
+(alidation.)479 4552 y(\(Def)o(ault:)h(true\))p Black
+396 4659 a Ft(\225)p Black 60 w Fq(accept_only_deterministic_models:)15
+b Fv(If)21 b(true,)e(only)h(deterministic)f(content)g(models)h(are)g
+(accepted;)f(if)479 4767 y(f)o(alse,)i(an)o(y)e(syntactically)h
+(correct)f(content)g(models)h(can)g(be)g(processed.)f(\(Def)o(ault:)g
+(true\))p Black 3800 5278 a Fr(92)p Black eop
+%%Page: 93 93
+93 92 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black -2 583 a Fp(4.4.4.)35
+b(Whic)o(h)f(con\002guration)g(should)g(I)f(use?)396
+751 y Fv(First,)21 b(I)f(recommend)e(to)i(v)n(ary)g(the)g(def)o(ault)f
+(con\002guration)f(instead)i(of)g(creating)f(a)i(ne)n(w)f
+(con\002guration)d(record.)i(F)o(or)396 859 y(instance,)h(to)g(set)h
+Fq(idref_pass)e Fv(to)i Fq(true)p Fv(,)e(change)g(the)i(def)o(ault)e
+(as)i(in:)396 1039 y Fq(let)45 b(config)e(=)i({)g(default_config)d
+(with)i(idref_pass)g(=)g(true)g(})396 1230 y Fv(The)20
+b(background)d(is)k(that)f(I)h(can)f(add)f(more)h(options)f(to)h(the)g
+(record)f(in)i(future)e(v)o(ersions)g(of)h(the)g(parser)f(without)396
+1338 y(breaking)g(your)f(programs.)396 1487 y Fu(Do)i(I)i(need)e(extra)
+f(nodes)i(f)n(or)f(pr)o(ocessing)g(instructions?)g Fv(By)g(def)o(ault,)
+g(such)g(nodes)f(are)h(not)g(created.)f(This)i(does)396
+1595 y(not)f(mean)g(that)g(the)g(processing)f(instructions)g(are)h
+(lost;)h(ho)n(we)n(v)o(er)m(,)d(you)h(cannot)g(\002nd)h(out)g(the)g(e)o
+(xact)g(location)f(where)396 1703 y(the)o(y)h(occur)-5
+b(.)19 b(F)o(or)h(e)o(xample,)e(the)j(follo)n(wing)d(XML)i(te)o(xt)396
+1883 y Fq(<x><?pi1?><y/><?pi2?></x>)396 2074 y Fv(will)h(normally)e
+(create)h(one)f(element)h(node)f(for)h Fq(x)g Fv(containing)e
+Fr(one)i Fv(subnode)f(for)g Fq(y)p Fv(.)h(The)g(processing)f
+(instructions)396 2182 y(are)h(attached)g(to)g Fq(x)h
+Fv(in)f(a)h(separate)e(hash)h(table;)h(you)e(can)h(access)h(them)e
+(using)h Fq(x)45 b(#)f(pinstr)g("pi1")20 b Fv(and)g Fq(x)44
+b(#)396 2290 y(pinstr)g("pi2")p Fv(,)20 b(respecti)n(v)o(ely)-5
+b(.)18 b(The)i(information)d(is)k(lost)g(where)f(the)g(instructions)f
+(occur)g(within)h Fq(x)p Fv(.)396 2439 y(If)g(the)h(option)d
+Fq(enable_pinstr_nodes)g Fv(is)j(turned)e(on,)h(the)g(parser)f(creates)
+i(e)o(xtra)e(nodes)g Fq(pi1)i Fv(and)e Fq(pi2)i Fv(such)f(that)396
+2547 y(the)g(subnodes)f(of)h Fq(x)h Fv(are)f(no)n(w:)396
+2728 y Fq(x)45 b(#)g(sub_nodes)e(=)i([)f(pi1;)g(y;)h(pi2)f(])396
+2919 y Fv(The)20 b(e)o(xtra)g(nodes)f(contain)g(the)h(processing)f
+(instructions)g(in)i(the)f(usual)g(w)o(ay)-5 b(,)20 b(i.e.)g(you)f(can)
+h(access)h(them)f(using)f Fq(pi1)396 3026 y(#)45 b(pinstr)f("pi1")20
+b Fv(and)f Fq(pi2)45 b(#)f(pinstr)g("pi2")p Fv(,)20 b(respecti)n(v)o
+(ely)-5 b(.)396 3176 y(Note)20 b(that)h(you)e(will)i(need)e(an)i(e)o(x)
+o(emplar)d(for)h(the)i(PI)f(nodes)g(\(see)g Fq(make_spec_from_alist)p
+Fv(\).)396 3325 y Fu(Do)g(I)i(need)e(a)h(super)g(r)o(oot)d(node?)i
+Fv(By)h(def)o(ault,)e(there)h(is)h(no)f(super)f(root)h(node.)f(The)h
+Fq(document)f Fv(object)h(refers)396 3433 y(directly)g(to)g(the)g(node)
+f(representing)f(the)j(root)e(element)h(of)g(the)g(document,)e(i.e.)396
+3613 y Fq(doc)45 b(#)f(root)g(=)h(r)396 3804 y Fv(if)21
+b Fq(r)f Fv(is)h(the)g(root)e(node.)g(This)h(is)i(sometimes)d(incon)m
+(v)o(enient:)f(\(1\))h(Some)h(algorithms)f(become)g(simpler)h(if)g(e)n
+(v)o(ery)f(node)396 3912 y(has)i(a)f(parent,)f(e)n(v)o(en)g(the)i(root)
+e(node.)g(\(2\))h(Some)g(standards)f(such)h(as)h(XP)o(ath)f(call)g(the)
+h("root)e(node")g(the)h(node)f(whose)396 4020 y(child)h(represents)f
+(the)i(root)e(of)h(the)g(document.)e(\(3\))i(The)g(super)f(root)h(node)
+f(can)h(serv)o(e)f(as)i(a)g(container)e(for)g(processing)396
+4128 y(instructions)g(outside)h(the)g(root)g(element.)f(Because)i(of)e
+(these)i(reasons,)e(it)i(is)g(possible)f(to)h(create)f(an)g(e)o(xtra)f
+(super)h(root)396 4236 y(node,)f(whose)h(child)g(is)h(the)f(root)g
+(node:)396 4416 y Fq(doc)45 b(#)f(root)g(=)h(sr)403 b(&&)396
+4513 y(sr)45 b(#)f(sub_nodes)g(=)g([)h(r)g(])396 4704
+y Fv(When)20 b(e)o(xtra)g(nodes)f(are)h(also)h(created)e(for)h
+(processing)f(instructions,)g(these)h(nodes)f(can)h(be)h(added)e(to)h
+(the)g(super)g(root)396 4812 y(node)f(if)h(the)o(y)e(occur)h(outside)g
+(the)g(root)g(element)g(\(reason)f(\(3\)\),)h(and)g(the)g(order)g
+(re\003ects)g(the)h(order)e(in)i(the)f(source)g(te)o(xt.)p
+Black 3800 5278 a Fr(93)p Black eop
+%%Page: 94 94
+94 93 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(Note)g(that)h(you)e
+(will)i(need)e(an)i(e)o(x)o(emplar)d(for)h(the)i(super)e(root)h(node)f
+(\(see)h Fq(make_spec_from_alist)p Fv(\).)396 728 y Fu(What)g(is)h(the)
+g(effect)e(of)h(the)h(UTF-8)e(encoding?)h Fv(By)h(def)o(ault,)e(the)h
+(parser)g(represents)f(strings)h(\(with)g(fe)n(w)396
+836 y(e)o(xceptions\))e(as)j(ISO-8859-1)c(strings.)i(These)h(are)g
+(well-kno)n(wn,)d(and)j(there)f(are)h(tools)g(and)f(fonts)g(for)h(this)
+g(encoding.)396 986 y(Ho)n(we)n(v)o(er)m(,)e(internationalization)g
+(may)h(require)g(that)i(you)e(switch)h(o)o(v)o(er)f(to)i(UTF-8)e
+(encoding.)f(In)i(most)396 1094 y(en)m(vironments,)d(the)k(immediate)e
+(ef)n(fect)h(will)g(be)h(that)f(you)f(cannot)g(read)h(strings)g(with)g
+(character)f(codes)h(>=)h(160)e(an)o(y)396 1202 y(longer;)g(your)g
+(terminal)h(will)h(only)e(sho)n(w)h(funn)o(y)e(glyph)h(combinations.)f
+(It)i(is)h(strongly)e(recommended)e(to)k(install)396
+1310 y(Unicode)e(fonts)h(\(GNU)g(Unifont)f
+(\(http://czyborra.com/unifon)o(t/\),)c(Markus)k(K)o(uhn')-5
+b(s)19 b(fonts)396 1417 y(\(http://www)-5 b(.cl.cam.ac.uk/~mgk25)o(/do)
+m(wnlo)o(ad/u)o(cs-fo)o(nts.tar)g(.g)o(z\)\))14 b(and)20
+b(terminal)f(emulators)h(that)g(can)g(handle)396 1525
+y(UTF-8)g(byte)g(sequences)f(\(http://myweb)m(.clark.net/pub/d)o(ick)o
+(e)o(y)o(/xter)o(m/x)o(term.)o(html\))o(.)c(Furthermore,)i(a)k(Unicode)
+396 1633 y(editor)f(may)f(be)i(helpful)e(\(such)g(as)i(Y)-9
+b(udit)20 b(\(ftp://metalab)m(.unc.edu/pub)o(/Linu)o(x/ap)o(ps/ed)o
+(itors/X/\)\))o(.)15 b(There)k(are)h(also)396 1741 y(F)-6
+b(A)h(Q)21 b(\(http://www)-5 b(.cl.cam.ac.uk/~mgk25)o(/unico)o(de)o
+(.htm)o(l\))15 b(by)20 b(Markus)f(K)o(uhn.)396 1891 y(By)i(setting)f
+Fq(encoding)f Fv(to)i Fq(`Enc_utf8)e Fv(all)i(strings)f(originating)e
+(from)h(the)i(parsed)e(XML)h(document)e(are)396 1999
+y(represented)h(as)i(UTF-8)e(strings.)h(This)h(includes)e(not)h(only)f
+(character)g(data)h(and)g(attrib)n(ute)g(v)n(alues)g(b)n(ut)g(also)g
+(element)396 2107 y(names,)g(attrib)n(ute)g(names)g(and)f(so)i(on,)e
+(as)i(it)g(is)g(possible)f(to)h(use)f(an)o(y)f(Unicode)g(letter)i(to)f
+(form)f(such)h(names.)g(Strictly)396 2214 y(speaking,)f(PXP)i(is)g
+(only)e(XML-compliant)f(if)j(the)f(UTF-8)g(mode)f(is)i(used;)f
+(otherwise)g(it)h(will)g(ha)n(v)o(e)e(dif)n(\002culties)396
+2322 y(when)h(v)n(alidating)f(documents)f(containing)g
+(non-ISO-8859-1-names.)396 2472 y(This)j(mode)e(does)h(not)g(ha)n(v)o
+(e)f(an)o(y)h(impact)f(on)h(the)g(e)o(xternal)f(representation)f(of)i
+(documents.)f(The)g(character)g(set)396 2580 y(assumed)h(when)g
+(reading)e(a)j(document)d(is)j(set)g(in)g(the)f(XML)g(declaration,)e
+(and)i(character)f(set)i(when)e(writing)h(a)396 2688
+y(document)e(must)j(be)f(passed)g(to)g(the)g Fq(write)g
+Fv(method.)396 2837 y Fu(Ho)o(w)g(do)h(I)g(check)f(that)g(nodes)h
+(exist)f(which)h(ar)o(e)e(r)o(eferr)o(ed)g(by)i(IDREF)g(attrib)n(utes?)
+e Fv(First,)i(you)e(must)h(create)g(an)396 2945 y(inde)o(x)f(of)h(all)h
+(occurring)d(ID)i(attrib)n(utes:)396 3125 y Fq(let)45
+b(index)f(=)g(new)h(hash_index)396 3316 y Fv(This)21
+b(inde)o(x)e(must)h(be)g(passed)g(to)g(the)h(parsing)e(function:)396
+3496 y Fq(parse_document_entity)486 3593 y(~id_index:\(index)42
+b(:>)j(index\))486 3691 y(config)f(source)g(spec)396
+3882 y Fv(Ne)o(xt,)20 b(you)f(must)h(turn)g(on)g(the)g
+Fq(idref_pass)f Fv(mode:)396 4062 y Fq(let)45 b(config)e(=)i({)g
+(default_config)d(with)i(idref_pass)g(=)g(true)g(})396
+4253 y Fv(Note)20 b(that)h(no)n(w)e(the)i(whole)e(document)f(tree)j
+(will)g(be)f(tra)n(v)o(ersed,)f(and)g(e)n(v)o(ery)g(node)g(will)i(be)f
+(check)o(ed)f(for)h(IDREF)g(and)396 4361 y(IDREFS)h(attrib)n(utes.)f
+(If)g(the)g(tree)g(is)h(big,)f(this)h(may)f(tak)o(e)g(some)g(time.)396
+4510 y Fu(What)g(ar)o(e)g(deterministic)g(content)g(models?)g
+Fv(These)g(type)g(of)g(models)g(can)g(speed)f(up)h(the)g(v)n(alidation)
+f(checks;)396 4618 y(furthermore)f(the)o(y)h(ensure)g
+(SGML-compatibility)-5 b(.)18 b(In)i(particular)m(,)e(a)j(content)e
+(model)g(is)i(deterministic)e(if)i(the)f(parser)396 4726
+y(can)g(determine)f(the)h(actually)g(used)g(alternati)n(v)o(e)f(by)g
+(inspecting)g(only)h(the)g(current)f(tok)o(en.)g(F)o(or)h(e)o(xample,)e
+(this)396 4834 y(element)i(has)g(non-deterministic)e(contents:)p
+Black 3800 5278 a Fr(94)p Black eop
+%%Page: 95 95
+95 94 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fq(<!ELEMENT)44
+b(x)g(\(\(u,v\))g(|)h(\(u,y+\))f(|)g(v\)>)396 770 y Fv(If)20
+b(the)h(\002rst)f(element)g(in)g Fq(x)h Fv(is)g Fq(u)p
+Fv(,)f(the)h(parser)e(does)h(not)g(kno)n(w)f(which)h(of)g(the)g
+(alternati)n(v)o(es)f Fq(\(u,v\))h Fv(or)g Fq(\(u,y+\))g
+Fv(will)396 878 y(w)o(ork;)g(the)g(parser)g(must)g(also)g(inspect)g
+(the)h(second)e(element)g(to)i(be)f(able)g(to)g(distinguish)g(between)f
+(the)h(alternati)n(v)o(es.)396 986 y(Because)h(such)f(look-ahead)d
+(\(or)j("guessing"\))e(is)k(required,)c(this)i(e)o(xample)f(is)i
+(non-deterministic.)396 1135 y(The)f(XML)g(standard)f(demands)g(that)i
+(content)e(models)g(must)i(be)f(deterministic.)f(So)h(it)h(is)g
+(recommended)c(to)k(turn)e(the)396 1243 y(option)g Fq
+(accept_only_deterministic_models)d Fv(on;)j(ho)n(we)n(v)o(er)m(,)f
+(PXP)j(can)f(also)h(process)e(non-deterministic)396 1351
+y(models)h(using)g(a)g(backtracking)e(algorithm.)396
+1500 y(Deterministic)i(models)g(ensure)f(that)h(v)n(alidation)f(can)h
+(be)g(performed)e(in)i(linear)g(time.)g(In)g(order)f(to)h(get)g(the)396
+1608 y(maximum)f(bene\002ts,)h(PXP)h(also)f(implements)f(a)i(special)f
+(v)n(alidator)f(that)h(pro\002ts)g(from)f(deterministic)h(models;)f
+(this)396 1716 y(is)i(the)g(deterministic)e(\002nite)h(automaton)f
+(\(DF)-6 b(A\).)19 b(This)i(v)n(alidator)d(is)k(enabled)d(per)g
+(element)h(type)g(if)g(the)g(element)396 1824 y(type)g(has)g(a)h
+(deterministic)e(model)h(and)f(if)i(the)f(option)f Fq(validate_by_dfa)f
+Fv(is)j(turned)e(on.)396 1974 y(In)h(general,)f(I)h(e)o(xpect)g(that)g
+(the)g(DF)-6 b(A)21 b(method)e(is)i(f)o(aster)f(than)g(the)g
+(backtracking)e(method;)g(especially)i(in)h(the)f(w)o(orst)396
+2082 y(case)h(the)f(DF)-6 b(A)21 b(tak)o(es)f(only)g(linear)f(time.)i
+(Ho)n(we)n(v)o(er)m(,)d(if)i(the)g(content)g(model)f(has)h(only)g(fe)n
+(w)g(alternati)n(v)o(es)f(and)h(the)396 2190 y(alternati)n(v)o(es)f(do)
+h(not)g(nest,)g(the)h(backtracking)c(algorithm)i(may)g(be)i(better)-5
+b(.)-2 2691 y Fx(4.5.)39 b(Updates)396 2871 y Fr(Some)20
+b(\(often)f(later)i(added\))d(featur)m(es)i(that)g(ar)m(e)h(otherwise)f
+(not)g(e)n(xplained)f(in)h(the)h(manual)d(b)n(ut)j(worth)f(to)g(be)396
+2979 y(mentioned.)p Black 396 3211 a Ft(\225)p Black
+60 w Fv(Methods)g(node_position,)d(node_path,)g(nth_node,)h(pre)n
+(vious_node,)e(ne)o(xt_node)h(for)j(nodes:)f(See)479
+3319 y(pxp_document.mli)p Black 396 3427 a Ft(\225)p
+Black 60 w Fv(Functions)h(to)g(determine)f(the)h(document)e(order)h(of)
+h(nodes:)f(compare,)g(create_ord_inde)o(x,)c(ord_number)m(,)479
+3535 y(ord_compare:)i(See)k(pxp_document.mli)p Black
+3800 5278 a Fr(95)p Black eop
+%%Page: 96 96
+96 95 bop Black Black Black Black eop
+%%Trailer
+end
+userdict /end-hook known{end-hook}if
+%%EOF
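To see how the defaults, parsing functions, and configuration record described above fit together, here is a minimal sketch. It is not part of the manual or of this commit: the input file name "sample.xml" is a placeholder, and the error printer Pxp_types.string_of_exn is assumed to be available.

(* Sketch: parse and validate a document with a slightly changed
 * default configuration; "sample.xml" is a hypothetical input file. *)
open Pxp_yacc;;

let config =
  { default_config with enable_comment_nodes = true }

let () =
  try
    let doc =
      parse_document_entity
        config
        (from_file "sample.xml")    (* source *)
        default_spec                (* default node exemplars *)
    in
    let root = doc # root in
    Printf.printf "root element has %d sub-node(s)\n"
      (List.length (root # sub_nodes))
  with
    x ->
      print_endline (Pxp_types.string_of_exn x)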
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/dtd.mli.ent b/helm/DEVEL/pxp/pxp/doc/manual/src/dtd.mli.ent
new file mode 100644 (file)
index 0000000..f2e0eb8
--- /dev/null
@@ -0,0 +1,374 @@
+<!ENTITY markup-dtd1.mli '
+
+(**********************************************************************)
+(*                                                                    *)
+(* Pxp_dtd:                                                           *)
+(*     Object model of document type declarations                     *)
+(*                                                                    *)
+(**********************************************************************)
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class dtd ............... represents the whole DTD, including element
+ *                           declarations, entity declarations, notation
+ *                           declarations, and processing instructions
+ * class dtd_element ....... represents an element declaration consisting
+ *                           of a content model and an attribute list
+ *                           declaration
+ * class dtd_notation ...... represents a notation declaration
+ * class proc_instruction .. represents a processing instruction
+ * ======================================================================
+ *
+ *)
+
+
+class dtd :
+  (* Creation:
+   *   new dtd
+   * creates a new, empty DTD object without any declaration, without a root
+   * element, without an ID.
+   *)
+  Pxp_types.collect_warnings -&gt; 
+  Pxp_types.rep_encoding -&gt;
+  object
+    method root : string option
+      (* get the name of the root element if present *)
+
+    method set_root : string -&gt; unit
+      (* set the name of the root element. This method can be invoked 
+       * only once
+       *)
+
+    method id : Pxp_types.dtd_id option
+      (* get the identifier for this DTD *)
+
+    method set_id : Pxp_types.dtd_id -&gt; unit
+      (* set the identifier. This method can be invoked only once *)
+
+    method encoding : Pxp_types.rep_encoding
+      (* returns the encoding used for character representation *)
+
+
+    method allow_arbitrary : unit
+      (* After this method has been invoked, the object changes its behaviour:
+       * - elements and notations that have not been added may be used in an
+       *   arbitrary way; the methods "element" and "notation" indicate this
+       *   by raising Undeclared instead of Validation_error.
+       *)
+
+    method disallow_arbitrary : unit
+
+    method arbitrary_allowed : bool
+      (* Returns whether arbitrary contents are allowed or not. *)
+
+    method standalone_declaration : bool
+      (* Whether there is a &apos;standalone&apos; declaration or not. Strictly 
+       * speaking, this declaration is not part of the DTD, but it is
+       * included here for practical reasons. 
+       * If not set, this property defaults to &apos;false&apos;.
+       *)
+
+    method set_standalone_declaration : bool -&gt; unit
+      (* Sets the &apos;standalone&apos; declaration. *)
+
+
+    method add_element : dtd_element -&gt; unit
+      (* add the given element declaration to this DTD. Raises Not_found
+       * if there is already an element declaration with the same name.
+       *)
+
+    method add_gen_entity : Pxp_entity.entity -&gt; bool -&gt; unit
+      (* add_gen_entity e extdecl:
+       * add the entity &apos;e&apos; as general entity to this DTD (general entities
+       * are those represented by &amp;name;). If there is already a declaration
+       * with the same name, the second definition is ignored; as an exception to
+       * this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
+       * may only be redeclared with a definition that is equivalent to the
+       * standard definition; otherwise a Validation_error is raised.
+       *
+       * &apos;extdecl&apos;: &apos;true&apos; indicates that the entity declaration occurs in
+       * an external entity. (Used for the standalone check.)
+       *)
+
+    method add_par_entity : Pxp_entity.entity -&gt; unit
+      (* add the given entity as parameter entity to this DTD (parameter
+       * entities are those represented by &percent;name;). If there is already a 
+       * declaration with the same name, the second definition is ignored.
+       *)
+
+    method add_notation : dtd_notation -&gt; unit
+      (* add the given notation to this DTD. If there is already a declaration
+       * with the same name, a Validation_error is raised.
+       *)
+
+    method add_pinstr : proc_instruction -&gt; unit
+      (* add the given processing instruction to this DTD. *)
+
+    method element : string -&gt; dtd_element
+      (* looks up the element declaration with the given name. Raises 
+       * Validation_error if the element cannot be found. (If "allow_arbitrary"
+       * has been invoked before, Undeclared is raised instead.)
+       *)
+
+    method element_names : string list
+      (* returns the list of the names of all element declarations. *)
+
+    method gen_entity : string -&gt; (Pxp_entity.entity * bool)
+      (* let e, extdecl = obj # gen_entity n:
+       * looks up the general entity &apos;e&apos; with the name &apos;n&apos;. Raises
+       * WF_error if the entity cannot be found.
+       * &apos;extdecl&apos;: indicates whether the entity declaration occurred in an 
+       * external entity.
+       *)
+
+    method gen_entity_names : string list
+      (* returns the list of all general entity names *)
+
+    method par_entity : string -&gt; Pxp_entity.entity
+      (* looks up the parameter entity with the given name. Raises
+       * WF_error if the entity cannot be found.
+       *)
+
+    method par_entity_names : string list
+      (* returns the list of all parameter entity names *)
+
+    method notation : string -&gt; dtd_notation
+      (* looks up the notation declaration with the given name. Raises
+       * Validation_error if the notation cannot be found. (If "allow_arbitrary"
+       * has been invoked before, Undeclared is raised instead.)
+       *)
+
+    method notation_names : string list
+      (* Returns the list of the names of all added notations *)
+
+    method pinstr : string -&gt; proc_instruction list
+      (* looks up all processing instructions with the given target.
+       * The "target" is the identifier following "&lt;?".
+       * Note: It is not possible to find out the exact position of the
+       * processing instruction.
+       *)
+
+    method pinstr_names : string list
+      (* Returns the list of the names (targets) of all added pinstrs *)
+
+    method validate : unit
+      (* ensures that the DTD is valid. This method is optimized such that
+       * actual validation is only performed if the DTD has changed.
+       * If the DTD is invalid, mostly a Validation_error is raised,
+       * but other exceptions are possible, too.
+       *)
+
+    method only_deterministic_models : unit
+      (* Succeeds if all regexp content models are deterministic. 
+       * Otherwise Validation_error is raised.
+       *)
+
+    method write : Pxp_types.output_stream -&gt; Pxp_types.encoding -&gt; bool -&gt; unit
+      (* write os enc doctype:
+       * Writes the DTD as &apos;enc&apos;-encoded string to &apos;os&apos;. If &apos;doctype&apos;, a 
+       * DTD like &lt;!DOCTYPE root [ ... ]&gt; is written. If &apos;not doctype&apos;,
+       * only the declarations are written (the material within the
+       * square brackets).
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -&gt; bool -&gt; unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+
+    (*----------------------------------------*)
+    method invalidate : unit
+      (* INTERNAL METHOD *)
+    method warner : Pxp_types.collect_warnings
+      (* INTERNAL METHOD *)
+  end
+
+'>
+<!ENTITY markup-dtd2.mli '
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_element : dtd -&gt; string -&gt; 
+  (* Creation:
+   *   new dtd_element init_dtd init_name:
+   * creates a new dtd_element object for init_dtd with init_name.
+   * The strings are represented in the same encoding as init_dtd.
+   *)
+  object
+
+    method name : string
+      (* returns the name of the declared element *)
+
+    method externally_declared : bool
+      (* returns whether the element declaration occurs in an external
+       * entity.
+       *)
+
+    method content_model : Pxp_types.content_model_type
+      (* get the content model of this element declaration, or Unspecified *)
+
+    method content_dfa : Pxp_dfa.dfa_definition option
+      (* return the DFA of the content model if there is a DFA, or None.
+       * A DFA exists only for regexp style content models which are
+       * deterministic.
+       *)
+
+    method set_cm_and_extdecl : Pxp_types.content_model_type -&gt; bool -&gt; unit
+      (* set_cm_and_extdecl cm extdecl:
+       * set the content model to &apos;cm&apos;. Once the content model is not 
+       * Unspecified, it cannot be set to a different value again.
+       * Furthermore, it is set whether the element occurs in an external
+       * entity (&apos;extdecl&apos;).
+       *)
+
+    method encoding : Pxp_types.rep_encoding
+      (* Return the encoding of the strings *)
+
+    method allow_arbitrary : unit
+      (* After this method has been invoked, the object changes its behaviour:
+       * - attributes that have not been added may be used in an
+       *   arbitrary way; the method "attribute" indicates this
+       *   by raising Undeclared instead of Validation_error.
+       *)
+
+    method disallow_arbitrary : unit
+
+    method arbitrary_allowed : bool
+      (* Returns whether arbitrary attributes are allowed or not. *)
+
+    method attribute : string -&gt; 
+                         Pxp_types.att_type * Pxp_types.att_default
+      (* get the type and default value of a declared attribute, or raise
+       * Validation_error if the attribute does not exist.
+       * If &apos;arbitrary_allowed&apos;, the exception Undeclared is raised instead
+       * of Validation_error.
+       *)
+
+    method attribute_violates_standalone_declaration : 
+               string -&gt; string option -&gt; bool
+      (* attribute_violates_standalone_declaration name v:
+       * Checks whether the attribute &apos;name&apos; violates the "standalone"
+       * declaration if it has value &apos;v&apos;.
+       * The method returns true if:
+       * - The attribute declaration occurs in an external entity, 
+       * and if one of the two conditions holds:
+       * - v = None, and there is a default for the attribute value
+       * - v = Some s, and the type of the attribute is not CDATA,
+       *   and s changes if normalized according to the rules of the
+       *   attribute type.
+       *
+       * The method raises Validation_error if the attribute does not exist.
+       * If &apos;arbitrary_allowed&apos;, the exception Undeclared is raised instead
+       * of Validation_error.
+       *)
+
+    method attribute_names : string list
+      (* get the list of all declared attributes *)
+
+    method names_of_required_attributes : string list
+      (* get the list of all attributes that are specified as required 
+       * attributes
+       *)
+
+    method id_attribute_name : string option
+      (* Returns the name of the attribute with type ID, or None. *)
+
+    method idref_attribute_names : string list
+      (* Returns the names of the attributes with type IDREF or IDREFS. *)
+
+    method add_attribute : string -&gt; 
+                           Pxp_types.att_type -&gt; 
+                          Pxp_types.att_default -&gt; 
+                          bool -&gt;
+                            unit
+      (* add_attribute name type default extdecl:
+       * add an attribute declaration for an attribute with the given name,
+       * type, and default value. If there is more than one declaration for
+       * an attribute name, the first declaration counts; the other declarations
+       * are ignored.
+       * &apos;extdecl&apos;: if true, the attribute declaration occurs in an external
+       * entity. This property is used to check the "standalone" attribute.
+       *)
+
+    method validate : unit
+      (* checks whether this element declaration (i.e. the content model and
+       * all attribute declarations) is valid for the associated DTD.
+       * Raises mostly Validation_error if the validation fails.
+       *)
+
+    method write : Pxp_types.output_stream -&gt; Pxp_types.encoding -&gt; unit
+      (* write os enc:
+       * Writes the &lt;!ELEMENT ... &gt; declaration to &apos;os&apos; as &apos;enc&apos;-encoded string.
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -&gt; unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+  end
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_notation : string -&gt; Pxp_types.ext_id -&gt; Pxp_types.rep_encoding -&gt;
+  (* Creation:
+   *    new dtd_notation a_name an_external_ID init_encoding
+   * creates a new dtd_notation object with the given name and the given
+   * external ID.
+   *)
+  object
+    method name : string
+    method ext_id : Pxp_types.ext_id
+    method encoding : Pxp_types.rep_encoding
+
+    method write : Pxp_types.output_stream -&gt; Pxp_types.encoding -&gt; unit
+      (* write os enc:
+       * Writes the &lt;!NOTATION ... &gt; declaration to &apos;os&apos; as &apos;enc&apos;-encoded 
+       * string.
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -&gt; unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+  end
+
+(* ---------------------------------------------------------------------- *)
+
+and proc_instruction : string -&gt; string -&gt; Pxp_types.rep_encoding -&gt;
+  (* Creation:
+   *   new proc_instruction a_target a_value
+   * creates a new proc_instruction object with the given target string and
+   * the given value string. 
+   * Note: A processing instruction is written as &lt;?target value?&gt;. 
+   *)
+  object
+    method target : string
+    method value : string
+    method encoding : Pxp_types.rep_encoding
+
+    method write : Pxp_types.output_stream -&gt; Pxp_types.encoding -&gt; unit
+      (* write os enc:
+       * Writes the &lt;?...?&gt; PI to &apos;os&apos; as &apos;enc&apos;-encoded string.
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -&gt; unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+    method parse_pxp_option : (string * string * (string * string) list)
+      (* Parses a PI containing a PXP option. Such PIs are formed like:
+       *   &lt;?target option-name option-att="value" option-att="value" ... ?&gt;
+       * The method returns a triple
+       *   (target, option-name, [option-att, value; ...])
+       * or raises Error.
+       *)
+
+  end
+
+;;
+
+'>
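As a rough illustration of how the class interface captured by these entities might be used (a sketch only, not part of the commit; it assumes a dtd object, for example one obtained from Pxp_yacc.parse_dtd_entity):

(* Sketch: list the declared elements of a DTD and how many attributes
 * each of them declares. *)
let describe_dtd (d : Pxp_dtd.dtd) =
  List.iter
    (fun name ->
       let el = d # element name in            (* a dtd_element object *)
       Printf.printf "element %s: %d declared attribute(s)\n"
         name
         (List.length (el # attribute_names)))
    (d # element_names)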
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/getcode.ml b/helm/DEVEL/pxp/pxp/doc/manual/src/getcode.ml
new file mode 100755 (executable)
index 0000000..4db6690
--- /dev/null
@@ -0,0 +1,56 @@
+#! /bin/sh
+# (*
+exec ocamlfattop "$0"
+*) directory ".";;
+
+open Str;;
+
+(* (*$ name *) starts a region copied into entity "name"; (*$ - *) ends it. *)
+let name_re = regexp "(\\*\\$[ \t]*\\([a-zA-Z0-9.-]*\\)[ \t]*\\*)";;
+(* characters that must be escaped inside an SGML entity value *)
+let subst_re = regexp "[<>&'%]";;
+
+let begin_entity name =
+  "<!ENTITY " ^  name ^ " '";;
+
+let end_entity () =
+  "'>\n"
+;;
+
+
+let text = ref "" in
+let within_entity = ref false in
+try
+  while true do
+    let line = read_line() in
+    if string_match name_re line 0 then begin
+      let name = matched_group 1 line in
+      if !within_entity then
+       text := !text ^ "\n" ^ end_entity();
+      within_entity := false;
+      if name <> "-" then begin
+       text := !text ^ begin_entity name;
+       within_entity := true
+      end
+    end
+    else
+      if !within_entity then begin
+       let line' =
+         global_substitute subst_re 
+           (fun s ->
+              let s' = matched_group 0 s in
+              match s' with
+                  "<" -> "&lt;"
+                | ">" -> "&gt;"
+                | "&" -> "&amp;"
+                | "'" -> "&apos;"
+                | "%" -> "&percent;"
+                | _ -> assert false)
+           line
+       in
+       text := !text ^ "\n" ^ line'
+      end
+  done;
+with End_of_file ->
+  if !within_entity then
+    text := !text ^ "\n" ^ end_entity();
+  print_string !text
+;;
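For illustration (hypothetical input and output, not taken from the repository): the script reads an OCaml source file on standard input, looks for marker comments of the form (*$ name *) and (*$ - *), and wraps each marked region into an SGML entity, escaping the characters matched by subst_re. An input fragment such as

(*$ markup-example.mli *)
val parse_dtd_entity : config -> source -> dtd
(*$ - *)

would be printed as

<!ENTITY markup-example.mli '
val parse_dtd_entity : config -&gt; source -&gt; dtd
'>

which matches the shape of the dtd.mli.ent entities added above.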
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/markup.css b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.css
new file mode 100644 (file)
index 0000000..67dfaec
--- /dev/null
@@ -0,0 +1,4 @@
+.acronym { 
+  font-weight: bold;
+  color: #c71585
+}
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/markup.dsl b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.dsl
new file mode 100644 (file)
index 0000000..cd9b1e2
--- /dev/null
@@ -0,0 +1,74 @@
+<!DOCTYPE style-sheet PUBLIC "-//James Clark//DTD DSSSL Style Sheet//EN" [
+
+<!-- The default is the print stylesheet. Call 'jade' with option '-ihtml'
+     to select the HTML stylesheet.
+  -->
+
+<!ENTITY % html "IGNORE">
+<![%html;[
+<!ENTITY % print "IGNORE">
+<!ENTITY docbook.dsl SYSTEM "docbook.dsl" CDATA dsssl>
+]]>
+<!ENTITY % print "INCLUDE">
+<![%print;[
+<!ENTITY docbook.dsl SYSTEM "docbook.dsl" CDATA dsssl>
+]]>
+]>
+<style-sheet>
+<style-specification use="docbook">
+<style-specification-body> 
+
+;; HTML:
+
+<![%html;[
+
+(define %footnotes-at-end%
+  ;; Should footnotes appear at the end of HTML pages?
+  #t)
+
+(define %html-ext% 
+  ;; Default extension for HTML output files
+  ".html")
+
+(define %root-filename%
+  ;; Name for the root HTML document
+  "index")
+
+(define %css-decoration%
+  ;; Enable CSS decoration of elements
+  #t)
+
+(define %stylesheet%
+  ;; Name of the stylesheet to use
+  "markup.css")
+
+(define %graphic-default-extension%
+  ;; Default extension for graphic FILEREFs
+  "gif")
+
+]]>
+
+;; printing:
+
+<![%print;[
+
+(define bop-footnotes
+  ;; Make "bottom-of-page" footnotes?
+  #t)
+
+(define %graphic-default-extension%
+  ;; Default extension for graphic FILEREFs
+  "ps")
+
+]]>
+
+;; both:
+
+(define %section-autolabel%
+   ;; Are sections enumerated?
+   #t)
+
+</style-specification-body>
+</style-specification>
+<external-specification id="docbook" document="docbook.dsl">
+</style-sheet>
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/markup.sgml b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.sgml
new file mode 100644 (file)
index 0000000..1cb2064
--- /dev/null
@@ -0,0 +1,5109 @@
+<!DOCTYPE book PUBLIC "-//Davenport//DTD DocBook V3.0//EN" [
+<!ENTITY markup "<acronym>PXP</acronym>">
+<!ENTITY pxp "<acronym>PXP</acronym>">
+<!ENTITY % readme.code.to-html SYSTEM "readme.ent">
+<!ENTITY apos "&#39;">
+<!ENTITY percent "&#37;">
+<!ENTITY % get.markup-yacc.mli SYSTEM "yacc.mli.ent">
+<!ENTITY % get.markup-dtd.mli SYSTEM "dtd.mli.ent">
+%readme.code.to-html;
+%get.markup-yacc.mli;
+%get.markup-dtd.mli;
+
+<!ENTITY fun "-&gt;">                       <!-- function type operator -->
+
+]>
+
+
+<book>
+
+  <title>The PXP user's guide</title>
+  <bookinfo>
+    <!-- <bookbiblio> -->
+    <authorgroup>
+      <author>
+       <firstname>Gerd</firstname>
+       <surname>Stolpmann</surname>
+       <authorblurb>
+         <para>
+        <address>
+          <email>gerd@gerd-stolpmann.de</email>
+        </address>
+      </para>
+       </authorblurb>
+      </author>
+    </authorgroup>
+    
+    <copyright>
+      <year>1999, 2000</year><holder>Gerd Stolpmann</holder>
+    </copyright>
+    <!-- </bookbiblio> -->
+
+    <abstract>
+      <para>
+&markup; is a validating parser for XML-1.0 which has been
+written entirely in Objective Caml.
+</para>
+      <formalpara>
+       <title>Download &markup;: </title>
+       <para>
+The free &markup; library can be downloaded at
+<ulink URL="http://www.ocaml-programming.de/packages/">
+http://www.ocaml-programming.de/packages/
+</ulink>. This user's guide is included.
+Newest releases of &markup; will be announced in
+<ulink URL="http://www.npc.de/ocaml/linkdb/">The OCaml Link
+Database</ulink>.
+</para>
+      </formalpara>
+    </abstract>
+
+    <legalnotice>
+      <title>License</title>
+      <para>
+This document, and the described software, "&markup;", are copyright by
+Gerd Stolpmann. 
+</para>
+
+<para>
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this document and the "&markup;" software (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+</para>
+      <para>
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+</para>
+      <para>
+The Software is provided ``as is'', without warranty of any kind, express
+or implied, including but not limited to the warranties of
+merchantability, fitness for a particular purpose and noninfringement.
+In no event shall Gerd Stolpmann be liable for any claim, damages or
+other liability, whether in an action of contract, tort or otherwise,
+arising from, out of or in connection with the Software or the use or
+other dealings in the software.
+</para>
+    </legalnotice>
+
+  </bookinfo>
+
+
+<!-- ********************************************************************** -->
+
+  <part>
+    <title>User's guide</title>
+    
+    <chapter>
+      <title>What is XML?</title>
+
+      <sect1>
+       <title>Introduction</title>
+
+       <para>XML (short for <emphasis>Extensible Markup Language</emphasis>)
+generalizes the idea that text documents are typically structured in sections,
+sub-sections, paragraphs, and so on. The format of the document is not fixed
+(as, for example, in HTML), but can be declared by a so-called DTD (document
+type definition). The DTD describes only the rules for how the document can be
+structured, but not how the document can be processed. For example, if you want
+to publish a book that uses XML markup, you will need a processor that converts
+the XML file into a printable format such as Postscript. On the one hand, the
+structure of XML documents is configurable; on the other hand, there is no
+longer a canonical interpretation of the elements of the document; for example
+one XML DTD might require that paragraphs be delimited by
+<literal>para</literal> tags, while another DTD expects <literal>p</literal> tags
+for the same purpose. As a result, for every DTD a new processor is required.
+</para>
+
+       <para>
+Although XML can be used to express structured text documents, it is not limited
+to this kind of application. For example, XML can also be used to exchange
+structured data over a network, or to simply store structured data in
+files. Note that XML documents cannot contain arbitrary binary data because
+some characters are forbidden; for some applications you need to encode binary
+data as text (e.g. the base 64 encoding).
+</para>
+
+
+       <sect2>
+         <title>The "hello world" example</title>
+       <para>
+The following example shows a very simple DTD, and a corresponding document
+instance. The document is structured such that it consists of sections, and
+that sections consist of paragraphs, and that paragraphs contain plain text:
+</para>
+
+       <programlisting>
+<![CDATA[<!ELEMENT document (section)+>
+<!ELEMENT section (paragraph)+>
+<!ELEMENT paragraph (#PCDATA)>
+]]>
+</programlisting>
+
+       <para>The following document is an instance of this DTD:</para>
+      
+       <programlisting>
+<![CDATA[<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE document SYSTEM "simple.dtd">
+<document>
+  <section>
+    <paragraph>This is a paragraph of the first section.</paragraph>
+    <paragraph>This is another paragraph of the first section.</paragraph>
+  </section>
+  <section>
+    <paragraph>This is the only paragraph of the second section.</paragraph>
+  </section>
+</document>
+]]>
+</programlisting>
+
+       <para>As in HTML (and, of course, in grand-father SGML), the "pieces" of
+the document are delimited by element braces, i.e. such a piece begins with
+<literal>&lt;name-of-the-type-of-the-piece&gt;</literal> and ends with
+<literal>&lt;/name-of-the-type-of-the-piece&gt;</literal>, and the pieces are
+called <emphasis>elements</emphasis>. Unlike HTML and SGML, both start tags and
+end tags (i.e. the delimiters written in angle brackets) can never be left
+out. For example, HTML calls the paragraphs simply <literal>p</literal>, and
+because paragraphs never contain paragraphs, a sequence of several paragraphs
+can be written as:
+
+<programlisting><![CDATA[<p>First paragraph 
+<p>Second paragraph]]></programlisting>
+
+This is not possible in XML; continuing our example above we must always write
+
+<programlisting><![CDATA[<paragraph>First paragraph</paragraph>
+<paragraph>Second paragraph</paragraph>]]></programlisting>
+
+The rationale behind that is to (1) simplify the development of XML parsers
+(you need not convert the DTD into a deterministic finite automaton which is
+required to detect omitted tags), and to (2) make it possible to parse the
+document independently of whether the DTD is known or not.
+</para>
+
+<para>
+The first line of our sample document,
+
+<programlisting>
+<![CDATA[<?xml version="1.0" encoding="ISO-8859-1"?>]]>
+</programlisting>
+
+is the so-called <emphasis>XML declaration</emphasis>. It expresses that the
+document follows the conventions of XML version 1.0, and that the document is
+encoded using characters from the ISO-8859-1 character set (often known as
+"Latin 1", mostly used in Western Europe). Although the XML declaration is not
+mandatory, it is good style to include it; everybody sees at the first glance
+that the document uses XML markup and not the similar-looking HTML and SGML
+markup languages. If you omit the XML declaration, the parser will assume
+that the document is encoded as UTF-8 or UTF-16 (there is a rule that makes
+it possible to distinguish between UTF-8 and UTF-16 automatically); these
+are encodings of Unicode's universal character set. (Note that &pxp;, unlike its
+predecessor "Markup", fully supports Unicode.)
+</para>
+
+<para>
+The second line,
+
+<programlisting>
+<![CDATA[<!DOCTYPE document SYSTEM "simple.dtd">]]>
+</programlisting>
+
+names the DTD that is going to be used for the rest of the document. In
+general, it is possible that the DTD consists of two parts, the so-called
+external and the internal subset. "External" means that the DTD exists as a
+second file; "internal" means that the DTD is included in the same file. In
+this example, there is only an external subset, and the system identifier
+"simple.dtd" specifies where the DTD file can be found. System identifiers are
+interpreted as URLs; for instance this would be legal:
+
+<programlisting>
+<![CDATA[<!DOCTYPE document SYSTEM "http://host/location/simple.dtd">]]>
+</programlisting>
+
+Please note that &pxp; cannot interpret HTTP identifiers by default, but it is
+possible to change the interpretation of system identifiers.
+</para>
+
+       <para>
+The word immediately following <literal>DOCTYPE</literal> determines which of
+the declared element types (here "document", "section", and "paragraph") is
+used for the outermost element, the <emphasis>root element</emphasis>. In this
+example it is <literal>document</literal> because the outermost element is
+delimited by <literal>&lt;document&gt;</literal> and
+<literal>&lt;/document&gt;</literal>. 
+</para>
+
+       <para>
+The DTD consists of three declarations for element types:
+<literal>document</literal>, <literal>section</literal>, and
+<literal>paragraph</literal>. Such a declaration has two parts:
+
+<programlisting>
+&lt;!ELEMENT <replaceable>name</replaceable> <replaceable>content-model</replaceable>&gt;
+</programlisting>
+
+The content model is a regular expression which describes the possible inner
+structure of the element. Here, <literal>document</literal> contains one or
+more sections, and a <literal>section</literal> contains one or more
+paragraphs. Note that these two element types are not allowed to contain
+arbitrary text. Only the <literal>paragraph</literal> element type is declared
+such that parsed character data (indicated by the symbol
+<literal>#PCDATA</literal>) is permitted.
+</para>
+
+       <para>
+See below for a detailed discussion of content models. 
+</para>
+       </sect2>
+
+       <sect2>
+         <title>XML parsers and processors</title>
+         <para>
+XML documents are human-readable, but this is not the main purpose of this
+language. XML has been designed such that documents can be read by a program
+called an <emphasis>XML parser</emphasis>. The parser checks that the document
+is well-formatted, and it represents the document as objects of the programming
+language. There are two aspects when checking the document: First, the document
+must follow some basic syntactic rules, such as that tags are written in angle
+brackets, that for every start tag there must be a corresponding end tag and so
+on. A document respecting these rules is
+<emphasis>well-formed</emphasis>. Second, the document must match the DTD, in
+which case the document is <emphasis>valid</emphasis>. Many parsers check only
+for well-formedness and ignore the DTD; &pxp; is designed such that it can
+also validate the document.
+</para>
+
+         <para>
+A parser alone does not make a sensible application; it only reads XML
+documents. The whole application working with XML-formatted data is called an
+<emphasis>XML processor</emphasis>. Often XML processors convert documents into
+another format, such as HTML or PostScript. Sometimes processors extract data
+from the documents and output the processed data again as XML. The parser
+can help the application process the document; for example, it can provide
+means to access the document in a specific manner. In particular, &pxp;
+provides an object-oriented access layer.
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Discussion</title>
+         <para>
+As we have seen, there are two levels of description: On the one hand, XML can
+define rules about the format of a document (the DTD), on the other hand, XML
+expresses structured documents. There are a number of possible applications:
+</para>
+
+         <itemizedlist mark="bullet" spacing="compact">
+           <listitem>
+             <para>
+XML can be used to express structured texts. Unlike HTML, there is no canonical
+interpretation; one would have to write a backend for the DTD that translates
+the structured texts into a format that existing browsers, printers
+etc. understand. The advantage of a self-defined document format is that it is
+possible to design the format in a more problem-oriented way. For example, if
+the task is to extract reports from a database, one can use a DTD that reflects
+the structure of the report or the database. A possible approach would be to
+have an element type for every database table and for every column. Once the
+DTD has been designed, the report procedure can be split into a part that
+selects the database rows and outputs them as an XML document according to the
+DTD, and a part that translates the document into other formats. Of course,
+the latter part can be solved in a generic way, e.g. there may be configurable
+backends for all DTDs that follow the approach and have element types for
+tables and columns.
+</para>
+             
+             <para>
+XML plays the role of a configurable intermediate format. The database
+extraction function can be written without having to know the details of
+typesetting; the backends can be written without having to know the details of
+the database.
+</para>
+
+             <para>
+Of course, there are traditional solutions. One can define an ad hoc
+intermediate text file format. The disadvantage is that there are no names for
+the pieces of the format, and that such formats therefore usually lack
+documentation. Another solution would be to have a binary representation,
+either as a language-dependent or a language-independent structure (examples of
+the latter can be found in RPC implementations). The disadvantage is that it is
+harder to view such representations; one has to write pretty printers for this
+purpose. It is also more difficult to enter test data; XML is plain text that
+can be written using an arbitrary editor (Emacs even has a good XML mode,
+PSGML). All these alternatives suffer from the lack of a structure checker,
+i.e. the programs processing these formats usually do not check the input file
+or input object in detail; XML parsers check the syntax of the input (the
+so-called well-formedness check), and advanced parsers like &markup; even
+verify that the structure matches the DTD (the so-called validation).
+</para>
+             
+           </listitem>
+
+           <listitem>
+             <para>
+XML can be used as a configurable communication language. A fundamental problem
+of every communication is that sender and receiver must follow the same
+conventions about the language. For data exchange, the question is usually
+which data records and fields are available, how they are syntactically
+composed, and which values are possible for the various fields. Similar
+questions arise for text document exchange. XML does not answer these problems
+completely, but it reduces the number of ambiguities for such conventions: The
+outlines of the syntax are specified by the DTD (but not necessarily the
+details), and XML introduces canonical names for the components of documents
+such that it is simpler to describe the rest of the syntax and the semantics
+informally.
+</para>
+           </listitem>
+
+           <listitem>
+             <para>
+XML is a data storage format. Currently, every software product tends to use
+its own way to store data; commercial software often does not describe such
+formats, and it is a pain to integrate such software into a bigger project. 
+XML can help to improve this situation when several applications share the same
+syntax of data files. DTDs are then neutral instances that check the format of
+data files independently of the applications.
+</para>
+           </listitem>
+
+         </itemizedlist>
+       </sect2>
+      </sect1>
+
+
+      <!-- ================================================== -->
+
+
+      <sect1>
+       <title>Highlights of XML</title>
+
+       <para>
+This section explains many of the features of XML, but not all of them, and
+some only briefly. For a complete description, see the <ulink
+url="http://www.w3.org/TR/1998/REC-xml-19980210.html">XML
+specification</ulink>.
+</para>
+
+       <sect2>
+         <title>The DTD and the instance</title>
+         <para>
+The DTD contains various declarations; in general you can only use a feature if
+you have previously declared it. The document instance file may contain the
+full DTD, but it is also possible to split the DTD into an internal and an
+external subset. A document must begin as follows if the full DTD is included:
+
+<programlisting>
+&lt;?xml version="1.0" encoding="<replaceable>Your encoding</replaceable>"?&gt;
+&lt;!DOCTYPE <replaceable>root</replaceable> [
+  <replaceable>Declarations</replaceable>
+]&gt;
+</programlisting>
+
+These declarations are called the <emphasis>internal subset</emphasis>. Note
+that the usage of entities and conditional sections is restricted within the
+internal subset.
+</para>
+         <para>
+If the declarations are located in a different file, you can refer to this file
+as follows:
+
+<programlisting>
+&lt;?xml version="1.0" encoding="<replaceable>Your encoding</replaceable>"?&gt;
+&lt;!DOCTYPE <replaceable>root</replaceable> SYSTEM "<replaceable>file name</replaceable>"&gt;
+</programlisting>
+
+The declarations in the file are called the <emphasis>external
+subset</emphasis>. The file name is called the <emphasis>system
+identifier</emphasis>. 
+It is also possible to refer to the file by a so-called
+<emphasis>public identifier</emphasis>, but most XML applications won't use
+this feature.
+</para>
+         <para>
+You can also specify both internal and external subsets. In this case, the
+declarations of both subsets are merged, and if there are conflicts, the
+declarations of the internal subset override those of the external subset with
+the same name. This looks as follows:
+
+<programlisting>
+&lt;?xml version="1.0" encoding="<replaceable>Your encoding</replaceable>"?&gt;
+&lt;!DOCTYPE <replaceable>root</replaceable>  SYSTEM "<replaceable>file name</replaceable>" [
+  <replaceable>Declarations</replaceable>
+]&gt;
+</programlisting>
+</para>
+
+         <para>
+The XML declaration (the string beginning with <literal>&lt;?xml</literal> and
+ending at <literal>?&gt;</literal>) should specify the encoding of the
+file. Common values are UTF-8 and the ISO-8859 series of character sets. Note
+that every file parsed by the XML processor can begin with an XML declaration
+and that every file may have its own encoding.
+</para>
+
+         <para>
+The name of the root element must be mentioned directly after the
+<literal>DOCTYPE</literal> string. This means that a full document instance
+looks like
+
+<programlisting>
+&lt;?xml version="1.0" encoding="<replaceable>Your encoding</replaceable>"?&gt;
+&lt;!DOCTYPE <replaceable>root</replaceable>  SYSTEM "<replaceable>file name</replaceable>" [
+  <replaceable>Declarations</replaceable>
+]&gt;
+
+&lt;<replaceable>root</replaceable>&gt;
+  <replaceable>inner contents</replaceable>
+&lt;/<replaceable>root</replaceable>&gt;
+</programlisting>
+</para>
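+
+         <blockquote>
+           <title>Example</title>
+           <para>
+As an illustration (the file name <literal>note.dtd</literal> and the
+declarations are invented for this example), a small document that combines an
+external subset with an internal subset could look like this:
+
+<programlisting>
+<![CDATA[<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE note SYSTEM "note.dtd" [
+  <!ENTITY signature "The author">
+]>
+<note>Written by &signature;.</note>
+]]></programlisting>
+
+Here the external subset "note.dtd" would contain the declaration of the
+<literal>note</literal> element, while the internal subset adds the
+<literal>signature</literal> entity.
+</para>
+         </blockquote>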
+       </sect2>
+
+        <!-- ======================================== -->
+
+       <sect2>
+         <title>Reserved characters</title>
+         <para>
+Some characters are generally reserved to indicate markup, so they cannot be
+used directly as character data. These characters are &lt;, &gt;, and
+&amp;. Furthermore, single and double quotes are sometimes reserved. If you
+want to include such a character as ordinary character data, write it as follows:
+
+<itemizedlist mark="bullet" spacing="compact">
+             <listitem>
+               <para>
+<literal>&amp;lt;</literal> instead of &lt;
+</para>
+             </listitem>
+             <listitem>
+               <para>
+<literal>&amp;gt;</literal> instead of &gt;
+</para>
+             </listitem>
+             <listitem>
+               <para>
+<literal>&amp;amp;</literal> instead of &amp;
+</para>
+             </listitem>
+             <listitem>
+               <para>
+<literal>&amp;apos;</literal> instead of '
+</para>
+             </listitem>
+             <listitem>
+               <para>
+<literal>&amp;quot;</literal> instead of "
+</para>
+             </listitem>
+           </itemizedlist>
+
+All other characters are free in the document instance. It is possible to
+include a character by its position in the Unicode alphabet: 
+
+<programlisting>
+&amp;#<replaceable>n</replaceable>;
+</programlisting>
+
+where <replaceable>n</replaceable> is the decimal number of the
+character. Alternatively, you can specify the character by its hexadecimal
+number: 
+
+<programlisting>
+&amp;#x<replaceable>n</replaceable>;
+</programlisting>
+
+In the scope of declarations, the character % is no longer free. To include it
+as a character, you must use the notations <literal>&amp;#37;</literal> or
+<literal>&amp;#x25;</literal>.
+</para>
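+
+         <blockquote>
+           <title>Example</title>
+           <para>
+For instance, the copyright sign has the position 169 (hexadecimal A9) in
+Unicode, so it can be written in either of these two ways:
+
+<programlisting>
+<![CDATA[Copyright &#169; and, equivalently, Copyright &#xA9;]]>
+</programlisting>
+
+Both references denote the same character.
+</para>
+         </blockquote>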
+
+         <para>Note that besides &amp;lt;, &amp;gt;, &amp;amp;,
+&amp;apos;, and &amp;quot; there are no predefined character entities. This is
+different from HTML, which defines a list of characters that can be referenced
+by name (e.g. &amp;auml; for ä); however, if you prefer named characters, you
+can declare such entities yourself (see below).</para>
+       </sect2>
+
+
+        <!-- ======================================== -->
+
+       <sect2>
+         <title>Elements and ELEMENT declarations</title>
+
+         <para>
+Elements structure the document instance in a hierarchical way. There is a
+top-level element, the <emphasis>root element</emphasis>, which contains a
+sequence of inner elements and character sections. The inner elements are
+structured in the same way. Every element has an <emphasis>element
+type</emphasis>. The beginning of the element is indicated by a <emphasis>start
+tag</emphasis>, written
+
+<programlisting>
+&lt;<replaceable>element-type</replaceable>&gt;
+</programlisting>
+
+and the element continues until the corresponding <emphasis>end tag</emphasis>
+is reached:
+
+<programlisting>
+&lt;/<replaceable>element-type</replaceable>&gt;
+</programlisting>
+
+In XML, it is not allowed to omit start or end tags, even if the DTD would
+permit this. Note that there are no special rules for interpreting spaces or
+newlines near start or end tags; all spaces and newlines count.
+</para>
+
+         <para>
+Every element type must be declared before it can be used. The declaration
+consists of two parts: the ELEMENT declaration describes the content model,
+i.e. which inner elements are allowed; the ATTLIST declaration describes the
+attributes of the element.
+</para>
+
+         <para>
+An element can simply allow everything as content. This is written:
+
+<programlisting>
+&lt;!ELEMENT <replaceable>name</replaceable> ANY&gt;
+</programlisting>
+
+At the other extreme, an element can be forced to be empty; this is declared by:
+
+<programlisting>
+&lt;!ELEMENT <replaceable>name</replaceable> EMPTY&gt;
+</programlisting>
+
+Note that there is an abbreviated notation for empty element instances:
+<literal>&lt;<replaceable>name</replaceable>/&gt;</literal>. 
+</para>
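+
+         <blockquote>
+           <title>Example</title>
+           <para>
+A small, invented DTD fragment that uses both forms:
+
+<programlisting>
+<![CDATA[<!ELEMENT container ANY>
+<!ELEMENT br EMPTY>
+]]></programlisting>
+
+With these declarations,
+<literal><![CDATA[<container>Some text<br/></container>]]></literal> is a legal
+instance: <literal>container</literal> may contain character data and any
+declared element, while <literal>br</literal> must remain empty.
+</para>
+         </blockquote>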
+
+         <para>
+There are two more sophisticated forms of declarations: so-called
+<emphasis>mixed declarations</emphasis>, and <emphasis>regular
+expressions</emphasis>. An element with mixed content contains character data
+interspersed with inner elements, and the set of allowed inner elements can be
+specified. In contrast to this, a regular expression declaration does not allow
+character data, but the inner elements can be described by the more powerful
+means of regular expressions.
+</para>
+
+         <para>
+A declaration for mixed content looks as follows:
+
+<programlisting>
+&lt;!ELEMENT <replaceable>name</replaceable> (#PCDATA | <replaceable>element<subscript>1</subscript></replaceable> | ... | <replaceable>element<subscript>n</subscript></replaceable> )*&gt;
+</programlisting>
+
+or if you do not want to allow any inner element, simply
+
+<programlisting>
+&lt;!ELEMENT <replaceable>name</replaceable> (#PCDATA)&gt;
+</programlisting>
+</para>
+
+
+<blockquote>
+             <title>Example</title>
+             <para>
+If element type <literal>q</literal> is declared as
+
+<programlisting>
+<![CDATA[<!ELEMENT q (#PCDATA | r | s)*>]]>
+</programlisting>
+
+this is a legal instance:
+
+<programlisting>
+<![CDATA[<q>This is character data<r></r>with <s></s>inner elements</q>]]>
+</programlisting>
+
+But this is illegal because <literal>t</literal> has not been enumerated in the
+declaration:
+
+<programlisting>
+<![CDATA[<q>This is character data<r></r>with <t></t>inner elements</q>]]>
+</programlisting>
+</para>
+           </blockquote>
+         
+         <para>
+The other form uses a regular expression to describe the possible contents:
+
+<programlisting>
+&lt;!ELEMENT <replaceable>name</replaceable> <replaceable>regexp</replaceable>&gt;
+</programlisting>
+
+The following well-known regexp operators are allowed:
+
+<itemizedlist mark="bullet" spacing="compact">
+             <listitem>
+               <para>
+<literal><replaceable>element-name</replaceable></literal>
+</para>
+             </listitem>
+             
+             <listitem>
+               <para>
+<literal>(<replaceable>subexpr<subscript>1</subscript></replaceable> ,</literal> ... <literal>, <replaceable>subexpr<subscript>n</subscript></replaceable> )</literal>
+</para>
+             </listitem>
+             
+             <listitem>
+               <para>
+<literal>(<replaceable>subexpr<subscript>1</subscript></replaceable> |</literal> ... <literal>| <replaceable>subexpr<subscript>n</subscript></replaceable> )</literal>
+</para>
+             </listitem>
+             
+             <listitem>
+               <para>
+<literal><replaceable>subexpr</replaceable>*</literal>
+</para>
+             </listitem>
+             
+             <listitem>
+               <para>
+<literal><replaceable>subexpr</replaceable>+</literal>
+</para>
+             </listitem>
+             
+             <listitem>
+               <para>
+<literal><replaceable>subexpr</replaceable>?</literal>
+</para>
+             </listitem>
+           </itemizedlist>
+
+The <literal>,</literal> operator indicates a sequence of sub-models, the
+<literal>|</literal> operator describes alternative sub-models. The
+<literal>*</literal> indicates zero or more repetitions, and
+<literal>+</literal> one or more repetitions. Finally, <literal>?</literal> can
+be used for optional sub-models. As atoms the regexp can contain names of
+elements; note that it is not allowed to include <literal>#PCDATA</literal>.
+</para>
+
+         <para>
+The exact syntax of the regular expressions is rather strange. It is best
+explained by a list of constraints:
+
+<itemizedlist mark="bullet" spacing="compact">
+             <listitem>
+               <para>
+The outermost expression must not be
+<literal><replaceable>element-name</replaceable></literal>. 
+</para>
+               <para><emphasis>Illegal:</emphasis> 
+<literal><![CDATA[<!ELEMENT x y>]]></literal>; this must be written as
+<literal><![CDATA[<!ELEMENT x (y)>]]></literal>.</para>
+             </listitem>
+             <listitem>
+               <para>
+For the unary operators <literal><replaceable>subexpr</replaceable>*</literal>,
+<literal><replaceable>subexpr</replaceable>+</literal>, and
+<literal><replaceable>subexpr</replaceable>?</literal>, the
+<literal><replaceable>subexpr</replaceable></literal> must not itself be a
+unary operator.
+</para>
+               <para><emphasis>Illegal:</emphasis> 
+<literal><![CDATA[<!ELEMENT x y**>]]></literal>; this must be written as
+<literal><![CDATA[<!ELEMENT x (y*)*>]]></literal>.</para>
+      </listitem>
+             <listitem>
+               <para>
+Between <literal>)</literal> and one of the unary operators
+<literal>*</literal>, <literal>+</literal>, or <literal>?</literal>, there must
+not be whitespace.</para>
+               <para><emphasis>Illegal:</emphasis> 
+<literal><![CDATA[<!ELEMENT x (y|z) *>]]></literal>; this must be written as
+<literal><![CDATA[<!ELEMENT x (y|z)*>]]></literal>.</para>
+             </listitem>
+             <listitem><para>There is the additional constraint that the
+right parenthesis must be contained in the same entity as the left parenthesis;
+see the section about parsed entities below.</para>
+             </listitem>
+           </itemizedlist>
+
+</para>
+
+<para>
+Note that there is another restriction on regular expressions: they must be
+deterministic. This means that the parser must be able to tell, by looking at
+the next token, which alternative is actually used, or whether the repetition
+stops. The reason for this is simply compatibility with SGML (there is no
+intrinsic reason for this rule; XML could live without this restriction).
+</para>
+
+         <blockquote>
+           <title>Example</title>
+           <para>
+The elements are declared as follows:
+
+<programlisting>
+<![CDATA[<!ELEMENT q (r?, (s | t)+)>
+<!ELEMENT r (#PCDATA)>
+<!ELEMENT s EMPTY>
+<!ELEMENT t (q | r)>
+]]></programlisting>
+
+This is a legal instance:
+
+<programlisting>
+<![CDATA[<q><r>Some characters</r><s/></q>]]>
+</programlisting>
+
+(Note: <literal>&lt;s/&gt;</literal> is an abbreviation for
+<literal>&lt;s&gt;&lt;/s&gt;</literal>.)
+
+It would be illegal to leave <literal><![CDATA[<s/>]]></literal> out because at
+least one instance of <literal>s</literal> or <literal>t</literal> must be
+present. It would also be illegal if characters occurred outside the
+<literal>r</literal> element; the only exception is white space. The following
+is legal, too:
+
+<programlisting>
+<![CDATA[<q><s/><t><q><s/></q></t></q>]]>
+</programlisting>
+</para>
+         </blockquote>
+
+       </sect2>
+
+        <!-- ======================================== -->
+
+       <sect2>
+         <title>Attribute lists and ATTLIST declarations</title>
+         <para>
+Elements may have attributes. These are put into the start tag of an element as
+follows:
+
+<programlisting>
+&lt;<replaceable>element-name</replaceable> <replaceable>attribute<subscript>1</subscript></replaceable>="<replaceable>value<subscript>1</subscript></replaceable>" ... <replaceable>attribute<subscript>n</subscript></replaceable>="<replaceable>value<subscript>n</subscript></replaceable>"&gt;
+</programlisting>
+
+Instead of
+<literal>"<replaceable>value<subscript>k</subscript></replaceable>"</literal>
+it is also possible to use single quotes as in
+<literal>'<replaceable>value<subscript>k</subscript></replaceable>'</literal>.
+Note that you cannot use double quotes literally within the value of the
+attribute if double quotes are the delimiters; the same applies to single
+quotes. You generally cannot use &lt; and &amp; as characters in attribute
+values. It is possible to include the paraphrases &amp;lt;, &amp;gt;,
+&amp;amp;, &amp;apos;, and &amp;quot; (and any other reference to a general
+entity as long as the entity is not defined by an external file) as well as
+&amp;#<replaceable>n</replaceable>;.
+</para>
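+
+         <blockquote>
+           <title>Example</title>
+           <para>
+For instance, an ampersand inside an attribute value must be written as
+<literal>&amp;amp;</literal> (the element and attribute names are made up for
+this illustration):
+
+<programlisting>
+<![CDATA[<link target="lookup.cgi?name=Miller&amp;city=Rome"/>]]>
+</programlisting>
+</para>
+         </blockquote>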
+
+         <para>
+Before you can use an attribute you must declare it. An ATTLIST declaration
+looks as follows:
+
+<programlisting>
+&lt;!ATTLIST <replaceable>element-name</replaceable> 
+          <replaceable>attribute-name</replaceable> <replaceable>attribute-type</replaceable> <replaceable>attribute-default</replaceable>
+          ...
+          <replaceable>attribute-name</replaceable> <replaceable>attribute-type</replaceable> <replaceable>attribute-default</replaceable>
+&gt;
+</programlisting>
+
+There are a lot of types, but most important are:
+
+<itemizedlist mark="bullet" spacing="compact">
+             <listitem>
+               <para>
+<literal>CDATA</literal>: Every string is allowed as attribute value.
+</para>
+             </listitem>
+             <listitem>
+               <para>
+<literal>NMTOKEN</literal>: Every nametoken is allowed as attribute
+value. Nametokens consist (mainly) of letters, digits, ., :, -, _ in arbitrary
+order.
+</para>
+             </listitem>
+             <listitem>
+               <para>
+<literal>NMTOKENS</literal>: A space-separated list of nametokens is allowed as
+attribute value.
+</para>
+             </listitem>
+           </itemizedlist>
+
+The most interesting default declarations are:
+
+<itemizedlist mark="bullet" spacing="compact">
+             <listitem>
+               <para>
+<literal>#REQUIRED</literal>: The attribute must be specified.
+</para>
+             </listitem>
+             <listitem>
+               <para>
+<literal>#IMPLIED</literal>: The attribute can be specified but also can be
+left out. The application can find out whether the attribute was present or
+not. 
+</para>
+             </listitem>
+             <listitem>
+               <para>
+<literal>"<replaceable>value</replaceable>"</literal> or
+<literal>'<replaceable>value</replaceable>'</literal>: This particular value is
+used as default if the attribute is omitted in the element.
+</para>
+             </listitem>
+           </itemizedlist>
+</para>
+
+         <blockquote>
+           <title>Example</title>
+           <para>
+This is a valid attribute declaration for element type <literal>r</literal>:
+
+<programlisting>
+<![CDATA[<!ATTLIST r 
+          x CDATA    #REQUIRED
+          y NMTOKEN  #IMPLIED
+          z NMTOKENS "one two three">
+]]></programlisting>
+
+This means that <literal>x</literal> is a required attribute that cannot be
+left out, while <literal>y</literal> and <literal>z</literal> are optional. The
+XML parser indicates to the application whether <literal>y</literal> is present or
+not, but if <literal>z</literal> is missing the default value
+"one two three" is returned automatically. 
+</para>
+
+           <para>
+This is a valid example of these attributes:
+
+<programlisting>
+<![CDATA[<r x="He said: &quot;I don't like quotes!&quot;" y='1'>]]>
+</programlisting>
+</para>
+         </blockquote>
+
+       </sect2>
+
+       <sect2>
+         <title>Parsed entities</title>
+         <para>
+Elements describe the logical structure of the document, while
+<emphasis>entities</emphasis> determine the physical structure. Entities are
+the pieces of text the parser operates on, mostly files and macros. Entities
+may be <emphasis>parsed</emphasis>, in which case the parser reads the text and
+interprets it as XML markup, or <emphasis>unparsed</emphasis>, which simply
+means that the data of the entity has a foreign format (e.g. a GIF icon).
+</para>
+
+         <para>If the parsed entity is going to be used as part of the DTD, it
+is called a <emphasis>parameter entity</emphasis>. You can declare a parameter
+entity with a fixed text as content by:
+
+<programlisting>
+&lt;!ENTITY % <replaceable>name</replaceable> "<replaceable>value</replaceable>"&gt;
+</programlisting>
+
+Within the DTD, you can <emphasis>refer to</emphasis> this entity, i.e. read
+the text of the entity, by:
+
+<programlisting>
+%<replaceable>name</replaceable>;
+</programlisting>
+
+Such entities behave like macros, i.e. when they are referred to, the
+macro text is inserted and read instead of the original text.
+
+<blockquote>
+             <title>Example</title>
+             <para>
+For example, you can declare two elements with the same content model by:
+
+<programlisting>
+<![CDATA[
+<!ENTITY % model "a | b | c">
+<!ELEMENT x (%model;)>
+<!ELEMENT y (%model;)>
+]]>
+</programlisting>
+
+</para>
+           </blockquote>
+
+If the contents of the entity are given as string constant, the entity is
+called an <emphasis>internal</emphasis> entity. It is also possible to name a
+file to be used as content (an <emphasis>external</emphasis> entity):
+
+<programlisting>
+&lt;!ENTITY % <replaceable>name</replaceable> SYSTEM "<replaceable>file name</replaceable>"&gt;
+</programlisting>
+
+There are some restrictions for parameter entities:
+
+<itemizedlist mark="bullet" spacing="compact">
+             <listitem>
+               <para>
+If the internal parameter entity contains the first token of a declaration
+(i.e. <literal>&lt;!</literal>), it must also contain the last token of the
+declaration, i.e. the <literal>&gt;</literal>. This means that the entity
+either contains a whole number of complete declarations, or some text from the
+middle of one declaration.
+</para>
+<para><emphasis>Illegal:</emphasis>
+<programlisting>
+<![CDATA[
+<!ENTITY % e "(a | b | c)>">
+<!ELEMENT x %e;
+]]></programlisting> Because <literal>&lt;!</literal> is contained in the main
+entity, and the corresponding <literal>&gt;</literal> is contained in the
+entity <literal>e</literal>.</para>
+             </listitem>
+             <listitem>
+               <para>
+If the internal parameter entity contains a left parenthesis, it must also
+contain the corresponding right parenthesis.
+</para>
+<para><emphasis>Illegal:</emphasis>
+<programlisting>
+<![CDATA[
+<!ENTITY % e "(a | b | c">
+<!ELEMENT x %e;)>
+]]></programlisting> Because <literal>(</literal> is contained in the entity 
+<literal>e</literal>, and the corresponding <literal>)</literal> is
+contained in the main entity.</para>
+             </listitem>
+             <listitem>
+               <para>
+When reading text from an entity, the parser automatically inserts one space
+character before the entity text and one space character after the entity
+text. However, this rule is not applied within the definition of another
+entity.</para>
+<para><emphasis>Legal:</emphasis>
+<programlisting>
+<![CDATA[ 
+<!ENTITY % suffix "gif"> 
+<!ENTITY iconfile 'icon.%suffix;'>
+]]></programlisting> Because <literal>%suffix;</literal> is referenced within
+the definition text for <literal>iconfile</literal>, no additional spaces are
+added.
+</para>
+<para><emphasis>Illegal:</emphasis>
+<programlisting>
+<![CDATA[
+<!ENTITY % suffix "test">
+<!ELEMENT x.%suffix; ANY>
+]]></programlisting>
+Because <literal>%suffix;</literal> is referenced outside the definition
+text of another entity, the parser replaces <literal>%suffix;</literal> by
+<literal><replaceable>space</replaceable>test<replaceable>space</replaceable></literal>. </para>
+<para><emphasis>Illegal:</emphasis>
+<programlisting>
+<![CDATA[
+<!ENTITY % e "(a | b | c)">
+<!ELEMENT x %e;*>
+]]></programlisting> Because there is whitespace between <literal>)</literal>
+and <literal>*</literal>, which is illegal.</para>
+             </listitem>
+             <listitem>
+               <para>
+An external parameter entity must always consist of a whole number of complete
+declarations.
+</para>
+             </listitem>
+             <listitem>
+               <para>
+In the internal subset of the DTD, a reference to a parameter entity (internal
+or external) is only allowed at positions where a new declaration can start.
+</para>
+             </listitem>
+           </itemizedlist>
+</para>
+
+         <para>
+If the parsed entity is going to be used in the document instance, it is called
+a <emphasis>general entity</emphasis>. Such entities can be used as
+abbreviations for frequent phrases, or to include external files. Internal
+general entities are declared as follows:
+
+<programlisting>
+&lt;!ENTITY <replaceable>name</replaceable> "<replaceable>value</replaceable>"&gt;
+</programlisting>
+
+External general entities are declared this way:
+
+<programlisting>
+&lt;!ENTITY <replaceable>name</replaceable> SYSTEM "<replaceable>file name</replaceable>"&gt;
+</programlisting>
+
+References to general entities are written as:
+
+<programlisting>
+&<replaceable>name</replaceable>;
+</programlisting>
+
+The main difference between parameter and general entities is that the former
+are only recognized in the DTD and that the latter are only recognized in the
+document instance. As the DTD is parsed before the document, the parameter
+entities are expanded first; for example it is possible to use the content of a
+parameter entity as the name of a general entity:
+<literal>&amp;#38;%name;;</literal><footnote><para>This construct is only
+allowed within the definition of another entity; otherwise extra spaces would
+be added (as explained above). Such indirection is not recommended.
+</para>
+<para>Complete example:
+<programlisting>
+<![CDATA[
+<!ENTITY % variant "a">      <!-- or "b" -->
+<!ENTITY text-a "This is text A.">
+<!ENTITY text-b "This is text B.">
+<!ENTITY text "&#38;text-%variant;;">
+]]></programlisting>
+You can now write <literal>&amp;text;</literal> in the document instance, and
+depending on the value of <literal>variant</literal> either
+<literal>text-a</literal> or <literal>text-b</literal> is inserted.</para>
+</footnote>.
+</para>
+         <para>
+General entities must respect the element hierarchy. This means that there must
+be an end tag for every start tag in the entity value, and that end tags
+without corresponding start tags are not allowed.
+</para>
+
+         <blockquote>
+           <title>Example</title>
+           <para>
+If the author of a document changes from time to time, it is worthwhile to set
+up a general entity containing the names of the authors. When the author
+changes, you only need to change the definition of the entity and do not have
+to check all occurrences of the authors' names:
+
+<programlisting>
+<![CDATA[
+<!ENTITY authors "Gerd Stolpmann">
+]]>
+</programlisting>
+
+In the document text, you can now refer to the author names by writing
+<literal>&amp;authors;</literal>.
+</para>
+
+           <para>
+<emphasis>Illegal:</emphasis>
+The following two entities are illegal because the elements in the definition
+do not nest properly:
+
+<programlisting>
+<![CDATA[
+<!ENTITY lengthy-tag "<section textcolor='white' background='graphic'>">
+<!ENTITY nonsense    "<a></b>">
+]]></programlisting>
+</para>
+         </blockquote>
+
+         <para>
+Earlier in this introduction we explained that there are substitutes for
+reserved characters: &amp;lt;, &amp;gt;, &amp;amp;, &amp;apos;, and
+&amp;quot;. These are simply predefined general entities; note that they are
+the only predefined entities. It is allowed to define these entities again
+as long as the meaning is unchanged.
+</para>
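+
+         <para>
+For reference, declarations equivalent to those given in the XML specification
+look as follows; <literal>lt</literal> and <literal>amp</literal> are doubly
+escaped so that references to them produce a well-formed result:
+
+<programlisting>
+<![CDATA[<!ENTITY lt   "&#38;#60;">
+<!ENTITY gt   "&#62;">
+<!ENTITY amp  "&#38;#38;">
+<!ENTITY apos "&#39;">
+<!ENTITY quot "&#34;">
+]]></programlisting>
+</para>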
+       </sect2>
+
+       <sect2>
+         <title>Notations and unparsed entities</title>
+         <para>
+Unparsed entities have a foreign format and can thus not be read by the XML
+parser. Unparsed entities are always external. The format of an unparsed entity
+must have been declared; such a format is called a
+<emphasis>notation</emphasis>. The entity can then be declared by referring to
+this notation. As unparsed entities do not contain XML text, it is not possible
+to include them directly into the document; you can only declare attributes
+such that names of unparsed entities are acceptable values.
+</para>
+
+         <para>
+As you can see, unparsed entities are too complicated to be of any practical
+use. It is almost always better to simply pass the name of the data file as a
+normal attribute value, and let the application recognize and process the
+foreign format. 
+</para>
+       </sect2>
+
+      </sect1>
+
+
+      <!-- ================================================== -->
+
+
+      <sect1 id="sect.readme.dtd">
+       <title>A complete example: The <emphasis>readme</emphasis> DTD</title>
+       <para>
+The reason for <emphasis>readme</emphasis> was that I often wrote two versions
+of files such as README and INSTALL which explain aspects of a distributed
+software archive; one version was ASCII-formatted, the other was written in
+HTML. Maintaining both versions means twice the amount of work, and changes
+to one version may be forgotten in the other. To improve this situation
+I invented the <emphasis>readme</emphasis> DTD, which allows me to maintain only
+one source written as an XML document, and to generate the ASCII and the HTML
+versions from it.
+</para>
+
+       <para>
+In this section, I explain only the DTD. The <emphasis>readme</emphasis> DTD is
+contained in the &markup; distribution together with the two converters to
+produce ASCII and HTML. Another <link
+linkend="sect.readme.to-html">section</link> of this manual describes the HTML
+converter.
+</para>
+
+       <para>
+The documents have a simple structure: There are up to three levels of nested
+sections, paragraphs, item lists, footnotes, hyperlinks, and text emphasis. The
+outermost element usually has the type <literal>readme</literal>; it is
+declared by
+
+<programlisting>
+<![CDATA[<!ELEMENT readme (sect1+)>
+<!ATTLIST readme
+          title CDATA #REQUIRED>
+]]></programlisting>
+
+This means that this element contains one or more sections of the first level
+(element type <literal>sect1</literal>), and that the element has a required
+attribute <literal>title</literal> containing character data (CDATA). Note that
+<literal>readme</literal> elements must not contain text data.
+</para>
+
+       <para>
+The three levels of sections are declared as follows:
+
+<programlisting>
+<![CDATA[<!ELEMENT sect1 (title,(sect2|p|ul)+)>
+
+<!ELEMENT sect2 (title,(sect3|p|ul)+)>
+
+<!ELEMENT sect3 (title,(p|ul)+)>
+]]></programlisting>
+
+Every section has a <literal>title</literal> element as its first
+subelement. After the title, an arbitrary but non-empty sequence of inner
+sections, paragraphs, and item lists follows. Note that the inner sections must
+belong to the next deeper section level; <literal>sect3</literal> elements must
+not contain inner sections because there is no deeper level.
+</para>
+
+       <para>
+Obviously, all three declarations allow paragraphs (<literal>p</literal>) and
+item lists (<literal>ul</literal>). The definition can be simplified at this
+point by using a parameter entity:
+
+<programlisting>
+<![CDATA[<!ENTITY % p.like "p|ul">
+
+<!ELEMENT sect1 (title,(sect2|%p.like;)+)>
+
+<!ELEMENT sect2 (title,(sect3|%p.like;)+)>
+
+<!ELEMENT sect3 (title,(%p.like;)+)>
+]]></programlisting>
+
+Here, the entity <literal>p.like</literal> is nothing but a macro abbreviating
+the same list of alternatives; if new elements on the same level as
+<literal>p</literal> and <literal>ul</literal> are later added, it is
+sufficient to change only the entity definition. Note that there are some
+restrictions on the usage of entities in this context; most importantly,
+entities containing a left parenthesis must also contain the corresponding
+right parenthesis.
+</para>
+
+       <para>
+Note that the entity <literal>p.like</literal> is a
+<emphasis>parameter</emphasis> entity, i.e. the ENTITY declaration contains a
+percent sign, and the entity is referred to by
+<literal>%p.like;</literal>. This kind of entity must be used to abbreviate
+parts of the DTD; the <emphasis>general</emphasis> entities declared without
+percent sign and referred to as <literal>&amp;name;</literal> are not allowed
+in this context.
+</para>
+
+       <para>
+The <literal>title</literal> element specifies the title of the section in
+which it occurs. The title is given as character data, optionally interspersed
+with line breaks (<literal>br</literal>):
+
+<programlisting>
+<![CDATA[<!ELEMENT title (#PCDATA|br)*>
+]]></programlisting>
+
+Compared with the <literal>title</literal> <emphasis>attribute</emphasis> of
+the <literal>readme</literal> element, this element allows inner markup
+(i.e. <literal>br</literal>) while attribute values do not: it is an error if
+an attribute value contains the left angle bracket &lt; literally, so it is
+impossible to include inner elements.
+</para>
+
+       <para>
+The paragraph element <literal>p</literal> has a structure similar to
+<literal>title</literal>, but it allows more inner elements:
+
+<programlisting>
+<![CDATA[<!ENTITY % text "br|code|em|footnote|a">
+
+<!ELEMENT p (#PCDATA|%text;)*>
+]]></programlisting>
+
+Line breaks do not have inner structure, so they are declared as being empty:
+
+<programlisting>
+<![CDATA[<!ELEMENT br EMPTY>
+]]></programlisting>
+
+This means that really nothing is allowed within <literal>br</literal>; you
+must always write <literal><![CDATA[<br></br>]]></literal> or abbreviated
+<literal><![CDATA[<br/>]]></literal>.
+</para>
+
+       <para>
+Code samples should be marked up by the <literal>code</literal> tag; emphasized
+text can be indicated by <literal>em</literal>:
+
+<programlisting>
+<![CDATA[<!ELEMENT code (#PCDATA)>
+
+<!ELEMENT em (#PCDATA|%text;)*>
+]]></programlisting>
+
+The fact that <literal>code</literal> elements are not allowed to contain
+further markup, while <literal>em</literal> elements are, is a design decision
+by the author of the DTD.
+</para>
+
+       <para>
+Unordered lists simply consist of one or more list items, and a list item may
+contain paragraph-level material:
+
+<programlisting>
+<![CDATA[<!ELEMENT ul (li+)>
+
+<!ELEMENT li (%p.like;)*>
+]]></programlisting>
+
+Footnotes are described by the text of the note; this text may contain
+text-level markup. There is no mechanism to describe the numbering scheme of
+footnotes, or to specify how footnote references are printed.
+
+<programlisting>
+<![CDATA[<!ELEMENT footnote (#PCDATA|%text;)*>
+]]></programlisting>
+
+Hyperlinks are written as in HTML. The anchor tag contains the text describing
+where the link points to, and the <literal>href</literal> attribute is the
+pointer (as a URL). There is no way to describe locations of "hash marks". If the
+link refers to another <emphasis>readme</emphasis> document, the attribute
+<literal>readmeref</literal> should be used instead of <literal>href</literal>.
+The reason is that the converted document usually has a different system
+identifier (file name), and the link to a converted document must be
+converted, too.
+
+<programlisting>
+<![CDATA[<!ELEMENT a (#PCDATA)*>
+<!ATTLIST a 
+          href      CDATA #IMPLIED
+          readmeref CDATA #IMPLIED
+>
+]]></programlisting>
+
+Note that although it is only sensible to specify one of the two attributes,
+the DTD has no means to express this restriction.
+</para>
+
+<para>
+So much for the DTD. Finally, here is a sample document for it:
+
+<programlisting>
+<![CDATA[
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd">
+<readme title="How to use the readme converters">
+<sect1>
+  <title>Usage</title>
+  <p>
+    The <em>readme</em> converter is invoked on the command line by:
+  </p>
+  <p>
+    <code>readme [ -text | -html ] input.xml</code>
+  </p>
+  <p>
+    Here a list of options:
+  </p>
+  <ul>
+    <li>
+      <p><code>-text</code>: specifies that ASCII output should be produced</p>
+    </li>
+    <li>
+      <p><code>-html</code>: specifies that HTML output should be produced</p>
+    </li>
+  </ul>
+  <p>
+    The input file must be given on the command line. The converted output is
+    printed to <em>stdout</em>.
+  </p>
+</sect1>
+<sect1>
+  <title>Author</title>
+  <p>
+    The program has been written by
+    <a href="mailto:Gerd.Stolpmann@darmstadt.netsurf.de">Gerd Stolpmann</a>.
+  </p>
+</sect1>
+</readme>
+]]></programlisting>
+
+</para>
+
+
+      </sect1>
+    </chapter>
+
+<!-- ********************************************************************** -->
+
+    <chapter>
+      <title>Using &markup;</title>
+
+      <sect1>
+       <title>Validation</title>
+       <para>
+The parser can be used to <emphasis>validate</emphasis> a document. This means
+that all the constraints that must hold for a valid document are actually
+checked. Validation is the default mode of &markup;, i.e. every document is
+validated while it is being parsed.
+</para>
+
+       <para>
+In the <literal>examples</literal> directory of the distribution you find the
+<literal>pxpvalidate</literal> application. It is invoked in the following way:
+
+<programlisting>
+pxpvalidate [ -wf ] <replaceable>file</replaceable>...
+</programlisting>
+
+The files mentioned on the command line are validated, and all warnings and
+error messages are printed to stderr.
+</para>
+
+       <para>
+The <literal>-wf</literal> switch modifies the behaviour such that a well-formedness parser is
+simulated. In this mode, the ELEMENT, ATTLIST, and NOTATION declarations of the
+DTD are ignored, and only the ENTITY declarations will take effect. This mode
+is intended for documents lacking a DTD. Please note that the parser still
+scans the DTD fully and will report all errors in the DTD; such checks are not
+required by a well-formedness parser.
+</para>
+
+       <para>
+The <literal>pxpvalidate</literal> application is the simplest sensible program
+using &markup;; you may consider it a "hello world" program.
+</para>
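+
+       <para>
+The following lines sketch a minimal program of this kind, using only functions
+that are introduced in the next section
+(<literal>parse_document_entity</literal>, <literal>default_config</literal>,
+<literal>from_file</literal>, <literal>default_spec</literal>, and
+<literal>Pxp_types.string_of_exn</literal>); it is not the actual
+<literal>pxpvalidate</literal> source, only an illustration of the idea:
+
+<programlisting>
+<![CDATA[open Pxp_yacc
+
+let () =
+  try
+    (* Parsing with the default configuration also validates the document. *)
+    let _doc =
+      parse_document_entity default_config (from_file Sys.argv.(1)) default_spec
+    in
+    print_endline "Document is valid."
+  with
+    e -> prerr_endline (Pxp_types.string_of_exn e)
+]]></programlisting>
+</para>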
+      </sect1>
+
+
+      <!-- ================================================== -->
+
+
+      <sect1>
+       <title>How to parse a document from an application</title>
+       <para>
+Let me first give a rough overview of the object model of the parser. The
+following items are represented by objects:
+
+<itemizedlist mark="bullet" spacing="compact">
+           <listitem>
+             <para>
+<emphasis>Documents:</emphasis> The document representation is more or less the
+anchor for the application; all accesses to the parsed entities start here. It
+is described by the class <literal>document</literal> contained in the module
+<literal>Pxp_document</literal>. You can get some global information, such
+as the XML declaration the document begins with, the DTD of the document,
+global processing instructions, and most importantly, the document tree.
+</para>
+           </listitem>
+
+           <listitem>
+             <para>
+<emphasis>The contents of documents:</emphasis> The contents have the structure
+of a tree: Elements contain other elements and text<footnote><para>Elements may
+also contain processing instructions. Unlike other document models, &markup;
+separates processing instructions from the rest of the text and provides a
+second interface to access them (method <literal>pinstr</literal>). However,
+there is a parser option (<literal>enable_pinstr_nodes</literal>) which changes
+the behaviour of the parser such that extra nodes for processing instructions
+are included into the tree.</para>
+<para>Furthermore, the tree does normally not contain nodes for XML comments;
+they are ignored by default. Again, there is an option
+(<literal>enable_comment_nodes</literal>) changing this.</para>
+</footnote>. 
+
+The common type to represent both kinds of content is <literal>node</literal>
+which is a class type that unifies the properties of elements and character
+data. Every node has a list of children (which is empty if the element is empty
+or the node represents text); nodes may have attributes; nodes always have text
+contents. There are two implementations of <literal>node</literal>: the class
+<literal>element_impl</literal> for elements, and the class
+<literal>data_impl</literal> for text data. You find these classes and class
+types in the module <literal>Pxp_document</literal>, too.
+</para>
+
+             <para>
+Note that attribute lists are represented by non-class values.
+</para>
+           </listitem>
+
+           <listitem>
+             <para>
+<emphasis>The node extension:</emphasis> For advanced usage, every node of the
+document may have an associated <emphasis>extension</emphasis> which is simply
+a second object. This object must have the three methods
+<literal>clone</literal>, <literal>node</literal>, and
+<literal>set_node</literal> as a bare minimum, but you are free to add methods as
+you want. This is the preferred way to add functionality to the document
+tree<footnote><para>Due to the typing system it is more or less impossible to
+derive recursive classes in O'Caml. To get around this, it is common practice
+to put the modifiable or extensible part of recursive objects into parallel
+objects.</para> </footnote>. The class type <literal>extension</literal> is
+defined in <literal>Pxp_document</literal>, too.
+</para>
+           </listitem>
+
+           <listitem>
+             <para>
+<emphasis>The DTD:</emphasis> Sometimes it is necessary to access the DTD of a
+document; the average application does not need this feature. The class
+<literal>dtd</literal> describes DTDs, and makes it possible to get
+representations of element, entity, and notation declarations as well as
+processing instructions contained in the DTD. This class, and
+<literal>dtd_element</literal>, <literal>dtd_notation</literal>, and
+<literal>proc_instruction</literal> can be found in the module
+<literal>Pxp_dtd</literal>. There are a couple of classes representing
+different kinds of entities; these can be found in the module
+<literal>Pxp_entity</literal>. 
+</para>
+           </listitem>
+         </itemizedlist>
+
+Additionally, the following modules play a role:
+
+<itemizedlist mark="bullet" spacing="compact">
+           <listitem>
+             <para>
+<emphasis>Pxp_yacc:</emphasis> Here the main parsing functions such as
+<literal>parse_document_entity</literal> are located. Some additional types and
+functions allow the parser to be configured in a non-standard way.
+</para>
+           </listitem>
+
+           <listitem>
+             <para>
+<emphasis>Pxp_types:</emphasis> This is a collection of basic types and
+exceptions. 
+</para>
+           </listitem>
+         </itemizedlist>
+
+There are some further modules that are needed internally but are not part of
+the API.
+</para>
+
+       <para>
+Let the document to be parsed be stored in a file called
+<literal>doc.xml</literal>. The parsing process is started by calling the
+function
+
+<programlisting>
+val parse_document_entity : config -> source -> 'ext spec -> 'ext document
+</programlisting>
+
+defined in the module <literal>Pxp_yacc</literal>. The first argument
+specifies some global properties of the parser; it is recommended to start with
+<literal>default_config</literal>. The second argument determines where the
+document to be parsed comes from; this may be a file, a channel, or an entity
+ID. To parse <literal>doc.xml</literal>, it is sufficient to pass
+<literal>from_file "doc.xml"</literal>. 
+</para>
+
+       <para>
+The third argument passes the object specification to use. Roughly
+speaking, it determines which classes implement the node objects of which
+element types, and which extensions are to be used. The <literal>'ext</literal>
+polymorphic variable is the type of the extension. For the moment, let us
+simply pass <literal>default_spec</literal> as this argument, and ignore it.
+</para>
+
+       <para>
+So the following expression parses <literal>doc.xml</literal>:
+
+<programlisting>
+open Pxp_yacc
+let d = parse_document_entity default_config (from_file "doc.xml") default_spec
+</programlisting>
+
+Note that <literal>default_config</literal> implies that warnings are collected
+but not printed. Errors raise one of the exceptions defined in
+<literal>Pxp_types</literal>; to get readable errors and warnings, catch the
+exceptions as follows:
+
+<programlisting>
+<![CDATA[class warner =
+  object 
+    method warn w =
+      print_endline ("WARNING: " ^ w)
+  end
+;;
+
+try
+  let config = { default_config with warner = new warner } in
+  let d = parse_document_entity config (from_file "doc.xml") default_spec
+  in
+    ...
+with
+   e ->
+     print_endline (Pxp_types.string_of_exn e)
+]]></programlisting>
+
+Now <literal>d</literal> is an object of the <literal>document</literal>
+class. If you want the node tree, you can get the root element by
+
+<programlisting>
+let root = d # root
+</programlisting>
+
+and if you would rather access the DTD, you can obtain it by
+
+<programlisting>
+let dtd = d # dtd
+</programlisting>
+
+As it is more interesting, let us now investigate the node tree. Given the root
+element, it is possible to recursively traverse the whole tree. The children of
+a node <literal>n</literal> are returned by the method
+<literal>sub_nodes</literal>, and the type of a node is returned by
+<literal>node_type</literal>. The following function traverses the tree and
+prints the type of each node:
+
+<programlisting>
+<![CDATA[let rec print_structure n =
+  let ntype = n # node_type in
+  match ntype with
+    T_element name ->
+      print_endline ("Element of type " ^ name);
+      let children = n # sub_nodes in
+      List.iter print_structure children
+  | T_data ->
+      print_endline "Data"
+  | _ ->
+      (* Other node types are not possible unless the parser is configured
+         differently.
+       *)
+      assert false
+]]></programlisting>
+
+You can call this function by
+
+<programlisting>
+print_structure root
+</programlisting>
+
+The type returned by <literal>node_type</literal> is either <literal>T_element
+name</literal> or <literal>T_data</literal>. The <literal>name</literal> of the
+element type is the string included in the angle brackets. Note that only
+elements have children; data nodes are always leaves of the tree.
+</para>
+
+       <para>
+There are some more methods in order to access a parsed node tree:
+
+<itemizedlist mark="bullet" spacing="compact">
+           <listitem>
+             <para>
+<literal>n # parent</literal>: Returns the parent node, or raises
+<literal>Not_found</literal> if the node is already the root
+</para>
+           </listitem>
+           <listitem>
+             <para>
+<literal>n # root</literal>: Returns the root of the node tree. 
+</para>
+           </listitem>
+           <listitem>
+             <para>
+<literal>n # attribute a</literal>: Returns the value of the attribute with
+name <literal>a</literal>. The method returns a value for every
+<emphasis>declared</emphasis> attribute, independently of whether the attribute
+instance is defined or not. If the attribute is not declared,
+<literal>Not_found</literal> will be raised. (In well-formedness mode, every
+attribute is considered as being implicitly declared with type
+<literal>CDATA</literal>.) 
+</para>
+
+<para>
+The following return values are possible: <literal>Value s</literal>, 
+<literal>Valuelist sl</literal>, and <literal>Implied_value</literal>.
+The first two value types indicate that the attribute value is available,
+either because there is a definition
+<literal><replaceable>a</replaceable>="<replaceable>value</replaceable>"</literal>
+in the XML text, or because there is a default value (declared in the
+DTD). Only if both the instance definition and the default declaration are
+missing, the latter value <literal>Implied_value</literal> will be returned.
+</para>
+
+<para>
+In the DTD, every attribute is typed. There are single-value types (CDATA, ID,
+IDREF, ENTITY, NMTOKEN, enumerations), in which case the method passes
+<literal>Value s</literal> back, where <literal>s</literal> is the normalized
+string value of the attribute. The other types (IDREFS, ENTITIES, NMTOKENS)
+represent list values, and the parser splits the XML literal into several
+tokens and returns these tokens as <literal>Valuelist sl</literal>.
+</para>
+
+<para>
+Normalization means that entity references (the
+<literal>&amp;<replaceable>name</replaceable>;</literal> tokens) and
+character references
+(<literal>&amp;#<replaceable>number</replaceable>;</literal>) are replaced
+by the text they represent, and that white space characters are converted into
+plain spaces.
+</para>
+           </listitem>
+           <listitem>
+             <para>
+<literal>n # data</literal>: Returns the character data contained in the
+node. For data nodes, the meaning is obvious as this is the main content of
+data nodes. For element nodes, this method returns the concatenated contents of
+all inner data nodes.
+</para>
+             <para>
+Note that entity references included in the text are resolved while they are
+being parsed; for example the text <![CDATA["a &lt;&gt; b"]]> will be returned
+as <![CDATA["a <> b"]]> by this method. Spaces of data nodes are always
+preserved. Newlines are preserved, but always converted to \n characters even
+if newlines are encoded as \r\n or \r. Normally you will never see two adjacent
+data nodes because the parser collapses all data material at one location into
+one node. (However, if you create your own tree or transform the parsed tree,
+it is possible to have adjacent data nodes.)
+</para>
+             <para>
+Note that elements that do <emphasis>not</emphasis> allow #PCDATA as content
+will not have data nodes as children. This means that spaces and newlines, the
+only character material allowed for such elements, are silently dropped.
+</para>
+           </listitem>
+         </itemizedlist>
+
+For example, if the task is to print all contents of elements with type
+"valuable" whose attribute "priority" is "1", this function can help:
+
+<programlisting>
+<![CDATA[let rec print_valuable_prio1 n =
+  let ntype = n # node_type in
+  match ntype with
+    T_element "valuable" when n # attribute "priority" = Value "1" ->
+      print_endline "Valuable node with priotity 1 found:";
+      print_endline (n # data)
+  | (T_element _ | T_data) ->
+      let children = n # sub_nodes in
+      List.iter print_valuable_prio1 children
+  | _ ->
+      assert false
+]]></programlisting>
+
+You can call this function by:
+
+<programlisting>
+print_valuable_prio1 root
+</programlisting>
+
+If you like a DSSSL-like style, you can make the function
+<literal>process_children</literal> explicit:
+
+<programlisting>
+<![CDATA[let rec print_valuable_prio1 n =
+
+  let process_children n =
+    let children = n # sub_nodes in
+    List.iter print_valuable_prio1 children 
+  in
+
+  let ntype = n # node_type in
+  match ntype with
+    T_element "valuable" when n # attribute "priority" = Value "1" ->
+      print_endline "Valuable node with priority 1 found:";
+      print_endline (n # data)
+  | (T_element _ | T_data) ->
+      process_children n
+  | _ ->
+      assert false
+]]></programlisting>
+
+Used in this way, O'Caml serves as a simple "style-sheet language": you can
+form a big "match" expression to distinguish between all significant cases, and
+provide different reactions to different conditions. But this technique has
+limitations; the "match" expression tends to get larger and larger, and it is
+difficult to store intermediate values as there is only one big
+recursion. Alternatively, it is also possible to represent the various cases as
+classes, and to use dynamic method lookup to find the appropriate class. The
+next section explains this technique in detail.
+
+</para>
+      </sect1>
+
+
+      <!-- ================================================== -->
+
+
+      <sect1>
+       <title>Class-based processing of the node tree</title>
+       <para>
+By default, the parsed node tree consists of objects of the same class; this is
+a good design as long as you only want to access selected parts of the
+document. For complex transformations, it may be better to use different
+classes for objects describing different element types.
+</para>
+
+       <para>
+For example, if the DTD declares the element types <literal>a</literal>,
+<literal>b</literal>, and <literal>c</literal>, and if the task is to convert
+an arbitrary document into a printable format, the idea is to define for every
+element type a separate class that has a method <literal>print</literal>. The
+classes are <literal>eltype_a</literal>, <literal>eltype_b</literal>, and
+<literal>eltype_c</literal>, and every class implements
+<literal>print</literal> such that elements of the type corresponding to the
+class are converted to the output format.
+</para>
+
+       <para>
+The parser supports such a design directly. As it is impossible to derive
+recursive classes in O'Caml<footnote><para>The problem is that the subclass is
+usually not a subtype in this case because O'Caml has a contravariant subtyping
+rule. </para> </footnote>, the specialized element classes cannot be formed by
+simply inheriting from the built-in classes of the parser and adding methods
+for customized functionality. To get around this limitation, every node of the
+document tree is represented by <emphasis>two</emphasis> objects, one called
+"the node" and containing the recursive definition of the tree, one called "the
+extension". Every node object has a reference to the extension, and the
+extension has a reference to the node. The advantage of this model is that it
+is now possible to customize the extension without affecting the typing
+constraints of the recursive node definition.
+</para>
+
+       <para>
+Every extension must have the three methods <literal>clone</literal>,
+<literal>node</literal>, and <literal>set_node</literal>. The method
+<literal>clone</literal> creates a deep copy of the extension object and
+returns it; <literal>node</literal> returns the node object for this extension
+object; and <literal>set_node</literal> is used to tell the extension object
+which node is associated with it; this method is automatically called when the
+node tree is initialized. The following definition is a good starting point
+for these methods; usually <literal>clone</literal> must be further refined
+when instance variables are added to the class:
+
+<programlisting>
+<![CDATA[class custom_extension =
+  object (self)
+
+    val mutable node = (None : custom_extension node option)
+
+    method clone = {< >} 
+    method node =
+      match node with
+          None ->
+            assert false
+        | Some n -> n
+    method set_node n =
+      node <- Some n
+
+  end
+]]>
+</programlisting>
+
+This part of the extension is usually the same for all classes, so it is a good
+idea to consider <literal>custom_extension</literal> as the super-class of the
+further class definitions. Continuing the example above, we can define the
+element type classes as follows:
+
+<programlisting>
+<![CDATA[class virtual custom_extension =
+  object (self)
+    ... clone, node, set_node defined as above ...
+
+    method virtual print : out_channel -> unit
+  end
+
+class eltype_a =
+  object (self)
+    inherit custom_extension
+    method print ch = ...
+  end
+
+class eltype_b =
+  object (self)
+    inherit custom_extension
+    method print ch = ...
+  end
+
+class eltype_c =
+  object (self)
+    inherit custom_extension
+    method print ch = ...
+  end
+]]></programlisting>
+
+The method <literal>print</literal> can now be implemented for every element
+type separately. Note that you get the associated node by invoking
+
+<programlisting>
+self # node
+</programlisting>
+
+and you get the extension object of a node <literal>n</literal> by writing 
+
+<programlisting>
+n # extension
+</programlisting>
+
+It is guaranteed that 
+
+<programlisting>
+self # node # extension == self
+</programlisting>
+
+always holds.
+</para>
+
+       <para>Here are sample definitions of the <literal>print</literal>
+methods:
+
+<programlisting><![CDATA[
+class eltype_a =
+  object (self)
+    inherit custom_extension
+    method print ch = 
+      (* Nodes <a>...</a> are only containers: *)
+      output_string ch "(";
+      List.iter
+        (fun n -> n # extension # print ch)
+        (self # node # sub_nodes);
+      output_string ch ")";
+  end
+
+class eltype_b =
+  object (self)
+    inherit custom_extension
+    method print ch =
+      (* Print the value of the CDATA attribute "print": *)
+      match self # node # attribute "print" with
+        Value s       -> output_string ch s
+      | Implied_value -> output_string ch "<missing>"
+      | Valuelist l   -> assert false   
+                         (* not possible because the att is CDATA *)
+  end
+
+class eltype_c =
+  object (self)
+    inherit custom_extension
+    method print ch = 
+      (* Print the contents of this element: *)
+      output_string ch (self # node # data)
+  end
+
+class null_extension =
+  object (self)
+    inherit custom_extension
+    method print ch = assert false
+  end
+]]></programlisting>
+</para>
+
+
+       <para>
+The remaining task is to configure the parser such that these extension classes
+are actually used. Here another problem arises: it is not possible to
+dynamically select the class of an object to be created. As a workaround,
+&markup; allows the user to specify <emphasis>exemplar objects</emphasis> for
+the various element types; instead of creating the nodes of the tree by
+applying the <literal>new</literal> operator, the nodes are produced by
+duplicating the exemplars. As object duplication preserves the class of the
+object, one can create fresh objects of every class for which an exemplar has
+previously been registered.
+</para>
+
+       <para>
+Exemplars are meant to be objects without contents; the only interesting thing
+about them is that they are instances of a certain class. The creation of an
+exemplar for an element node can be done by:
+
+<programlisting>
+let element_exemplar = new element_impl extension_exemplar
+</programlisting>
+
+And a data node exemplar is created by:
+
+<programlisting>
+let data_exemplar = new data_impl extension_exemplar
+</programlisting>
+
+The classes <literal>element_impl</literal> and <literal>data_impl</literal>
+are defined in the module <literal>Pxp_document</literal>. The constructors
+initialize the fresh objects as empty objects, i.e. without children, without
+data contents, and so on. The <literal>extension_exemplar</literal> is the
+initial extension object the exemplars are associated with. 
+</para>
+
+       <para>
+Once the exemplars are created and stored somewhere (e.g. in a hash table), you
+can take an exemplar and create a concrete instance (with contents) by
+duplicating it. As a user of the parser you are normally not concerned with
+this, since it is part of the internal logic of the parser; as background
+knowledge, however, it is worthwhile to mention that the two methods
+<literal>create_element</literal> and <literal>create_data</literal> actually
+perform the duplication of the exemplar for which they are invoked,
+additionally apply modifications to the clone, and finally return the new
+object. Moreover, the extension object is copied, too, and the new node object
+is associated with the fresh extension object. Note that this is the reason why
+every extension object must have a <literal>clone</literal> method.
+</para>
+
+       <para>
+The configuration of the set of exemplars is passed to the
+<literal>parse_document_entity</literal> function as third argument. In our
+example, this argument can be set up as follows:
+
+<programlisting>
+<![CDATA[let spec =
+  make_spec_from_alist
+    ~data_exemplar:            (new data_impl (new null_extension))
+    ~default_element_exemplar: (new element_impl (new null_extension))
+    ~element_alist:
+       [ "a",  new element_impl (new eltype_a);
+         "b",  new element_impl (new eltype_b);
+         "c",  new element_impl (new eltype_c);
+       ]
+    ()
+]]></programlisting>
+
+The <literal>~element_alist</literal> function argument defines the mapping
+from element types to exemplars as an associative list. The argument
+<literal>~data_exemplar</literal> specifies the exemplar for data nodes, and
+the <literal>~default_element_exemplar</literal> is used whenever the parser
+finds an element type for which the associative list does not define an
+exemplar. 
+</para>
+
+       <para>
+The configuration is now complete. You can still use the same parsing
+functions, only the initialization is a bit different. For example, call the
+parser by:
+
+<programlisting>
+let d = parse_document_entity default_config (from_file "doc.xml") spec
+</programlisting>
+
+Note that the resulting document <literal>d</literal> has a usable type;
+especially the <literal>print</literal> method we added is visible. So you can
+print your document by
+
+<programlisting>
+d # root # extension # print stdout
+</programlisting>
+</para>
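+
+       <para>
+To see how the pieces fit together, here is a minimal, self-contained sketch
+(only a sketch; the file name <literal>doc.xml</literal> is an assumption, and
+<literal>spec</literal> and the extension classes are those defined above):
+
+<programlisting>
+<![CDATA[(* Hypothetical main program: parse the document and print it via the
+   print method of the extension classes. Any exception is reported and
+   the program exits with a non-zero code. *)
+let () =
+  try
+    let d = parse_document_entity default_config (from_file "doc.xml") spec in
+    d # root # extension # print stdout
+  with
+    e ->
+      prerr_endline (Printexc.to_string e);
+      exit 1
+]]></programlisting>
+</para>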
+
+       <para>
+This object-oriented approach looks rather complicated; this is mostly caused
+by working around some problems of the strict typing system of O'Caml. Some
+auxiliary concepts such as extensions were needed, but the practical
+consequences are minor. In the next section, one of the examples of the
+distribution is explained, a converter from <emphasis>readme</emphasis>
+documents to HTML.
+</para>
+
+      </sect1>
+
+
+      <!-- ================================================== -->
+
+
+      <sect1 id="sect.readme.to-html">
+       <title>Example: An HTML backend for the <emphasis>readme</emphasis>
+DTD</title>
+
+       <para>The converter from <emphasis>readme</emphasis> documents to HTML
+documents follows strictly the approach to define one class per element
+type. The HTML code is similar to the <emphasis>readme</emphasis> source,
+because of this most elements can be converted in the following way: Given the
+input element 
+
+<programlisting>
+<![CDATA[<e>content</e>]]>
+</programlisting>
+
+the conversion text is the concatenation of a computed prefix, the recursively
+converted content, and a computed suffix. 
+</para>
+
+       <para>
+Only one element type cannot be handled by this scheme:
+<literal>footnote</literal>. Footnotes are collected while they are found in
+the input text, and they are printed after the main text has been converted and
+printed. 
+</para>
+
+       <sect2>
+         <title>Header</title>
+         <para>
+<programlisting>&readme.code.header;</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Type declarations</title>
+         <para>
+<programlisting>&readme.code.footnote-printer;</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Class <literal>store</literal></title>
+         <para>
+The <literal>store</literal> is a container for footnotes. You can add a
+footnote by invoking <literal>alloc_footnote</literal>; the argument is an
+object of the class <literal>footnote_printer</literal>, the method returns the
+number of the footnote. The interesting property of a footnote is that it can
+be converted to HTML, so a <literal>footnote_printer</literal> is an object
+with a method <literal>footnote_to_html</literal>. The class
+<literal>footnote</literal> which is defined below has a compatible method
+<literal>footnote_to_html</literal> such that objects created from it can be
+used as <literal>footnote_printer</literal>s.
+</para>
+         <para>
+The other method, <literal>print_footnotes</literal> prints the footnotes as
+definition list, and is typically invoked after the main material of the page
+has already been printed. Every item of the list is printed by
+<literal>footnote_to_html</literal>.
+</para>
+
+         <para>
+<programlisting>&readme.code.store;</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Function <literal>escape_html</literal></title>
+         <para>
+This function converts the characters &lt;, &gt;, &amp;, and " to their HTML
+representation. For example, 
+<literal>escape_html "&lt;&gt;" = "&amp;lt;&amp;gt;"</literal>. Other
+characters are left unchanged.
+
+<programlisting>&readme.code.escape-html;</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Virtual class <literal>shared</literal></title>
+         <para>
+This virtual class is the abstract superclass of the extension classes shown
+below. It defines the standard methods <literal>clone</literal>,
+<literal>node</literal>, and <literal>set_node</literal>, and declares the type
+of the virtual method <literal>to_html</literal>. This method recursively
+traverses the whole element tree, and prints the converted HTML code to the
+output channel passed as second argument. The first argument is the reference
+to the global <literal>store</literal> object which collects the footnotes.
+
+<programlisting>&readme.code.shared;</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Class <literal>only_data</literal></title>
+         <para>
+This class defines <literal>to_html</literal> such that the character data of
+the current node is converted to HTML. Note that <literal>self</literal> is an
+extension object, <literal>self # node</literal> is the node object, and
+<literal>self # node # data</literal> returns the character data of the node. 
+
+<programlisting>&readme.code.only-data;</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Class <literal>readme</literal></title>
+         <para>
+This class converts elements of type <literal>readme</literal> to HTML. Such an
+element is (by definition) always the root element of the document. First, the
+HTML header is printed; the <literal>title</literal> attribute of the element
+determines the title of the HTML page. Some aspects of the HTML page can be
+configured by setting certain parameter entities, for example the background
+color, the text color, and link colors. After the header, the
+<literal>body</literal> tag, and the headline have been printed, the contents
+of the page are converted by invoking <literal>to_html</literal> on all
+children of the current node (which is the root node). Then, the footnotes are
+appended to this by telling the global <literal>store</literal> object to print
+the footnotes. Finally, the end tags of the HTML pages are printed.
+</para>
+
+         <para>
+This class is an example of how to access the value of an attribute: the value
+is determined by invoking <literal>self # node # attribute "title"</literal>. As
+this attribute has been declared as CDATA and as being required, the value
+always has the form <literal>Value s</literal> where <literal>s</literal> is the
+string value of the attribute. 
+</para>
+
+         <para>
+You can also see how entity contents can be accessed. A parameter entity object
+can be looked up by <literal>self # node # dtd # par_entity "name"</literal>,
+and by invoking <literal>replacement_text</literal> the value of the entity
+is returned after inner parameter and character entities have been
+processed. Note that you must use <literal>gen_entity</literal> instead of
+<literal>par_entity</literal> to access general entities.
+</para>
+
+         <para>
+<programlisting>&readme.code.readme;</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Classes <literal>section</literal>, <literal>sect1</literal>,
+<literal>sect2</literal>, and <literal>sect3</literal></title>
+         <para>
+As the conversion process is very similar, the conversion classes of the three
+section levels are derived from the more general <literal>section</literal>
+class. The HTML code of the section levels differs only in the type of the
+headline; because of this, the classes describing the section levels can be
+obtained by instantiating the class argument <literal>the_tag</literal> of
+<literal>section</literal> with the HTML name of the headline tag.
+</para>
+
+         <para>
+Section elements are converted to HTML by printing a headline and then
+converting the contents of the element recursively. More precisely, the first
+sub-element is always a <literal>title</literal> element, and the other
+elements are the contents of the section. This structure is declared in the
+DTD, and it is guaranteed that the document matches the DTD. Because of this,
+the title node can be separated from the rest without any checks.
+</para>
+
+         <para>
+Both the title node and the body nodes are then converted to HTML by calling
+<literal>to_html</literal> on them.
+</para>
+
+         <para>
+<programlisting>&readme.code.section;</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Classes <literal>map_tag</literal>, <literal>p</literal>,
+<literal>em</literal>, <literal>ul</literal>, <literal>li</literal></title>
+         <para>
+Several element types are converted to HTML by simply mapping them to
+corresponding HTML element types. The class <literal>map_tag</literal>
+implements this, and the class argument <literal>the_target_tag</literal>
+determines the tag name to map to. The output consists of the start tag, the
+recursively converted inner elements, and the end tag.
+
+<programlisting>&readme.code.map-tag;</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Class <literal>br</literal></title>
+         <para>
+Element of type <literal>br</literal> are mapped to the same HTML type. Note
+that HTML forbids the end tag of <literal>br</literal>.
+
+<programlisting>&readme.code.br;</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Class <literal>code</literal></title>
+         <para>
+The <literal>code</literal> type is converted to a <literal>pre</literal>
+section (preformatted text). As the meaning of tabs is unspecified in HTML,
+tabs are expanded to spaces.
+
+<programlisting>&readme.code.code;</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Class <literal>a</literal></title>
+         <para>
+Hyperlinks, expressed by the <literal>a</literal> element type, are converted
+to the HTML <literal>a</literal> type. If the target of the hyperlink is given
+by <literal>href</literal>, the URL of this attribute can be used
+directly. Alternatively, the target can be given by
+<literal>readmeref</literal> in which case the ".html" suffix must be added to
+the file name. 
+</para>
+
+         <para>
+Note that within <literal>a</literal> only #PCDATA is allowed, so the contents
+can be converted directly by applying <literal>escape_html</literal> to the
+character data contents.
+
+<programlisting>&readme.code.a;</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Class <literal>footnote</literal></title>
+         <para>
+The <literal>footnote</literal> class has two methods:
+<literal>to_html</literal> to convert the footnote reference to HTML, and
+<literal>footnote_to_html</literal> to convert the footnote text itself.
+</para>
+
+         <para>
+The footnote reference is converted to a local hyperlink; more precisely, to
+two anchor tags which are connected with each other. The text anchor points to
+the footnote anchor, and the footnote anchor points to the text anchor.
+</para>
+
+         <para>
+The footnote must be allocated in the <literal>store</literal> object. By
+allocating the footnote, you get the number of the footnote, and the text of
+the footnote is stored until the end of the HTML page is reached when the
+footnotes can be printed. The <literal>to_html</literal> method stores simply
+the object itself, such that the <literal>footnote_to_html</literal> method is
+invoked on the same object that encountered the footnote.
+</para>
+
+         <para>
+The <literal>to_html</literal> only allocates the footnote, and prints the
+reference anchor, but it does not print nor convert the contents of the
+note. This is deferred until the footnotes actually get printed, i.e. the
+recursive call of <literal>to_html</literal> on the sub nodes is done by
+<literal>footnote_to_html</literal>. 
+</para>
+
+         <para>
+Note that this technique does not work if you make another footnote within a
+footnote; the second footnote gets allocated but not printed.
+</para>
+
+         <para>
+<programlisting>&readme.code.footnote;</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>The specification of the document model</title>
+         <para>
+This code sets up the hash table that connects element types with the exemplars
+of the extension classes that convert the elements to HTML.
+
+<programlisting>&readme.code.tag-map;</programlisting>
+</para>
+       </sect2>
+
+<!-- <![RCDATA[&readme.code.to-html;]]> -->
+      </sect1>
+
+    </chapter>
+
+<!-- ********************************************************************** -->
+
+    <chapter>
+      <title>The objects representing the document</title>
+
+      <para>
+<emphasis>This description might be out-of-date. See the module interface files
+for updated information.</emphasis></para>
+
+      <sect1>
+       <title>The <literal>document</literal> class</title>
+       <para>
+<programlisting>
+<![CDATA[
+class [ 'ext ] document :
+  Pxp_types.collect_warnings -> 
+  object
+    method init_xml_version : string -> unit
+    method init_root : 'ext node -> unit
+
+    method xml_version : string
+    method xml_standalone : bool
+    method dtd : dtd
+    method root : 'ext node
+
+    method encoding : Pxp_types.rep_encoding
+
+    method add_pinstr : proc_instruction -> unit
+    method pinstr : string -> proc_instruction list
+    method pinstr_names : string list
+
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+
+  end
+;;
+]]>
+</programlisting>
+
+The methods beginning with <literal>init_</literal> are only for internal use
+of the parser.
+</para>
+
+       <itemizedlist mark="bullet" spacing="compact">
+         <listitem>
+           <para>
+<literal>xml_version</literal>: returns the version string at the beginning of
+the document. For example, "1.0" is returned if the document begins with
+<literal>&lt;?xml version="1.0"?&gt;</literal>.</para>
+         </listitem>
+         <listitem>
+           <para>
+<literal>xml_standalone</literal>: returns the boolean value of
+<literal>standalone</literal> declaration in the XML declaration. If the
+<literal>standalone</literal> attribute is missing, <literal>false</literal> is
+returned. </para>
+         </listitem>
+         <listitem>
+           <para>
+<literal>dtd</literal>: returns a reference to the global DTD object.</para>
+         </listitem>
+         <listitem>
+           <para>
+<literal>root</literal>: returns a reference to the root element.</para>
+         </listitem>
+         <listitem>
+           <para>
+<literal>encoding</literal>: returns the internal encoding of the
+document. This means that all strings of which the document consists are
+encoded in this character set.
+</para>
+         </listitem>
+         <listitem>
+           <para>
+<literal>pinstr</literal>: returns the processing instructions outside the DTD
+and outside the root element. The argument passed to the method names a
+<emphasis>target</emphasis>, and the method returns all instructions with this
+target. The target is the first word inside <literal>&lt;?</literal> and
+<literal>?&gt;</literal>.</para>
+         </listitem>
+         <listitem>
+           <para>
+<literal>pinstr_names</literal>: returns the names of the processing instructions</para>
+         </listitem>
+         <listitem>
+           <para>
+<literal>add_pinstr</literal>: adds another processing instruction. This method
+is used by the parser itself to enter the instructions returned by
+<literal>pinstr</literal>, but you can also enter additional instructions.
+</para>
+         </listitem>
+         <listitem>
+           <para>
+<literal>write</literal>: writes the document to the passed stream as XML
+text using the passed (external) encoding. The generated text is always valid
+XML and can be parsed by PXP; however, the text is badly formatted (this is not
+a pretty printer).</para>
+         </listitem>
+       </itemizedlist>
+      </sect1>
+
+<!-- ********************************************************************** -->
+
+      <sect1>
+       <title>The class type <literal>node</literal></title>
+       <para>
+
+From <literal>Pxp_document</literal>:
+
+<programlisting>
+type node_type =
+  T_data
+| T_element of string
+| T_super_root
+| T_pinstr of string
+| T_comment
+<replaceable>and some other, reserved types</replaceable>
+;;
+
+class type [ 'ext ] node =
+  object ('self)
+    constraint 'ext = 'ext node #extension
+
+    <anchor id="type-node-general.sig"
+   >(* <link linkend="type-node-general" endterm="type-node-general.title"
+       ></link> *)
+
+    method extension : 'ext
+    method dtd : dtd
+    method parent : 'ext node
+    method root : 'ext node
+    method sub_nodes : 'ext node list
+    method iter_nodes : ('ext node &fun; unit) &fun; unit
+    method iter_nodes_sibl : 
+           ('ext node option &fun; 'ext node &fun; 'ext node option &fun; unit) &fun; unit
+    method node_type : node_type
+    method encoding : Pxp_types.rep_encoding
+    method data : string
+    method position : (string * int * int)
+    method comment : string option
+    method pinstr : string &fun; proc_instruction list
+    method pinstr_names : string list
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+
+    <anchor id="type-node-atts.sig"
+   >(* <link linkend="type-node-atts" endterm="type-node-atts.title"
+       ></link> *)
+
+    method attribute : string &fun; Pxp_types.att_value
+    method required_string_attribute : string &fun; string
+    method optional_string_attribute : string &fun; string option
+    method required_list_attribute : string &fun; string list
+    method optional_list_attribute : string &fun; string list
+    method attribute_names : string list
+    method attribute_type : string &fun; Pxp_types.att_type
+    method attributes : (string * Pxp_types.att_value) list
+    method id_attribute_name : string
+    method id_attribute_value : string
+    method idref_attribute_names : string list
+
+    <anchor id="type-node-mods.sig"
+   >(* <link linkend="type-node-mods" endterm="type-node-mods.title"
+       ></link> *)
+
+    method add_node : ?force:bool &fun; 'ext node &fun; unit
+    method add_pinstr : proc_instruction &fun; unit
+    method delete : unit
+    method set_nodes : 'ext node list &fun; unit
+    method quick_set_attributes : (string * Pxp_types.att_value) list &fun; unit
+    method set_comment : string option &fun; unit
+
+    <anchor id="type-node-cloning.sig"
+   >(* <link linkend="type-node-cloning" endterm="type-node-cloning.title"
+       ></link> *)
+
+    method orphaned_clone : 'self
+    method orphaned_flat_clone : 'self
+    method create_element : 
+              ?position:(string * int * int) &fun;
+              dtd &fun; node_type &fun; (string * string) list &fun;
+                  'ext node
+    method create_data : dtd &fun; string &fun; 'ext node
+    method keep_always_whitespace_mode : unit
+
+    <anchor id="type-node-weird.sig"
+   >(* <link linkend="type-node-weird" endterm="type-node-weird.title"
+       ></link> *)
+
+    method local_validate : ?use_dfa:bool -> unit -> unit
+
+    (* ... Internal methods are undocumented. *)
+
+  end
+;;
+</programlisting>
+
+In the module <literal>Pxp_types</literal> you can find another type
+definition that is important in this context:
+
+<programlisting>
+type Pxp_types.att_value =
+    Value     of string
+  | Valuelist of string list
+  | Implied_value
+;;
+</programlisting>
+</para>
+
+       <sect2>
+         <title>The structure of document trees</title>
+
+<para>
+A node represents either an element or a character data section. There are two
+classes implementing the two aspects of nodes: <literal>element_impl</literal>
+and <literal>data_impl</literal>. The latter class does not implement all
+methods because some methods do not make sense for data nodes.
+</para>
+
+<para>
+(Note: PXP also supports a mode which forces that processing instructions and
+comments are represented as nodes of the document tree. However, these nodes
+are instances of <literal>element_impl</literal> with node types
+<literal>T_pinstr</literal> and <literal>T_comment</literal>,
+respectively. This mode must be explicitly configured; the basic representation
+knows only element and data nodes.)
+</para>
+
+       <para>The following figure 
+(<link linkend="node-term" endterm="node-term"></link>) shows an example how
+a tree is constructed from element and data nodes. The circular areas 
+represent element nodes whereas the ovals denote data nodes. Only elements
+may have subnodes; data nodes are always leaves of the tree. The subnodes
+of an element can be either element or data nodes; in both cases the O'Caml
+objects storing the nodes have the class type <literal>node</literal>.</para>
+
+       <para>Attributes (the clouds in the picture) are not directly
+integrated into the tree; there is always an extra link to the attribute
+list. This is also true for processing instructions (not shown in the
+picture). This means that there are separate access methods for attributes and
+processing instructions.</para>
+
+<figure id="node-term" float="1">
+<title>A tree with element nodes, data nodes, and attributes</title>
+<graphic fileref="pic/node_term" format="GIF"></graphic>
+</figure>
+
+       <para>Only elements, data sections, attributes and processing
+instructions (and comments, if configured) can, directly or indirectly, occur
+in the document tree. It is impossible to add entity references to the tree; if
+the parser finds such a reference, not the reference as such but the referenced
+text (i.e. the tree representing the structured text) is included in the
+tree.</para>
+
+       <para>Note that the parser collapses as much data material into one
+data node as possible such that there are normally never two adjacent data
+nodes. This invariant is enforced even if data material is included by entity
+references or CDATA sections, or if a data sequence is interrupted by
+comments. So <literal>a &amp;amp; b &lt;-- comment --&gt; c &lt;![CDATA[
+&lt;&gt; d]]&gt;</literal> is represented by only one data node, for
+instance. However, you can create document trees manually which break this
+invariant; the invariant only describes the way the parser forms the tree.
+</para> 
+
+<figure id="node-general" float="1">
+<title>Nodes are doubly linked trees</title>
+<graphic fileref="pic/node_general" format="GIF"></graphic>
+</figure>
+
+       <para>
+The node tree has links in both directions: Every node has a link to its parent
+(if any), and it has links to the subnodes (see 
+figure <link linkend="node-general" endterm="node-general"></link>). Obviously,
+this doubly-linked structure simplifies the navigation in the tree, but it also
+has some consequences for the possible operations on trees.</para>
+
+       <para>
+Because every node must have at most <emphasis>one</emphasis> parent node,
+operations are illegal if they violate this condition. The following figure
+(<link linkend="node-add" endterm="node-add"></link>) shows on the left side
+that node <literal>y</literal> is added to <literal>x</literal> as new subnode
+which is allowed because <literal>y</literal> does not have a parent yet. The
+right side of the picture illustrates what would happen if <literal>y</literal>
+had a parent node; this is illegal because <literal>y</literal> would have two
+parents after the operation.</para>
+
+<figure id="node-add" float="1">
+<title>A node can only be added if it is a root</title>
+<graphic fileref="pic/node_add" format="GIF">
+</graphic>
+</figure>
+
+       <para>
+The "delete" operation simply removes the links between two nodes. In the
+picture (<link linkend="node-delete" endterm="node-delete"></link>) the node
+<literal>x</literal> is deleted from the list of subnodes of
+<literal>y</literal>. After that, <literal>x</literal> becomes the root of the
+subtree starting at this node.</para>
+
+<figure id="node-delete" float="1">
+<title>A deleted node becomes the root of the subtree</title>
+<graphic fileref="pic/node_delete" format="GIF"></graphic>
+</figure>
+
+       <para>
+It is also possible to make a clone of a subtree; illustrated in 
+<link linkend="node-clone" endterm="node-clone"></link>. In this case, the
+clone is a copy of the original subtree except that it is no longer a
+subnode. Because cloning never keeps the connection to the parent, the clones
+are called <emphasis>orphaned</emphasis>.
+</para>
+
+<figure id="node-clone" float="1">
+<title>The clone of a subtree</title>
+<graphic fileref="pic/node_clone" format="GIF"></graphic>
+</figure>
+       </sect2>
+
+       <sect2>
+         <title>The methods of the class type <literal>node</literal></title>
+
+         <anchor id="type-node-general">
+         <formalpara>
+           <title id="type-node-general.title">
+              <link linkend="type-node-general.sig">General observers</link>
+            </title>
+
+           <para>
+             <itemizedlist mark="bullet" spacing="compact">
+               <listitem>
+                 <para>
+<literal>extension</literal>: The reference to the extension object which
+belongs to this node (see ...).</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>dtd</literal>: Returns a reference to the global DTD. All nodes
+of a tree must share the same DTD.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>parent</literal>: Get the father node. Raises
+<literal>Not_found</literal> in the case the node does not have a
+parent, i.e. the node is the root.</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>root</literal>: Gets the reference to the root node of the tree.
+Every node is contained in a tree with a root, so this method always 
+succeeds. Note that this method <emphasis>searches</emphasis> the root,
+which costs time proportional to the length of the path to the root.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>sub_nodes</literal>: Returns references to the children. The returned
+list reflects the order of the children. For data nodes, this method returns
+the empty list.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>iter_nodes f</literal>: Iterates over the children, and calls
+<literal>f</literal> for every child in turn. 
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>iter_nodes_sibl f</literal>: Iterates over the children, and calls
+<literal>f</literal> for every child in turn. <literal>f</literal> gets as
+arguments the previous node, the current node, and the next node.</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>node_type</literal>: Returns either <literal>T_data</literal> which
+means that the node is a data node, or <literal>T_element n</literal>
+which means that the node is an element of type <literal>n</literal>. 
+If configured, possible node types are also <literal>T_pinstr t</literal>
+indicating that the node represents a processing instruction with target
+<literal>t</literal>, and <literal>T_comment</literal> in which case the node
+is a comment.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>encoding</literal>: Returns the encoding of the strings.</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>data</literal>: Returns the character data of this node and all
+children, concatenated as one string. The encoding of the string is what
+the method <literal>encoding</literal> returns.
+- For data nodes, this method simply returns the represented characters.
+For elements, the meaning of the method has been extended such that it
+returns something useful, i.e. the effectively contained characters, without
+markup. (For <literal>T_pinstr</literal> and <literal>T_comment</literal>
+nodes, the method returns the empty string.)
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>position</literal>: If configured, this method returns the position of
+the element as a triple (entity, line, byteposition). For data nodes, the
+position is not stored. If the position is not available the triple
+<literal>"?", 0, 0</literal> is returned.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>comment</literal>: Returns <literal>Some text</literal> for comment
+nodes, and <literal>None</literal> for other nodes. The <literal>text</literal>
+is everything between the comment delimiters <literal>&lt;!--</literal> and
+<literal>--&gt;</literal>.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>pinstr n</literal>: Returns all processing instructions that are
+directly contained in this element and that have a <emphasis>target</emphasis>
+specification of <literal>n</literal>. The target is the first word after
+the <literal>&lt;?</literal>.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>pinstr_names</literal>: Returns the list of all targets of processing
+instructions directly contained in this element.</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>write s enc</literal>: Prints the node and all subnodes to the passed
+output stream as valid XML text, using the passed external encoding.
+</para>
+               </listitem>
+             </itemizedlist>
+            </para>
+         </formalpara>
+
+         <anchor id="type-node-atts">
+         <formalpara>
+           <title id="type-node-atts.title">
+              <link linkend="type-node-atts.sig">Attribute observers</link>
+            </title>
+           <para>
+             <itemizedlist mark="bullet" spacing="compact">
+               <listitem>
+                 <para>
+<literal>attribute n</literal>: Returns the value of the attribute with name
+<literal>n</literal>. This method returns a value for every declared 
+attribute, and it raises <literal>Not_found</literal> for any undeclared
+attribute. Note that it even returns a value if the attribute is actually
+missing but is declared as <literal>#IMPLIED</literal> or has a default
+value. - Possible values are:
+                  <itemizedlist mark="bullet" spacing="compact">
+                     <listitem>
+                       <para>
+<literal>Implied_value</literal>: The attribute has been declared with the
+keyword <literal>#IMPLIED</literal>, and the attribute is missing in the
+attribute list of this element.</para>
+                     </listitem>
+                     <listitem>
+                       <para>
+<literal>Value s</literal>: The attribute has been declared as type
+<literal>CDATA</literal>, as <literal>ID</literal>, as
+<literal>IDREF</literal>, as <literal>ENTITY</literal>, or as
+<literal>NMTOKEN</literal>, or as enumeration or notation, and one of the two
+conditions holds: (1) The attribute value is present in the attribute list in
+which case the value is returned in the string <literal>s</literal>. (2) The
+attribute has been omitted, and the DTD declared the attribute with a default
+value. The default value is returned in <literal>s</literal>. 
+- Summarized, <literal>Value s</literal> is returned for non-implied, non-list 
+attribute values.
+</para>
+                     </listitem>
+                     <listitem>
+                       <para>
+<literal>Valuelist l</literal>: The attribute has been declared as type
+<literal>IDREFS</literal>, as <literal>ENTITIES</literal>, or
+as <literal>NMTOKENS</literal>, and one of the two conditions holds: (1) The
+attribute value is present in the attribute list in which case the
+space-separated tokens of the value are returned in the string list
+<literal>l</literal>. (2) The attribute has been omitted, and the DTD declared
+the attribute with a default value. The default value is returned in
+<literal>l</literal>. 
+- Summarized, <literal>Valuelist l</literal> is returned for all list-type
+attribute values.
+</para>
+                     </listitem>
+                   </itemizedlist>
+
+Note that before the attribute value is returned, the value is normalized. This
+means that newlines are converted to spaces, and that references to character
+entities (i.e. <literal>&amp;#<replaceable>n</replaceable>;</literal>) and
+general entities
+(i.e. <literal>&amp;<replaceable>name</replaceable>;</literal>) are expanded;
+if necessary, expansion is performed recursively.
+</para>
+
+<para>
+In well-formedness mode, there is no DTD which could declare an
+attribute. Because of this, every occurring attribute is considered as a CDATA
+attribute.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>required_string_attribute n</literal>: returns the Value attribute
+called n, or the Valuelist attribute as a string where the list elements
+are separated by spaces. If the attribute value is implied, or if the
+attribute does not exist, the method will fail. - This method is convenient
+if you expect a non-implied and non-list attribute value.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>optional_string_attribute n</literal>: returns the Value attribute
+called n, or the Valuelist attribute as a string where the list elements
+are separated by spaces. If the attribute value is implied, or if the
+attribute does not exist, the method returns None. - This method is 
+convenient if you expect a non-list attribute value including the implied
+value.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>required_list_attribute n</literal>: returns the Valuelist attribute
+called n, or the Value attribute as a list with a single element.
+If the attribute value is implied, or if the
+attribute does not exist, the method will fail. - This method is 
+convenient if you expect a list attribute value.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>optional_list_attribute n</literal>: returns the Valuelist attribute
+called n, or the Value attribute as a list with a single element.
+If the attribute value is implied, or if the
+attribute does not exist, an empty list will be returned. - This method
+is convenient if you expect a list attribute value or the implied value.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>attribute_names</literal>: returns the list of all attribute names of
+this element. As this is a validating parser, this list is equal to the
+list of declared attributes.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>attribute_type n</literal>: returns the type of the attribute called
+<literal>n</literal>. See the module <literal>Pxp_types</literal> for a
+description of the encoding of the types.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>attributes</literal>: returns the list of pairs of names and values
+for all attributes of
+this element.</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>id_attribute_name</literal>: returns the name of the attribute that is
+declared with type ID. There is at most one such attribute. The method raises
+<literal>Not_found</literal> if there is no declared ID attribute for the
+element type.</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>id_attribute_value</literal>: returns the value of the attribute that
+is declared with type ID. There is at most one such attribute. The method raises
+<literal>Not_found</literal> if there is no declared ID attribute for the
+element type.</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>idref_attribute_names</literal>: returns the list of attribute names
+that are declared as IDREF or IDREFS.</para>
+               </listitem>
+             </itemizedlist>
+          </para>
+         </formalpara>
+         
+         <anchor id="type-node-mods">
+         <formalpara>
+           <title id="type-node-mods.title">
+              <link linkend="type-node-mods.sig">Modifying methods</link>
+            </title>
+           
+           <para>
+The following methods are only defined for element nodes (more exactly:
+the methods are defined for data nodes, too, but always fail).
+
+             <itemizedlist mark="bullet" spacing="compact">
+               <listitem>
+                 <para>
+<literal>add_node sn</literal>: Adds sub node <literal>sn</literal> to the list
+of children. This operation is illustrated in the picture 
+<link linkend="node-add" endterm="node-add"></link>. This method expects that
+<literal>sn</literal> is a root, and it requires that <literal>sn</literal> and
+the current object share the same DTD.
+</para>
+
+<para>Because <literal>add_node</literal> is the method the parser itself uses
+to add new nodes to the tree, it performs by default some simple validation
+checks: If the content model is a regular expression, it is not allowed to add
+data nodes to this node unless the new nodes consist only of whitespace. In
+this case, the new data nodes are silently dropped (you can change this by
+invoking <literal>keep_always_whitespace_mode</literal>).
+</para>
+
+<para>If the document is flagged as stand-alone, these data nodes only
+containing whitespace are even forbidden if the element declaration is
+contained in an external entity. This case is detected and rejected.</para>
+
+<para>If the content model is <literal>EMPTY</literal>, it is not allowed to
+add any data node unless the data node is empty. In this case, the new data
+node is silently dropped.
+</para>
+
+<para>These checks only apply if there is a DTD. In well-formedness mode, it is
+assumed that every element is declared with content model
+<literal>ANY</literal> which prohibits any validation check. Furthermore, you
+turn these checks off by passing <literal>~force:true</literal> as first
+argument.</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>add_pinstr pi</literal>: Adds the processing instruction
+<literal>pi</literal> to the list of processing instructions.
+</para>
+               </listitem>
+
+               <listitem>
+                 <para>
+<literal>delete</literal>: Deletes this node from the tree. After this
+operation, this node is no longer the child of the former father node; and the
+node loses the connection to the father as well. This operation is illustrated
+by the figure <link linkend="node-delete" endterm="node-delete"></link>.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>set_nodes nl</literal>: Sets the list of children to
+<literal>nl</literal>. It is required that every member of <literal>nl</literal>
+is a root, and that all members and the current object share the same DTD.
+Unlike <literal>add_node</literal>, no validation checks are performed.
+</para>
+             </listitem>
+             <listitem>
+                 <para>
+<literal>quick_set_attributes atts</literal>: sets the attributes of this
+element to <literal>atts</literal>. It is <emphasis>not</emphasis> checked
+whether <literal>atts</literal> matches the DTD or not; it is up to the
+caller of this method to ensure this. (This method may be useful to transform
+the attribute values, i.e. apply a mapping to every attribute.)
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>set_comment text</literal>: This method is only applicable to
+<literal>T_comment</literal> nodes; it sets the comment text contained by such
+nodes. </para>
+               </listitem>
+             </itemizedlist>
+</para>
+         </formalpara>
+         
+         <anchor id="type-node-cloning">
+         <formalpara>
+           <title id="type-node-cloning.title">
+              <link linkend="type-node-cloning.sig">Cloning methods</link>
+            </title>
+
+           <para>
+             <itemizedlist mark="bullet" spacing="compact">
+               <listitem>
+                 <para>
+<literal>orphaned_clone</literal>: Returns a clone of the node and the complete
+tree below this node (deep clone). The clone does not have a parent (i.e. the
+reference to the parent node is <emphasis>not</emphasis> cloned). While
+copying the subtree, strings are not copied; it is likely that the original tree
+and the copy share strings. Extension objects are cloned by invoking
+the <literal>clone</literal> method on the original objects; how much of
+the extension objects is cloned depends on the implementation of this method.
+</para>
+                 <para>This operation is illustrated by the figure 
+<link linkend="node-clone" endterm="node-clone"></link>.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>orphaned_flat_clone</literal>: Returns a clone of the node,
+but sets the list of sub nodes to [], i.e. the sub nodes are not cloned.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<anchor id="type-node-meth-create-element">
+<literal>create_element dtd nt al</literal>: Returns a flat copy of this node
+(which must be an element) with the following modifications: The DTD is set to
+<literal>dtd</literal>; the node type is set to <literal>nt</literal>, and the
+new attribute list is set to <literal>al</literal> (given as list of
+(name,value) pairs). The copy does not have children nor a parent. It does not
+contain processing instructions. See 
+<link linkend="type-node-ex-create-element">the example below</link>.
+</para>
+
+                 <para>Note that you can specify the position of the new node
+by the optional argument <literal>~position</literal>.</para>
+               </listitem>
+               <listitem>
+                 <para>
+<anchor id="type-node-meth-create-data">
+<literal>create_data dtd cdata</literal>: Returns a flat copy of this node
+(which must be a data node) with the following modifications: The DTD is set to
+<literal>dtd</literal>; the node type is set to <literal>T_data</literal>; the
+attribute list is empty (data nodes never have attributes); the list of
+children and PIs is empty, too (same reason). The new node does not have a
+parent. The value <literal>cdata</literal> is the new character content of the
+node. See 
+<link linkend="type-node-ex-create-data">the example below</link>.
+</para>
+               </listitem>
+               <listitem>
+                 <para>
+<literal>keep_always_whitespace_mode</literal>: Even data nodes which are
+normally dropped because they only contain ignorable whitespace, can added to
+this node once this mode is turned on. (This mode is useful to produce
+canonical XML.)
+</para>
+               </listitem>
+             </itemizedlist>
+</para>
+         </formalpara>
+         
+         <anchor id="type-node-weird">
+         <formalpara>
+           <title id="type-node-weird.title">
+              <link linkend="type-node-weird.sig">Validating methods</link>
+            </title>
+           <para>
+There is one method which locally validates the node, i.e. checks whether the
+subnodes match the content model of this node.
+
+             <itemizedlist mark="bullet" spacing="compact">
+               <listitem>
+                 <para>
+<literal>local_validate</literal>: Checks that this node conforms to the
+DTD by comparing the type of the subnodes with the content model for this
+node. (Applications need not call this method unless they add new nodes
+themselves to the tree.)
+</para>
+               </listitem>
+             </itemizedlist>
+</para>
+         </formalpara>
+       </sect2>
+
+       <sect2>
+         <title>The class <literal>element_impl</literal></title>
+         <para>
+This class is an implementation of <literal>node</literal> which
+realizes element nodes:
+
+<programlisting>
+<![CDATA[
+class [ 'ext ] element_impl : 'ext -> [ 'ext ] node
+]]>
+</programlisting>
+
+</para>
+         <formalpara>
+           <title>Constructor</title>
+           <para>
+You can create a new instance by
+
+<programlisting>
+new element_impl <replaceable>extension_object</replaceable>
+</programlisting>
+
+which creates a special form of element that already contains a
+reference to the <replaceable>extension_object</replaceable>, but is
+otherwise empty. This special form is called an
+<emphasis>exemplar</emphasis>. The purpose of exemplars is that they serve as
+patterns that can be duplicated and filled with data. The method
+<link linkend="type-node-meth-create-element">
+<literal>create_element</literal></link> is designed to perform this action.
+</para>
+         </formalpara>
+
+         <anchor id="type-node-ex-create-element">
+         <formalpara>
+           <title>Example</title>
+
+           <para>First, create an exemplar by
+
+<programlisting>
+let exemplar_ext = ... in
+let exemplar     = new element_impl exemplar_ext in
+</programlisting>
+
+The <literal>exemplar</literal> is not used in node trees, but only as
+a pattern when the element nodes are created:
+
+<programlisting>
+let element = exemplar # <link linkend="type-node-meth-create-element">create_element</link> dtd (T_element name) attlist 
+</programlisting>
+
+The <literal>element</literal> is a copy of <literal>exemplar</literal>
+(even the extension <literal>exemplar_ext</literal> has been copied)
+which ensures that <literal>element</literal> and its extension are objects
+of the same class as the exemplars; note that you need not pass a 
+class name or other meta information. The copy is initially connected 
+with the <literal>dtd</literal>, it gets a node type, and the attribute list
+is filled. The <literal>element</literal> is now fully functional; it can
+be added to another element as child, and it can contain references to
+subnodes.
+</para>
+         </formalpara>
+
+       </sect2>
+
+       <sect2>
+         <title>The class <literal>data_impl</literal></title>
+         <para>
+This class is an implementation of <literal>node</literal> which
+should be used for all character data nodes:
+
+<programlisting>
+<![CDATA[
+class [ 'ext ] data_impl : 'ext -> [ 'ext ] node
+]]>
+</programlisting>
+
+</para>
+
+         <formalpara>
+           <title>Constructor</title>
+           <para>
+You can create a new instance by
+
+<programlisting>
+new data_impl <replaceable>extension_object</replaceable>
+</programlisting>
+
+which creates an empty exemplar node which is connected to
+<replaceable>extension_object</replaceable>. The node does not contain a
+reference to any DTD, and because of this it cannot be added to node trees.
+</para>
+         </formalpara>
+           
+         <para>To get a fully working data node, apply the method
+<link linkend="type-node-meth-create-data"><literal>create_data</literal>
+</link> to the exemplar (see example).
+</para>
+
+         <anchor id="type-node-ex-create-data">
+         <formalpara>
+           <title>Example</title>
+
+           <para>First, create an exemplar by
+
+<programlisting>
+let exemplar_ext = ... in
+let exemplar     = new data_impl exemplar_ext in
+</programlisting>
+
+The <literal>exemplar</literal> is not used in node trees, but only as
+a pattern when the data nodes are created:
+
+<programlisting>
+let data_node = exemplar # <link
+                                linkend="type-node-meth-create-data">create_data</link> dtd "The characters contained in the data node" 
+</programlisting>
+
+The <literal>data_node</literal> is a copy of <literal>exemplar</literal>.
+The copy is initially connected 
+with the <literal>dtd</literal>, and it is filled with character material.
+The <literal>data_node</literal> is now fully functional; it can
+be added to an element as child.
+</para>
+         </formalpara>
+       </sect2>
+
+       <sect2>
+         <title>The type <literal>spec</literal></title>
+         <para>
+The type <literal>spec</literal> defines a way to handle the details of
+creating nodes from exemplars.
+
+<programlisting><![CDATA[
+type 'ext spec
+constraint 'ext = 'ext node #extension
+
+val make_spec_from_mapping :
+      ?super_root_exemplar : 'ext node ->
+      ?comment_exemplar : 'ext node ->
+      ?default_pinstr_exemplar : 'ext node ->
+      ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
+      data_exemplar: 'ext node ->
+      default_element_exemplar: 'ext node ->
+      element_mapping: (string, 'ext node) Hashtbl.t -> 
+      unit -> 
+        'ext spec
+
+val make_spec_from_alist :
+      ?super_root_exemplar : 'ext node ->
+      ?comment_exemplar : 'ext node ->
+      ?default_pinstr_exemplar : 'ext node ->
+      ?pinstr_alist : (string * 'ext node) list ->
+      data_exemplar: 'ext node ->
+      default_element_exemplar: 'ext node ->
+      element_alist: (string * 'ext node) list -> 
+      unit -> 
+        'ext spec
+]]></programlisting>
+
+The two functions <literal>make_spec_from_mapping</literal> and
+<literal>make_spec_from_alist</literal> create <literal>spec</literal>
+values. Both functions are equivalent; the only difference is
+that the first function uses hashtables and the latter association lists to
+describe the mappings from names to exemplars.
+</para>
+
+<para>
+You can specify exemplars for the various kinds of nodes that need to be
+generated when an XML document is parsed:
+             
+<itemizedlist mark="bullet" spacing="compact">
+             <listitem>
+               <para><literal>~super_root_exemplar</literal>: This exemplar
+is used to create the super root. This special node is only created if the
+corresponding configuration option has been selected; it is the parent node of
+the root node which may be convenient if every working node must have a parent.</para>
+             </listitem>
+             <listitem>
+               <para><literal>~comment_exemplar</literal>: This exemplar is
+used when a comment node must be created. Note that such nodes are only created
+if the corresponding configuration option is "on".
+</para>
+             </listitem>
+             <listitem>
+               <para><literal>~default_pinstr_exemplar</literal>: If a node
+for a processing instruction must be created, and the instruction is not listed
+in the table passed by <literal>~pinstr_mapping</literal> or
+<literal>~pinstr_alist</literal>, this exemplar is used.
+Again the configuration option must be "on" in order to create such nodes at
+all. 
+</para>
+             </listitem>
+             <listitem>
+               <para><literal>~pinstr_mapping</literal> or
+<literal>~pinstr_alist</literal>: Map the target names of processing
+instructions to exemplars. These mappings are only used when nodes for
+processing instructions are created.</para>
+             </listitem>
+             <listitem>
+               <para><literal>~data_exemplar</literal>: The exemplar for
+ordinary data nodes.</para>
+             </listitem>
+             <listitem>
+               <para><literal>~default_element_exemplar</literal>: This
+exemplar is used if an element node must be created, but the element type
+cannot be found in the tables <literal>element_mapping</literal> or
+<literal>element_alist</literal>.</para>
+             </listitem>
+             <listitem>
+               <para><literal>~element_mapping</literal> or
+<literal>~element_alist</literal>: Map the element types to exemplars. These
+mappings are used to create element nodes.</para>
+             </listitem>
+           </itemizedlist>
+
+In most cases, you only want to create <literal>spec</literal> values to pass
+them to the parser functions found in <literal>Pxp_yacc</literal>. However, it
+might be useful to apply <literal>spec</literal> values directly.
+</para>
+
+<para>The following functions create various types of nodes by selecting the
+corresponding exemplar from the passed <literal>spec</literal> value, and by
+calling <literal>create_element</literal> or <literal>create_data</literal> on
+the exemplar.
+
+<programlisting><![CDATA[
+val create_data_node : 
+      'ext spec -> 
+      dtd -> 
+      (* data material: *) string -> 
+          'ext node
+
+val create_element_node : 
+      ?position:(string * int * int) ->
+      'ext spec -> 
+      dtd -> 
+      (* element type: *) string -> 
+      (* attributes: *) (string * string) list -> 
+          'ext node
+
+val create_super_root_node :
+      ?position:(string * int * int) ->
+      'ext spec -> 
+       dtd -> 
+           'ext node
+
+val create_comment_node :
+      ?position:(string * int * int) ->
+      'ext spec -> 
+      dtd -> 
+      (* comment text: *) string -> 
+          'ext node
+
+val create_pinstr_node :
+      ?position:(string * int * int) ->
+      'ext spec -> 
+      dtd -> 
+      proc_instruction -> 
+          'ext node
+]]></programlisting>
+</para>
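+
+<para>For illustration, here is a minimal sketch (not part of the original
+interface documentation) that builds two nodes with these functions. It assumes
+an existing <literal>dtd</literal> object whose declarations permit an element
+type "p", and it uses <literal>default_spec</literal> from
+<literal>Pxp_yacc</literal> as the <literal>spec</literal> value:
+
+<programlisting>
+let p   = create_element_node Pxp_yacc.default_spec dtd "p" [] in
+let txt = create_data_node    Pxp_yacc.default_spec dtd "Some text" in
+p # add_node txt
+</programlisting>
+</para>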
+       </sect2>
+
+       <sect2>
+         <title>Examples</title>
+
+         <formalpara>
+           <title>Building trees.</title>
+
+           <para>Here is the piece of code that creates the tree of
+the figure <link linkend="node-term" endterm="node-term"></link>. The extension
+object and the DTD are beyond the scope of this example.
+
+<programlisting>
+let exemplar_ext = ... (* some extension *) in
+let dtd = ... (* some DTD *) in
+
+let element_exemplar = new element_impl exemplar_ext in
+let data_exemplar    = new data_impl    exemplar_ext in
+
+let a1 = element_exemplar # create_element dtd (T_element "a") ["att", "apple"]
+and b1 = element_exemplar # create_element dtd (T_element "b") []
+and c1 = element_exemplar # create_element dtd (T_element "c") []
+and a2 = element_exemplar # create_element dtd (T_element "a") ["att", "orange"]
+in
+
+let cherries = data_exemplar # create_data dtd "Cherries" in
+let orange   = data_exemplar # create_data dtd "An orange" in
+
+a1 # add_node b1;
+a1 # add_node c1;
+b1 # add_node a2;
+b1 # add_node cherries;
+a2 # add_node orange;
+</programlisting>
+
+Alternatively, the last block of statements could also be written as:
+
+<programlisting>
+a1 # set_nodes [b1; c1];
+b1 # set_nodes [a2; cherries];
+a2 # set_nodes [orange];
+</programlisting>
+
+The root of the tree is <literal>a1</literal>, i.e. it is true that
+
+<programlisting>
+x # root == a1
+</programlisting>
+
+for every x from { <literal>a1</literal>, <literal>a2</literal>,
+<literal>b1</literal>, <literal>c1</literal>, <literal>cherries</literal>,
+<literal>orange</literal> }.
+</para>
+         </formalpara>
+         <para>
+Furthermore, the following properties hold:
+
+<programlisting>
+  a1 # attribute "att" = Value "apple"
+& a2 # attribute "att" = Value "orange"
+
+& cherries # data = "Cherries"
+&   orange # data = "An orange"
+&       a1 # data = "An orangeCherries"
+
+&       a1 # node_type = T_element "a"
+&       a2 # node_type = T_element "a"
+&       b1 # node_type = T_element "b"
+&       c1 # node_type = T_element "c"
+& cherries # node_type = T_data
+&   orange # node_type = T_data
+
+&       a1 # sub_nodes = [ b1; c1 ]
+&       a2 # sub_nodes = [ orange ]
+&       b1 # sub_nodes = [ a2; cherries ]
+&       c1 # sub_nodes = []
+& cherries # sub_nodes = []
+&   orange # sub_nodes = []
+
+&       a2 # parent == b1
+&       b1 # parent == a1
+&       c1 # parent == a1
+& cherries # parent == b1
+&   orange # parent == a2
+</programlisting>
+</para>
+         <formalpara>
+           <title>Searching nodes.</title>
+
+           <para>The following function searches all nodes of a tree 
+for which a certain condition holds:
+
+<programlisting>
+let rec search p t =
+  if p t then
+    t :: search_list p (t # sub_nodes)
+  else
+    search_list p (t # sub_nodes)
+
+and search_list p l =
+  match l with
+    []      -&gt; []
+  | t :: l' -&gt; (search p t) @ (search_list p l')
+;;
+</programlisting>
+</para>
+         </formalpara>
+
+         <para>For example, if you want to search all elements of a certain
+type <literal>et</literal>, the function <literal>search</literal> can be
+applied as follows:
+
+<programlisting>
+let search_element_type et t =
+  search (fun x -&gt; x # node_type = T_element et) t
+;;
+</programlisting>
+</para>
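+
+<para>For instance, applied to the tree built above (a small sketch), the call
+
+<programlisting>
+let a_elements = search_element_type "a" a1
+</programlisting>
+
+evaluates to <literal>[ a1; a2 ]</literal>, because these are the two elements
+of type "a" in the tree.
+</para>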
+
+         <formalpara>
+           <title>Getting attribute values.</title>
+
+           <para>Suppose we have the declaration:
+
+<programlisting><![CDATA[
+<!ATTLIST e a CDATA #REQUIRED
+            b CDATA #IMPLIED
+            c CDATA "12345">]]>
+</programlisting>
+
+In this case, every element <literal>e</literal> must have an attribute 
+<literal>a</literal>, otherwise the parser would indicate an error. If
+the O'Caml variable <literal>n</literal> holds the node of the tree 
+corresponding to the element, you can get the value of the attribute
+<literal>a</literal> by
+
+<programlisting>
+let value_of_a = n # required_string_attribute "a"
+</programlisting>
+
+which is more or less an abbreviation for 
+
+<programlisting><![CDATA[
+let value_of_a = 
+  match n # attribute "a" with
+    Value s -> s
+  | _       -> assert false]]>
+</programlisting>
+
+As the attribute is required, the <literal>attribute</literal> method always
+returns a <literal>Value</literal>.
+</para>
+         </formalpara>
+         
+         <para>In contrast to this, the attribute <literal>b</literal> can be
+omitted. In this case, the method <literal>required_string_attribute</literal>
+works only if the attribute is present, and fails if the attribute
+is missing. To get the value, you can apply the method
+<literal>optional_string_attribute</literal>:
+
+<programlisting>
+let value_of_b = n # optional_string_attribute "b"
+</programlisting>
+
+Now, <literal>value_of_b</literal> is of type <literal>string option</literal>,
+and <literal>None</literal> represents the omitted attribute. Alternatively, 
+you could also use <literal>attribute</literal>:
+
+<programlisting><![CDATA[
+let value_of_b = 
+  match n # attribute "b" with
+    Value s       -> Some s
+  | Implied_value -> None
+  | _             -> assert false]]>
+</programlisting>
+</para>
+
+         <para>The attribute <literal>c</literal> behaves much like
+<literal>a</literal>, because it always has a value. If the attribute is
+omitted, the default, here "12345", will be returned instead. Because of this,
+you can again use <literal>required_string_attribute</literal> to get the
+value.
+</para>
+
+         <para>The type <literal>CDATA</literal> is the most general string
+type. The types <literal>NMTOKEN</literal>, <literal>ID</literal>,
+<literal>IDREF</literal>, <literal>ENTITY</literal>, and all enumerators and
+notations are special forms of string types that restrict the possible
+values. From O'Caml, they behave like <literal>CDATA</literal>, i.e. you can
+use the methods <literal>required_string_attribute</literal> and
+<literal>optional_string_attribute</literal>, too.
+</para>
+
+         <para>In contrast to this, the types <literal>NMTOKENS</literal>,
+<literal>IDREFS</literal>, and <literal>ENTITIES</literal> mean lists of
+strings. Suppose we have the declaration:
+
+<programlisting><![CDATA[
+<!ATTLIST f d NMTOKENS #REQUIRED
+            e NMTOKENS #IMPLIED>]]>
+</programlisting>
+
+The type <literal>NMTOKENS</literal> stands for lists of space-separated
+tokens; for example the value <literal>"1 abc 23ef"</literal> means the list
+<literal>["1"; "abc"; "23ef"]</literal>. (Again, <literal>IDREFS</literal>
+and <literal>ENTITIES</literal> have more restricted values.) To get the
+value of attribute <literal>d</literal>, one can use
+
+<programlisting>
+let value_of_d = n # required_list_attribute "d"
+</programlisting>
+
+or
+
+<programlisting><![CDATA[
+let value_of_d = 
+  match n # attribute "d" with
+    Valuelist l -> l
+  | _           -> assert false]]>
+</programlisting>
+As <literal>d</literal> is required, the attribute cannot be omitted, and 
+the <literal>attribute</literal> method always returns a
+<literal>Valuelist</literal>.
+</para>
+
+         <para>For optional attributes like <literal>e</literal>, apply
+
+<programlisting>
+let value_of_e = n # optional_list_attribute "e"
+</programlisting>
+
+or
+
+<programlisting><![CDATA[
+let value_of_e = 
+  match n # attribute "e" with
+    Valuelist l   -> l
+  | Implied_value -> []
+  | _             -> assert false]]>
+</programlisting>
+
+Here, a missing attribute counts as the empty list.
+</para>
+
+       </sect2>
+
+
+       <sect2>
+         <title>Iterators</title>
+
+         <para>There are also several iterators in Pxp_document; please see
+the mli file for details. You can find examples for them in the
+"simple_transformation" directory.
+
+<programlisting><![CDATA[
+val find : ?deeply:bool -> 
+           f:('ext node -> bool) -> 'ext node -> 'ext node
+
+val find_all : ?deeply:bool ->
+               f:('ext node -> bool) -> 'ext node -> 'ext node list
+
+val find_element : ?deeply:bool ->
+                   string -> 'ext node -> 'ext node
+
+val find_all_elements : ?deeply:bool ->
+                        string -> 'ext node -> 'ext node list
+
+exception Skip
+val map_tree :  pre:('exta node -> 'extb node) ->
+               ?post:('extb node -> 'extb node) ->
+               'exta node -> 
+                   'extb node
+
+
+val map_tree_sibl : 
+        pre: ('exta node option -> 'exta node -> 'exta node option -> 
+                  'extb node) ->
+       ?post:('extb node option -> 'extb node -> 'extb node option -> 
+                  'extb node) ->
+       'exta node -> 
+           'extb node
+
+val iter_tree : ?pre:('ext node -> unit) ->
+                ?post:('ext node -> unit) ->
+                'ext node -> 
+                    unit
+
+val iter_tree_sibl :
+       ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) ->
+       ?post:('ext node option -> 'ext node -> 'ext node option -> unit) ->
+       'ext node -> 
+           unit
+]]></programlisting>
+</para>
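+
+<para>As a small illustration (a sketch that is not taken from the
+distribution), the following prints the character data of all "item" elements
+found below <literal>root</literal>, and then the type of every element of the
+tree; <literal>root</literal> is assumed to be an <literal>'ext node</literal>:
+
+<programlisting><![CDATA[
+let items = find_all_elements ~deeply:true "item" root in
+List.iter (fun n -> print_endline (n # data)) items;
+
+iter_tree
+  ~pre:(fun n ->
+          match n # node_type with
+              T_element name -> print_endline name
+            | _              -> ())
+  root
+]]></programlisting>
+</para>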
+       </sect2>
+
+      </sect1>
+
+<!-- ********************************************************************** -->
+
+      <sect1>
+       <title>The class type <literal>extension</literal></title>
+       <para>
+
+<programlisting>
+<![CDATA[
+class type [ 'node ] extension =
+  object ('self)
+    method clone : 'self
+      (* "clone" should return an exact deep copy of the object. *)
+    method node : 'node
+      (* "node" returns the corresponding node of this extension. This method
+       * is intended to return exactly what has previously been set by
+       * "set_node".
+       *)
+    method set_node : 'node -> unit
+      (* "set_node" is invoked once the extension is associated to a new
+       * node object.
+       *)
+  end
+]]>
+</programlisting>
+
+This is the type of classes used for node extensions. For every node of the
+document tree, there is not only the <literal>node</literal> object, but also
+an <literal>extension</literal> object. The latter has minimal
+functionality; it has only the methods necessary to attach it to the node
+object that contains the details of the node instance. The extension object is
+called an extension because its purpose is extensibility.</para>
+
+       <para>For several reasons, it is impossible to derive subclasses of the
+<literal>node</literal> classes (i.e. <literal>element_impl</literal> and
+<literal>data_impl</literal>) that add new methods. But
+subclassing nodes is a great feature, because it allows the user to provide
+different classes for different types of nodes. The extension objects are a
+workaround that is as powerful as direct subclassing, at the cost of
+some notational overhead.
+</para>
+
+<figure id="extension-general" float="1">
+<title>The structure of nodes and extensions</title>
+<graphic fileref="pic/extension_general" format="GIF">
+</graphic>
+</figure>
+
+       <para>The picture shows how the nodes and extensions are linked
+together. Every node has a reference to its extension, and every extension has
+a reference to its node. The methods <literal>extension</literal> and
+<literal>node</literal> follow these references; a typical phrase is 
+
+<programlisting>
+self # node # attribute "xy"
+</programlisting>
+
+to get the value of an attribute from a method defined in the extension object;
+or 
+
+<programlisting>
+self # node # iter
+  (fun n -&gt; n # extension # my_method ...)
+</programlisting>
+
+to iterate over the subnodes and to call <literal>my_method</literal> of the
+corresponding extension objects.
+</para>
+
+       <para>Note that extension objects do not have references to subnodes
+(or "subextensions") themselves; in order to get one of the children of an
+extension you must first go to the node object, then get the child node, and
+finally reach the extension that is logically the child of the extension you
+started with.</para>
+
+       <sect2>
+         <title>How to define an extension class</title>
+
+         <para>At minimum, you must define the methods
+<literal>clone</literal>, <literal>node</literal>, and
+<literal>set_node</literal> such that your class is compatible with the type
+<literal>extension</literal>. The method <literal>set_node</literal> is called
+during the initialization of the node, or after a node has been cloned; the
+node object invokes <literal>set_node</literal> on the extension object to tell
+it that this node is now the object the extension is linked to. The extension
+must return the node object passed as argument of <literal>set_node</literal>
+when the <literal>node</literal> method is called.</para>
+
+         <para>The <literal>clone</literal> method must return a copy of the
+extension object; at least the object itself must be duplicated, but if
+required, the copy should deeply duplicate all objects and values that are
+referred to by the extension, too. Whether this is required depends on the
+application; <literal>clone</literal> is invoked by the node object when one of
+its cloning methods is called.</para>
+
+         <para>A good starting point for an extension class:
+
+<programlisting>
+<![CDATA[class custom_extension =
+  object (self)
+
+    val mutable node = (None : custom_extension node option)
+
+    method clone = {< >} 
+
+    method node =
+      match node with
+          None ->
+            assert false
+        | Some n -> n
+
+    method set_node n =
+      node <- Some n
+
+  end
+]]>
+</programlisting>
+
+This class is compatible with <literal>extension</literal>. The purpose of
+defining such a class is, of course, adding further methods; and you can do it
+without restriction. 
+</para>
+
+         <para>Often, you will want more than one extension class. In this
+case, the simplest approach is to give all your classes (for one kind of
+document) the same type (with respect to the interface; i.e. it does not
+matter whether your classes differ in their private methods and instance
+variables, but the public methods count). This approach avoids lots of
+coercions and problems with type incompatibilities. It is simple to implement:
+
+<programlisting>
+<![CDATA[class virtual custom_extension =
+  object (self)
+    val mutable node = (None : custom_extension node option)
+
+    method clone = ...      (* see above *)
+    method node = ...       (* see above *)
+    method set_node n = ... (* see above *)
+
+    method virtual my_method1 : ...
+    method virtual my_method2 : ...
+    ... (* etc. *)
+  end
+
+class custom_extension_kind_A =
+  object (self)
+    inherit custom_extension
+
+    method my_method1 = ...
+    method my_method2 = ...
+  end
+
+class custom_extension_kind_B =
+  object (self)
+    inherit custom_extension
+
+    method my_method1 = ...
+    method my_method2 = ...
+  end
+]]>
+</programlisting>
+
+If a class does not need a method (e.g. because it does not make sense, or it
+would violate some important condition), it is possible to define the method
+and to always raise an exception when the method is invoked
+(e.g. <literal>assert false</literal>).
+</para>
+
+         <para>This leads to a strong recommendation: do not try to further
+specialize the types of extension objects. It is difficult, sometimes even
+impossible, and almost never worthwhile.</para>
+       </sect2>
+
+       <sect2>
+         <title>How to bind extension classes to element types</title>
+
+         <para>Once you have defined your extension classes, you can bind them
+to element types. The simplest case is that you have only one class and that
+this class is to be always used. The parsing functions in the module
+<literal>Pxp_yacc</literal> take a <literal>spec</literal> argument which
+can be customized. If <literal>c</literal> is an instance of your single
+extension class, this argument should be
+
+<programlisting>
+let spec =
+  make_spec_from_alist
+    ~data_exemplar:            (new data_impl c)
+    ~default_element_exemplar: (new element_impl c)
+    ~element_alist:            []
+    ()
+</programlisting>
+
+This means that data nodes will be created from the exemplar passed as
+<literal>~data_exemplar</literal>, and that all element nodes will be made from
+the exemplar specified by <literal>~default_element_exemplar</literal>. In
+<literal>~element_alist</literal>, you can specify that different exemplars are
+to be used for different element types; but this is an optional feature. If you
+do not need it, pass the empty list.
+</para>
+
+<para>
+Remember that an exemplar is a (node, extension) pair that serves as a pattern
+when new nodes (and the corresponding extension objects) are added to the
+document tree. In this case, the exemplar contains <literal>c</literal> as
+extension, and when nodes are created, the exemplar is cloned; cloning
+also makes a copy of <literal>c</literal>, such that all nodes of the document
+tree will have a copy of <literal>c</literal> as extension.
+</para>
+
+         <para>The <literal>~element_alist</literal> argument can bind
+specific element types to specific exemplars; as exemplars may be instances of
+different classes it is effectively possible to bind element types to
+classes. For example, if the element type "p" is implemented by class "c_p",
+and "q" is realized by "c_q", you can pass the following value:
+
+<programlisting>
+let spec =
+  make_spec_from_alist
+    ~data_exemplar:            (new data_impl c)
+    ~default_element_exemplar: (new element_impl c)
+    ~element_alist:            
+      [ "p", new element_impl c_p;
+        "q", new element_impl c_q;
+      ]
+    ()
+</programlisting>
+
+The extension object <literal>c</literal> is still used for all data nodes and
+for all other element types.
+</para>
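+
+<para>To complete the picture, such a <literal>spec</literal> is simply passed
+to one of the parsing functions of <literal>Pxp_yacc</literal>. A minimal
+sketch (the file name is a placeholder; it is assumed that the returned
+document object gives access to the root node via its <literal>root</literal>
+method, and that <literal>my_method1</literal> is one of the methods of the
+extension classes bound above):
+
+<programlisting>
+let doc  = parse_document_entity default_config (from_file "doc.xml") spec in
+let root = doc # root in
+root # extension # my_method1
+</programlisting>
+</para>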
+
+       </sect2>
+
+      </sect1>
+
+<!-- ********************************************************************** -->
+
+      <sect1>
+       <title>Details of the mapping from XML text to the tree representation
+</title>
+
+       <sect2>
+         <title>The representation of character-free elements</title>
+
+         <para>If an element declaration does not allow the element to 
+contain character data, the following rules apply.</para>
+
+         <para>If the element must be empty, i.e. it is declared with the
+keyword <literal>EMPTY</literal>, the element instance must be effectively
+empty (it must not even contain whitespace characters). The parser guarantees
+that a declared <literal>EMPTY</literal> element never contains a data
+node, not even a data node representing the empty string.</para>
+
+         <para>If the element declaration only permits other elements to occur
+within that element but not character data, it is still possible to insert
+whitespace characters between the subelements. The parser ignores these
+characters, too, and does not create data nodes for them.</para>
+
+         <formalpara>
+           <title>Example.</title>
+
+           <para>Consider the following element types:
+
+<programlisting><![CDATA[
+<!ELEMENT x ( #PCDATA | z )* >
+<!ELEMENT y ( z )* >
+<!ELEMENT z EMPTY>
+]]></programlisting>
+
+Only <literal>x</literal> may contain character data, as the keyword
+<literal>#PCDATA</literal> indicates. The other types are character-free.
+</para>
+         </formalpara>
+
+         <para>The XML term
+
+<programlisting><![CDATA[
+<x><z/> <z/></x>
+]]></programlisting>
+
+will be internally represented by an element node for <literal>x</literal> 
+with three subnodes: the first <literal>z</literal> element, a data node
+containing the space character, and the second <literal>z</literal> element. 
+In contrast to this, the term
+
+<programlisting><![CDATA[
+<y><z/> <z/></y>
+]]></programlisting>
+
+is represented by an  element node for <literal>y</literal> with only
+<emphasis>two</emphasis> subnodes, the two <literal>z</literal> elements. There
+is no data node for the space character because spaces are ignored in the
+character-free element <literal>y</literal>.
+</para>
+
+       </sect2>
+
+       <sect2>
+         <title>The representation of character data</title>
+
+         <para>The XML specification allows all Unicode characters in XML
+texts. This parser can be configured such that UTF-8 is used to represent the
+characters internally; however, the default character encoding is
+ISO-8859-1. (Currently, no other encodings are possible for the internal string
+representation; the type <literal>Pxp_types.rep_encoding</literal> enumerates
+the possible encodings. In principle, the parser could use any encoding that is
+ASCII-compatible, but there are currently only lexical analyzers for UTF-8 and
+ISO-8859-1. It is currently impossible to use UTF-16 or UCS-4 as internal
+encodings (or other multibyte encodings which are not ASCII-compatible) unless
+major parts of the parser are rewritten, which is unlikely.)
+</para>
+
+<para>
+The internal encoding may be different from the external encoding (specified
+in the XML declaration <literal>&lt;?xml ... encoding="..."?&gt;</literal>); in
+this case the strings are automatically converted to the internal encoding.
+</para>
+
+<para>
+If the internal encoding is ISO-8859-1, it is possible that there are
+characters that cannot be represented. In this case, the parser ignores such
+characters and prints a warning (to the <literal>collect_warnings</literal>
+object that must be passed when the parser is called).
+</para>
+
+         <para>The XML specification allows lines to be separated by single LF
+characters, by CR LF character sequences, or by single CR
+characters. Internally, these separators are always converted to single LF
+characters.</para>
+
+         <para>The parser guarantees that there are never two adjacent data
+nodes; if necessary, data material that would otherwise be represented by
+several nodes is collapsed into one node. Note that you can still create node
+trees with adjacent data nodes; however, the parser does not return such trees.
+</para>
+
+         <para>Note that CDATA sections are not represented specially; such
+sections are added to the current data material that is being collected for the
+next data node.</para>
+       </sect2>
+
+
+       <sect2>
+         <title>The representation of entities within documents</title>
+
+         <para><emphasis>Entities are not represented within
+documents!</emphasis> If the parser finds an entity reference in the document
+content, the reference is immediately expanded, and the parser reads the
+expansion text instead of the reference.
+</para>
+       </sect2>
+
+       <sect2>
+         <title>The representation of attributes</title> <para>As attribute
+values are also composed of Unicode characters, the same character encoding
+problems arise as for character material. Attribute values are
+converted to the internal encoding, too; if there are characters that
+cannot be represented, these are dropped, and a warning is printed.</para>
+
+         <para>Attribute values are normalized before they are returned by
+methods like <literal>attribute</literal>. First, any remaining entity
+references are expanded; if necessary, expansion is performed recursively.
+Second, newline characters (any of LF, CR LF, or CR characters) are converted
+to single space characters. Note that especially the latter action is
+prescribed by the XML standard (but <literal>&amp;#10;</literal> is not
+converted, so that it is still possible to include line feeds in attributes).
+</para>
+       </sect2>
+
+       <sect2>
+         <title>The representation of processing instructions</title>
+<para>Processing instructions are parsed to some extent: the first word of the
+PI is called the target, and it is stored separately from the rest of the PI:
+
+<programlisting><![CDATA[
+<?target rest?>
+]]></programlisting>
+
+The exact location where a PI occurs is not represented (by default). The
+parser puts the PI into the object that represents the embracing construct (an
+element, a DTD, or the whole document); that means you can find out which PIs
+occur in a certain element, in the DTD, or in the whole document, but you
+cannot look up the exact position within the construct.
+</para>
+
+         <para>If you require the exact location of PIs, it is possible to
+create extra nodes for them. This mode is controlled by the option
+<literal>enable_pinstr_nodes</literal>. The additional nodes have the node type
+<literal>T_pinstr <replaceable>target</replaceable></literal>, and are created
+from special exemplars contained in the <literal>spec</literal> (see
+pxp_document.mli).</para>
+       </sect2>
+
+       <sect2>
+         <title>The representation of comments</title> 
+
+<para>Normally, comments are not represented; they are dropped by
+default. However, if you require them, it is possible to create
+<literal>T_comment</literal> nodes for them. This mode can be specified by the
+option <literal>enable_comment_nodes</literal>. Comment nodes are created from
+special exemplars contained in the <literal>spec</literal> (see
+pxp_document.mli). You can access the contents of comments through the 
+method <literal>comment</literal>.</para>
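+
+<para>Both modes are selected in the parser configuration. A sketch, assuming
+(as in <literal>Pxp_yacc</literal>) that <literal>enable_pinstr_nodes</literal>
+and <literal>enable_comment_nodes</literal> are boolean fields of the
+<literal>config</literal> record:
+
+<programlisting>
+let config =
+  { default_config with
+      enable_pinstr_nodes  = true;
+      enable_comment_nodes = true
+  }
+</programlisting>
+</para>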
+       </sect2>
+
+       <sect2>
+         <title>The attributes <literal>xml:lang</literal> and
+<literal>xml:space</literal></title>
+
+         <para>These attributes are not supported specially; they are handled
+like any other attribute.</para>
+       </sect2>
+
+
+       <sect2>
+         <title>And what about namespaces?</title>
+         <para>Currently, there is no special support for namespaces.
+However, the parser allows the colon to occur in names, so it is
+possible to implement namespaces on top of the current API.</para>
+
+         <para>Some future release of PXP will support namespaces as a
+built-in feature.</para>
+       </sect2>
+
+      </sect1>
+
+    </chapter>
+
+<!-- ********************************************************************** -->
+
+    <chapter>
+      <title>Configuring and calling the parser</title>
+
+<!--
+      <para>
+<emphasis>
+Sorry, this chapter has not yet been written. For an introduction into parser
+configuration, see the previous chapters. As a first approximation, the
+interface definition of Markup_yacc outlines what could go here.
+</emphasis>
+</para>
+-->
+
+<!--
+      <para>
+<programlisting>&markup-yacc.mli;</programlisting>
+</para>
+-->
+
+      <sect1>
+       <title>Overview</title>
+       <para>
+The following main functions (in Pxp_yacc) invoke the parser:
+
+          <itemizedlist mark="bullet" spacing="compact">
+           <listitem>
+             <para><emphasis>parse_document_entity:</emphasis> You want to
+parse a complete and closed document consisting of a DTD and the document body;
+the body is validated against the DTD. This mode is interesting if you have a
+file
+
+<programlisting><![CDATA[
+<!DOCTYPE root ... [ ... ] > <root> ... </root>
+]]></programlisting>
+
+and you can accept any DTD that is included in the file (e.g. because the file
+is under your control).
+</para>
+           </listitem>
+           <listitem>
+             <para><emphasis>parse_wfdocument_entity:</emphasis> You want to
+parse a complete and closed document consisting of a DTD and the document body;
+but the body is not validated, only checked for well-formedness. This mode is
+preferred if validation costs too much time or if the DTD is missing.
+</para>
+           </listitem>
+           <listitem>
+             <para><emphasis>parse_dtd_entity:</emphasis> You want only to
+parse an entity (file) containing the external subset of a DTD. Sometimes it is
+interesting to read such a DTD, for example to compare it with the DTD included
+in a document, or to apply the next mode:
+</para>
+           </listitem>
+           <listitem>
+             <para><emphasis>parse_content_entity:</emphasis> You want only to
+parse an entity (file) containing a fragment of a document body; this fragment
+is validated against the DTD you pass to the function. In particular, the fragment
+must not have a <literal> &lt;!DOCTYPE&gt;</literal> clause, and must directly
+begin with an element.  The element is validated against the DTD.  This mode is
+interesting if you want to check documents against a fixed, immutable DTD.
+</para>
+           </listitem>
+           <listitem>
+             <para><emphasis>parse_wfcontent_entity:</emphasis> This function
+also parses a single element without DTD, but does not validate it.</para>
+           </listitem>
+           <listitem>
+             <para><emphasis>extract_dtd_from_document_entity:</emphasis> This
+function extracts the DTD from a closed document consisting of a DTD and a
+document body. Both the internal and the external subsets are extracted.</para>
+           </listitem>
+         </itemizedlist>
+</para>
+
+<para>
+In many cases, <literal>parse_document_entity</literal> is the preferred mode
+to parse a document in a validating way, and
+<literal>parse_wfdocument_entity</literal> is the mode of choice to parse a
+file while only checking for well-formedness.
+</para>
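+
+<para>
+For example, a well-formedness parse of a file, using the defaults provided by
+Pxp_yacc (described later in this chapter), looks as follows; the file name is
+a placeholder:
+
+<programlisting>
+let doc =
+  parse_wfdocument_entity default_config (from_file "doc.xml") default_spec
+</programlisting>
+</para>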
+
+<para>
+There are a number of variations of these modes. One important application of a
+parser is to check documents from an untrusted source against a fixed DTD. One
+solution is to not allow the <literal>&lt;!DOCTYPE&gt;</literal> clause in
+these documents, and treat the document like a fragment (using mode
+<emphasis>parse_content_entity</emphasis>). This is very simple, but
+inflexible; users of such a system cannot even define additional entities to
+abbreviate frequent phrases of their text.
+</para>
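+
+<para>
+A sketch of this fragment-based check (the file names are placeholders;
+<literal>default_config</literal>, <literal>default_spec</literal>, and
+<literal>from_file</literal> are the helpers from Pxp_yacc described later):
+
+<programlisting>
+let fixed_dtd = parse_dtd_entity default_config (from_file "fixed.dtd") in
+let root =
+  parse_content_entity default_config (from_file "incoming.xml")
+    fixed_dtd default_spec
+</programlisting>
+</para>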
+
+<para>
+It may be necessary to have a more intelligent checker. For example, it is also
+possible to parse the document fully, i.e. including its DTD, and to compare
+this DTD with the prescribed one. To fully parse the document, the mode
+<emphasis>parse_document_entity</emphasis> is applied, and the DTD to compare
+against can be read with mode <emphasis>parse_dtd_entity</emphasis>.
+</para>
+
+<para>
+There is another very important configurable aspect of the parser: the
+so-called resolver. The task of the resolver is to locate the contents of an
+(external) entity for a given entity name, and to make the contents accessible
+as a character stream. (Furthermore, it also normalizes the character set;
+but this is a detail we can ignore here.) Suppose you have a file called
+<literal>"main.xml"</literal> containing 
+
+<programlisting><![CDATA[
+<!ENTITY % sub SYSTEM "sub/sub.xml">
+%sub;
+]]></programlisting>
+
+and a file stored in the subdirectory <literal>"sub"</literal> with name
+<literal>"sub.xml"</literal> containing
+
+<programlisting><![CDATA[
+<!ENTITY % subsub SYSTEM "subsub/subsub.xml">
+%subsub;
+]]></programlisting>
+
+and a file stored in the subdirectory <literal>"subsub"</literal> of
+<literal>"sub"</literal> with name <literal>"subsub.xml"</literal> (the
+contents of this file do not matter). Here, the resolver must track that
+the second entity <literal>subsub</literal> is located in the directory
+<literal>"sub/subsub"</literal>, i.e. the difficulty is to interpret the
+system (file) names of entities relative to the entities containing them,
+even if the entities are deeply nested.
+</para>
+
+<para>
+There is no single fixed resolver that already does everything right; resolving
+entity names is a task that highly depends on the environment. The XML
+specification only demands that <literal>SYSTEM</literal> entities are
+interpreted like URLs (which is not very precise, as there are lots of URL
+schemes in use), hoping that this helps to overcome the local peculiarities of
+the environment; the idea is that if you do not know your environment you can
+refer to other entities by denoting URLs for them. I think that this
+interpretation of <literal>SYSTEM</literal> names may have some applications on
+the Internet, but it is not the first choice in general. Because of this, the
+resolver is a separate module of the parser that can be exchanged for another
+one if necessary; more precisely, the parser already defines several resolvers.
+</para>
+
+<para>
+The following resolvers already exist:
+
+          <itemizedlist mark="bullet" spacing="compact">
+           <listitem>
+             <para>Resolvers reading from arbitrary input channels. These
+can be configured such that a certain ID is associated with the channel; in
+this case inner references to external entities can be resolved. There is also
+a special resolver that interprets SYSTEM IDs as URLs; this resolver can
+process relative SYSTEM names and determine the corresponding absolute URL.
+</para>
+           </listitem>
+           <listitem>
+             <para>A resolver that reads always from a given O'Caml
+string. This resolver is not able to resolve further names, because the string
+is not associated with any name; i.e. if the document contained in the string
+refers to an external entity, this reference cannot be followed in this
+case.</para>
+           </listitem>
+           <listitem>
+             <para>A resolver for file names. The <literal>SYSTEM</literal>
+name is interpreted as a file URL with the slash "/" as the separator for
+directories. - This resolver is derived from the generic URL resolver.</para>
+           </listitem>
+         </itemizedlist>
+
+The interface a resolver must have is documented, so it is possible to write
+your own resolver. For example, you could connect the parser with an HTTP
+client, and resolve URLs of the HTTP namespace. The resolver classes allow
+several independent resolvers to be combined into one more powerful resolver;
+thus it is possible to combine a self-written resolver with the already
+existing resolvers.
+</para>
+
+<para>
+Note that the existing resolvers only interpret <literal>SYSTEM</literal>
+names, not <literal>PUBLIC</literal> names. If it helps you, it is possible to
+define resolvers for <literal>PUBLIC</literal> names, too; for example, such a
+resolver could look up the public name in a hash table, and map it to a system
+name which is passed over to the existing resolver for system names. It is
+relatively simple to provide such a resolver.
+</para>
+
+
+      </sect1>
+
+      <sect1>
+       <title>Resolvers and sources</title>
+       
+       <sect2>
+         <title>Using the built-in resolvers (called sources)</title>
+
+         <para>The type <literal>source</literal> enumerates the two
+possibilities where the document to parse comes from.
+
+<programlisting>
+type source =
+    Entity of ((dtd -&gt; Pxp_entity.entity) * Pxp_reader.resolver)
+  | ExtID of (ext_id * Pxp_reader.resolver)
+</programlisting>
+
+You normally need not worry about this type, as there are convenience
+functions that create <literal>source</literal> values:
+
+
+            <itemizedlist mark="bullet" spacing="compact">
+             <listitem>
+               <para><literal>from_file s</literal>: The document is read from
+file <literal>s</literal>; you may specify absolute or relative path names.
+The file name must be encoded as a UTF-8 string.
+</para>
+
+<para>There is an optional argument <literal>~system_encoding</literal>
+specifying the character encoding that is used for the file names of the file
+system. For example, if this encoding is ISO-8859-1 and <literal>s</literal> is
+also an ISO-8859-1 string, you can form the source:
+
+<programlisting><![CDATA[
+let s_utf8  =  recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s in
+from_file ~system_encoding:`Enc_iso88591 s_utf8
+]]></programlisting>
+</para>
+
+<para>
+This <literal>source</literal> has the advantage that
+it is able to resolve inner external entities; i.e. if your document includes
+data from another file (using the <literal>SYSTEM</literal> attribute), this
+mode will find that file. However, this mode cannot resolve
+<literal>PUBLIC</literal> identifiers nor <literal>SYSTEM</literal> identifiers
+other than "file:".
+</para>
+             </listitem>
+             <listitem>
+               <para><literal>from_channel ch</literal>: The document is read
+from the channel <literal>ch</literal>. In general, this source also supports
+file URLs found in the document; however, by default only absolute URLs are
+understood. It is possible to associate an ID with the channel such that the
+resolver knows how to interpret relative URLs:
+
+<programlisting>
+from_channel ~id:(System "file:///dir/dir1/") ch
+</programlisting>
+
+There is also the ~system_encoding argument specifying how file names are
+encoded. - The example from above can also be written (but it is no
+longer possible to interpret relative URLs because there is no ~id argument,
+and computing this argument is relatively complicated because it must
+be a valid URL):
+
+<programlisting>
+let ch = open_in s in
+let src = from_channel ~system_encoding:`Enc_iso88591 ch in
+...;
+close_in ch
+</programlisting>
+</para>
+             </listitem>
+             <listitem>
+               <para><literal>from_string s</literal>: The string
+<literal>s</literal> is the document to parse. This mode is not able to
+interpret file names of <literal>SYSTEM</literal> clauses, nor can it look up
+<literal>PUBLIC</literal> identifiers. </para> 
+
+               <para>Normally, the encoding of the string is detected as usual
+by analyzing the XML declaration, if any. However, it is also possible to
+specify the encoding directly:
+
+<programlisting>
+let src = from_string ~fixenc:`ISO-8859-2 s
+</programlisting>
+</para>
+             </listitem>
+             <listitem>
+               <para><literal>ExtID (id, r)</literal>: The document to parse
+is denoted by the identifier <literal>id</literal> (either a
+<literal>SYSTEM</literal> or <literal>PUBLIC</literal> clause), and this
+identifier is interpreted by the resolver <literal>r</literal>. Use this mode
+if you have written your own resolver.</para>
+               <para>Which character sets are possible depends on the passed
+resolver <literal>r</literal>.</para>
+             </listitem>
+             <listitem>
+               <para><literal>Entity (get_entity, r)</literal>: The document
+to parse is returned by the function invocation <literal>get_entity
+dtd</literal>, where <literal>dtd</literal> is the DTD object to use (it may be
+empty). Inner external references occurring in this entity are resolved using
+the resolver <literal>r</literal>.</para>
+               <para>Which character sets are possible depends on the passed
+resolver <literal>r</literal>.</para>
+             </listitem>
+           </itemizedlist></para>
+       </sect2>
+
+
+       <sect2>
+         <title>The resolver API</title>
+
+         <para>A resolver is an object that can be opened like a file, but
+instead of a file name you pass the XML identifier of the entity
+to read from (either a <literal>SYSTEM</literal> or <literal>PUBLIC</literal>
+clause). When opened, the resolver must return the
+<literal>Lexing.lexbuf</literal> that reads the characters.  The resolver can
+be closed, and it can be cloned. Furthermore, it is possible to tell the
+resolver which character set it should assume. The following is taken from
+Pxp_reader:
+
+<programlisting><![CDATA[
+exception Not_competent
+exception Not_resolvable of exn
+
+class type resolver =
+  object
+    method init_rep_encoding : rep_encoding -> unit
+    method init_warner : collect_warnings -> unit
+    method rep_encoding : rep_encoding
+    method open_in : ext_id -> Lexing.lexbuf
+    method close_in : unit
+    method change_encoding : string -> unit
+    method clone : resolver
+    method close_all : unit
+  end
+]]></programlisting>
+
+The resolver object must work as follows:</para>
+
+<para>
+            <itemizedlist mark="bullet" spacing="compact">
+             <listitem>
+               <para>When the parser is called, it tells the resolver the
+warner object and the internal encoding by invoking
+<literal>init_warner</literal> and <literal>init_rep_encoding</literal>. The
+resolver should store these values. The method <literal>rep_encoding</literal>
+should return the internal encoding.
+</para>
+             </listitem>
+             <listitem>
+               <para>If the parser wants to read from the resolver, it invokes
+the method <literal>open_in</literal>. Either the resolver succeeds, in which
+case the <literal>Lexing.lexbuf</literal> reading from the file or stream must
+be returned, or opening fails. In the latter case the method implementation
+should raise an exception (see below).</para>
+             </listitem>
+             <listitem>
+               <para>If the parser finishes reading, it calls the
+<literal>close_in</literal> method.</para>
+             </listitem>
+             <listitem>
+               <para>If the parser finds a reference to another external
+entity in the input stream, it calls <literal>clone</literal> to get a second
+resolver which must be initially closed (not yet connected with an input
+stream).  The parser then invokes <literal>open_in</literal> and the other
+methods as described.</para>
+             </listitem>
+             <listitem>
+               <para>If you already know the character set of the input
+stream, you should recode it to the internal encoding, and define the method
+<literal>change_encoding</literal> as an empty method.</para>
+             </listitem>
+             <listitem>
+               <para>If you want to support multiple external character sets,
+the object must follow a much more complicated protocol. Directly after
+<literal>open_in</literal> has been called, the resolver must return a lexical
+buffer that only reads one byte at a time. This is only possible if you create
+the lexical buffer with <literal>Lexing.from_function</literal>; the function
+must then always return 1 if the EOF is not yet reached, and 0 if EOF is
+reached. If the parser has read the first line of the document, it will invoke
+<literal>change_encoding</literal> to tell the resolver which character set to
+assume. From this moment, the object can return more than one byte at once. The
+argument of <literal>change_encoding</literal> is either the parameter of the
+"encoding" attribute of the XML declaration, or the empty string if there is
+no XML declaration or if the declaration does not contain an encoding
+attribute. </para>
+
+               <para>At the beginning the resolver must only return one
+character every time something is read from the lexical buffer. The reason for
+this is that otherwise you would not know exactly at which position in the
+input stream the character set changes.</para>
+
+               <para>If you want automatic recognition of the character set,
+it is up to the resolver object to implement this.</para>
+             </listitem>
+
+             <listitem><para>If an error occurs, the parser calls the method
+<literal>close_all</literal> for the top-level resolver; this method should
+close itself (if not already done) and all clones.</para>
+             </listitem>
+           </itemizedlist>
+</para>
+         <formalpara><title>Exceptions</title>
+           <para>
+It is possible to chain resolvers such that when the first resolver is not able
+to open the entity, the other resolvers of the chain are tried in turn. The
+method <literal>open_in</literal> should raise the exception
+<literal>Not_competent</literal> to indicate that the next resolver should try
+to open the entity. If the resolver is able to handle the ID, but some other
+error occurs, the exception <literal>Not_resolvable</literal> should be raised
+so that the chain is not tried any further.
+         </para>
+         </formalpara>
+
+       <para>Example: How to define a resolver that is equivalent to
+from_string: ...</para>
+
+       </sect2>
+       
+       <sect2>
+         <title>Predefined resolver components</title>
+         <para>
+There are some classes in Pxp_reader that define common resolver behaviour.
+
+<programlisting><![CDATA[
+class resolve_read_this_channel : 
+    ?id:ext_id -> 
+    ?fixenc:encoding -> 
+    ?auto_close:bool -> 
+    in_channel -> 
+        resolver
+]]></programlisting>
+
+Reads from the passed channel (it may even be a pipe). If the
+<literal>~id</literal> argument is passed to the object, the created resolver
+accepts only this ID. Otherwise all IDs are accepted.  - Once the resolver has
+been cloned, it does not accept any ID. This means that this resolver cannot
+handle inner references to external entities. Note that you can combine this
+resolver with another resolver that can handle inner references (such as
+resolve_as_file); see class 'combine' below.  - If you pass the
+<literal>~fixenc</literal> argument, the encoding of the channel is set to the
+passed value, regardless of any auto-recognition or any XML declaration. - If
+<literal>~auto_close = true</literal> (which is the default), the channel is
+closed after use. If <literal>~auto_close = false</literal>, the channel is
+left open.
+ </para>
+
+         <para>
+<programlisting><![CDATA[
+class resolve_read_any_channel : 
+    ?auto_close:bool -> 
+    channel_of_id:(ext_id -> (in_channel * encoding option)) -> 
+        resolver
+]]></programlisting>
+
+This resolver calls the function <literal>~channel_of_id</literal> to open a
+new channel for the passed <literal>ext_id</literal>. This function must either
+return the channel and the encoding, or it must fail with Not_competent.  The
+function must return <literal>None</literal> as encoding if the default
+mechanism to recognize the encoding should be used. It must return
+<literal>Some e</literal> if it is already known that the encoding of the
+channel is <literal>e</literal>.  If <literal>~auto_close = true</literal>
+(which is the default), the channel is closed after use. If
+<literal>~auto_close = false</literal>, the channel is left open.
+</para>
+
+         <para>
+<programlisting><![CDATA[
+class resolve_read_url_channel :
+    ?base_url:Neturl.url ->
+    ?auto_close:bool -> 
+    url_of_id:(ext_id -> Neturl.url) -> 
+    channel_of_url:(Neturl.url -> (in_channel * encoding option)) -> 
+        resolver
+]]></programlisting>
+
+When this resolver gets an ID to read from, it calls the function
+<literal>~url_of_id</literal> to get the corresponding URL. This URL may be a
+relative URL; however, a URL scheme must be used which contains a path.  The
+resolver converts the URL to an absolute URL if necessary.  The second
+function, <literal>~channel_of_url</literal>, is fed with the absolute URL as
+input. This function opens the resource to read from, and returns the channel
+and the encoding of the resource.
+</para>
+<para>
+Both functions, <literal>~url_of_id</literal> and
+<literal>~channel_of_url</literal>, can raise Not_competent to indicate that
+the object is not able to read from the specified resource. However, there is a
+difference: A Not_competent from <literal>~url_of_id</literal> is left as it
+is, but a Not_competent from <literal>~channel_of_url</literal> is converted to
+Not_resolvable. So only <literal>~url_of_id</literal> decides which URLs are
+accepted by the resolver and which not.
+</para>
+<para>
+The function <literal>~channel_of_url</literal> must return
+<literal>None</literal> as encoding if the default mechanism to recognize the
+encoding should be used. It must return <literal>Some e</literal> if it is
+already known that the encoding of the channel is <literal>e</literal>.
+</para>
+<para>
+If <literal>~auto_close = true</literal> (which is the default), the channel is
+closed after use. If <literal>~auto_close = false</literal>, the channel is
+left open.
+</para>
+<para>
+Objects of this class contain a base URL relative to which relative URLs are
+interpreted. When creating a new object, you can specify the base URL by
+passing it as <literal>~base_url</literal> argument. When an existing object is
+cloned, the base URL of the clone is the URL of the original object. - Note
+that the term "base URL" has a strict definition in RFC 1808.
+</para>
+
+         <para>
+<programlisting><![CDATA[
+class resolve_read_this_string : 
+    ?id:ext_id -> 
+    ?fixenc:encoding -> 
+    string -> 
+        resolver
+]]></programlisting>
+
+Reads from the passed string. If the <literal>~id</literal> argument is passed
+to the object, the created resolver accepts only this ID. Otherwise all IDs are
+accepted. - Once the resolver has been cloned, it does not accept any ID. This
+means that this resolver cannot handle inner references to external
+entities. Note that you can combine this resolver with another resolver that
+can handle inner references (such as resolve_as_file); see class 'combine'
+below. - If you pass the <literal>~fixenc</literal> argument, the encoding of
+the string is set to the passed value, regardless of any auto-recognition or
+any XML declaration.
+</para>
+
+         <para>
+<programlisting><![CDATA[
+class resolve_read_any_string : 
+    string_of_id:(ext_id -> (string * encoding option)) -> 
+        resolver
+]]></programlisting>
+
+This resolver calls the function <literal>~string_of_id</literal> to get the
+string for the passed <literal>ext_id</literal>. This function must either
+return the string and the encoding, or it must fail with Not_competent.  The
+function must return <literal>None</literal> as encoding if the default
+mechanism to recognize the encoding should be used. It must return
+<literal>Some e</literal> if it is already known that the encoding of the
+string is <literal>e</literal>.
+</para>
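+
+<para>
+For example, a resolver that only knows how to deliver one particular
+<literal>SYSTEM</literal> ID from a fixed string could be written as follows
+(a sketch; the ID and the content are placeholders):
+
+<programlisting><![CDATA[
+let r =
+  new resolve_read_any_string
+    ~string_of_id:
+      (function
+           System "hello.xml" -> ("<hello/>", None)
+         | _                  -> raise Not_competent)
+]]></programlisting>
+</para>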
+
+         <para>
+<programlisting><![CDATA[
+class resolve_as_file :
+    ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+    ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+    ?system_encoding:encoding ->
+    ?url_of_id:(ext_id -> Neturl.url) -> 
+    ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
+    unit -> 
+        resolver
+]]></programlisting>
+Reads from the local file system. Every file name is interpreted as a
+file name of the local file system, and the file referred to is read.
+</para>
+<para>
+The full form of a file URL is: file://host/path, where
+'host' specifies the host system where the file identified by 'path'
+resides. host = "" or host = "localhost" are accepted; other values
+will raise Not_competent. The standard for file URLs is 
+defined in RFC 1738.
+</para>
+<para>
+Option <literal>~file_prefix</literal>: Specifies how the "file:" prefix of
+file names is handled:
+            <itemizedlist mark="bullet" spacing="compact">
+             <listitem>
+               <para><literal>`Not_recognized:</literal>The prefix is not
+recognized.</para>
+             </listitem>
+             <listitem>
+               <para><literal>`Allowed:</literal> The prefix is allowed but
+not required (the default).</para>
+             </listitem>
+             <listitem>
+               <para><literal>`Required:</literal> The prefix is
+required.</para>
+             </listitem>
+           </itemizedlist>
+</para>
+<para>
+Option <literal>~host_prefix:</literal> Specifies how the "//host" phrase of
+file names is handled:
+            <itemizedlist mark="bullet" spacing="compact">
+             <listitem>
+               <para><literal>`Not_recognized:</literal>The prefix is not
+recognized.</para>
+             </listitem>
+             <listitem>
+               <para><literal>`Allowed:</literal> The prefix is allowed but
+not required (the default).</para>
+             </listitem>
+             <listitem>
+               <para><literal>`Required:</literal> The prefix is
+required.</para>
+             </listitem>
+           </itemizedlist>
+</para>
+<para>
+Option <literal>~system_encoding:</literal> Specifies the encoding of file
+names of the local file system. Default: UTF-8.
+</para>
+<para>
+Options <literal>~url_of_id</literal>, <literal>~channel_of_url</literal>: Not
+for the casual user!
+</para>
+
+         <para>
+<programlisting><![CDATA[
+class combine : 
+    ?prefer:resolver -> 
+    resolver list -> 
+        resolver
+]]></programlisting>
+
+Combines several resolver objects. If a concrete entity with an
+<literal>ext_id</literal> is to be opened, the combined resolver tries the
+contained resolvers in turn until a resolver accepts opening the entity
+(i.e. it does not raise Not_competent on open_in).
+</para>
+<para>
+Clones: If the 'clone' method is invoked before 'open_in', all contained
+resolvers are cloned separately and combined again. If the 'clone' method is
+invoked after 'open_in' (i.e. while the resolver is open), the clone of the
+active resolver is additionally flagged as preferred, i.e. it is tried
+first.
+</para>
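+
+         <para>
+A sketch of a typical combination: first try an in-memory resolver (such as
+the <literal>resolve_read_any_string</literal> example above), and fall back
+to the local file system:
+
+<programlisting><![CDATA[
+let combined_resolver =
+  new combine
+    [ my_resolver;              (* e.g. the catalog resolver sketched above *)
+      new resolve_as_file () ]
+]]></programlisting>
+</para>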
+
+       </sect2>
+      </sect1>
+
+      <sect1>
+       <title>The DTD classes</title> <para><emphasis>Sorry, not yet
+written. Perhaps the interface definition of Pxp_dtd conveys the same
+information:
+</emphasis></para>
+       <para>
+<programlisting>&markup-dtd1.mli;&markup-dtd2.mli;</programlisting>
+</para>
+      </sect1>
+
+      <sect1>
+       <title>Invoking the parser</title>
+
+       <para>Here is a description of Pxp_yacc.</para>
+
+       <sect2>
+         <title>Defaults</title>
+         <para>The following defaults are available:
+
+<programlisting>
+val default_config : config
+val default_extension : ('a node extension) as 'a
+val default_spec : ('a node extension as 'a) spec
+</programlisting>
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Parsing functions</title>
+         <para>In the following, the term "closed document" refers to
+an XML structure like
+
+<programlisting>
+&lt;!DOCTYPE ... [ <replaceable>declarations</replaceable> ] &gt;
+&lt;<replaceable>root</replaceable>&gt;
+...
+&lt;/<replaceable>root</replaceable>&gt;
+</programlisting>
+
+The term "fragment" refers to an XML structure like
+
+<programlisting>
+&lt;<replaceable>root</replaceable>&gt;
+...
+&lt;/<replaceable>root</replaceable>&gt;
+</programlisting>
+
+i.e. only to one isolated element instance.
+</para>
+
+         <para>
+<programlisting><![CDATA[
+val parse_dtd_entity : config -> source -> dtd
+]]></programlisting>
+
+Parses the declarations which are contained in the entity, and returns them as
+a <literal>dtd</literal> object.
+</para>
+
+         <para>
+<programlisting><![CDATA[
+val extract_dtd_from_document_entity : config -> source -> dtd
+]]></programlisting>
+
+Extracts the DTD from a closed document. Both the internal and the external
+subsets are extracted and combined into one <literal>dtd</literal> object. This
+function does not parse the whole document, but only the parts that are
+necessary to extract the DTD.
+</para>
+
+         <para>
+<programlisting><![CDATA[
+val parse_document_entity : 
+    ?transform_dtd:(dtd -> dtd) ->
+    ?id_index:('ext index) ->
+    config -> 
+    source -> 
+    'ext spec -> 
+        'ext document
+]]></programlisting>
+
+Parses a closed document and validates it against the DTD that is contained in
+the document (internal and external subsets). The option
+<literal>~transform_dtd</literal> can be used to transform the DTD found in the
+document; the transformed DTD is then used for validation. If
+<literal>~id_index</literal> is specified, an index of all ID attributes is
+created.
+</para>
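+
+         <para>
+A minimal sketch of an invocation using the defaults described above (the file
+name is invented; <literal>from_file</literal>, <literal>default_config</literal>
+and <literal>default_spec</literal> are assumed to come from
+<literal>Pxp_yacc</literal>):
+
+<programlisting><![CDATA[
+open Pxp_yacc
+
+let doc =
+  parse_document_entity
+    default_config
+    (from_file "doc.xml")
+    default_spec
+]]></programlisting>
+</para>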
+
+         <para>
+<programlisting><![CDATA[
+val parse_wfdocument_entity : 
+    config -> 
+    source -> 
+    'ext spec -> 
+        'ext document
+]]></programlisting>
+
+Parses a closed document, but checks it only for well-formedness.
+</para>
+
+         <para>
+<programlisting><![CDATA[
+val parse_content_entity  : 
+    ?id_index:('ext index) ->
+    config ->  
+    source -> 
+    dtd -> 
+    'ext spec -> 
+        'ext node
+]]></programlisting>
+
+Parses a fragment, and validates the element against the passed DTD.
+</para>
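+
+         <para>
+A sketch that validates an isolated fragment against a separately parsed DTD
+(the file name and the fragment are invented for illustration;
+<literal>Pxp_yacc</literal> is assumed to be open as in the previous sketch):
+
+<programlisting><![CDATA[
+let dtd =
+  parse_dtd_entity default_config (from_file "doc.dtd")
+
+let tree =
+  parse_content_entity
+    default_config
+    (from_string "<root>some content</root>")
+    dtd
+    default_spec
+]]></programlisting>
+</para>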
+
+         <para>
+<programlisting><![CDATA[
+val parse_wfcontent_entity : 
+    config -> 
+    source -> 
+    'ext spec -> 
+        'ext node
+]]></programlisting>
+
+Parses a fragment, but checks it only for well-formedness.
+</para>
+       </sect2>
+
+       <sect2>
+         <title>Configuration options</title>
+         <para>
+
+<programlisting><![CDATA[
+type config =
+    { warner : collect_warnings;
+      errors_with_line_numbers : bool;
+      enable_pinstr_nodes : bool;
+      enable_super_root_node : bool;
+      enable_comment_nodes : bool;
+      encoding : rep_encoding;
+      recognize_standalone_declaration : bool;
+      store_element_positions : bool;
+      idref_pass : bool;
+      validate_by_dfa : bool;
+      accept_only_deterministic_models : bool;
+      ...
+    }
+]]></programlisting>
+
+<itemizedlist mark="bullet" spacing="compact">
+             <listitem><para><literal>warner:</literal> The parser prints
+warnings by invoking the method <literal>warn</literal> for this warner
+object. (Default: all warnings are dropped)</para>
+             </listitem>
+             <listitem><para><literal>errors_with_line_numbers:</literal> If
+true, errors contain line numbers; if false, errors contain only byte
+positions. The latter mode is faster. (Default: true)</para>
+             </listitem>
+             <listitem><para><literal>enable_pinstr_nodes:</literal> If true,
+the parser creates extra nodes for processing instructions. If false,
+processing instructions are simply added to the element or document surrounding
+the instructions. (Default: false)</para>
+             </listitem>
+             <listitem><para><literal>enable_super_root_node:</literal> If
+true, the parser creates an extra node which is the parent of the root of the
+document tree. This node is called the super root; it is an element with type
+<literal>T_super_root</literal>. - If there are processing instructions outside
+the root element and outside the DTD, they are added to the super root instead
+of the document. - If false, the super root node is not created. (Default:
+false)</para>
+             </listitem>
+             <listitem><para><literal>enable_comment_nodes:</literal> If true,
+the parser creates nodes for comments with type <literal>T_comment</literal>;
+if false, such nodes are not created. (Default: false)</para>
+             </listitem>
+             <listitem><para><literal>encoding:</literal> Specifies the
+internal encoding of the parser. Most strings are then represented according to
+this encoding; however, there are some exceptions (especially
+<literal>ext_id</literal> values, which are always UTF-8 encoded).
+(Default: `Enc_iso88591)</para>
+             </listitem>
+             <listitem><para><literal>
+recognize_standalone_declaration:</literal> If true and if the parser is
+validating, the <literal>standalone="yes"</literal> declaration forces a check
+whether the document really is a standalone document. - If false, or if the
+parser is in well-formedness mode, such declarations are ignored.
+(Default: true)
+</para>
+             </listitem>
+             <listitem><para><literal>store_element_positions:</literal> If
+true, for every non-data node the source position is stored. If false, the
+position information is lost. If available, you can get the positions of nodes
+by invoking the <literal>position</literal> method.
+(Default: true)</para>
+             </listitem>
+             <listitem><para><literal>idref_pass:</literal> If true and if
+there is an ID index, the parser checks whether every IDREF or IDREFS attribute
+refers to an existing node; this requires that the parser traverses the whole
+document tree. If false, this check is left out. (Default: false)</para>
+             </listitem>
+             <listitem><para><literal>validate_by_dfa:</literal> If true and if
+the content model for an element type is deterministic, a deterministic finite
+automaton is used to validate whether the element contents match the content
+model of the type. If false, or if a DFA is not available, a backtracking
+algorithm is used for validation. (Default: true)
+</para>
+             </listitem>
+             <listitem><para><literal>
+accept_only_deterministic_models:</literal> If true, only deterministic content
+models are accepted; if false, any syntactically correct content model can be
+processed. (Default: true)</para>
+             </listitem>
+           </itemizedlist></para>
+       </sect2>
+
+       <sect2>
+         <title>Which configuration should I use?</title>
+         <para>First, I recommend varying the default configuration instead of
+creating a new configuration record. For instance, to set
+<literal>idref_pass</literal> to <literal>true</literal>, change the default
+as in:
+<programlisting>
+let config = { default_config with idref_pass = true }
+</programlisting>
+The background is that I can add more options to the record in future versions
+of the parser without breaking your programs.</para>
+
+         <formalpara>
+           <title>Do I need extra nodes for processing instructions?</title>
+<para>By default, such nodes are not created. This does not mean that the
+processing instructions are lost; however, you cannot find out the exact
+location where they occur. For example, the following XML text
+
+<programlisting><![CDATA[
+<x><?pi1?><y/><?pi2?></x> 
+]]></programlisting> 
+
+will normally create one element node for <literal>x</literal> containing
+<emphasis>one</emphasis> subnode for <literal>y</literal>. The processing
+instructions are attached to <literal>x</literal> in a separate hash table; you
+can access them using <literal>x # pinstr "pi1"</literal> and <literal>x #
+pinstr "pi2"</literal>, respectively. The information about where the
+instructions occur within <literal>x</literal> is lost.
+</para>
+         </formalpara>
+
+           <para>If the option <literal>enable_pinstr_nodes</literal> is
+turned on, the parser creates extra nodes <literal>pi1</literal> and
+<literal>pi2</literal> such that the subnodes of <literal>x</literal> are now: 
+
+<programlisting><![CDATA[
+x # sub_nodes = [ pi1; y; pi2 ]
+]]></programlisting>
+
+The extra nodes contain the processing instructions in the usual way, i.e. you
+can access them using <literal>pi1 # pinstr "pi1"</literal> and <literal>pi2 #
+pinstr "pi2"</literal>, respectively.
+</para>
+
+         <para>Note that you will need an exemplar for the PI nodes (see
+<literal>make_spec_from_alist</literal>).</para> 
+
+         <formalpara>
+           <title>Do I need a super root node?</title>
+           <para>By default, there is no super root node. The
+<literal>document</literal> object refers directly to the node representing the
+root element of the document, i.e.
+
+<programlisting><![CDATA[
+doc # root = r
+]]></programlisting>
+
+if <literal>r</literal> is the root node. This is sometimes inconvenient: (1)
+Some algorithms become simpler if every node has a parent, even the root
+node. (2) Some standards such as XPath call the "root node" the node whose
+child represents the root of the document. (3) The super root node can serve
+as a container for processing instructions outside the root element. For
+these reasons, it is possible to create an extra super root node, whose child
+is the root node:
+
+<programlisting><![CDATA[
+doc # root = sr         &&
+sr # sub_nodes = [ r ]
+]]></programlisting>
+
+When extra nodes are also created for processing instructions, these nodes are
+added to the super root node if the instructions occur outside the root element
+(reason (3)), and their order reflects the order in the source text.</para>
+         </formalpara>
+
+         <para>Note that you will need an exemplar for the super root node
+(see <literal>make_spec_from_alist</literal>).</para>
+
+         <formalpara>
+           <title>What is the effect of the UTF-8 encoding?</title>
+           <para>By default, the parser represents strings (with few
+exceptions) as ISO-8859-1 strings. This encoding is well-known, and tools
+and fonts for it are widely available.</para>
+         </formalpara>
+         <para>However, internationalization may require that you switch over
+to UTF-8 encoding. In most environments, the immediate effect will be that you
+cannot read strings with character codes >= 160 any longer; your terminal will
+only show funny glyph combinations. It is strongly recommended to install
+Unicode fonts (<ulink URL="http://czyborra.com/unifont/">GNU Unifont</ulink>, 
+<ulink URL="http://www.cl.cam.ac.uk/~mgk25/download/ucs-fonts.tar.gz">
+Markus Kuhn's fonts</ulink>) and <ulink
+URL="http://myweb.clark.net/pub/dickey/xterm/xterm.html">terminal emulators
+that can handle UTF-8 byte sequences</ulink>. Furthermore, a Unicode editor may
+be helpful (such as <ulink
+URL="ftp://metalab.unc.edu/pub/Linux/apps/editors/X/">Yudit</ulink>). There is
+also a <ulink URL="http://www.cl.cam.ac.uk/~mgk25/unicode.html">FAQ</ulink> by
+Markus Kuhn.
+</para>
+         <para>By setting <literal>encoding</literal> to
+<literal>`Enc_utf8</literal>, all strings originating from the parsed XML
+document are represented as UTF-8 strings. This includes not only character
+data and attribute values but also element names, attribute names and so on, as
+it is possible to use any Unicode letter to form such names.  Strictly
+speaking, PXP is only XML-compliant if the UTF-8 mode is used; otherwise it
+will have difficulties when validating documents containing
+non-ISO-8859-1 names.
+</para>
+
+         <para>This mode does not have any impact on the external
+representation of documents. The character set assumed when reading a document
+is set in the XML declaration, and the character set used when writing a
+document must be passed to the <literal>write</literal> method.
+</para>
+
+         <formalpara>
+           <title>How do I check that nodes exist which are referred to by IDREF attributes?</title>
+           <para>First, you must create an index of all occurring ID
+attributes:
+
+<programlisting><![CDATA[
+let index = new hash_index
+]]></programlisting>
+
+This index must be passed to the parsing function:
+
+<programlisting><![CDATA[
+parse_document_entity
+  ~id_index:(index :> index)
+  config source spec
+]]></programlisting>
+
+Next, you must turn on the <literal>idref_pass</literal> mode (this is the
+config record that must be passed to the parsing function shown above):
+
+<programlisting><![CDATA[
+let config = { default_config with idref_pass = true }
+]]></programlisting>
+
+Note that now the whole document tree will be traversed, and every node will be
+checked for IDREF and IDREFS attributes. If the tree is big, this may take some
+time.
+</para>
+         </formalpara>
+
+         <formalpara>
+           <title>What are deterministic content models?</title>
+           <para>This type of model can speed up the validation checks;
+furthermore, it ensures SGML compatibility. In particular, a content model is
+deterministic if the parser can determine the alternative that actually applies
+by inspecting only the current token. For example, this element has
+non-deterministic contents:
+
+<programlisting><![CDATA[
+<!ELEMENT x ((u,v) | (u,y+) | v)>
+]]></programlisting>
+
+If the first element in <literal>x</literal> is <literal>u</literal>, the
+parser does not know which of the alternatives <literal>(u,v)</literal> or
+<literal>(u,y+)</literal> will work; the parser must also inspect the second
+element to be able to distinguish between the alternatives. Because such
+look-ahead (or "guessing") is required, this example is
+non-deterministic (a deterministic equivalent is shown below).</para>
+         </formalpara>
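+
+         <para>
+One way to make this example deterministic is to factor out the common prefix
+<literal>u</literal>, so that the first token alone selects the alternative:
+
+<programlisting><![CDATA[
+<!ELEMENT x ((u, (v | y+)) | v)>
+]]></programlisting>
+
+This declaration accepts the same element contents as the original one, but
+the parser never needs to look ahead.
+</para>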
+
+         <para>The XML standard demands that content models be
+deterministic. It is therefore recommended to turn the option
+<literal>accept_only_deterministic_models</literal> on; however, PXP can also
+process non-deterministic models using a backtracking algorithm.</para>
+
+         <para>Deterministic models ensure that validation can be performed in
+linear time. In order to get the maximum benefit, PXP also implements a
+special validator that takes advantage of deterministic models; this is the
+deterministic finite automaton (DFA). This validator is enabled per element
+type if the element type has a deterministic model and if the option
+<literal>validate_by_dfa</literal> is turned on.</para>
+
+         <para>In general, I expect that the DFA method is faster than the
+backtracking method; in particular, even in the worst case the DFA takes only
+linear time. However, if the content model has only a few alternatives and the
+alternatives do not nest, the backtracking algorithm may be better.</para>
+
+       </sect2>
+
+
+      </sect1>
+
+
+      <sect1>
+       <title>Updates</title> 
+
+       <para><emphasis>Some features (often added later) that are not otherwise
+explained in the manual but are worth mentioning.</emphasis></para>
+
+       <itemizedlist mark="bullet" spacing="compact">
+         <listitem><para>Methods node_position, node_path, nth_node,
+previous_node, next_node for nodes: See pxp_document.mli</para>
+         </listitem>
+         <listitem><para>Functions to determine the document order of nodes:
+compare, create_ord_index, ord_number, ord_compare: See pxp_document.mli</para>
+         </listitem>
+       </itemizedlist>
+      </sect1>
+
+    </chapter>
+  </part>
+</book>
+
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/extension_general.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/extension_general.fig
new file mode 100644 (file)
index 0000000..445095f
--- /dev/null
@@ -0,0 +1,47 @@
+#FIG 3.2
+Portrait
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 2250 229 229 1575 2250 1800 2295
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 3375 225 225 1575 3375 1800 3375
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 675 3375 229 229 675 3375 900 3420
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2475 3375 229 229 2475 3375 2700 3420
+1 3 0 1 0 7 100 0 10 0.000 1 0.0000 3600 2475 180 180 3600 2475 3780 2475
+1 3 0 1 0 7 100 0 10 0.000 1 0.0000 2880 2475 180 180 2880 2475 3060 2475
+1 3 0 1 0 7 100 0 10 0.000 1 0.0000 4320 2475 186 186 4320 2475 4500 2520
+1 3 0 1 0 7 100 0 10 0.000 1 0.0000 3600 1485 186 186 3600 1485 3780 1530
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+        675 3150 1395 2385
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+        1575 2475 1575 3150
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+        1755 2385 2475 3150
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+       0 0 1.00 60.00 120.00
+        1537 2010 3412 1462
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+       0 0 1.00 60.00 120.00
+        3412 1537 1672 2047
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2
+       0 0 1.00 60.00 120.00
+       0 0 1.00 60.00 120.00
+        810 3195 2707 2512
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2
+       0 0 1.00 60.00 120.00
+       0 0 1.00 60.00 120.00
+        1740 3217 3442 2580
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2
+       0 0 1.00 60.00 120.00
+       0 0 1.00 60.00 120.00
+        2640 3210 4177 2610
+4 0 0 80 0 14 12 0.0000 4 75 105 3555 1530 x\001
+4 0 0 80 0 14 12 0.0000 4 75 105 1530 2295 n\001
+4 0 0 80 0 12 12 0.2967 4 135 1365 1658 1950 n # extension\001
+4 0 0 80 0 12 12 0.2967 4 135 840 2475 1950 x # node\001
+4 0 0 80 0 16 12 0.0000 4 135 1140 1020 4050 The node tree\001
+4 0 0 80 0 16 12 0.0000 4 135 1245 3225 3285 The extensions\001
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_add.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_add.fig
new file mode 100644 (file)
index 0000000..0716834
--- /dev/null
@@ -0,0 +1,107 @@
+#FIG 3.2
+Portrait
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6141 1350 242 229 6141 1350 6379 1395
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6141 2250 242 229 6141 2250 6379 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 5426 2250 242 229 5426 2250 5665 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6856 2250 242 229 6856 2250 7094 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 7571 2925 242 229 7571 2925 7809 2970
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8524 2925 242 229 8524 2925 8762 2970
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8047 2250 242 229 8047 2250 8285 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1866 1350 242 229 1866 1350 2104 1395
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1866 2250 242 229 1866 2250 2104 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1151 2250 242 229 1151 2250 1390 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 2581 2250 242 229 2581 2250 2819 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 3296 2925 242 229 3296 2925 3534 2970
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 4249 2925 242 229 4249 2925 4487 2970
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 3772 2250 242 229 3772 2250 4010 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8325 1350 242 229 8325 1350 8563 1395
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 1 1.00 61.76 123.53
+        5910 1440 5402 2017
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 1 1.00 61.76 123.53
+        6109 1590 6101 2025
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 1 1.00 61.76 123.53
+        6307 1537 6697 2070
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 1 1.00 61.76 123.53
+        7832 2347 7602 2692
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 1 1.00 61.76 123.53
+        8150 2452 8349 2752
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 0 1.00 61.76 123.53
+        5490 2017 5958 1492
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 0 1.00 61.76 123.53
+        6164 2010 6173 1575
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 0 1.00 61.76 123.53
+        6768 2025 6355 1470
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 0 1.00 61.76 123.53
+        7673 2715 7880 2415
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 0 1.00 61.76 123.53
+        8412 2707 8222 2415
+2 1 1 1 0 7 95 0 15 4.000 0 0 -1 0 0 2
+        6387 1372 8023 2017
+2 2 0 1 0 7 95 0 -1 0.000 0 0 -1 0 0 5
+        4950 900 9000 900 9000 3375 4950 3375 4950 900
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 1 1.00 61.75 123.51
+        1635 1440 1127 2017
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 1 1.00 61.75 123.51
+        1834 1590 1826 2025
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 1 1.00 61.75 123.51
+        2032 1537 2422 2070
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 1 1.00 61.75 123.51
+        3557 2347 3327 2692
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 1 1.00 61.75 123.51
+        3875 2452 4074 2752
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 0 1.00 61.75 123.51
+        1215 2017 1683 1492
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 0 1.00 61.75 123.51
+        1889 2010 1898 1575
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 0 1.00 61.75 123.51
+        2493 2025 2080 1470
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 0 1.00 61.75 123.51
+        3398 2715 3605 2415
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 0 1.00 61.75 123.51
+        4137 2707 3947 2415
+2 1 1 1 0 7 95 0 15 4.000 0 0 -1 0 0 2
+        2112 1372 3748 2017
+2 2 0 1 0 7 95 0 -1 0.000 0 0 -1 0 0 5
+        675 900 4725 900 4725 3375 675 3375 675 900
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        8197 1545 8055 2010
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        8137 2025 8280 1590
+2 1 0 3 0 7 95 0 -1 0.000 0 0 -1 1 0 4
+       2 1 2.00 120.00 180.00
+        7875 1500 7620 1965 7845 1920 7485 2355
+4 0 0 95 0 14 13 0.0000 4 79 111 6094 1379 x\001
+4 0 0 95 0 14 13 0.0000 4 111 111 7991 2265 y\001
+4 0 0 95 0 14 13 0.0000 4 79 111 1819 1379 x\001
+4 0 0 95 0 14 13 0.0000 4 111 111 3716 2265 y\001
+4 0 0 95 0 12 12 0.0000 4 150 1470 6459 1335 x # add_node y\001
+4 0 0 95 0 12 12 0.0000 4 150 1470 2214 1365 x # add_node y\001
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_clone.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_clone.fig
new file mode 100644 (file)
index 0000000..ed1865f
--- /dev/null
@@ -0,0 +1,111 @@
+#FIG 3.2
+Portrait
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2700 1800 229 229 2700 1800 2925 1845
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2025 2700 229 229 2025 2700 2250 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 3375 2700 229 229 3375 2700 3600 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 6345 1800 229 229 6345 1800 6570 1845
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 5670 2700 229 229 5670 2700 5895 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 7020 2700 229 229 7020 2700 7245 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8325 1800 229 229 8325 1800 8550 1845
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 7875 2700 229 229 7875 2700 8100 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8775 2700 229 229 8775 2700 9000 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 6345 2700 229 229 6345 2700 6570 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 5895 3600 229 229 5895 3600 6120 3645
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 6795 3600 229 229 6795 3600 7020 3645
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2700 2700 229 229 2700 2700 2925 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2250 3600 229 229 2250 3600 2475 3645
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 3150 3600 229 229 3150 3600 3375 3645
+2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
+        4050 2610 4725 2610
+2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
+        4050 2745 4725 2745
+2 1 0 5 0 7 95 0 -1 12.000 1 1 -1 0 0 3
+        4500 2385 4950 2655 4500 2970
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        2490 1905 2025 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        2827 2002 3202 2542
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        2115 2475 2535 1965
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        3255 2505 2872 1957
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        6135 1905 5670 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        6472 2002 6847 2542
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        5760 2475 6180 1965
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        6900 2505 6517 1957
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        8160 1957 7860 2460
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        8407 2032 8625 2520
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        7942 2467 8212 2010
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        8685 2475 8467 1987
+2 2 0 1 0 7 80 0 -1 4.000 0 0 -1 0 0 5
+        1575 1350 9225 1350 9225 4050 1575 4050 1575 1350
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        6382 2460 6382 2032
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        6307 2032 6307 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        6180 2857 5880 3360
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        6427 2932 6645 3420
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        5962 3367 6232 2910
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        6705 3375 6487 2887
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        2737 2460 2737 2032
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        2662 2032 2662 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        2535 2857 2235 3360
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        2782 2932 3000 3420
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        2317 3367 2587 2910
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        3060 3375 2842 2887
+4 0 0 80 0 14 12 0.0000 4 105 105 2655 1845 y\001
+4 0 0 80 0 14 12 0.0000 4 105 105 6300 1845 y\001
+4 0 0 80 0 14 12 0.0000 4 75 105 6285 2752 x\001
+4 0 0 80 0 14 12 0.0000 4 75 105 2640 2752 x\001
+4 0 0 80 0 12 12 0.0000 4 105 840 3690 2025 let x' =\001
+4 0 0 80 0 12 12 0.0000 4 150 1890 3690 2205 x # orphaned_clone\001
+4 0 0 80 0 14 12 0.0000 4 105 210 8235 1845 x'\001
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_delete.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_delete.fig
new file mode 100644 (file)
index 0000000..a9fc87e
--- /dev/null
@@ -0,0 +1,96 @@
+#FIG 3.2
+Portrait
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+6 2550 2092 2865 2407
+2 1 0 4 0 7 80 0 -1 0.000 1 1 -1 0 0 2
+        2595 2362 2820 2137
+2 1 0 4 0 7 80 0 -1 0.000 1 1 -1 0 0 2
+        2595 2137 2820 2362
+-6
+6 1980 2430 3420 3870
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2700 2700 229 229 2700 2700 2925 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2250 3600 229 229 2250 3600 2475 3645
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 3150 3600 229 229 3150 3600 3375 3645
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        2535 2857 2235 3360
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        2782 2932 3000 3420
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        2317 3367 2587 2910
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        3060 3375 2842 2887
+-6
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2700 1800 229 229 2700 1800 2925 1845
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2025 2700 229 229 2025 2700 2250 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 3375 2700 229 229 3375 2700 3600 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 6345 1800 229 229 6345 1800 6570 1845
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 5670 2700 229 229 5670 2700 5895 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 7020 2700 229 229 7020 2700 7245 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8325 1800 229 229 8325 1800 8550 1845
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 7875 2700 229 229 7875 2700 8100 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8775 2700 229 229 8775 2700 9000 2745
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        2737 2460 2737 2032
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        2662 2032 2662 2467
+2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
+        4050 2610 4725 2610
+2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
+        4050 2745 4725 2745
+2 1 0 5 0 7 95 0 -1 12.000 1 1 -1 0 0 3
+        4500 2385 4950 2655 4500 2970
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        2490 1905 2025 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        2827 2002 3202 2542
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        2115 2475 2535 1965
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        3255 2505 2872 1957
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        6135 1905 5670 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        6472 2002 6847 2542
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        5760 2475 6180 1965
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        6900 2505 6517 1957
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        8160 1957 7860 2460
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        8407 2032 8625 2520
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        7942 2467 8212 2010
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        8685 2475 8467 1987
+2 2 0 1 0 7 80 0 -1 4.000 0 0 -1 0 0 5
+        1575 1350 9225 1350 9225 4050 1575 4050 1575 1350
+4 0 0 80 0 14 12 0.0000 4 75 105 2640 2752 x\001
+4 0 0 95 0 12 12 0.0000 4 135 1050 3960 2250 x # delete\001
+4 0 0 80 0 14 12 0.0000 4 75 105 8280 1845 x\001
+4 0 0 80 0 14 12 0.0000 4 105 105 2655 1845 y\001
+4 0 0 80 0 14 12 0.0000 4 105 105 6300 1845 y\001
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_general.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_general.fig
new file mode 100644 (file)
index 0000000..231e76d
--- /dev/null
@@ -0,0 +1,35 @@
+#FIG 3.2
+Portrait
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2025 2025 229 229 2025 2025 2250 2070
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1350 2025 225 225 1350 2025 1575 2025
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2700 2025 225 225 2700 2025 2925 2025
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2025 1125 225 225 2025 1125 2250 1125
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        1380 1800 1845 1275
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        1815 1207 1282 1815
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        2055 1792 2055 1350
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        1980 1350 1980 1807
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 1 1.00 60.00 120.00
+        2190 1297 2550 1867
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+       1 0 1.00 60.00 120.00
+        2602 1807 2220 1237
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+        450 675 3150 675 3150 2475 450 2475 450 675
+4 0 0 100 0 12 10 0.0000 4 120 540 2377 1342 parent\001
+4 0 0 100 0 12 10 0.0000 4 105 810 645 1628 sub_nodes\001
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_term.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_term.fig
new file mode 100644 (file)
index 0000000..54965fe
--- /dev/null
@@ -0,0 +1,63 @@
+#FIG 3.2
+Portrait
+Center
+Metric
+A4      
+100.00
+Single
+-2
+1200 2
+6 1665 2700 2835 3150
+2 4 0 1 0 7 100 0 15 0.000 0 0 7 0 0 5
+        2835 3150 2835 2700 1665 2700 1665 3150 2835 3150
+4 0 0 80 0 18 12 0.0000 4 135 930 1815 3015 "Cherries"\001
+-6
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2250 1125 225 225 2250 1125 2475 1125
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 2025 225 225 1575 2025 1800 2025
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2925 2025 225 225 2925 2025 3150 2025
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 900 2925 242 242 900 2925 1125 3015
+2 4 0 1 0 7 100 0 15 0.000 0 0 7 0 0 5
+        1485 4275 1485 3825 315 3825 315 4275 1485 4275
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+        2085 1275 1582 1807
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+        2407 1297 2940 1800
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+        1417 2190 900 2692
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+        1740 2190 2257 2700
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+        892 3180 892 3825
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+        45 675 6525 675 6525 4950 45 4950 45 675
+3 3 0 1 0 7 100 0 -1 0.000 0 0 0 22
+        2115 3645 2250 3600 2520 3555 2745 3510 2925 3555 3150 3690
+        3375 3735 3600 3735 3825 3735 4140 3825 4140 4005 4005 4185
+        3735 4230 3420 4185 3150 4230 2835 4275 2520 4230 2340 4140
+        2115 4095 1980 4005 1980 3825 2025 3735
+        -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+        -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+        -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+3 3 0 1 0 7 100 0 -1 0.000 0 0 0 17
+        3465 1170 3645 1080 4050 1035 4320 1035 4545 1080 4770 1170
+        5130 1215 5355 1350 5400 1530 5265 1665 4860 1710 4455 1710
+        4095 1665 3780 1620 3555 1575 3420 1485 3420 1305
+        -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+        -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+        -1.000
+3 2 0 1 0 7 100 0 -1 0.000 0 0 0 5
+        2475 1215 2655 1350 2970 1440 3240 1395 3420 1260
+        0.000 -1.000 -1.000 -1.000 0.000
+3 2 0 1 0 7 100 0 -1 0.000 0 0 0 5
+        1125 3060 1215 3397 1410 3607 1687 3727 2025 3720
+        0.000 -1.000 -1.000 -1.000 0.000
+4 0 0 80 0 18 12 0.0000 4 180 1065 375 4125 "An orange"\001
+4 0 0 80 0 18 12 0.0000 4 90 315 750 2985 <a>\001
+4 0 0 80 0 18 12 0.0000 4 135 315 1410 2085 <b>\001
+4 0 0 80 0 18 12 0.0000 4 90 315 2790 2070 <c>\001
+4 0 0 80 0 18 12 0.0000 4 90 315 2100 1200 <a>\001
+4 0 0 100 0 16 12 0.0000 4 135 795 3600 1260 attributes:\001
+4 0 0 100 0 16 12 0.0000 4 180 1680 3600 1485 "att" -> Value "apple"\001
+4 0 0 100 0 16 12 0.0000 4 135 795 2250 3780 attributes:\001
+4 0 0 100 0 17 12 0.0000 4 180 5910 390 4725 <a att="apple"><b><a att="orange">An orange</a>Cherries</b><c/></a>\001
+4 0 0 100 0 16 12 0.0000 4 180 1800 2250 4005 "att" -> Value "orange"\001
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/readme.ent b/helm/DEVEL/pxp/pxp/doc/manual/src/readme.ent
new file mode 100644 (file)
index 0000000..e9fdfc3
--- /dev/null
@@ -0,0 +1,364 @@
+<!ENTITY readme.code.header '
+open Pxp_types
+open Pxp_document
+'>
+<!ENTITY readme.code.footnote-printer '
+class type footnote_printer =
+  object
+    method footnote_to_html : store_type -&gt; out_channel -&gt; unit
+  end
+
+and store_type =
+  object
+    method alloc_footnote : footnote_printer -&gt; int
+    method print_footnotes : out_channel -&gt; unit
+  end
+;;
+'>
+<!ENTITY readme.code.store '
+class store =
+  object (self)
+
+    val mutable footnotes = ( [] : (int * footnote_printer) list )
+    val mutable next_footnote_number = 1
+
+    method alloc_footnote n =
+      let number = next_footnote_number in
+      next_footnote_number &lt;- number+1;
+      footnotes &lt;- footnotes @ [ number, n ];
+      number
+
+    method print_footnotes ch =
+      if footnotes &lt;&gt; [] then begin
+       output_string ch "&lt;hr align=left noshade=noshade width=\"30&percent;\"&gt;\n";
+       output_string ch "&lt;dl&gt;\n";
+       List.iter
+         (fun (_,n) -&gt; 
+            n # footnote_to_html (self : #store_type :&gt; store_type) ch)
+         footnotes;
+       output_string ch "&lt;/dl&gt;\n";
+      end
+
+  end
+;;
+'>
+<!ENTITY readme.code.escape-html '
+let escape_html s =
+  Str.global_substitute
+    (Str.regexp "&lt;\\|&gt;\\|&amp;\\|\"")
+    (fun s -&gt;
+      match Str.matched_string s with
+        "&lt;" -&gt; "&amp;lt;"
+      | "&gt;" -&gt; "&amp;gt;"
+      | "&amp;" -&gt; "&amp;amp;"
+      | "\"" -&gt; "&amp;quot;"
+      | _ -&gt; assert false)
+    s
+;;
+'>
+<!ENTITY readme.code.shared '
+class virtual shared =
+  object (self)
+
+    (* --- default_ext --- *)
+
+    val mutable node = (None : shared node option)
+
+    method clone = {&lt; &gt;} 
+    method node =
+      match node with
+          None -&gt;
+            assert false
+        | Some n -&gt; n
+    method set_node n =
+      node &lt;- Some n
+
+    (* --- virtual --- *)
+
+    method virtual to_html : store -&gt; out_channel -&gt; unit
+
+  end
+;;
+'>
+<!ENTITY readme.code.only-data '
+class only_data =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      output_string ch (escape_html (self # node # data))
+  end
+;;
+'>
+<!ENTITY readme.code.no-markup '
+class no_markup =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      List.iter
+       (fun n -&gt; n # extension # to_html store ch)
+       (self # node # sub_nodes)
+  end
+;;
+'>
+<!ENTITY readme.code.readme '
+class readme =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      (* output header *)
+      output_string 
+       ch "&lt;!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\"&gt;";
+      output_string
+       ch "&lt;!-- WARNING! This is a generated file, do not edit! --&gt;\n";
+      let title = 
+       match self # node # attribute "title" with
+           Value s -&gt; s
+         | _ -&gt; assert false
+      in
+      let html_header, _ =
+       try (self # node # dtd # par_entity "readme:html:header") 
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+      let html_trailer, _ =
+       try (self # node # dtd # par_entity "readme:html:trailer")
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+      let html_bgcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:bgcolor")
+            # replacement_text
+       with WF_error _ -&gt; "white", false in
+      let html_textcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:textcolor")
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+      let html_alinkcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:alinkcolor")
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+      let html_vlinkcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:vlinkcolor")
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+      let html_linkcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:linkcolor")
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+      let html_background, _ =
+       try (self # node # dtd # par_entity "readme:html:background")
+            # replacement_text
+       with WF_error _ -&gt; "", false in
+
+      output_string ch "&lt;html&gt;&lt;header&gt;&lt;title&gt;\n";
+      output_string ch (escape_html title);
+      output_string ch "&lt;/title&gt;&lt;/header&gt;\n";
+      output_string ch "&lt;body ";
+      List.iter
+       (fun (name,value) -&gt;
+          if value &lt;&gt; "" then 
+            output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
+       [ "bgcolor",    html_bgcolor;
+         "text",       html_textcolor;
+         "link",       html_linkcolor;
+         "alink",      html_alinkcolor;
+         "vlink",      html_vlinkcolor;
+       ];
+      output_string ch "&gt;\n";
+      output_string ch html_header;
+      output_string ch "&lt;h1&gt;";
+      output_string ch (escape_html title);
+      output_string ch "&lt;/h1&gt;\n";
+      (* process main content: *)
+      List.iter
+       (fun n -&gt; n # extension # to_html store ch)
+       (self # node # sub_nodes);
+      (* now process footnotes *)
+      store # print_footnotes ch;
+      (* trailer *)
+      output_string ch html_trailer;
+      output_string ch "&lt;/html&gt;\n";
+
+  end
+;;
+'>
+<!ENTITY readme.code.section '
+class section the_tag =
+  object (self)
+    inherit shared
+
+    val tag = the_tag
+
+    method to_html store ch =
+      let sub_nodes = self # node # sub_nodes in
+      match sub_nodes with
+         title_node :: rest -&gt;
+           output_string ch ("&lt;" ^ tag ^ "&gt;\n");
+           title_node # extension # to_html store ch;
+           output_string ch ("\n&lt;/" ^ tag ^ "&gt;");
+           List.iter
+             (fun n -&gt; n # extension # to_html store ch)
+             rest
+       | _ -&gt;
+           assert false
+  end
+;;
+
+class sect1 = section "h1";;
+class sect2 = section "h3";;
+class sect3 = section "h4";;
+'>
+<!ENTITY readme.code.map-tag '
+class map_tag the_target_tag =
+  object (self)
+    inherit shared
+
+    val target_tag = the_target_tag
+
+    method to_html store ch =
+      output_string ch ("&lt;" ^ target_tag ^ "&gt;\n");
+      List.iter
+       (fun n -&gt; n # extension # to_html store ch)
+       (self # node # sub_nodes);
+      output_string ch ("\n&lt;/" ^ target_tag ^ "&gt;");
+  end
+;;
+
+class p = map_tag "p";;
+class em = map_tag "b";;
+class ul = map_tag "ul";;
+class li = map_tag "li";;
+'>
+<!ENTITY readme.code.br '
+class br =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      output_string ch "&lt;br&gt;\n";
+      List.iter
+       (fun n -&gt; n # extension # to_html store ch)
+       (self # node # sub_nodes);
+  end
+;;
+'>
+<!ENTITY readme.code.code '
+class code =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      let data = self # node # data in
+      (* convert tabs *)
+      let l = String.length data in
+      let rec preprocess i column =
+       (* this is very ineffective but comprehensive: *)
+       if i &lt; l then
+         match data.[i] with
+             &apos;\t&apos; -&gt;
+               let n = 8 - (column mod 8) in
+               String.make n &apos; &apos; ^ preprocess (i+1) (column + n)
+           | &apos;\n&apos; -&gt;
+               "\n" ^ preprocess (i+1) 0
+           | c -&gt;
+               String.make 1 c ^ preprocess (i+1) (column + 1)
+       else
+         ""
+      in
+      output_string ch "&lt;p&gt;&lt;pre&gt;";
+      output_string ch (escape_html (preprocess 0 0));
+      output_string ch "&lt;/pre&gt;&lt;/p&gt;";
+
+  end
+;;
+'>
+<!ENTITY readme.code.a '
+class a =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      output_string ch "&lt;a ";
+      let href =
+       match self # node # attribute "href" with
+           Value v -&gt; escape_html v
+         | Valuelist _ -&gt; assert false
+         | Implied_value -&gt;
+             begin match self # node # attribute "readmeref" with
+                 Value v -&gt; escape_html v ^ ".html"
+               | Valuelist _ -&gt; assert false
+               | Implied_value -&gt;
+                   ""
+             end
+      in
+      if href &lt;&gt; "" then
+       output_string ch ("href=\""  ^ href ^ "\"");
+      output_string ch "&gt;";
+      output_string ch (escape_html (self # node # data));
+      output_string ch "&lt;/a&gt;";
+       
+  end
+;;
+'>
+<!ENTITY readme.code.footnote '
+class footnote =
+  object (self)
+    inherit shared
+
+    val mutable footnote_number = 0
+
+    method to_html store ch =
+      let number = 
+       store # alloc_footnote (self : #shared :&gt; footnote_printer) in
+      let foot_anchor = 
+       "footnote" ^ string_of_int number in
+      let text_anchor =
+       "textnote" ^ string_of_int number in
+      footnote_number &lt;- number;
+      output_string ch ( "&lt;a name=\"" ^ text_anchor ^ "\" href=\"#" ^ 
+                        foot_anchor ^ "\"&gt;[" ^ string_of_int number ^ 
+                        "]&lt;/a&gt;" )
+
+    method footnote_to_html store ch =
+      (* prerequisite: we are in a definition list &lt;dl&gt;...&lt;/dl&gt; *)
+      let foot_anchor = 
+       "footnote" ^ string_of_int footnote_number in
+      let text_anchor =
+       "textnote" ^ string_of_int footnote_number in
+      output_string ch ("&lt;dt&gt;&lt;a name=\"" ^ foot_anchor ^ "\" href=\"#" ^ 
+                       text_anchor ^ "\"&gt;[" ^ string_of_int footnote_number ^ 
+                       "]&lt;/a&gt;&lt;/dt&gt;\n&lt;dd&gt;");
+      List.iter
+       (fun n -&gt; n # extension # to_html store ch)
+       (self # node # sub_nodes);
+      output_string ch ("\n&lt;/dd&gt;")
+  end
+;;
+'>
+<!ENTITY readme.code.tag-map '
+open Pxp_yacc
+
+let tag_map =
+  make_spec_from_alist
+    ~data_exemplar:(new data_impl (new only_data))
+    ~default_element_exemplar:(new element_impl (new no_markup))
+    ~element_alist:
+      [ "readme", (new element_impl (new readme));
+       "sect1",  (new element_impl (new sect1));
+       "sect2",  (new element_impl (new sect2));
+       "sect3",  (new element_impl (new sect3));
+       "title",  (new element_impl (new no_markup));
+       "p",      (new element_impl (new p));
+       "br",     (new element_impl (new br));
+       "code",   (new element_impl (new code));
+       "em",     (new element_impl (new em));
+       "ul",     (new element_impl (new ul));
+       "li",     (new element_impl (new li));
+       "footnote", (new element_impl (new footnote : #shared :&gt; shared));
+       "a",      (new element_impl (new a));
+      ]
+    ()
+;;
+'>
diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/yacc.mli.ent b/helm/DEVEL/pxp/pxp/doc/manual/src/yacc.mli.ent
new file mode 100644 (file)
index 0000000..604918b
--- /dev/null
@@ -0,0 +1,376 @@
+<!ENTITY markup-yacc.mli '
+
+open Pxp_types
+open Pxp_dtd
+open Pxp_document
+
+exception ID_not_unique
+
+class type [ &apos;ext ] index =
+object 
+  (* The type of indexes over the ID attributes of the elements. This type
+   * is the minimum requirement needed by the parser to create such an index.
+   *)
+  constraint &apos;ext = &apos;ext node #extension
+  method add : string -&gt; &apos;ext node -&gt; unit
+    (* Add the passed node to the index. If there is already an ID with
+     * the passed string value, the exception ID_not_unique should be
+     * raised. (But the index is free also to accept several identical IDs.)
+     *)
+  method find : string -&gt; &apos;ext node
+    (* Finds the node with the passed ID value, or raises Not_found *)
+end
+;;
+
+
+class [ &apos;ext ] hash_index : 
+object 
+  (* This is a simple implementation of &apos;index&apos; using a hash table. *)
+  constraint &apos;ext = &apos;ext node #extension
+  method add : string -&gt; &apos;ext node -&gt; unit
+    (* See above. *)
+  method find : string -&gt; &apos;ext node
+    (* See above. *)
+  method index : (string, &apos;ext node) Hashtbl.t
+    (* Returns the hash table. *)
+end
+;;
+
+
+type config =
+    { warner : collect_warnings;
+         (* An object that collects warnings. *)
+
+      errors_with_line_numbers : bool;
+         (* Whether error messages contain line numbers or not. The parser
+         * is 10 to 20 per cent faster if line numbers are turned off;
+         * you get only byte positions in this case.
+         *)
+
+      enable_pinstr_nodes : bool;
+         (* true: turns a special mode for processing instructions on. Normally,
+         * you cannot determine the exact location of a PI; you only know
+         * in which element the PI occurs. This mode makes it possible
+         * to find the exact location out: Every PI is artificially wrapped
+         * to find out the exact location: Every PI is artificially wrapped
+         * is &lt;a&gt;&lt;?x?&gt;&lt;?y?&gt;&lt;/a&gt;, the parser normally produces only an element
+         * object for "a", and puts the PIs "x" and "y" into it (without
+         * order). In this mode, the object "a" will contain two objects
+         * with type T_pinstr, and the first object will contain "x", and the
+         * second "y": the object tree looks like
+         * - Node with type = T_element "a"
+         *   - Node with type = T_pinstr "x"
+         *     + contains processing instruction "x"
+         *   - Node with type = T_pinstr "y"
+         *     + contains processing instruction "y"
+         *
+         * Notes:
+         * (1) In past versions of PXP this mode was called
+         *     processing_instructions_inline, and it produced nodes of
+         *     type T_element "-pi" instead of T_pinstr.
+         * (2) The T_pinstr nodes are created from the pinstr exemplars
+         *     in your spec
+         *)
+
+      enable_super_root_node : bool;
+         (* true: the topmost element of the XML tree is not the root element,
+         * but the so-called super root. The root element is a son of the
+         * super root. The super root is a node with type T_super_root.
+         * The following behaviour changes, too:
+         * - PIs occurring outside the root element and outside the DTD are
+         *   added to the super root instead of the document object
+         * - If enable_pinstr_nodes is also turned on, the PI wrappers
+         *   are added to the super root
+         *
+         * For example, the document
+         *   &lt;?x?&gt;&lt;a&gt;y&lt;/a&gt;&lt;?y?&gt;
+         * is normally represented by:
+         * - document object
+         *   + contains PIs x and y
+         *   - reference to root node with type = T_element "a"
+         *     - node with type = T_data: contains "y"
+         * With enabled super root node:
+         * - document object
+         *   - reference to super root node with type = T_super_root
+         *     + contains PIs x and y
+         *     - root node with type = T_element "a"
+         *       - node with type = T_data: contains "y"
+         * If also enable_pinstr_nodes:
+         * - document object
+         *   - reference to super root node with type = T_super_root
+         *     - node with type = T_pinstr "x"
+         *       + contains PI "x"
+         *     - root node with type = T_element "a"
+         *       - node with type = T_data: contains "y"
+         *     - node with type = T_pinstr "y"
+         *       + contains PI "y"
+         * Notes:
+         * (1) In previous versions of PXP this mode was called
+         *     virtual_root, and it produced an additional node of type
+         *     T_element "-vr" instead of T_super_root.
+         * (2) The T_super_root node is created from the super root exemplar
+         *     in your spec.
+         *)
+
+      enable_comment_nodes : bool;
+         (* When enabled, comments are represented as nodes with type =
+         * T_comment.
+         * To access the contents of comments, use the method "comment"
+         * for the comment nodes. 
+         * These nodes behave like elements; however, they are normally
+         * empty and do not have attributes. Note that it is possible to
+         * add children to comment nodes and to set attributes, but it is
+         * strongly recommended not to do so. There are no checks on
+         * such abnormal use, because they would cost too
+         * much time, even when no comment nodes are generated at all.
+         *
+         * Comment nodes should be disabled unless you must parse a 
+         * third-party XML text which uses comments as another data
+         * container.
+         *
+         * The nodes of type T_comment are created from the comment exemplars
+         * in your spec.
+         *)
+
+      encoding : rep_encoding;
+        (* Specifies the encoding used for the *internal* representation
+        * of any character data.
+        * Note that the default is still Enc_iso88591.
+        *)
+
+      recognize_standalone_declaration : bool;
+        (* Whether the "standalone" declaration is recognized or not.
+        * This option does not have an effect on well-formedness parsing:
+        * in this case such declarations are never recognized.
+        *
+        * Recognizing the "standalone" declaration means that the 
+        * value of the declaration is scanned and passed to the DTD,
+        * and that the "standalone-check" is performed. 
+        *
+        * Standalone-check: If a document is flagged standalone=&apos;yes&apos; 
+        * some additional constraints apply. The idea is that a parser
+        * without access to any external document subsets can still parse
+        * the document, and will still return the same values as the parser
+        * with such access. For example, if the DTD is external and if
+        * there are attributes with default values, it is checked that there
+        * is no element instance where these attributes are omitted - the
+        * parser would return the default value but this requires access to
+        * the external DTD subset.
+        *)
+
+      store_element_positions : bool;
+        (* Whether the file name, the line and the column of the
+        * beginning of elements are stored in the element nodes.
+        * This option may be useful to generate error messages.
+        * 
+        * Positions are only stored for:
+        * - Elements
+        * - Wrapped processing instructions (see enable_pinstr_nodes)
+        * For all other node types, no position is stored.
+        *
+        * You can access positions by the method "position" of nodes.
+        *)
+
+      idref_pass : bool;
+        (* Whether the parser does a second pass and checks that all
+        * IDREF and IDREFS attributes contain valid references.
+        * This option works only if an ID index is available. To create
+        * an ID index, pass an index object as id_index argument to the
+        * parsing functions (such as parse_document_entity; see below).
+        *
+        * "Second pass" does not mean that the XML text is again parsed;
+        * only the existing document tree is traversed, and the check
+        * on bad IDREF/IDREFS attributes is performed for every node.
+        *)
+
+      validate_by_dfa : bool;
+        (* If true, and if DFAs are available for validation, the DFAs will
+        * actually be used for validation.
+        * If false, or if no DFAs are available, the standard backtracking
+        * algorithm will be used.
+        * DFA = deterministic finite automaton.
+        *
+        * DFAs are only available if accept_only_deterministic_models is
+        * "true" (because in this case, it is relatively cheap to construct
+        * the DFAs). DFAs are a data structure which ensures that validation
+        * can always be performed in linear time.
+        *
+        * I strongly recommend using DFAs; however, there are examples
+        * for which validation by backtracking is faster.
+        *)
+
+      accept_only_deterministic_models : bool;
+        (* Whether only deterministic content models are accepted in DTDs. *)
+
+      (* The following options are not implemented, or only for internal
+       * use.
+       *)
+
+      debugging_mode : bool;
+    }
+
+
+type source =
+    Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
+  | ExtID of (ext_id * Pxp_reader.resolver)
+
+val from_channel :
+      ?system_encoding:encoding -> ?id:ext_id -> ?fixenc:encoding ->
+      in_channel -> source
+
+val from_string :
+      ?fixenc:encoding -> string -> source
+
+val from_file :
+      ?system_encoding:encoding -> string -> source
+
+(* Notes on sources (version 2):
+ *
+ * Sources specify where the XML text to parse comes from. Sources not only
+ * represent character streams, but also external IDs (i.e. SYSTEM or PUBLIC
+ * names), and they determine the character encoding in which the text is read.
+ * A source should be associated with an external ID, because otherwise
+ * it is not known how to handle relative names.
+ *
+ * There are two primary sources, Entity and ExtID, and several functions
+ * for derived sources. The functions are explained first:
+ *
+ * from_channel: The XML text is read from an in_channel. By default, the
+ *   channel is not associated with an external ID, and it is impossible
+ *   to resolve relative SYSTEM IDs found in the document.
+ *   If the ?id argument is passed, it is assumed that the channel has this
+ *   external ID. If relative SYSTEM IDs occur in the document, they can
+ *   be interpreted; however, it is only possible to read from "file:"
+ *   IDs.
+ *   By default, the encoding is detected automatically. You can
+ *   set a fixed encoding by passing the ?fixenc argument.
+ *
+ * from_string: The XML text is read from a string.
+ *   It is impossible to read from any external entity whose reference is found
+ *   in the string.
+ *   By default, the encoding of the string is detected automatically. You can
+ *   set a fixed encoding by passing the ?fixenc argument.
+ *
+ * from_file: The XML text is read from the file whose file name is
+ *   passed to the function (as UTF-8 string).
+ *   Relative system IDs can be interpreted by this function.
+ *   The ?system_encoding argument specifies the character encoding used
+ *   for file names (sic!). By default, UTF-8 is assumed.
+ *
+ * Examples:
+ *
+ * from_file "/tmp/file.xml": 
+ *   reads from this file, which is assumed to have the ID 
+ *   SYSTEM "file://localhost/tmp/file.xml".
+ *
+ * let ch = open_in "/tmp/file.xml" in
+ * from_channel ~id:(System "file://localhost/tmp/file.xml") ch
+ *   This does the same, but uses a channel.
+ *
+ * from_channel ~id:(System "http://host/file.xml")
+ *              ch
+ *   reads from the channel ch, and it is assumed that the ID is
+ *   SYSTEM "http://host/file.xml". If there is any relative SYSTEM ID,
+ *   it will be interpreted relative to this location; however, there is
+ *   no way to read via HTTP.
+ *   If there is any "file:" SYSTEM ID, it is possible to read the file.
+ *
+ * The primary sources:
+ *
+ * - ExtID(x,r): The identifier x (either the SYSTEM or the PUBLIC name) of the
+ *   entity to read from is passed to the resolver, and the resolver finds
+ *   the entity and opens it.
+ *   The intention of this option is to allow customized
+ *   resolvers to interpret external identifiers without any restriction.
+ *   The Pxp_reader module contains several classes allowing the user to
+ *   compose such a customized resolver from predefined components.
+ *
+ *   ExtID is the interface of choice for your own resolver extensions.
+ *
+ * - Entity(m,r): You can implement any behaviour by using a customized
+ *   entity class. Once the DTD object d is known that will be used during
+ *   parsing, the entity  e = m d  is determined and used together with the
+ *   resolver r.
+ *   This is only for hackers.
+ *)
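[Editor's illustration, not part of the patch: a minimal sketch of the
source constructors described above. The file name and URL are placeholders.]

    open Pxp_types
    open Pxp_yacc

    (* from_file: the external ID is derived from the file name *)
    let src1 = from_file "/tmp/file.xml"

    (* from_string: no external ID; relative SYSTEM IDs cannot be resolved *)
    let src2 = from_string ~fixenc:`Enc_iso88591 "<x>data</x>"

    (* from_channel with an explicit ID: relative "file:" SYSTEM IDs found
     * in the document are resolved against it *)
    let src3 =
      let ch = open_in "/tmp/file.xml" in
      from_channel ~id:(System "file://localhost/tmp/file.xml") ch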
+
+
+
+val default_config : config
+  (* - Warnings are thrown away
+   * - Error messages will contain line numbers
+   * - Neither T_super_root nor T_pinstr nor T_comment nodes are generated
+   * - The internal encoding is ISO-8859-1
+   * - The standalone declaration is checked
+   * - Element positions are stored
+   * - The IDREF pass is left out
+   * - If available, DFAs are used for validation
+   * - Only deterministic content models are accepted
+   *) 
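[Editor's illustration, not part of the patch: deriving a custom
configuration from default_config; only fields documented above are changed.]

    open Pxp_yacc

    let my_config =
      { default_config with
          encoding = `Enc_utf8;   (* default_config uses ISO-8859-1 *)
          idref_pass = true;      (* only effective if an id_index is passed
                                   * to the parsing function *)
      }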
+
+val default_extension : ('a node extension) as 'a
+  (* A "null" extension; an extension that does not extend the functionality *)
+
+val default_spec : ('a node extension as 'a) spec
+  (* Specifies that you do not want to use extensions. *)
+
+val parse_dtd_entity : config -> source -> dtd
+  (* Parse an entity containing a DTD (external subset), and return this DTD. *)
+
+val extract_dtd_from_document_entity : config -> source -> dtd
+  (* Parses a closed document, i.e. a document beginning with <!DOCTYPE...>,
+   * and returns the DTD contained in the document.
+   * The parts of the document outside the DTD are actually not parsed,
+   * i.e. parsing stops when all declarations of the DTD have been read.
+   *)
+
+val parse_document_entity : 
+  ?transform_dtd:(dtd -> dtd) ->
+  ?id_index:('ext index) ->
+  config -> source -> 'ext spec -> 'ext document
+  (* Parse a closed document, i.e. a document beginning with <!DOCTYPE...>,
+   * and validate the contents of the document against the DTD contained
+   * and/or referenced in the document.
+   *
+   * If the optional argument ~transform_dtd is passed, the following 
+   * modification applies: After the DTD (both the internal and external
+   * subsets) has been parsed, the function ~transform_dtd is called,
+   * and the resulting DTD is actually used to validate the document.
+   *
+   * If the optional argument ~transform_dtd is missing, the parser
+   * behaves in the same way as if the identity were passed as ~transform_dtd.
+   *
+   * If the optional argument ~id_index is present, the parser adds
+   * any ID attribute to the passed index. An index is required to detect
+   * violations of the uniqueness of IDs.
+   *)
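[Editor's illustration, not part of the patch: a validating parse with an
ID index, following the pattern of examples/validate/validate.ml further
below; "doc.xml" is a placeholder.]

    open Pxp_types
    open Pxp_document
    open Pxp_yacc

    let parse_with_id_checks () =
      let index = new hash_index in
      parse_document_entity
        ~id_index:(index :> 'ext index)
        { default_config with idref_pass = true }
        (from_file "doc.xml")
        default_spec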
+
+val parse_wfdocument_entity : 
+  config -> source -> 'ext spec -> 'ext document
+  (* Parse a closed document (see parse_document_entity), but do not
+   * validate it. Only checks on well-formedness are performed.
+   *)
+
+val parse_content_entity  : 
+  ?id_index:('ext index) ->
+  config -> source -> dtd -> 'ext spec -> 'ext node
+  (* Parse a file representing a well-formed fragment of a document. The
+   * fragment must be a single element (i.e. something like <a>...</a>;
+   * not a sequence like <a>...</a><b>...</b>). The element is validated
+   * against the passed DTD, but it is not checked whether the element is
+   * the root element specified in the DTD.
+   *
+   * If the optional argument ~id_index is present, the parser adds
+   * any ID attribute to the passed index. An index is required to detect
+   * violations of the uniqueness of IDs.
+   *)
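[Editor's illustration, not part of the patch: parsing a fragment against a
separately parsed DTD, mirroring the examples/simple_transformation
programs further below, which use record.dtd.]

    open Pxp_types
    open Pxp_document
    open Pxp_yacc

    let parse_fragment () =
      let dtd = parse_dtd_entity default_config (from_file "record.dtd") in
      parse_content_entity default_config (from_channel stdin) dtd default_spec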
+
+val parse_wfcontent_entity : 
+  config -> source -> 'ext spec -> 'ext node
+  (* Parse a file representing a well-formed fragment of a document
+   * (see parse_content_entity). The fragment is not validated, only
+   * checked for well-formedness.
+   *)
+  
+
+'>
diff --git a/helm/DEVEL/pxp/pxp/examples/Makefile b/helm/DEVEL/pxp/pxp/examples/Makefile
new file mode 100644 (file)
index 0000000..9343857
--- /dev/null
@@ -0,0 +1,22 @@
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+
+.PHONY: CLEAN
+CLEAN: clean
+       $(MAKE) -C xmlforms CLEAN
+       $(MAKE) -C validate CLEAN
+       $(MAKE) -C readme CLEAN
+       $(MAKE) -C simple_transformation CLEAN
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~
+       $(MAKE) -C xmlforms distclean
+       $(MAKE) -C validate distclean
+       $(MAKE) -C readme distclean
+       $(MAKE) -C simple_transformation distclean
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/.cvsignore b/helm/DEVEL/pxp/pxp/examples/readme/.cvsignore
new file mode 100644 (file)
index 0000000..2395c19
--- /dev/null
@@ -0,0 +1,10 @@
+*.cmi
+*.cmo
+*.cma
+*.cmx
+*.o
+*.a
+*.cmxa
+depend
+depend.pkg
+
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/Makefile b/helm/DEVEL/pxp/pxp/examples/readme/Makefile
new file mode 100644 (file)
index 0000000..df5f6ed
--- /dev/null
@@ -0,0 +1,34 @@
+# make readme:         make bytecode executable
+# make readme.opt:     make native executable
+# make clean:          remove intermediate files
+# make CLEAN:           remove intermediate files (recursively)
+# make distclean:      remove any superfluous files
+# make install
+#----------------------------------------------------------------------
+
+BIN = /usr/local/bin
+
+.PHONY: readme
+readme:
+       $(MAKE) -f Makefile.code readme
+
+.PHONY: readme.opt
+readme.opt:
+       $(MAKE) -f Makefile.code readme.opt
+
+
+.PHONY: clean
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~ depend depend.pkg
+       rm -f readme readme.opt
+
+.PHONY: install
+install:
+       cp readme $(BIN)
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/Makefile.code b/helm/DEVEL/pxp/pxp/examples/readme/Makefile.code
new file mode 100644 (file)
index 0000000..0514ddf
--- /dev/null
@@ -0,0 +1,57 @@
+#----------------------------------------------------------------------
+# specific rules for this package:
+
+OBJECTS  = to_html.cmo to_text.cmo
+XOBJECTS = $(OBJECTS:.cmo=.cmx)
+ARCHIVE  = readme.cma
+XARCHIVE = readme.cmxa
+NAME     = readme
+REQUIRES = str pxp
+
+readme: $(ARCHIVE) main.cmo
+       ocamlfind ocamlc -o readme -custom -package "$(REQUIRES)" \
+               -linkpkg $(ARCHIVE) main.cmo 
+
+readme.opt: $(XARCHIVE) main.cmx
+       ocamlfind ocamlopt -o readme.opt -package "$(REQUIRES)" \
+               -linkpkg $(XARCHIVE) main.cmx
+
+$(ARCHIVE): $(OBJECTS)
+       $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS)
+
+$(XARCHIVE): $(XOBJECTS)
+       $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS)
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS   =
+OCAMLC    = ocamlc -g $(OPTIONS) $(ROPTIONS)
+OCAMLOPT  = ocamlopt -p $(OPTIONS) $(ROPTIONS)
+OCAMLDEP  = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+depend: *.ml *.mli 
+       $(OCAMLDEP) *.ml *.mli >depend
+
+depend.pkg: Makefile
+       $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly
+
+.ml.cmx:
+       $(OCAMLOPT) -c $<
+
+.ml.cmo:
+       $(OCAMLC) -c $<
+
+.mli.cmi:
+       $(OCAMLC) -c $<
+
+.mll.ml:
+       ocamllex $<
+
+*.mli:
+
+include depend
+include depend.pkg
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/main.ml b/helm/DEVEL/pxp/pxp/examples/readme/main.ml
new file mode 100644 (file)
index 0000000..4e3837a
--- /dev/null
@@ -0,0 +1,108 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types
+open Pxp_document
+open Pxp_yacc
+
+
+let rec print_error e =
+  prerr_endline(string_of_exn e)
+;;
+
+
+let run f a =
+  try f a with
+      e -> print_error e
+;;
+
+
+let convert_to_html filename =
+  (* read in style definition *)
+  let document =
+    parse_document_entity
+      { default_config with encoding = `Enc_iso88591 }
+      (from_file filename)
+      To_html.tag_map
+  in
+  let root = document # root in
+  let store = new To_html.store in
+  root # extension # to_html store stdout
+;;
+
+
+let convert_to_text filename =
+  (* read in style definition *)
+  let document =
+    parse_document_entity
+      default_config
+      (from_file filename)
+      To_text.tag_map
+  in
+  let root = document # root in
+  let store = new To_text.store in
+  let box = new To_text.box 79 79 in
+  root # extension # to_box store box;
+  box # output 0 0 stdout
+;;
+
+
+let main() =
+  let want_html = ref false in
+  let want_text = ref false in
+  let filename = ref None in
+  Arg.parse
+      [ "-html", Arg.Set want_html, 
+             "  convert file to html";
+       "-text", Arg.Set want_text,
+             "  convert file to text";
+      ]
+      (fun s -> 
+        match !filename with
+            None -> filename := Some s
+          | Some _ ->
+              raise (Arg.Bad "Multiple arguments not allowed."))
+      "usage: readme [ -text | -html ] input.xml >output";
+  let fn =
+    match !filename with
+       None -> 
+         prerr_endline "readme: no input";
+         exit 1
+      | Some s -> s
+  in
+  match !want_html, !want_text with
+      true, false ->
+       run convert_to_html fn
+    | false, true ->
+       run convert_to_text fn
+    | _ ->
+       prerr_endline ("readme: Please select exactly one output format")
+;;
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:31  lpadovan
+ * Initial revision
+ *
+ * Revision 1.5  2000/07/08 17:58:17  gerd
+ *     Updated because of PXP API changes.
+ *
+ * Revision 1.4  2000/06/04 20:25:38  gerd
+ *     Updates because of renamed PXP modules.
+ *
+ * Revision 1.3  2000/05/01 16:46:40  gerd
+ *     Using the new error formatter.
+ *
+ * Revision 1.2  1999/08/23 16:54:19  gerd
+ *     Minor changes.
+ *
+ * Revision 1.1  1999/08/22 22:29:32  gerd
+ *     Initial revision.
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/readme.dtd b/helm/DEVEL/pxp/pxp/examples/readme/readme.dtd
new file mode 100644 (file)
index 0000000..8ff6a9f
--- /dev/null
@@ -0,0 +1,38 @@
+<!-- $Id -->
+
+<!ENTITY % p.like "p|ul">
+<!ENTITY % text   "br|code|em|footnote|a">
+
+<!ELEMENT readme (sect1+)>
+<!ATTLIST readme
+          title CDATA #REQUIRED>
+
+<!ELEMENT sect1 (title,(sect2|%p.like;)+)>
+
+<!ELEMENT sect2 (title,(sect3|%p.like;)+)>
+
+<!ELEMENT sect3 (title,(%p.like;)+)>
+
+<!ELEMENT title (#PCDATA|br)*>
+
+<!ELEMENT p (#PCDATA|%text;)*>
+
+<!ELEMENT br EMPTY>
+
+<!ELEMENT code (#PCDATA)>
+
+<!ELEMENT em (#PCDATA|%text;)*>
+
+<!ELEMENT ul (li+)>
+
+<!ELEMENT li (%p.like;)*>
+
+<!ELEMENT footnote (#PCDATA|%text;)*>
+
+<!ELEMENT a (#PCDATA)*>
+<!ATTLIST a 
+          href      CDATA #IMPLIED
+          readmeref CDATA #IMPLIED
+>
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/to_html.ml b/helm/DEVEL/pxp/pxp/examples/readme/to_html.ml
new file mode 100644 (file)
index 0000000..f717b22
--- /dev/null
@@ -0,0 +1,432 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(*$ readme.code.header *)
+open Pxp_types
+open Pxp_document
+(*$-*)
+
+
+(*$ readme.code.footnote-printer *)
+class type footnote_printer =
+  object
+    method footnote_to_html : store_type -> out_channel -> unit
+  end
+
+and store_type =
+  object
+    method alloc_footnote : footnote_printer -> int
+    method print_footnotes : out_channel -> unit
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.store *)
+class store =
+  object (self)
+
+    val mutable footnotes = ( [] : (int * footnote_printer) list )
+    val mutable next_footnote_number = 1
+
+    method alloc_footnote n =
+      let number = next_footnote_number in
+      next_footnote_number <- number+1;
+      footnotes <- footnotes @ [ number, n ];
+      number
+
+    method print_footnotes ch =
+      if footnotes <> [] then begin
+       output_string ch "<hr align=left noshade=noshade width=\"30%\">\n";
+       output_string ch "<dl>\n";
+       List.iter
+         (fun (_,n) -> 
+            n # footnote_to_html (self : #store_type :> store_type) ch)
+         footnotes;
+       output_string ch "</dl>\n";
+      end
+
+  end
+;;
+(*$-*)
+
+
+
+(*$ readme.code.escape-html *)
+let escape_html s =
+  Str.global_substitute
+    (Str.regexp "<\\|>\\|&\\|\"")
+    (fun s ->
+      match Str.matched_string s with
+        "<" -> "&lt;"
+      | ">" -> "&gt;"
+      | "&" -> "&amp;"
+      | "\"" -> "&quot;"
+      | _ -> assert false)
+    s
+;;
+(*$-*)
+
+
+(*$ readme.code.shared *)
+class virtual shared =
+  object (self)
+
+    (* --- default_ext --- *)
+
+    val mutable node = (None : shared node option)
+
+    method clone = {< >} 
+    method node =
+      match node with
+          None ->
+            assert false
+        | Some n -> n
+    method set_node n =
+      node <- Some n
+
+    (* --- virtual --- *)
+
+    method virtual to_html : store -> out_channel -> unit
+
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.only-data *)
+class only_data =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      output_string ch (escape_html (self # node # data))
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.no-markup *)
+class no_markup =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      List.iter
+       (fun n -> n # extension # to_html store ch)
+       (self # node # sub_nodes)
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.readme *)
+class readme =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      (* output header *)
+      output_string 
+       ch "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">";
+      output_string
+       ch "<!-- WARNING! This is a generated file, do not edit! -->\n";
+      let title = 
+       match self # node # attribute "title" with
+           Value s -> s
+         | _ -> assert false
+      in
+      let html_header, _ =
+       try (self # node # dtd # par_entity "readme:html:header") 
+            # replacement_text
+       with WF_error _ -> "", false in
+      let html_trailer, _ =
+       try (self # node # dtd # par_entity "readme:html:trailer")
+            # replacement_text
+       with WF_error _ -> "", false in
+      let html_bgcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:bgcolor")
+            # replacement_text
+       with WF_error _ -> "white", false in
+      let html_textcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:textcolor")
+            # replacement_text
+       with WF_error _ -> "", false in
+      let html_alinkcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:alinkcolor")
+            # replacement_text
+       with WF_error _ -> "", false in
+      let html_vlinkcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:vlinkcolor")
+            # replacement_text
+       with WF_error _ -> "", false in
+      let html_linkcolor, _ =
+       try (self # node # dtd # par_entity "readme:html:linkcolor")
+            # replacement_text
+       with WF_error _ -> "", false in
+      let html_background, _ =
+       try (self # node # dtd # par_entity "readme:html:background")
+            # replacement_text
+       with WF_error _ -> "", false in
+
+      output_string ch "<html><header><title>\n";
+      output_string ch (escape_html title);
+      output_string ch "</title></header>\n";
+      output_string ch "<body ";
+      List.iter
+       (fun (name,value) ->
+          if value <> "" then 
+            output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
+       [ "bgcolor",    html_bgcolor;
+         "text",       html_textcolor;
+         "link",       html_linkcolor;
+         "alink",      html_alinkcolor;
+         "vlink",      html_vlinkcolor;
+       ];
+      output_string ch ">\n";
+      output_string ch html_header;
+      output_string ch "<h1>";
+      output_string ch (escape_html title);
+      output_string ch "</h1>\n";
+      (* process main content: *)
+      List.iter
+       (fun n -> n # extension # to_html store ch)
+       (self # node # sub_nodes);
+      (* now process footnotes *)
+      store # print_footnotes ch;
+      (* trailer *)
+      output_string ch html_trailer;
+      output_string ch "</html>\n";
+
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.section *)
+class section the_tag =
+  object (self)
+    inherit shared
+
+    val tag = the_tag
+
+    method to_html store ch =
+      let sub_nodes = self # node # sub_nodes in
+      match sub_nodes with
+         title_node :: rest ->
+           output_string ch ("<" ^ tag ^ ">\n");
+           title_node # extension # to_html store ch;
+           output_string ch ("\n</" ^ tag ^ ">");
+           List.iter
+             (fun n -> n # extension # to_html store ch)
+             rest
+       | _ ->
+           assert false
+  end
+;;
+
+class sect1 = section "h1";;
+class sect2 = section "h3";;
+class sect3 = section "h4";;
+(*$-*)
+
+
+(*$ readme.code.map-tag *)
+class map_tag the_target_tag =
+  object (self)
+    inherit shared
+
+    val target_tag = the_target_tag
+
+    method to_html store ch =
+      output_string ch ("<" ^ target_tag ^ ">\n");
+      List.iter
+       (fun n -> n # extension # to_html store ch)
+       (self # node # sub_nodes);
+      output_string ch ("\n</" ^ target_tag ^ ">");
+  end
+;;
+
+class p = map_tag "p";;
+class em = map_tag "b";;
+class ul = map_tag "ul";;
+class li = map_tag "li";;
+(*$-*)
+
+
+(*$ readme.code.br *)
+class br =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      output_string ch "<br>\n";
+      List.iter
+       (fun n -> n # extension # to_html store ch)
+       (self # node # sub_nodes);
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.code *)
+class code =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      let data = self # node # data in
+      (* convert tabs *)
+      let l = String.length data in
+      let rec preprocess i column =
+       (* this is very inefficient, but easy to follow: *)
+       if i < l then
+         match data.[i] with
+             '\t' ->
+               let n = 8 - (column mod 8) in
+               String.make n ' ' ^ preprocess (i+1) (column + n)
+           | '\n' ->
+               "\n" ^ preprocess (i+1) 0
+           | c ->
+               String.make 1 c ^ preprocess (i+1) (column + 1)
+       else
+         ""
+      in
+      output_string ch "<p><pre>";
+      output_string ch (escape_html (preprocess 0 0));
+      output_string ch "</pre></p>";
+
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.a *)
+class a =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      output_string ch "<a ";
+      let href =
+       match self # node # attribute "href" with
+           Value v -> escape_html v
+         | Valuelist _ -> assert false
+         | Implied_value ->
+             begin match self # node # attribute "readmeref" with
+                 Value v -> escape_html v ^ ".html"
+               | Valuelist _ -> assert false
+               | Implied_value ->
+                   ""
+             end
+      in
+      if href <> "" then
+       output_string ch ("href=\""  ^ href ^ "\"");
+      output_string ch ">";
+      output_string ch (escape_html (self # node # data));
+      output_string ch "</a>";
+       
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.footnote *)
+class footnote =
+  object (self)
+    inherit shared
+
+    val mutable footnote_number = 0
+
+    method to_html store ch =
+      let number = 
+       store # alloc_footnote (self : #shared :> footnote_printer) in
+      let foot_anchor = 
+       "footnote" ^ string_of_int number in
+      let text_anchor =
+       "textnote" ^ string_of_int number in
+      footnote_number <- number;
+      output_string ch ( "<a name=\"" ^ text_anchor ^ "\" href=\"#" ^ 
+                        foot_anchor ^ "\">[" ^ string_of_int number ^ 
+                        "]</a>" )
+
+    method footnote_to_html store ch =
+      (* prerequisite: we are in a definition list <dl>...</dl> *)
+      let foot_anchor = 
+       "footnote" ^ string_of_int footnote_number in
+      let text_anchor =
+       "textnote" ^ string_of_int footnote_number in
+      output_string ch ("<dt><a name=\"" ^ foot_anchor ^ "\" href=\"#" ^ 
+                       text_anchor ^ "\">[" ^ string_of_int footnote_number ^ 
+                       "]</a></dt>\n<dd>");
+      List.iter
+       (fun n -> n # extension # to_html store ch)
+       (self # node # sub_nodes);
+      output_string ch ("\n</dd>")
+  end
+;;
+(*$-*)
+
+
+(**********************************************************************)
+
+(*$ readme.code.tag-map *)
+open Pxp_yacc
+
+let tag_map =
+  make_spec_from_alist
+    ~data_exemplar:(new data_impl (new only_data))
+    ~default_element_exemplar:(new element_impl (new no_markup))
+    ~element_alist:
+      [ "readme", (new element_impl (new readme));
+       "sect1",  (new element_impl (new sect1));
+       "sect2",  (new element_impl (new sect2));
+       "sect3",  (new element_impl (new sect3));
+       "title",  (new element_impl (new no_markup));
+       "p",      (new element_impl (new p));
+       "br",     (new element_impl (new br));
+       "code",   (new element_impl (new code));
+       "em",     (new element_impl (new em));
+       "ul",     (new element_impl (new ul));
+       "li",     (new element_impl (new li));
+       "footnote", (new element_impl (new footnote : #shared :> shared));
+       "a",      (new element_impl (new a));
+      ]
+    ()
+;;
+(*$-*)
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:31  lpadovan
+ * Initial revision
+ *
+ * Revision 1.6  2000/08/22 14:34:25  gerd
+ *     Using make_spec_from_alist instead of make_spec_from_mapping.
+ *
+ * Revision 1.5  2000/08/18 21:15:14  gerd
+ *     Update because of PXP API change: par_entity raises WF_error
+ * instead of Validation error if the entity is not defined.
+ *     Further minor updates.
+ *
+ * Revision 1.4  2000/07/08 17:58:17  gerd
+ *     Updated because of PXP API changes.
+ *
+ * Revision 1.3  2000/06/04 20:25:38  gerd
+ *     Updates because of renamed PXP modules.
+ *
+ * Revision 1.2  1999/09/12 20:09:32  gerd
+ *     Added section marks.
+ *
+ * Revision 1.1  1999/08/22 22:29:32  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/readme/to_text.ml b/helm/DEVEL/pxp/pxp/examples/readme/to_text.ml
new file mode 100644 (file)
index 0000000..fc45f45
--- /dev/null
@@ -0,0 +1,599 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types
+open Pxp_document
+
+
+(**********************************************************************)
+(* The box class represents formatted text                            *)
+(**********************************************************************)
+
+class type formatted_text =
+  object
+    method output : int -> int -> out_channel -> unit
+       (* output initial_indent indent ch:
+        * 'initial_indent' is how far the first line should be indented;
+        * 'indent' how far the rest. 'ch' is the channel on which the lines
+        * are to be printed.
+        *)
+
+    method multiline : bool
+        (* whether the box occupies multiple lines *)
+
+    method width_of_last_line : int
+        (* returns the width of the last line *)
+  end
+;;
+
+
+type text =
+    Text of string
+  | Box of formatted_text
+;;
+
+
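+(* textwidth: computes the width of the last output line described by the
+ * given text list; a multiline box resets the count to the width of its
+ * own last line.
+ *)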
+let textwidth tl =
+  let rec compute tl r =
+    match tl with
+       [] -> r
+      | t :: tl' ->
+         begin match t with
+             Text s ->
+                compute tl' (r + String.length s)
+           | Box b ->
+               if b # multiline then
+                 compute tl' (b # width_of_last_line)
+               else
+                 compute tl' (r + b # width_of_last_line)
+         end
+  in
+  compute (List.rev tl) 0
+;;
+
+
+class box the_initial_width the_width =
+  object (self)
+
+    (* The 'initial_width' is the width that is available on the first
+     * line of output; the 'width' is the width that is available in the
+     * rest.
+     *)
+
+    val initial_width = the_initial_width
+    val width = the_width
+
+    (* state: *)
+
+    val mutable space_added = false
+    val mutable linefeed_added = false
+    val mutable is_first_line = true
+    val mutable lines = []
+        (* lines in reverse order (first line = last element) *)
+    val mutable current_line = []
+        (* not member of 'lines'; again reverse order *)
+    val mutable current_indent = 0
+
+    method add_space =
+      if not space_added then begin
+       space_added <- true;
+       linefeed_added <- true;
+       current_line <- Text " " :: current_line
+      end
+
+    method ignore_space =
+      space_added <- true;
+      linefeed_added <- true
+
+    method add_linefeed =
+      if not linefeed_added then begin
+       linefeed_added <- true;
+       if not space_added then
+         current_line <- Text " " :: current_line
+      end
+
+    method ignore_linefeed =
+      linefeed_added <- true
+
+    method add_newline =
+      lines <- current_line :: lines;
+      current_line <- [];
+      space_added <- true;
+      linefeed_added <- true;
+      is_first_line <- false;
+      current_indent <- 0;
+
+    method add_word s =
+      (* first try to add 's' to 'current_line' *)
+      let current_line' = Text s :: current_line in
+      let current_width =
+       if is_first_line then initial_width else width in
+      if textwidth current_line' + current_indent <= current_width then begin
+       (* ok, the line does not become too long *)
+       current_line <- current_line';
+       space_added <- false;
+       linefeed_added <- false
+      end
+      else begin
+       (* The line would be too long. *)
+       lines <- current_line :: lines;
+       current_line <- [Text s];
+       space_added <- false;
+       linefeed_added <- false;
+       is_first_line <- false;
+       current_indent <- 0;
+      end
+
+    method add_box b =
+      current_line <- Box b :: current_line;
+      space_added <- false;
+      linefeed_added <- false;
+
+    method width_of_last_line =
+      textwidth current_line + current_indent
+
+
+    method available_width =
+      let current_width =
+       if is_first_line then initial_width else width in
+      current_width - textwidth current_line - current_indent
+  
+
+    method multiline =
+      lines <> [] or
+      (List.exists 
+        (function 
+             Text _ -> false
+           | Box b -> b # multiline) 
+        current_line)
+
+    method output initial_indent indent ch =
+      let eff_lines =
+       List.rev
+         (current_line :: lines) in
+      let rec out_lines cur_indent ll =
+       match ll with
+           [] ->  ()
+         | l :: ll' ->
+             output_string ch (String.make cur_indent ' ');
+             List.iter
+               (function
+                    Text s ->
+                      output_string ch s
+                  | Box b ->
+                      b # output 0 indent ch
+               )
+               (List.rev l);
+             if ll' <> [] then 
+               output_string ch "\n";
+             out_lines indent ll'
+      in
+      out_lines initial_indent eff_lines
+  end
+;;
+
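[Editor's illustration, not part of the patch: a small usage sketch of the
box class defined above; the 79/79 widths follow main.ml.]

    let demo_box () =
      let b = new box 79 79 in
      b # add_word "Hello";
      b # add_space;
      b # add_word "world.";
      b # add_newline;
      b # output 0 0 stdout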
+
+class listitem_box listmark indent totalwidth =
+  let initial_newline = String.length listmark >= indent in
+  object (self)
+    inherit box totalwidth (totalwidth - indent) as super
+
+    val extra_indent = indent
+
+    initializer
+    self # add_word listmark;
+    if initial_newline then
+      self # add_newline
+    else begin
+      current_line <- Text (String.make (indent - String.length listmark) ' ')
+                      :: current_line;
+      space_added <- true;
+      linefeed_added <- true;
+    end
+
+
+    method output initial_indent indent ch =
+      super # output initial_indent (indent + extra_indent) ch
+  end
+;;
+      
+
+(**********************************************************************)
+(* Footnotes etc.                                                     *)
+(**********************************************************************)
+
+
+class type footnote_printer =
+  object
+    method footnote_to_box : store_type -> box -> unit
+  end
+
+and store_type =
+  object
+    method alloc_footnote : footnote_printer -> int
+    method print_footnotes : box -> unit
+  end
+;;
+
+
+class store =
+  object (self)
+
+    val mutable footnotes = ( [] : (int * footnote_printer) list )
+    val mutable next_footnote_number = 1
+
+    method alloc_footnote n =
+      let number = next_footnote_number in
+      next_footnote_number <- number+1;
+      footnotes <- footnotes @ [ number, n ];
+      number
+
+    method print_footnotes (b : box) =
+      if footnotes <> [] then begin
+       b # add_newline;
+       b # add_newline;
+       let w = b # available_width in
+       b # add_word (String.make (w/3) '-');
+       b # add_newline;
+       b # add_newline;
+       List.iter
+         (fun (_,n) -> 
+            n # footnote_to_box (self : #store_type :> store_type) b)
+         footnotes;
+       b # add_newline;
+      end
+  end
+;;
+
+
+
+(**********************************************************************)
+(* The extension objects                                              *)
+(**********************************************************************)
+
+
+class virtual shared =
+  object (self)
+
+    (* --- default_ext --- *)
+
+    val mutable node = (None : shared node option)
+
+    method clone = {< >} 
+    method node =
+      match node with
+          None ->
+            assert false
+        | Some n -> n
+    method set_node n =
+      node <- Some n
+
+    (* --- virtual --- *)
+
+    method virtual to_box : store -> box -> unit
+      (* to_box store b:
+       * formats the element using box 'b' 
+       *)
+  end
+;;
+
+
+class only_data =
+  object (self)
+    inherit shared
+
+    val white_space_re = Str.regexp "[ \t]+\\|\n"
+
+    method to_box store b =
+      let s = self # node # data in
+      let splitted = Str.full_split white_space_re s in
+      List.iter
+       (function
+            Str.Delim "\n" ->
+              b # add_linefeed
+          | Str.Delim _ ->
+              b # add_space
+          | Str.Text s ->
+              b # add_word s)
+       splitted
+  end
+;;
+
+
+class no_markup =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      List.iter
+       (fun n -> n # extension # to_box store b)
+       (self # node # sub_nodes)
+  end
+;;
+
+
+class readme =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      let title = 
+       match self # node # attribute "title" with
+           Value s -> s
+         | _ -> assert false
+      in
+      let w = b # available_width in
+      let line = String.make (w-1) '*' in
+      b # add_word line;
+      b # add_newline;
+      b # add_word title;
+      b # add_newline;
+      b # add_word line;
+      b # add_newline;
+      b # add_newline;
+      (* process main content: *)
+      List.iter
+       (fun n -> n # extension # to_box store b)
+       (self # node # sub_nodes);
+      (* now process footnotes *)
+      store # print_footnotes b;
+      (* trailer *)
+      b # add_newline;
+  end
+;;
+
+
+class section the_tag =
+  object (self)
+    inherit shared
+
+    val tag = the_tag
+
+    method to_box store b =
+      let sub_nodes = self # node # sub_nodes in
+      match sub_nodes with
+         title_node :: rest ->
+           b # add_newline;
+           let w = b # available_width in
+           let line = String.make (w-1) tag in
+           b # add_word line;
+           b # add_newline;
+           b # add_word (title_node # data);
+           b # add_newline;
+           b # add_word line;
+           b # add_newline;
+           List.iter
+             (fun n -> 
+                n # extension # to_box store b)
+             rest;
+       | _ ->
+           assert false
+  end
+;;
+
+class sect1 = section '=';;
+class sect2 = section '-';;
+class sect3 = section ':';;
+
+
+class p =
+  object (self)
+    inherit shared
+  
+    method to_box store b =
+      let within_list = 
+       match self # node # parent # node_type with
+           T_element "li" -> true
+         | T_element _    -> false 
+         | _ -> assert false
+      in
+      if not within_list then
+       b # add_newline;
+      let w = b # available_width in
+      let b' = new box w w in
+      b' # ignore_space;
+      List.iter
+       (fun n -> n # extension # to_box store b')
+       (self # node # sub_nodes);
+      b # add_box (b' :> formatted_text);
+      b # add_newline;
+  end
+;;
+
+
+class li =
+  object (self)
+    inherit shared
+  
+    method to_box store b =
+      b # add_newline;
+      let w = b # available_width in
+      let b' = new listitem_box "-" 3 w in
+      b' # ignore_space;
+      List.iter
+       (fun n -> n # extension # to_box store b')
+       (self # node # sub_nodes);
+      b # add_box (b' :> formatted_text);
+  end
+;;
+
+
+class code =
+  object (self)
+    inherit shared
+  
+    method to_box store b =
+      b # add_newline;
+      let w = b # available_width in
+      let b' = new box w w in
+      b' # ignore_space;
+      let data = self # node # data in
+      (* convert tabs *)
+      let l = String.length data in
+      let rec add s i column =
+       (* this is very inefficient, but easy to follow: *)
+       if i < l then
+         match data.[i] with
+             '\t' ->
+               let n = 8 - (column mod 8) in
+               add (s ^ String.make n ' ') (i+1) (column + n)
+           | '\n' ->
+               b' # add_word s;
+               b' # add_newline;
+               add "" (i+1) 0
+           | c ->
+               add (s ^ String.make 1 c) (i+1) (column + 1)
+       else
+         if s <> "" then begin
+           b' # add_word s;
+           b' # add_newline;
+         end
+      in
+      add "" 0 0;
+      b # add_box (b' :> formatted_text);
+      b # add_newline;
+  end
+;;
+
+
+class br =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      b # add_newline;
+  end
+;;
+
+
+class footnote =
+  object (self)
+    inherit shared
+
+    val mutable footnote_number = 0
+
+    method to_box store b =
+      let number = 
+       store # alloc_footnote (self : #shared :> footnote_printer) in
+      footnote_number <- number;
+      b # add_space;
+      b # add_word ("[" ^ string_of_int number ^ "]");
+
+    method footnote_to_box store b =
+      let w = b # available_width in
+      let n = "[" ^ string_of_int footnote_number ^ "]" in
+      let b' = new listitem_box n 6 w in
+      b' # ignore_space;
+      List.iter
+       (fun n -> n # extension # to_box store b')
+       (self # node # sub_nodes);
+      b # add_box (b' :> formatted_text);
+      b # add_newline;
+      b # add_newline;
+  end
+;;
+
+
+class a =
+  object (self)
+    inherit shared
+
+    val mutable footnote_number = 0
+    val mutable a_href = ""
+
+    method to_box store b =
+      let href =
+       match self # node # attribute "href" with
+           Value v -> "see " ^ v
+         | Valuelist _ -> assert false
+         | Implied_value ->
+             begin match self # node # attribute "readmeref" with
+                 Value v -> "see file " ^ v 
+               | Valuelist _ -> assert false
+               | Implied_value ->
+                   ""
+             end
+      in
+      a_href <- href;
+      List.iter
+       (fun n -> n # extension # to_box store b)
+       (self # node # sub_nodes);
+      if href <> "" then begin
+       let number = 
+         store # alloc_footnote (self : #shared :> footnote_printer) in
+       footnote_number <- number;
+       b # add_space;
+       b # add_word ("[" ^ string_of_int number ^ "]");
+      end
+
+    method footnote_to_box store b =
+      if a_href <> "" then begin
+       let w = b # available_width in
+       let n = "[" ^ string_of_int footnote_number ^ "]" in
+       let b' = new listitem_box n 6 w in
+       b' # ignore_space;
+       b' # add_word a_href;
+       b # add_box (b' :> formatted_text);
+       b # add_newline;
+       b # add_newline;
+      end      
+  end
+;;
+
+(**********************************************************************)
+
+open Pxp_yacc
+
+let tag_map =
+  make_spec_from_alist
+    ~data_exemplar:(new data_impl (new only_data))
+    ~default_element_exemplar:(new element_impl (new no_markup))
+    ~element_alist:
+       [ "readme",   (new element_impl (new readme));
+        "sect1",    (new element_impl (new sect1));
+        "sect2",    (new element_impl (new sect2));
+        "sect3",    (new element_impl (new sect3));
+        "title",    (new element_impl (new no_markup));
+        "p",        (new element_impl (new p));
+        "br",       (new element_impl (new br));
+        "code",     (new element_impl (new code));
+        "em",       (new element_impl (new no_markup));
+        "ul",       (new element_impl (new no_markup));
+        "li",       (new element_impl (new li));
+        "footnote", (new element_impl (new footnote : #shared :> shared));
+        "a",        (new element_impl (new a : #shared :> shared));
+       ]
+    ()
+;;
+
+
+    
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:31  lpadovan
+ * Initial revision
+ *
+ * Revision 1.5  2000/08/22 14:34:25  gerd
+ *     Using make_spec_from_alist instead of make_spec_from_mapping.
+ *
+ * Revision 1.4  2000/08/18 21:15:25  gerd
+ *     Minor updates because of PXP API changes.
+ *
+ * Revision 1.3  2000/07/08 17:58:17  gerd
+ *     Updated because of PXP API changes.
+ *
+ * Revision 1.2  2000/06/04 20:25:38  gerd
+ *     Updates because of renamed PXP modules.
+ *
+ * Revision 1.1  1999/08/22 22:29:32  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/Makefile b/helm/DEVEL/pxp/pxp/examples/simple_transformation/Makefile
new file mode 100644 (file)
index 0000000..27be18c
--- /dev/null
@@ -0,0 +1,21 @@
+all: print sort delcol
+
+print: print.ml
+       ocamlfind ocamlc -o print -package pxp -linkpkg -custom \
+               -predicates pxp_without_utf8 print.ml
+
+sort: sort.ml
+       ocamlfind ocamlc -o sort -package pxp -linkpkg -custom \
+               -predicates pxp_without_utf8 sort.ml
+
+delcol: delcol.ml
+       ocamlfind ocamlc -o delcol -package pxp -linkpkg -custom \
+               -predicates pxp_without_utf8 delcol.ml
+
+clean:
+       rm -f *.cmo *.cma *.cmi *.cmxa *.a *.o
+
+distclean: clean
+       rm -f *~ print sort delcol
+
+CLEAN: clean
diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/README b/helm/DEVEL/pxp/pxp/examples/simple_transformation/README
new file mode 100644 (file)
index 0000000..5b92128
--- /dev/null
@@ -0,0 +1,17 @@
+Usage:
+       sort -by phone <sample.xml | print
+
+once sort and print are compiled.
+
+These examples illustrate iter_tree, map_tree and find_element.
+
+
+sort:  reads an XML file from stdin, sorts the records, and prints the
+       result as XML.
+delcol: reads an XML file from stdin, deletes a column from all records,
+       and prints the result as XML.
+print: reads an XML file from stdin, and pretty-prints the file
+
+The XML file must not contain a DTD. The programs assume the fixed DTD
+record.dtd.
+
diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/delcol.ml b/helm/DEVEL/pxp/pxp/examples/simple_transformation/delcol.ml
new file mode 100644 (file)
index 0000000..57c3327
--- /dev/null
@@ -0,0 +1,69 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Read a record-list, delete a column, and print it as XML *)
+open Pxp_types;;
+open Pxp_document;;
+open Pxp_yacc;;
+
+let delcol col tree =
+  map_tree
+    ~pre:
+      (fun n -> 
+        match n # node_type with
+            T_element name when name = col ->
+              raise Skip
+          | _ -> n # orphaned_flat_clone)
+    tree
+;;
+
+
+let main() =
+  let column = ref "" in
+  Arg.parse
+      [ "-col", Arg.String (fun s -> column := s),
+           " (last-name|first-name|phone)";
+      ]
+      (fun _ -> raise (Arg.Bad "Bad usage"))
+      "usage: sort [ options ]";
+  if !column = "" then (
+    prerr_endline "Column not specified!";
+    exit 1;
+  );
+  if not(List.mem !column ["last-name"; "first-name"; "phone"]) then (
+    prerr_endline ("Unknown column: " ^ !column);
+    exit 1
+  );
+  try
+    let dtd = parse_dtd_entity default_config (from_file "record.dtd") in
+    let tree = 
+      parse_content_entity default_config (from_channel stdin) dtd default_spec
+    in
+    print_endline "<?xml encoding='ISO-8859-1'?>";
+    (delcol !column tree) # write (Out_channel stdout) `Enc_iso88591
+  with
+      x ->
+       prerr_endline(string_of_exn x);
+       exit 1
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/08/24 09:42:52  gerd
+ *     Updated a comment.
+ *
+ * Revision 1.1  2000/08/24 09:39:59  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/print.ml b/helm/DEVEL/pxp/pxp/examples/simple_transformation/print.ml
new file mode 100644 (file)
index 0000000..56f5fb6
--- /dev/null
@@ -0,0 +1,60 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Read a record-list structure and print it *)
+open Pxp_types;;
+open Pxp_document;;
+open Pxp_yacc;;
+
+let print tree =
+  iter_tree
+    ~pre:
+      (fun n ->
+        match n # node_type with
+            T_element "last-name" ->
+              print_endline ("Last name: " ^ n # data)
+          | T_element "first-name" ->
+              print_endline ("First name: " ^ n # data)
+          | T_element "phone" ->
+              print_endline ("Telephone number: " ^ n # data)
+          | _ ->
+              ())
+    ~post:
+      (fun n ->
+        match n # node_type with
+            T_element "record" -> 
+              print_newline()
+          | _ ->
+              ())
+    tree
+;;
+
+let main() =
+  try
+    let dtd = parse_dtd_entity default_config (from_file "record.dtd") in
+    let tree = 
+      parse_content_entity default_config (from_channel stdin) dtd default_spec in
+    print tree
+  with
+      x ->
+       prerr_endline(string_of_exn x);
+       exit 1
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/08/22 21:57:43  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/record.dtd b/helm/DEVEL/pxp/pxp/examples/simple_transformation/record.dtd
new file mode 100644 (file)
index 0000000..b054ccd
--- /dev/null
@@ -0,0 +1,5 @@
+<!ELEMENT record-list (record*)>
+<!ELEMENT record (last-name?, first-name?, phone?)>
+<!ELEMENT last-name (#PCDATA)>
+<!ELEMENT first-name (#PCDATA)>
+<!ELEMENT phone (#PCDATA)>
diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/sample.xml b/helm/DEVEL/pxp/pxp/examples/simple_transformation/sample.xml
new file mode 100644 (file)
index 0000000..00d36b0
--- /dev/null
@@ -0,0 +1,18 @@
+<?xml encoding="ISO-8859-1"?>
+<record-list>
+  <record>
+    <last-name>Stolpmann</last-name>
+    <first-name>Gerd</first-name>
+    <phone>997705</phone>
+  </record>
+  <record>
+    <last-name>Smith</last-name>
+    <first-name>Jack</first-name>
+    <phone>12345</phone>
+  </record>
+  <record>
+    <last-name>Ützgür</last-name>
+    <first-name>xxx</first-name>
+    <phone>7654</phone>
+  </record>
+</record-list>
diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/sort.ml b/helm/DEVEL/pxp/pxp/examples/simple_transformation/sort.ml
new file mode 100644 (file)
index 0000000..297730f
--- /dev/null
@@ -0,0 +1,83 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Read a record-list, sort it, and print it as XML *)
+open Pxp_types;;
+open Pxp_document;;
+open Pxp_yacc;;
+
+let sort by tree =
+  map_tree
+    ~pre:
+      (fun n -> n # orphaned_flat_clone)
+    ~post:
+      (fun n ->
+        match n # node_type with
+            T_element "record-list" ->
+              let l = n # sub_nodes in
+              let l' = List.sort
+                         (fun a b ->
+                            let a_string = 
+                              try (find_element by a) # data 
+                              with Not_found -> "" in
+                            let b_string = 
+                              try (find_element by b) # data 
+                              with Not_found -> "" in
+                            Pervasives.compare a_string b_string)
+                         l in
+              n # set_nodes l';
+              n
+          | _ ->
+              n)
+    tree
+;;
+
+
+let main() =
+  let criterion = ref "last-name" in
+  Arg.parse
+      [ "-by", Arg.String (fun s -> criterion := s),
+           " (last-name|first-name|phone)";
+      ]
+      (fun _ -> raise (Arg.Bad "Bad usage"))
+      "usage: sort [ options ]";
+  if not(List.mem !criterion ["last-name"; "first-name"; "phone"]) then (
+    prerr_endline ("Unknown criterion: " ^ !criterion);
+    exit 1
+  );
+  try
+    let dtd = parse_dtd_entity default_config (from_file "record.dtd") in
+    let tree = 
+      parse_content_entity default_config (from_channel stdin) dtd default_spec
+    in
+    print_endline "<?xml encoding='ISO-8859-1'?>";
+    (sort !criterion tree) # write (Out_channel stdout) `Enc_iso88591
+  with
+      x ->
+       prerr_endline(string_of_exn x);
+       exit 1
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/08/30 16:05:44  gerd
+ *     Minor update
+ *
+ * Revision 1.2  2000/08/24 09:40:11  gerd
+ *     Allow that columns are missing.
+ *
+ * Revision 1.1  2000/08/22 21:57:44  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/validate/.cvsignore b/helm/DEVEL/pxp/pxp/examples/validate/.cvsignore
new file mode 100644 (file)
index 0000000..e125622
--- /dev/null
@@ -0,0 +1,13 @@
+*.cmi
+*.cmo
+*.cma
+*.cmx
+*.o
+*.a
+*.cmxa
+*.new
+*.mlf
+*.ml0
+depend
+depend.pkg
+
diff --git a/helm/DEVEL/pxp/pxp/examples/validate/Makefile b/helm/DEVEL/pxp/pxp/examples/validate/Makefile
new file mode 100644 (file)
index 0000000..64b6918
--- /dev/null
@@ -0,0 +1,28 @@
+# make validate:        make bytecode executable
+# make validate.opt:    make native executable
+# make clean:          remove intermediate files (in this directory)
+# make CLEAN:           remove intermediate files (recursively)
+# make distclean:      remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+pxpvalidate: validate.ml
+       ocamlfind ocamlc -o pxpvalidate -package "pxp" -linkpkg validate.ml
+
+pxpvalidate.opt: validate.ml
+       ocamlfind ocamlopt -o pxpvalidate.opt -package "pxp" -linkpkg validate.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa 
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~
+       rm -f pxpvalidate pxpvalidate.opt
diff --git a/helm/DEVEL/pxp/pxp/examples/validate/validate.ml b/helm/DEVEL/pxp/pxp/examples/validate/validate.ml
new file mode 100644 (file)
index 0000000..3bb83d2
--- /dev/null
@@ -0,0 +1,126 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+let print_error e =
+  print_endline (string_of_exn e)
+;;
+
+class warner =
+  object 
+    method warn w =
+      print_endline ("WARNING: " ^ w)
+  end
+;;
+
+let parse debug wf iso88591 filename =
+  try 
+    (* Parse the document: *)
+    let parse_fn =
+      if wf then parse_wfdocument_entity 
+      else 
+       let index = new hash_index in
+       parse_document_entity 
+         ?transform_dtd:None 
+         ~id_index:(index :> 'ext index)
+    in
+    let doc =
+      parse_fn
+         { default_config with 
+             debugging_mode = debug;
+             encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
+             idref_pass = true;
+             warner = new warner
+          }
+         (from_file filename)
+         default_spec 
+    in
+    ()
+  with
+      e ->
+       (* Print error; remember that there was an error *)
+       error_happened := true;
+       print_error e
+;;
+
+
+let main() =
+  let debug = ref false in
+  let wf = ref false in
+  let iso88591 = ref false in
+  let files = ref [] in
+  Arg.parse
+      [ "-d",   Arg.Set debug, 
+           "             turn debugging mode on";
+       "-wf",  Arg.Set wf,    
+            "            check only on well-formedness";
+        "-iso-8859-1", Arg.Set iso88591, 
+                    "    use ISO-8859-1 as internal encoding instead of UTF-8";
+      ]
+      (fun x -> files := x :: !files)
+      "
+usage: pxpvalidate [options] file ...
+
+- checks the validity of XML documents. See below for the list of options.
+
+<title>PXP - The XML parser for Objective Caml</title>
+
+List of options:";
+  files := List.rev !files;
+  List.iter (parse !debug !wf !iso88591) !files;
+;;
+
+
+main();
+if !error_happened then exit(1);;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:31  lpadovan
+ * Initial revision
+ *
+ * Revision 1.10  2000/08/30 15:58:41  gerd
+ *     Updated.
+ *
+ * Revision 1.9  2000/07/14 14:57:30  gerd
+ *     Updated: warner
+ *
+ * Revision 1.8  2000/07/14 14:13:15  gerd
+ *     Cosmetic changes.
+ *
+ * Revision 1.7  2000/07/14 14:11:06  gerd
+ *     Updated because of changes of the PXP API.
+ *
+ * Revision 1.6  2000/07/08 21:53:00  gerd
+ *     Updated because of PXP interface changes.
+ *
+ * Revision 1.5  2000/06/04 20:21:55  gerd
+ *     Updated to new module names.
+ *
+ * Revision 1.4  2000/05/01 16:44:57  gerd
+ *     Added check for ID uniqueness.
+ *     Using new error formatter.
+ *
+ * Revision 1.3  1999/11/09 22:27:30  gerd
+ *     The programs returns now an exit code of 1 if one of the
+ * XML files produces an error.
+ *
+ * Revision 1.2  1999/09/01 23:09:56  gerd
+ *     Added the option -wf that switches to well-formedness checking
+ * instead of validation.
+ *
+ * Revision 1.1  1999/08/14 22:20:53  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/.cvsignore b/helm/DEVEL/pxp/pxp/examples/xmlforms/.cvsignore
new file mode 100644 (file)
index 0000000..e125622
--- /dev/null
@@ -0,0 +1,13 @@
+*.cmi
+*.cmo
+*.cma
+*.cmx
+*.o
+*.a
+*.cmxa
+*.new
+*.mlf
+*.ml0
+depend
+depend.pkg
+
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile b/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile
new file mode 100644 (file)
index 0000000..5a0ba32
--- /dev/null
@@ -0,0 +1,33 @@
+# make xmlforms:       make bytecode executable
+# make xmlforms.opt:   make native executable
+# make clean:          remove intermediate files
+# make CLEAN:           remove intermediate files (recursively)
+# make distclean:      remove any superfluous files
+# make release:        cleanup, create archive, tag CVS module 
+#                      (for developers)
+#----------------------------------------------------------------------
+
+.PHONY: xmlforms
+xmlforms:
+       $(MAKE) -f Makefile.code xmlforms
+
+.PHONY: xmlforms.opt
+xmlforms.opt:
+       $(MAKE) -f Makefile.code xmlforms.opt
+
+
+.PHONY: clean
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+.PHONY: CLEAN
+CLEAN: clean
+       $(MAKE) -C styles CLEAN
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~ depend depend.pkg
+       rm -f xmlforms xmlforms.opt
+       $(MAKE) -C styles distclean
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile.code b/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile.code
new file mode 100644 (file)
index 0000000..f996740
--- /dev/null
@@ -0,0 +1,57 @@
+#----------------------------------------------------------------------
+# specific rules for this package:
+
+OBJECTS  = ds_context.cmo ds_style.cmo
+XOBJECTS = $(OBJECTS:.cmo=.cmx)
+ARCHIVE  = xmlforms.cma
+XARCHIVE = xmlforms.cmxa
+NAME     = xmlforms
+REQUIRES = camltk str pxp
+
+xmlforms: $(ARCHIVE) ds_app.cmo
+       ocamlfind ocamlc -g -o xmlforms -custom -package "$(REQUIRES)" \
+               -linkpkg $(ARCHIVE) ds_app.cmo 
+
+xmlforms.opt: $(XARCHIVE) ds_app.cmx
+       ocamlfind ocamlopt -o xmlforms.opt -package "$(REQUIRES)" \
+               -linkpkg $(XARCHIVE) ds_app.cmx
+
+$(ARCHIVE): $(OBJECTS)
+       $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS)
+
+$(XARCHIVE): $(XOBJECTS)
+       $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS)
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS   =
+OCAMLC    = ocamlc -g $(OPTIONS) $(ROPTIONS)
+OCAMLOPT  = ocamlopt -p $(OPTIONS) $(ROPTIONS)
+OCAMLDEP  = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+depend: *.ml *.mli 
+       $(OCAMLDEP) *.ml *.mli >depend
+
+depend.pkg: Makefile
+       $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly
+
+.ml.cmx:
+       $(OCAMLOPT) -c $<
+
+.ml.cmo:
+       $(OCAMLC) -c $<
+
+.mli.cmi:
+       $(OCAMLC) -c $<
+
+.mll.ml:
+       ocamllex $<
+
+*.mli:
+
+include depend
+include depend.pkg
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/README b/helm/DEVEL/pxp/pxp/examples/xmlforms/README
new file mode 100644 (file)
index 0000000..806a409
--- /dev/null
@@ -0,0 +1,61 @@
+-----------------------------------------------------------------------------
+xmlforms
+-----------------------------------------------------------------------------
+
+THE IDEA:
+
+This example uses XML for two purposes:
+
+- The "story" and layout of the application is specified in XML
+- The data records are stored in XML
+
+An "application" is a set of "masks" or sequences of masks, and every mask
+is thought as a visible page of the application, containing layout
+elements and functional elements. Layout is specified in TeX-style using
+hboxes, vboxes, hspaces, vspaces. Functional elements are "entries" (input
+box for a string with one line), "textboxes" (input boxes with several
+lines), and buttons.
+
+See styles/ds-style.dtd for the DTD of an application specification, and
+the other xml files in this directory for examples.
+
+The entries and textboxes are bound to "slots", i.e. string variables. If
+the application is started, the slots are read from a file, and if the
+user presses a special "save" button, the slots are stored into this file.
+The format of this data file is again XML; the simplistic DTD can be found
+in styles/ds-object.dtd.
+
+
+THE IMPLEMENTATION:
+
+The specification is currently mapped to ocamltk (CamlTk) widgets by a
+program called "xmlforms".
+
+
+HOW TO COMPILE:
+
+It is assumed that "findlib" is present on your system; see ABOUT-FINDLIB
+in the toplevel directory.
+The "markup" module must have been installed.
+
+- "make xmlforms" produces a bytecode executable "xmlforms"
+- "make xmlforms.opt" produces a native executable "xmlforms.opt"
+
+Note that you cannot start the executables directly; see the next section.
+
+
+HOW TO START AN APPLICATION:
+
+As "xmlforms" is a generic executable, there is a simple mechanism to bind
+it to a specific instance of an application. For example, in the "styles"
+subdirectory there is the application specification "crazy-style.xml". To
+start it, make a symlink called "crazy" referring to the "xmlforms"
+binary, set the environment variable DATASHEETS to the directory where the
+DTDs and XML files can be found, and start "crazy":
+
+       ln -s ../xmlforms crazy
+       DATASHEETS=. crazy my-record.xml
+
+(If you do not set DATASHEETS, a default directory is used, normally
+"/opt/xmlforms/lib".)
+
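The name-based lookup described above can be summarized in a few lines of
OCaml. The sketch below is a hypothetical helper (it only prints the
resolved path); the real lookup is performed by ds_app.ml further down.

    (* resolve_style.ml -- hypothetical helper illustrating the symlink
     * mechanism; a symlink named "crazy" selects $DATASHEETS/crazy-style.xml *)
    let installdir =
      try Sys.getenv "DATASHEETS" with Not_found -> "/opt/xmlforms/lib"

    let style_for_command () =
      let cmd = Filename.basename Sys.argv.(0) in
      Filename.concat installdir (cmd ^ "-style.xml")

    let () = print_endline (style_for_command ())
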
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_app.ml b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_app.ml
new file mode 100644 (file)
index 0000000..55589ea
--- /dev/null
@@ -0,0 +1,107 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Tk
+open Pxp_types
+open Pxp_document
+open Pxp_yacc
+open Ds_context
+open Ds_style
+
+
+let installdir       =
+  try Sys.getenv "DATASHEETS" with
+      Not_found -> "/opt/xmlforms/lib"
+let style_sysid      = ref ""
+let object_dtd_sysid = Filename.concat installdir "ds-object.dtd"
+let object_dtd_root  = "record" 
+
+
+let rec print_error e =
+  print_endline (string_of_exn e)
+;;
+
+
+let run f arg1 arg2 =
+  try f arg1 arg2 with
+      e -> print_error e
+;;
+
+
+let edit filename cmd =
+  (* read in style definition *)
+  let index = new hash_index in
+  let style =
+    parse_document_entity
+      ~id_index:(index :> 'ext index)
+      default_config
+      (from_file !style_sysid)
+      tag_map
+  in
+  let root = style # root in
+  root # extension # prepare (index :> 'ext index);
+
+  let obj_dtd =
+    parse_dtd_entity
+      default_config
+      (from_file object_dtd_sysid)
+  in
+  obj_dtd # set_root object_dtd_root;
+
+  let topframe = openTk() in
+  let context = new context filename obj_dtd index root topframe in
+
+  Toplevel.configure topframe [ Width (Centimeters 20.0);
+                                Height (Centimeters 12.0);
+                              ];
+  Pack.propagate_set topframe false;
+  Wm.title_set topframe cmd;
+  context # goto (root # extension # start_node_name);
+  mainLoop()
+;;
+
+
+let main() =
+  let cmd = Filename.basename Sys.argv.(0) in
+  match Sys.argv with
+      [| _; filename |] ->
+       style_sysid := Filename.concat installdir (cmd ^ "-style.xml");
+       run edit filename cmd
+    | _ ->
+       prerr_endline ("usage: " ^ cmd ^ " filename");
+       exit(1)
+;;
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.6  2000/07/16 19:36:03  gerd
+ *     Updated.
+ *
+ * Revision 1.5  2000/07/08 22:03:11  gerd
+ *     Updates because of PXP interface changes.
+ *
+ * Revision 1.4  2000/06/04 20:29:19  gerd
+ *     Updates because of renamed PXP modules.
+ *
+ * Revision 1.3  2000/05/01 16:48:45  gerd
+ *     Using the new error formatter.
+ *
+ * Revision 1.2  1999/12/17 21:34:29  gerd
+ *     The name of the root element is set to "record" in the
+ * object_dtd; otherwise the parser would not check that the root
+ * element is the right element.
+ *
+ * Revision 1.1  1999/08/21 19:11:05  gerd
+ *     Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_context.ml b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_context.ml
new file mode 100644 (file)
index 0000000..453ca00
--- /dev/null
@@ -0,0 +1,238 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types
+open Pxp_document
+open Pxp_yacc
+
+let empty_record = new element_impl (Pxp_yacc.default_extension);;
+let empty_dnode = new data_impl Pxp_yacc.default_extension;;
+
+class context the_filename the_obj_dtd the_index the_root the_topframe =
+  object (self)
+    val filename = the_filename
+    val obj_dtd = the_obj_dtd
+    val node_index = the_index
+    val mutable obj = empty_record # create_element
+                       the_obj_dtd (T_element "record") []
+    val root = the_root
+    val topframe = the_topframe
+    val mutable wdg = None
+
+    val mutable history = ( [| |] : string array )
+    val mutable index = 0
+
+    initializer
+      self # load_obj
+
+    method obj = obj
+
+    (* history *)
+
+    method private leave_node =
+      begin match wdg with
+         None -> ()
+       | Some w -> Tk.destroy w
+      end;
+      wdg <- None
+
+    method private enter_node =
+      let where = history.(index) in
+      let n =
+       try node_index # find where with
+           Not_found -> failwith ("Mask not found: " ^ where) in
+      let w = n # extension # create_widget topframe self in
+      Tk.pack [w] (n # extension # pack_opts @ [ Tk.Expand true] );
+      wdg <- Some w
+
+
+
+    method previous =
+      if index > 0 then
+       index <- index - 1
+      else
+       raise Not_found;
+      self # leave_node;
+      self # enter_node;
+
+
+    method next =
+      if index < Array.length history - 1 then
+       index <- index + 1
+      else
+       raise Not_found;
+      self # leave_node;
+      self # enter_node;
+
+
+    method goto where =
+      assert (index <= Array.length history);
+      self # leave_node;
+      let persisting_history =
+       if index < Array.length history then
+         Array.sub history 0 (index+1)
+       else
+         history
+      in
+      history <- Array.concat [ persisting_history; [| where |] ];
+      index <- Array.length history - 1;
+      self # enter_node;
+
+
+    method current =
+      if index < Array.length history then
+       history.(index)
+      else
+       raise Not_found
+
+
+    (* read, write the slots of object *)
+
+    method search_slot name =
+      let rec search n =
+       match n # node_type with
+           T_element "string" ->
+             if n # required_string_attribute "name" = name then
+               n
+             else raise Not_found
+         | T_element _ ->
+             search_list (n # sub_nodes)
+         | T_data ->
+             raise Not_found
+         | _ ->
+             assert false
+             
+       and search_list l =
+         match l with
+            x :: l' ->
+              (try search x with Not_found -> search_list l')
+          | [] ->
+              raise Not_found
+      in
+      search obj
+
+    method get_slot name =
+      let d = (self # search_slot name) # data in
+      d
+
+    method set_slot name value =
+      let dtd = obj # dtd in
+      begin try
+       let n = self # search_slot name in
+       n # delete
+      with
+         Not_found -> ()
+      end;
+      let e_string = empty_record # create_element dtd (T_element "string")
+               [ "name", name ] in
+      let dnode = empty_dnode # create_data dtd value in
+      e_string # add_node dnode;
+      e_string # local_validate();
+      obj # add_node e_string;
+      assert(self # get_slot name = value)
+
+    (* load, save object *)
+
+
+    method load_obj =
+      if Sys.file_exists filename then begin
+       obj <- parse_content_entity
+         default_config
+         (from_file filename)
+         obj_dtd
+         default_spec
+      end
+      else begin
+       print_string "New file!\n";
+       flush stdout
+      end
+
+
+    method save_obj =
+      let fd = open_out filename in
+      try
+
+       let re1 = Str.regexp "&" in
+       let re2 = Str.regexp "<" in
+       let re3 = Str.regexp "'" in
+       let re4 = Str.regexp ">" in
+       let protect s =
+         let s1 = Str.global_replace re1 "&amp;" s in
+         let s2 = Str.global_replace re2 "&lt;" s1 in
+         let s3 = Str.global_replace re3 "&apos;" s2 in
+         let s4 = Str.global_replace re4 "&gt;" s3 in
+         s4
+       in
+
+       let rec iterate (n : 'node extension node as 'node) =
+         match n # node_type with
+             T_data ->
+               output_string fd (protect (n # data))
+           | T_element name ->
+               output_string fd ("<" ^ name ^ "\n");
+               let anames = n # attribute_names in
+               List.iter
+                 (fun aname ->
+                    let aval = n # attribute aname in
+                    let v =
+                      match aval with
+                          Value s ->
+                            aname ^ "='" ^ protect s ^ "'\n"
+                        | Valuelist l ->
+                            aname ^ "='" ^ String.concat " " (List.map protect l) ^ "'\n"
+                        | Implied_value ->
+                            ""
+                    in
+                    output_string fd v)
+                 anames;
+               output_string fd ">";
+               List.iter iterate (n # sub_nodes);
+               output_string fd ("</" ^ name ^ "\n>");
+           | _ ->
+               assert false
+       in
+
+       output_string fd "<?xml version='1.0' encoding='ISO-8859-1'?>\n";
+       iterate obj;
+       close_out fd
+      with
+         e ->
+           close_out fd;
+           raise e
+
+  end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:31  lpadovan
+ * Initial revision
+ *
+ * Revision 1.7  2000/08/30 15:58:49  gerd
+ *     Updated.
+ *
+ * Revision 1.6  2000/07/23 20:25:05  gerd
+ *     Update because of API change: local_validate.
+ *
+ * Revision 1.5  2000/07/16 19:36:03  gerd
+ *     Updated.
+ *
+ * Revision 1.4  2000/07/08 22:03:11  gerd
+ *     Updates because of PXP interface changes.
+ *
+ * Revision 1.3  2000/06/04 20:29:19  gerd
+ *     Updates because of renamed PXP modules.
+ *
+ * Revision 1.2  2000/05/30 00:09:08  gerd
+ *     Minor fix.
+ *
+ * Revision 1.1  1999/08/21 19:11:05  gerd
+ *     Initial revision.
+ *
+ *
+ *)
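The escaping done by save_obj above can be exercised in isolation. The
following sketch is a hypothetical standalone copy of the protect helper;
it only assumes the Str library, which is already listed in REQUIRES.
Replacing "&" first matters, otherwise the ampersands introduced by the
later substitutions would themselves be escaped.

    (* escape.ml -- hypothetical standalone version of protect from save_obj *)
    let protect s =
      let s = Str.global_replace (Str.regexp "&") "&amp;" s in   (* first! *)
      let s = Str.global_replace (Str.regexp "<") "&lt;" s in
      let s = Str.global_replace (Str.regexp "'") "&apos;" s in
      Str.global_replace (Str.regexp ">") "&gt;" s

    let () =
      print_endline (protect "a < b & c > 'd'")
      (* prints: a &lt; b &amp; c &gt; &apos;d&apos; *)
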
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_style.ml b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_style.ml
new file mode 100644 (file)
index 0000000..08d0daa
--- /dev/null
@@ -0,0 +1,778 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types
+open Pxp_document
+open Ds_context
+
+
+let get_dimension s =
+  let re = Str.regexp "\\([0-9]*\\(\\.[0-9]+\\)?\\)[ \t\n]*\\(px\\|cm\\|in\\|mm\\|pt\\)" in
+  if Str.string_match re s 0 then begin
+    let number = Str.matched_group 1 s in
+    let dim = Str.matched_group 3 s in
+    match dim with
+       "px" -> Tk.Pixels (int_of_float (float_of_string number))
+      | "cm" -> Tk.Centimeters (float_of_string number)
+      | "in" -> Tk.Inches (float_of_string number)
+      | "mm" -> Tk.Millimeters (float_of_string number)
+      | "pt" -> Tk.PrinterPoint (float_of_string number)
+      | _ -> assert false
+  end
+  else
+    failwith ("Bad dimension: " ^ s)
+;;
+
+
+class virtual shared =
+  object(self)
+
+    (* --- default_ext --- *)
+
+    val mutable node = (None : shared node option)
+
+    method clone = {< >}
+    method node =
+      match node with
+          None ->
+            assert false
+        | Some n -> n
+    method set_node n =
+      node <- Some n
+
+    (* --- shared attributes: color & font settings --- *)
+
+    val mutable fgcolor = (None : string option)
+    val mutable bgcolor = (None : string option)
+    val mutable font = (None : string option)
+
+    method fgcolor =
+      (* Get the foreground color: If there is a local value, return it;
+       * otherwise ask parent node
+       *)
+      match fgcolor with
+         Some c -> c
+       | None   -> try self # node # parent # extension # fgcolor with
+                   Not_found -> failwith "#fgcolor"
+
+    method bgcolor =
+      (* Get the background color: If there is a local value, return it;
+       * otherwise ask parent node
+       *)
+      match bgcolor with
+         Some c -> c
+       | None   -> try self # node # parent # extension # bgcolor with
+                   Not_found -> failwith "#bgcolor"
+
+    method font =
+      (* Get the current font: If there is a local value, return it;
+       * otherwise ask parent node
+       *)
+      match font with
+         Some c -> c
+       | None   -> try self # node # parent # extension # font with
+                   Not_found -> failwith "#font"
+
+    method private init_color_and_font =
+      let get_color n =
+       try
+         match self # node # attribute n with
+             Value v -> Some v
+           | Implied_value -> None
+           | _ -> assert false
+       with Not_found -> None in
+      fgcolor <- get_color "fgcolor";
+      bgcolor <- get_color "bgcolor";
+      font    <- get_color "font";      (* sic! *)
+
+
+    method private bg_color_opt =
+      [ Tk.Background (Tk.NamedColor (self # bgcolor)) ]
+
+    method private fg_color_opt =
+      [ Tk.Foreground (Tk.NamedColor (self # fgcolor)) ]
+
+    method private font_opt =
+      [ Tk.Font (self # font) ]
+
+    (* --- virtual --- *)
+
+    method virtual prepare : shared Pxp_yacc.index -> unit
+    method virtual create_widget : Widget.widget -> context -> Widget.widget
+
+    method pack_opts = ( [] : Tk.options list )
+    method xstretchable = false
+    method ystretchable = false
+
+    method accept (c:context) = ()
+
+    method private get_mask =
+      (* find parent which is a mask *)
+      let rec search n =
+       match n # node_type with
+           T_element "mask" ->
+             n # extension
+         | T_element _ ->
+             search (n # parent)
+         | _ ->
+             assert false
+      in
+      search (self # node)
+
+
+    method private accept_mask (c:context) =
+      let rec iterate n =
+       n # extension # accept c;
+       List.iter iterate (n # sub_nodes)
+      in
+      iterate (self # get_mask # node)
+
+
+    method start_node_name =
+      (failwith "#start_node_name" : string)
+
+    (* --- debug --- *)
+
+    method private name =
+      let nt = self # node # node_type in
+      match nt with
+         T_element n -> n
+       | T_data      -> "#PCDATA"
+       | _           -> assert false
+
+  end
+;;
+
+
+class default =
+  object (self)
+    inherit shared
+
+    method prepare idx =
+      self # init_color_and_font
+
+    method create_widget w c =
+      failwith "default # create_widget"
+  end
+;;
+
+
+let dummy_node = new element_impl (new default);;
+
+class application =
+  object (self)
+    inherit shared
+
+    val mutable start_node = dummy_node
+
+    method prepare idx =
+      (* prepare this node *)
+      self # init_color_and_font;
+      if fgcolor = None then fgcolor <- Some "black";
+      if bgcolor = None then bgcolor <- Some "white";
+      if font = None then font <- Some "fixed";
+      let start =
+       match self # node # attribute "start" with
+           Value v -> v
+         | _       -> assert false in
+      start_node <- (try idx # find start with
+         Not_found -> failwith "Start node not found");
+      (* iterate over the subtree *)
+      let rec iterate n =
+       n # extension # prepare idx;
+       List.iter iterate (n # sub_nodes)
+      in
+      List.iter iterate (self # node # sub_nodes)
+
+
+    method start_node_name =
+      match self # node # attribute "start" with
+         Value v -> v
+       | _       -> assert false
+
+    method create_widget w c =
+      start_node # extension # create_widget w c
+
+    method pack_opts =
+      start_node # extension # pack_opts
+  end
+;;
+
+
+class sequence =
+  object (self)
+    inherit shared
+
+    method prepare idx =
+      self # init_color_and_font;
+
+    method create_widget w c =
+      let node = List.hd (self # node # sub_nodes) in
+      node # extension # create_widget w c
+
+    method pack_opts =
+      let node = List.hd (self # node # sub_nodes) in
+      node # extension # pack_opts
+  end
+;;
+
+
+class vbox =
+  object (self)
+    inherit shared
+
+    val mutable att_halign = "left"
+
+    method prepare idx =
+      self # init_color_and_font;
+      match self # node # attribute "halign" with
+         Value v -> att_halign <- v
+       | _ -> assert false
+
+    method create_widget w c =
+      let f = Frame.create w (self # bg_color_opt) in
+      let nodes = self # node # sub_nodes in
+      let options =
+       match att_halign with
+           "left"     -> [ Tk.Anchor Tk.W ]
+         | "right"    -> [ Tk.Anchor Tk.E ]
+         | "center"   -> [ Tk.Anchor Tk.Center ]
+         | _ -> assert false
+      in
+      List.iter
+       (fun n ->
+          let opts = n # extension # pack_opts in
+          let wdg = n # extension # create_widget f c in
+          Tk.pack [wdg] (options @ opts);
+       )
+       nodes;
+      f
+
+    method pack_opts =
+      match self # xstretchable, self # ystretchable with
+         true, false  -> [ Tk.Fill Tk.Fill_X; (* Tk.Expand true *) ]
+       | false, true  -> [ Tk.Fill Tk.Fill_Y;  (* Tk.Expand true *) ]
+       | true, true   -> [ Tk.Fill Tk.Fill_Both; (* Tk.Expand true *) ]
+       | false, false -> []
+
+    method xstretchable =
+      let nodes = self # node # sub_nodes in
+      List.exists (fun n -> n # extension # xstretchable) nodes
+
+    method ystretchable =
+      let nodes = self # node # sub_nodes in
+      List.exists (fun n -> n # extension # ystretchable) nodes
+
+  end
+
+;;
+
+
+class mask =
+  object (self)
+
+    inherit vbox
+
+    method prepare idx =
+      self # init_color_and_font;
+      att_halign <- "left"
+  end
+;;
+
+
+class hbox =
+  object (self)
+    inherit shared
+
+    val mutable att_width = None
+    val mutable att_halign = "left"
+    val mutable att_valign = "top"
+
+    method prepare idx =
+      self # init_color_and_font;
+      begin match self # node # attribute "halign" with
+         Value v -> att_halign <- v
+       | _ -> assert false
+      end;
+      begin match self # node # attribute "valign" with
+         Value v -> att_valign <- v
+       | _ -> assert false
+      end;
+      begin match self # node # attribute "width" with
+         Value v       -> att_width <- Some (get_dimension v)
+       | Implied_value -> att_width <- None
+       | _ -> assert false
+      end
+
+    method create_widget w c =
+      let f1 = Frame.create w (self # bg_color_opt) in
+      let f_extra =
+       match att_width with
+           None    -> []
+         | Some wd ->
+             [ Canvas.create f1
+                 ( [ Tk.Width wd; Tk.Height (Tk.Pixels 0);
+                     Tk.Relief Tk.Flat;
+                     Tk.HighlightThickness (Tk.Pixels 0);
+                   ] @
+                   self # bg_color_opt ) ]
+      in
+      let f2 = Frame.create f1 (self # bg_color_opt) in
+      let nodes = self # node # sub_nodes in
+
+      let outer_pack_opts =
+       match att_halign with
+           "left"     -> [ Tk.Anchor Tk.W ]
+         | "right"    -> [ Tk.Anchor Tk.E ]
+         | "center"   -> [ Tk.Anchor Tk.Center ]
+         | _ -> assert false
+      in
+      let inner_pack_opts =
+       match att_valign with
+           "top"      -> [ Tk.Anchor Tk.N ]
+         | "bottom"   -> [ Tk.Anchor Tk.S ]
+         | "center"   -> [ Tk.Anchor Tk.Center ]
+         | _ -> assert false
+      in
+      List.iter
+       (fun n ->
+          let opts = n # extension # pack_opts in
+          let wdg = n # extension # create_widget f2 c in
+          Tk.pack [wdg] (inner_pack_opts @ [ Tk.Side Tk.Side_Left ] @ opts);
+       )
+       nodes;
+      let extra_opts = self # pack_opts in
+      Tk.pack (f_extra @ [f2]) (outer_pack_opts @ extra_opts);
+      f1
+
+    method pack_opts =
+      match self # xstretchable, self # ystretchable with
+         true, false  -> [ Tk.Fill Tk.Fill_X;  (* Tk.Expand true *) ]
+       | false, true  -> [ Tk.Fill Tk.Fill_Y;  (* Tk.Expand true *) ]
+       | true, true   -> [ Tk.Fill Tk.Fill_Both;  (* Tk.Expand true *) ]
+       | false, false -> []
+
+    method xstretchable =
+      let nodes = self # node # sub_nodes in
+      List.exists (fun n -> n # extension # xstretchable) nodes
+
+    method ystretchable =
+      let nodes = self # node # sub_nodes in
+      List.exists (fun n -> n # extension # ystretchable) nodes
+
+  end
+;;
+
+class vspace =
+  object (self)
+    inherit shared
+
+    val mutable att_height = Tk.Pixels 0
+    val mutable att_fill  = false
+
+    method prepare idx =
+      self # init_color_and_font;
+      begin match self # node # attribute "height" with
+         Value v       -> att_height <- get_dimension v
+       | _ -> assert false
+      end;
+      begin match self # node # attribute "fill" with
+         Value "yes" -> att_fill <- true
+       | Value "no"  -> att_fill <- false
+       | _ -> assert false
+      end
+
+
+    method create_widget w c =
+      let f = Frame.create w ( self # bg_color_opt ) in
+      let strut =
+       Canvas.create f
+         ( [ Tk.Height att_height; Tk.Width (Tk.Pixels 0);
+             Tk.Relief Tk.Flat;
+             Tk.HighlightThickness (Tk.Pixels 0);
+           ] @
+           self # bg_color_opt ) in
+      if att_fill then
+       Tk.pack [strut] [Tk.Fill Tk.Fill_Y; Tk.Expand true]
+      else
+       Tk.pack [strut] [];
+      f
+
+    method pack_opts =
+      if att_fill then [ Tk.Fill Tk.Fill_Y; Tk.Expand true ] else []
+
+    method ystretchable = att_fill
+  end
+;;
+
+class hspace =
+  object (self)
+    inherit shared
+
+
+    val mutable att_width = Tk.Pixels 0
+    val mutable att_fill  = false
+
+    method prepare idx =
+      self # init_color_and_font;
+      begin match self # node # attribute "width" with
+         Value v       -> att_width <- get_dimension v
+       | _ -> assert false
+      end;
+      begin match self # node # attribute "fill" with
+         Value "yes" -> att_fill <- true
+       | Value "no"  -> att_fill <- false
+       | _ -> assert false
+      end
+
+
+    method create_widget w c =
+      let f = Frame.create w ( self # bg_color_opt ) in
+      let strut =
+       Canvas.create f
+         ( [ Tk.Width att_width; Tk.Height (Tk.Pixels 0);
+             Tk.Relief Tk.Flat;
+             Tk.HighlightThickness (Tk.Pixels 0);
+           ] @
+           self # bg_color_opt ) in
+      if att_fill then
+       Tk.pack [strut] [Tk.Fill Tk.Fill_X; Tk.Expand true]
+      else
+       Tk.pack [strut] [];
+      f
+
+    method pack_opts =
+      if att_fill then [ Tk.Fill Tk.Fill_X; Tk.Expand true ] else []
+
+    method xstretchable = att_fill
+  end
+;;
+
+class label =
+  object (self)
+    inherit shared
+
+    val mutable att_textwidth = (-1)
+    val mutable att_halign = "left"
+
+    method prepare idx =
+      self # init_color_and_font;
+      att_textwidth <- (match self # node # attribute "textwidth" with
+                           Value v ->
+                             let w = try int_of_string v
+                             with _ -> failwith ("Not an integer: " ^ v) in
+                             w
+                         | Implied_value ->
+                             (-1)
+                         | _ -> assert false);
+      att_halign <- (match self # node # attribute "halign" with
+                        Value v -> v
+                      | _ -> assert false);
+
+
+    method create_widget w c =
+      let opts_textwidth = if att_textwidth < 0 then [] else
+                                              [ Tk.TextWidth att_textwidth ] in
+      let opts_halign =
+       match att_halign with
+           "left"     -> [ Tk.Anchor Tk.W ]
+         | "right"    -> [ Tk.Anchor Tk.E ]
+         | "center"   -> [ Tk.Anchor Tk.Center ]
+         | _ -> assert false
+      in
+      let opts_content =
+       [ Tk.Text (self # node # data) ] in
+      let label = Label.create w (opts_textwidth @ opts_halign @
+                                 opts_content @ self # bg_color_opt @
+                                 self # fg_color_opt @ self # font_opt) in
+      label
+
+  end
+;;
+
+class entry =
+  object (self)
+    inherit shared
+
+    val mutable tv = lazy (Textvariable.create())
+    val mutable att_textwidth = (-1)
+    val mutable att_slot = ""
+
+    method prepare idx =
+      self # init_color_and_font;
+      tv <- lazy (Textvariable.create());
+      att_textwidth <- (match self # node # attribute "textwidth" with
+                           Value v ->
+                             let w = try int_of_string v
+                             with _ -> failwith ("Not an integer: " ^ v) in
+                             w
+                         | Implied_value ->
+                             (-1)
+                         | _ -> assert false);
+      att_slot <- (match self # node # attribute "slot" with
+         Value v -> v
+       | _ -> assert false);
+
+    method create_widget w c =
+      let opts_textwidth = if att_textwidth < 0 then [] else
+                                              [ Tk.TextWidth att_textwidth ] in
+      let e = Entry.create w ( [ Tk.TextVariable (Lazy.force tv) ] @
+                              self # fg_color_opt @
+                              self # bg_color_opt @
+                              self # font_opt @
+                              opts_textwidth
+                            ) in
+      let s =
+       try c # get_slot att_slot with
+           Not_found -> self # node # data in
+      Textvariable.set (Lazy.force tv) s;
+      e
+
+    method accept c =
+      c # set_slot att_slot (Textvariable.get (Lazy.force tv))
+
+  end
+;;
+
+class textbox =
+  object (self)
+    inherit shared
+
+    val mutable att_textwidth = (-1)
+    val mutable att_textheight = (-1)
+    val mutable att_slot = ""
+    val mutable last_widget = None
+
+    method prepare idx =
+      self # init_color_and_font;
+      att_textwidth <- (match self # node # attribute "textwidth" with
+                           Value v ->
+                             let w = try int_of_string v
+                             with _ -> failwith ("Not an integer: " ^ v) in
+                             w
+                         | Implied_value ->
+                             (-1)
+                         | _ -> assert false);
+      att_textheight <- (match self # node # attribute "textheight" with
+                           Value v ->
+                             let w = try int_of_string v
+                             with _ -> failwith ("Not an integer: " ^ v) in
+                             w
+                         | Implied_value ->
+                             (-1)
+                         | _ -> assert false);
+      att_slot <- (match self # node # attribute "slot" with
+                      Value v -> v
+                    | Implied_value -> ""
+                    | _ -> assert false);
+
+
+    method create_widget w c =
+      let opts_textwidth = if att_textwidth < 0 then [] else
+                                              [ Tk.TextWidth att_textwidth ] in
+      let opts_textheight = if att_textheight < 0 then [] else
+                                           [ Tk.TextHeight att_textheight ] in
+      let f = Frame.create w (self # bg_color_opt) in
+      let vscrbar = Scrollbar.create f [ Tk.Orient Tk.Vertical ] in
+      let e = Text.create f ( [ ] @
+                             self # fg_color_opt @
+                             self # bg_color_opt @
+                             self # font_opt @
+                             opts_textwidth @ opts_textheight
+                           ) in
+      last_widget <- Some e;
+      Scrollbar.configure vscrbar [ Tk.ScrollCommand
+                                     (fun s -> Text.yview e s);
+                                   Tk.Width (Tk.Pixels 9) ];
+      Text.configure e [ Tk.YScrollCommand
+                          (fun a b -> Scrollbar.set vscrbar a b) ];
+      let s =
+       if att_slot <> "" then
+         try c # get_slot att_slot with
+             Not_found -> self # node # data 
+       else 
+         self # node # data 
+      in
+      (* Text.insert always appends a newline to the last line, so strip
+       * an existing newline first
+       *)
+      let s' =
+       if s <> "" && s.[String.length s - 1] = '\n' then
+         String.sub s 0 (String.length s - 1)
+       else 
+         s in
+      Text.insert e (Tk.TextIndex(Tk.End,[])) s' [];
+      if att_slot = "" then
+       Text.configure e [ Tk.State Tk.Disabled ];
+      Tk.pack [e] [ Tk.Side Tk.Side_Left ];
+      Tk.pack [vscrbar] [ Tk.Side Tk.Side_Left; Tk.Fill Tk.Fill_Y ];
+      f
+
+    method accept c =
+      if att_slot <> "" then
+       match last_widget with
+           None -> ()
+         | Some w ->
+             let s =
+               Text.get
+                 w
+                 (Tk.TextIndex(Tk.LineChar(1,0),[]))
+                 (Tk.TextIndex(Tk.End,[])) in
+             c # set_slot att_slot s
+
+  end
+;;
+
+class button =
+  object (self)
+    inherit shared
+
+    val mutable att_label = ""
+    val mutable att_action = ""
+    val mutable att_goto = ""
+
+    method prepare idx =
+      self # init_color_and_font;
+      att_label <- (match self # node # attribute "label" with
+                       Value v -> v
+                     | _ -> assert false);
+      att_action <- (match self # node # attribute "action" with
+                        Value v -> v
+                      | _ -> assert false);
+      att_goto <- (match self # node # attribute "goto" with
+                      Value v -> v
+                    | Implied_value -> ""
+                    | _ -> assert false);
+      if att_action = "goto" then begin
+       try let _ = idx # find att_goto in () with
+           Not_found -> failwith ("Target `" ^ att_goto ^ "' not found")
+      end;
+      if att_action = "list-prev" || att_action = "list-next" then begin
+       let m = self # get_mask in
+       if m # node # parent # node_type <> T_element "sequence" then
+         failwith ("action " ^ att_action ^ " must not be used outside <sequence>");
+      end;
+
+
+    method create_widget w c =
+      let cmd () =
+       self # accept_mask c;
+       match att_action with
+           "goto" ->
+             c # goto att_goto
+         | "save" ->
+             c # save_obj
+         | "exit" ->
+             Protocol.closeTk()
+         | "save-exit" ->
+             c # save_obj;
+             Protocol.closeTk()
+         | "list-prev" ->
+             let m = self # get_mask # node in
+             let s = m # parent in
+             let rec search l =
+               match l with
+                   x :: y :: l' ->
+                     if y == m then
+                       match x # attribute "name" with
+                           Value s -> c # goto s
+                         | _ -> assert false
+                     else
+                       search (y :: l')
+                 | _ -> ()
+             in
+             search (s # sub_nodes)
+         | "list-next" ->
+             let m = self # get_mask # node in
+             let s = m # parent in
+             let rec search l =
+               match l with
+                   x :: y :: l' ->
+                     if x == m then
+                       match y # attribute "name" with
+                           Value s -> c # goto s
+                         | _ -> assert false
+                     else
+                       search (y :: l')
+                 | _ -> ()
+             in
+             search (s # sub_nodes)
+         | "hist-prev" ->
+             (try c # previous with Not_found -> ())
+         | "hist-next" ->
+             (try c # next with Not_found -> ())
+         | _ -> ()
+      in
+      let b = Button.create w ( [ Tk.Text att_label; Tk.Command cmd ] @
+                               self # fg_color_opt @
+                               self # bg_color_opt @
+                               self # font_opt ) in
+      b
+
+
+  end
+;;
+
+
+(**********************************************************************)
+
+open Pxp_yacc
+
+let tag_map =
+  make_spec_from_mapping
+    ~data_exemplar:(new data_impl (new default))
+    ~default_element_exemplar:(new element_impl (new default))
+    ~element_mapping:
+       (let m = Hashtbl.create 50 in
+       Hashtbl.add m "application"
+                     (new element_impl (new application));
+       Hashtbl.add m "sequence"
+                     (new element_impl (new sequence));
+       Hashtbl.add m "mask"
+                     (new element_impl (new mask));
+       Hashtbl.add m "vbox"
+                     (new element_impl (new vbox));
+       Hashtbl.add m "hbox"
+                     (new element_impl (new hbox));
+       Hashtbl.add m "vspace"
+                     (new element_impl (new vspace));
+       Hashtbl.add m "hspace"
+                     (new element_impl (new hspace));
+       Hashtbl.add m "label"
+                     (new element_impl (new label));
+       Hashtbl.add m "entry"
+                     (new element_impl (new entry));
+       Hashtbl.add m "textbox"
+                     (new element_impl (new textbox));
+       Hashtbl.add m "button"
+                     (new element_impl (new button));
+       m)
+    ()
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:31  lpadovan
+ * Initial revision
+ *
+ * Revision 1.5  2000/08/30 15:58:49  gerd
+ *     Updated.
+ *
+ * Revision 1.4  2000/07/16 19:36:03  gerd
+ *     Updated.
+ *
+ * Revision 1.3  2000/07/08 22:03:11  gerd
+ *     Updates because of PXP interface changes.
+ *
+ * Revision 1.2  2000/06/04 20:29:19  gerd
+ *     Updates because of renamed PXP modules.
+ *
+ * Revision 1.1  1999/08/21 19:11:05  gerd
+ *     Initial revision.
+ *
+ *
+ *)
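tag_map is the single place where element names are bound to their widget
classes, so supporting a new element only requires one more subclass of
shared and one more Hashtbl.add entry. The sketch below is purely
hypothetical (ds-style.dtd defines no <hrule> element); it merely
illustrates the pattern, reusing the inherited helpers init_color_and_font
and bg_color_opt.

    (* hypothetical addition to ds_style.ml: a horizontal-rule widget *)
    class hrule =
      object (self)
        inherit shared
        method prepare idx =
          self # init_color_and_font
        method create_widget w c =
          (* a thin frame, stretched over the full width, acts as a rule *)
          Frame.create w ( [ Tk.Height (Tk.Pixels 2) ] @ self # bg_color_opt )
        method pack_opts = [ Tk.Fill Tk.Fill_X ]
        method xstretchable = true
      end

    (* and, inside the element_mapping of tag_map:
       Hashtbl.add m "hrule" (new element_impl (new hrule));
       the DTD would also need an <!ELEMENT hrule EMPTY> declaration *)
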
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/Makefile b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/Makefile
new file mode 100644 (file)
index 0000000..c0068a5
--- /dev/null
@@ -0,0 +1,16 @@
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~
+
+.PHONY: symlinks
+symlinks:
+       for x in *-style.xml; do ln -s ../xmlforms $${x%-style.xml} || true; done
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/address-style.xml b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/address-style.xml
new file mode 100644 (file)
index 0000000..d3af5da
--- /dev/null
@@ -0,0 +1,361 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!-- $Id$ -->
+
+<!DOCTYPE application SYSTEM "ds-style.dtd" [
+<!ENTITY h1.font  '-*-helvetica-bold-r-*-*-18-*-*-*-*-*-*-*'>
+<!ENTITY h2.font  '-*-helvetica-bold-r-*-*-14-*-*-*-*-*-*-*'>
+<!ENTITY h3.font  '-*-helvetica-bold-r-*-*-12-*-*-*-*-*-*-*'>
+<!ENTITY dfl.font '-*-helvetica-medium-r-*-*-12-*-*-*-*-*-*-*'>
+<!ENTITY dfl.bold.font '-*-helvetica-bold-r-*-*-12-*-*-*-*-*-*-*'>
+<!ENTITY in.font  '-*-lucidatypewriter-medium-r-*-*-12-*-*-*-*-*-*-*'>
+
+<!ENTITY bg.button 'lightblue'>
+<!ENTITY bg.hilfe  '#E0E0E0'>
+<!ENTITY fg.hilfe  'black'>
+
+<!ENTITY headline
+  '<vbox>
+     <label font="&h2.font;" fgcolor="darkgreen">More about person...</label>
+     <vspace height="2mm"/>
+     <hbox>
+        <hbox bgcolor="black">
+           <hspace width="18cm"/>
+           <vbox><vspace height="2px"/></vbox>
+        </hbox>
+        <hspace width="5mm"/>
+     </hbox>
+     <vspace height="3mm"/>
+   </vbox>'>
+
+<!ENTITY help.headline
+  '<vbox>
+     <label font="&h2.font;" fgcolor="darkgreen">Help</label>
+     <vspace height="2mm"/>
+     <hbox>
+        <hbox bgcolor="black">
+           <hspace width="18cm"/>
+           <vbox><vspace height="2px"/></vbox>
+        </hbox>
+        <hspace width="5mm"/>
+     </hbox>
+     <vspace height="3mm"/>
+   </vbox>'>
+
+<!ENTITY info.headline
+  '<vbox>
+     <label font="&h2.font;" fgcolor="darkgreen">About xmlforms</label>
+     <vspace height="2mm"/>
+     <hbox>
+        <hbox bgcolor="black">
+           <hspace width="18cm"/>
+           <vbox><vspace height="2px"/></vbox>
+        </hbox>
+        <hspace width="5mm"/>
+     </hbox>
+     <vspace height="3mm"/>
+   </vbox>'>
+
+<!ENTITY footline
+  '<vbox>
+     <hbox>
+        <hbox bgcolor="black">
+           <hspace width="18cm"/>
+           <vbox><vspace height="2px"/></vbox>
+        </hbox>
+        <hspace width="5mm"/>
+     </hbox>
+     <vspace height="2mm"/>
+     <hbox>
+       <button bgcolor="&bg.button;" label="Previous" action="list-prev"/>
+       <button bgcolor="&bg.button;" label="Next" action="list-next"/>
+       <hspace width="0pt" fill="yes"/>
+       <button bgcolor="&bg.button;" label="Home" goto="start-page"/>
+       <hspace width="5mm"/>
+      </hbox>
+   </vbox>'>
+
+<!ENTITY help.footline
+  '<vbox>
+     <hbox>
+        <hbox bgcolor="black">
+           <hspace width="18cm"/>
+           <vbox><vspace height="2px"/></vbox>
+        </hbox>
+        <hspace width="5mm"/>
+     </hbox>
+     <vspace height="2mm"/>
+     <hbox>
+       <button bgcolor="&bg.button;" label="Back" action="hist-prev"/>
+       <hspace width="0pt" fill="yes"/>
+      </hbox>
+   </vbox>'>
+
+<!ENTITY info.footline '&help.footline;'>
+
+]>
+
+<!-- ***************************************************************** -->
+<!-- ************************               ************************** -->
+<!-- ************************ Starting page ************************** -->
+<!-- ************************               ************************** -->
+<!-- ***************************************************************** -->
+
+<application start="start-page"
+             font="&dfl.font;"
+>
+
+  <mask name="start-page">
+    <vspace height="5mm"/>
+    <hbox>
+      <hspace width="5mm"/>
+      <vbox>
+       <vbox font="&h1.font;">
+         <label>A sample xmlforms application:</label>
+         <label>Address editor</label>
+       </vbox>
+       <vspace height="1cm"/>
+       <vbox>
+         <hbox>
+           <hbox width="6cm" halign="right">
+             <label>Name:</label>
+           </hbox>
+           <entry font="&in.font;" textwidth="40" slot="person.name"/>
+         </hbox>
+         <hbox>
+           <hbox width="6cm" halign="right">
+             <label>Postal address:</label>
+           </hbox>
+           <textbox font="&in.font;" 
+                    textwidth="40" 
+                    textheight="5"
+                    slot="person.address"/>
+         </hbox>
+         <hbox>
+           <hbox width="6cm" halign="right">
+             <label>Email:</label>
+           </hbox>
+           <entry font="&in.font;" textwidth="40" slot="person.email"/>
+         </hbox>
+         <hbox>
+           <hbox width="6cm" halign="right">
+             <label>Telephone number:</label>
+           </hbox>
+           <entry font="&in.font;" textwidth="20" slot="person.phone-number"/>
+         </hbox>
+       </vbox>
+       <vspace height="1cm"/>
+       <hbox>
+         <hspace width="3cm"/>
+         <hbox width="8cm">
+           <vbox>
+             <button bgcolor="&bg.button;"
+                     label="More about this person..."
+                     goto="person-list"/>
+             <button bgcolor="&bg.button;"
+                     label="Save"
+                     action="save"/>
+           </vbox>
+          </hbox>
+          <hbox>
+            <vbox>
+              <button bgcolor="&bg.button;"
+                     label="Info..."
+                     goto="info"/>
+             <button bgcolor="&bg.button;"
+                     label="Exit (without saving)"
+                     action="exit"/>
+            </vbox>
+          </hbox>
+       </hbox>
+       <vspace height="0px" fill="yes"/>
+       <hbox>
+         <hspace width="0px" fill="yes"/>
+       </hbox>
+      </vbox>
+    </hbox>
+  </mask>
+
+  <!-- ***************************************************************** -->
+  <!-- **********************               **************************** -->
+  <!-- ********************** More about... **************************** -->
+  <!-- **********************               **************************** -->
+  <!-- ***************************************************************** -->
+
+  <sequence name="person-list">
+    <mask name="Department">
+      <!-- ************************** HEADER ************************** -->
+      <vspace height="5mm"/>
+      <hbox>
+       <hspace width="5mm"/>
+       <vbox>
+         &headline;
+         <!-- ************************** CONTENT ************************* -->
+         <label font="&h1.font;">Department</label>
+         <vspace height="3mm"/>
+         <label>The person is working in this department:</label>
+         <hbox>
+           <hspace width="1cm"/>
+           <entry font="&in.font;"
+                  textwidth="70"
+                  slot="person.department"/>
+         </hbox>
+         <vspace height="3mm"/>
+         <label>The project he/she is working for:</label>
+         <hbox>
+           <hspace width="1cm"/>
+           <textbox font="&in.font;"
+                    textwidth="70"
+                    textheight="5"
+                    slot="person.project"/>
+         </hbox>
+         <vspace height="3mm"/>
+         <button bgcolor="&bg.button;"
+                 label="Help"
+                 goto="help.department"/>
+         <!-- ************************************************************ -->
+       </vbox>
+      </hbox>
+      <!-- ************************** FOOTER ************************** -->
+      <vspace height="0px" fill="yes"/>
+      <hbox>
+       <hspace width="5mm"/>
+       &footline;
+      </hbox>
+    </mask>
+
+
+    <mask name="business-contacts">
+      <!-- ************************** HEADER ************************** -->
+      <vspace height="5mm"/>
+      <hbox>
+       <hspace width="5mm"/>
+       <vbox>
+         &headline;
+         <!-- ************************** CONTENT ************************* -->
+         <label font="&h1.font;">Business Contacts</label>
+         <vspace height="3mm"/>
+         <label>Notes about contacts:</label>
+         <hbox>
+           <hspace width="1cm"/>
+           <textbox font="&in.font;"
+                    textwidth="70"
+                    textheight="10"
+                    slot="person.contacts"/>
+         </hbox>
+         <vspace height="3mm"/>
+         <button bgcolor="&bg.button;"
+                 label="Help"
+                 goto="help.business-contacts"/>
+         <!-- ************************************************************ -->
+       </vbox>
+      </hbox>
+      <!-- ************************** FOOTER ************************** -->
+      <vspace height="0px" fill="yes"/>
+      <hbox>
+       <hspace width="5mm"/>
+       &footline;
+      </hbox>
+    </mask>
+
+  </sequence>
+
+  <!-- ***************************************************************** -->
+  <!-- *****************************       ***************************** -->
+  <!-- ***************************** Help  ***************************** -->
+  <!-- *****************************       ***************************** -->
+  <!-- ***************************************************************** -->
+
+  <mask name="help.department">
+    <!-- ************************** HEADER ************************** -->
+    <vspace height="5mm"/>
+    <hbox>
+      <hspace width="5mm"/>
+      <vbox>
+       &help.headline;
+       <!-- ************************** CONTENT ************************* -->
+       <label font="&h1.font;">Department</label>
+       <vspace height="3mm"/>
+        <textbox fgcolor="&fg.hilfe;"
+                bgcolor="&bg.hilfe;"
+                textheight="15"
+                textwidth="70"
+>The help system should be designed to help you fill out your form, but
+writing help texts is so stupid...
+</textbox>
+       <!-- ************************************************************ -->
+      </vbox>
+    </hbox>
+    <!-- ************************** FOOTER ************************** -->
+    <vspace height="0px" fill="yes"/>
+    <hbox>
+      <hspace width="5mm"/>
+      &help.footline;
+    </hbox>
+  </mask>
+
+  <mask name="help.business-contacts">
+    <!-- ************************** HEADER ************************** -->
+    <vspace height="5mm"/>
+    <hbox>
+      <hspace width="5mm"/>
+      <vbox>
+       &help.headline;
+       <!-- ************************** CONTENT ************************* -->
+       <label font="&h1.font;">Business Contacts</label>
+       <vspace height="3mm"/>
+        <textbox fgcolor="&fg.hilfe;"
+                bgcolor="&bg.hilfe;"
+                textheight="15"
+                textwidth="70"
+>It is often helpful to remember the last telephone and/or email contacts
+quickly.
+</textbox>
+       <!-- ************************************************************ -->
+      </vbox>
+    </hbox>
+    <!-- ************************** FOOTER ************************** -->
+    <vspace height="0px" fill="yes"/>
+    <hbox>
+      <hspace width="5mm"/>
+      &help.footline;
+    </hbox>
+  </mask>
+
+  <!-- ***************************************************************** -->
+  <!-- ***************************************************************** -->
+  <!-- ****************************** Info ***************************** -->
+  <!-- ***************************************************************** -->
+  <!-- ***************************************************************** -->
+
+  <mask name="info">
+    <!-- ************************** HEADER ************************** -->
+    <vspace height="5mm"/>
+    <hbox>
+      <hspace width="5mm"/>
+      <vbox>
+       &info.headline;
+       <!-- ************************** CONTENT ************************* -->
+       <vspace height="3mm"/>
+        <textbox fgcolor="&fg.hilfe;"
+                bgcolor="&bg.hilfe;"
+                textheight="15"
+                textwidth="70"
+><![CDATA[About "xmlforms":
+Version <unknown>,
+written by Gerd Stolpmann
+
+Contact: Gerd.Stolpmann@darmstadt.netsurf.de
+]]></textbox>
+       <!-- ************************************************************ -->
+      </vbox>
+    </hbox>
+    <!-- ************************** FOOTER ************************** -->
+    <vspace height="0px" fill="yes"/>
+    <hbox>
+      <hspace width="5mm"/>
+      &info.footline;
+    </hbox>
+  </mask>
+
+
+</application>
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/crazy-style.xml b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/crazy-style.xml
new file mode 100644 (file)
index 0000000..cce8df0
--- /dev/null
@@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE application SYSTEM "./ds-style.dtd" [
+  <!ENTITY vz '<button label="&lt;" action="list-prev"/>
+               <button label=">" action="list-next"/>
+               <button label="exit" goto="first"/>'>
+]
+>
+
+<application start="first">
+<mask name="first" font="-*-lucidatypewriter-medium-r-*-*-12-*-*-*-*-*-*-*">
+<vbox halign="right">
+<label>one</label>
+<label bgcolor="green">Number two</label>
+<hbox width="4cm" halign="center" valign="bottom" bgcolor="red" fgcolor="blue">
+<vbox>
+<label>a1</label>
+<vspace height="1cm"/>
+<label>a2</label>
+</vbox>
+<label>b
+c</label>
+</hbox>
+</vbox>
+<textbox slot="q" textheight="5" textwidth="60">A Text</textbox>
+<button label="sequence" goto="seq"/>
+<label bgcolor="blue">A very long label, bigger than the box</label>
+<vspace height="2cm" fill="yes"/>
+<hbox><button label="left" bgcolor="yellow" goto="second"/><hspace width="0px" fill="yes"/>
+<entry slot="a" textwidth="10" fgcolor="red">right</entry>
+</hbox>
+</mask>
+
+<mask name="second">
+<button label="main" bgcolor="yellow" goto="first"/>
+<button label="previous" action="hist-prev"/>
+<button label="save" action="save"/>
+</mask>
+
+<sequence name="seq">
+<mask name="n1">
+<label>n1</label>
+&vz;
+</mask>
+<mask name="n2">
+<label>n2</label>
+&vz;
+</mask>
+<mask name="n3">
+<label>n3</label>
+&vz;
+</mask>
+<mask name="n4">
+<label>n4</label>
+&vz;
+</mask>
+<mask name="n5">
+<label>n5</label>
+&vz;
+</mask>
+</sequence>
+
+</application>
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/ds-object.dtd b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/ds-object.dtd
new file mode 100644 (file)
index 0000000..750300c
--- /dev/null
@@ -0,0 +1,8 @@
+<?xml encoding="ISO-8859-1"?>
+<!-- $Id$ -->
+
+<!ELEMENT record (string)*>
+
+<!ELEMENT string (#PCDATA)>
+<!ATTLIST string
+          name ID #REQUIRED>
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/ds-style.dtd b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/ds-style.dtd
new file mode 100644 (file)
index 0000000..2f8b7a2
--- /dev/null
@@ -0,0 +1,183 @@
+<?xml encoding="ISO-8859-1"?>
+<!-- $Id$ -->
+
+<!-- entities describing content models -->
+
+<!ENTITY % vertical.only    "vspace">
+<!ENTITY % horizontal.only  "hspace">
+<!ENTITY % mixed            "vbox|hbox|label|entry|textbox|button">
+
+
+<!-- entities describing attribute type -->
+
+<!ENTITY % att.valign       "(top|bottom|center)">
+<!ENTITY % att.halign       "(left|right|center)">
+
+
+<!ENTITY % default.atts     "bgcolor CDATA   #IMPLIED
+                             fgcolor CDATA   #IMPLIED
+                             font    CDATA   #IMPLIED">
+
+<!-- "bgcolor", "fgcolor", and "font" are attribute applicable to every
+     element. They set the background color, foreground color, resp. the
+     font of the element and all sub elements that do not specifiy another
+     value.
+     Colors: all X windows names are allowed, e.g. "black", "white",
+     "lavenderblush", or "#A0B1C2".
+     Font: again X windows font names
+  -->
+
+
+<!ELEMENT application (mask|sequence)+>
+<!ATTLIST application
+          start IDREF #REQUIRED
+          %default.atts;
+>
+
+<!-- An "application" is the top-level element. The "start" attribute must 
+     contain the name of the mask or mask sequence to start with.
+  -->
+
+
+<!ELEMENT sequence (mask)+>
+<!ATTLIST sequence
+          name ID #REQUIRED
+          %default.atts;
+>
+
+<!-- A "sequence" of masks. In a sequence, you can use the special button
+     actions "list-prev" and "list-next" that go to the previous mask resp.
+     the next mask of the sequence.
+  -->
+
+
+<!ELEMENT mask (%vertical.only;|%horizontal.only;|%mixed;)*>
+<!ATTLIST mask
+          name ID #REQUIRED
+          %default.atts;
+>
+
+<!-- A "mask" contains layout and functional elements of a visible page. -->
+
+
+<!ELEMENT vbox (%vertical.only;|%mixed;)*>
+<!ATTLIST vbox
+          halign %att.halign; "left"
+          %default.atts;
+>
+
+<!-- A "vbox" (vertical box) renders the inner material in vertical direction.
+     The "halign" attribute specifies whether the inner material should be
+     left-aligned, right-aligned, or centered.
+  -->
+
+<!ELEMENT hbox (%horizontal.only;|%mixed;)*>
+<!ATTLIST hbox
+          width CDATA         #IMPLIED
+          halign %att.halign; "left"
+          valign %att.valign; "top"
+          %default.atts;
+>
+
+<!-- An "hbox" (horizontal box) renders the inner material in horizontal
+     direction. The "valign" attribute specifies whether the inner material 
+     should be top-aligned, bottom-aligned, or centered.
+     Normally, the width of an hbox is the sum of the widths of its members,
+     but you can also widen the box via the "width" attribute. This is a number
+     with a dimension, e.g. "10.5 cm", "105 mm", "4.13 in". Other dimensions
+     are "pt" (points) and "px" (pixels).
+     If "width" is given, you may also set "halign" (see vbox for possible
+     values).
+  -->
+
+<!ELEMENT vspace EMPTY>
+<!ATTLIST vspace
+          height CDATA      #REQUIRED
+          fill  (yes|no)    "no"
+          %default.atts;
+>
+
+<!-- "vspace" is a vertical space of given "height" (again a number with a 
+     dimension, see hbox). 
+     If "fill" is "yes", the space is extended as much as possible.
+  -->
+
+<!ELEMENT hspace EMPTY>
+<!ATTLIST hspace
+          width CDATA       #REQUIRED
+          fill  (yes|no)    "no"
+          %default.atts;
+>
+
+<!-- "hspace" is a horizontal space of given "width" (again a number with a 
+     dimension, see hbox). 
+     If "fill" is "yes", the space is extended as much as possible.
+  -->
+
+<!ELEMENT label (#PCDATA)>
+<!ATTLIST label
+          textwidth CDATA    #IMPLIED
+          halign %att.halign; "left"
+          %default.atts;
+>
+
+<!-- A "label" is a piece of constant text. The text is included as #PCDATA
+     in the element. 
+     You may set "textwidth" to a (dimensionless) number to specify a fixed
+     width. In this case, "halign" determines the horizontal alignment.
+  -->
+
+<!ELEMENT entry (#PCDATA)>
+<!ATTLIST entry
+          textwidth CDATA   #REQUIRED
+          slot      NMTOKEN #REQUIRED
+          %default.atts;
+>
+
+<!-- An "entry" is an editable text line. "textwidth" specifies the width of
+     the visible line (but the contents can be longer). "slot" is the name of
+     a slot that is associated with the element.
+     If the element contains #PCDATA, this is used as default value if 
+     the slot has not yet been filled.
+  -->
+
+<!ELEMENT textbox (#PCDATA)>
+<!ATTLIST textbox
+          textwidth  CDATA   #REQUIRED
+          textheight CDATA   #REQUIRED
+          slot       NMTOKEN #IMPLIED
+          %default.atts;
+>
+
+<!-- A "textbox" is a text box with dimensions "textwidth" and "textheight"
+     (both dimensionless numbers).
+     "slot" is the name of a slot that is associated with the element.
+     If the element contains #PCDATA, this is used as default value if 
+     the slot has not yet been filled.
+     If you omit "slot", the #PCDATA is displayed read-only.
+  -->
+
+<!ELEMENT button EMPTY>
+<!ATTLIST button
+          label  CDATA        #REQUIRED
+          action (goto|save|exit|save-exit|list-prev|list-next|
+                  hist-prev|hist-next) "goto"
+          goto   IDREF        #IMPLIED
+          %default.atts;
+>
+
+<!-- A "button" is specified as follows:
+     - "label" is what is written on the button
+     - "action" specifies what to if the button is pressed:
+       - "goto":  jump to another mask or mask sequence whose name is given
+                  in the attribute "goto"
+       - "save":  save the record
+       - "exit":  exit the application
+       - "save-exit": save, then exit
+       - "list-prev": jump to the previous mask in the sequence
+       - "list-next": jump to the next mask in the sequence
+       - "hist-prev": jump to the mask that has actually been the predecessor
+       - "hist-next": jump to the mask that has actually been the successor
+  -->
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/mini-style.xml b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/mini-style.xml
new file mode 100644 (file)
index 0000000..844235c
--- /dev/null
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE application SYSTEM "./ds-style.dtd" [
+]>
+
+<application start="first">
+<mask name="first" font="-*-lucidatypewriter-medium-r-*-*-12-*-*-*-*-*-*-*">
+<label>This is a label</label>
+</mask>
+</application>
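
The mini-style.xml above is a minimal mask description for the DTD documented
before it. As a rough illustration (not part of this commit), a driver that
loads such a style file could look like the sketch below; it assumes the
Pxp_yacc / Pxp_document interfaces that PXP exposes, so treat the exact calls
as an assumption rather than a reference.

(* Hypothetical driver, not part of this commit: load a style file with PXP
 * and report the root element type. *)
let () =
  let doc =
    Pxp_yacc.parse_document_entity
      Pxp_yacc.default_config
      (Pxp_yacc.from_file "mini-style.xml")
      Pxp_yacc.default_spec
  in
  match doc # root # node_type with
    | Pxp_document.T_element name -> print_endline ("root element: " ^ name)
    | _                           -> print_endline "unexpected root node"
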
diff --git a/helm/DEVEL/pxp/pxp/lexers/Makefile b/helm/DEVEL/pxp/pxp/lexers/Makefile
new file mode 100644 (file)
index 0000000..63ade70
--- /dev/null
@@ -0,0 +1,34 @@
+all_iso88591: generate_iso88591
+       $(MAKE) -f Makefile.code all_iso88591
+
+opt_iso88591: generate_iso88591
+       $(MAKE) -f Makefile.code opt_iso88591
+
+all_utf8: generate_utf8
+       $(MAKE) -f Makefile.code all_utf8
+
+opt_utf8: generate_utf8
+       $(MAKE) -f Makefile.code opt_utf8
+
+
+
+generate_iso88591:
+       $(MAKE) -f Makefile.generate all_iso88591
+       rm -f objects_iso88591 objects_utf8
+       $(MAKE) -f Makefile.generate objects_iso88591
+       touch objects_utf8
+       $(MAKE) -f Makefile.generate depend
+
+generate_utf8:
+       $(MAKE) -f Makefile.generate all_utf8
+       rm -f objects_iso88591 objects_utf8
+       $(MAKE) -f Makefile.generate objects_utf8
+       touch objects_iso88591
+       $(MAKE) -f Makefile.generate depend
+
+
+
+clean:
+       touch depend objects
+       $(MAKE) -f Makefile.code clean
+       $(MAKE) -f Makefile.generate clean
diff --git a/helm/DEVEL/pxp/pxp/lexers/Makefile.code b/helm/DEVEL/pxp/pxp/lexers/Makefile.code
new file mode 100644 (file)
index 0000000..781f296
--- /dev/null
@@ -0,0 +1,54 @@
+
+LARCHIVE_iso88591  = pxp_lex_iso88591.cma
+LARCHIVE_utf8      = pxp_lex_utf8.cma
+XLARCHIVE_iso88591 = $(LARCHIVE_iso88591:.cma=.cmxa)
+XLARCHIVE_utf8     = $(LARCHIVE_utf8:.cma=.cmxa)
+
+# LOBJECTS_* and XLOBJECTS_* are included from "objects_*":
+include objects_iso88591
+include objects_utf8
+
+#----------------------------------------------------------------------
+
+all_iso88591: $(LARCHIVE_iso88591)
+opt_iso88591: $(XLARCHIVE_iso88591)
+all_utf8:     $(LARCHIVE_utf8)
+opt_utf8:     $(XLARCHIVE_utf8)
+
+$(LARCHIVE_iso88591): $(LOBJECTS_iso88591)
+       $(OCAMLC) -a -o $(LARCHIVE_iso88591) $(LOBJECTS_iso88591)
+
+$(XLARCHIVE_iso88591): $(XLOBJECTS_iso88591)
+       $(OCAMLOPT) -a -o $(XLARCHIVE_iso88591) $(XLOBJECTS_iso88591)
+
+$(LARCHIVE_utf8): $(LOBJECTS_utf8)
+       $(OCAMLC) -a -o $(LARCHIVE_utf8) $(LOBJECTS_utf8)
+
+$(XLARCHIVE_utf8): $(XLOBJECTS_utf8)
+       $(OCAMLOPT) -a -o $(XLARCHIVE_utf8) $(XLOBJECTS_utf8)
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS   =
+OCAMLC    = ocamlfind ocamlc -g -I .. -package netstring $(OPTIONS)
+OCAMLOPT  = ocamlfind ocamlopt -p -I .. -package netstring $(OPTIONS) 
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli 
+
+.ml.cmx:
+       $(OCAMLOPT) -c $<
+
+.ml.cmo:
+       $(OCAMLC) -c $<
+
+.mli.cmi:
+       $(OCAMLC) -c $<
+
+
+*.mli:
+
+clean:
+       rm -f *.cmo *.cmx *.cma *.cmxa *.cmi *.o *.a
+
+include depend
diff --git a/helm/DEVEL/pxp/pxp/lexers/Makefile.generate b/helm/DEVEL/pxp/pxp/lexers/Makefile.generate
new file mode 100644 (file)
index 0000000..8ee39cb
--- /dev/null
@@ -0,0 +1,67 @@
+LEXERSRC = pxp_lex_misc.src \
+          pxp_lex_document.src \
+          pxp_lex_content.src \
+          pxp_lex_within_tag.src \
+          pxp_lex_document_type.src \
+          pxp_lex_declaration.src \
+          pxp_lex_dtd_string.src \
+          pxp_lex_content_string.src \
+          pxp_lex_name_string.src 
+
+OTHERSRC = open_pxp_lex_aux_iso88591.src \
+          pxp_lex_aux.src \
+          pxp_lex_defs_iso88591.def
+
+LEXERMLL_iso88591 = $(LEXERSRC:.src=_iso88591.mll)
+LEXERMLL_utf8     = $(LEXERSRC:.src=_utf8.mll)
+
+LEXERML_iso88591  = $(LEXERSRC:.src=_iso88591.ml)
+LEXERML_utf8      = $(LEXERSRC:.src=_utf8.ml)
+
+LEXERCMO_iso88591 = pxp_lex_aux_iso88591.cmo $(LEXERSRC:.src=_iso88591.cmo)
+LEXERCMO_utf8     = pxp_lex_aux_utf8.cmo     $(LEXERSRC:.src=_utf8.cmo)
+
+LEXERCMX_iso88591 = $(LEXERCMO_iso88591:.cmo=.cmx)
+LEXERCMX_utf8     = $(LEXERCMO_utf8:.cmo=.cmx)
+
+.PHONY: all_iso88591
+all_iso88591: iso88591_done 
+
+.PHONY: all_utf8
+all_utf8: utf8_done
+
+iso88591_done: $(LEXERSRC) $(OTHERSRC)
+       ../tools/insert_variant -variant iso88591 $(LEXERSRC)
+       for file in $(LEXERMLL_iso88591); do ocamllex $$file; done
+       touch iso88591_done
+
+utf8_done: $(LEXERSRC) $(OTHERSRC) pxp_lex_defs_utf8.def
+       ../tools/insert_variant -variant utf8 $(LEXERSRC)
+       for file in $(LEXERMLL_utf8); do ocamllex $$file; done
+       touch utf8_done
+
+pxp_lex_defs_utf8.def: pxp_lex_defs_generic.def pxp_lex_defs_drv_utf8.def
+       ../tools/ucs2_to_utf8/ucs2_to_utf8 <pxp_lex_defs_generic.def \
+                                  >pxp_lex_defs_utf8.def || \
+               rm -f pxp_lex_defs_utf8.def
+       cat pxp_lex_defs_drv_utf8.def >>pxp_lex_defs_utf8.def
+
+objects_iso88591:
+       echo LOBJECTS_iso88591  = $(LEXERCMO_iso88591)  >objects_iso88591
+       echo XLOBJECTS_iso88591 = $(LEXERCMX_iso88591) >>objects_iso88591
+
+objects_utf8:
+       echo LOBJECTS_utf8  = $(LEXERCMO_utf8)  >objects_utf8
+       echo XLOBJECTS_utf8 = $(LEXERCMX_utf8) >>objects_utf8
+
+depend: *.ml *.mli 
+       ocamldep *.ml *.mli >depend
+
+.PHONY: clean
+clean:
+       rm -f $(LEXERMLL_iso88591) $(LEXERML_iso88591) iso88591_done \
+             $(LEXERMLL_utf8)     $(LEXERML_utf8)     utf8_done \
+             pxp_lex_defs_utf8.def \
+             objects_iso88591 objects_utf8 depend
+
+*.mli:
diff --git a/helm/DEVEL/pxp/pxp/lexers/objects b/helm/DEVEL/pxp/pxp/lexers/objects
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_aux_iso88591.src b/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_aux_iso88591.src
new file mode 100644 (file)
index 0000000..2377aff
--- /dev/null
@@ -0,0 +1,19 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_lex_aux_iso88591
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_aux_utf8.src b/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_aux_utf8.src
new file mode 100644 (file)
index 0000000..7c1b12a
--- /dev/null
@@ -0,0 +1,19 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_lex_aux_utf8
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_misc_iso88591.src b/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_misc_iso88591.src
new file mode 100644 (file)
index 0000000..104eb9b
--- /dev/null
@@ -0,0 +1,19 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_lex_misc_iso88591
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_misc_utf8.src b/helm/DEVEL/pxp/pxp/lexers/open_pxp_lex_misc_utf8.src
new file mode 100644 (file)
index 0000000..fc545f2
--- /dev/null
@@ -0,0 +1,19 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_lex_misc_utf8
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux.src
new file mode 100644 (file)
index 0000000..2ab21a3
--- /dev/null
@@ -0,0 +1,82 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+  class dummy_entity = object end
+
+  let dummy_entity = ( new dummy_entity : entity_id )
+
+  (* The following tokens are pre-allocated to reduce the load on the
+   * GC.
+   *)
+
+  let tok_Doctype__Document_type = Doctype dummy_entity, Document_type
+  let tok_Ignore__Document       = Ignore, Document
+  let tok_Ignore__Within_tag     = Ignore, Within_tag
+  let tok_Ignore__Document_type  = Ignore, Document_type
+  let tok_Ignore__Declaration    = Ignore, Declaration
+  let tok_Ignore__Ignored        = Ignore, Ignored_section
+  let tok_Eof__Document          = Eof, Document
+  let tok_Eof__Content           = Eof, Content
+  let tok_Eof__Within_tag        = Eof, Within_tag
+  let tok_Eof__Document_type     = Eof, Document_type
+  let tok_Eof__Declaration       = Eof, Declaration
+  let tok_Eof__Ignored           = Eof, Ignored_section
+  let tok_LineEndCRLF__Content   = LineEnd "\r\n", Content
+  let tok_LineEndCR__Content     = LineEnd "\r", Content
+  let tok_LineEndLF__Content     = LineEnd "\n", Content
+  let tok_CharDataRBRACKET__Content = CharData "]", Content
+  let tok_Eq__Within_tag         = Eq, Within_tag
+  let tok_Rangle__Content        = Rangle, Content
+  let tok_Rangle_empty__Content  = Rangle_empty, Content
+  let tok_Dtd_begin__Declaration = Dtd_begin dummy_entity, Declaration
+  let tok_Doctype_rangle__Document = Doctype_rangle dummy_entity, Document
+  let tok_Percent__Declaration   = Percent, Declaration
+  let tok_Plus__Declaration      = Plus, Declaration
+  let tok_Star__Declaration      = Star, Declaration
+  let tok_Bar__Declaration       = Bar, Declaration
+  let tok_Comma__Declaration     = Comma, Declaration
+  let tok_Qmark__Declaration     = Qmark, Declaration
+  let tok_Lparen__Declaration    = Lparen dummy_entity, Declaration
+  let tok_RparenPlus__Declaration   = RparenPlus dummy_entity, Declaration
+  let tok_RparenStar__Declaration   = RparenStar dummy_entity, Declaration
+  let tok_RparenQmark__Declaration  = RparenQmark dummy_entity, Declaration
+  let tok_Rparen__Declaration    = Rparen dummy_entity, Declaration
+  let tok_Required__Declaration  = Required, Declaration
+  let tok_Implied__Declaration   = Implied, Declaration
+  let tok_Fixed__Declaration     = Fixed, Declaration
+  let tok_Pcdata__Declaration    = Pcdata, Declaration
+  let tok_Decl_element__Declaration  = Decl_element dummy_entity, Declaration
+  let tok_Decl_attlist__Declaration  = Decl_attlist dummy_entity, Declaration
+  let tok_Decl_entity__Declaration   = Decl_entity dummy_entity, Declaration
+  let tok_Decl_notation__Declaration = Decl_notation dummy_entity, Declaration
+  let tok_Conditional_begin__Declaration = Conditional_begin dummy_entity, 
+                                           Declaration 
+  let tok_Conditional_begin__Ignored     = Conditional_begin dummy_entity, 
+                                           Ignored_section
+  let tok_Conditional_end__Declaration   = Conditional_end dummy_entity, 
+                                           Declaration
+  let tok_Conditional_end__Ignored       = Conditional_end dummy_entity, 
+                                           Ignored_section
+  let tok_Conditional_body__Declaration  = Conditional_body dummy_entity, 
+                                           Declaration
+  let tok_Decl_rangle__Declaration   = Decl_rangle dummy_entity, Declaration
+  let tok_Dtd_end__Document_type     = Dtd_end dummy_entity, Document_type
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/08/18 20:19:59  gerd
+ *     Comments return different comment tokens.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
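
The "pre-allocated to reduce the load on the GC" remark above describes a
simple sharing pattern: each (token, lexer state) pair is built once at module
initialisation, and the lexer rules then return the shared value instead of
allocating a fresh tuple per match. A standalone sketch of the idea, with
made-up token and state types:

(* Illustrative sketch of the pre-allocation pattern; the token and state
 * types here are invented for the example. *)
type token = Eof | Ignore
type state = Document | Declaration

(* Allocated once when the module is initialised ... *)
let tok_Eof__Document       = (Eof, Document)
let tok_Ignore__Declaration = (Ignore, Declaration)

(* ... and returned by reference afterwards, so the hot lexing loop creates
 * no new heap blocks for these frequent results. *)
let next_token_at_eof () = tok_Eof__Document

let () =
  (* Physical equality: the same block is reused on every call. *)
  assert (next_token_at_eof () == tok_Eof__Document)
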
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_iso88591.ml b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_iso88591.ml
new file mode 100644 (file)
index 0000000..07f8c45
--- /dev/null
@@ -0,0 +1,97 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* NOTE: Currently, this module is *identical* to Pxp_lex_aux_utf8 *)
+
+  open Pxp_types
+  open Pxp_lexer_types
+
+  let get_name_end s k =
+    (* Get the index of the end+1 of the name beginning at position k *)
+    let l = String.length s in
+    let rec find j =
+      if j < l then
+       match s.[j] with
+         | ('\009'|'\010'|'\013'|'\032') -> j
+         |_                              -> find (j+1)
+      else
+       l
+    in
+    find k
+
+  let get_ws_end s k =
+    let l =  String.length s in
+    let rec find j =
+      if j < l then
+       match s.[j] with
+           (' '|'\t'|'\r'|'\n') -> find (j+1)
+         | _                    -> j
+      else
+       l
+    in
+    find k
+
+  let scan_pi pi xml_scanner =
+    let s = String.sub pi 2 (String.length pi - 4) in
+            (* the PI without the leading "<?" and the trailing "?>" *)
+    let xml_lexbuf = Lexing.from_string (s ^ " ") in
+      (* Add space because the lexer expects whitespace after every
+       * clause; by adding a space there is always whitespace at the 
+       * end of the string.
+       *)
+
+    (* The first word of a PI must be a name: Extract it. *)
+
+    let s_name, s_len =
+      match xml_scanner xml_lexbuf with
+         Pro_name n -> 
+           let ltok = String.length (Lexing.lexeme xml_lexbuf) in
+           if String.length n = ltok then
+              (* No whitespace after the name *)
+             raise (WF_error ("Bad processing instruction"));
+           n, ltok
+       | _ -> raise (WF_error ("Bad processing instruction"))
+    in
+
+    (* Note: s_len is the length of s_name + the whitespace following s_name *)
+
+    match s_name with
+       "xml" -> begin
+         (* It is a <?xml ...?> PI: Get the other tokens *)
+         let rec collect () =
+           let t = xml_scanner xml_lexbuf in
+           (* prerr_endline (string_of_int (Lexing.lexeme_end xml_lexbuf)); *)
+           if t = Pro_eof then
+             []
+           else
+             t :: collect()
+         in
+         PI_xml (collect())
+       end
+      | _ -> 
+         let len_param = String.length s - s_len in
+         (* It is possible that len_param = -1 *)
+         if len_param >= 1 then
+           PI(s_name, String.sub s s_len len_param)
+         else
+           PI(s_name, "")
+
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/05/29 23:53:12  gerd
+ *     Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
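
The helpers above do plain index arithmetic on the lexeme string: get_name_end
returns the position one past the name that starts at k, or the string length
if no whitespace follows. A self-contained check of that contract (hypothetical
test code, using a local copy of the function so the snippet runs on its own):

(* Local copy of get_name_end, only to verify its behaviour in isolation. *)
let get_name_end s k =
  let l = String.length s in
  let rec find j =
    if j < l then
      match s.[j] with
        | '\009' | '\010' | '\013' | '\032' -> j
        | _                                 -> find (j + 1)
    else l
  in
  find k

let () =
  (* In "xml version=...", the name "xml" ends at index 3 (the blank). *)
  assert (get_name_end "xml version=\"1.0\"" 0 = 3);
  (* Without terminating whitespace the string length is returned. *)
  assert (get_name_end "target" 0 = 6)
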
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_utf8.ml b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_utf8.ml
new file mode 100644 (file)
index 0000000..0b2c577
--- /dev/null
@@ -0,0 +1,95 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* NOTE: Currently, this module is *identical* to Pxp_lex_aux_iso88591 *)
+
+  open Pxp_types
+  open Pxp_lexer_types
+
+  let get_name_end s k =
+    (* Get the index of the end+1 of the name beginning at position k *)
+    let l = String.length s in
+    let rec find j =
+      if j < l then
+       match s.[j] with
+         | ('\009'|'\010'|'\013'|'\032') -> j
+         |_                              -> find (j+1)
+      else
+       l
+    in
+    find k
+
+  let get_ws_end s k =
+    let l =  String.length s in
+    let rec find j =
+      if j < l then
+       match s.[j] with
+           (' '|'\t'|'\r'|'\n') -> find (j+1)
+         | _                    -> j
+      else
+       l
+    in
+    find k
+
+  let scan_pi pi xml_scanner =
+    let s = String.sub pi 2 (String.length pi - 4) in
+            (* the PI without the leading "<?" and the trailing "?>" *)
+    let xml_lexbuf = Lexing.from_string (s ^ " ") in
+      (* Add space because the lexer expects whitespace after every
+       * clause; by adding a space there is always whitespace at the 
+       * end of the string.
+       *)
+
+    (* The first word of a PI must be a name: Extract it. *)
+
+    let s_name, s_len =
+      match xml_scanner xml_lexbuf with
+         Pro_name n -> 
+           let ltok = String.length (Lexing.lexeme xml_lexbuf) in
+           if String.length n = ltok then
+              (* No whitespace after the name *)
+             raise (WF_error ("Bad processing instruction"));
+           n, ltok
+       | _ -> raise (WF_error ("Bad processing instruction"))
+    in
+
+    (* Note: s_len is the length of s_name + the whitespace following s_name *)
+
+    match s_name with
+       "xml" -> begin
+         (* It is a <?xml ...?> PI: Get the other tokens *)
+         let rec collect () =
+           let t = xml_scanner xml_lexbuf in
+           (* prerr_endline (string_of_int (Lexing.lexeme_end xml_lexbuf)); *)
+           if t = Pro_eof then
+             []
+           else
+             t :: collect()
+         in
+         PI_xml (collect())
+       end
+      | _ -> 
+         let len_param = String.length s - s_len in
+         (* It is possible that len_param = -1 *)
+         if len_param >= 1 then
+           PI(s_name, String.sub s s_len len_param)
+         else
+           PI(s_name, "")
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/05/29 23:53:12  gerd
+ *     Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_content.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_content.src
new file mode 100644 (file)
index 0000000..3df2025
--- /dev/null
@@ -0,0 +1,107 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+  open Pxp_types
+  open Pxp_lexer_types
+
+#insert pxp_lex_aux.src
+
+#insert open_pxp_lex_aux_*.src
+#insert open_pxp_lex_misc_*.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+rule scan_content = parse
+    "<?" pi_string "?>"
+      { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi, Content }
+  | "<?"
+      { raise (WF_error ("Illegal processing instruction")) }
+  | "<!--"
+      { Comment_begin, Content_comment }
+  | '<' '/'? name
+      (* One rule for Tag_beg and Tag_end saves transitions. *)
+      { let s = Lexing.lexeme lexbuf in
+       if s.[1] = '/' then
+         Tag_end (String.sub s 2 (String.length s - 2), dummy_entity), 
+         Within_tag 
+       else
+         Tag_beg (String.sub s 1 (String.length s - 1), dummy_entity), 
+         Within_tag 
+      }
+  | "<![CDATA[" cdata_string "]]>"
+      { let s = Lexing.lexeme lexbuf in
+       Cdata (String.sub s 9 (String.length s - 12)), Content }
+  | "<!"
+      { raise (WF_error "Declaration either malformed or not allowed in this context") 
+      }
+  | "<"
+      { raise (WF_error ("The left angle bracket '<' must be written as '&lt;'"))
+      }
+  | "&#" ascii_digit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+       CRef (int_of_string (String.sub s 2 (String.length s - 3))), Content }
+  | "&#x" ascii_hexdigit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+       CRef (int_of_string ("0x" ^ String.sub s 3 (String.length s - 4))), Content }
+  | "&" name ";"
+      { let s = Lexing.lexeme lexbuf in
+       ERef (String.sub s 1 (String.length s - 2)), Content }
+  | "&" 
+      { raise (WF_error ("The ampersand '&' must be written as '&amp;'"))
+      }
+
+  (* LineEnd: Depending on whether we are reading from a primary source
+   * (file) or from the replacement text of an internal entity, line endings
+   * must be normalized (converted to \n) or not.
+   * The entity classes do that. The yacc parser will never see LineEnd;
+   * this token is always converted to the appropriate CharData token.
+   *)
+
+  | '\013' '\010'
+      { tok_LineEndCRLF__Content }
+  | '\013'
+      { tok_LineEndCR__Content }
+  | '\010'
+      { tok_LineEndLF__Content }
+  | eof
+      { tok_Eof__Content }
+  | "]]>" 
+      { raise (WF_error ("The sequence ']]>' must be written as ']]&gt;'"))
+      }
+  | "]"
+      { tok_CharDataRBRACKET__Content }
+  | normal_character+
+      { let s = Lexing.lexeme lexbuf in
+       CharData s, Content 
+      }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.4  2000/08/18 20:19:59  gerd
+ *     Comments return different comment tokens.
+ *
+ * Revision 1.3  2000/08/14 22:18:34  gerd
+ *     Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2  2000/05/29 23:53:12  gerd
+ *     Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
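
The substring arithmetic in the character-reference rules above ("&#...;" and
"&#x...;") can be verified in isolation; the following standalone sketch
mirrors exactly the String.sub calls used in the rules:

(* Standalone sketch of the "&#...;" / "&#x...;" index arithmetic. *)
let decimal_cref s = int_of_string (String.sub s 2 (String.length s - 3))
let hex_cref s     = int_of_string ("0x" ^ String.sub s 3 (String.length s - 4))

let () =
  assert (decimal_cref "&#65;"  = 65);     (* 'A' *)
  assert (hex_cref     "&#x41;" = 0x41)    (* also 'A' *)
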
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_content_string.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_content_string.src
new file mode 100644 (file)
index 0000000..1eb76a4
--- /dev/null
@@ -0,0 +1,71 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+  open Pxp_types
+  open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+(* This lexer is used to expand and normalize attribute values: *)
+
+rule scan_content_string = parse
+    '&' name ';'
+      { let s = Lexing.lexeme lexbuf in
+       ERef (String.sub s 1 (String.length s - 2)) }
+  | "&#" ascii_digit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+       CRef (int_of_string (String.sub s 2 (String.length s - 3))) }
+  | "&#x" ascii_hexdigit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+       CRef (int_of_string ("0x" ^ String.sub s 3 (String.length s - 4))) }
+  | '&'
+      { raise(WF_error("The character '&' must be written as '&amp;'")) }
+  | printable_character_except_amp_lt+
+      { CharData (Lexing.lexeme lexbuf) }
+  | '\009'
+      { CRef 32 }
+  | '\013' '\010'
+      { CRef(-1)   (* A special case *)
+      }
+  | '\013'
+      { CRef 32 }
+  | '\010'
+      { CRef 32 }
+  | '<'
+      { 
+       (* Depending on the situation, '<' may be legal or not: *)
+       CharData "<" 
+      }
+  | eof
+      { Eof }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/08/14 22:18:34  gerd
+ *     Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2  2000/05/29 23:53:12  gerd
+ *     Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
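
In the attribute-value lexer above, TAB, CR and LF are reported as CRef 32
(a space), while the CR LF pair is flagged with the sentinel CRef (-1) so that
the caller can count it as a single space. A hypothetical sketch of what a
consumer of these tokens might do (the real handling lives in the parser, not
in this file):

(* Hypothetical consumer: turn the CRef codes emitted above back into the
 * characters of the normalised attribute value. *)
type tok = CharData of string | CRef of int | Eof

let normalise_fragment = function
  | CharData s                      -> s
  | CRef (-1)                       -> " "    (* CR LF counts as one space *)
  | CRef n when n >= 0 && n < 256   -> String.make 1 (Char.chr n)
  | CRef _                          -> ""     (* other sentinels *)
  | Eof                             -> ""

let () =
  assert (normalise_fragment (CharData "a") = "a");
  assert (normalise_fragment (CRef 32)      = " ");
  assert (normalise_fragment (CRef (-1))    = " ")
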
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_declaration.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_declaration.src
new file mode 100644 (file)
index 0000000..4f53d97
--- /dev/null
@@ -0,0 +1,138 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+  open Pxp_types
+  open Pxp_lexer_types
+
+#insert pxp_lex_aux.src
+
+#insert open_pxp_lex_aux_*.src
+#insert open_pxp_lex_misc_*.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+(* scan_declaration: after "[" in DTD until matching "]" *)
+
+rule scan_declaration = parse
+    ws+
+      { tok_Ignore__Declaration }
+  | '%' name ';'
+      { let s = Lexing.lexeme lexbuf in
+       (PERef (String.sub s 1 (String.length s - 2))), Declaration }
+  | '%'
+      { tok_Percent__Declaration }
+  | '&' 
+      { raise(WF_error("References to general entities not allowed in DTDs")) }
+  | name
+      { Name (Lexing.lexeme lexbuf), Declaration }
+  | nmtoken
+      { Nametoken (Lexing.lexeme lexbuf), Declaration }
+  | '+'
+      { tok_Plus__Declaration }
+  | '*'
+      { tok_Star__Declaration }
+  | '|'
+      { tok_Bar__Declaration }
+  | ','
+      { tok_Comma__Declaration }
+  | '?'
+      { tok_Qmark__Declaration }
+  | '('
+      { tok_Lparen__Declaration }
+  | ")+" 
+      { tok_RparenPlus__Declaration }
+  | ")*" 
+      { tok_RparenStar__Declaration }
+  | ")?"
+      { tok_RparenQmark__Declaration }
+  | ')'
+      { tok_Rparen__Declaration }
+  | "#REQUIRED"
+      { tok_Required__Declaration }
+  | "#IMPLIED"
+      { tok_Implied__Declaration }
+  | "#FIXED"
+      { tok_Fixed__Declaration }
+  | "#PCDATA"
+      { tok_Pcdata__Declaration }
+  | "<!ELEMENT"
+      { tok_Decl_element__Declaration }
+  | "<!ATTLIST"
+      { tok_Decl_attlist__Declaration }
+  | "<!ENTITY"
+      { tok_Decl_entity__Declaration }
+  | "<!NOTATION"
+      { tok_Decl_notation__Declaration }
+  | "<!--"
+      { Comment_begin, Decl_comment }
+  | "<!["
+      { tok_Conditional_begin__Declaration }
+  | "]]>"
+      { tok_Conditional_end__Declaration }
+  | "["
+      { tok_Conditional_body__Declaration }
+
+  (* TODO: PIs modified *) 
+
+  | "<?" pi_string "?>"
+      { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi, Declaration }
+  | "<?"
+      { raise (WF_error ("Illegal processing instruction")) }
+  | '"' [^ '"']* '"'
+      { let s = Lexing.lexeme lexbuf in
+        (* Check that characters are well-formed: *)
+       ignore(scan_characters (Lexing.from_string s));
+       (Unparsed_string (String.sub s 1 (String.length s - 2))), Declaration }
+  | '"'
+      { raise (WF_error ("Cannot find the second quotation mark"))
+      }
+  | "'" [^ '\'']* "'"
+      { let s = Lexing.lexeme lexbuf in
+        (* Check that characters are well-formed: *)
+       ignore(scan_characters (Lexing.from_string s));
+       (Unparsed_string (String.sub s 1 (String.length s - 2))), Declaration }
+  | "'"
+      { raise (WF_error ("Cannot find the second quotation mark"))
+      }
+  | '>'
+      { tok_Decl_rangle__Declaration }
+  | ']'
+      { tok_Dtd_end__Document_type }
+  | eof
+      { tok_Eof__Declaration }
+  | "<!"
+      { raise (WF_error "Declaration either malformed or not allowed in this context") 
+      }
+  | character
+      { raise (WF_error("Illegal token or character")) }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.4  2000/08/18 20:19:59  gerd
+ *     Comments return different comment tokens.
+ *
+ * Revision 1.3  2000/08/14 22:18:34  gerd
+ *     Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2  2000/05/29 23:53:12  gerd
+ *     Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_defs_drv_utf8.def b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_defs_drv_utf8.def
new file mode 100644 (file)
index 0000000..dc108ef
--- /dev/null
@@ -0,0 +1,160 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+let ws = [ ' ' '\t' '\r' '\n' ]
+
+let ascii_digit = ['0'-'9']
+
+let ascii_hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
+
+let namechar = letter | digit | '.' | ':' | '-' | '_' | combiningChar | extender
+
+let name = ( letter | '_' | ':' ) namechar*
+
+let nmtoken = namechar+
+
+(* Valid characters are:
+ * #9, #10, #13, #32-#xD7FF, #xE000-#xFFFD, #x10000-#x10FFFF
+ *
+ * #xD7FF as UTF-8 sequence:
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 1110...D 10...7.. 10.F...F  = ED 9F BF
+ *
+ * #xE000 as UTF-8 sequence:
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 1110...E 10...0.. 10.0...0  = EE 80 80
+ *
+ * UTF-8 sequence EF BE BF as character:
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 1110...F 10111110 10111111  = #FFBF
+ *
+ * #xFFFD as UTF-8 sequence:
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 1110...F 10...F.. 10.F...D  = EF BF BD
+ *
+ * #x010000 as UTF-8 sequence:
+ * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ * 111100.. 10.1...0 10...0.. 10.0...0 = F0 90 80 80
+ *
+ * #x10FFFF as UTF-8 sequence:
+ * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ * 111101.. 10.0...F 10...F.. 10.F...F = F4 8F BF BF
+ *)
+
+
+let non_ascii_character = 
+  ['\192'-'\223'] ['\128'-'\191']                     (* #x80-#x7FF *)
+| ['\224'-'\236'] ['\128'-'\191'] ['\128'-'\191']     (* #x800-#xCFFF *)
+| '\237'          ['\128'-'\159'] ['\128'-'\191']     (* #xD000-#xD7FF *)
+| '\238'          ['\128'-'\191'] ['\128'-'\191']     (* #xE000-#xEFFF *)
+| '\239'          ['\128'-'\190'] ['\128'-'\191']     (* #xF000-#xFFBF *)
+| '\239'          '\191'          ['\128'-'\189']     (* #xFFC0-#xFFFD *)
+| '\240'          ['\144'-'\191'] ['\128'-'\191'] ['\128'-'\191']     
+                                                      (* #x010000-#x03FFFF *)
+| ['\241'-'\243'] ['\128'-'\191'] ['\128'-'\191'] ['\128'-'\191'] 
+                                                      (* #x040000-#x0FFFFF *)
+| '\244'          ['\128'-'\143'] ['\128'-'\191'] ['\128'-'\191'] 
+                                                      (* #x100000-#10FFFFF *)
+
+let character =
+  [ '\009' '\010' '\013' '\032'-'\127' ]
+| non_ascii_character
+
+
+let character_except_question_mark =                    (* '?' = '\063' *)
+  [ '\009' '\010' '\013' '\032'-'\062' '\064'-'\127' ]
+| non_ascii_character
+
+
+let character_except_right_angle_bracket =              (* '>' = '\062' *)
+  [ '\009' '\010' '\013' '\032'-'\061' '\063'-'\127' ]
+| non_ascii_character
+
+
+let character_except_minus =                            (* '-' = '\045' *)
+  [ '\009' '\010' '\013' '\032'-'\044' '\046'-'\127' ]
+| non_ascii_character
+
+
+let character_except_quot =                             (* '"' = '\034' *)
+  [ '\009' '\010' '\013' '\032'-'\033' '\035'-'\255' ]
+| non_ascii_character
+
+
+let character_except_apos =                             (* '\'' = '\039' *)
+  [ '\009' '\010' '\013' '\032'-'\038' '\040'-'\255' ]
+| non_ascii_character
+
+
+let pi_string = character_except_question_mark* 
+                ( '?' character_except_right_angle_bracket 
+                      character_except_question_mark* )* 
+                '?'?
+
+
+let comment_string = character_except_minus* 
+                     ('-' character_except_minus+ )*
+
+
+let normal_character = 
+  (* Character except '&' = '\038', '<' = '\060', ']' = '\093', and CR LF *)
+  [ '\009' '\032'-'\037' '\039'-'\059' '\061'-'\092' '\094'-'\127' ]
+| non_ascii_character
+
+
+let character_except_rbracket =                               (* ']' = '\093' *)
+  [ '\009' '\010' '\013' '\032'-'\092' '\094'-'\127' ]
+| non_ascii_character
+
+
+let character_except_rbracket_rangle =          (* ']' = '\093', '>' = '\062' *)
+  [ '\009' '\010' '\013' '\032'-'\061' '\063'-'\092' '\094'-'\127' ]
+| non_ascii_character
+
+
+let cdata_string = 
+  character_except_rbracket*
+  ( "]" character_except_rbracket+ |
+    "]]" ']'* character_except_rbracket_rangle character_except_rbracket*
+  )*
+  ']'*
+
+
+let printable_character_except_amp_lt =
+  (* '&' = '\038', '<' = '\060' *)
+  [ '\032'-'\037' '\039'-'\059' '\061'-'\127']
+| non_ascii_character
+
+
+let printable_character_except_amp_percent =
+  (* '%' = '\037', '&' = '\038' *)
+  [ '\032'-'\036' '\039'-'\127']
+| non_ascii_character
+
+
+let character_except_special =
+  (* '<'=060, ']'=093, '"'=034, '\''=039 *)
+  [ '\009' '\010' '\013' '\032'-'\033' '\035'-'\038' '\040'-'\059' 
+                         '\061'-'\092' '\094'-'\127' ]
+| non_ascii_character
+
+  
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/08/26 19:58:08  gerd
+ *     Bugfix in character_except_apos. The bug caused that attribute
+ * values delimited by &apos; could not be scanned at all.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
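
The byte ranges above follow from the UTF-8 encodings of the boundary code
points worked out in the comment (ED 9F BF, EE 80 80, F4 8F BF BF, ...). They
can be double-checked with a small self-contained encoder (a verification aid
only, not used by the lexer):

(* Self-contained UTF-8 encoder, only to verify the boundary bytes quoted in
 * the comment above. *)
let utf8_bytes cp =
  if cp < 0x80 then [cp]
  else if cp < 0x800 then
    [0xC0 lor (cp lsr 6); 0x80 lor (cp land 0x3F)]
  else if cp < 0x10000 then
    [0xE0 lor (cp lsr 12);
     0x80 lor ((cp lsr 6) land 0x3F);
     0x80 lor (cp land 0x3F)]
  else
    [0xF0 lor (cp lsr 18);
     0x80 lor ((cp lsr 12) land 0x3F);
     0x80 lor ((cp lsr 6) land 0x3F);
     0x80 lor (cp land 0x3F)]

let () =
  assert (utf8_bytes 0xD7FF   = [0xED; 0x9F; 0xBF]);
  assert (utf8_bytes 0xE000   = [0xEE; 0x80; 0x80]);
  assert (utf8_bytes 0x10FFFF = [0xF4; 0x8F; 0xBF; 0xBF])
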
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_defs_generic.def b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_defs_generic.def
new file mode 100644 (file)
index 0000000..aeece89
--- /dev/null
@@ -0,0 +1,122 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(*****************************************************************)
+(*          Claudio Sacerdoti Coen <sacerdot@cs.unibo.it>        *)
+(*                           14/05/2000                          *)
+(*                                                               *)
+(* These are taken from Appendix B of the XML Recommendation.    *)
+(*                                                               *)
+(*****************************************************************)
+
+(* 85 *)
+let baseChar =
+   [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6]
+ | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148]
+ | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5]
+ | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386
+ | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE]
+ | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3]
+ | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481]
+ | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB]
+ | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559
+ | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A]
+ | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE]
+ | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D
+ | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8]
+ | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD]
+ | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10]
+ | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36]
+ | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74]
+ | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8]
+ | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0
+ | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30]
+ | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D]
+ | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95]
+ | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4]
+ | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C]
+ | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39]
+ | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8]
+ | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1]
+ | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39]
+ | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33]
+ | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A
+ | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5
+ | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3]
+ | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69]
+ | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103]
+ | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C
+ | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159
+ | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E]
+ | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF]
+ | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9
+ | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D]
+ | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B
+ | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE
+ | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB]
+ | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126
+ | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094]
+ | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3] 
+;;
+
+(* 86 *)
+let ideographic = [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] ;;
+
+(* 84 *)
+let letter = baseChar | ideographic ;;
+
+(* 87 *)
+let combiningChar =
+   [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1]
+ | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4
+ | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF]
+ | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903]
+ | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963]
+ | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4]
+ | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02
+ | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48]
+ | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC
+ | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03]
+ | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D]
+ | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8]
+ | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44]
+ | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83]
+ | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6]
+ | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D]
+ | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1
+ | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19]
+ | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84]
+ | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD]
+ | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F]
+ | #x3099 | #x309A
+;;
+
+(* 88 *)
+let digit =
+   [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F]
+ | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F]
+ | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F]
+ | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]
+;;
+
+(* 89 *)
+let extender =
+   #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005
+ | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]
+;;
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_defs_iso88591.def b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_defs_iso88591.def
new file mode 100644 (file)
index 0000000..b57da24
--- /dev/null
@@ -0,0 +1,85 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+let ws = [ ' ' '\t' '\r' '\n' ]
+
+(* Note: ISO-8859-1 charset does not have 'combining characters' *)
+
+let letter = ['A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255']
+let extender = '\183'
+let digit = ['0'-'9']
+let ascii_digit = ['0'-'9']
+let ascii_hexdigit = ['0'-'9' 'A'-'F' 'a'-'f']
+let namechar = letter | digit | '.' | ':' | '-' | '_' | extender
+let name = ( letter | '_' | ':' ) namechar*
+let nmtoken = namechar+
+
+let character = ['\009' '\010' '\013' '\032'-'\255']
+
+let character_except_question_mark =                    (* '?' = '\063' *)
+  [ '\009' '\010' '\013' '\032'-'\062' '\064'-'\255' ]
+
+let character_except_right_angle_bracket =              (* '>' = '\062' *)
+  [ '\009' '\010' '\013' '\032'-'\061' '\063'-'\255' ]
+
+let character_except_minus =                            (* '-' = '\045' *)
+  [ '\009' '\010' '\013' '\032'-'\044' '\046'-'\255' ]
+
+let character_except_quot =                             (* '"' = '\034' *)
+  [ '\009' '\010' '\013' '\032'-'\033' '\035'-'\255' ]
+
+let character_except_apos =                             (* '\'' = '\039' *)
+  [ '\009' '\010' '\013' '\032'-'\038' '\040'-'\255' ]
+
+let pi_string = character_except_question_mark* 
+                ( '?' character_except_right_angle_bracket 
+                      character_except_question_mark* )* 
+                '?'?
+
+let comment_string = character_except_minus* 
+                     ('-' character_except_minus+ )*
+
+let normal_character = 
+  [^ '&' '<' ']' '\000'-'\008' '\010'-'\031']
+
+let character_except_rbracket =                               (* ']' = '\093' *)
+  [ '\009' '\010' '\013' '\032'-'\092' '\094'-'\255' ]
+
+let character_except_rbracket_rangle =          (* ']' = '\093', '>' = '\062' *)
+  [ '\009' '\010' '\013' '\032'-'\061' '\063'-'\092' '\094'-'\255' ]
+
+let cdata_string = 
+  character_except_rbracket*
+  ( "]" character_except_rbracket+ |
+    "]]" ']'* character_except_rbracket_rangle character_except_rbracket*
+  )*
+  ']'*
+(* cdata_string = char* - ( char* ']]>' char* ) *)
+
+let printable_character_except_amp_lt =
+  (* '&' = '\038', '<' = '\060' *)
+  [ '\032'-'\037' '\039'-'\059' '\061'-'\255']
+
+let printable_character_except_amp_percent =
+  (* '%' = '\037', '&' = '\038' *)
+  [ '\032'-'\036' '\039'-'\255']
+
+let character_except_special =
+  (* '<'=060, ']'=093, '"'=034, '\''=039 *)
+  [ '\009' '\010' '\013' '\032'-'\033' '\035'-'\038' '\040'-'\059' 
+                         '\061'-'\092' '\094'-'\255' ]
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_document.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_document.src
new file mode 100644 (file)
index 0000000..33310c1
--- /dev/null
@@ -0,0 +1,70 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+  open Pxp_types
+  open Pxp_lexer_types
+
+#insert pxp_lex_aux.src
+
+#insert open_pxp_lex_aux_*.src
+#insert open_pxp_lex_misc_*.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+
+(* scan_document: Lexer for the outermost structures *)
+
+rule scan_document = parse
+    "<?" pi_string "?>"
+      { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi, Document }
+  | "<?"
+      { raise (WF_error ("Illegal processing instruction")) }
+  | "<!DOCTYPE"
+      { tok_Doctype__Document_type }
+  | "<!--" 
+      { Comment_begin, Document_comment }
+  | "<!"
+      { raise (WF_error "Declaration either malformed or not allowed in this context") 
+      }
+  | "<" name
+      { let s = Lexing.lexeme lexbuf in
+       Tag_beg (String.sub s 1 (String.length s - 1), dummy_entity), Within_tag
+      }
+  | '<'
+      { raise (WF_error ("Illegal token")) }
+  | ws+
+      { tok_Ignore__Document }
+  | eof
+      { tok_Eof__Document }
+  | character
+      { raise (WF_error ("Content not allowed here")) }
+  | _
+      { raise Netconversion.Malformed_code }
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.4  2000/08/18 20:19:59  gerd
+ *     Comments return different comment tokens.
+ *
+ * Revision 1.3  2000/08/14 22:18:34  gerd
+ *     Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2  2000/05/29 23:53:12  gerd
+ *     Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_document_type.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_document_type.src
new file mode 100644 (file)
index 0000000..c775883
--- /dev/null
@@ -0,0 +1,72 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+  open Pxp_types
+  open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+
+(* scan_document_type: after "<!DOCTYPE" until matching ">" *)
+
+rule scan_document_type = parse
+    name
+      { let s = Lexing.lexeme lexbuf in
+       Name s, Document_type }
+  | ws+
+      { tok_Ignore__Document_type }
+  | '"' character_except_quot* '"'
+      { let s = Lexing.lexeme lexbuf in
+       (Unparsed_string (String.sub s 1 (String.length s - 2))), Document_type }
+  | '"'
+      { raise (WF_error ("Cannot find the second quotation mark"))
+      }
+  | "'" character_except_apos* "'"
+      { let s = Lexing.lexeme lexbuf in
+       (Unparsed_string (String.sub s 1 (String.length s - 2))), Document_type }
+  | "'"
+      { raise (WF_error ("Cannot find the second quotation mark"))
+      }
+  | '['
+      { tok_Dtd_begin__Declaration }
+  | '>'
+      { tok_Doctype_rangle__Document }
+  | eof
+      { tok_Eof__Document_type }
+  | '&' 
+      { raise (WF_error("References to general entities not allowed here")) }
+  | '%' 
+      { raise (WF_error("References to parameter entities not allowed here")) }
+  | character
+      { raise (WF_error("Content not allowed here")) }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/08/14 22:18:34  gerd
+ *     Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2  2000/05/29 23:53:12  gerd
+ *     Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_dtd_string.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_dtd_string.src
new file mode 100644 (file)
index 0000000..1fb11e6
--- /dev/null
@@ -0,0 +1,70 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+  open Pxp_types
+  open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+(* The following scanner is used to determine the replacement text of
+ * internal entities:
+ *)
+
+rule scan_dtd_string = parse
+    '%' name ';'
+      { let s = Lexing.lexeme lexbuf in
+       PERef (String.sub s 1 (String.length s - 2)) }
+  | '%'
+      { raise(WF_error("The character '%' must be written as '&#37;'")) }
+  | '&' name ';'
+      { let s = Lexing.lexeme lexbuf in
+       ERef (String.sub s 1 (String.length s - 2)) }
+  | "&#" ascii_digit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+       CRef (int_of_string (String.sub s 2 (String.length s - 3))) }
+  | "&#x" ascii_hexdigit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+       CRef (int_of_string ("0x" ^ String.sub s 3 (String.length s - 4))) }
+  | '&'
+      { raise(WF_error("The character '&' must be written as '&amp;'")) }
+  | '\013' '\010'
+      { CRef(-1) }
+  | '\013'
+      { CRef(-2) }
+  | '\010'
+      { CRef(-3) }
+  | '\009'
+      { CharData "\009" }
+  | printable_character_except_amp_percent+
+      { CharData (Lexing.lexeme lexbuf) }
+  | eof
+      { Eof }
+  | _
+      { raise Netconversion.Malformed_code }
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/08/14 22:18:34  gerd
+ *     Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2  2000/05/29 23:53:12  gerd
+ *     Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
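
The negative CRef values emitted above (-1 for CR LF, -2 for CR, -3 for LF)
are sentinels rather than character codes; whoever assembles the replacement
text maps them back to line endings. A minimal hypothetical decoder, only to
make the convention concrete (the real logic sits in the entity layer):

(* Hypothetical decoder for the line-ending sentinels emitted above. *)
let line_end_of_sentinel ~normalise = function
  | -1 -> if normalise then "\n" else "\r\n"    (* CR LF *)
  | -2 -> if normalise then "\n" else "\r"      (* CR    *)
  | -3 -> "\n"                                  (* LF    *)
  | n  -> invalid_arg ("not a line-ending sentinel: " ^ string_of_int n)

let () =
  assert (line_end_of_sentinel ~normalise:true  (-1) = "\n");
  assert (line_end_of_sentinel ~normalise:false (-2) = "\r")
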
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_misc.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_misc.src
new file mode 100644 (file)
index 0000000..2a0d29f
--- /dev/null
@@ -0,0 +1,146 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+  open Pxp_types
+  open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+(* The remaining, smaller lexers *)
+
+rule scan_characters = parse
+  character*
+    { () }
+| eof 
+    { () }
+| _
+    { raise Netconversion.Malformed_code }
+
+
+and scan_xml_pi = parse
+    name ws*
+      { let s = Lexing.lexeme lexbuf in
+       let j = get_name_end s 0 in
+       Pro_name (String.sub s 0 j)
+      }
+  | "=" ws*
+      { Pro_eq }
+  | "'" character_except_apos* "'" ws+
+      { let s = Lexing.lexeme lexbuf in
+       let j = String.index_from s 1 '\'' in
+       Pro_string (String.sub s 1 (j-1))
+      }
+  | "'"
+      { raise (WF_error ("Cannot find the second quotation mark"))
+      }
+  | '"' character_except_quot* '"' ws+
+      { let s = Lexing.lexeme lexbuf in
+       let j = String.index_from s 1 '"' in
+       Pro_string (String.sub s 1 (j-1))
+      }
+  | '"'
+      { raise (WF_error ("Cannot find the second quotation mark"))
+      }
+  | eof
+      { Pro_eof }
+  | character
+      { (* prerr_endline (Lexing.lexeme lexbuf); *)
+       raise (WF_error("Illegal token or character")) 
+      }
+  | _ 
+      { raise Netconversion.Malformed_code }
+
+and scan_only_xml_decl = parse
+    "<?xml" ws+ pi_string "?>"
+      { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi }
+  | ""
+      { Eof }
+
+and scan_for_crlf = parse
+  | '\013' '\010'
+      { CharData "\n" }
+  | '\013'
+      { CharData "\n" }
+  | '\010'
+      { CharData "\n" }
+  | [^ '\010' '\013' ]+
+      { CharData (Lexing.lexeme lexbuf) }
+  | eof 
+      { Eof }
+
+and scan_content_comment = parse
+    "-->"
+      { Comment_end, Content }
+  | "--"
+      { raise (WF_error "Double hyphens are illegal inside comments") }
+  | "-"
+      { Comment_material "-", Content_comment }
+  | character_except_minus+
+      { Comment_material(Lexing.lexeme lexbuf), Content_comment }
+  | eof
+      { Eof, Content_comment }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* In declarations, comments are always thrown away. *)
+
+and scan_decl_comment = parse
+    "-->"
+      { Comment_end, Declaration }
+  | "--"
+      { raise (WF_error "Double hyphens are illegal inside comments") }
+  | "-"
+      { Comment_material "", Decl_comment }
+  | character_except_minus+
+      { Comment_material "", Decl_comment }
+  | eof
+      { Eof, Decl_comment }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+and scan_document_comment = parse
+    "-->"
+      { Comment_end, Document }
+  | "--"
+      { raise (WF_error "Double hyphens are illegal inside comments") }
+  | "-"
+      { Comment_material "-", Document_comment }
+  | character_except_minus+
+      { Comment_material(Lexing.lexeme lexbuf), Document_comment }
+  | eof
+      { Eof, Document_comment }
+  | _
+      { raise Netconversion.Malformed_code }
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.4  2000/08/18 20:19:59  gerd
+ *     Comments return different comment tokens.
+ *
+ * Revision 1.3  2000/08/14 22:18:34  gerd
+ *     Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2  2000/05/29 23:53:12  gerd
+ *     Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
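
The quoted-string rules in scan_xml_pi above locate the closing quote again
with String.index_from and cut out everything between the two quotes, because
the matched lexeme also contains the trailing whitespace required by the rule.
The arithmetic in isolation (standalone sketch):

(* Standalone check of the quoted-string extraction used in scan_xml_pi. *)
let unquote s =
  let j = String.index_from s 1 '\'' in   (* position of the closing quote *)
  String.sub s 1 (j - 1)

let () =
  (* The lexeme for  "'" ... "'" ws+  includes the trailing blank. *)
  assert (unquote "'1.0' " = "1.0")
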
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_name_string.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_name_string.src
new file mode 100644 (file)
index 0000000..57a0d54
--- /dev/null
@@ -0,0 +1,77 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+  open Pxp_types
+  open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+rule scan_name_string = parse
+    name
+      { Name (Lexing.lexeme lexbuf) }
+  | ws+
+      { Ignore }
+  | nmtoken
+      { Nametoken (Lexing.lexeme lexbuf) }
+  | eof
+      { Eof }
+  | character
+      { CharData (Lexing.lexeme lexbuf) }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+and scan_ignored_section = parse
+  | "<!["
+      { tok_Conditional_begin__Ignored }
+  | "]]>"
+      { tok_Conditional_end__Ignored }
+  | "<!--" comment_string "-->"
+      { tok_Ignore__Ignored }
+  | '"' character_except_quot* '"'
+      { tok_Ignore__Ignored }
+  | "'" character_except_apos* "'"
+      { tok_Ignore__Ignored }
+  | eof
+      { tok_Eof__Ignored }
+  | character_except_special+
+      { tok_Ignore__Ignored }
+  | "<"
+      { tok_Ignore__Ignored }
+  | "]"
+      { tok_Ignore__Ignored }
+  | "'"
+      { tok_Ignore__Ignored }
+  | "\""
+      { tok_Ignore__Ignored }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/08/14 22:18:34  gerd
+ *     Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2  2000/05/29 23:53:12  gerd
+ *     Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_within_tag.src b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_within_tag.src
new file mode 100644 (file)
index 0000000..39697b0
--- /dev/null
@@ -0,0 +1,69 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+  open Pxp_types
+  open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+
+rule scan_within_tag = parse
+    ws+
+      { tok_Ignore__Within_tag }
+  | name
+      { Name (Lexing.lexeme lexbuf ), Within_tag }
+  | '='
+      { tok_Eq__Within_tag }
+  | '"' character_except_quot* '"'
+      { let s = Lexing.lexeme lexbuf in
+       let v = String.sub s 1 (String.length s - 2) in
+       Attval v, Within_tag }
+  | '"'
+      { raise (WF_error ("Cannot find the second quotation mark"))
+      }
+  | "'" character_except_apos* "'"
+      { let s = Lexing.lexeme lexbuf in
+       let v = String.sub s 1 (String.length s - 2) in
+       Attval v, Within_tag }
+  | "'"
+      { raise (WF_error ("Cannot find the second quotation mark"))
+      }
+  | '>'
+      { tok_Rangle__Content }
+  | "/>"
+      { tok_Rangle_empty__Content }
+  | eof
+      { tok_Eof__Within_tag }
+  | character
+      { raise (WF_error ("Illegal inside tags")) }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/08/14 22:18:34  gerd
+ *     Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2  2000/05/29 23:53:12  gerd
+ *     Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1  2000/05/20 20:33:25  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/Makefile b/helm/DEVEL/pxp/pxp/m2parsergen/Makefile
new file mode 100644 (file)
index 0000000..78f5359
--- /dev/null
@@ -0,0 +1,62 @@
+# make all:            make bytecode executable
+# make clean:          remove intermediate files (in this directory)
+# make CLEAN:           remove intermediate files (recursively)
+
+#----------------------------------------------------------------------
+
+SRC = ast.ml lexer.ml parser.ml generator.ml
+OBJ = $(SRC:.ml=.cmo)
+
+#----------------------------------------------------------------------
+
+
+.PHONY: all
+all: m2parsergen
+
+.PHONY: clean
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa lexer.ml parser.ml \
+             parser.mli
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~ depend depend.pkg m2parsergen a.out x.ml
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS   =
+OCAMLC    = ocamlc -g $(OPTIONS) $(ROPTIONS)
+OCAMLOPT  = ocamlopt -p $(OPTIONS) $(ROPTIONS)
+OCAMLDEP  = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+#----------------------------------------------------------------------
+
+depend: $(SRC) 
+       $(OCAMLDEP) $(SRC) >depend
+
+m2parsergen: $(OBJ)
+       $(OCAMLC) -o m2parsergen $(OBJ)
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly
+
+.ml.cmx:
+       $(OCAMLOPT) -c $<
+
+.ml.cmo:
+       $(OCAMLC) -c $<
+
+.mli.cmi:
+       $(OCAMLC) -c $<
+
+.mll.ml:
+       ocamllex $<
+
+.mly.ml:
+       ocamlyacc $<
+
+include depend
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/README b/helm/DEVEL/pxp/pxp/m2parsergen/README
new file mode 100644 (file)
index 0000000..cccf7aa
--- /dev/null
@@ -0,0 +1,319 @@
+----------------------------------------------------------------------
+m2parsergen
+----------------------------------------------------------------------
+
+This is a parser generator for top-down (recursive-descent) parsers.
+The input file must be structured as follows:
+
+---------------------------------------- Beginning of file
+
+<OCAML TEXT ("preamble")>
+
+%%
+
+<DECLARATIONS>
+
+%%
+
+<RULES>
+
+%%
+
+<OCAML TEXT ("postamble")>
+
+---------------------------------------- End of file
+
+The two-character combination %% separates the various sections. The
+text before the first %% and after the last %% will be copied verbatim
+to the output file.
+
+Within the declarations and rules sections you must use /* ... */ as
+comment braces.
+
+There are two types of declarations:
+
+%token Name
+
+declares that Name is a token without associated value, and
+
+%token <> Name
+
+declares that Name is a token with associated value (i.e. Name x).
+
+In contrast to ocamlyacc, you need not specify a type. This is a
+fundamental difference, because m2parsergen will not generate a
+declaration for the "token" type; you must write it yourself.
+
+You need not declare start symbols; every grammar rule may be used
+as a start symbol.
+
+The rules look like:
+
+name_of_rule(arg1, arg2, ...):
+  label1:symbol1 label2:symbol2 ... {{ CODE }}
+| label1:symbol1 label2:symbol2 ... {{ CODE }}
+...
+| label1:symbol1 label2:symbol2 ... {{ CODE }}
+
+The rules may have arguments (note that you must write the
+parentheses even if the rule does not have arguments). Here, arg1,
+arg2, ... are the formal names of the arguments; you may refer to them
+in OCaml code.
+
+Furthermore, the symbols may have labels (labels may be left out).
+You can refer to the value associated with a symbol by its label,
+i.e. an OCaml variable with the same name as the label is in scope,
+and this variable contains the value.
+
+The OCaml code must be enclosed in {{ and }}, and these separators
+must not occur within the code.
+
+EXAMPLE:
+
+prefix_term():
+  Plus_symbol Left_paren v1:prefix_term() Comma v2:prefix_term() Right_paren
+    {{ v1 + v2 }}
+| Times_symbol Left_paren v1:prefix_term() Comma v2:prefix_term() Right_paren
+    {{ v1 * v2 }}
+| n:Number
+    {{ n }}
+
+As you can see in the example, you must pass values for the arguments
+if you call non-terminal symbols (here, the argument list is empty: ()).
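+
+For illustration, a rule that takes an argument could look like this
+(the names are invented; Number is the token from the example above):
+
+term_times(k):
+  n:Number
+    {{ k * n }}
+
+Another rule can then invoke it as  v:term_times(k)  where k must be a
+lowercase identifier that is visible at that point (e.g. a rule
+argument or a variable bound in a let-binding, see below).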
+
+The generated parsers behave as follows:
+
+- A rule is applicable to a token sequence if the first token is
+  matched by the rule.
+
+  In the example: prefix_term is applicable if the first token of a
+  sequence is either Plus_symbol, Times_symbol, or Number.
+
+- One branch of the applicable rule is selected: it is the first
+  branch that matches the first token. THE OTHER TOKENS DO NOT HAVE
+  ANY EFFECT ON BRANCH SELECTION!
+
+  For instance, in the following rule the second branch is never
+  selected, because only the A is used to select the branch:
+
+  a():
+    A B {{ ... }}
+  | A C {{ ... }}
+
+- Once a branch is selected, it is checked whether the branch matches
+  the token sequence. If this check succeeds, the code section of the
+  branch is executed, and the resulting value is returned to the
+  caller.
+  If the check fails, the exception Parsing.Parse_error is raised.
+  Normally, this exception is not caught, and will force the parser
+  to stop.
+
+  The check in detail:
+
+  If the rule demands a terminal, there must be exactly this
+  terminal at the corresponding location in the token sequence.
+
+  If the rule demands a non-terminal, it is checked whether the rule
+  for this non-terminal is applicable. If so, a branch of that rule
+  is selected and checked recursively. If the rule is not applicable,
+  the check fails immediately.
+
+- THERE IS NO BACKTRACKING! 
+
+  Note that the following works (but the construction is resolved at
+  generation time):
+
+  rule1():
+     rule2() A B ... {{ ... }}
+
+  rule2():
+     C {{ ... }}
+   | D {{ ... }}
+
+  In this case, the (only) branch of rule1 is selected if the next
+  token is C or D.
+
+---
+
+
+
+*** Options and repetitions ***
+
+Symbols can be marked as optional, or as occurring repeatedly:
+
+rule():
+  Name whitespace()* Question_mark?
+
+- "*": The symbol matches zero or more occurrences.
+
+- "?": The symbol matches zero or one occurrence.
+
+This is done as follows:
+
+- terminal*: The maximum number of consecutive <terminal> tokens is
+             matched.
+
+- non-terminal*: As many subsequences matching <non-terminal> as
+                 possible are matched. Before another subsequence is
+                 matched, it is checked whether the rule for
+                 <non-terminal> is applicable. If so, the rule is
+                 invoked and must succeed (otherwise
+                 Parsing.Parse_error is raised). If not, the loop is
+                 exited.
+
+- terminal?: If the next token is <terminal>, it is matched. If not,
+             no token is matched.
+
+- non-terminal?: It is checked whether the rule for <non-terminal>
+                 is applicable. If so, the rule is invoked, and
+                 matches a sequence of tokens. If not, no token is
+                 matched.
+
+You may refer to repeated or optional symbols by labels. In this case,
+the label is bound to a list of values, or to an optional value,
+respectively:
+
+rule():
+  A  lab:other()*  lab':unlikely()?
+    {{ let n = List.length lab in ... 
+       match lab' with
+         None -> ...
+       | Some v -> ... 
+    }}
+
+A different scheme is applied if the symbol is a token without
+associated value (%token Name, and NOT %token <> Name):
+
+rule():
+  A lab:B* lab':C?
+
+Here, "lab" becomes an integer variable counting the number of Bs, and
+"lab'" becomes a boolean variable denoting whether there is a C or not.
+
+
+*** Early let-binding ***
+
+You may put some OCaml code directly after the first symbol of a
+branch:
+
+rule():
+  A $ {{ let-binding }} C D ... {{ ... }}
+
+The code brace {{ let-binding }} must be preceded by a dollar
+sign. You can put "let ... = ... in" statements into this brace:
+
+rule1():
+  n:A $ {{ let twice = 2 * n in }} rule2(twice) {{ ... }}
+
+This code is executed once the branch is selected.
+
+
+*** Very early let-binding ***
+
+This is also possible:
+
+rule():
+  $ {{ CODE }}
+  A
+  ...
+
+The CODE is executed right when the branch is selected, and before
+anything else happens. (Only for hacks!)
+
+
+
+*** Computed rules ***
+
+rule():
+  A $ {{ let followup = ... some function ... in }} [ followup ]() 
+    {{ ... }}
+
+Between [ and ], you can refer to the OCaml name of *any* function.
+Here, the function "followup" is bound in the let-binding.
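+
+Internally, the generated code applies this function to yy_current,
+yy_get_next and the actual arguments, i.e. the call has the form
+
+  followup yy_current yy_get_next arg1 arg2 ...
+
+so the function must follow the same calling convention as the
+generated parse_* functions (for example, it may simply be one of
+them).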
+
+
+*** Error handling ***
+
+If a branch has already been selected, but the check whether the
+remaining symbols of the branch match fails, it is possible to catch
+the resulting exception and to find out at which position the failure
+occurred.
+
+rule():
+  x:A y:B z:C {{ ... }} ? {{ ERROR-CODE }}
+
+After a question mark, another code brace may be appended. This code
+is executed if the branch check fails (but not if the branch is not
+selected, nor if no branch is selected at all). The string variable
+!yy_position contains the label of the symbol that caused the failure
+(or the empty string if the symbol does not have a label).
+
+Example:
+
+rule():
+  x:A y:B z:C {{ print_endline "SUCCESS" }} ? {{ print_endline !yy_position }}
+
+If the token sequence is A B C, "SUCCESS" will be printed. If the
+sequence is A C, the second symbol fails, and "y" will be printed. If
+the sequence is A B D, the third symbol fails, and "z" will be
+printed. If the sequence is B, the rule will never be selected because
+it is not applicable.
+
+
+
+*** Error recovery ***
+
+You may call the functions yy_current, yy_get_next, or one of the
+parse_* functions in the error brace to recover from the error
+(e.g. to move ahead until a certain token is reached). See below.
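+
+An illustrative sketch (EOF stands for any token suitable for
+resynchronization; the names are made up):
+
+rule():
+  x:A y:B z:C {{ 0 }}
+? {{ (* skip everything up to the next EOF token, return a default *)
+     while yy_current() <> EOF do ignore(yy_get_next()) done;
+     0 }}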
+
+
+
+*** How to call the parser ***
+
+The rules are rewritten into an OCaml let-binding:
+
+let rec parse_<rule1> ... = ...
+    and parse_<rule2> ... = ...
+    ...
+    and parse_<ruleN> ... = ...
+in
+
+i.e. there are many functions, and their names are "parse_" plus the
+names of the rules. You can call any of these functions.
+
+The first two arguments of the functions have a special meaning; the
+other arguments are the arguments coming from the rule description:
+
+rule(a,b):
+  ...
+
+===>
+
+let rec parse_rule yy_current yy_get_next a b = ...
+
+The first argument, yy_current, is a function that returns the current
+token. The second argument, yy_get_next, is a function that switches
+to the next token, and returns it.
+
+If the tokens are stored in a list, a possible definition is:
+
+let input = ref [ Token1; Token2; ... ] in
+let yy_current() = List.hd !input in
+let yy_get_next () =
+  input := List.tl !input;
+  List.hd !input
+
+When you call one of the parser functions, the current token must
+already be loaded, i.e. yy_current must return the first token to be
+matched by the function.
+
+After the function has returned, the current token is the token
+following the sequence of tokens that have been matched by the
+function.
+
+The function returns the value computed by the OCaml code brace of the
+rule (or the value of the error brace).
+
+If the rule is not applicable, the exception Not_found is raised.
+
+If the rule is applicable, but it does not match, the exception
+Parsing.Parse_error is raised.
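+
+For completeness, a calling sketch (illustrative only) that uses the
+token type and the rule "r" from the accompanying example file x.m2y;
+as there, this code belongs into the postamble section:
+
+let input = ref [ A; B; B; C 5; EOF ] in
+let yy_current () = List.hd !input in
+let yy_get_next () =
+  input := List.tl !input;
+  List.hd !input
+in
+(try parse_r yy_current yy_get_next
+ with
+     Not_found -> prerr_endline "rule not applicable"
+   | Parsing.Parse_error -> prerr_endline "syntax error")
+;;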
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/ast.ml b/helm/DEVEL/pxp/pxp/m2parsergen/ast.ml
new file mode 100644 (file)
index 0000000..219dd0c
--- /dev/null
@@ -0,0 +1,75 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+type declaration =
+    D_token of string                         (* D_token name *)
+  | D_typed_token of string                   (* D_typed_token name *)
+;;
+
+type symbol =
+    U_symbol of (string * string option)      (* U_symbol(token, label) *)
+  | L_symbol of (string * string list * string option)
+                                        (* L_symbol(token, args, label) *)
+  | L_indirect of (string * string list * string option)
+;;
+
+
+type modifier =
+    Exact
+  | Option
+  | Repetition
+;;
+
+
+type pattern =
+    { pat_symbol : symbol;
+      pat_modifier : modifier;
+    }
+
+
+type branch = 
+    { branch_selector : symbol;
+      branch_early_code : (string * int * int);
+      branch_binding_code : (string * int * int);
+      branch_pattern : pattern list;
+      branch_result_code : (string * int * int);
+      branch_error_code : (string * int * int) option;
+    }
+;;
+
+type rule =
+    { rule_name : string;
+      rule_arguments : string list;           (* List of names *)
+      rule_branches : branch list;
+    }
+;;
+
+type text =
+    { text_decls : declaration list;
+      text_rules : rule list;
+    }
+;;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/05/09 00:03:22  gerd
+ *     Added [ ml_name ] symbols, where ml_name is an arbitrary
+ * OCaml identifier.
+ *
+ * Revision 1.2  2000/05/08 22:03:01  gerd
+ *     It is now possible to have a $ {{ }} sequence right BEFORE
+ * the first token. This code is executed just after the first token
+ * has been recognized.
+ *
+ * Revision 1.1  2000/05/06 17:36:17  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/generator.ml b/helm/DEVEL/pxp/pxp/m2parsergen/generator.ml
new file mode 100644 (file)
index 0000000..4301f22
--- /dev/null
@@ -0,0 +1,920 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Parser
+open Ast
+
+(* Overall scheme:
+ *
+ * The rules are translated to:
+ *
+ * let rec parse_<rule1> ... = ...
+ *     and parse_<rule2> ... = ...
+ *     and ...
+ *     and parse_<ruleN> ... = ...
+ * in
+ *
+ * Every rule has at least two arguments: 'current' and 'get_next'.
+ * 'current()' is the token that should match the first symbol of the
+ * rule. 'get_next()' returns the next token.
+ *
+ * The rules may have further user arguments; these are the next arguments
+ * in turn.
+ *
+ * The rules return the user value. After they have returned to the caller 
+ * the current token is the token that follows the sequence of tokens 
+ * matching the rule.
+ *
+ * The rules will raise:
+ *  - Not_found if the first token does not match
+ *  - Parsing.Parse_error if the rest does not match.
+ *
+ * Rule scheme:
+ *
+ * rule(arg1,arg2,...):
+ *   (l1:x1)
+ *   {{ let-CODE }}
+ *   (l2:y2(name1,...)) y3 ... 
+ *   {{ CODE }}
+ *   ? {{ ?-CODE }}
+ * | x2 ...
+ * | ...
+ * | xN
+ *
+ * let parse_<rule> current get_next arg1 arg2 ... =
+ *   match current() with
+ *     S(x1) -> ...
+ *   | S(x2) -> ...
+ *   | ...
+ *   | S(xN) -> ...
+ *   | _ -> raise Not_found
+ *
+ * Here, S(xi) denotes the set of tokens matched by xi without all tokens
+ * already matched by x1 to x(i-1). (If S(xi) = empty, a warning is printed,
+ * and this branch of the rule is omitted.)
+ *
+ * S(xi) may be a set because xi may be a reference to another rule. In this
+ * case, S(xi) bases on the set of tokens that match the first symbol of 
+ * the other rule. (In general, S(xi) must be computed recursively.)
+ *
+ * If the "?" clause is present, every branch is embraced by the following:
+ *
+ * let position = ref "<Label of x1>" in
+ * ( try ... 
+ *   with Parsing.Parse_error -> ( <<?-CODE>> )
+ * )
+ * 
+ * Next: The "..." is
+ *
+ * OPTIONAL: let <l1> = parse_<rule(x1)> in
+ * <<let-CODE>>
+ * M(y1)
+ * M(y2)
+ * ...
+ * M(yN)
+ * <<CODE>>
+ *
+ * If x1 is a rule invocation, it is now parsed, and the result is bound
+ * to a variable.
+ *
+ * Note: After x1 has matched, the Caml variable <l1> must be either
+ * bound to the result of the sub parsing, or to the value associated
+ * with the token (if any). The latter is already done in the main
+ * "match" statement, i.e. "match ... with S(x1) -> ..." is actually
+ * "match ... with Token1 <l1> -> ...".
+ *
+ * Note: After calling parse_<rule(x1)> the exception Not_found is NEVER
+ * converted to Parsing.Parse_error. It is simply not possible that this
+ * happens.
+
+ * For every remaining symbol yi of the rule, a matching statement M(yi)
+ * is produced. These statements have the form:
+ *
+ * OPTIONAL: position := "<Label of yi>";
+ * CASE: yi is a token without associated value
+ *     let yy_i = get_next()  OR  current() in
+ *     if yy_i <> Token(yi) then raise Parsing.Parse_error;
+ * CASE: yi is a token with value
+ *     let yy_i = get_next()  OR  current() in
+ *     let <li> = match yy_i with Token x -> x | _ -> raise Parsing.Parse_error 
+ *     in
+ * CASE: yi is a rule invocation
+ *     OPTIONAL: let _ = get_next() in
+ *     let <li> = try parse_<rule(yi)> 
+ *                with Not_found -> raise Parsing.Parse_error in
+ *
+ * yy_i is get_next() if y(i-1) was a token, and yy_i is current() if
+ * y(i-1) was a rule invocation.
+ *
+ * Repetitions:
+ *
+ * If yi = (yi')*:
+ *
+ * CASE no label given:
+ *
+ * ( try 
+ *     while true do 
+ *       M(yi') with the modification that top-level mismatches raise
+ *              Not_found instead of Parsing.Parse_error
+ *     done
+ *   with Not_found -> ()
+ * )
+ *
+ * CASE a label <li> is given: The list of results must be bound to <li>!
+ *
+ * let yy_list = ref [] in
+ * ( try 
+ *     while true do
+ *       let yy_first = M(yi') (with some modifications) in
+ *       yy_list := yy_first :: !yy_list;
+ *     done
+ *   with Not_found -> ()
+ * );
+ * let <li> = List.rev !yy_list in
+ *
+ * Note that this scheme minimizes stack and heap allocations.
+ *
+ * Options:
+ *
+ * If yi = (yi')?:
+ *
+ * CASE no label given:
+ *
+ * ( try 
+ *     M(yi') with the modification that top-level mismatches raise
+ *            Not_found instead of Parsing.Parse_error
+ *   with Not_found -> ()
+ * )
+ *
+ * CASE a label <li> is given: The optional result must be bound to <li>!
+ *
+ * let <li> =
+ *   try 
+ *     Some( M(yi') (with some modifications) )
+ *   with Not_found -> None
+ * );
+ *)
+
+
+let lookup_rule tree name =
+  try
+    List.find (fun r -> r.rule_name = name) tree.text_rules
+  with
+      Not_found ->
+       failwith ("Rule `" ^ name ^ "' not found")
+;;
+
+
+let is_typed tree name =
+  (* Find out whether the token 'name' is typed or not *)
+  let decl =
+    try
+      List.find (fun d -> match d with
+                    D_token n -> n = name
+                  | D_typed_token n -> n = name
+               )
+       tree.text_decls
+    with
+       Not_found -> 
+         failwith ("Token `" ^ name ^ "' not found")
+  in
+  match decl with
+      D_token _ -> false
+    | D_typed_token _ -> true
+;;
+
+
+let label_of_symbol tree sym =
+  match sym with
+      U_symbol (tok, lab) -> 
+       (* if is_typed tree tok then lab else None *)
+       lab
+    | L_symbol (_, _, lab) -> lab
+    | L_indirect (_, _, lab) -> lab
+;;
+
+
+let is_untyped_U_symbol tree sym =
+  match sym with
+      U_symbol (tok, _) -> 
+       not(is_typed tree tok)
+    | L_symbol (_, _, _) -> false
+    | L_indirect (_, _, _) -> false
+;;
+
+
+
+let rec set_of_list l =
+  (* Removes duplicate members of l *)
+  match l with
+      [] -> []
+    | x :: l' -> if List.mem x l' then set_of_list l' else x :: (set_of_list l')
+;;
+
+
+let selector_set_of_rule tree name =
+  (* Determines the set of tokens that match the first symbol of a rule *)
+  
+  let rec collect visited_rules name =
+    if List.mem name visited_rules then
+      []
+    else
+      let r = lookup_rule tree name in
+      List.flatten
+       (List.map
+          (fun branch ->
+             match branch.branch_selector with
+                 U_symbol (tok_name,_) ->
+                   [ tok_name ]
+               | L_symbol (rule_name, _, _) ->
+                   collect (name :: visited_rules) rule_name
+               | L_indirect (_, _, _) ->
+                   failwith("The first symbol in rule `" ^ name ^ 
+                            "' is an indirect call; this is not allowed")
+          )
+          r.rule_branches
+       )
+  in
+  set_of_list (collect [] name)
+;;
+
+
+let output_code_location b file_name (_, line, column) = 
+  Buffer.add_string b "\n";
+  Buffer.add_string b ("# " ^ string_of_int line ^ " \"" ^
+                      file_name ^ "\"\n");
+  Buffer.add_string b (String.make column ' ')
+;;
+
+
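+(* phantasy_line: counter for the artificial line numbers emitted in the
+ * line number directives that follow every copied code brace; intended to
+ * make locations in the generated code easier to recognize.
+ *)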
+let phantasy_line = ref 100000;;
+
+let output_code b file_name ((code, line, column) as triple) = 
+  if code <> "" then begin
+    output_code_location b file_name triple;
+    Buffer.add_string b code;
+    Buffer.add_string b ("\n# " ^ string_of_int !phantasy_line ^ " \"<Generated Code>\"\n");
+    phantasy_line := !phantasy_line + 10000;
+  end
+;;
+
+
+let process_branch b file_name tree branch =
+
+  let make_rule_invocation called_rule args lab allow_not_found =
+    (* Produces: let <label> = parse_<called_rule> ... args in 
+     * If not allow_not_found, the exception Not_found is caught and
+     * changed into Parsing.Parse_error.
+     *)
+    let r = lookup_rule tree called_rule in
+    if List.length r.rule_arguments <> List.length args then
+      failwith("Calling rule `" ^ called_rule ^ "' with the wrong number of arguments!");
+
+    Buffer.add_string b "let ";
+    begin match lab with
+       None   -> Buffer.add_string b "_"
+      | Some l -> Buffer.add_string b l
+    end;
+    Buffer.add_string b " = ";
+    if not allow_not_found then
+      Buffer.add_string b "try ";
+    Buffer.add_string b "parse_";
+    Buffer.add_string b called_rule;
+    Buffer.add_string b " yy_current yy_get_next";
+    List.iter
+      (fun a -> Buffer.add_string b " ";
+               Buffer.add_string b a;
+      )
+      args;
+    if not allow_not_found then
+      Buffer.add_string b " with Not_found -> raise Parsing.Parse_error";
+    Buffer.add_string b " in\n"
+  in
+
+  let make_indirect_rule_invocation ml_name args lab allow_not_found =
+    (* Produces: let <label> = ml_name ... args in 
+     * If not allow_not_found, the exception Not_found is caught and
+     * changed into Parsing.Parse_error.
+     *)
+    Buffer.add_string b "let ";
+    begin match lab with
+       None   -> Buffer.add_string b "_"
+      | Some l -> Buffer.add_string b l
+    end;
+    Buffer.add_string b " = ";
+    if not allow_not_found then
+      Buffer.add_string b "try ";
+    Buffer.add_string b ml_name;
+    Buffer.add_string b " yy_current yy_get_next";
+    List.iter
+      (fun a -> Buffer.add_string b " ";
+               Buffer.add_string b a;
+      )
+      args;
+    if not allow_not_found then
+      Buffer.add_string b " with Not_found -> raise Parsing.Parse_error";
+    Buffer.add_string b " in\n"
+  in
+
+  let process_symbol sym previous_was_token allow_not_found =
+    match sym with
+       U_symbol(tok, lab) ->
+         (* Distinguish between simple tokens and typed tokens *)
+         if is_typed tree tok then begin
+           (* Typed token *)
+           Buffer.add_string b "let ";
+           begin match lab with
+               None   -> Buffer.add_string b "_"
+             | Some l -> Buffer.add_string b l
+           end;
+           Buffer.add_string b " = match ";
+           if previous_was_token then
+             Buffer.add_string b "yy_get_next()"
+           else
+             Buffer.add_string b "yy_current()";
+           Buffer.add_string b " with ";
+           Buffer.add_string b tok;
+           Buffer.add_string b " x -> x | _ -> raise ";
+           if allow_not_found then
+             Buffer.add_string b "Not_found"
+           else
+             Buffer.add_string b "Parsing.Parse_error";
+           Buffer.add_string b " in\n";
+         end
+         else begin
+           (* Simple token *)
+           Buffer.add_string b "if (";
+           if previous_was_token then
+             Buffer.add_string b "yy_get_next()"
+           else
+             Buffer.add_string b "yy_current()";
+           Buffer.add_string b ") <> ";
+           Buffer.add_string b tok;
+           Buffer.add_string b " then raise ";
+           if allow_not_found then
+             Buffer.add_string b "Not_found;\n"
+           else
+             Buffer.add_string b "Parsing.Parse_error;\n"
+         end
+      | L_symbol(called_rule, args, lab) ->
+         if previous_was_token then
+           Buffer.add_string b "ignore(yy_get_next());\n";
+         make_rule_invocation called_rule args lab allow_not_found
+      | L_indirect(ml_name, args, lab) ->
+         if previous_was_token then
+           Buffer.add_string b "ignore(yy_get_next());\n";
+         make_indirect_rule_invocation ml_name args lab allow_not_found
+  in
+
+  let process_pattern (current_position, previous_was_token) pat =
+    (* Assign "position" if necessary. *)
+    let new_position =
+      if branch.branch_error_code <> None then begin
+       match pat.pat_symbol with
+           U_symbol(_,Some l)   -> l
+         | L_symbol(_,_,Some l) -> l
+         | L_indirect(_,_,Some l) -> l
+         | _ -> ""
+      end
+      else ""
+    in
+    if new_position <> current_position then begin
+      Buffer.add_string b "yy_position := \"";
+      Buffer.add_string b new_position;
+      Buffer.add_string b "\";\n";
+    end;
+
+    let this_is_token =
+      match pat.pat_symbol with
+         U_symbol(_,_)   -> pat.pat_modifier = Exact
+       | L_symbol(_,_,_) -> false
+       | L_indirect(_,_,_) -> false
+    in
+
+    (* First distinguish between Exact, Option, and Repetition: *)
+    begin match pat.pat_modifier with
+       Exact ->
+         process_symbol pat.pat_symbol previous_was_token false
+      | Option ->
+         begin match label_of_symbol tree pat.pat_symbol with
+             None ->
+               (* CASE: optional symbol without label *)
+               (* OPTIMIZATION: If the symbol is
+                * a token, the loop becomes very simple.
+                *)
+               if (match pat.pat_symbol with 
+                       U_symbol(t,_) -> not (is_typed tree t) | _ -> false) 
+               then begin
+                 let tok = match pat.pat_symbol with 
+                              U_symbol(t,_) -> t | _ -> assert false in
+                 (* Optimized case *)
+                 Buffer.add_string b "if ";
+                 if previous_was_token then
+                   Buffer.add_string b "yy_get_next()"
+                 else
+                   Buffer.add_string b "yy_current()";
+                 Buffer.add_string b " = ";
+                 Buffer.add_string b tok;
+                 Buffer.add_string b " then ignore(yy_get_next());\n";
+               end
+               else begin
+                 (* General, non-optimized case: *)
+                 Buffer.add_string b "( try (";
+                 process_symbol pat.pat_symbol previous_was_token true;
+                 Buffer.add_string b "ignore(yy_get_next());\n";
+                 Buffer.add_string b ") with Not_found -> ());\n";
+               end
+           | Some l ->
+               (* CASE: optional symbol with label *)
+               if is_untyped_U_symbol tree pat.pat_symbol then begin
+                 (* SUBCASE: The label becomes a boolean variable *)
+                 Buffer.add_string b "let ";
+                 Buffer.add_string b l;
+                 Buffer.add_string b " = try (";
+                 process_symbol pat.pat_symbol previous_was_token true;
+                 Buffer.add_string b ");\n";
+                 Buffer.add_string b "ignore(yy_get_next());\n";
+                 Buffer.add_string b "true with Not_found -> false in\n";
+               end
+               else begin
+                 (* SUBCASE: the symbol has a value *)
+                 Buffer.add_string b "let ";
+                 Buffer.add_string b l;
+                 Buffer.add_string b " = try let yy_tok = Some(";
+                 process_symbol pat.pat_symbol previous_was_token true;
+                 Buffer.add_string b l;
+                 Buffer.add_string b ") in\n";
+                 
+                 if (match pat.pat_symbol with
+                         U_symbol(_,_) -> true | _ -> false) then
+                   Buffer.add_string b "ignore(yy_get_next());\n";
+                 
+                 Buffer.add_string b "yy_tok with Not_found -> None in\n";
+               end
+         end
+      | Repetition ->
+         begin match label_of_symbol tree pat.pat_symbol with
+             None ->
+               (* CASE: repeated symbol without label *)
+               (* OPTIMIZATION: If the symbol is
+                * a token, the loop becomes very simple.
+                *)
+               if (match pat.pat_symbol with 
+                       U_symbol(t,_) -> not (is_typed tree t) | _ -> false) 
+               then begin
+                 let tok = match pat.pat_symbol with 
+                              U_symbol(t,_) -> t | _ -> assert false in
+                 if previous_was_token then begin
+                   (* Optimized case I *)
+                   Buffer.add_string b "while yy_get_next() = ";
+                   Buffer.add_string b tok;
+                   Buffer.add_string b " do () done;\n";
+                 end
+                 else begin
+                   (* Optimized case II *)
+                   Buffer.add_string b "if yy_current() = ";
+                   Buffer.add_string b tok;
+                   Buffer.add_string b " then (";
+                   Buffer.add_string b "while yy_get_next() = ";
+                   Buffer.add_string b tok;
+                   Buffer.add_string b " do () done);\n";
+                 end
+               end
+               else begin
+                 (* General, non-optimized case: *)
+                 if previous_was_token then
+                   Buffer.add_string b "ignore(yy_get_next());\n";
+                 Buffer.add_string b "( try while true do (";
+                 process_symbol pat.pat_symbol false true;
+
+                 if (match pat.pat_symbol with
+                        U_symbol(_,_) -> true | _ -> false) then
+                   Buffer.add_string b "ignore(yy_get_next());\n"
+                 else
+                   Buffer.add_string b "();\n";
+
+                 Buffer.add_string b ") done with Not_found -> ());\n";
+               end
+           | Some l ->
+               (* CASE: repeated symbol with label *)
+               if is_untyped_U_symbol tree pat.pat_symbol then begin
+                 (* SUBCASE: The label becomes an integer variable *)
+                 if previous_was_token then
+                   Buffer.add_string b "ignore(yy_get_next());\n";
+                 Buffer.add_string b "let yy_counter = ref 0 in\n";
+                 Buffer.add_string b "( try while true do \n";
+                 process_symbol pat.pat_symbol false true;
+                 Buffer.add_string b "incr yy_counter;\n";
+                 
+                 if (match pat.pat_symbol with
+                         U_symbol(_,_) -> true | _ -> false) then
+                   Buffer.add_string b "ignore(yy_get_next());\n";
+                 
+                 Buffer.add_string b "done with Not_found -> ());\n";
+                 Buffer.add_string b "let ";
+                 Buffer.add_string b l;
+                 Buffer.add_string b " = !yy_counter in\n";
+               end
+               else begin
+                 (* SUBCASE: the symbol has a value *)
+                 if previous_was_token then
+                   Buffer.add_string b "ignore(yy_get_next());\n";
+                 Buffer.add_string b "let yy_list = ref [] in\n";
+                 Buffer.add_string b "( try while true do \n";
+                 process_symbol pat.pat_symbol false true;
+                 Buffer.add_string b "yy_list := ";
+                 Buffer.add_string b l;
+                 Buffer.add_string b " :: !yy_list;\n";
+                 
+                 if (match pat.pat_symbol with
+                         U_symbol(_,_) -> true | _ -> false) then
+                   Buffer.add_string b "ignore(yy_get_next());\n";
+                 
+                 Buffer.add_string b "done with Not_found -> ());\n";
+                 Buffer.add_string b "let ";
+                 Buffer.add_string b l;
+                 Buffer.add_string b " = List.rev !yy_list in\n";
+               end
+         end
+    end;
+
+    (* Continue: *)
+    (new_position, this_is_token)
+  in
+
+
+  let process_inner_branch current_position =
+    (* If there is "early code", run this now: *)
+    output_code b file_name branch.branch_early_code;
+    Buffer.add_string b "\n";
+
+    (* If the first symbol is a rule invocation, call the corresponding
+     * parser function now.
+     *)
+    let previous_was_token =
+      begin match branch.branch_selector with
+         U_symbol(_,_) -> 
+           true
+       | L_symbol(called_rule, args, lab) ->
+           make_rule_invocation called_rule args lab true;
+           false
+       | L_indirect(_,_,_) -> 
+           failwith("The first symbol in some rule is an indirect call; this is not allowed")
+      end
+    in
+
+    (* Now output the "let-CODE". *)
+    output_code b file_name branch.branch_binding_code;
+    Buffer.add_string b "\n";
+
+    (* Process the other symbols in turn: *)
+    let (_, previous_was_token') =
+      (List.fold_left
+        process_pattern
+        (current_position, previous_was_token)
+        branch.branch_pattern
+      )
+    in
+
+    (* Special case: 
+     *
+     * If previous_was_token', we must invoke yy_get_next one more time.
+     * This is deferred until "CODE" is executed to give this code 
+     * the chance to make the next token available (in XML, the next token
+     * might come from a different entity, and "CODE" must switch to this
+     * entity).
+     *)
+
+    (* Now output "CODE": *)
+    Buffer.add_string b "let result = \n";
+    output_code b file_name branch.branch_result_code;
+    Buffer.add_string b "\nin\n";
+
+    if previous_was_token' then
+      Buffer.add_string b "ignore(yy_get_next());\nresult\n"
+    else
+      Buffer.add_string b "result\n"
+  in
+
+  (* If we have a ? clause, generate now the "try" statement *)
+  match branch.branch_error_code with
+      None ->
+       Buffer.add_string b "( ";
+       process_inner_branch "";
+       Buffer.add_string b " )";
+    | Some code ->
+
+       (* let position = ref "<label>" in *)
+
+       Buffer.add_string b "let yy_position = ref \"";
+       let current_position =
+         match branch.branch_selector with
+             U_symbol(_,_) -> ""
+           | L_symbol(_,_,None) -> ""
+           | L_symbol(_,_,Some l) -> l
+           | L_indirect(_,_,None) -> ""
+           | L_indirect(_,_,Some l) -> l
+       in
+       Buffer.add_string b current_position;
+       Buffer.add_string b "\" in\n";
+       
+       (* The "try" statement: *)
+
+       Buffer.add_string b "( try (\n";
+
+       process_inner_branch current_position;
+
+       Buffer.add_string b "\n) with Parsing.Parse_error -> (\n";
+       output_code b file_name code;
+       Buffer.add_string b "\n))\n"
+;;
+
+
+let process b file_name tree =
+  (* Iterate over the rules and output the parser functions: *)
+  let is_first = ref true in
+  List.iter
+    (fun r ->
+
+       (* Generate the function header: *)
+
+       if !is_first then
+        Buffer.add_string b "let rec "
+       else
+        Buffer.add_string b "and ";
+       is_first := false;
+       Buffer.add_string b "parse_";
+       Buffer.add_string b r.rule_name;
+       Buffer.add_string b " yy_current yy_get_next";
+       List.iter
+        (fun arg -> Buffer.add_string b " ";
+                    Buffer.add_string b arg)
+        r.rule_arguments;
+       Buffer.add_string b " =\n";
+
+       (* Generate the "match" statement: *)
+
+       Buffer.add_string b "match yy_current() with\n";
+       let s_done = ref [] in
+       (* s_done: The set of already matched tokens *)
+
+       List.iter
+        (fun branch ->
+           match branch.branch_selector with
+               U_symbol(tok, lab) ->
+                 (* A simple token *)
+                 if List.mem tok !s_done then begin
+                   prerr_endline("WARNING: In rule `" ^ r.rule_name ^ 
+                                 "': Match for token `" ^
+                                 tok ^ "' hidden by previous match");
+                 end
+                 else
+                   if is_typed tree tok then begin
+                     match lab with
+                         None ->
+                           Buffer.add_string b "| ";
+                           Buffer.add_string b tok;
+                           Buffer.add_string b " _ -> ";
+                           process_branch b file_name tree branch;
+                           Buffer.add_string b "\n";
+                           s_done := tok :: !s_done;
+                       | Some l ->
+                           Buffer.add_string b "| ";
+                           Buffer.add_string b tok;
+                           Buffer.add_string b " ";
+                           Buffer.add_string b l;
+                           Buffer.add_string b " -> ";
+                           process_branch b file_name tree branch;
+                           Buffer.add_string b "\n";
+                           s_done := tok :: !s_done;
+                 end
+                 else begin
+                   Buffer.add_string b "| ";
+                   Buffer.add_string b tok;
+                   Buffer.add_string b " -> ";
+                   process_branch b file_name tree branch;
+                   Buffer.add_string b "\n";
+                   s_done := tok :: !s_done;
+                 end
+             | L_symbol(called_rule, args, lab) ->
+                 (* An invocation of a rule *)
+                 let s_rule = selector_set_of_rule tree called_rule in
+                 let s_rule' =
+                   List.filter
+                     (fun tok ->
+                        if List.mem tok !s_done then begin
+                          prerr_endline("WARNING: In rule `" ^ r.rule_name ^ 
+                                        "': Match for token `" ^
+                                        tok ^ "' hidden by previous match");
+                          false
+                        end
+                        else true)
+                     s_rule in
+                 if s_rule' <> [] then begin
+                   Buffer.add_string b "| ( ";
+                   let is_first = ref true in
+                   List.iter
+                     (fun tok ->
+                        if not !is_first then
+                          Buffer.add_string b " | ";
+                        is_first := false;
+                        Buffer.add_string b tok;
+                        if is_typed tree tok then
+                          Buffer.add_string b " _";
+                     )
+                     s_rule';
+                   Buffer.add_string b ") -> ";
+                   process_branch b file_name tree branch;
+                   Buffer.add_string b "\n";
+                   s_done := s_rule' @ !s_done;
+                 end
+             | L_indirect(ml_name, args, lab) ->
+                 (* An invocation of an indirect rule *)
+                 failwith("The first symbol in rule `" ^ r.rule_name ^ 
+                          "' is an indirect call; this is not allowed")
+        )
+        r.rule_branches;
+
+       Buffer.add_string b "\n| _ -> raise Not_found\n";
+    )
+    tree.text_rules;
+
+  Buffer.add_string b " in\n"
+;;
+
+
+let count_lines s =
+  (* returns number of lines in s, number of columns of the last line *)
+  let l = String.length s in
+
+  let rec count n k no_cr no_lf =
+    let next_cr = 
+      if no_cr then
+        (-1)
+      else
+        try String.index_from s k '\013' with Not_found -> (-1) in
+    let next_lf = 
+      if no_lf then
+        (-1)
+      else
+        try String.index_from s k '\010' with Not_found -> (-1) in
+    if next_cr >= 0 & (next_lf < 0 or next_cr < next_lf) then begin
+      if next_cr+1 < l & s.[next_cr+1] = '\010' then
+        count (n+1) (next_cr+2) false (next_lf < 0)
+      else
+        count (n+1) (next_cr+1) false (next_lf < 0)
+    end
+    else if next_lf >= 0 then begin
+      count (n+1) (next_lf+1) (next_cr < 0) false
+    end
+    else
+      n, (l - k)
+
+  in
+  count 0 0 false false
+;;
+
+
+type scan_context =
+    { mutable old_line : int;
+      mutable old_column : int;
+      mutable line : int;
+      mutable column : int;
+    }
+;;
+
+
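+(* next_token: wrapper around Lexer.scan_file that updates the line/column
+ * information in the scan_context, skips Space tokens, and attaches the
+ * current position to Code tokens.
+ *)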
+let rec next_token context lexbuf =
+  let t = Lexer.scan_file lexbuf in
+  let line = context.line in
+  let column = context.column in
+  context.old_line <- line;
+  context.old_column <- column;
+  let n_lines, n_columns = count_lines (Lexing.lexeme lexbuf) in
+  if n_lines > 0 then begin
+    context.line <- line + n_lines;
+    context.column <- n_columns;
+  end 
+  else 
+    context.column <- column + n_columns;
+  match t with
+      Space -> next_token context lexbuf
+    | Code(s,_,_) -> Code(s,line,column + 2)
+    | Eof   -> failwith "Unexpected end of file"
+    | _     -> t
+;;
+
+
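+(* parse_and_generate: reads an m2parsergen input file from the channel,
+ * copies the preamble, translates the rule section into the parse_*
+ * functions, copies the postamble, and prints the result to stdout.
+ * Errors are reported with the position tracked in the scan_context.
+ *)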
+let parse_and_generate ch =
+  let b = Buffer.create 20000 in
+
+  let rec find_sep context lexbuf =
+    let t = Lexer.scan_header lexbuf in
+    let line = context.line in
+    let column = context.column in
+    context.old_line <- line;
+    context.old_column <- column;
+    let n_lines, n_columns = count_lines (Lexing.lexeme lexbuf) in
+    if n_lines > 0 then begin
+      context.line <- line + n_lines;
+      context.column <- n_columns;
+    end 
+    else 
+      context.column <- column + n_columns;
+    match t with
+       Code(s,_,_) -> 
+         Buffer.add_string b s;
+          find_sep context lexbuf
+      | Eof    -> failwith "Unexpected end of file"
+      | Separator -> ()
+      | _         -> assert false
+  in
+
+  let rec find_rest context lexbuf =
+    let t = Lexer.scan_header lexbuf in
+    let line = context.line in
+    let column = context.column in
+    context.old_line <- line;
+    context.old_column <- column;
+    let n_lines, n_columns = count_lines (Lexing.lexeme lexbuf) in
+    if n_lines > 0 then begin
+      context.line <- line + n_lines;
+      context.column <- n_columns;
+    end 
+    else 
+      context.column <- column + n_columns;
+    match t with
+       Code(s,_,_) -> 
+         Buffer.add_string b s;
+          find_rest context lexbuf
+      | Eof    -> ()
+      | _      -> assert false
+  in
+
+  (* First read until '%%' *)
+  let lexbuf = Lexing.from_channel ch in
+  let context = { old_line = 0; old_column = 0; line = 1; column = 0 } in
+  let file_name = "stdin" in
+  try
+    output_code_location b file_name ("", 1, 0);
+    find_sep context lexbuf;
+    (* Parse the following text *)
+    let text = (Parser.text (next_token context) lexbuf : Ast.text) in
+    (* Process it: *)
+    process b file_name text;
+    (* Read rest *)
+    output_code_location b file_name ("", context.line, context.column);
+    find_rest context lexbuf;
+    (* Output everything: *)
+    print_string (Buffer.contents b)
+  with
+      any ->
+       Printf.eprintf 
+         "Error at line %d column %d: %s\n"
+         context.old_line
+         context.old_column
+         (Printexc.to_string any);
+       exit 1
+;;
+
+
+parse_and_generate stdin;;
+exit 0;;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.7  2000/08/17 00:33:02  gerd
+ *     Bugfix: tok* and tok? work now if tok is an untyped token
+ * without label.
+ *
+ * Revision 1.6  2000/05/14 20:59:24  gerd
+ *     Added "phantasy line numbers" to help finding errorneous locations.
+ *
+ * Revision 1.5  2000/05/14 20:41:58  gerd
+ *     x: Token?   means: if Token is detected x=true else x=false.
+ *     x: Token*   means: x becomes the number of ocurrences of Token.
+ *
+ * Revision 1.4  2000/05/09 00:03:22  gerd
+ *     Added [ ml_name ] symbols, where ml_name is an arbitrary
+ * OCaml identifier.
+ *
+ * Revision 1.3  2000/05/08 22:03:01  gerd
+ *     It is now possible to have a $ {{ }} sequence right BEFORE
+ * the first token. This code is executed just after the first token
+ * has been recognized.
+ *
+ * Revision 1.2  2000/05/06 21:51:08  gerd
+ *     Numerous bugfixes.
+ *
+ * Revision 1.1  2000/05/06 17:36:17  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/lexer.mll b/helm/DEVEL/pxp/pxp/m2parsergen/lexer.mll
new file mode 100644 (file)
index 0000000..a016897
--- /dev/null
@@ -0,0 +1,93 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+{
+  open Parser
+}
+
+rule scan_file = parse
+    "/*" [^ '*']* ('*'+ [^ '/' '*'] [^ '*']* )* '*'* "*/"
+      { Space }
+  | "%token"
+      { Token }
+  | "<" [' ' '\t' '\r' '\n']* ">"
+      { Type 
+      }
+  | [ 'a'-'z' ] [ 'a'-'z' 'A'-'Z' '0'-'9' '_' ]*
+      { let s = Lexing.lexeme lexbuf in
+       Lname s
+      }
+  | [ 'A'-'Z' ] [ 'a'-'z' 'A'-'Z' '0'-'9' '_' ]*
+      { let s = Lexing.lexeme lexbuf in
+       Uname s
+      }
+  | "%%"
+      { Separator }
+  | "("
+      { Lparen }
+  | ","
+      { Comma }
+  | ")"
+      { Rparen }
+  | "[" 
+      { Lbracket }
+  | "]" 
+      { Rbracket }
+  | ":"
+      { Colon }
+  | "{{" [^ '}']* ( '}' [^ '}']+ )* "}}"
+      { let s = Lexing.lexeme lexbuf in
+       Code (String.sub s 2 (String.length s - 4), 0, 0)
+      }
+  | "?"
+      { Error }
+  | "|"
+      { Alt }
+  | "+"
+      { Loop_plus }
+  | "*"
+      { Loop_star }
+  | [' ' '\t' '\r' '\n']+
+      { Space }
+  | "$"
+      { Dollar }
+  | eof
+      { Eof }
+
+and scan_header = parse
+    "%%"
+      { Separator }
+  | "%"
+      { Code("%", 0, 0) }
+  | [^ '%']*
+      { Code(Lexing.lexeme lexbuf, 0, 0) }
+  | eof
+      { Eof }
+
+and scan_rest = parse
+    _*
+      { Code(Lexing.lexeme lexbuf, 0, 0) }
+  | eof 
+      { Eof }
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/05/09 00:03:22  gerd
+ *     Added [ ml_name ] symbols, where ml_name is an arbitrary
+ * OCaml identifier.
+ *
+ * Revision 1.2  2000/05/06 21:51:24  gerd
+ *     New symbol Dollar.
+ *
+ * Revision 1.1  2000/05/06 17:36:17  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/parser.mly b/helm/DEVEL/pxp/pxp/m2parsergen/parser.mly
new file mode 100644 (file)
index 0000000..7497c3a
--- /dev/null
@@ -0,0 +1,194 @@
+/* $Id$
+ * ----------------------------------------------------------------------
+ *
+ */
+
+%{
+  open Ast
+
+%}
+
+%token Space
+%token Token
+%token Type
+%token <string> Lname
+%token <string> Uname
+%token Separator
+%token Lparen
+%token Rparen
+%token Comma
+%token Colon
+%token <string * int * int> Code
+%token Error
+%token Alt
+%token Loop_plus
+%token Loop_star
+%token Dollar
+%token Lbracket
+%token Rbracket
+%token Eof
+
+%start text
+%type <Ast.text> text
+
+%%
+
+text:
+  declarations rules
+    { { text_decls = $1; text_rules = $2; } }
+
+declarations:
+  declaration declarations
+    { $1 :: $2 }
+| Separator
+    { [] }
+
+declaration:
+  Token Uname
+    { D_token $2 }
+| Token Type Uname
+    { D_typed_token $3 }
+
+rules:
+  rule rules
+    { $1 :: $2 }
+| Separator
+    { [] }
+
+rule:
+  Lname Lparen formal_arguments Colon branches
+    { { rule_name = $1;
+       rule_arguments = $3;
+       rule_branches = $5;
+      }
+    }
+
+formal_arguments:
+  Rparen
+    { [] }
+| Lname comma_formal_arguments
+    { $1 :: $2 }
+
+comma_formal_arguments:
+  Comma Lname comma_formal_arguments
+    { $2 :: $3 }
+| Rparen
+    { [] }
+
+branches:
+  branch alt_branches
+    { $1 :: $2 }
+
+alt_branches:
+  Alt branch alt_branches
+    { $2 :: $3 }
+|
+    { [] }
+
+branch:
+  simple_branch
+    { $1 }
+| Dollar Code simple_branch
+    { { $3 with branch_early_code = $2 } }
+
+simple_branch:
+  symbol Dollar Code patterns Code opt_error_handler
+    { { branch_selector = $1;
+        branch_early_code = ("",0,0);
+       branch_binding_code = $3;
+       branch_pattern = $4;
+       branch_result_code = $5;
+       branch_error_code = $6;
+      }
+    }
+| symbol patterns Code opt_error_handler
+    { { branch_selector = $1;
+        branch_early_code = ("",0,0);
+       branch_binding_code = ("", 0, 0);
+       branch_pattern = $2;
+       branch_result_code = $3;
+       branch_error_code = $4;
+      }
+    }
+
+patterns:
+  pattern patterns
+    { $1 :: $2 }
+| 
+    { [] }
+
+pattern:
+  symbol Loop_star
+    { { pat_symbol = $1;
+       pat_modifier = Repetition;
+      }
+    }
+| symbol Error
+    { { pat_symbol = $1;
+       pat_modifier = Option;
+      }
+    }
+| symbol
+    { { pat_symbol = $1;
+       pat_modifier = Exact;
+      }
+    }
+
+symbol:
+  Lname Colon Uname
+    { U_symbol($3, Some $1) }
+| Lname Colon Lname Lparen actual_arguments 
+    { L_symbol($3, $5, Some $1) }
+| Lname Colon Lbracket Lname Rbracket Lparen actual_arguments 
+    { L_indirect($4, $7, Some $1) }
+| Uname
+    { U_symbol($1, None) }
+| Lname Lparen actual_arguments 
+    { L_symbol($1, $3, None) }
+| Lbracket Lname Rbracket Lparen actual_arguments 
+    { L_indirect($2, $5, None) }
+
+
+actual_arguments:
+  Rparen
+    { [] }
+| Lname comma_actual_arguments
+    { $1 :: $2 }
+
+comma_actual_arguments:
+  Rparen
+    { [] }
+| Comma Lname comma_actual_arguments
+    { $2 :: $3 }
+
+opt_error_handler:
+  Error Code
+    { Some $2 }
+| 
+    { None }
+
+%%
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.4  2000/05/09 00:03:22  gerd
+ *     Added [ ml_name ] symbols, where ml_name is an arbitrary
+ * OCaml identifier.
+ *
+ * Revision 1.3  2000/05/08 22:03:01  gerd
+ *     It is now possible to have a $ {{ }} sequence right BEFORE
+ * the first token. This code is executed just after the first token
+ * has been recognized.
+ *
+ * Revision 1.2  2000/05/06 21:51:46  gerd
+ *     New Dollar tag.
+ *
+ * Revision 1.1  2000/05/06 17:36:17  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/m2parsergen/x.m2y b/helm/DEVEL/pxp/pxp/m2parsergen/x.m2y
new file mode 100644 (file)
index 0000000..3fa80b0
--- /dev/null
@@ -0,0 +1,45 @@
+
+type token =
+  A | B | C of int | EOF
+;;
+
+%%
+
+%token A
+%token B
+%token <> C
+%token EOF
+
+%%
+
+r():
+  one:s()
+  {{ }}
+  b:B
+  two:B?
+  three:s()
+  {{ prerr_endline ("Result: " ^ string_of_int three) }}
+? {{ prerr_endline ("ERROR: " ^ !yy_position) }}
+
+s():
+  A 
+  {{ }}
+  {{ prerr_endline "A"; 0 }}
+| B 
+  {{ }}
+  {{ prerr_endline "B"; 0 }}
+| n:C
+  {{ }}
+  {{ prerr_endline ("C: " ^ string_of_int n); n }}
+%%
+
+let input = ref [ A; B; B; B; C 5; EOF ] in
+let current() = List.hd !input in
+let next_token () =
+  prerr_endline "get_next";
+  input := List.tl !input;
+  List.hd !input
+in
+parse_r current next_token
+;;
+
diff --git a/helm/DEVEL/pxp/pxp/pxp_aux.ml b/helm/DEVEL/pxp/pxp/pxp_aux.ml
new file mode 100644 (file)
index 0000000..aa1212b
--- /dev/null
@@ -0,0 +1,651 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ * Some auxiliary functions 
+ *)
+
+(**********************************************************************)
+(* Lexing *)
+
+
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_lexers
+open Netconversion
+
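+(* character enc warner k:
+ * returns the character with code point k as a string in the internal
+ * encoding enc. Raises WF_error if k is outside the range of characters
+ * allowed in XML documents; if k cannot be represented in enc, a warning
+ * is emitted via warner and "" is returned.
+ *)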
+let character enc warner k =
+  assert (k>=0);
+  if (k >= 0xd800 & k < 0xe000) or (k >= 0xfffe & k <= 0xffff) or k > 0x10ffff
+     or (k < 8) or (k = 11) or (k = 12) or (k >= 14 & k <= 31)
+  then
+    raise (WF_error("Code point " ^ string_of_int k ^ 
+                   " outside the accepted range of code points"));
+
+  try
+    makechar (enc : rep_encoding :> encoding) k
+  with
+      Not_found ->
+       warner # warn ("Code point cannot be represented in internal encoding: "
+                      ^ string_of_int k);
+       ""
+;;
+
+
+let check_name warner name =
+  (* produces a warning for names beginning with "xml". *)
+  if String.length name >= 3 then begin
+    match String.sub name 0 3 with
+       ("xml" | "xmL" | "xMl" | "xML" | "Xml" | "XmL" | "XMl" | "XML") ->
+         warner # warn ("Name is reserved for future extensions: " ^ name)
+      | _ ->
+         ()
+  end
+;;
+
+
+let tokens_of_content_string lexerset s =
+  (* tokenizes general entities and character entities *)
+  let lexbuf = Lexing.from_string s in
+  let rec next_token () =
+    match lexerset.scan_content_string lexbuf with
+       Eof -> []
+      | tok -> tok :: next_token()
+  in
+  next_token()
+;;
+
+
+let rec expand_attvalue_with_rec_check lexerset dtd s warner entities norm_crlf =
+  (* recursively expands general entities and character entities;
+   * checks "standalone" document declaration;
+   * normalizes whitespace
+   *)
+  let toklist = tokens_of_content_string lexerset s in
+  let rec expand tl =
+    match tl with
+       [] -> ""
+      | ERef n :: tl' ->
+         if List.mem n entities then
+           raise(WF_error("Recursive reference to general entity `" ^ n ^ "'"));
+         let en, extdecl = dtd # gen_entity n in
+         if dtd # standalone_declaration && extdecl then
+           raise(Validation_error("Reference to entity `" ^ n ^ 
+                                  "' violates standalone declaration"));
+         let rtext, rtext_contains_ext_refs = en # replacement_text in
+         if rtext_contains_ext_refs then
+           raise(Validation_error("Found reference to external entity in attribute value"));
+         expand_attvalue_with_rec_check 
+           lexerset dtd rtext warner (n :: entities) false    ^    expand tl'
+      | CRef(-1) :: tl' ->
+         if norm_crlf then
+           " " ^ expand tl'
+         else
+           "  " ^ expand tl'
+      | CRef n :: tl' ->
+         character lexerset.lex_encoding warner n ^ expand tl'
+      | CharData "<" :: tl' ->
+         raise 
+           (WF_error
+              ("Attribute value contains character '<' literally"))
+      | CharData x :: tl' ->
+         x ^ expand tl'
+      | _ -> assert false
+  in
+  expand toklist
+;;
+
+
+let expand_attvalue lexerset dtd s warner norm_crlf =
+  (* norm_crlf: whether the sequence CRLF is recognized as one character or
+   * not (i.e. two characters)
+   *)
+  expand_attvalue_with_rec_check lexerset dtd s warner [] norm_crlf
+;;
+
+
+let count_lines s =
+  (* returns number of lines in s, number of columns of the last line *)
+  let l = String.length s in
+
+  let rec count n k no_cr no_lf =
+    let next_cr = 
+      if no_cr then
+       (-1)
+      else
+       try String.index_from s k '\013' with Not_found -> (-1) in
+    let next_lf = 
+      if no_lf then
+       (-1)
+      else
+       try String.index_from s k '\010' with Not_found -> (-1) in
+    if next_cr >= 0 & (next_lf < 0 or next_cr < next_lf) then begin
+      if next_cr+1 < l & s.[next_cr+1] = '\010' then
+       count (n+1) (next_cr+2) false (next_lf < 0)
+      else
+       count (n+1) (next_cr+1) false (next_lf < 0)
+    end
+    else if next_lf >= 0 then begin
+      count (n+1) (next_lf+1) (next_cr < 0) false
+    end
+    else
+      n, (l - k)
+
+  in
+  count 0 0 false false
+;;
+
+
+let tokens_of_xml_pi lexers s =
+  let lexbuf = Lexing.from_string (s ^ " ") in
+  let rec collect () =
+    let t = lexers.scan_xml_pi lexbuf in
+    match t with
+       Pro_eof -> []
+      | _       -> t :: collect()
+  in
+  collect()
+;;
+
+
+let decode_xml_pi pl =
+  (* 'pl' must consist of name="value" or name='value' pairs which are returned
+   * as list of pairs.
+   * The "value" is returned as it is; no substitution of &entities; happens.
+   *)
+  let rec decode pl =
+    match pl with
+       Pro_name name :: Pro_eq :: Pro_string value :: pl' ->
+         (name, value) :: decode pl'
+      | [] ->
+         []
+      | _ ->
+         raise (WF_error("Bad XML processing instruction"))
+  in
+  decode pl
+;;
+
+
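+(* decode_doc_xml_pi: interprets the name/value pairs of the XML declaration
+ * and returns (version, optional encoding, optional standalone value);
+ * any other combination of pairs raises WF_error.
+ *)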
+let decode_doc_xml_pi pl =
+  match pl with
+      [ "version", v ]                                  -> (v, None, None)
+    | [ "version", v; "encoding", e ]                   -> (v, Some e, None)
+    | [ "version", v; "standalone", s ]                 -> (v, None, Some s)
+    | [ "version", v; "encoding", e; "standalone", s ]  -> (v, Some e, Some s)
+    | _ ->
+       raise(WF_error("Bad XML declaration"))
+;;
+
+
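+(* check_text_xml_pi: accepts only the combinations version+encoding or
+ * encoding alone; any other combination of pairs raises WF_error.
+ *)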
+let check_text_xml_pi pl =
+  match pl with
+    | [ "version", v; "encoding", e ] -> ()
+    | [ "encoding", e ]  -> ()
+    | _ ->
+       raise(WF_error("Bad XML declaration"))
+;;
+
+
+let check_version_num s =
+  let l = String.length s in
+  for i = 0 to l - 1 do
+    match s.[i] with
+       ('a'..'z'|'A'..'Z'|'0'..'9'|
+        '-'|'_'|'.'|':') -> ()
+      | _ ->
+         raise(WF_error("Bad XML version string"))
+  done
+;;
+
+
+let check_public_id s =
+  let l = String.length s in
+  for i = 0 to l - 1 do
+    match s.[i] with
+       (' '|'\013'|'\010'|'a'..'z'|'A'..'Z'|'0'..'9'|
+        '-'|'\''|'('|')'|'+'|','|'.'|'/'|':'|'='|'?'|
+        ';'|'!'|'*'|'#'|'@'|'$'|'_'|'%') -> ()
+      | _ ->
+         raise(WF_error("Illegal character in PUBLIC identifier"))
+  done
+;;
+
+
+(**********************************************************************)
+(* list functions *)
+
+
+let rec check_dups l =
+  match l with
+      [] -> false
+    | c :: l' -> 
+       if List.mem c l' then true else check_dups l'
+;;
+
+
+let rec count pred l =
+  match l with
+      [] -> 0
+    | x :: l' -> 
+       if pred x then  1 + (count pred l') else count pred l'
+;;
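+
+(* Examples (illustrative):
+ *   check_dups [ "a"; "b"; "a" ]                  (* = true *)
+ *   count (fun s -> s = "a") [ "a"; "b"; "a" ]    (* = 2    *)
+ *)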
+
+
+(**********************************************************************)
+(* attributes *)
+
+let check_attribute_value_lexically lexerset x t v =
+  (* raises x if the attribute value v does not match the lexical rules
+   * for attribute type t:
+   * - t = A_id: v must be a <name>
+   * - t = A_idref: v must match <name>
+   * - t = A_idrefs: v must match <names>
+   * - t = A_entity: v must match <name>
+   * - t = A_entities: v must match <names>
+   * - t = A_nmtoken: v must match <nmtoken>
+   * - t = A_nmtokens: v must match <nmtokens>
+   * - t = A_notation _: v must match <name>
+   * - t = A_enum _: v must match <nmtoken>
+   * - t = A_cdata: not checked
+   *)
+  let lexbuf = Lexing.from_string v in
+  let rec get_name_list() =
+    match lexerset.scan_name_string lexbuf with
+       Eof    -> []
+      | Ignore -> get_name_list()
+      | tok    -> tok :: get_name_list()
+  in
+  let l = get_name_list() in
+  match t with
+      (A_id | A_idref | A_entity | A_notation _) ->
+       begin match l with
+           [ Name n ] -> ()
+         | _          -> raise (Lazy.force x)
+       end
+    | (A_idrefs | A_entities) ->
+       if List.exists (fun tok -> 
+                         match tok with
+                             Name _ -> false
+                           | _ -> true) l then
+         raise (Lazy.force x)
+    | (A_nmtoken | A_enum _) ->
+       begin match l with
+           [ Name n ]      -> ()
+         | [ Nametoken n ] -> ()
+         | _               -> raise (Lazy.force x)
+       end
+    | A_nmtokens ->
+       if List.exists (fun tok -> 
+                         match tok with
+                             Name _ -> false
+                           | Nametoken _ -> false
+                           | _ -> true
+                      ) l then
+         raise (Lazy.force x)
+    | _ -> ()
+;;
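+
+(* Example (illustrative; 'lexerset' is an assumed lexer set for the internal
+ * encoding, and 'err' a lazily constructed exception such as
+ * lazy (Validation_error "lexically malformed")):
+ *
+ *   check_attribute_value_lexically lexerset err A_idrefs "id1 id2"
+ *
+ * returns () because the value is a whitespace-separated list of names; a
+ * value that is not such a list makes the function force and raise 'err'.
+ *)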
+
+
+let split_attribute_value lexerset v =
+  (* splits 'v' into a list of names or nmtokens. The white space separating
+   * the names/nmtokens in 'v' is suppressed and not returned.
+   *)
+  let lexbuf = Lexing.from_string v in
+  let rec get_name_list() =
+    match lexerset.scan_name_string lexbuf with
+       Eof         -> []
+      | Ignore      -> get_name_list()
+      | Name s      -> s :: get_name_list()
+      | Nametoken s -> s :: get_name_list()
+      | _           -> raise(Validation_error("Illegal attribute value"))
+  in
+  get_name_list()
+;;
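+
+(* Example (illustrative; 'lexerset' as above):
+ *   split_attribute_value lexerset " id1  id2 "    (* = ["id1"; "id2"] *)
+ *)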
+
+
+let normalize_line_separators lexerset s =
+  let lexbuf = Lexing.from_string s in
+  let rec get_string() =
+    match lexerset.scan_for_crlf lexbuf with
+       Eof        -> ""
+      | CharData s -> s ^ get_string()
+      | _          -> assert false
+  in
+  get_string()
+;;
+
+
+let value_of_attribute lexerset dtd n atype v =
+  (* The attribute with name 'n', type 'atype' and string value 'v' is
+   * decomposed, and the att_value is returned:
+   * - It is checked whether 'v' conforms to the lexical rules for attributes
+   *   of type 'atype'
+   * - If 'atype <> A_cdata', leading and trailing spaces are removed from 'v'.
+   * - If 'atype = A_notation d', it is checked if 'v' matches one of the
+   *   notation names contained in d.
+   * - If 'atype = A_enum d', it is checked whether 'v' matches one of the
+   *   tokens from d
+   * - If 'atype' refers to a "single-value" type, the value is returned as
+   *   Value u, where u is the normalized value. If 'atype' refers to a
+   *   "list" type, the value is returned as Valuelist l, where l contains
+   *   the tokens.
+   *
+   * Note that this function does not implement all normalization rules.
+   * It is expected that the string passed as 'v' is already preprocessed;
+   * i.e. character and entity references are resolved, and the substitution
+   * of white space characters by space characters has already been performed.
+   * If these requirements are met, the value returned by this function
+   * will be perfectly normalized.
+   *
+   * Further checks:
+   * - ENTITY and ENTITIES values: it is checked whether the referenced
+   *   general entity exists and is an unparsed (NDATA) entity
+   * [ Other checks planned: ID, IDREF, IDREFS but not yet implemented ]
+   *)
+
+  let lexical_error() =
+    lazy (raise(Validation_error("Attribute `" ^ n ^ "' is lexically malformed"))) in
+
+  let remove_leading_and_trailing_spaces u =
+    (* Precondition: 'u' matches <name> or <nmtoken> *)
+    match split_attribute_value lexerset u with
+       [ u' ] -> u'
+      | _      -> assert false
+  in
+
+  let check_ndata_entity u =
+    let en, extdecl = dtd # gen_entity u in  (* or Validation_error *)
+    if not (en # is_ndata) then
+      raise(Validation_error("Reference to entity `" ^ u ^ 
+                            "': NDATA entity expected"));
+    if dtd # standalone_declaration && extdecl then
+      raise(Validation_error("Reference to entity `" ^ u ^ 
+                            "' violates standalone declaration"));
+  in
+
+  match atype with
+      A_cdata ->
+       Value v
+
+    | (A_id | A_idref | A_nmtoken) ->
+       check_attribute_value_lexically lexerset (lexical_error()) atype v;
+       Value (remove_leading_and_trailing_spaces v)
+    | A_entity ->
+       check_attribute_value_lexically lexerset (lexical_error()) atype v;
+       let v' = remove_leading_and_trailing_spaces v in
+       check_ndata_entity v';
+       Value v'
+
+    | (A_idrefs | A_nmtokens) ->
+       check_attribute_value_lexically lexerset (lexical_error()) atype v;
+       Valuelist (split_attribute_value lexerset v)
+
+    | A_entities ->
+       check_attribute_value_lexically lexerset (lexical_error()) atype v;
+       let l = split_attribute_value lexerset v in
+       List.iter check_ndata_entity l;
+       Valuelist l
+
+    | A_notation nl ->
+       check_attribute_value_lexically lexerset (lexical_error()) atype v;
+       let v' = remove_leading_and_trailing_spaces v in
+       if not (List.mem v' nl) then
+         raise(Validation_error
+                 ("Attribute `" ^ n ^ 
+                  "' does not match one of the declared notation names"));
+       Value v'
+
+    | A_enum enuml ->
+       check_attribute_value_lexically lexerset (lexical_error()) atype v;
+       let v' = remove_leading_and_trailing_spaces v in
+       if not (List.mem v' enuml) then
+         raise(Validation_error
+                 ("Attribute `" ^ n ^ 
+                  "' does not match one of the declared enumerator tokens"));
+       Value v'
+;;
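+
+(* Examples (illustrative; 'lexerset' and 'dtd' assumed as above):
+ *   value_of_attribute lexerset dtd "x" A_cdata    " a  b "
+ *       (* = Value " a  b ": CDATA values are returned unchanged *)
+ *   value_of_attribute lexerset dtd "x" A_nmtoken  " tok "
+ *       (* = Value "tok": leading/trailing spaces are removed *)
+ *   value_of_attribute lexerset dtd "x" A_nmtokens " t1  t2 "
+ *       (* = Valuelist ["t1"; "t2"] *)
+ *)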
+
+
+let normalization_changes_value lexerset atype v =
+  (* Returns true if:
+   * - 'atype' is a "single-value" type, and the normalization of the string
+   *   value 'v' of this type discards leading and/or trailing spaces
+   * - 'atype' is a "list" type, and the normalization of the string value
+   *   'v' of this type discards leading and/or trailing spaces, or spaces
+   *   separating the tokens of the list (i.e. the normal form is that
+   *   the tokens are separated by exactly one space character).
+   *
+   * Note: It is assumed that TABs, CRs, and LFs in 'v' are already converted
+   * to spaces.
+   *)
+
+  match atype with
+      A_cdata -> 
+       false
+
+    | (A_id | A_idref | A_entity | A_nmtoken | A_notation _ | A_enum _) ->
+       (* Return 'true' if the first or last character is a space.
+        * The following check works for both ISO-8859-1 and UTF-8.
+        *)
+       v <> "" && (v.[0] = ' ' || v.[String.length v - 1] = ' ')
+
+    | (A_idrefs | A_entities | A_nmtokens) ->
+       (* Split the list, and concatenate the tokens as required by
+        * the normal form. Return 'true' if this operation results in 
+        * a different string than 'v'.
+        * This check works for both ISO-8859-1 and UTF-8.
+        *)
+       let l = split_attribute_value lexerset v in
+       let v' = String.concat " " l in
+       v <> v'
+;;
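+
+(* Examples (illustrative):
+ *   normalization_changes_value lexerset A_cdata    "a  b"   (* = false *)
+ *   normalization_changes_value lexerset A_nmtoken  " a"     (* = true  *)
+ *   normalization_changes_value lexerset A_nmtokens "a  b"   (* = true  *)
+ *)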
+
+
+(**********************************************************************)
+
+let write_markup_string ~(from_enc:rep_encoding) ~to_enc os s =
+  (* Write the 'from_enc'-encoded string 's' as 'to_enc'-encoded string to
+   * 'os'. All characters are written as they are.
+   *)
+  let s' =
+    if to_enc = (from_enc :> encoding)
+    then s 
+    else recode_string 
+                ~in_enc:(from_enc :> encoding)
+                ~out_enc:to_enc
+                ~subst:(fun n -> 
+                          failwith 
+                            ("Pxp_aux.write_markup_string: Cannot represent " ^
+                             "code point " ^ string_of_int n))
+                s
+  in
+  write os s' 0 (String.length s')
+;;
+
+
+let write_data_string ~(from_enc:rep_encoding) ~to_enc os content =
+  (* Write the 'from_enc'-encoded string 'content' as a 'to_enc'-encoded
+   * string to 'os'. The characters '&', '<', '>', '"', '%' and every
+   * character that cannot be represented in 'to_enc' are written as
+   * entity or character references "&...;".
+   *)
+  let convert_ascii s =
+    (* Convert the ASCII-encoded string 's'. Note that 'from_enc' is
+     * always ASCII-compatible
+     *)
+    if to_enc = (from_enc :> encoding) 
+    then s
+    else
+      recode_string
+        ~in_enc:(from_enc :> encoding)
+        ~out_enc:to_enc
+        ~subst:(fun n -> assert false)
+       s
+  in
+
+  let write_ascii s =
+    (* Write the ASCII-encoded string 's' *)
+    let s' = convert_ascii s in
+    write os s' 0 (String.length s')
+  in
+      
+  let write_part j l =
+    (* Writes the substring of 'content' beginning at pos 'j' with length 'l'
+     *)
+    if to_enc = (from_enc :> encoding) then
+      write os content j l
+    else begin
+      let s' = recode_string 
+                ~in_enc:(from_enc :> encoding)
+                ~out_enc:to_enc
+                ~subst:(fun n -> 
+                          convert_ascii ("&#" ^ string_of_int n ^ ";"))
+                (String.sub content j l)
+      in
+      write os s' 0 (String.length s')
+    end
+  in
+
+  let i = ref 0 in
+  for k = 0 to String.length content - 1 do
+    match content.[k] with
+       ('&' | '<' | '>' | '"' | '%') as c ->
+         if !i < k then
+           write_part !i (k - !i);
+         begin match c with
+             '&' -> write_ascii "&amp;"
+           | '<' -> write_ascii "&lt;"
+           | '>' -> write_ascii "&gt;"
+           | '"' -> write_ascii "&quot;"
+           | '%' -> write_ascii "&#37;"  (* reserved in DTDs *)
+           | _   -> assert false
+         end;
+         i := k+1
+      | _ -> ()
+  done;
+  if !i < String.length content then
+    write_part !i (String.length content - !i)
+;;
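+
+(* Example (illustrative): with from_enc = to_enc, writing the content
+ * "if a<b & c>d" produces "if a&lt;b &amp; c&gt;d" on the output stream.
+ * Characters that cannot be represented in 'to_enc' are additionally
+ * written as numeric character references "&#n;".
+ *)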
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.6  2000/08/14 22:24:55  gerd
+ *     Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.5  2000/07/25 00:30:01  gerd
+ *     Added support for pxp:dtd PI options.
+ *
+ * Revision 1.4  2000/07/16 18:31:09  gerd
+ *     The exception Illegal_character has been dropped.
+ *
+ * Revision 1.3  2000/07/16 16:33:57  gerd
+ *     New function write_markup_string: Handles the encoding
+ * of the string.
+ *
+ * Revision 1.2  2000/07/08 22:15:45  gerd
+ *     [Merging 0.2.10:] write_data_string: The character '%' is special, too.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_aux.ml:
+ *
+ * Revision 1.12  2000/05/27 19:08:30  gerd
+ *     Added functionality to check standalone declaration:
+ *
+ *     expand_attvalue: Checks whether included entities violate the
+ * stand-alone declaration.
+ *
+ *     value_of_attribute: Checks whether ENTITY/ENTITIES values violate
+ * this declaration. (Furthermore, it is checked whether the NDATA
+ * entity exists - this has been forgotten in previous versions.)
+ *
+ *     value_of_attribute/check_attribute_value_lexically: improved.
+ *
+ *     New function normalization_changes_value: helps detecting
+ * one case which violates the standalone declaration.
+ *
+ * Revision 1.11  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.10  2000/05/01 20:41:56  gerd
+ *     New function write_data_string.
+ *
+ * Revision 1.9  2000/04/30 18:11:31  gerd
+ *     New function normalize_line_separators.
+ *     In function expand_attvalue: New argument norm_crlf. If the attvalue
+ * is read directly from a file, the sequence CR LF must be converted to a
+ * single space. If the attvalue is read from a replacement text, CR LF has
+ * already converted to a single LF, and CR LF, if still occurring, must be
+ * converted to two spaces. The caller can indicate the case by passing
+ * true/false as norm_crlf.
+ *
+ * Revision 1.8  1999/09/01 22:51:07  gerd
+ *     Added functions.
+ *     'character' raises Illegal_character if characters are found that
+ * do not match the production Char.
+ *
+ * Revision 1.7  1999/09/01 16:17:37  gerd
+ *     Added function 'check_name'.
+ *
+ * Revision 1.6  1999/08/15 20:33:19  gerd
+ *     Added: a function that checks public identifiers. Only certain
+ * characters may occur in these identifiers.
+ *     Control characters are rejected by the "character" function.
+ *     Bugfix: recursive entity references are detected in attribute
+ * expansion
+ *
+ * Revision 1.5  1999/08/15 02:18:02  gerd
+ *     That '<' is not allowed in attribute values, is a violation
+ * of well-formedness, not of the validity; so WF_error is raised.
+ *
+ * Revision 1.4  1999/08/15 00:20:37  gerd
+ *     When expanding attribute values, references to parameter
+ * entities are now resolved by the method "replacement_text" which
+ * has an additional return value, and no longer by "attlist_replacement_text".
+ * The new return value indicates whether references to external entities
+ * have been resolved (directly or indirectly); this is allowed at some
+ * locations but not in attribute values.
+ *
+ * Revision 1.3  1999/08/14 22:05:53  gerd
+ *     Several functions have now a "warner" as argument which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ *
+ * Revision 1.2  1999/08/10 21:35:06  gerd
+ *     The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ *     TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1  1999/08/10 00:35:50  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_codewriter.ml b/helm/DEVEL/pxp/pxp/pxp_codewriter.ml
new file mode 100644 (file)
index 0000000..a6ab0db
--- /dev/null
@@ -0,0 +1,518 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_document
+open Pxp_yacc
+open Pxp_dtd
+open Pxp_types
+
+let write_expr_ext_id out extid =
+  match extid with
+      System s ->
+       output_string out ("(Pxp_types.System\"" ^ String.escaped s ^ "\")")
+    | Public(s,t) ->
+       output_string out ("(Pxp_types.Public(\"" ^ String.escaped s ^ 
+                          "\",\"" ^
+                          String.escaped t ^ "\"))")
+    | Anonymous ->
+       output_string out "Pxp_types.Anonymous"
+;;
+
+
+let rec write_expr_content_model out cm =
+  match cm with
+      Unspecified -> output_string out "Pxp_types.Unspecified"
+    | Empty       -> output_string out "Pxp_types.Empty"
+    | Any         -> output_string out "Pxp_types.Any"
+    | Mixed msl   -> output_string out "(Pxp_types.Mixed [";
+                    List.iter
+                      (fun ms ->
+                         write_expr_mixed_spec out ms;
+                         output_string out "; ";
+                      )
+                      msl;
+                    output_string out "])";
+    | Regexp re   -> output_string out "(Pxp_types.Regexp ";
+                    write_expr_regexp_spec out re;
+                    output_string out ")";
+
+and write_expr_mixed_spec out ms =
+  match ms with
+      MPCDATA  -> output_string out "Pxp_types.MPCDATA"
+    | MChild s -> output_string out ("(Pxp_types.MChild \"" ^
+                                    String.escaped s ^ "\")")
+
+and write_expr_regexp_spec out re =
+  match re with
+      Optional re'  -> output_string out "(Pxp_types.Optional ";
+                      write_expr_regexp_spec out re';
+                      output_string out ")";
+    | Repeated re'  -> output_string out "(Pxp_types.Repeated ";
+                      write_expr_regexp_spec out re';
+                      output_string out ")";
+    | Repeated1 re' -> output_string out "(Pxp_types.Repeated1 ";
+                      write_expr_regexp_spec out re';
+                      output_string out ")";
+    | Alt rel       -> output_string out "(Pxp_types.Alt [";
+                      List.iter
+                        (fun re' ->
+                           write_expr_regexp_spec out re';
+                           output_string out "; ";
+                        )
+                        rel;
+                      output_string out "])";
+    | Seq rel       -> output_string out "(Pxp_types.Seq [";
+                      List.iter
+                        (fun re' ->
+                           write_expr_regexp_spec out re';
+                           output_string out "; ";
+                        )
+                        rel;
+                      output_string out "])";
+    | Child s       -> output_string out ("(Pxp_types.Child \"" ^ 
+                                         String.escaped s ^ "\")")
+;;
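+
+(* Example (illustrative): the content model (a, b?), i.e.
+ *   Seq [ Child "a"; Optional (Child "b") ]
+ * is written as the O'Caml expression
+ *   (Pxp_types.Seq [(Pxp_types.Child "a"); (Pxp_types.Optional (Pxp_types.Child "b")); ])
+ *)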
+
+
+let write_expr_att_type out at =
+  match at with
+      A_cdata       -> output_string out "Pxp_types.A_cdata"
+    | A_id          -> output_string out "Pxp_types.A_id"
+    | A_idref       -> output_string out "Pxp_types.A_idref"
+    | A_idrefs      -> output_string out "Pxp_types.A_idrefs"
+    | A_entity      -> output_string out "Pxp_types.A_entity"
+    | A_entities    -> output_string out "Pxp_types.A_entities"
+    | A_nmtoken     -> output_string out "Pxp_types.A_nmtoken"
+    | A_nmtokens    -> output_string out "Pxp_types.A_nmtokens"
+    | A_notation sl -> output_string out "(Pxp_types.A_notation [";
+                      List.iter
+                        (fun s ->
+                           output_string out ("\"" ^ 
+                                              String.escaped s ^ "\"; "))
+                        sl;
+                      output_string out "])";
+    | A_enum sl     -> output_string out "(Pxp_types.A_enum [";
+                      List.iter
+                        (fun s ->
+                           output_string out ("\"" ^ 
+                                              String.escaped s ^ "\"; "))
+                        sl;
+                      output_string out "])";
+;;
+
+
+let write_expr_att_default out ad =
+  match ad with
+      D_required  -> output_string out "Pxp_types.D_required"
+    | D_implied   -> output_string out "Pxp_types.D_implied"
+    | D_default s -> output_string out ("(Pxp_types.D_default \"" ^
+                                       String.escaped s ^ "\")")
+    | D_fixed s   -> output_string out ("(Pxp_types.D_fixed \"" ^
+                                       String.escaped s ^ "\")")
+;;
+
+
+let write_expr_att_value out av =
+  match av with
+      Value s       -> output_string out ("(Pxp_types.Value \"" ^
+                                         String.escaped s ^ "\")")
+    | Valuelist sl  -> output_string out ("(Pxp_types.Valuelist [");
+                      List.iter
+                        (fun s ->
+                           output_string out ("\"" ^ String.escaped s ^ 
+                                              "\"; ")
+                        )
+                        sl;
+                      output_string out "])";
+    | Implied_value -> output_string out "Pxp_types.Implied_value"
+;;
+
+
+let ocaml_encoding enc =
+  match enc with
+      `Enc_utf8      -> "`Enc_utf8"
+    | `Enc_utf16     -> "`Enc_utf16"
+    | `Enc_utf16_le  -> "`Enc_utf16_le"
+    | `Enc_utf16_be  -> "`Enc_utf16_be"
+    | `Enc_iso88591  -> "`Enc_iso88591"
+;;
+
+
+let write_expr_new_pi out pi =
+  output_string out ("(new Pxp_dtd.proc_instruction \"" ^
+                    String.escaped(pi # target) ^ "\" \"" ^
+                    String.escaped(pi # value) ^ "\" " ^ 
+                    ocaml_encoding(pi # encoding) ^ ")")
+;;
+
+
+let write_expr_node_type out nt =
+  match nt with
+      T_data       -> output_string out "Pxp_document.T_data"
+    | T_element s  -> output_string out ("(Pxp_document.T_element \"" ^
+                                        String.escaped s ^ "\")")
+    | T_super_root -> output_string out "Pxp_document.T_super_root"
+    | T_pinstr s   -> output_string out ("(Pxp_document.T_pinstr \"" ^
+                                        String.escaped s ^ "\")")
+    | T_comment    -> output_string out "Pxp_document.T_comment"
+    | _            -> assert false
+;;
+
+
+let write_local_dtd out (dtd : dtd) =
+  (* Outputs "let mkdtd warner = ... in" to 'out' *)
+  output_string out "let mkdtd warner =\n";
+  output_string out ("let encoding = " ^ ocaml_encoding (dtd # encoding) ^ 
+                     " in\n");
+  output_string out "let dtdobj = new Pxp_dtd.dtd warner encoding in\n";
+  
+  (* Set the ID: *)
+  output_string out "dtdobj # set_id ";
+  begin match dtd # id with
+      None -> ()
+    | Some(External x) -> 
+       output_string out "(Pxp_types.External ";
+       write_expr_ext_id out x;
+       output_string out ");\n"
+    | Some(Derived x) ->  
+       output_string out "(Pxp_types.Derived ";
+       write_expr_ext_id out x;
+       output_string out ");\n"
+    | Some Internal ->   
+       output_string out "Pxp_types.Internal;\n";
+  end;
+
+  (* Set standalone declaration: *)
+  output_string out ("dtdobj # set_standalone_declaration " ^
+                     string_of_bool (dtd # standalone_declaration) ^ ";\n");
+
+  (* Add notations: *)
+  List.iter
+    (fun noname ->
+       let no = dtd # notation noname in
+       output_string out ("let no = new Pxp_dtd.dtd_notation \"" ^
+                         String.escaped noname ^ "\" ");
+       write_expr_ext_id out (no # ext_id);
+       output_string out " encoding in\n";
+       output_string out "dtdobj # add_notation no;\n";
+    )
+    (List.sort Pervasives.compare (dtd # notation_names));
+
+  (* Add unparsed entities: *)
+  List.iter
+    (fun enname ->
+       let en, _ = dtd # gen_entity enname in
+       if en # is_ndata then begin
+        let ext_id = en # ext_id in
+        let notation = en # notation in
+        let encoding = en # encoding in
+        output_string out ("let ndata = new Pxp_entity.ndata_entity \"" ^
+                           String.escaped enname ^ "\" ");
+        write_expr_ext_id out ext_id;
+        output_string out ("\"" ^ String.escaped notation ^ "\" " ^ 
+                           ocaml_encoding encoding ^ " in \n");
+        output_string out "dtdobj # add_gen_entity (ndata :> Pxp_entity.entity) false;\n";
+       end;
+    )
+    (List.sort Pervasives.compare (dtd # gen_entity_names));
+
+
+  (* Add elements: *)
+  List.iter
+    (fun elname ->
+       (* Create the element 'el': *)
+       let el = dtd # element elname in
+       output_string out ("let el = new Pxp_dtd.dtd_element dtdobj \"" ^
+                         String.escaped elname ^ "\" in\n");
+       output_string out "let cm = ";
+       write_expr_content_model out (el # content_model);
+       output_string out " in\n";
+       output_string out "el # set_cm_and_extdecl cm false;\n";
+       (* Add attributes: *)
+       List.iter
+        (fun attname ->
+           let atttype, attdefault = el # attribute attname in
+           output_string out ("el # add_attribute \"" ^ 
+                              String.escaped attname ^ "\" ");
+           write_expr_att_type out atttype;
+           output_string out " ";
+           write_expr_att_default out attdefault;
+           output_string out " false;\n";
+        )
+        (List.sort Pervasives.compare (el # attribute_names));
+
+       (* Allow arbitrary? *)
+       if el # arbitrary_allowed then
+         output_string out "el # allow_arbitrary;\n"
+       else
+         output_string out "el # disallow_arbitrary;\n";
+
+       (* Validate: *)
+       output_string out "el # validate;\n";
+       (* Add the element 'el' to 'dtdobj': *)
+       output_string out "dtdobj # add_element el;\n";
+    )
+    (List.sort Pervasives.compare (dtd # element_names));
+
+  (* Add processing instructions: *)
+  List.iter
+    (fun target ->
+       let pilist = dtd # pinstr target in
+       List.iter
+        (fun pi ->
+           output_string out "let pi = ";
+           write_expr_new_pi out pi;
+           output_string out " in\n";
+           output_string out "dtdobj # add_pinstr pi;\n";
+        )
+        pilist;
+    )
+    (List.sort Pervasives.compare (dtd # pinstr_names));
+
+  (* Set the name of the root element: *)
+  begin match dtd # root with
+      None -> ()
+    | Some rootname ->
+       output_string out ("dtdobj # set_root \"" ^
+                          String.escaped rootname ^ "\";\n")
+  end;
+
+  (* Special options: *)
+  if dtd # arbitrary_allowed then
+    output_string out "dtdobj # allow_arbitrary;\n"
+  else
+    output_string out "dtdobj # disallow_arbitrary;\n";
+
+  (* Return dtdobj: *)
+  output_string out "dtdobj in\n"
+;;
+
+
+let rec write_local_subtree out n =
+  (* Outputs the term generating the subtree *)
+  
+  output_string out "let nt = ";
+  write_expr_node_type out (n # node_type);
+  output_string out " in\n";
+
+  begin match n # node_type with
+      T_data ->
+       output_string out ("let t = Pxp_document.create_data_node spec dtd \"" ^
+                          String.escaped (n # data) ^ "\" in\n")
+    | T_element elname ->
+       let loc, line, col = n # position in
+       output_string out
+         ("let pos = \"" ^ String.escaped loc ^ "\", " ^ 
+          string_of_int line ^ ", " ^ 
+          string_of_int col ^ " in\n");
+       output_string out 
+          ("let t = Pxp_document.create_element_node ~position:pos spec dtd \"" ^
+           String.escaped elname ^ "\" [ ");
+       List.iter
+         (fun (name,value) ->
+            begin match value with
+                Value s -> 
+                  output_string out ("\"" ^ String.escaped name ^ "\", ");
+                  output_string out ("\"" ^ String.escaped s ^ "\"; ")
+              | Valuelist sl ->
+                  output_string out ("\"" ^ String.escaped name ^ "\", ");
+                  output_string out ("\"" ^ 
+                                     String.escaped (String.concat " " sl) ^ 
+                                     "\"; ")
+              | Implied_value ->
+                  ()
+            end
+         )
+         (n # attributes);
+       output_string out " ] in\n";
+    | T_super_root ->
+       let loc, line, col = n # position in
+       output_string out
+         ("let pos = \"" ^ String.escaped loc ^ "\", " ^ 
+          string_of_int line ^ ", " ^ 
+          string_of_int col ^ " in\n");
+       output_string out 
+          ("let t = Pxp_document.create_super_root_node ~position:pos spec dtd in\n")
+    | T_pinstr piname ->
+       let loc, line, col = n # position in
+       output_string out
+         ("let pos = \"" ^ String.escaped loc ^ "\", " ^ 
+          string_of_int line ^ ", " ^ 
+          string_of_int col ^ " in\n");
+       output_string out "let pi = ";
+       write_expr_new_pi out (List.hd (n # pinstr piname));
+       output_string out " in\n";
+       output_string out 
+          ("let t = Pxp_document.create_pinstr_node ~position:pos spec dtd pi in\n")
+    | T_comment ->
+       let loc, line, col = n # position in
+       output_string out
+         ("let pos = \"" ^ String.escaped loc ^ "\", " ^ 
+          string_of_int line ^ ", " ^ 
+          string_of_int col ^ " in\n");
+       output_string out "let comment = ";
+       ( match n # comment with
+             None   -> assert false
+           | Some c -> output_string out ("\"" ^ String.escaped c ^ "\"")
+       );
+       output_string out " in\n";
+       output_string out 
+          ("let t = Pxp_document.create_comment_node ~position:pos spec dtd comment in\n")
+    | _ ->
+       assert false
+  end;
+
+  (* Add processing instructions: *)
+  begin match n # node_type with
+      T_pinstr _ ->
+       ()
+    | _ ->
+       List.iter
+         (fun target ->
+            let pilist = n # pinstr target in
+            List.iter
+              (fun pi ->
+                 output_string out "let pi = ";
+                 write_expr_new_pi out pi;
+                 output_string out " in\n";
+                 output_string out "add_pinstr t pi;\n";
+              )
+              pilist;
+         )
+         (List.sort Pervasives.compare (n # pinstr_names));
+  end;
+       
+  (* Add the sub nodes: *)
+  n # iter_nodes
+    (fun n' ->
+       output_string out "add_node t (\n";
+       write_local_subtree out n';
+       output_string out ");\n";
+    );
+
+  (* Validate: *)
+  output_string out "local_validate t;\n";
+
+  (* Return: *)
+  output_string out "t\n"
+;;
+
+
+let write_local_document out (d : 'ext document) =
+  (* Outputs "let mkdoc warner spec = ... in" *)
+  
+  output_string out "let mkdoc warner spec =\n";
+  output_string out "let doc = new Pxp_document.document warner in\n";
+  output_string out ("doc # init_xml_version \"" ^
+                    String.escaped (d # xml_version) ^ "\";\n");
+  write_local_dtd out (d # dtd);
+  output_string out "let dtd = mkdtd warner in\n";
+  output_string out "let root = ";
+  write_local_subtree out (d # root);
+  output_string out " in\n";
+  output_string out "doc # init_root root;\n";
+
+  (* Add processing instructions: *)
+  List.iter
+    (fun target ->
+       let pilist = d # pinstr target in
+       List.iter
+        (fun pi ->
+           output_string out "let pi = ";
+           write_expr_new_pi out pi;
+           output_string out " in\n";
+           output_string out "doc # add_pinstr pi;\n";
+        )
+        pilist;
+    )
+    (List.sort Pervasives.compare (d # pinstr_names));
+  
+  (* Return the result: *)
+  output_string out "doc in\n"
+;;
+
+
+let write_helpers out =
+  output_string out "let add_node t n = (t : 'ext Pxp_document.node) # add_node (n : 'ext Pxp_document.node) in\n";
+  output_string out "let add_pinstr t pi = (t : 'ext Pxp_document.node) # add_pinstr (pi : Pxp_dtd.proc_instruction) in\n";
+  output_string out "let local_validate t = (t : 'ext Pxp_document.node) # local_validate ()in\n"
+;;
+
+
+let write_document out d =
+  output_string out "let create_document warner spec =\n";
+  write_helpers out;
+  write_local_document out d;
+  output_string out "mkdoc warner spec;;\n"
+;;
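+
+(* The emitted code has roughly this shape (sketch, details elided):
+ *
+ *   let create_document warner spec =
+ *     let add_node t n = ... in            (* helpers *)
+ *     let add_pinstr t pi = ... in
+ *     let local_validate t = ... in
+ *     let mkdoc warner spec =
+ *       let doc = new Pxp_document.document warner in
+ *       let mkdtd warner = ... in          (* rebuilds the DTD *)
+ *       let dtd = mkdtd warner in
+ *       let root = ... in                  (* rebuilds the node tree *)
+ *       doc # init_root root;
+ *       doc in
+ *     mkdoc warner spec;;
+ *)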
+
+
+let write_dtd out dtd =
+  output_string out "let create_dtd warner =\n";
+  write_local_dtd out dtd;
+  output_string out "mkdtd warner;;\n"
+;;
+
+
+let write_subtree out t =
+  (* Wrap the emitted subtree term into a local mktree function, analogous
+   * to mkdoc in write_document, so that the final call is well-defined. *)
+  output_string out "let create_subtree dtd spec =\n";
+  write_helpers out;
+  output_string out "let mktree dtd spec =\n";
+  write_local_subtree out t;
+  output_string out " in\n";
+  output_string out "mktree dtd spec;;\n"
+;;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.7  2000/08/30 15:48:07  gerd
+ *     Minor update.
+ *
+ * Revision 1.6  2000/08/18 20:16:59  gerd
+ *     Updates because of new node types T_comment, T_pinstr, T_super_root.
+ *
+ * Revision 1.5  2000/07/23 02:16:51  gerd
+ *     Changed signature of local_validate.
+ *
+ * Revision 1.4  2000/07/09 17:59:35  gerd
+ *     Updated: The position of element nodes is also written.
+ *
+ * Revision 1.3  2000/07/09 00:30:00  gerd
+ *     Notations are written before they are used.
+ *     Unparsed entities are included.
+ *     Further changes.
+ *
+ * Revision 1.2  2000/07/08 22:59:14  gerd
+ *     [Merging 0.2.10:] Improved: The resulting code can be compiled
+ * faster, and the compiler is less hungry on memory.
+ *     Updated because of PXP interface changes.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_codewriter.ml:
+ *
+ * Revision 1.1  2000/03/11 22:57:28  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_codewriter.mli b/helm/DEVEL/pxp/pxp/pxp_codewriter.mli
new file mode 100644 (file)
index 0000000..e04bd8a
--- /dev/null
@@ -0,0 +1,94 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_document
+open Pxp_yacc
+open Pxp_dtd
+
+val write_document : out_channel -> 'ext document -> unit
+    (* Writes O'Caml code to the out_channel defining a top-level function
+     * that creates a fresh document equal to the passed document:
+     *
+     * "let create_document warner spec = ...;;"
+     *
+     * If you compile the code and call "create_document warner spec", the
+     * function creates a document tree which is (almost) equal to the
+     * passed document.
+     * 
+     * The following properties may not be equal:
+     * - Parsed entities
+     * - Whether a declaration occurs in an external entity or not
+     * 
+     * 'warner': a collect_warnings object
+     * 'spec': a Pxp_document.spec
+     *)
+
+  
+val write_dtd : out_channel -> dtd -> unit
+    (* Writes O'Caml code to the out_channel defining a top-level function
+     * that creates a fresh DTD equal to the passed DTD:
+     *
+     * "let create_dtd warner = ...;;"
+     *
+     * If you compile the code and call "create_dtd warner", the
+     * function creates a DTD object which is (almost) equal to the
+     * passed object.
+     * 
+     * The following properties may not be equal:
+     * - Parsed entities
+     * - Whether a declaration occurs in an external entity or not
+     * 
+     * 'warner': a collect_warnings object
+     *)
+
+val write_subtree : out_channel -> 'ext node -> unit
+    (* Writes O'Caml code to the out_channel defining a top-level function
+     * that creates a fresh node tree equal to the passed tree:
+     *
+     * "let create_subtree dtd spec = ...;;"
+     *
+     * If you compile the code and call "create_subtree dtd spec", the
+     * function creates a node tree which is equal to the passed tree.
+     *
+     * 'dtd': a DTD object
+     * 'spec': a Pxp_document.spec
+     *)
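+
+(* Usage sketch (illustrative; the output file name and the variable 'doc'
+ * are only examples):
+ *
+ *   let ch = open_out "doc_code.ml" in
+ *   Pxp_codewriter.write_document ch doc;
+ *   close_out ch
+ *
+ * Compiling the emitted file then provides the create_document function
+ * described above.
+ *)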
+
+
+  
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/07/09 00:30:14  gerd
+ *     Updated.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_codewriter.mli:
+ *
+ * Revision 1.1  2000/03/11 22:57:28  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_dfa.ml b/helm/DEVEL/pxp/pxp/pxp_dfa.ml
new file mode 100644 (file)
index 0000000..b7baeb4
--- /dev/null
@@ -0,0 +1,271 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+module StringOrd = struct
+  type t = string
+  let compare = (compare : string -> string -> int)
+end;;
+
+module StringMap = Map.Make(StringOrd);;
+  (* 'a StringMap.t: the type of maps (dictionaries) from string to 'a *)
+
+module Graph = struct
+  type vertex =
+      { mutable edges_out : (string * vertex) list;
+       mutable edges_out_map : vertex StringMap.t;
+       mutable edges_in : (vertex * string) list;
+       mutable graph : graph;
+       mutable id : int;
+      }
+  and graph =
+      { mutable vertexes : vertex list;
+       mutable mid : int;   (* maximum id + 1 *)
+      }
+
+  exception Edge_not_unique
+
+  let create () =
+    { vertexes = [];
+      mid = 0;
+    }
+
+  let new_vertex g =
+    let v =
+      { edges_out = [];
+       edges_out_map = StringMap.empty;
+       edges_in = [];
+       graph = g;
+       id = g.mid;
+      } in
+    g.vertexes <- v :: g.vertexes;
+    g.mid <- g.mid + 1;
+    v
+
+  let new_edge v_from e v_to =
+    if v_from.graph != v_to.graph then
+      invalid_arg "Pxp_dfa.Graph.new_edge";
+    try 
+      let v = StringMap.find e v_from.edges_out_map in
+      if v != v_to then
+       raise Edge_not_unique;
+    with
+       Not_found ->
+         v_from.edges_out     <- (e, v_to) :: v_from.edges_out;
+         v_from.edges_out_map <- StringMap.add e v_to v_from.edges_out_map;
+         v_to.edges_in        <- (v_from, e) :: v_to.edges_in;
+         ()
+
+  let graph_of_vertex v = v.graph
+
+  let union g1 g2 =
+    List.iter
+      (fun v ->
+        v.graph <- g1;
+        v.id <- v.id + g1.mid;
+      )
+      g2.vertexes;
+    g1.vertexes <- g2.vertexes @ g1.vertexes;
+    g1.mid <- g1.mid + g2.mid;
+    g2.vertexes <- [];
+    g2.mid <- 0
+
+  let outgoing_edges v =
+    v.edges_out
+
+  let ingoing_edges v =
+    v.edges_in
+
+  let follow_edge v e =
+    StringMap.find e v.edges_out_map  (* or raise Not_found *)
+end
+;;
+
+
+module VertexOrd = struct
+  type t = Graph.vertex
+  let compare v1 v2 =
+    if v1.Graph.graph != v2.Graph.graph then
+      invalid_arg "Pxp_dfa.VertexOrd.compare";
+    compare v1.Graph.id v2.Graph.id
+end
+;;
+
+module VertexSet = Set.Make(VertexOrd);;
+
+
+type dfa_definition =
+    { dfa_graph : Graph.graph;
+      dfa_start : Graph.vertex;
+      dfa_stops : VertexSet.t;
+      dfa_null  : bool;
+    }
+;;
+
+(**********************************************************************)
+
+(* Now that we have all the auxiliary data types, it is time for the
+ * algorithm that transforms regexps to DFAs.
+ *)
+
+open Pxp_types
+
+let dfa_of_regexp_content_model re =
+  let rec get_dfa re =
+    match re with
+       Child e ->
+         let g = Graph.create() in
+         let v1 = Graph.new_vertex g in
+         let v2 = Graph.new_vertex g in
+         Graph.new_edge v1 e v2;
+         { dfa_graph = g;
+           dfa_start = v1;
+           dfa_stops = VertexSet.singleton v2;
+           dfa_null = false;
+         }
+         
+      | Seq [] ->
+         invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
+      | Seq [re'] ->
+         get_dfa re'
+      | Seq (re1 :: seq2) ->
+         let dfa1 = get_dfa re1 in
+         let dfa2 = get_dfa (Seq seq2) in
+         (* Merge the two graphs. The result is in dfa1.dfa_graph: *)
+         Graph.union dfa1.dfa_graph dfa2.dfa_graph;
+         (* Concatenation I: Add additional edges to the graph such
+          * that if w1 matches dfa1, and w2 matches dfa2, and w2 is not
+          * empty, w1w2 will match the merged DFAs.
+          *)
+         List.iter
+           (fun (e,v') ->
+              VertexSet.iter
+                (fun v ->
+                   Graph.new_edge v e v')
+                dfa1.dfa_stops
+           )
+           (Graph.outgoing_edges dfa2.dfa_start);
+         (* Concatenation II: If the empty string matches dfa2, the stop
+          * nodes of dfa1 remain stop nodes.
+          *)
+         let stops =
+           if dfa2.dfa_null then
+             VertexSet.union dfa1.dfa_stops dfa2.dfa_stops
+           else
+             dfa2.dfa_stops
+         in
+         (* The resulting DFA: *)
+         { dfa_graph = dfa1.dfa_graph;
+           dfa_start = dfa1.dfa_start;
+           dfa_stops = stops;
+           dfa_null  = dfa1.dfa_null && dfa2.dfa_null;
+         }
+
+      | Alt [] ->
+         invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
+      | Alt [re'] ->
+         get_dfa re'
+      | Alt alt ->
+         let dfa_alt = List.map get_dfa alt in
+         (* Merge the graphs. The result is in g: *)
+         let g = (List.hd dfa_alt).dfa_graph in
+         List.iter
+           (fun dfa ->
+              Graph.union g dfa.dfa_graph
+           )
+           (List.tl dfa_alt);
+         (* Get the new start node: *)
+         let start = Graph.new_vertex g in
+         (* Add the new edges starting at 'start': *)
+         List.iter
+           (fun dfa ->
+              List.iter
+                (fun (e, v) ->
+                   Graph.new_edge start e v)
+                (Graph.outgoing_edges dfa.dfa_start)
+           )
+           dfa_alt;
+         (* If one of the old start nodes was a stop node, the new start
+          * node will be a stop node, too.
+          *)
+         let null = List.exists (fun dfa -> dfa.dfa_null) dfa_alt in
+         let stops =
+           List.fold_left
+             (fun s dfa -> VertexSet.union s dfa.dfa_stops)
+             VertexSet.empty
+             dfa_alt in
+         let stops' =
+           if null then
+             VertexSet.union stops (VertexSet.singleton start)
+           else
+             stops in
+         (* The resulting DFA: *)
+         { dfa_graph = g;
+           dfa_start = start;
+           dfa_stops = stops';
+           dfa_null  = null;
+         }
+
+      | Optional re' ->
+         let dfa' = get_dfa re' in
+         if dfa'.dfa_null then
+           (* simple case *)
+           dfa'
+         else begin
+           (* Optimization possible: case ingoing_edges dfa_start = [] *)
+           let start = Graph.new_vertex dfa'.dfa_graph in
+           List.iter
+             (fun (e, v) ->
+                Graph.new_edge start e v)
+             (Graph.outgoing_edges dfa'.dfa_start);
+           
+           (* The resulting DFA: *)
+           { dfa_graph = dfa'.dfa_graph;
+             dfa_start = start;
+             dfa_stops = VertexSet.union dfa'.dfa_stops 
+                                         (VertexSet.singleton start);
+             dfa_null  = true;
+           }
+         end
+
+      | Repeated1 re' ->
+         let dfa' = get_dfa re' in
+         List.iter
+           (fun (e, v') ->
+              VertexSet.iter
+                (fun v ->
+                   Graph.new_edge v e v')
+                dfa'.dfa_stops
+           )
+           (Graph.outgoing_edges dfa'.dfa_start);
+
+           (* The resulting DFA: *)
+           { dfa_graph = dfa'.dfa_graph;
+             dfa_start = dfa'.dfa_start;
+             dfa_stops = dfa'.dfa_stops;
+             dfa_null  = dfa'.dfa_null;
+           }
+
+      | Repeated re' ->
+         get_dfa (Optional (Repeated1 re'))
+
+  in
+  try
+    get_dfa re
+  with
+      Graph.Edge_not_unique -> raise Not_found
+;;
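+
+(* Example (illustrative):
+ *   dfa_of_regexp_content_model (Seq [ Child "a"; Repeated (Child "b") ])
+ * yields a dfa_definition that accepts an "a" followed by any number of
+ * "b"s; dfa_null is false because the empty child list does not match.
+ * A non-deterministic model, e.g. an Alt of two Seqs that both start with
+ * Child "a", makes Graph.new_edge raise Edge_not_unique, which this
+ * function turns into Not_found.
+ *)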
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/07/23 02:16:08  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_dfa.mli b/helm/DEVEL/pxp/pxp/pxp_dfa.mli
new file mode 100644 (file)
index 0000000..515eace
--- /dev/null
@@ -0,0 +1,77 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+module Graph : sig
+  type graph
+  type vertex
+
+  (* A directed graph whose edges are marked with strings (= element types)
+   * and with the constraint that for a given vertex and a given element
+   * type the edge must be unique.
+   *)
+
+  exception Edge_not_unique
+
+  val create : unit -> graph
+      (* Creates an empty graph *)
+
+  val new_vertex : graph -> vertex
+      (* Adds a new vertex to the graph, and returns the vertex *)
+
+  val new_edge : vertex -> string -> vertex -> unit
+      (* new_edge v_from etype v_to:
+       * Adds a new edge from vertex v_from to vertex v_to, marked with
+       * etype.
+       * Raises Edge_not_unique if there is already an edge etype starting
+       * at v_from to a different vertex than v_to.
+       *)
+
+  val graph_of_vertex : vertex -> graph
+      (* Returns the graph the passed vertex is contained in. *)
+
+  val union : graph -> graph -> unit
+      (* union g1 g2:
+       * Moves the vertexes and edges found in g2 to g1.
+       * After that, g2 is empty again.
+       *)
+
+  val outgoing_edges : vertex -> (string * vertex) list
+      (* Returns the list of outgoing edges starting in the passed vertex *)
+
+  val follow_edge : vertex -> string -> vertex
+      (* Follows the edge starting in the passed vertex which is marked
+       * with the passed element type.
+       * Raises Not_found if there is no such edge.
+       *)
+
+  val ingoing_edges : vertex -> (vertex * string) list
+      (* Returns the list of ingoing edges ending in the passed vertex *)
+end
+
+module VertexSet : Set.S with type elt = Graph.vertex
+
+
+type dfa_definition =
+    { dfa_graph : Graph.graph;
+      dfa_start : Graph.vertex;   (* Where the automaton starts *)
+      dfa_stops : VertexSet.t;    (* Where the automaton may stop *)
+      dfa_null  : bool;           (* Whether dfa_start is a member of dfa_stops *)
+    }
+
+val dfa_of_regexp_content_model : Pxp_types.regexp_spec -> dfa_definition
+  (* Computes the DFA or raises Not_found if it does not exist *)
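+
+  (* Usage sketch (illustrative): to test whether a sequence of element
+   * types matches the model, walk the automaton:
+   *
+   *   let accepts dfa types =
+   *     let rec walk v ts =
+   *       match ts with
+   *           []       -> VertexSet.mem v dfa.dfa_stops
+   *         | t :: ts' -> (try walk (Graph.follow_edge v t) ts'
+   *                        with Not_found -> false)
+   *     in
+   *     walk dfa.dfa_start types
+   *)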
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/07/23 02:16:08  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_document.ml b/helm/DEVEL/pxp/pxp/pxp_document.ml
new file mode 100644 (file)
index 0000000..1f1d4cf
--- /dev/null
@@ -0,0 +1,1985 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_dtd
+open Pxp_aux
+open Pxp_dfa
+
+
+exception Skip
+
+type node_type =
+    T_element of string
+  | T_data
+  | T_super_root
+  | T_pinstr of string
+  | T_comment
+  | T_none
+  | T_attribute of string
+  | T_namespace of string
+;;
+
+
+class type ['node] extension =
+  object ('self)
+    method clone : 'self
+    method node : 'node
+    method set_node : 'node -> unit
+  end
+;;
+
+
+class type [ 'ext ] node =
+  object ('self)
+    constraint 'ext = 'ext node #extension
+    method extension : 'ext
+    method delete : unit
+    method parent : 'ext node
+    method root : 'ext node
+    method orphaned_clone : 'self
+    method orphaned_flat_clone : 'self
+    method add_node : ?force:bool -> 'ext node -> unit
+    method add_pinstr : proc_instruction -> unit
+    method pinstr : string -> proc_instruction list
+    method pinstr_names : string list
+    method node_position : int
+    method node_path : int list
+    method sub_nodes : 'ext node list
+    method iter_nodes : ('ext node -> unit) -> unit
+    method iter_nodes_sibl :
+      ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+    method nth_node : int -> 'ext node
+    method previous_node : 'ext node
+    method next_node : 'ext node
+    method set_nodes : 'ext node list -> unit
+    method data : string
+    method node_type : node_type
+    method position : (string * int * int)
+    method attribute : string -> att_value
+    method attribute_names : string list
+    method attribute_type : string -> att_type
+    method attributes : (string * Pxp_types.att_value) list
+    method required_string_attribute : string -> string
+    method required_list_attribute : string -> string list
+    method optional_string_attribute : string -> string option
+    method optional_list_attribute : string -> string list
+    method id_attribute_name : string
+    method id_attribute_value : string
+    method idref_attribute_names : string list
+    method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
+    method attributes_as_nodes : 'ext node list
+    method set_comment : string option -> unit
+    method comment : string option
+    method dtd : dtd
+    method encoding : rep_encoding
+    method create_element :
+                   ?position:(string * int * int) ->
+                   dtd -> node_type -> (string * string) list -> 'ext node
+    method create_data : dtd -> string -> 'ext node
+    method local_validate : ?use_dfa:bool -> unit -> unit
+    method keep_always_whitespace_mode : unit
+    method write : output_stream -> encoding -> unit
+    method write_compact_as_latin1 : output_stream -> unit
+    method internal_adopt : 'ext node option -> int -> unit
+    method internal_set_pos : int -> unit
+    method internal_delete : 'ext node -> unit
+    method internal_init : (string * int * int) ->
+                           dtd -> string -> (string * string) list -> unit
+    method internal_init_other : (string * int * int) ->
+                                 dtd -> node_type -> unit
+  end
+;;
+
+type 'ext spec_table =
+    { mapping : (string, 'ext node) Hashtbl.t;
+      data_node : 'ext node;
+      default_element : 'ext node;
+      super_root_node : 'ext node option;
+      pinstr_mapping : (string, 'ext node) Hashtbl.t;
+      default_pinstr_node : 'ext node option;
+      comment_node : 'ext node option;
+    }
+;;
+
+type 'ext spec =
+  Spec_table of 'ext spec_table
+;;
+
+
+let make_spec_from_mapping
+      ?super_root_exemplar 
+      ?comment_exemplar
+      ?default_pinstr_exemplar 
+      ?pinstr_mapping
+      ~data_exemplar ~default_element_exemplar ~element_mapping () =
+  Spec_table
+    { mapping = element_mapping;
+      data_node = data_exemplar;
+      default_element = default_element_exemplar;
+      super_root_node = super_root_exemplar;
+      comment_node = comment_exemplar;
+      default_pinstr_node = default_pinstr_exemplar;
+      pinstr_mapping =
+       (match pinstr_mapping with
+            None -> Hashtbl.create 1
+          | Some m -> m
+       )
+    }
+;;
+
+
+let make_spec_from_alist
+      ?super_root_exemplar 
+      ?comment_exemplar
+      ?default_pinstr_exemplar 
+      ?(pinstr_alist = [])
+      ~data_exemplar ~default_element_exemplar ~element_alist () =
+  let m = List.length  pinstr_alist in
+  let pinstr_mapping = Hashtbl.create m in
+  List.iter
+    (fun (name,ex) -> Hashtbl.add pinstr_mapping name ex)
+    pinstr_alist;
+  let n = List.length  element_alist in
+  let element_mapping = Hashtbl.create n in
+  List.iter
+    (fun (name,ex) -> Hashtbl.add element_mapping name ex)
+    element_alist;
+  make_spec_from_mapping
+    ?super_root_exemplar:      super_root_exemplar
+    ?comment_exemplar:         comment_exemplar
+    ?default_pinstr_exemplar:  default_pinstr_exemplar
+    ~pinstr_mapping:           pinstr_mapping
+    ~data_exemplar:            data_exemplar
+    ~default_element_exemplar: default_element_exemplar
+    ~element_mapping:          element_mapping
+    ()
+;;
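+
+(* Usage sketch (illustrative; 'data_ex' and 'elem_ex' stand for previously
+ * created exemplar nodes):
+ *
+ *   let spec =
+ *     make_spec_from_alist
+ *       ~data_exemplar:            data_ex
+ *       ~default_element_exemplar: elem_ex
+ *       ~element_alist:            []
+ *       ()
+ *
+ * Elements without an entry in ~element_alist are then created from
+ * 'elem_ex', data nodes from 'data_ex'.
+ *)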
+
+(**********************************************************************)
+
+exception Found;;
+
+let validate_content ?(use_dfa=None) model (el : 'a node) =
+  (* checks that the sub nodes of 'el' match the content model. Returns 'true'
+   * on success and 'false' on failure.
+   *)
+
+  let rec is_empty cl =
+    (* Whether the node list counts as empty or not. *)
+    match cl with
+       [] -> true
+      | n :: cl' ->
+         ( match n # node_type with
+             | T_element _     -> false
+             | _               -> is_empty cl'    (* ignore other nodes *)
+         )
+  in
+
+  let rec run_regexp cl ml =
+    (* Validates regexp content models ml against instances cl. This
+     * function works for deterministic and non-deterministic models.
+     * The implementation uses backtracking and may sometimes be slow.
+     *
+     * cl:   the list of children that will have to be matched
+     * ml:   the list of regexps that will have to match (to be read as
+     *       sequence)
+     * returns () meaning that no match has been found, or raises Found.
+     *)
+    match ml with
+       [] ->
+         if cl = [] then raise Found;      (* Frequent case *)
+         if is_empty cl then raise Found;  (* General condition *)
+      | Seq seq :: ml' ->
+         assert (seq <> []);     (* necessary to ensure termination *)
+         run_regexp cl (seq @ ml')
+      | Alt alts :: ml' ->
+         let rec find alts =
+           match alts with
+               [] -> ()
+             | alt :: alts' ->
+                 run_regexp cl (alt :: ml');
+                 find alts'
+         in
+         assert (alts <> []);      (* Alt [] matches nothing *)
+         find alts
+      | Repeated re :: ml' ->
+         let rec norm re =     (* to avoid infinite loops *)
+           match re with
+               Repeated subre  -> norm subre    (* necessary *)
+             | Optional subre  -> norm subre    (* necessary *)
+             | Repeated1 subre -> norm subre    (* an optimization *)
+             | _               -> re
+         in
+         let re' = norm re in
+         run_regexp cl (re' :: Repeated re' :: ml');
+         run_regexp cl ml'
+      | Repeated1 re :: ml' ->
+         run_regexp cl (re :: Repeated re :: ml')
+      | Optional re :: ml' ->
+         run_regexp cl (re :: ml');
+         run_regexp cl ml';
+      | Child chld :: ml' ->
+         match cl with
+             [] ->
+               ()
+           | sub_el :: cl' ->
+               begin match sub_el # node_type with
+                   T_data ->                       (* Ignore data *)
+                     run_regexp cl' ml
+                     (* Note: It can happen that we find a data node here
+                      * if the 'keep_always_whitespace' mode is turned on.
+                      *)
+                 | T_element nt ->
+                     if nt = chld then run_regexp cl' ml'
+                 | _ ->                            (* Ignore this element *)
+                     run_regexp cl' ml
+               end
+  in
+
+  let run_dfa cl dfa =
+    (* Validates the content model (given as the DFA 'dfa') against instances cl. This
+     * function works ONLY for deterministic models.
+     * The implementation executes the automaton.
+     *)
+    let current_vertex = ref dfa.dfa_start in
+    let rec next_step cl =
+      match cl with
+         el :: cl' ->
+           begin match el # node_type with
+               T_data ->                       (* Ignore data *)
+                 next_step cl'
+                   (* Note: It can happen that we find a data node here
+                    * if the 'keep_always_whitespace' mode is turned on.
+                    *)
+             | T_element nt ->
+                 begin try
+                   current_vertex := Graph.follow_edge !current_vertex nt;
+                   next_step cl'
+                 with
+                     Not_found -> false
+                 end
+             | _ ->                         (* Ignore this node *)
+                 next_step cl'
+           end
+       | [] ->
+           VertexSet.mem !current_vertex dfa.dfa_stops
+    in
+    next_step cl
+  in   
+
+  match model with
+      Unspecified -> true
+    | Any -> true
+    | Empty ->
+       let cl = el # sub_nodes in
+       is_empty cl 
+    | Mixed (MPCDATA :: mix) ->
+       let mix' = List.map (function
+                                MPCDATA -> assert false
+                              | MChild x -> x)
+                           mix in
+       begin try
+         el # iter_nodes
+           (fun sub_el ->
+              let nt = sub_el # node_type in
+              match nt with
+              | T_element name ->
+                  if not (List.mem name mix') then raise Not_found;
+              | _ -> ()
+           );
+         true
+       with
+           Not_found ->
+             false
+       end
+    | Regexp re ->
+       let cl = el # sub_nodes in
+       begin match use_dfa with
+           None ->
+             (* General backtracking implementation: *)
+             begin try
+               run_regexp cl [re];
+               false
+             with
+                 Found -> true
+             end
+         | Some dfa ->
+             run_dfa cl dfa
+       end
+
+    | _ -> assert false
+;;
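+
+(* A small sketch (not part of the original code): this is what a declared
+ * content model like (a,(b|c)*) looks like in terms of the constructors
+ * handled above.  Such values are normally built by Pxp_dtd, not by hand.
+ *)
+let example_model =
+  Regexp (Seq [ Child "a"; Repeated (Alt [ Child "b"; Child "c" ]) ])
+;;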
+
+(**********************************************************************)
+
+
+class virtual ['ext] node_impl an_ext =
+  object (self)
+    constraint 'ext = 'ext node #extension
+
+    val mutable parent = (None : 'ext node option)
+    val mutable node_position = -1
+    val mutable dtd = (None : dtd option)
+    val mutable extension = an_ext
+
+    initializer
+      extension # set_node (self : 'ext #node  :> 'ext node)
+
+
+    method extension = (extension : 'ext)
+
+    method delete =
+      match parent with
+         None -> ()
+       | Some p -> p # internal_delete (self : 'ext #node :> 'ext node)
+
+    method parent =
+      match parent with
+         None -> raise Not_found
+       | Some p -> p
+
+    method root =
+      match parent with
+         None -> (self : 'ext #node :> 'ext node)
+       | Some p -> p # root
+
+    method node_position = 
+      if node_position >= 0 then node_position else
+       raise Not_found
+
+    method node_path =
+      let rec collect n path =
+       try
+         let p = n # node_position in
+         collect (n # parent) (p :: path)
+       with
+           Not_found -> 
+             (* n is the root *)
+             path
+      in
+      collect (self : 'ext #node :> 'ext node) []
+
+    method previous_node =
+      self # parent # nth_node (self # node_position - 1)
+
+    method next_node =
+      self # parent # nth_node (self # node_position + 1)
+
+    method orphaned_clone =
+      let x = extension # clone in
+      let n =
+       {< parent = None;
+          node_position = -1;
+          extension = x;
+       >} in
+      x # set_node (n : 'ext #node  :> 'ext node);
+      n
+
+    method orphaned_flat_clone =
+      let x = extension # clone in
+      let n =
+       {< parent = None;
+          node_position = -1;
+          extension = x;
+       >} in
+      x # set_node (n : 'ext #node  :> 'ext node);
+      n
+
+    method dtd =
+      match dtd with
+         None -> failwith "Pxp_document.node_impl#dtd: No DTD available"
+       | Some d -> d
+
+    method encoding =
+      match dtd with
+         None -> failwith "Pxp_document.node_impl#encoding: No DTD available"
+       | Some d -> d # encoding
+
+    method internal_adopt (new_parent : 'ext node option) pos =
+      begin match parent with
+         None -> ()
+       | Some p ->
+           if new_parent <> None then
+             failwith "Pxp_document.node_impl#internal_adopt: Tried to add a bound element"
+      end;
+      parent <- new_parent;
+      node_position <- pos
+
+    method internal_set_pos pos =
+      node_position <- pos
+
+    method virtual add_node : ?force:bool -> 'ext node -> unit
+    method virtual add_pinstr : proc_instruction -> unit
+    method virtual sub_nodes : 'ext node list
+    method virtual pinstr : string -> proc_instruction list
+    method virtual pinstr_names : string list
+    method virtual iter_nodes : ('ext node -> unit) -> unit
+    method virtual iter_nodes_sibl : ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+    method virtual nth_node : int -> 'ext node
+    method virtual set_nodes : 'ext node list -> unit
+    method virtual data : string
+    method virtual node_type : node_type
+    method virtual position : (string * int * int)
+    method virtual attribute : string -> att_value
+    method virtual attribute_names : string list
+    method virtual attribute_type : string -> att_type
+    method virtual attributes : (string * Pxp_types.att_value) list
+    method virtual required_string_attribute : string -> string
+    method virtual required_list_attribute : string -> string list
+    method virtual optional_string_attribute : string -> string option
+    method virtual optional_list_attribute : string -> string list
+    method virtual quick_set_attributes : (string * Pxp_types.att_value) list -> unit
+    method virtual attributes_as_nodes : 'ext node list
+    method virtual set_comment : string option -> unit
+    method virtual comment : string option
+    method virtual create_element : 
+                   ?position:(string * int * int) ->
+                   dtd -> node_type -> (string * string) list -> 'ext node
+    method virtual create_data : dtd -> string -> 'ext node
+    method virtual keep_always_whitespace_mode : unit
+    method virtual write : output_stream -> encoding -> unit
+    method virtual write_compact_as_latin1 : output_stream -> unit
+    method virtual local_validate : ?use_dfa:bool -> unit -> unit
+    method virtual internal_delete : 'ext node -> unit
+    method virtual internal_init : (string * int * int) ->
+                                dtd -> string -> (string * string) list -> unit
+    method virtual internal_init_other : (string * int * int) ->
+                                         dtd -> node_type -> unit
+  end
+;;
+
+
+(**********************************************************************)
+
+let no_position = ("?", 0, 0) ;;
+
+
+class ['ext] data_impl an_ext : ['ext] node =
+  object (self)
+    inherit ['ext] node_impl an_ext
+    val mutable content = ("" : string)
+
+    method position = no_position
+
+    method add_node ?(force=false) _ =
+      failwith "method 'add_node' not applicable to data node"
+    method add_pinstr _ =
+      failwith "method 'add_pinstr' not applicable to data node"
+    method pinstr _ = []
+    method pinstr_names = []
+    method sub_nodes = []
+    method iter_nodes _ = ()
+    method iter_nodes_sibl _ = ()
+    method nth_node _ = raise Not_found
+    method set_nodes _ =
+      failwith "method 'set_nodes' not applicable to data node"
+    method data = content
+    method node_type = T_data
+    method attribute _ = raise Not_found
+    method attribute_names = []
+    method attribute_type _ = raise Not_found
+    method attributes = []
+    method required_string_attribute _ =
+      failwith "Pxp_document, method required_string_attribute: not found"
+    method required_list_attribute _ =
+      failwith "Pxp_document, method required_list_attribute: not found"
+    method optional_string_attribute _ = None
+    method optional_list_attribute _ = []
+    method id_attribute_name = raise Not_found
+    method id_attribute_value = raise Not_found
+    method idref_attribute_names = []
+    method quick_set_attributes _ =
+      failwith "method 'quick_set_attributes' not applicable to data node"
+    method attributes_as_nodes = []
+    method comment = None
+    method set_comment c =
+      match c with
+         None -> ()
+       | Some _ -> failwith "method 'set_comment' not applicable to data node"
+    method create_element ?position _ _ _ =
+      failwith "method 'create_element' not applicable to data node"
+    method create_data new_dtd new_str =
+      let x = extension # clone in
+      let n =
+      ( {< parent = None;
+          extension = x;
+          dtd = Some new_dtd;
+          content = new_str;
+       >}
+       : 'ext #node :> 'ext node) in
+      x # set_node n;
+      n
+    method local_validate ?use_dfa () = ()
+    method keep_always_whitespace_mode = ()
+
+
+    method write os enc =
+      let encoding = self # encoding in
+      write_data_string ~from_enc:encoding ~to_enc:enc os content
+
+
+    method write_compact_as_latin1 os =
+      self # write os `Enc_iso88591
+       
+    method internal_delete _ =
+      assert false
+    method internal_init _ _ _ _ =
+      assert false
+    method internal_init_other _ _ _ =
+      assert false
+  end
+;;
+
+
+(**********************************************************************)
+
+class ['ext] attribute_impl ~element ~name value dtd =
+  (object (self)
+     val mutable parent = (None : 'ext node option)
+     val mutable dtd = dtd
+     val mutable element_name = element
+     val mutable att_name = name
+     val mutable att_value = value
+                              
+     method parent = 
+       match parent with
+          None -> raise Not_found
+        | Some p -> p
+            
+     method root =
+       match parent with
+          None -> (self : 'ext #node :> 'ext node)
+        | Some p -> p # root
+            
+     method internal_adopt new_parent _ =
+       parent <- new_parent
+
+     method orphaned_clone =
+       {< parent = None >}
+       
+     method orphaned_flat_clone =
+       {< parent = None >}
+       
+     method dtd = dtd
+                   
+     method encoding = dtd # encoding
+                        
+     method node_type = T_attribute att_name
+                         
+     method attribute n =
+       if n = att_name then att_value else raise Not_found
+        
+     method attribute_names = [ att_name ]
+                               
+     method attribute_type n =
+       let eltype = dtd # element element_name in
+       ( try
+          let atype, adefault = eltype # attribute n in
+          atype
+        with
+            Undeclared ->
+              A_cdata
+       )
+                      
+     method attributes = [ att_name, att_value ]
+                          
+     method required_string_attribute n =
+       if n = att_name then
+        match att_value with
+            Value s -> s
+          | Valuelist l -> String.concat " " l
+          | Implied_value -> raise Not_found
+       else
+        failwith "Pxp_document.attribute_impl#required_string_attribute: not found"
+
+        
+     method required_list_attribute n =
+       if n = att_name then
+        match att_value with
+            Value s -> [ s ]
+          | Valuelist l -> l
+          | Implied_value -> raise Not_found
+       else
+        failwith "Pxp_document.attribute_impl#required_list_attribute: not found"
+        
+     method optional_string_attribute n =
+       if n = att_name then
+        match att_value with
+            Value s -> Some s
+          | Valuelist l -> Some(String.concat " " l)
+          | Implied_value -> None
+       else
+        None
+        
+     method optional_list_attribute n =
+       if n = att_name then
+        match att_value with
+            Value s -> [ s ]
+          | Valuelist l -> l
+          | Implied_value -> []
+       else
+        []
+        
+    (* Senseless methods: *)
+        
+     method sub_nodes = []
+     method pinstr _ = []
+     method pinstr_names = []
+     method iter_nodes _ = ()
+     method iter_nodes_sibl _ = ()
+     method nth_node _ = raise Not_found
+     method data = ""
+     method position = ("?",0,0)
+     method comment = None
+     method local_validate ?use_dfa () = ()
+                                          
+    (* Non-applicable methods: *)
+                                          
+     method extension =
+       failwith "Pxp_document.attribute_impl#extension: not applicable"
+     method delete =
+       failwith "Pxp_document.attribute_impl#delete: not applicable"
+     method node_position =
+       failwith "Pxp_document.attribute_impl#node_position: not applicable"
+     method node_path =
+       failwith "Pxp_document.attribute_impl#node_path: not applicable"
+     method previous_node = 
+       failwith "Pxp_document.attribute_impl#previous_node: not applicable"
+     method next_node = 
+       failwith "Pxp_document.attribute_impl#next_node: not applicable"
+     method internal_set_pos _ =
+       failwith "Pxp_document.attribute_impl#internal_set_pos: not applicable"
+     method internal_delete _ =
+       failwith "Pxp_document.attribute_impl#internal_delete: not applicable"
+     method internal_init _ _ _ _ =
+       failwith "Pxp_document.attribute_impl#internal_init: not applicable"
+     method internal_init_other _ _ _ =
+       failwith "Pxp_document.attribute_impl#internal_init_other: not applicable"
+     method add_node ?force _ =
+       failwith "Pxp_document.attribute_impl#add_node: not applicable"
+     method add_pinstr _ =
+       failwith "Pxp_document.attribute_impl#add_pinstr: not applicable"
+     method set_nodes _ =
+       failwith "Pxp_document.attribute_impl#set_nodes: not applicable"
+     method quick_set_attributes _ =
+       failwith "Pxp_document.attribute_impl#quick_set_attributes: not applicable"
+     method attributes_as_nodes =
+       failwith "Pxp_document.attribute_impl#attributes_as_nodes: not applicable"
+     method set_comment c =
+       if c <> None then
+        failwith "Pxp_document.attribute_impl#set_comment: not applicable"
+     method create_element ?position _ _ _ =
+       failwith "Pxp_document.attribute_impl#create_element: not applicable"
+     method create_data _ _ =
+       failwith "Pxp_document.attribute_impl#create_data: not applicable"
+     method keep_always_whitespace_mode =
+       failwith "Pxp_document.attribute_impl#keep_always_whitespace_mode: not applicable"
+     method write _ _ =
+       failwith "Pxp_document.attribute_impl#write: not applicable"
+     method write_compact_as_latin1 _ =
+       failwith "Pxp_document.attribute_impl#write_compact_as_latin1: not applicable"
+     method id_attribute_name =
+       failwith "Pxp_document.attribute_impl#id_attribute_name: not applicable"
+     method id_attribute_value =
+       failwith "Pxp_document.attribute_impl#id_attribute_value: not applicable"
+     method idref_attribute_names =
+       failwith "Pxp_document.attribute_impl#idref_attribute_names: not applicable"
+   end
+     : ['ext] node)
+;;
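+
+(* Usage sketch (not part of the original code): read an element's
+ * attributes back through the wrapper nodes returned by
+ * attributes_as_nodes; each wrapper is an attribute_impl as defined above.
+ *)
+let attribute_pairs (el : 'ext node) =
+  List.map
+    (fun anode ->
+       match anode # node_type with
+           T_attribute name -> (name, anode # optional_string_attribute name)
+         | _                -> assert false)
+    (el # attributes_as_nodes)
+;;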
+
+(**********************************************************************)
+
+class ['ext] element_impl an_ext : ['ext] node =
+    object (self:'self)
+      inherit ['ext] node_impl an_ext as super
+
+      val mutable content_model = Any
+      val mutable content_dfa = lazy None
+      val mutable ext_decl = false
+      val mutable ntype = T_none
+      val mutable id_att_name = None
+      val mutable idref_att_names = []
+      val mutable rev_nodes = ([] : 'c list)
+      val mutable nodes = (None : 'c list option)
+      val mutable array = (None : 'c array option)
+      val mutable size = 0
+      val mutable attributes = []
+      val mutable att_nodes = []
+      val mutable comment = None
+      val pinstr = lazy (Hashtbl.create 10 : (string,proc_instruction) Hashtbl.t)
+      val mutable keep_always_whitespace = false
+
+      val mutable position = no_position
+
+      method comment = comment
+
+      method set_comment c =
+       if ntype = T_comment then
+         comment <- c
+       else
+         failwith "set_comment: not applicable to node types other than T_comment"
+
+      method attributes = attributes
+
+      method position = position
+
+      method private error_name =
+       match ntype with
+           T_element n -> "Element `" ^ n ^ "'"
+         | T_super_root -> "Super root"
+         | T_pinstr n -> "Wrapper element for processing instruction `" ^ n ^ 
+             "'"
+         | T_comment -> "Wrapper element for comment"
+         | T_none -> "NO element"
+         | T_attribute _ -> assert false
+         | T_namespace _ -> assert false
+         | T_data -> assert false
+
+      method add_node ?(force = false) n =
+       let only_whitespace s =
+         (* Checks that the string "s" contains only whitespace. On failure,
+          * Validation_error is raised.
+          *)
+         let l = String.length s in
+         if l < 100 then begin
+           for i=0 to l - 1 do  (* for loop is faster for small 'l' *)
+             match s.[i] with
+                 ('\009'|'\010'|'\013'|'\032') -> ()
+               | _ ->
+                   raise(Validation_error(self # error_name ^ 
+                                          " must not have character contents"));
+           done
+         end
+         else begin
+           let lexbuf = Lexing.from_string s in
+           let lexerset = Pxp_lexers.get_lexer_set (self # dtd # encoding) in
+           let t = lexerset.scan_name_string lexbuf in
+           if t <> Ignore or
+             (lexerset.scan_name_string lexbuf <> Eof)
+           then
+             raise(Validation_error(self # error_name ^
+                                    " must not have character contents"));
+           ()
+         end
+       in
+       (* general DTD check: *)
+       begin match dtd with
+           None -> ()
+         | Some d -> if n # dtd != d then
+             failwith "Pxp_document.element_impl # add_node: the sub node has a different DTD";
+       end;
+       (* specific checks: *)
+       try
+         begin match n # node_type with
+             T_data ->
+               begin match content_model with
+                   Any         -> ()
+                 | Unspecified -> ()
+                 | Empty       -> 
+                     if not force then begin
+                       if n # data <> "" then
+                         raise(Validation_error(self # error_name ^ 
+                                                " must be empty"));
+                       raise Skip
+                     end
+                 | Mixed _     -> ()
+                 | Regexp _    -> 
+                     if not force then begin
+                       only_whitespace (n # data);
+                       (* TODO: following check faster *)
+                       if n # dtd # standalone_declaration &&
+                         n # data <> ""
+                       then begin
+                         (* The standalone declaration is violated if the
+                          * element declaration is contained in an external
+                          * entity.
+                          *)
+                         if ext_decl then
+                           raise
+                             (Validation_error
+                                (self # error_name ^ 
+                                 " violates standalone declaration"  ^
+                                 " because extra white space separates" ^ 
+                                 " the sub elements"));
+                       end;
+                       if not keep_always_whitespace then raise Skip
+                     end
+               end
+           | _ ->
+               ()
+         end;
+         (* all OK, so add this node: *)
+         n # internal_adopt (Some (self : 'ext #node :> 'ext node)) size;
+         rev_nodes <- n :: rev_nodes;
+         nodes <- None;
+         array <- None;
+         size <- size + 1
+       with Skip ->
+         ()
+
+      method add_pinstr pi =
+       begin match dtd with
+           None -> ()
+         | Some d -> 
+             if pi # encoding <> d # encoding then
+               failwith "Pxp_document.element_impl # add_pinstr: Inconsistent encodings";
+       end;
+       let name = pi # target in
+       Hashtbl.add (Lazy.force pinstr) name pi
+
+      method pinstr name =
+       Hashtbl.find_all (Lazy.force pinstr) name
+
+      method pinstr_names =
+       let l = ref [] in
+       Hashtbl.iter
+         (fun n _ -> l := n :: !l)
+         (Lazy.force pinstr);
+       !l
+
+      method sub_nodes =
+       match nodes with
+           None ->
+             let cl = List.rev rev_nodes in
+             nodes <- Some cl;
+             cl
+         | Some cl ->
+             cl
+
+      method iter_nodes f =
+       let cl = self # sub_nodes in
+       List.iter f cl
+
+      method iter_nodes_sibl f =
+       let cl = self # sub_nodes in
+       let rec next last_node l =
+         match l with
+             [] -> ()
+           | [x] ->
+               f last_node x None
+           | x :: y :: l' ->
+               f last_node x (Some y);
+               next (Some x) l'
+       in
+       next None cl
+
+      method nth_node p =
+       if p < 0 or p >= size then raise Not_found;
+       if array = None then
+         array <- Some (Array.of_list (self # sub_nodes));
+       match array with
+           None -> assert false
+         | Some a ->
+             a.(p)
+
+      method set_nodes nl =
+       let old_size = size in
+       List.iter
+         (fun n -> n # internal_adopt None (-1))
+         rev_nodes;
+       begin try
+         size <- 0;
+         List.iter
+           (fun n -> n # internal_adopt 
+                           (Some (self : 'ext #node :> 'ext node))
+                           size;
+                     size <- size + 1)
+           nl
+       with
+           e ->
+             (* revert action as much as possible *)
+             List.iter
+               (fun n -> n # internal_adopt None (-1))
+               rev_nodes;
+             size <- old_size;
+             let pos = ref (size-1) in
+             List.iter
+               (fun n -> n # internal_adopt 
+                               (Some (self : 'ext #node :> 'ext node))
+                               !pos;
+                         decr pos
+               )
+               rev_nodes;
+             (* [TODO] Note: there may be bad members in nl *)
+             raise e
+       end;
+       rev_nodes <- List.rev nl;
+       array <- None;
+       nodes <- None
+
+
+      method orphaned_clone : 'self =
+       let sub_clones =
+         List.map
+           (fun m ->
+              m # orphaned_clone)
+           rev_nodes 
+       in
+
+       let x = extension # clone in
+       let n =
+         {< parent = None;
+            node_position = -1;
+            extension = x;
+            rev_nodes = sub_clones;
+            nodes = None;
+            array = None;
+         >} in 
+
+       let pos = ref (size - 1) in
+       List.iter
+         (fun m -> m # internal_adopt 
+                     (Some (n : 'ext #node :> 'ext node)) 
+                     !pos;
+                   decr pos
+         )
+         sub_clones;
+
+       x # set_node (n : 'ext #node  :> 'ext node);
+       n
+
+      method orphaned_flat_clone : 'self =
+       let x = extension # clone in
+       let n =
+         {< parent = None;
+            node_position = -1;
+            extension = x;
+            rev_nodes = [];
+            nodes = None;
+            size = 0;
+            array = None;
+         >} in 
+
+       x # set_node (n : 'ext #node  :> 'ext node);
+       n
+
+
+      method internal_delete n =
+       rev_nodes <- List.filter (fun n' -> n' != n) rev_nodes;
+       size <- size - 1;
+       let p = ref (size-1) in
+       List.iter
+         (fun n' -> n' # internal_set_pos !p; decr p)
+         rev_nodes;
+       nodes <- None;
+       n # internal_adopt None (-1);
+       
+
+      method data =
+       let cl = self # sub_nodes in
+       String.concat "" (List.map (fun n -> n # data) cl)
+
+      method node_type = ntype
+
+
+      method attribute n =
+       List.assoc n attributes
+
+      method attribute_names =
+       List.map fst attributes
+
+      method attribute_type n =
+       match ntype with
+           T_element name ->
+             let d =
+               match dtd with
+                   None -> assert false 
+                 | Some d -> d in
+             let eltype = d # element name in
+             ( try
+                 let atype, adefault = eltype # attribute n in
+                 atype
+               with
+                   Undeclared ->
+                     A_cdata
+             )
+         | _ ->
+             failwith "attribute_type: not available for non-element nodes"
+
+
+      method required_string_attribute n =
+       try
+         match List.assoc n attributes with
+             Value s -> s
+           | Valuelist l -> String.concat " " l
+           | Implied_value -> raise Not_found
+       with
+           Not_found ->
+             failwith "Pxp_document, method required_string_attribute: not found"
+
+      method optional_string_attribute n =
+       try
+         match List.assoc n attributes with
+             Value s -> Some s
+           | Valuelist l -> Some (String.concat " " l)
+           | Implied_value -> None
+       with
+           Not_found ->
+             None
+
+      method required_list_attribute n =
+       try
+         match List.assoc n attributes with
+             Value s -> [ s ]
+           | Valuelist l -> l
+           | Implied_value -> raise Not_found
+       with
+           Not_found ->
+             failwith "Pxp_document, method required_list_attribute: not found"
+
+      method optional_list_attribute n =
+       try
+         match List.assoc n attributes with
+             Value s -> [ s ]
+           | Valuelist l -> l
+           | Implied_value -> []
+       with
+           Not_found ->
+             []
+
+      method id_attribute_name =
+       match id_att_name with
+           None -> raise Not_found
+         | Some name -> name
+
+      method id_attribute_value =
+       match id_att_name with
+           None -> raise Not_found
+         | Some name ->
+             begin match List.assoc name attributes (* may raise Not_found *)
+             with
+                 Value s -> s
+               | _ -> raise Not_found
+             end
+
+
+      method idref_attribute_names = idref_att_names
+
+
+      method quick_set_attributes atts =
+       match ntype with
+           T_element _ ->
+             attributes <- atts;
+             att_nodes <- []
+         | _ ->
+             failwith "quick_set_attributes: not applicable for non-element node"
+
+
+      method attributes_as_nodes =
+       match att_nodes with
+           [] when attributes = [] ->
+             []
+         | [] ->
+             let dtd = self # dtd in
+             let element_name =
+               match ntype with
+                   T_element n -> n
+                 | _ ->
+                     assert false in
+             let l =
+               List.map
+                 (fun (n,v) ->
+                    new attribute_impl 
+                      ~element:element_name
+                      ~name:n
+                      v
+                      dtd)
+                 attributes in
+             att_nodes <- l;
+             l
+         | _ ->
+             att_nodes
+
+
+      method create_element 
+                       ?(position = no_position) new_dtd new_type new_attlist =
+       let x = extension # clone in
+       let obj = ( {< parent = None;
+                      extension = x;
+                      pinstr = lazy (Hashtbl.create 10)
+                   >}
+                   : 'ext #node :> 'ext node
+                 ) in
+       x # set_node obj;
+       match new_type with
+           T_data ->
+             failwith "create_element: Cannot create T_data node"
+         | T_element name ->
+             obj # internal_init position new_dtd name new_attlist;
+             obj
+         | (T_comment | T_pinstr _ | T_super_root | T_none) ->
+             obj # internal_init_other position new_dtd new_type;
+             obj
+         | _ ->
+             failwith "create_element: Cannot create such node"
+
+
+      method internal_init_other new_pos new_dtd new_ntype =
+       (* resets the contents of the object *)
+       parent <- None;
+       rev_nodes <- [];
+       nodes <- None;
+       ntype <- new_ntype;
+       position <- new_pos;
+       content_model <- Any;
+       content_dfa <- lazy None;
+       attributes <- [];
+       att_nodes <- [];
+       dtd <- Some new_dtd;
+       ext_decl <- false;
+       id_att_name <- None;
+       idref_att_names <- [];
+       comment <- None;
+
+
+      method internal_init new_pos new_dtd new_name new_attlist =
+       (* ONLY FOR T_element NODES!!! *)
+       (* resets the contents of the object *)
+       parent <- None;
+       rev_nodes <- [];
+       nodes <- None;
+       ntype <- T_element new_name;
+       position <- new_pos;
+       comment <- None;
+       att_nodes <- [];
+
+       let lexerset = Pxp_lexers.get_lexer_set (new_dtd # encoding) in
+       let sadecl = new_dtd # standalone_declaration in
+
+       (* First validate the element name and the attributes: *)
+       (* Well-Formedness Constraint: Unique Att Spec *)
+       let rec check_uniqueness al =
+         match al with
+             [] -> ()
+           | (n, av) :: al' ->
+               if List.mem_assoc n al' then
+                 raise (WF_error("Attribute `" ^ n ^ "' occurs twice in element `" ^ new_name ^ "'"));
+               check_uniqueness al'
+       in
+       check_uniqueness new_attlist;
+       (* Validity Constraint: Element Valid [element has been declared] *)
+       try
+         let eltype = new_dtd # element new_name in
+         content_model <- eltype # content_model;
+         content_dfa   <- lazy(eltype # content_dfa);
+         ext_decl <- eltype # externally_declared;
+         id_att_name <- eltype # id_attribute_name;
+         idref_att_names <- eltype # idref_attribute_names;
+         (* Validity Constraint: Attribute Value Type *)
+         (* Validity Constraint: Fixed Attribute Default *)
+         (* Validity Constraint: Standalone Document Declaration (partly) *)
+         let undeclared_attlist = ref [] in
+         let new_attlist' =
+           List.map
+             (fun (n,v) ->
+                try
+                  (* Get type, default, and the normalized attribute
+                   * value 'av':
+                   *)
+                  let atype, adefault = eltype # attribute n in
+                  let av = value_of_attribute lexerset new_dtd n atype v in
+                  (* If necessary, check whether normalization violates
+                   * the standalone declaration.
+                   *)
+                  if sadecl &&
+                      eltype # 
+                       attribute_violates_standalone_declaration n (Some v)
+                  then
+                    raise
+                      (Validation_error
+                         ("Attribute `" ^ n ^ "' of element type `" ^
+                          new_name ^ "' violates standalone declaration"));
+                  (* If the default is "fixed", check that. *)
+                  begin match adefault with
+                      (D_required | D_implied) -> ()
+                    | D_default _ -> ()
+                    | D_fixed u ->
+                        let uv = value_of_attribute 
+                                         lexerset new_dtd "[default]" atype u in
+                        if av <> uv then
+                          raise
+                            (Validation_error
+                               ("Attribute `" ^ n ^ 
+                                "' is fixed, but has here a different value"));
+                  end;
+                  n,av
+                with
+                    Undeclared ->
+                      (* raised by method "# attribute" *)
+                       undeclared_attlist :=
+                         (n, value_of_attribute lexerset new_dtd n A_cdata v) ::
+                         !undeclared_attlist;
+                       n, Implied_value        (* does not matter *)
+             )
+             new_attlist in
+         (* Validity Constraint: Required Attribute *)
+         (* Validity Constraint: Standalone Document Declaration (partly) *)
+         (* Add attributes with default values *)
+         let new_attlist'' =
+           List.map
+             (fun n ->
+                try
+                  n, List.assoc n new_attlist'
+                with
+                    Not_found ->
+                      (* Check standalone declaration: *)
+                      if sadecl &&
+                           eltype # 
+                           attribute_violates_standalone_declaration
+                           n None then
+                        raise
+                          (Validation_error
+                             ("Attribute `" ^ n ^ "' of element type `" ^
+                              new_name ^ "' violates standalone declaration"));
+                      (* add default value or Implied *)
+                      let atype, adefault = eltype # attribute n in
+                      match adefault with
+                          D_required ->
+                            raise(Validation_error("Required attribute `" ^ n ^ "' is missing"))
+                        | D_implied ->
+                            n, Implied_value
+                        | D_default v ->
+                            n, value_of_attribute lexerset new_dtd n atype v
+                        | D_fixed v ->
+                            n, value_of_attribute lexerset new_dtd n atype v
+             )
+             (eltype # attribute_names)
+         in
+         dtd <- Some new_dtd;
+         attributes <- new_attlist'' @ !undeclared_attlist;
+       with
+           Undeclared ->
+             (* The DTD allows arbitrary attributes/contents for this
+              * element
+              *)
+             dtd <- Some new_dtd;
+             attributes <- List.map (fun (n,v) -> n, Value v) new_attlist;
+             content_model <- Any;
+             content_dfa <- lazy None;
+
+      method local_validate ?(use_dfa=false) () =
+       (* validates that the content of this element matches the model *)
+       let dfa = if use_dfa then Lazy.force content_dfa else None in
+       if not (validate_content 
+                 ~use_dfa:dfa
+                 content_model 
+                 (self : 'ext #node :> 'ext node)) then
+         raise(Validation_error(self # error_name ^ 
+                                " does not match its content model"))
+
+
+      method create_data _ _ =
+       failwith "method 'create_data' not applicable to element node"
+
+      method keep_always_whitespace_mode =
+       keep_always_whitespace <- true
+
+      method write os enc =
+       let encoding = self # encoding in
+       let wms = 
+         write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+       begin match ntype with
+           T_element name ->
+             wms ("<" ^ name);
+             List.iter
+               (fun (aname, avalue) ->
+                  match avalue with
+                      Implied_value -> ()
+                    | Value v ->
+                        wms ("\n" ^ aname ^ "=\"");
+                        write_data_string ~from_enc:encoding ~to_enc:enc os v;
+                        wms "\"";
+                    | Valuelist l ->
+                        let v = String.concat " " l in
+                        wms ("\n" ^ aname ^ "=\"");
+                        write_data_string ~from_enc:encoding ~to_enc:enc os v;
+                        wms "\"";
+               )
+               attributes;
+             wms "\n>";
+         | _ ->
+             ()
+       end;
+
+       Hashtbl.iter
+         (fun n pi ->
+            pi # write os enc
+         )
+         (Lazy.force pinstr);
+       List.iter 
+         (fun n -> n # write os enc)
+         (self # sub_nodes);
+
+       begin match ntype with
+           T_element name ->
+             wms ("</" ^ name ^ "\n>");
+         | _ ->
+             ()
+       end
+
+       (* TODO: How to write comments? The comment string may contain
+        * illegal characters or "--".
+        *)
+
+
+      method write_compact_as_latin1 os =
+       self # write os `Enc_iso88591
+
+    end
+;;
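+
+(* Usage sketch (not part of the original code): validate a complete
+ * subtree by calling local_validate on every node; data nodes simply
+ * ignore the call.
+ *)
+let rec validate_subtree (n : 'ext node) =
+  n # local_validate ~use_dfa:true ();
+  List.iter validate_subtree (n # sub_nodes)
+;;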
+
+
+let spec_table_find_exemplar tab eltype =
+  try
+    Hashtbl.find tab.mapping eltype
+  with
+      Not_found -> tab.default_element
+;;
+
+
+let create_data_node spec dtd str =
+  match spec with
+      Spec_table tab ->
+       let exemplar = tab.data_node in
+       exemplar # create_data dtd str
+;;
+
+
+let create_element_node ?position spec dtd eltype atts =
+   match spec with
+      Spec_table tab ->
+       let exemplar = spec_table_find_exemplar tab eltype in
+       exemplar # create_element ?position:position dtd (T_element eltype) atts
+;;
+
+
+let create_super_root_node ?position spec dtd =
+    match spec with
+      Spec_table tab ->
+       ( match tab.super_root_node with
+             None -> 
+               failwith "Pxp_document.create_super_root_node: No exemplar"
+           | Some x -> 
+               x # create_element ?position:position dtd T_super_root []
+       )
+;;
+
+let create_no_node ?position spec dtd =
+    match spec with
+      Spec_table tab ->
+       let x = tab.default_element in
+       x # create_element ?position:position dtd T_none []
+;;
+
+
+let create_comment_node ?position spec dtd text =
+  match spec with
+      Spec_table tab ->
+       ( match tab.comment_node with
+             None ->
+               failwith "Pxp_document.create_comment_node: No exemplar"
+           | Some x ->
+               let e = x # create_element ?position:position dtd T_comment [] 
+               in
+               e # set_comment (Some text);
+               e
+       )
+;;
+       
+    
+let create_pinstr_node ?position spec dtd pi =
+  let target = pi # target in
+  let exemplar =
+    match spec with
+       Spec_table tab ->
+         ( try 
+             Hashtbl.find tab.pinstr_mapping target
+           with
+               Not_found ->
+                 ( match tab.default_pinstr_node with
+                       None -> 
+                         failwith 
+                           "Pxp_document.create_pinstr_node: No exemplar"
+                     | Some x -> x
+                 )
+         )
+  in
+  let el = 
+    exemplar # create_element ?position:position dtd (T_pinstr target) [] in
+  el # add_pinstr pi;
+  el
+;;
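+
+(* Usage sketch (not part of the original code): a trivial extension
+ * (just the clone/node/set_node trio demanded by the extension class
+ * type), a spec built from it, and a helper that creates a tiny tree
+ * with the factory functions above.  All names are illustrative; the
+ * DTD passed to make_tiny_tree is assumed to allow arbitrary contents.
+ *)
+class trivial_ext =
+  object
+    val mutable node = (None : trivial_ext node option)
+    method clone = {< >}
+    method node =
+      match node with
+          None -> assert false
+        | Some n -> n
+    method set_node n = node <- Some n
+  end
+;;
+
+let trivial_spec =
+  make_spec_from_alist
+    ~data_exemplar:            (new data_impl (new trivial_ext))
+    ~default_element_exemplar: (new element_impl (new trivial_ext))
+    ~element_alist:            []
+    ()
+;;
+
+let make_tiny_tree dtd =
+  let root = create_element_node trivial_spec dtd "sample" [] in
+  root # add_node (create_data_node trivial_spec dtd "Hello world");
+  root
+;;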
+
+
+let find ?(deeply=false) f base =
+  let rec search_flat children =
+    match children with
+       [] -> raise Not_found
+      | n :: children' ->
+         if f n then n else search_flat children'
+  in
+  let rec search_deep children =
+    match children with
+       [] -> raise Not_found
+      | n :: children' ->
+         if f n then
+           n 
+         else
+           try search_deep (n # sub_nodes)
+           with Not_found -> search_deep children'
+  in
+  (if deeply then search_deep else search_flat)
+  (base # sub_nodes)
+;;
+
+
+let find_all ?(deeply=false) f base =
+  let rec search_flat children =
+    match children with
+       [] -> []
+      | n :: children' ->
+         if f n then n :: search_flat children' else search_flat children'
+  in
+  let rec search_deep children =
+    match children with
+       [] -> []
+      | n :: children' ->
+         let rest =
+           search_deep (n # sub_nodes) @ search_deep children' in
+         if f n then
+           n :: rest
+         else
+           rest
+  in
+  (if deeply then search_deep else search_flat)
+  (base # sub_nodes)
+;;
+
+
+let find_element ?deeply eltype base =
+  find 
+    ?deeply:deeply 
+    (fun n -> 
+       match n # node_type with
+          T_element name -> name = eltype
+        | _              -> false)
+    base
+;;
+
+
+let find_all_elements ?deeply eltype base =
+  find_all
+    ?deeply:deeply 
+    (fun n -> 
+       match n # node_type with
+          T_element name -> name = eltype
+        | _              -> false)
+    base
+;;
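+
+(* Usage sketch (not part of the original code; the element name "item"
+ * is made up): collect the character data of every "item" element below
+ * the given node.
+ *)
+let item_texts (root : 'ext node) =
+  List.map
+    (fun n -> n # data)
+    (find_all_elements ~deeply:true "item" root)
+;;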
+
+
+exception Skip;;
+
+let map_tree ~pre ?(post=(fun x -> x)) base =
+  let rec map_rec n =
+    (try
+      let n' = pre n in
+      if n' # node_type <> T_data then begin
+       let children = n # sub_nodes in
+       let children' = map_children children in
+       n' # set_nodes children';
+      end;
+      post n'
+    with
+       Skip -> raise Not_found
+    )
+  and map_children l =
+    match l with
+       [] -> []
+      | child :: l' ->
+         (try 
+            let child' = map_rec child in
+            child' :: map_children l'
+          with
+              Not_found ->
+                map_children l'
+         )
+  in
+  map_rec base
+;;
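+
+(* Usage sketch (not part of the original code): copy a tree while
+ * dropping all comment nodes.  Raising Skip inside ~pre removes the
+ * node; note that Not_found escapes if the root itself is a comment.
+ *)
+let without_comments (tree : 'ext node) =
+  map_tree
+    ~pre:(fun n ->
+            match n # node_type with
+                T_comment -> raise Skip
+              | _         -> n # orphaned_flat_clone)
+    tree
+;;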
+
+
+let map_tree_sibl ~pre ?(post=(fun _ x _ -> x)) base =
+  let rec map_rec l n r =
+    (try
+      let n' = pre l n r in
+      if n' # node_type <> T_data then begin
+       let children = n # sub_nodes in
+       let children' = map_children None children in
+       let children'' = postprocess_children None children' in
+       n' # set_nodes children'';
+      end;
+      n'
+    with
+       Skip -> raise Not_found
+    )
+  and map_children predecessor l =
+    (match l with
+        [] -> []
+       | child :: l' ->
+          let successor =
+            match l' with
+                []    -> None
+             | x :: _ -> Some x in
+          (try 
+             let child' = map_rec predecessor child successor in
+             child' :: map_children (Some child) l'
+           with
+               Not_found ->
+                 map_children (Some child) l'
+          )
+    )
+  and postprocess_children predecessor l =
+    (match l with
+        [] -> []
+       | child :: l' ->
+          let successor =
+            match l' with
+                []     -> None
+              | x :: _ -> Some x in
+          (try 
+             let child' = post predecessor child successor in
+             child' :: postprocess_children (Some child) l'
+           with
+               Skip ->
+                 postprocess_children (Some child) l'
+          )
+    )
+  in
+  let base' = map_rec None base None in
+  try post None base' None with Skip -> raise Not_found
+;;
+
+
+let iter_tree ?(pre=(fun x -> ())) ?(post=(fun x -> ())) base =
+  let rec iter_rec n =
+    (try
+      pre n;
+      let children = n # sub_nodes in
+      iter_children children;
+      post n
+    with
+       Skip -> raise Not_found
+    )
+  and iter_children l =
+    match l with
+       [] -> []
+      | child :: l' ->
+         (try 
+            iter_rec child;
+            iter_children l'
+          with
+              Not_found ->
+                iter_children l'
+         )
+  in
+  iter_rec base
+;;
+
+
+let iter_tree_sibl ?(pre=(fun _ _ _ -> ())) ?(post=(fun _ _ _ -> ())) base =
+  let rec iter_rec l n r =
+    (try
+      pre l n r;
+      let children = n # sub_nodes in
+      iter_children None children;
+      post l n r
+    with
+       Skip -> raise Not_found
+    )
+  and iter_children predecessor l =
+    (match l with
+        [] -> []
+       | child :: l' ->
+          let successor =
+            match l' with
+                []    -> None
+             | x :: _ -> Some x in
+          (try 
+             iter_rec predecessor child successor;
+             iter_children (Some child) l'
+           with
+               Not_found ->
+                 iter_children (Some child) l'
+          )
+    )
+  in
+  iter_rec None base None
+;;
+
+
+let compare a b =
+  let rec cmp p1 p2 =
+    match p1, p2 with
+       [], []         -> 0
+      | [], _          -> -1
+      | _, []          -> 1
+      | x::p1', y::p2' -> if x = y then cmp p1' p2' else x - y
+  in
+
+  let a_path = a # node_path in
+  let b_path = b # node_path in
+
+  cmp a_path b_path
+;;
+
+
+type 'ext ord_index = ('ext node, int) Hashtbl.t;;
+
+let create_ord_index base =
+  let n = ref 0 in
+  iter_tree ~pre:(fun _ -> incr n) base;
+  let idx = Hashtbl.create !n in
+  let k = ref 0 in
+  iter_tree ~pre:(fun node -> Hashtbl.add idx node !k; incr k) base;
+  idx
+;;
+
+
+let ord_number idx node =
+  Hashtbl.find idx node
+;;
+
+let ord_compare idx a b =
+  let ord_a = Hashtbl.find idx a in
+  let ord_b = Hashtbl.find idx b in
+  ord_a - ord_b
+;;
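+
+(* Usage sketch (not part of the original code): bring an arbitrary list
+ * of nodes of the tree below 'root' into document order, using the
+ * constant-time ord_compare instead of the path-based compare above.
+ *)
+let sort_document_order (root : 'ext node) (nodes : 'ext node list) =
+  let idx = create_ord_index root in
+  List.sort (ord_compare idx) nodes
+;;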
+
+class ['ext] document the_warner =
+  object (self)
+    val mutable xml_version = "1.0"
+    val mutable dtd = (None : dtd option)
+    val mutable root = (None : 'ext node option)
+
+    val pinstr = lazy (Hashtbl.create 10 : (string,proc_instruction) Hashtbl.t)
+    val warner = (the_warner : collect_warnings)
+
+    method init_xml_version s = 
+      if s <> "1.0" then
+       warner # warn ("XML version '" ^ s ^ "' not supported");
+      xml_version <- s
+
+    method init_root r = 
+      let dtd_r = r # dtd in
+      match r # node_type with
+
+       (**************** CASE: We have a super root element ***************)
+
+       | T_super_root ->
+           if not (dtd_r # arbitrary_allowed) then begin
+             match dtd_r # root with
+                 Some declared_root_element_name ->
+                   let real_root_element =
+                     try
+                       List.find
+                         (fun r' -> 
+                            match r' # node_type with
+                              | T_element _     -> true
+                              | _               -> false)
+                         (r # sub_nodes)
+                     with
+                         Not_found ->
+                           failwith "Pxp_document.document#init_root: Super root does not contain root element"
+                             (* TODO: Check also that there is at most one
+                              * element in the super root node
+                              *)
+
+                   in
+                   let real_root_element_name =
+                     match real_root_element # node_type with 
+                         T_element name -> name
+                       | _              -> assert false
+                   in
+                   if real_root_element_name <> declared_root_element_name then
+                     raise
+                       (Validation_error ("The root element is `" ^ 
+                                          real_root_element_name ^ 
+                                          "' but is declared as `" ^
+                                          declared_root_element_name))
+               | None -> ()
+           end;
+           (* All is okay, so store dtd and root node: *)
+           dtd <- Some dtd_r;
+           root <- Some r
+
+       (**************** CASE: No super root element **********************)
+
+       | T_element root_element_name ->
+           if not (dtd_r # arbitrary_allowed) then begin
+             match dtd_r # root with
+                 Some declared_root_element_name ->
+                   if root_element_name <> declared_root_element_name then
+                     raise
+                       (Validation_error ("The root element is `" ^ 
+                                          root_element_name ^ 
+                                          "' but is declared as `" ^
+                                          declared_root_element_name))
+               | None ->
+                   (* This may happen if you initialize your DTD yourself.
+                    * The value 'None' means that the method 'set_root' was
+                    * never called for the DTD; we interpret it here as:
+                    * The root element does not matter.
+                    *)
+                   ()
+           end;
+           (* All is okay, so store dtd and root node: *)
+           dtd <- Some dtd_r;
+           root <- Some r
+
+       | _ ->
+           failwith "Pxp_document.document#init_root: the root node must be an element or super-root"
+
+    method xml_version = xml_version
+
+    method xml_standalone = 
+      match dtd with
+         None -> false
+       | Some d -> d # standalone_declaration
+
+    method dtd =
+      match dtd with
+         None -> failwith "Pxp_document.document#dtd: Document has no DTD"
+       | Some d -> d
+
+    method encoding =
+      match dtd with
+         None -> failwith "Pxp_document.document#encoding: Document has no DTD"
+       | Some d -> d # encoding
+
+    method root =
+      match root with
+         None -> failwith "Pxp_document.document#root: Document has no root element"
+       | Some r -> r
+
+    method add_pinstr pi =
+      begin match dtd with
+         None -> ()
+       | Some d -> 
+           if pi # encoding <> d # encoding then
+             failwith "Pxp_document.document # add_pinstr: Inconsistent encodings";
+      end;
+      let name = pi # target in
+      Hashtbl.add (Lazy.force pinstr) name pi
+
+    method pinstr name =
+      Hashtbl.find_all (Lazy.force pinstr) name
+
+    method pinstr_names =
+      let l = ref [] in
+      Hashtbl.iter
+       (fun n _ -> l := n :: !l)
+       (Lazy.force pinstr);
+      !l
+
+    method write os enc =
+      let encoding = self # encoding in
+      let wms = 
+       write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+      let r = self # root in
+      wms ("<?xml version='1.0' encoding='" ^ 
+          Netconversion.string_of_encoding enc ^ 
+          "'?>\n");
+      ( match self # dtd # root with
+           None ->
+             self # dtd # write os enc false
+         | Some _ ->
+             self # dtd # write os enc true
+      );
+      Hashtbl.iter
+       (fun n pi ->
+          pi # write os enc
+       )
+       (Lazy.force pinstr);
+      r # write os enc;
+      wms "\n";
+           
+    method write_compact_as_latin1 os =
+      self # write os `Enc_iso88591
+
+  end
+;;
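+
+(* Usage sketch (not part of the original code): serialize a finished
+ * document to stdout.  Out_channel is assumed to be the corresponding
+ * constructor of Pxp_types.output_stream.
+ *)
+let dump_document (doc : 'ext document) =
+  doc # write (Out_channel stdout) `Enc_utf8
+;;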
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.14  2000/08/30 15:47:52  gerd
+ *     Implementation of pxp_document.mli rev 1.10.
+ *
+ * Revision 1.13  2000/08/26 23:29:10  gerd
+ *     Implementations for the changed in rev 1.9 of pxp_document.mli.
+ *
+ * Revision 1.12  2000/08/18 20:14:00  gerd
+ *     New node_types: T_super_root, T_pinstr, T_comment, (T_attribute),
+ * (T_none), (T_namespace).
+ *
+ * Revision 1.11  2000/08/14 22:24:55  gerd
+ *     Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.10  2000/07/23 02:16:34  gerd
+ *     Support for DFAs.
+ *
+ * Revision 1.9  2000/07/16 19:37:09  gerd
+ *     Simplification.
+ *
+ * Revision 1.8  2000/07/16 17:50:01  gerd
+ *     Fixes in 'write'
+ *
+ * Revision 1.7  2000/07/16 16:34:41  gerd
+ *     New method 'write', the successor of 'write_compact_as_latin1'.
+ *
+ * Revision 1.6  2000/07/14 13:56:11  gerd
+ *     Added methods id_attribute_name, id_attribute_value,
+ * idref_attribute_names.
+ *
+ * Revision 1.5  2000/07/09 17:51:14  gerd
+ *     Element nodes can store positions.
+ *
+ * Revision 1.4  2000/07/08 23:04:06  gerd
+ *     [Merging 0.2.10:] Bugfix: allow_undeclared_attribute
+ *
+ * Revision 1.3  2000/07/04 22:10:06  gerd
+ *     Implemented rev 1.3 of pxp_document.mli in a straight-
+ * forward fashion.
+ *
+ * Revision 1.2  2000/06/14 22:19:06  gerd
+ *     Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_document.ml:
+ *
+ * Revision 1.19  2000/05/27 19:14:42  gerd
+ *     value_of_attribute: this function has been moved to
+ * markup_aux.ml.
+ *
+ *     Added the following checks whether there is a violation
+ * against the standalone declaration:
+ *     - Externally declared elements with regexp content model
+ *       must not contain extra white space
+ *     - The effect of normalization of externally declared attributes
+ *       must not depend on the type of the attributes
+ *     - Declared default values of externally declared attributes
+ *       must not have an effect on the value of the attributes.
+ *
+ *     Removed the method init_xml_standalone. It is now stored in
+ * the DTD whether there is a standalone declaration.
+ *
+ * Revision 1.18  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.17  2000/05/06 23:12:20  gerd
+ *     Allow undeclared attributes.
+ *
+ * Revision 1.16  2000/05/01 20:42:28  gerd
+ *     New method write_compact_as_latin1.
+ *
+ * Revision 1.15  2000/04/30 18:15:22  gerd
+ *     In function validate_content: Special handling of the pseudo
+ * nodes "-pi" and "-vr".
+ *     Method init_root, class document: Recognizes whether the
+ * root is virtual or real. The check on the root element name is different
+ * in each case.
+ *     New method keep_always_whitespace_mode: Turns a special mode
+ * on in which ignorable whitespace is included into the document.
+ *
+ * Revision 1.14  2000/03/11 22:58:15  gerd
+ *     Updated to support Markup_codewriter.
+ *
+ * Revision 1.13  2000/01/27 21:51:56  gerd
+ *     Added method 'attributes'.
+ *
+ * Revision 1.12  2000/01/27 21:19:34  gerd
+ *     Added methods.
+ *     Bugfix: 'orphaned_clone' performs now really a clone.
+ *
+ * Revision 1.11  2000/01/20 21:57:58  gerd
+ *     Bugfix: method set_nodes does no longer add the new subnodes
+ * in the reverse order.
+ *
+ * Revision 1.10  1999/12/17 21:35:37  gerd
+ *     Bugfix: If the name of the root element is not specified in
+ * the DTD, the document does not check whether the root element is a
+ * specific element.
+ *
+ * Revision 1.9  1999/11/09 22:22:01  gerd
+ *     The "document" classes now checks that the root element is the
+ * same as the declared root element. Thanks to Claudio Sacerdoti Coen
+ * for his bug report.
+ *
+ * Revision 1.8  1999/09/01 22:51:40  gerd
+ *     Added methods to store processing instructions.
+ *
+ * Revision 1.7  1999/09/01 16:19:18  gerd
+ *     Added some warnings.
+ *     If an element type has the content model EMPTY, it is now strictly
+ * checked that the element instance is really empty. Especially, white space
+ * is NOT allowed in such instances.
+ *
+ * Revision 1.6  1999/08/19 21:58:59  gerd
+ *     Added method "reset_finder". This is not very convincing, but
+ * currently the simplest way to update the ID hash table.
+ *
+ * Revision 1.5  1999/08/19 01:08:15  gerd
+ *     Added method "find" that searches node by ID in the whole
+ * tree.
+ *     Bugfix: After the extension has been cloned, the "set_node" method
+ * is invoked telling the clone to which node it is associated.
+ *
+ * Revision 1.4  1999/08/15 13:52:52  gerd
+ *     Bugfix: WF_error "Attribute x occurs twice in element [unnamed]"
+ * no longer possible; instead of "[unnamed]" the actual name is printed.
+ *     Improved some of the error messages.
+ *
+ * Revision 1.3  1999/08/15 02:19:01  gerd
+ *     If the DTD allows arbitrary elements, unknown elements are not
+ * rejected.
+ *
+ * Revision 1.2  1999/08/11 14:54:23  gerd
+ *     Optimizations: The hashtable for the 'pinstr' variable is only
+ * created on demand. -- The 'only_whitespace' function uses a simple "for"
+ * loop if the string is small and a lexer if the string is big.
+ *
+ * Revision 1.1  1999/08/10 00:35:50  gerd
+ *     Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_document.mli b/helm/DEVEL/pxp/pxp/pxp_document.mli
new file mode 100644 (file)
index 0000000..67c6e4d
--- /dev/null
@@ -0,0 +1,838 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+(**********************************************************************)
+(*                                                                    *)
+(* Pxp_document:                                                      *)
+(*     Object model of the document/element instances                 *)
+(*                                                                    *)
+(**********************************************************************)
+
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class type node ............. The common class type of the nodes of
+ *                               the element tree. Nodes are either
+ *                               elements (inner nodes) or data nodes
+ *                               (leaves)
+ * class type extension ........ The minimal properties of the so-called
+ *                               extensions of the nodes: Nodes can be
+ *                               customized by applying a class parameter
+ *                               that adds methods/values to nodes.
+ * class data_impl : node ...... Implements data nodes.
+ * class element_impl : node ... Implements element nodes
+ * class document .............. A document is an element with some additional
+ *                               properties
+ *
+ * ======================================================================
+ *
+ * THE STRUCTURE OF NODE TREES:
+ *
+ * Every node except the root node has a parent node. The parent node is
+ * always an element, because data nodes never contain other nodes.
+ * In the other direction, element nodes may have children; both elements
+ * and data nodes are possible as children.
+ * Every node knows its parent (if any) and all its children (if any);
+ * the linkage is maintained in both directions. A node without a parent
+ * is called a root.
+ * It is not possible that a node is the child of two nodes (two different nodes
+ * or a multiple child of the same node).
+ * You can break the connection between a node and its parent; the method
+ * "delete" performs this operations and deletes the node from the parent's
+ * list of children. The node is now a root, for itself and for all
+ * subordinate nodes. In this context, the node is also called an orphan,
+ * because it has lost its parent (this is a bit misleading because the
+ * parent is not always the creator of a node).
+ * In order to simplify complex operations, you can also set the list of
+ * children of an element. Nodes that have been children before are unchanged;
+ * new nodes are added (and the linkage is set up), and nodes that no longer
+ * occur in the list are handled as if they had been deleted.
+ * If you try to add a node that is not a root (either by an "add" or by a
+ * "set" operation) the operation fails.
+ *
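+ * A small sketch of the linkage operations described above (n and p are
+ * placeholder nodes, not names defined by this interface):
+ *
+ *   n # delete;        (* n loses its parent and becomes a root (orphan) *)
+ *   p # add_node n;    (* the orphan n becomes the last child of p       *)
+ *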
+ * CREATION OF NODES
+ *
+ * The class interface supports creation of nodes by cloning a so-called
+ * exemplar. The idea is that it is sometimes useful to implement different
+ * element types by different classes, and to implement this by looking up
+ * exemplars.
+ * Imagine you have three element types A, B, and C, and three classes
+ * a, b, and c implementing the node interface (for example, by providing
+ * different extensions, see below). The XML parser can be configured to
+ * have a lookup table
+ *   { A --> a0,  B --> b0, C --> c0 }
+ * where a0, b0, c0 are exemplars of the classes a, b, and c, i.e. empty
+ * objects belonging to these classes. If the parser finds an instance of
+ * A, it looks up the exemplar a0 of A and clones it (actually, the method
+ * "create_element" performs this for elements, and "create_data" for data
+ * nodes). Clones belong to the same class as the original nodes, so the
+ * instances of the elements have the same classes as the configured
+ * exemplars.
+ * Note: This technique assumes that the interface of all exemplars is the
+ * same!
+ *
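+ * Such a table could be set up with the "spec" values declared near the
+ * end of this file; a sketch (a0, b0, c0 as above, d0 and e0 being assumed
+ * data and default element exemplars):
+ *
+ *   let spec =
+ *     make_spec_from_alist
+ *       ~data_exemplar:d0
+ *       ~default_element_exemplar:e0
+ *       ~element_alist:[ "A", a0; "B", b0; "C", c0 ]
+ *       ()
+ *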
+ * THE EXTENSION
+ *
+ * The class type node and all its implementations have a class parameter
+ * 'ext which must at least fulfil the properties of the class type "extension".
+ * The idea is that you can add properties, for example:
+ *
+ * class my_extension =
+ *   object
+ *     (* minimal properties required by class type "extension": *)
+ *     method clone = ...
+ *     method node = ...
+ *     method set_node n = ...
+ *     (* here my own methods: *)
+ *     method do_this_and_that ...
+ *   end
+ *
+ * class my_element_impl = [ my_extension ] element_impl
+ * class my_data_impl    = [ my_extension ] data_impl
+ *
+ * The whole XML parser is parameterized with 'ext, so your extension is
+ * visible everywhere (this is the reason why extensibility is solved by
+ * parametric polymorphism and not by inclusive polymorphism (subtyping)).
+ *
+ *
+ * SOME COMPLICATED TYPE EXPRESSIONS
+ *
+ * Sometimes the following type expressions turn out to be necessary:
+ *
+ * 'a node extension as 'a
+ *      This is the type of an extension that belongs to a node that
+ *      has an extension that is the same as we started with.
+ *
+ * 'a extension node as 'a
+ *      This is the type of a node that has an extension that belongs to a
+ *      node of the type we started with.
+ *
+ *
+ * DOCUMENTS
+ * ...
+ *
+ * ======================================================================
+ *
+ * SIMPLE USAGE: ...
+ *)
+
+
+open Pxp_dtd
+
+
+type node_type =
+  (* The basic and most important node types:
+   * - T_element element_type   is the type of element nodes
+   * - T_data                   is the type of text data nodes
+   * By design of the parser, neither CDATA sections nor entity references
+   * are represented in the node tree; so there are no types for them.
+   *)
+    T_element of string
+  | T_data
+
+  (* The following types are extensions to my original design. They have mainly
+   * been added to simplify the implementation of standards (such as
+   * XPath) that require that nodes of these types are included into the
+   * main document tree.
+   * There are options (see Pxp_yacc) forcing the parser to insert such
+   * nodes; in this case, the nodes are actually element nodes serving
+   * as wrappers for the additional data structures. The options are:
+   * enable_super_root_node, enable_pinstr_nodes, enable_comment_nodes.
+   * By default, such nodes are not created.
+   *)
+  | T_super_root
+  | T_pinstr of string                  (* The string is the target of the PI *)
+  | T_comment
+
+  (* The following types are fully virtual. This means that it is impossible
+   * to make the parser insert such nodes. However, these types might be
+   * practical when defining views on the tree.
+   * Note that the list of virtual node types will be extended if necessary.
+   *)
+  | T_none
+  | T_attribute of string          (* The string is the name of the attribute *)
+  | T_namespace of string               (* The string is the namespace prefix *)
+;;
+
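+(* Dispatching on the node type is an ordinary pattern match; a sketch
+ * ("describe" and n are purely illustrative names):
+ *
+ *   let describe n =
+ *     match n # node_type with
+ *         T_element name -> "element " ^ name
+ *       | T_data         -> "character data"
+ *       | _              -> "other node type"
+ *)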
+
+class type [ 'node ] extension =
+  object ('self)
+    method clone : 'self
+      (* "clone" should return an exact deep copy of the object. *)
+    method node : 'node
+      (* "node" returns the corresponding node of this extension. This method
+       * intended to return exactly what previously has been set by "set_node".
+       *)
+    method set_node : 'node -> unit
+      (* "set_node" is invoked once the extension is associated to a new
+       * node object.
+       *)
+  end
+;;
+
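+(* A minimal "do nothing" extension satisfying this class type could be
+ * sketched as follows (purely an illustration relying on the class type
+ * "node" declared below; it is not part of this interface):
+ *
+ *   class null_ext =
+ *     object (self : 'self)
+ *       val mutable n = (None : 'self node option)
+ *       method clone = {< >}
+ *       method node =
+ *         match n with
+ *             None   -> assert false
+ *           | Some x -> x
+ *       method set_node x = n <- Some x
+ *     end
+ *)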
+
+class type [ 'ext ] node =
+  object ('self)
+    constraint 'ext = 'ext node #extension
+
+    method extension : 'ext
+      (* Return the extension of this node: *)
+
+    method delete : unit
+      (* Delete this node from the parent's list of sub nodes. This node gets
+       * orphaned.
+       * 'delete' does nothing if this node does not have a parent.
+       *)
+
+    method parent : 'ext node
+      (* Get the parent, or raise Not_found if this node is an orphan. *)
+
+    method root : 'ext node
+      (* Get the direct or indirect parent that does not have a parent itself,
+       * i.e. the root of the tree.
+       *)
+
+    method orphaned_clone : 'self
+      (* Return an exact deep copy of this element and all sub nodes, except
+       * that string values are shared between this node and the clone, and
+       * that the clone has no parent (i.e. it is now a root).
+       *)
+
+    method orphaned_flat_clone : 'self
+      (* return a clone of this element where all subnodes are omitted.
+       * The type of the node, and the attributes are the same as in the
+       * original node.
+       * The clone has no parent.
+       *)
+
+    method add_node : ?force:bool -> 'ext node -> unit
+      (* Append new sub nodes -- mainly used by the parser itself, but
+       * of course open for everybody. If an element is added, it must be
+       * an orphan (i.e. does not have a parent node); and after addition
+       * *this* node is the new parent.
+       * The method performs some basic validation checks if the current node
+       * has a regular expression as content model, or is EMPTY. You can
+       * turn these checks off by passing ~force:true to the method.
+       *)
+
+    method add_pinstr : proc_instruction -> unit
+      (* Add a processing instruction to the set of processing instructions of
+       * this node. Usually only elements contain processing instructions.
+       *)
+
+    method pinstr : string -> proc_instruction list
+      (* Get all processing instructions with the passed name *)
+
+    method pinstr_names : string list
+      (* Get a list of all names of processing instructions *)
+
+    method node_position : int
+      (* Returns the position of this node among all children of the parent
+       * node. Positions are counted from 0.
+       * Raises Not_found if the node is the root node.
+       *)
+
+    method node_path : int list
+      (* Returns the list of node positions of the ancestors of this node,
+       * including this node. The first list element is the node position of
+       * the ancestor that is a child of the root, and the last list element
+       * is the node position of this node itself.
+       * Returns [] if the node is the root node.
+       *)
+
+    method sub_nodes : 'ext node list
+      (* Get the list of sub nodes *)
+
+    method iter_nodes : ('ext node -> unit) -> unit
+      (* iterate over the sub nodes *)
+
+    method iter_nodes_sibl :
+      ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+      (* Here every iteration step can also access the previous and the
+       * following node if present.
+       *)
+
+    method nth_node : int -> 'ext node
+      (* Returns the n-th sub node of this node, n >= 0. Raises Not_found
+       * if the index is out of the valid range.
+       * Note that the first invocation of this method requires additional
+       * overhead.
+       *)
+
+    method previous_node : 'ext node
+    method next_node : 'ext node
+      (* Return the previous and next nodes, respectively. These methods are
+       * equivalent to 
+       * - parent # nth_node (self # node_position - 1) and
+       * - parent # nth_node (self # node_position + 1), respectively.
+       *)
+
+    method set_nodes : 'ext node list -> unit
+      (* Set the list of sub nodes. Elements that are no longer sub nodes
+       * become orphans, and all new elements that previously were not sub
+       * nodes must already be orphans (roots).
+       *)
+
+    method data : string
+      (* Get the data string of this node. For data nodes, this string is just
+       * the content. For elements, this string is the concatenation of all
+       * subordinate data nodes.
+       *)
+
+    method node_type : node_type
+      (* Get the type of this node (T_element name, T_data, ...). *)
+
+    method position : (string * int * int)
+      (* Return the name of the entity, the line number, and the column
+       * position (byte offset) of the beginning of the element.
+       * Only available if the element has been created with position
+       * information.
+       * Returns "?",0,0 if not available. (Note: Line number 0 is not
+       * possible otherwise.)
+       *)
+
+    method attribute : string -> Pxp_types.att_value
+    method attribute_names : string list
+    method attribute_type : string -> Pxp_types.att_type
+    method attributes : (string * Pxp_types.att_value) list
+      (* Get a specific attribute; get the names of all attributes; get the
+       * type of a specific attribute; get names and values of all attributes.
+       * Only elements have attributes.
+       * Note: If the DTD allows arbitrary attributes for this element,
+       * "attribute_type" raises Undeclared.
+       *)
+
+    method required_string_attribute : string -> string
+    method required_list_attribute : string -> string list
+      (* Return the attribute value, or fail if the attribute is not present.
+       * The first method always returns the value as a string, the second
+       * always as a list.
+       *)
+
+    method optional_string_attribute : string -> string option
+    method optional_list_attribute : string -> string list
+      (* Return Some value, or None if the attribute is not present.
+       * The first method always returns the value as a string, the second
+       * always as a list.
+       *)
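+      (* Example (a sketch; n and "color" are purely illustrative names):
+       *   let color =
+       *     match n # optional_string_attribute "color" with
+       *         None   -> "black"
+       *       | Some c -> c
+       *)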
+
+    method id_attribute_name : string
+    method id_attribute_value : string
+      (* Return the name and value of the ID attribute. The methods may
+       * raise Not_found if there is no ID attribute in the DTD, or no
+       * ID attribute in the element, respectively.
+       *)
+
+    method idref_attribute_names : string list
+      (* Returns the list of attribute names of IDREF or IDREFS type. *)
+
+    method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
+      (* Sets the attributes but does not check whether they match the DTD.
+       *)
+
+    method attributes_as_nodes : 'ext node list
+      (* Experimental feature: Return the attributes as node list. Every node
+       * has type T_attribute n, and contains only the single attribute n.
+       * This node list is computed on demand, so the first invocation of this
+       * method will create the list, and following invocations will only
+       * return the existing list.
+       *)
+
+    method set_comment : string option -> unit
+      (* Sets the comment string; only applicable for T_comment nodes *)
+
+    method comment : string option
+      (* Get the comment string.
+       * Always returns None for nodes with a type other than T_comment.
+       *)
+
+    method dtd : dtd
+      (* Get the DTD. Fails if no DTD is specified (which is impossible if
+       * 'create_element' or 'create_data' have been used to create this
+       * object)
+       *)
+
+    method encoding : Pxp_types.rep_encoding
+      (* Get the encoding which is always the same as the encoding of the
+       * DTD. See also method 'dtd' (Note: This method fails, too, if
+       * no DTD is present.)
+       *)
+
+    method create_element : 
+             ?position:(string * int * int) ->
+             dtd -> node_type -> (string * string) list -> 'ext node
+      (* create an "empty copy" of this element:
+       * - new DTD
+       * - new node type (which must not be T_data)
+       * - new attribute list
+       * - empty list of nodes
+       *)
+
+    method create_data : dtd -> string -> 'ext node
+      (* create an "empty copy" of this data node: *)
+
+    method local_validate : 
+             ?use_dfa:bool ->
+             unit -> unit
+      (* Check that this element conforms to the DTD. 
+       * Option ~use_dfa: If true, the deterministic finite automaton of
+       *   regexp content models is used for validation, if available.
+       *   Defaults to false.
+       *)
+
+    method keep_always_whitespace_mode : unit
+      (* Normally, add_node does not accept data nodes when the DTD does not
+       * allow data nodes here, or allows only whitespace ("ignorable
+       * whitespace").
+       * Once you have invoked this method, ignorable whitespace is forced
+       * to be included into the document.
+       *)
+
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+      (* Write the contents of this node and the subtrees to the passed
+       * output stream; the passed encoding is used. The format
+       * is compact (the opposite of "pretty printing").
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+
+    (* ---------------------------------------- *)
+    (* The methods 'find' and 'reset_finder' are no longer supported.
+     * The functionality is provided by the configurable index object
+     * (see Pxp_yacc).
+     *)
+
+
+    (* ---------------------------------------- *)
+    (* internal methods: *)
+    method internal_adopt : 'ext node option -> int -> unit
+    method internal_set_pos : int -> unit
+    method internal_delete : 'ext node -> unit
+    method internal_init : (string * int * int) ->
+                           dtd -> string -> (string * string) list -> unit
+    method internal_init_other : (string * int * int) ->
+                                 dtd -> node_type -> unit
+  end
+;;
+
+
+class [ 'ext ] data_impl : 'ext -> [ 'ext ] node
+    (* Creation:
+     *   new data_impl an_extension
+     * creates a new data node with the given extension and the empty string
+     * as content.
+     *)
+;;
+
+
+class [ 'ext ] element_impl : 'ext -> [ 'ext ] node
+    (* Creation:
+     *   new element_impl an_extension
+     * creates a new empty element node with the given extension.
+     *)
+;;
+
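+(* Continuing the sketch from above: given an extension class such as
+ * null_ext, exemplar objects (as expected by the "spec" functions below)
+ * could be created as
+ *
+ *   let d0 = new data_impl    (new null_ext)
+ *   let e0 = new element_impl (new null_ext)
+ *
+ * These names are again purely illustrative.
+ *)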
+
+(* Attribute and namespace nodes are experimental: *)
+
+class [ 'ext ] attribute_impl : 
+  element:string -> name:string -> Pxp_types.att_value -> dtd -> [ 'ext ] node
+
+    (* Creation:
+     *   new attribute_impl element_name attribute_name attribute_value dtd
+     * Note that attribute nodes intentionally do not have extensions.
+     *)
+
+(* Once namespaces get implemented:
+class [ 'ext ] namespace_impl : 
+  prefix:string -> name:string -> dtd -> [ 'ext ] node
+*)
+
+(********************************** spec *********************************)
+
+type 'ext spec
+constraint 'ext = 'ext node #extension
+    (* Contains the exemplars used for the creation of new nodes
+     *)
+
+
+val make_spec_from_mapping :
+      ?super_root_exemplar : 'ext node ->
+      ?comment_exemplar : 'ext node ->
+      ?default_pinstr_exemplar : 'ext node ->
+      ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
+      data_exemplar: 'ext node ->
+      default_element_exemplar: 'ext node ->
+      element_mapping: (string, 'ext node) Hashtbl.t -> 
+      unit -> 
+        'ext spec
+    (* Specifies:
+     * - For new data nodes, the ~data_exemplar must be used
+     * - For new element nodes: If the element type is mentioned in the
+     *   ~element_mapping hash table, the exemplar found in this table is
+     *   used. Otherwise, the ~default_element_exemplar is used.
+     * Optionally:
+     * - You may also specify exemplars for super root nodes, for comments
+     *   and for processing instructions
+     *)
+
+val make_spec_from_alist :
+      ?super_root_exemplar : 'ext node ->
+      ?comment_exemplar : 'ext node ->
+      ?default_pinstr_exemplar : 'ext node ->
+      ?pinstr_alist : (string * 'ext node) list ->
+      data_exemplar: 'ext node ->
+      default_element_exemplar: 'ext node ->
+      element_alist: (string * 'ext node) list -> 
+      unit -> 
+        'ext spec
+    (* This is a convenience function: you can pass the mappings from
+     * elements and PIs to exemplars as associative lists.
+     *)
+
+val create_data_node : 
+      'ext spec -> dtd -> string -> 'ext node
+val create_element_node : 
+      ?position:(string * int * int) ->
+      'ext spec -> dtd -> string -> (string * string) list -> 'ext node
+val create_super_root_node :
+      ?position:(string * int * int) ->
+      'ext spec -> dtd -> 'ext node
+val create_comment_node :
+      ?position:(string * int * int) ->
+      'ext spec -> dtd -> string -> 'ext node
+val create_pinstr_node :
+      ?position:(string * int * int) ->
+      'ext spec -> dtd -> proc_instruction -> 'ext node
+  (* These functions use the exemplars contained in a spec and create fresh
+   * node objects from them.
+   *)
+
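+(* For instance (a sketch; spec and dtd are assumed to exist already):
+ *
+ *   let n = create_element_node spec dtd "A" [ "att", "value" ]
+ *
+ * creates a fresh element node of type A, with the given attribute list, by
+ * cloning the exemplar that spec associates with A.
+ *)
+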
+val create_no_node : 
+       ?position:(string * int * int) -> 'ext spec -> dtd -> 'ext node
+  (* Creates a T_none node with limited functionality *)
+
+(*********************** Ordering of nodes ******************************)
+
+val compare : 'ext node -> 'ext node -> int
+  (* Returns -1 if the first node is before the second node, or +1 if the
+   * first node is after the second node, or 0 if both nodes are identical.
+   * If the nodes are unrelated (do not have a common ancestor), the result
+   * is undefined.
+   * This test is rather slow.
+   *)
+
+type 'ext ord_index
+constraint 'ext = 'ext node #extension
+  (* The type of ordinal indexes *)
+
+val create_ord_index : 'ext node -> 'ext ord_index
+  (* Creates an ordinal index for the subtree starting at the passed node.
+   * This index assigns to every node an ordinal number (beginning with 0) such
+   * that nodes are numbered upon the order of the first character in the XML
+   * representation (document order).
+   * Note that the index is not automatically updated when the tree is
+   * modified.
+   *)
+
+val ord_number : 'ext ord_index -> 'ext node -> int
+  (* Returns the ordinal number of the node, or raises Not_found *)
+
+val ord_compare : 'ext ord_index -> 'ext node -> 'ext node -> int
+  (* Compares two nodes like 'compare':
+   * Returns -1 if the first node is before the second node, or +1 if the
+   * first node is after the second node, or 0 if both nodes are identical.
+   * If one of the nodes does not occur in the ordinal index, Not_found
+   * is raised.
+   * This test is much faster than 'compare'.
+   *)
+
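+(* A typical use of the ordinal index (a sketch; root is an assumed node):
+ *
+ *   let idx = create_ord_index root
+ *   let before n1 n2 = ord_compare idx n1 n2 < 0
+ *
+ * "before" is then a fast document-order test for nodes below root.
+ *)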
+
+(***************************** Iterators ********************************)
+
+val find : ?deeply:bool -> 
+           f:('ext node -> bool) -> 'ext node -> 'ext node
+  (* Searches the first node for which the predicate f is true, and returns
+   * it. Raises Not_found if there is no such node.
+   * By default, ~deeply=false. In this case, only the children of the
+   * passed node are searched.
+   * If passing ~deeply=true, the children are searched recursively
+   * (depth-first search).
+   *)
+
+val find_all : ?deeply:bool ->
+               f:('ext node -> bool) -> 'ext node -> 'ext node list
+  (* Searches all nodes for which the predicate f is true, and returns them.
+   * By default, ~deeply=false. In this case, only the children of the
+   * passed node are searched.
+   * If passing ~deeply=true, the children are searched recursively
+   * (depth-first search).
+   *)
+
+val find_element : ?deeply:bool ->
+                   string -> 'ext node -> 'ext node
+  (* Searches the first element with the passed element type.
+   * By default, ~deeply=false. In this case, only the children of the
+   * passed node are searched.
+   * If passing ~deeply=true, the children are searched recursively
+   * (depth-first search).
+   *)
+
+val find_all_elements : ?deeply:bool ->
+                        string -> 'ext node -> 'ext node list
+  (* Searches all elements with the passed element type.
+   * By default, ~deeply=false. In this case, only the children of the
+   * passed node are searched.
+   * If passing ~deeply=true, the children are searched recursively
+   * (depth-first search).
+   *)
+
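+(* For example (a sketch; root is an assumed node and "title" an assumed
+ * element type):
+ *
+ *   let first_title = find_element ~deeply:true "title" root
+ *   let all_data =
+ *     find_all ~deeply:true ~f:(fun n -> n # node_type = T_data) root
+ *
+ * Both calls search the whole subtree below root.
+ *)
+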
+exception Skip
+val map_tree :  pre:('exta node -> 'extb node) ->
+               ?post:('extb node -> 'extb node) ->
+               'exta node -> 
+                   'extb node
+  (* Traverses the passed node and all children recursively. After entering
+   * a node, the function ~pre is called. The result of this function must
+   * be a new node; it must not have children nor a parent (you can simply
+   * pass (fun n -> n # orphaned_flat_clone) as ~pre).
+   * After that, the children are processed in the same way (from left to
+   * right); the results of the transformation will be added to the
+   * new node as new children.
+   * Now, the ~post function is invoked with this node as argument, and its
+   * result is the result of the whole transformation (~post should return a
+   * root node, too; if not specified, the identity function is used).
+   * Both ~pre and ~post may raise Skip, which causes the node to be
+   * left out. If the top node is skipped, the exception Not_found is
+   * raised.
+   *)
+
+val map_tree_sibl : 
+        pre: ('exta node option -> 'exta node -> 'exta node option -> 
+                  'extb node) ->
+       ?post:('extb node option -> 'extb node -> 'extb node option -> 
+                  'extb node) ->
+       'exta node -> 
+           'extb node
+   (* Works like map_tree, but the function ~pre and ~post have additional
+    * arguments:
+    * - ~pre l n r: The node n is the node to map, and l is the previous
+    *   node, and r is the next node (both None if not present). l and r
+    *   are both nodes before the transformation.
+    * - ~post l n r: The node n is the node which is the result of ~pre
+    *   plus adding children. l and r are again the previous and the next
+    *   node, respectively, but after being transformed.
+    *)
+
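+(* Example (a sketch): stripping all comment nodes from a tree while leaving
+ * everything else untouched:
+ *
+ *   let without_comments root =
+ *     map_tree
+ *       ~pre:(fun n ->
+ *               if n # node_type = T_comment then raise Skip
+ *               else n # orphaned_flat_clone)
+ *       root
+ *)
+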
+val iter_tree : ?pre:('ext node -> unit) ->
+                ?post:('ext node -> unit) ->
+                'ext node -> 
+                    unit
+   (* Iterates only instead of mapping the nodes. *)
+
+val iter_tree_sibl :
+       ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) ->
+       ?post:('ext node option -> 'ext node -> 'ext node option -> unit) ->
+       'ext node -> 
+           unit
+   (* Iterates only instead of mapping the nodes. *)
+
+
+(******************************* document ********************************)
+
+
+class [ 'ext ] document :
+  Pxp_types.collect_warnings -> 
+  object
+    (* Documents: These are containers for root elements and for DTDs.
+     * 
+     * Important invariant: A document is either empty (no root element,
+     * no DTD), or it has both a root element and a DTD.
+     *
+     * A fresh document created by 'new' is empty.
+     *)
+
+    method init_xml_version : string -> unit
+       (* Set the XML version string of the XML declaration. *)
+
+    method init_root : 'ext node -> unit
+       (* Set the root element. It is expected that the root element has
+        * a DTD.
+        * Note that 'init_root' checks whether the passed root element
+        * has the type expected by the DTD. The check takes into account
+        * that the root element might be a virtual root node.
+        *)
+
+    method xml_version : string
+      (* Returns the XML version from the XML declaration. Returns "1.0"
+       * if the declaration is missing.
+       *)
+
+    method xml_standalone : bool
+      (* Returns whether this document is declared as being standalone.
+       * This method returns the same value as 'standalone_declaration'
+       * of the DTD (if there is a DTD).
+       * Returns 'false' if there is no DTD.
+       *)
+
+    method dtd : dtd
+      (* Returns the DTD of the root element. 
+       * Fails if there is no root element.
+       *)
+
+    method encoding : Pxp_types.rep_encoding
+      (* Returns the string encoding of the document = the encoding of
+       * the root element = the encoding of the element tree = the
+       * encoding of the DTD.
+       * Fails if there is no root element.
+       *)
+
+    method root : 'ext node
+      (* Returns the root element, or fails if there is none. *)
+
+    method add_pinstr : proc_instruction -> unit
+      (* Adds a processing instruction to the document container.
+       * The parser does this for PIs occurring outside the DTD and outside
+       * the root element.
+       *)
+
+    method pinstr : string -> proc_instruction list
+      (* Return all PIs for a passed target string. *)
+
+    method pinstr_names : string list
+      (* Return all target strings of all PIs. *)
+
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+      (* Write the document to the passed output stream; the passed encoding
+       * is used. The format is compact (the opposite of "pretty printing").
+       * If a DTD is present, it is included as the internal subset.
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+  end
+;;
+
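+(* A short usage sketch (doc is an assumed document object, ch an assumed
+ * out_channel; the Out_channel constructor of Pxp_types.output_stream is an
+ * assumption of this sketch):
+ *
+ *   let () = doc # write (Pxp_types.Out_channel ch) `Enc_iso88591
+ *
+ * This writes the whole document compactly to ch, encoded as Latin 1, with
+ * the DTD in the internal subset.
+ *)
+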
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.10  2000/08/30 15:47:37  gerd
+ *     New method node_path.
+ *     New function compare.
+ *     New type ord_index with functions.
+ *
+ * Revision 1.9  2000/08/26 23:27:53  gerd
+ *     New function: make_spec_from_alist.
+ *     New iterators: find, find_all, find_element, find_all_elements,
+ * map_tree, map_tree_sibl, iter_tree, iter_tree_sibl.
+ *     New node methods: node_position, nth_node, previous_node,
+ * next_node.
+ *     Attribute and namespace types have now a string argument:
+ * the name/prefix. I hope this simplifies the handling of view nodes.
+ *     First implementation of view nodes: attribute_impl. The
+ * method attributes_as_nodes returns the attributes wrapped into
+ * T_attribute nodes which reside outside the document tree.
+ *
+ * Revision 1.8  2000/08/18 20:14:00  gerd
+ *     New node_types: T_super_root, T_pinstr, T_comment, (T_attribute),
+ * (T_none), (T_namespace).
+ *
+ * Revision 1.7  2000/07/23 02:16:34  gerd
+ *     Support for DFAs.
+ *
+ * Revision 1.6  2000/07/16 16:34:41  gerd
+ *     New method 'write', the successor of 'write_compact_as_latin1'.
+ *
+ * Revision 1.5  2000/07/14 13:56:11  gerd
+ *     Added methods id_attribute_name, id_attribute_value,
+ * idref_attribute_names.
+ *
+ * Revision 1.4  2000/07/09 17:51:14  gerd
+ *     Element nodes can store positions.
+ *
+ * Revision 1.3  2000/07/04 22:05:10  gerd
+ *     New functions make_spec_from_mapping, create_data_node,
+ * create_element_node.
+ *
+ * Revision 1.2  2000/06/14 22:19:06  gerd
+ *     Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_document.mli:
+ *
+ * Revision 1.13  2000/05/27 19:15:08  gerd
+ *     Removed the method init_xml_standalone.
+ *
+ * Revision 1.12  2000/05/01 20:42:34  gerd
+ *         New method write_compact_as_latin1.
+ *
+ * Revision 1.11  2000/04/30 18:15:57  gerd
+ *     Beautifications.
+ *     New method keep_always_whitespace_mode.
+ *
+ * Revision 1.10  2000/03/11 22:58:15  gerd
+ *     Updated to support Markup_codewriter.
+ *
+ * Revision 1.9  2000/01/27 21:51:56  gerd
+ *     Added method 'attributes'.
+ *
+ * Revision 1.8  2000/01/27 21:19:07  gerd
+ *     Added further methods.
+ *
+ * Revision 1.7  1999/11/09 22:20:14  gerd
+ *     Removed method init_dtd from class "document". The DTD is
+ * implicitly passed to the document by the root element.
+ *
+ * Revision 1.6  1999/09/01 22:51:40  gerd
+ *     Added methods to store processing instructions.
+ *
+ * Revision 1.5  1999/09/01 16:19:57  gerd
+ *     The "document" class has now a "warner" as class argument.
+ *
+ * Revision 1.4  1999/08/19 21:59:13  gerd
+ *     Added method "reset_finder".
+ *
+ * Revision 1.3  1999/08/19 01:08:29  gerd
+ *     Added method "find".
+ *
+ * Revision 1.2  1999/08/15 02:19:41  gerd
+ *     Some new explanations: That unknown elements are not rejected
+ * if the DTD allows them.
+ *
+ * Revision 1.1  1999/08/10 00:35:51  gerd
+ *     Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_dtd.ml b/helm/DEVEL/pxp/pxp/pxp_dtd.ml
new file mode 100644 (file)
index 0000000..ee62c4f
--- /dev/null
@@ -0,0 +1,1090 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_lexers
+open Pxp_entity
+open Pxp_aux
+open Pxp_dfa
+
+(**********************************************************************)
+
+class dtd  the_warner init_encoding =
+  object (self)
+    val mutable root = (None : string option)
+    val mutable id =   (None : dtd_id option)
+
+    val warner       = (the_warner : collect_warnings)
+    val encoding     = init_encoding
+    val lexerset     = Pxp_lexers.get_lexer_set init_encoding
+
+    val elements     = (Hashtbl.create 100 : (string,dtd_element) Hashtbl.t)
+    val gen_entities = (Hashtbl.create 100 : (string,entity * bool) Hashtbl.t)
+    val par_entities = (Hashtbl.create 100 : (string,entity) Hashtbl.t)
+    val notations    = (Hashtbl.create 100 : (string,dtd_notation) Hashtbl.t)
+    val pinstr       = (Hashtbl.create 100 : (string,proc_instruction) Hashtbl.t)
+    val mutable element_names = []
+    val mutable gen_entity_names = []
+    val mutable par_entity_names = []
+    val mutable notation_names = []
+    val mutable pinstr_names = []
+
+    val mutable allow_arbitrary = false
+    val mutable standalone_declaration = false
+
+    val mutable validated = false
+
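+    (* The initializer below predeclares the five entities that XML itself
+     * predefines (lt, gt, amp, apos, quot); warnings arising from these
+     * internal declarations are collected by a drop_warnings object, i.e.
+     * silently ignored.
+     *)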
+    initializer
+    let w = new drop_warnings in
+    self # add_gen_entity 
+      (new internal_entity self "lt"   w "&#38;#60;" false false false encoding)
+      false;
+    self # add_gen_entity 
+      (new internal_entity self "gt"   w "&#62;"     false false false encoding)
+      false;
+    self # add_gen_entity 
+      (new internal_entity self "amp"  w "&#38;#38;" false false false encoding)
+      false;
+    self # add_gen_entity 
+      (new internal_entity self "apos" w "&#39;"     false false false encoding)
+      false;
+    self # add_gen_entity 
+      (new internal_entity self "quot" w "&#34;"     false false false encoding)
+      false;
+
+
+    method encoding = encoding
+
+    method warner = warner
+
+    method set_root r =
+      if root = None then
+       root <- Some r
+      else
+       assert false
+
+
+    method set_id j =
+      if id = None then
+       id <- Some j
+      else
+       assert false
+
+
+    method standalone_declaration = standalone_declaration
+
+    method set_standalone_declaration b =
+      standalone_declaration <- b
+
+    method allow_arbitrary =
+      allow_arbitrary <- true
+
+    method disallow_arbitrary =
+      allow_arbitrary <- false
+
+    method arbitrary_allowed = allow_arbitrary
+
+    method root = root
+    method id = id
+
+
+    method add_element el =
+      (* raises Not_found if 'el' has already been added *)
+      (* Note: 'el' is encoded in the same way as 'self'! *)
+      let name = el # name in
+      check_name warner name;
+      if Hashtbl.mem elements name then
+       raise Not_found;
+      Hashtbl.add elements name el;
+      element_names <- name :: element_names;
+      validated <- false
+
+
+    method add_gen_entity en extdecl =
+      (* The following is commented out; perhaps there should be an option
+       * to reactivate it on demand
+       *)
+      (* raises Validation_error if the predefined entities 'lt', 'gt', 'amp',
+       * 'quot', and 'apos' are redeclared with an improper value.
+       *)
+      if en # encoding <> encoding then
+       failwith "Pxp_dtd.dtd # add_gen_entity: Inconsistent encodings";
+      let name = en # name in
+      check_name warner name;
+      if Hashtbl.mem gen_entities name then begin
+       if List.mem name [ "lt"; "gt"; "amp"; "quot"; "apos" ] then begin
+         (* These are allowed to be declared several times *)
+         let (rt,_) = en # replacement_text in
+         let toks = tokens_of_content_string lexerset rt in
+         try
+           begin match toks with
+             [CRef 60]       -> if name <> "lt"   then raise Not_found
+           | [CharData ">"]  -> if name <> "gt"   then raise Not_found
+           | [CRef 62]       -> if name <> "gt"   then raise Not_found
+           | [CRef 38]       -> if name <> "amp"  then raise Not_found
+           | [CharData "'"]  -> if name <> "apos" then raise Not_found
+           | [CRef 39]       -> if name <> "apos" then raise Not_found
+           | [CharData "\""] -> if name <> "quot" then raise Not_found
+           | [CRef 34]       -> if name <> "quot" then raise Not_found
+           | _               -> raise Not_found
+           end
+         with
+             Not_found ->
+               raise (Validation_error("Predefined entity `" ^ name ^
+                                       "' redeclared"))
+       end
+       else
+         warner # warn ("Entity `" ^ name ^ "' declared twice")
+      end
+      else begin
+       Hashtbl.add gen_entities name (en, extdecl);
+       gen_entity_names <- name :: gen_entity_names
+      end
+
+
+    method add_par_entity en =
+      if en # encoding <> encoding then
+       failwith "Pxp_dtd.dtd # add_par_entity: Inconsistent encodings";
+      let name = en # name in
+      check_name warner name;
+      if not (Hashtbl.mem par_entities name) then begin
+       Hashtbl.add par_entities name en;
+       par_entity_names <- name :: par_entity_names
+      end
+      else
+       warner # warn ("Entity `" ^ name ^ "' declared twice")
+
+
+    method add_notation no =
+      (* raises Validation_error if 'no' already added *)
+      if no # encoding <> encoding then
+       failwith "Pxp_dtd.dtd # add_notation: Inconsistent encodings";
+      let name = no # name in
+      check_name warner name;
+      if Hashtbl.mem notations name then
+       raise (Validation_error("Notation `" ^ name ^ "' declared twice"));
+      Hashtbl.add notations name no;
+      notation_names <- name :: notation_names
+
+
+    method add_pinstr pi =
+      if pi # encoding <> encoding then
+       failwith "Pxp_dtd.dtd # add_pinstr: Inconsistent encodings";
+      let name = pi # target in
+      check_name warner name;
+
+      if String.length name >= 4 && String.sub name 0 4 = "pxp:" then begin
+       match name with
+           "pxp:dtd" -> 
+             let _, optname, atts = pi # parse_pxp_option in
+             begin match optname with
+                 "optional-element-and-notation-declarations" ->
+                   self # allow_arbitrary
+               | "optional-attribute-declarations" ->
+                   let lexers = Pxp_lexers.get_lexer_set encoding in
+                   let el_string = 
+                     try List.assoc "elements" atts
+                     with Not_found ->
+                       raise(Error("Missing `elements' attribute for pxp:dtd"))
+                   in
+                   let el = split_attribute_value lexers el_string in
+                   List.iter
+                     (fun e_name ->
+                        let e =
+                          try Hashtbl.find elements e_name
+                          with
+                              Not_found ->
+                                raise(Error("Reference to unknown element `" ^
+                                            e_name ^ "'"))
+                        in
+                        e # allow_arbitrary
+                     )
+                     el
+               | _ ->
+                   raise(Error("Unknown PXP option `" ^ 
+                               optname ^ "'"))
+             end
+         | _ ->
+             raise(Error("The processing instruction target `" ^ 
+                         name ^ "' is not defined by this PXP version"))
+      end
+      else begin
+       (*----------------------------------------------------------------------
+        * SUPPORT FOR DEPRECATED PI OPTIONS:
+        * - <?xml:allow_undeclared_elements_and_notations?>
+        *   is now <?pxp:dtd optional-element-and-notation-declarations?>
+        * - <?xml:allow_undeclared_attributes <elementname>?>
+        *   is now <?pxp:dtd optional-attribute-declarations 
+        *            elements='<elementname> ...'?>
+        * Please update your DTDs! Alternatively, you may uncomment the
+        * following piece of code.
+        *)
+(*         if name = "xml:allow_undeclared_elements_and_notations" then *)
+(*           self # allow_arbitrary; *)
+(*         if name = "xml:allow_undeclared_attributes" then begin *)
+(*           let v = pi # value in *)
+(*           let e =  *)
+(*             try *)
+(*               Hashtbl.find elements v *)
+(*             with *)
+(*                 Not_found -> *)
+(*                   raise(Validation_error("Reference to undeclared element `"*)
+(*                   ^ v ^ "'")) *)
+(*           in *)
+(*           e # allow_arbitrary; *)
+(*         end; *)
+       (*----------------------------------------------------------------------
+        *)
+       ()
+      end;
+      Hashtbl.add pinstr name pi;
+      pinstr_names <- name :: pinstr_names;
+
+
+    method element name =
+      (* returns the element 'name' or raises Validation_error if not found *)
+      try
+       Hashtbl.find elements name
+      with
+         Not_found ->
+           if allow_arbitrary then
+             raise Undeclared
+           else
+             raise(Validation_error("Reference to undeclared element `" ^ name ^ "'"))
+
+    method element_names =
+      (* returns the list of all names of element declarations *)
+      element_names
+
+
+    method gen_entity name =
+      (* returns the entity 'name' or raises WF_error if not found *)
+      try
+       Hashtbl.find gen_entities name
+      with
+         Not_found ->
+           raise(WF_error("Reference to undeclared general entity `" ^ name ^ "'"))
+
+
+    method gen_entity_names = gen_entity_names
+
+
+    method par_entity name =
+      (* returns the entity 'name' or raises WF_error if not found *)
+      try
+       Hashtbl.find par_entities name
+      with
+         Not_found ->
+           raise(WF_error("Reference to undeclared parameter entity `" ^ name ^ "'"))
+
+
+    method par_entity_names = par_entity_names
+
+
+    method notation name =
+      (* returns the notation 'name' or raises Validation_error if not found *)
+      try
+       Hashtbl.find notations name
+      with
+         Not_found ->
+           if allow_arbitrary then
+             raise Undeclared
+           else
+             raise(Validation_error("Reference to undeclared notation `" ^ name ^ "'"))
+
+
+    method notation_names = notation_names
+
+
+    method pinstr name =
+      (* returns the list of all processing instructions contained in the DTD
+       * with target 'name'
+       *)
+      Hashtbl.find_all pinstr name
+
+
+    method pinstr_names = pinstr_names
+
+    method write os enc doctype = 
+      let wms = 
+       write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+      let write_sysid s =
+       if String.contains s '"' then
+         wms ("'" ^ s ^ "'")
+       else
+         wms ("\"" ^ s ^ "\"");
+      in
+
+      if doctype then begin
+       wms "<!DOCTYPE ";
+       ( match root with
+           None -> failwith "#write: DTD without root";
+         | Some r -> wms r
+       );
+       wms " [\n";
+      end;
+
+      (* Notations: *)
+      List.iter
+       (fun name ->
+          let notation = 
+            try Hashtbl.find notations name with Not_found -> assert false in
+          notation # write os enc)
+       (List.sort compare notation_names);
+
+      (* Unparsed entities: *)
+      List.iter
+       (fun name ->
+          let ent,_ = 
+            try Hashtbl.find gen_entities name with Not_found -> assert false 
+          in
+          if ent # is_ndata then begin
+            let xid = ent # ext_id in
+            let notation = ent # notation in
+            wms ("<!ENTITY " ^ name ^ " " );
+            ( match xid with
+                  System s ->
+                    wms "SYSTEM ";
+                    write_sysid s;
+                | Public (p,s) ->
+                    wms "PUBLIC ";
+                    write_sysid p;
+                    if (s <> "") then begin
+                      wms " ";
+                      write_sysid s;
+                    end;
+                | Anonymous ->
+                    failwith "#write: External ID Anonymous cannot be represented"
+            );
+            wms (" NDATA " ^ notation ^ ">\n");
+          end
+       )
+       (List.sort compare gen_entity_names);
+
+      (* Elements: *)
+      List.iter
+       (fun name ->
+          let element = 
+            try Hashtbl.find elements name with Not_found -> assert false in
+          element # write os enc)
+       (List.sort compare element_names);
+
+      (* Processing instructions: *)
+      List.iter
+       (fun name ->
+          let pi = 
+            try Hashtbl.find pinstr name with Not_found -> assert false in
+          pi # write os enc)
+       (List.sort compare pinstr_names);
+
+      if doctype then 
+       wms "]>\n";
+
+    method write_compact_as_latin1 os doctype = 
+      self # write os `Enc_iso88591 doctype
+
+
+
+    (************************************************************)
+    (*                    VALIDATION                            *)
+    (************************************************************)
+
+    method only_deterministic_models =
+      Hashtbl.iter
+       (fun n el ->
+          let cm = el # content_model in
+          match cm with
+              Regexp _ ->
+                if el # content_dfa = None then
+                  raise(Validation_error("The content model of element `" ^
+                                         n ^ "' is not deterministic"))
+            | _ ->
+                ()
+       )
+       elements;
+      
+
+    method validate =
+      if validated or allow_arbitrary then
+       ()
+      else begin
+       (* Validity constraint: Notations in NDATA entity declarations must
+        * be declared
+        *)
+       List.iter
+         (fun name ->
+            let ent,_ = 
+              try Hashtbl.find gen_entities name with Not_found -> assert false 
+            in
+            if ent # is_ndata then begin
+              let xid = ent # ext_id in
+              let notation = ent # notation in
+              try
+                ignore(self # notation notation)
+                  (* Raises Validation_error if the constraint is violated *)
+              with
+                  Undeclared -> ()
+            end
+         )
+         gen_entity_names;
+
+       (* Validate the elements: *)
+       Hashtbl.iter
+         (fun n el ->
+            el # validate)
+         elements;
+
+       (* Check the root element: *)
+       (* TODO: Check if this piece of code is executed at all! *)
+       begin match root with
+           None -> ()
+         | Some r ->
+             begin try
+               let _ = Hashtbl.find elements r in ()
+             with
+                 Not_found ->
+                   raise(Validation_error("The root element is not declared"))
+             end
+       end;
+       validated <- true;
+      end
+
+    method invalidate =
+      validated <- false
+
+    (************************************************************)
+
+  end
+
+
+(**********************************************************************)
+
+and dtd_element the_dtd the_name =
+  object (self)
+    val dtd = (the_dtd : dtd)
+    val name = the_name
+    val lexerset = Pxp_lexers.get_lexer_set (the_dtd # encoding)
+    val mutable content_model = Unspecified
+    val mutable content_model_validated = false
+    val mutable content_dfa = lazy None
+
+    val mutable externally_declared = false
+
+    val mutable attributes = 
+           ([] : (string * ((att_type * att_default) * bool)) list)
+    val mutable attributes_validated = false
+
+    val mutable id_att_name = None
+    val mutable idref_att_names = []
+
+    val mutable allow_arbitrary = false
+
+    method name = name
+
+    method set_cm_and_extdecl m extdecl =
+      if content_model = Unspecified then begin
+       content_model <- m;
+       content_model_validated <- false;
+       content_dfa <- lazy (self # compute_content_dfa);
+       externally_declared <- extdecl;
+       dtd # invalidate
+      end
+      else
+       raise(Validation_error("Element `" ^ name ^ "' has already a content model"))
+
+    method content_model = content_model
+
+    method content_dfa = Lazy.force content_dfa
+      
+    method private compute_content_dfa =
+      match content_model with
+         Regexp re ->
+           ( try Some (dfa_of_regexp_content_model re)
+             with Not_found -> None
+           )
+       | _ ->
+           None
+
+    method externally_declared = externally_declared
+
+    method encoding = dtd # encoding
+
+    method allow_arbitrary =
+      allow_arbitrary <- true
+
+    method disallow_arbitrary =
+      allow_arbitrary <- false
+
+    method arbitrary_allowed = allow_arbitrary
+
+    method add_attribute aname t d extdecl =
+      if aname <> "xml:lang" & aname <> "xml:space" then
+       check_name (dtd#warner) aname;
+      if List.mem_assoc aname attributes then
+       dtd # warner # warn ("More than one declaration for attribute `" ^
+                            aname ^ "' of element type `" ^ name ^ "'")
+      else begin
+       begin match aname with
+           "xml:space" ->
+             begin match t with
+                 A_enum l ->
+                   let l' = Sort.list ( <= ) l in
+                   if l' <> [ "default"; "preserve" ] then
+                     raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification"))
+               | _ ->
+                   raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification"))
+             end
+         | _ -> ()
+       end; 
+       begin match t with
+           A_id ->
+             id_att_name <- Some aname;
+         | (A_idref | A_idrefs) ->
+             idref_att_names <- aname :: idref_att_names
+         | _ ->
+             ()
+       end;
+       attributes <- (aname, ((t,d),extdecl)) :: attributes;
+       attributes_validated <- false;
+       dtd # invalidate;
+      end
+
+    method attribute attname =
+      try
+       fst (List.assoc attname attributes)
+      with
+         Not_found ->
+           if allow_arbitrary then
+             raise Undeclared
+           else
+             raise(Validation_error("Attribute `" ^ attname ^ "' of element `"
+                                    ^ name ^ "' not declared"))
+
+    method attribute_violates_standalone_declaration attname v =
+      try
+       let (atype, adefault), extdecl = List.assoc attname attributes in
+       extdecl &&
+       ( match v with
+             None -> 
+               adefault <> D_required && adefault <> D_implied
+               (* i.e. adefault matches D_default or D_fixed *)
+           | Some s ->
+               atype <> A_cdata &&
+               normalization_changes_value lexerset atype s
+       )
+      with
+         Not_found ->
+           if allow_arbitrary then
+             raise Undeclared
+           else
+             raise(Validation_error("Attribute `" ^ attname ^ "' of element `"
+                                    ^ name ^ "' not declared"))
+
+
+    method attribute_names =
+      List.map fst attributes
+
+    method names_of_required_attributes =
+      List.flatten
+       (List.map
+          (fun (n,((t,d),_)) ->
+             if d = D_required then
+               [n]
+             else
+               [])
+          attributes)
+
+    method id_attribute_name = id_att_name
+
+    method idref_attribute_names = idref_att_names
+
+
+    method write os enc = 
+      let encoding = self # encoding in
+      let wms = 
+       write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+      let rec write_contentspec cs =
+       match cs with
+           Unspecified ->
+             failwith "#write: Unspecified content model found"
+         | Empty ->
+             wms "EMPTY"
+         | Any ->
+             wms "ANY"
+         | Mixed ml ->
+             wms "(";
+             write_mixedspec_list ml;
+             wms ")*";
+         | Regexp re ->
+             write_children re false
+
+      and write_mixedspec_list ml =
+       match ml with
+           MPCDATA :: ml' ->
+             wms "#PCDATA";
+             if ml' <> [] then wms "|";
+             write_mixedspec_list ml';
+         | MChild s :: ml' ->
+             wms s;
+             if ml' <> [] then wms "|";
+             write_mixedspec_list ml';
+         | [] ->
+             ()
+
+      and write_children re cp =
+       match re with
+           Optional re' ->
+             let p = needs_parens re' in
+             if p then wms "(";
+             write_children re' cp;
+             if p then wms ")";
+             wms "?";
+         | Repeated re' ->
+             let p = needs_parens re' in
+             if p then wms "(";
+             write_children re' cp;
+             if p then wms ")";
+             wms "*";
+         | Repeated1 re' ->
+             let p = needs_parens re' in
+             if p then wms "(";
+             write_children re' cp;
+             if p then wms ")";
+             wms "+";
+         | Alt re' ->
+             wms "(";
+             ( match re' with
+                   re1' :: rer' ->
+                     write_children re1' true;
+                     List.iter
+                       (fun ren' ->
+                          wms "|";
+                          write_children ren' true;
+                       )
+                       rer';
+                 | [] ->
+                     failwith "#write: Illegal content model"
+             );
+             wms ")";
+         | Seq re' ->
+             wms "(";
+             ( match re' with
+                   re1' :: rer' ->
+                     write_children re1' true;
+                     List.iter
+                       (fun ren' ->
+                          wms ",";
+                          write_children ren' true;
+                       )
+                       rer';
+                 | [] ->
+                     failwith "#write: Illegal content model"
+             );
+             wms ")";
+         | Child ch ->
+             if not cp then wms "(";
+             wms ch;
+             if not cp then wms ")";
+
+      and needs_parens re =
+       match re with
+           (Optional _ | Repeated _ | Repeated1 _ ) -> true
+         | _ -> false
+      in
+
+      wms ("<!ELEMENT " ^ name ^ " ");
+      write_contentspec content_model;
+      wms ">\n";
+
+      wms ("<!ATTLIST " ^ name);
+      List.iter
+       (fun (n,((t,d),_)) ->
+          wms ("\n  " ^ n);
+          ( match t with
+                A_cdata       -> wms " CDATA";
+              | A_id          -> wms " ID";
+              | A_idref       -> wms " IDREF";
+              | A_idrefs      -> wms " IDREFS";
+              | A_entity      -> wms " ENTITY";
+              | A_entities    -> wms " ENTITIES";
+              | A_nmtoken     -> wms " NMTOKEN";
+              | A_nmtokens    -> wms " NMTOKENS";
+              | A_notation nl -> 
+                  wms " NOTATION (";
+                  ( match nl with
+                        nl1:: nl' ->
+                          wms nl1;
+                          List.iter
+                            (fun n ->
+                               wms ("|" ^ n);
+                            )
+                            nl'
+                      | [] ->
+                          failwith "#write: Illegal content model";
+                  );
+                  wms ")";
+              | A_enum el     ->
+                  wms " (";
+                  ( match el with
+                        el1:: el' ->
+                          wms el1;
+                          List.iter
+                            (fun e ->
+                               wms ("|" ^ e);
+                            )
+                            el'
+                      | [] ->
+                          failwith "#write: Illegal content model";
+                  );
+                  wms ")";
+          );
+          ( match d with
+                D_required -> wms " #REQUIRED"
+              | D_implied  -> wms " #IMPLIED"
+              | D_default s ->
+                  wms " \"";
+                  write_data_string ~from_enc:encoding ~to_enc:enc os s;
+                  wms "\"";
+              | D_fixed s ->
+                  wms " FIXED \"";
+                  write_data_string ~from_enc:encoding ~to_enc:enc os s;
+                  wms "\"";
+          );
+       )
+       attributes;
+
+      wms ">\n";
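+    (* Illustrative example (not part of the original source): for an
+     * element "note" whose content model is  Mixed [MPCDATA; MChild "em"]
+     * and whose attribute list is  [ "lang", ((A_nmtoken, D_implied), false) ],
+     * the method above emits:
+     *
+     *   <!ELEMENT note (#PCDATA|em)*>
+     *   <!ATTLIST note
+     *     lang NMTOKEN #IMPLIED>
+     *)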
+
+    method write_compact_as_latin1 os = 
+      self # write os `Enc_iso88591
+
+    (************************************************************)
+    (*                    VALIDATION                            *)
+    (************************************************************)
+
+    method validate =
+      self # validate_attributes();
+      self # validate_content_model()
+
+    method private validate_attributes() =
+      if attributes_validated then
+       ()
+      else begin
+       (* Validity Constraint: One ID per Element Type *)
+       let n = count (fun (n,((t,d),_)) -> t = A_id) attributes in
+       if n > 1 then
+         raise(Validation_error("More than one ID attribute for element `" ^ name ^ "'"));
+       (* Validity Constraint: ID Attribute Default *)
+       if List.exists
+            (fun (n,((t,d),_)) ->
+               t = A_id && (d <> D_required && d <> D_implied))
+            attributes
+       then
+         raise(Validation_error("ID attribute must be #IMPLIED or #REQUIRED; element `" ^ name ^ "'"));
+       (* Validity Constraint: One Notation per Element Type *)
+       let n = count (fun (n,((t,d),_)) ->
+                        match t with A_notation _ -> true | _ -> false)
+                     attributes in
+       if n > 1 then
+         raise(Validation_error("More than one NOTATION attribute for element `" ^ name ^ "'"));
+       (* Validity Constraint: Notation Attributes [second part] *)
+       List.iter
+         (fun (n,((t,d),_)) ->
+            match t with
+                A_notation l ->
+                  List.iter
+                    (fun nname ->
+                       let _ = dtd # notation nname in ())
+                    l
+              | _ -> ())
+         attributes;
+       (* Validity Constraint: Attribute Default Legal *)
+       List.iter
+         (fun (n,((t,d),_)) ->
+
+            let check v =
+              let lexical_error() =
+                lazy (raise(Validation_error("Default value for attribute `" ^ n ^ "' is lexically malformed"))) in
+              check_attribute_value_lexically lexerset (lexical_error()) t v;
+              begin match t with
+                  (A_entity|A_entities) ->
+                    List.iter
+                      (fun nd ->
+                         let en, extdecl = dtd # gen_entity nd in
+                         if not (en # is_ndata) then
+                           raise(Validation_error("Attribute default value must be the name of an NDATA entity; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
+(*                       if dtd # standalone_declaration && extdecl then
+                           raise(Validation_error("Attribute default value violates the standalone declaration; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'")); 
+-- This is checked anyway when the attribute value is normalized
+*)
+                      )
+                      (split_attribute_value lexerset v)
+                | A_notation nl ->
+                    if not (List.mem v nl) then
+                      raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
+                | A_enum nl ->
+                    if not (List.mem v nl) then
+                      raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
+                | _          -> ()
+              end
+            in
+
+            match d with
+                D_required -> ()
+              | D_implied -> ()
+              | D_default v -> check v
+              | D_fixed v   -> check v
+         )
+         attributes;
+
+       (* Ok: This element declaration is valid *)
+       attributes_validated <- true;
+
+      end
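+    (* Illustrative example (not part of the original source): the
+     * "One ID per Element Type" check above rejects declarations such as
+     *
+     *   <!ATTLIST x  a ID #IMPLIED  b ID #IMPLIED>
+     *
+     * i.e. if two attributes of type A_id have been added for the same
+     * element, 'validate' raises
+     *   Validation_error "More than one ID attribute for element `x'"
+     *)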
+
+    method private validate_content_model () =
+      (* checks:
+       * - Validity Constraint: No Duplicate Types
+       * It is not an error if there is a child in the declaration for which
+       * no element declaration is provided.
+       *)
+      match content_model with
+         Unspecified ->
+           dtd # warner # warn ("Element type `" ^ name ^ "' mentioned but not declared");
+           ()
+       | Empty -> ()
+       | Any -> ()
+       | Mixed (pcdata :: l) ->
+           (* MPCDATA is always the first element by construction *)
+           assert (pcdata = MPCDATA);
+           if check_dups l then
+             raise (Validation_error("Double children in declaration for element `" ^ name ^ "'"))
+       | Regexp _ -> ()
+       | _ -> assert false
+
+
+
+    (************************************************************)
+
+  end
+
+and dtd_notation the_name the_xid init_encoding =
+object (self)
+    val name = the_name
+    val xid = (the_xid : ext_id)
+    val encoding = (init_encoding : Pxp_types.rep_encoding)
+    method name = name
+    method ext_id = xid
+    method encoding = encoding
+
+    method write os enc = 
+      let wms = 
+       write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+      let write_sysid s =
+       if String.contains s '"' then
+         wms ("'" ^ s ^ "'")
+       else
+         wms ("\"" ^ s ^ "\"");
+      in
+
+      wms ("<!NOTATION " ^ name ^ " ");
+      ( match xid with
+           System s ->
+             wms "SYSTEM ";
+             write_sysid s;
+         | Public (p,s) ->
+             wms "PUBLIC ";
+             write_sysid p;
+             if (s <> "") then begin
+               wms " ";
+               write_sysid s;
+             end;
+         | Anonymous ->
+             failwith "#write: External ID Anonymous cannot be represented"
+      );
+      wms ">\n";
+
+    method write_compact_as_latin1 os = 
+      self # write os `Enc_iso88591 
+
+  end
+
+and proc_instruction the_target the_value init_encoding =
+object (self)
+    val target = the_target
+    val value = (the_value : string)
+    val encoding = (init_encoding : Pxp_types.rep_encoding)
+
+    initializer
+      match target with
+         ("xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML") ->
+           (* This is an error, not a warning, because I do not have a
+            * "warner" object at hand.
+            *)
+           raise(WF_error("Reserved processing instruction"))
+       | _ -> ()
+
+    method target = target
+    method value = value
+    method encoding = encoding
+
+    method write os enc = 
+      let wms = 
+       write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+      wms "<?";
+      wms target;
+      wms " ";
+      wms value;
+      wms "?>";
+
+    method write_compact_as_latin1 os = 
+      self # write os `Enc_iso88591
+
+    method parse_pxp_option =
+      let lexers = get_lexer_set encoding in
+      try
+       let toks = tokens_of_xml_pi lexers value in   (* may raise WF_error *)
+       begin match toks with
+           (Pro_name option_name) :: toks' ->
+             let atts = decode_xml_pi toks' in       (* may raise WF_error *)
+             (target, option_name, atts)
+         | _ ->
+             raise(Error("Bad PXP processing instruction"))
+       end
+      with
+         WF_error _ ->
+           raise(Error("Bad PXP processing instruction"))
+
+  end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.10  2000/08/18 21:18:45  gerd
+ *     Updated wrong comments for methods par_entity and gen_entity.
+ * These can raise WF_error and not Validation_error, and this is the
+ * correct behaviour.
+ *
+ * Revision 1.9  2000/07/25 00:30:01  gerd
+ *     Added support for pxp:dtd PI options.
+ *
+ * Revision 1.8  2000/07/23 02:16:34  gerd
+ *     Support for DFAs.
+ *
+ * Revision 1.7  2000/07/16 17:50:01  gerd
+ *     Fixes in 'write'
+ *
+ * Revision 1.6  2000/07/16 16:34:41  gerd
+ *     New method 'write', the successor of 'write_compact_as_latin1'.
+ *
+ * Revision 1.5  2000/07/14 13:56:48  gerd
+ *     Added methods id_attribute_name and idref_attribute_names.
+ *
+ * Revision 1.4  2000/07/09 00:13:37  gerd
+ *     Added methods gen_entity_names, par_entity_names.
+ *
+ * Revision 1.3  2000/07/04 22:10:55  gerd
+ *     Update: collect_warnings -> drop_warnings.
+ *     Update: Case ext_id = Anonymous.
+ *
+ * Revision 1.2  2000/06/14 22:19:06  gerd
+ *     Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ *
+ * Revision 1.18  2000/05/28 17:24:55  gerd
+ *     Bugfixes.
+ *
+ * Revision 1.17  2000/05/27 19:21:25  gerd
+ *     Implemented the changes of rev. 1.10 of markup_dtd.mli.
+ *
+ * Revision 1.16  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.15  2000/05/14 21:50:07  gerd
+ *     Updated: change in internal_entity.
+ *
+ * Revision 1.14  2000/05/06 23:08:46  gerd
+ *     It is possible to allow undeclared attributes.
+ *
+ * Revision 1.13  2000/05/01 20:42:46  gerd
+ *         New method write_compact_as_latin1.
+ *
+ * Revision 1.12  2000/05/01 15:16:57  gerd
+ *     The errors "undeclared parameter/general entities" are
+ * well-formedness errors, not validation errors.
+ *
+ * Revision 1.11  2000/03/11 22:58:15  gerd
+ *     Updated to support Markup_codewriter.
+ *
+ * Revision 1.10  2000/01/20 20:53:47  gerd
+ *     Changed such that it runs with Markup_entity's new interface.
+ *
+ * Revision 1.9  1999/11/09 22:15:41  gerd
+ *     Added method "arbitrary_allowed".
+ *
+ * Revision 1.8  1999/09/01 22:52:22  gerd
+ *     If 'allow_arbitrary' is in effect, no validation happens anymore.
+ *
+ * Revision 1.7  1999/09/01 16:21:24  gerd
+ *     Added several warnings.
+ *     The attribute type of "xml:space" is now strictly checked.
+ *
+ * Revision 1.6  1999/08/15 20:34:21  gerd
+ *     Improved error messages.
+ *     Bugfix: It is no longer allowed to create processing instructions
+ * with target "xml".
+ *
+ * Revision 1.5  1999/08/15 02:20:16  gerd
+ *     New feature: a DTD can allow arbitrary elements.
+ *
+ * Revision 1.4  1999/08/15 00:21:39  gerd
+ *     Comments have been updated.
+ *
+ * Revision 1.3  1999/08/14 22:12:52  gerd
+ *         Several functions have now a "warner" as argument which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ *     Bugfix: if two general entities with the same name are defined,
+ * the first counts, not the second.
+ *
+ * Revision 1.2  1999/08/11 14:56:35  gerd
+ *     Declaration of the predefined entities {lt,gt,amp,quot,apos}
+ * is no longer forbidden; but the original definition cannot be overridden.
+ *     TODO: If these entities are redeclared with problematic values,
+ * the user should be warned.
+ *
+ * Revision 1.1  1999/08/10 00:35:51  gerd
+ *     Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_dtd.mli b/helm/DEVEL/pxp/pxp/pxp_dtd.mli
new file mode 100644 (file)
index 0000000..1c347fb
--- /dev/null
@@ -0,0 +1,476 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+(*$ markup-dtd1.mli *)
+
+(**********************************************************************)
+(*                                                                    *)
+(* Pxp_dtd:                                                           *)
+(*     Object model of document type declarations                     *)
+(*                                                                    *)
+(**********************************************************************)
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class dtd ............... represents the whole DTD, including element
+ *                           declarations, entity declarations, notation
+ *                           declarations, and processing instructions
+ * class dtd_element ....... represents an element declaration consisting
+ *                           of a content model and an attribute list
+ *                           declaration
+ * class dtd_notation ...... represents a notation declaration
+ * class proc_instruction .. represents a processing instruction
+ * ======================================================================
+ *
+ *)
+
+
+class dtd :
+  (* Creation:
+   *   new dtd
+   * creates a new, empty DTD object without any declaration, without a root
+   * element, without an ID.
+   *)
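+  (* Illustrative sketch, not part of the original interface. It assumes
+   * that Pxp_types.collect_warnings can be instantiated directly and that
+   * UTF-8 is chosen as internal representation:
+   *
+   *   let dtd = new Pxp_dtd.dtd (new Pxp_types.collect_warnings) `Enc_utf8 in
+   *   dtd # set_root "document";
+   *   dtd # validate
+   *)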
+  Pxp_types.collect_warnings -> 
+  Pxp_types.rep_encoding ->
+  object
+    method root : string option
+      (* get the name of the root element if present *)
+
+    method set_root : string -> unit
+      (* set the name of the root element. This method can be invoked 
+       * only once
+       *)
+
+    method id : Pxp_types.dtd_id option
+      (* get the identifier for this DTD *)
+
+    method set_id : Pxp_types.dtd_id -> unit
+      (* set the identifier. This method can be invoked only once *)
+
+    method encoding : Pxp_types.rep_encoding
+      (* returns the encoding used for character representation *)
+
+
+    method allow_arbitrary : unit
+      (* After this method has been invoked, the object changes its behaviour:
+       * - elements and notations that have not been added may be used in an
+       *   arbitrary way; the methods "element" and "notation" indicate this
+       *   by raising Undeclared instead of Validation_error.
+       *)
+
+    method disallow_arbitrary : unit
+
+    method arbitrary_allowed : bool
+      (* Returns whether arbitrary contents are allowed or not. *)
+
+    method standalone_declaration : bool
+      (* Whether there is a 'standalone' declaration or not. Strictly 
+       * speaking, this declaration is not part of the DTD, but it is
+       * included here because of practical reasons. 
+       * If not set, this property defaults to 'false'.
+       *)
+
+    method set_standalone_declaration : bool -> unit
+      (* Sets the 'standalone' declaration. *)
+
+
+    method add_element : dtd_element -> unit
+      (* add the given element declaration to this DTD. Raises Not_found
+       * if there is already an element declaration with the same name.
+       *)
+
+    method add_gen_entity : Pxp_entity.entity -> bool -> unit
+      (* add_gen_entity e extdecl:
+       * add the entity 'e' as general entity to this DTD (general entities
+       * are those represented by &name;). If there is already a declaration
+       * with the same name, the second definition is ignored; as an exception to
+       * this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
+       * may only be redeclared with a definition that is equivalent to the
+       * standard definition; otherwise a Validation_error is raised.
+       *
+       * 'extdecl': 'true' indicates that the entity declaration occurs in
+       * an external entity. (Used for the standalone check.)
+       *)
+
+    method add_par_entity : Pxp_entity.entity -> unit
+      (* add the given entity as parameter entity to this DTD (parameter
+       * entities are those represented by %name;). If there is already a 
+       * declaration with the same name, the second definition is ignored.
+       *)
+
+    method add_notation : dtd_notation -> unit
+      (* add the given notation to this DTD. If there is already a declaration
+       * with the same name, a Validation_error is raised.
+       *)
+
+    method add_pinstr : proc_instruction -> unit
+      (* add the given processing instruction to this DTD. *)
+
+    method element : string -> dtd_element
+      (* looks up the element declaration with the given name. Raises 
+       * Validation_error if the element cannot be found. (If "allow_arbitrary"
+       * has been invoked before, Undeclared is raised instead.)
+       *)
+
+    method element_names : string list
+      (* returns the list of the names of all element declarations. *)
+
+    method gen_entity : string -> (Pxp_entity.entity * bool)
+      (* let e, extdecl = obj # gen_entity n:
+       * looks up the general entity 'e' with the name 'n'. Raises
+       * WF_error if the entity cannot be found.
+       * 'extdecl': indicates whether the entity declaration occurred in an 
+       * external entity.
+       *)
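+    (* Illustrative sketch, not part of the original interface. It mirrors
+     * the standalone check that Pxp_entity performs when a reference &n;
+     * is resolved:
+     *
+     *   let en, extdecl = dtd # gen_entity n in
+     *   if dtd # standalone_declaration && extdecl then
+     *     raise (Pxp_types.Validation_error
+     *              ("Reference to entity `" ^ n ^
+     *               "' violates standalone declaration"))
+     *)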
+
+    method gen_entity_names : string list
+      (* returns the list of all general entity names *)
+
+    method par_entity : string -> Pxp_entity.entity
+      (* looks up the parameter entity with the given name. Raises
+       * WF_error if the entity cannot be found.
+       *)
+
+    method par_entity_names : string list
+      (* returns the list of all parameter entity names *)
+
+    method notation : string -> dtd_notation
+      (* looks up the notation declaration with the given name. Raises
+       * Validation_error if the notation cannot be found. (If "allow_arbitrary"
+       * has been invoked before, Undeclared is raised instead.)
+       *)
+
+    method notation_names : string list
+      (* Returns the list of the names of all added notations *)
+
+    method pinstr : string -> proc_instruction list
+      (* looks up all processing instructions with the given target.
+       * The "target" is the identifier following "<?".
+       * Note: It is not possible to find out the exact position of the
+       * processing instruction.
+       *)
+
+    method pinstr_names : string list
+      (* Returns the list of the names (targets) of all added pinstrs *)
+
+    method validate : unit
+      (* ensures that the DTD is valid. This method is optimized such that
+       * actual validation is only performed if the DTD has changed.
+       * If the DTD is invalid, mostly a Validation_error is raised,
+       * but other exceptions are possible, too.
+       *)
+
+    method only_deterministic_models : unit
+      (* Succeeds if all regexp content models are deterministic. 
+       * Otherwise Validation_error.
+       *)
+
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> bool -> unit
+      (* write os enc doctype:
+       * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a 
+       * DTD like <!DOCTYPE root [ ... ]> is written. If 'not doctype',
+       * only the declarations are written (the material within the
+       * square brackets).
+       *)
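+    (* Illustrative sketch, not part of the original interface; it assumes
+     * the Out_buffer constructor of Pxp_types.output_stream:
+     *
+     *   let buf = Buffer.create 1024 in
+     *   dtd # write (Pxp_types.Out_buffer buf) `Enc_utf8 false;
+     *   (* buf now contains only the declarations, i.e. the material that
+     *    * would appear between the square brackets of a DOCTYPE clause *)
+     *)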
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -> bool -> unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+
+    (*----------------------------------------*)
+    method invalidate : unit
+      (* INTERNAL METHOD *)
+    method warner : Pxp_types.collect_warnings
+      (* INTERNAL METHOD *)
+  end
+
+(*$-*)
+
+(*$ markup-dtd2.mli *)
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_element : dtd -> string -> 
+  (* Creation:
+   *   new dtd_element init_dtd init_name:
+   * creates a new dtd_element object for init_dtd with init_name.
+   * The strings are represented in the same encoding as init_dtd.
+   *)
+  object
+
+    method name : string
+      (* returns the name of the declared element *)
+
+    method externally_declared : bool
+      (* returns whether the element declaration occurs in an external
+       * entity.
+       *)
+
+    method content_model : Pxp_types.content_model_type
+      (* get the content model of this element declaration, or Unspecified *)
+
+    method content_dfa : Pxp_dfa.dfa_definition option
+      (* return the DFA of the content model if there is a DFA, or None.
+       * A DFA exists only for regexp style content models which are
+       * deterministic.
+       *)
+
+    method set_cm_and_extdecl : Pxp_types.content_model_type -> bool -> unit
+      (* set_cm_and_extdecl cm extdecl:
+       * set the content model to 'cm'. Once the content model is not 
+       * Unspecified, it cannot be set to a different value again.
+       * Furthermore, it is set whether the element occurs in an external
+       * entity ('extdecl').
+       *)
+
+    method encoding : Pxp_types.rep_encoding
+      (* Return the encoding of the strings *)
+
+    method allow_arbitrary : unit
+      (* After this method has been invoked, the object changes its behaviour:
+       * - attributes that have not been added may be used in an
+       *   arbitrary way; the method "attribute" indicates this
+       *   by raising Undeclared instead of Validation_error.
+       *)
+
+    method disallow_arbitrary : unit
+
+    method arbitrary_allowed : bool
+      (* Returns whether arbitrary attributes are allowed or not. *)
+
+    method attribute : string -> 
+                         Pxp_types.att_type * Pxp_types.att_default
+      (* get the type and default value of a declared attribute, or raise
+       * Validation_error if the attribute does not exist.
+       * If 'arbitrary_allowed', the exception Undeclared is raised instead
+       * of Validation_error.
+       *)
+
+    method attribute_violates_standalone_declaration : 
+               string -> string option -> bool
+      (* attribute_violates_standalone_declaration name v:
+       * Checks whether the attribute 'name' violates the "standalone"
+       * declaration if it has value 'v'.
+       * The method returns true if:
+       * - The attribute declaration occurs in an external entity, 
+       * and if one of the two conditions holds:
+       * - v = None, and there is a default for the attribute value
+       * - v = Some s, and the type of the attribute is not CDATA,
+       *   and s changes if normalized according to the rules of the
+       *   attribute type.
+       *
+       * The method raises Validation_error if the attribute does not exist.
+       * If 'arbitrary_allowed', the exception Undeclared is raised instead
+       * of Validation_error.
+       *)
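+    (* Illustrative example, not part of the original interface ('el' names
+     * some dtd_element object). If the attribute "align" was declared in an
+     * external entity with type NMTOKEN and default value "left", then
+     *
+     *   el # attribute_violates_standalone_declaration "align" None
+     *
+     * is true (the default would have to be substituted), and
+     *
+     *   el # attribute_violates_standalone_declaration "align" (Some " left ")
+     *
+     * is true as well, because normalizing the NMTOKEN value changes it.
+     *)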
+
+    method attribute_names : string list
+      (* get the list of all declared attributes *)
+
+    method names_of_required_attributes : string list
+      (* get the list of all attributes that are specified as required 
+       * attributes
+       *)
+
+    method id_attribute_name : string option
+      (* Returns the name of the attribute with type ID, or None. *)
+
+    method idref_attribute_names : string list
+      (* Returns the names of the attributes with type IDREF or IDREFS. *)
+
+    method add_attribute : string -> 
+                           Pxp_types.att_type -> 
+                          Pxp_types.att_default -> 
+                          bool ->
+                            unit
+      (* add_attribute name type default extdecl:
+       * add an attribute declaration for an attribute with the given name,
+       * type, and default value. If there is more than one declaration for
+       * an attribute name, the first declaration counts; the other declarations
+       * are ignored.
+       * 'extdecl': if true, the attribute declaration occurs in an external
+       * entity. This property is used to check the "standalone" attribute.
+       *)
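+    (* Illustrative sketch, not part of the original interface ('dtd' names
+     * an existing dtd object; Pxp_types is assumed to be open):
+     *
+     *   let el = new dtd_element dtd "note" in
+     *   el # set_cm_and_extdecl (Mixed [ MPCDATA ]) false;
+     *   el # add_attribute "lang" A_nmtoken D_implied false;
+     *   dtd # add_element el
+     *)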
+
+    method validate : unit
+      (* checks whether this element declaration (i.e. the content model and
+       * all attribute declarations) is valid for the associated DTD.
+       * Raises mostly Validation_error if the validation fails.
+       *)
+
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+      (* write os enc:
+       * Writes the <!ELEMENT ... > declaration to 'os' as 'enc'-encoded string.
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+  end
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_notation : string -> Pxp_types.ext_id -> Pxp_types.rep_encoding ->
+  (* Creation:
+   *    new dtd_notation a_name an_external_ID init_encoding
+   * creates a new dtd_notation object with the given name and the given
+   * external ID.
+   *)
+  object
+    method name : string
+    method ext_id : Pxp_types.ext_id
+    method encoding : Pxp_types.rep_encoding
+
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+      (* write os enc:
+       * Writes the <!NOTATION ... > declaration to 'os' as 'enc'-encoded 
+       * string.
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+  end
+
+(* ---------------------------------------------------------------------- *)
+
+and proc_instruction : string -> string -> Pxp_types.rep_encoding ->
+  (* Creation:
+   *   new proc_instruction a_target a_value
+   * creates a new proc_instruction object with the given target string and
+   * the given value string. 
+   * Note: A processing instruction is written as <?target value?>. 
+   *)
+  object
+    method target : string
+    method value : string
+    method encoding : Pxp_types.rep_encoding
+
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+      (* write os enc:
+       * Writes the <?...?> PI to 'os' as 'enc'-encoded string.
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+    method parse_pxp_option : (string * string * (string * string) list)
+      (* Parses a PI containing a PXP option. Such PIs are formed like:
+       *   <?target option-name option-att="value" option-att="value" ... ?>
+       * The method returns a triple
+       *   (target, option-name, [option-att, value; ...])
+       * or raises Error.
+       *)
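+    (* Illustrative example, not part of the original interface; the option
+     * and attribute names below are made up:
+     *
+     *   let pi = new proc_instruction
+     *              "pxp:dtd" "my-option att1=\"v1\" att2=\"v2\"" `Enc_utf8 in
+     *   pi # parse_pxp_option
+     *
+     * returns ("pxp:dtd", "my-option", ["att1", "v1"; "att2", "v2"]).
+     *)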
+
+  end
+
+;;
+
+(*$-*)
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.8  2000/08/18 21:18:45  gerd
+ *     Updated wrong comments for methods par_entity and gen_entity.
+ * These can raise WF_error and not Validation_error, and this is the
+ * correct behaviour.
+ *
+ * Revision 1.7  2000/07/25 00:30:01  gerd
+ *     Added support for pxp:dtd PI options.
+ *
+ * Revision 1.6  2000/07/23 02:16:33  gerd
+ *     Support for DFAs.
+ *
+ * Revision 1.5  2000/07/16 16:34:41  gerd
+ *     New method 'write', the successor of 'write_compact_as_latin1'.
+ *
+ * Revision 1.4  2000/07/14 13:56:49  gerd
+ *     Added methods id_attribute_name and idref_attribute_names.
+ *
+ * Revision 1.3  2000/07/09 00:13:37  gerd
+ *     Added methods gen_entity_names, par_entity_names.
+ *
+ * Revision 1.2  2000/06/14 22:19:06  gerd
+ *     Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_dtd.ml:
+ *
+ * Revision 1.11  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.10  2000/05/27 19:20:38  gerd
+ *     Changed the interfaces for the standalone check: New
+ * methods: standalone_declaration, set_standalone_declaration,
+ * externally_declared, attribute_violates_standalone_declaration.
+ *     The method set_content_model has been renamed to
+ * set_cm_and_extdecl; it now initializes also whether the element
+ * has been declared in an external entity.
+ *     Methods add_gen_entity and gen_entity pass an additional
+ * boolean argument containing whether the declaration of the
+ * general entity happened in an external entity.
+ *     Method add_attribute expects this argument, too, which
+ * states whether the declaration of the attribute happened in an
+ * external entity.
+ *
+ * Revision 1.9  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.8  2000/05/06 23:10:26  gerd
+ *     allow_arbitrary for elements, too.
+ *
+ * Revision 1.7  2000/05/01 20:42:52  gerd
+ *         New method write_compact_as_latin1.
+ *
+ * Revision 1.6  2000/03/11 22:58:15  gerd
+ *     Updated to support Markup_codewriter.
+ *
+ * Revision 1.5  2000/02/22 02:32:02  gerd
+ *     Updated.
+ *
+ * Revision 1.4  1999/11/09 22:15:41  gerd
+ *     Added method "arbitrary_allowed".
+ *
+ * Revision 1.3  1999/09/01 16:21:56  gerd
+ *     "dtd" classes have now an argument that passes a "warner".
+ *
+ * Revision 1.2  1999/08/15 02:20:23  gerd
+ *         New feature: a DTD can allow arbitrary elements.
+ *
+ * Revision 1.1  1999/08/10 00:35:51  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_entity.ml b/helm/DEVEL/pxp/pxp/pxp_entity.ml
new file mode 100644 (file)
index 0000000..94b21ae
--- /dev/null
@@ -0,0 +1,1292 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+(* TODO:
+ * - How do we prevent an internal entity from accepting an XML declaration
+ *   in its replacement text?
+ *)
+
+
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_aux
+open Pxp_reader
+
+(* Hierarchy of parsing layers:
+ *
+ * - Parser: Pxp_yacc
+ *   + gets input stream from the main entity object
+ *   + checks most of the grammar
+ *   + creates the DTD object as side-effect
+ *   + creates the element tree as side-effect
+ *   + creates further entity objects that are entered into the DTD
+ * - Entity layer: Pxp_entity
+ *   + gets input stream from the lexers, or another entity object
+ *   + handles entity references: if a reference is encountered the
+ *     input stream is redirected such that the tokens come from the
+ *     referenced entity object
+ *   + handles conditional sections
+ * - Lexer layer: Pxp_lexers
+ *   + gets input from lexbuffers created by resolvers
+ *   + different lexers for different lexical contexts
+ *   + a lexer returns pairs (token,lexid), where token is the scanned
+ *     token, and lexid is the name of the lexer that must be used for
+ *     the next token
+ * - Resolver layer: Pxp_reader
+ *   + a resolver creates the lexbuf from some character source
+ *   + a resolver recodes the input and handles the encoding scheme
+ *)
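+
+(* Illustrative sketch, not part of the original source: the parser layer
+ * drives the entity layer roughly like this, always reading from the entity
+ * that is currently on top of the entity manager's stack (the entity objects
+ * themselves push and pop entities when references are encountered):
+ *
+ *   let rec pull mng =
+ *     match mng # current_entity # next_token with
+ *         Eof -> ()   (* the outermost entity is exhausted *)
+ *       | tok -> (* ... feed 'tok' into the grammar ... *) pull mng
+ *)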
+
+(**********************************************************************)
+
+(* Variables of type 'state' are used to insert Begin_entity and End_entity
+ * tokens into the stream.
+ * - At_beginning: Nothing has been read so far
+ * - Inserted_begin_entity: A Begin_entity token has been inserted, and the
+ *   first real token has been deferred. (Begin_entity/End_entity must not be
+ *   inserted if the entity is empty.)
+ * - At_end: Eof has been read, and End_entity has been returned.
+ *)
+
+type state =
+    At_beginning
+  | Inserted_begin_entity
+  | At_end
+;;
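+
+(* Illustrative example, not part of the original source: an external entity
+ * whose text is "<a/>" passes through the states
+ *   At_beginning --> Inserted_begin_entity --> At_end
+ * and delivers the token sequence
+ *   Begin_entity, <tokens of the element>, End_entity, Eof
+ * A completely empty entity delivers either just Eof, or Begin_entity,
+ * End_entity, Eof, depending on whether the entity treats empty input
+ * specially (see 'p_special_empty_entities' in class external_entity below).
+ *)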
+
+
+(**********************************************************************)
+
+class virtual entity the_dtd the_name the_warner 
+              init_errors_with_line_numbers init_encoding =
+  object (self)
+    (* This class prescribes the type of all entity objects. Furthermore,
+     * the default 'next_token' mechanism is implemented.
+     *)
+
+    (* 'init_errors_with_line_numbers': whether error messages contain line
+     * numbers or not.
+     * Calculating line numbers is expensive.
+     *)
+
+    val mutable dtd = the_dtd
+    val mutable name = the_name
+    val mutable warner = the_warner
+
+    val encoding = (init_encoding : rep_encoding)
+    val lexerset = Pxp_lexers.get_lexer_set init_encoding
+
+    method encoding = encoding
+    (* method lexerset = lexerset *)
+
+    val mutable manager = None
+      (* The current entity_manager, see below *)
+
+    method private manager = 
+      ( match manager with
+           None -> assert false
+         | Some m -> m
+      : < current_entity : entity; 
+         pop_entity : unit;
+         push_entity : entity -> unit >
+      )
+
+    method set_manager m = manager <- Some m
+
+
+    val mutable lexbuf = Lexing.from_string ""
+      (* The lexical buffer currently used as character source. *)
+
+    val mutable prolog = None
+      (* Stores the initial <?xml ...?> token as PI_xml *)
+
+    val mutable prolog_pairs = []
+      (* If prolog <> None, these are the (name,value) pairs of the
+       * processing instruction.
+       *)
+
+
+    val mutable lex_id = Document
+      (* The name of the lexer that should be used for the next token *)
+
+    method set_lex_id id = lex_id <- id
+
+
+
+    val mutable force_parameter_entity_parsing = false
+      (* 'true' forces inner entities to always be embraced by
+       *        Begin_entity and End_entity.
+       * 'false': the inner entity itself decides this
+       *)
+
+    val mutable check_text_declaration = true
+      (* 'true': It is checked that the <?xml..?> declaration matches the
+       *         production TextDecl.
+       *)
+
+    val mutable normalize_newline = true
+      (* Whether this entity converts CRLF or CR to LF, or not *)
+
+
+    val mutable line = 1     (* current line *)
+    val mutable column = 0   (* current column *)
+    val mutable pos = 0      (* current absolute character position *)
+    val errors_with_line_numbers = init_errors_with_line_numbers
+
+    val mutable p_line = 1
+    val mutable p_column = 1
+
+    method line = p_line
+    method column = p_column
+
+
+    val mutable counts_as_external = false
+
+    method counts_as_external = counts_as_external
+        (* Whether the entity counts as external (for the standalone check). *)
+
+    method set_counts_as_external =
+      counts_as_external <- true
+
+
+    val mutable last_token = Bof
+      (* 'last_token' records the token returned by the previous invocation
+       * of 'next_token'; it is currently only used to detect whether we are
+       * still at the beginning of the entity (Bof). See also the TODO note
+       * in 'next_token' below.
+       *)
+
+    val mutable deferred_token = None
+      (* If you set this to Some tl, the next invocations of 
+       * next_token_from_entity will return the tokens in tl.
+       * This makes it possible to insert tokens into the stream.
+       *)
+
+    val mutable debug = false
+
+    method is_ndata = false
+      (* Returns if this entity is an NDATA (unparsed) entity *)
+
+    method name = name
+
+    method virtual open_entity : bool -> lexers -> unit
+       (* open_entity force_parsing lexid:
+        * opens the entity, and the first token is scanned by the lexer
+        * 'lexid'. 'force_parsing' forces that Begin_entity and End_entity
+        * tokens embrace the inner tokens of the entity; otherwise this
+        * depends on the entity.
+        * By opening an entity, reading tokens from it, and finally closing
+        * the entity, the inclusion methods "Included",
+        * "Included if validating", and "Included as PE" can be carried out.
+        * Which method is chosen depends on the 'lexid', i.e. the lexical
+        * context: 'lexid = Content' performs "Included (if validating)" (we
+        * are always validating); 'lexid = Declaration' performs
+        * "Included as PE". The difference is which tokens are recognized,
+        * and how spaces are handled.
+        * 'force_parsing' causes a Begin_entity token to be inserted before
+        * and an End_entity token is inserted after the entity. The yacc
+        * rules allow the Begin_entity ... End_entity brace only at certain
+        * positions; this is used to restrict the possible positions where
+        * entities may be included, and to guarantee that the entity matches
+        * a certain production of the grammar ("parsed entities").
+        * 'open_entity' is currently invoked with 'force_parsing = true'
+        * for toplevel nodes, for inclusion of internal general entities,
+        * and for inclusion of parameter entities into document entities.
+        * 'force_parsing = false' is used for all other cases: External
+        * entities add the Begin_entity/End_entity tokens anyway; internal
+        * entities do not. Especially internal parameter entities referenced
+        * from non-document entities do not add these tokens.
+        *)
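+       (* Illustrative sketch, not part of the original source; it mirrors
+        * what 'next_token' does when a general entity reference &n; is
+        * encountered in content:
+        *
+        *   let en, _ = dtd # gen_entity n in
+        *   en # open_entity true lex_id;
+        *   self # manager # push_entity en;
+        *   en # next_token
+        *)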
+
+    method virtual close_entity : lexers
+       (* close_entity:
+        * closes the entity and returns the name of the lexer that must
+        * be used to scan the next token.
+        *)
+
+    method virtual replacement_text : (string * bool)
+       (* replacement_text:
+        * returns the replacement text of the entity, and as second value,
+        * whether the replacement text was constructed by referencing
+        * external entities (directly or indirectly).
+        * This method implements the inclusion method "Included in Literal".
+        *)
+
+
+    method lexbuf = lexbuf
+
+
+    method xml_declaration =
+      (* return the (name,value) pairs of the initial <?xml name=value ...?>
+       * processing instruction.
+       *)
+      match prolog with
+         None ->
+           None
+       | Some p ->
+           Some prolog_pairs
+
+
+    method set_debugging_mode m =
+      debug <- m
+
+    method private virtual set_encoding : string -> unit
+
+
+    method full_name =
+      name
+
+
+    method next_token =
+      (* read next token from this entity *)
+
+      match deferred_token with
+         Some toklist ->
+           ( match toklist with
+                 [] -> 
+                   deferred_token <- None;
+                   self # next_token
+               | tok :: toklist' ->
+                   deferred_token <- Some toklist';
+                   if debug then
+                     prerr_endline ("- Entity " ^ name ^ ": " ^ string_of_tok tok ^ " (deferred)");
+                   tok
+           )
+       | None -> begin
+            let this_line = line
+            and this_column = column in
+           let this_pos = pos in
+           p_line <- this_line;
+           p_column <- this_column;
+           (* Read the next token from the appropriate lexer lex_id, and get the
+            * name lex_id' of the next lexer to be used.
+            *)
+           let tok, lex_id' =
+             match lex_id with
+                 Document         -> lexerset.scan_document lexbuf
+               | Document_type    -> lexerset.scan_document_type lexbuf
+               | Content          -> lexerset.scan_content lexbuf
+               | Within_tag       -> lexerset.scan_within_tag lexbuf
+               | Declaration      -> lexerset.scan_declaration lexbuf
+               | Content_comment  -> lexerset.scan_content_comment lexbuf
+               | Decl_comment     -> lexerset.scan_decl_comment lexbuf
+               | Document_comment -> lexerset.scan_document_comment lexbuf
+               | Ignored_section  -> assert false
+                     (* Ignored_section: only used by method next_ignored_token *)
+           in
+           if debug then
+             prerr_endline ("- Entity " ^ name ^ ": " ^ string_of_tok tok);
+           (* Find out the number of lines and characters of the last line: *)
+           let n_lines, n_columns =
+             if errors_with_line_numbers then
+               count_lines (Lexing.lexeme lexbuf)
+             else
+               0, (Lexing.lexeme_end lexbuf - Lexing.lexeme_start lexbuf)
+           in
+           line <- this_line + n_lines;
+           column <- if n_lines = 0 then this_column + n_columns else n_columns;
+           pos <- Lexing.lexeme_end lexbuf;
+           lex_id <- lex_id';
+           (* Throw Ignore and Comment away; Interpret entity references: *)
+           (* NOTE: Of course, references to general entities are not allowed
+            * everywhere; parameter references, too. This is already done by the
+            * lexers, i.e. &name; and %name; are recognized only where they
+            * are allowed.
+            *)
+
+           (* TODO: last_token is only used to detect Bof. Can be simplified *)
+
+           let at_bof = (last_token = Bof) in
+           last_token <- tok;
+
+           let tok' =
+             match tok with
+
+          (* Entity references: *)
+
+               | ERef n    -> 
+                    let en, extdecl = dtd # gen_entity n in
+                   if dtd # standalone_declaration && extdecl then
+                     raise
+                       (Validation_error
+                          ("Reference to entity `" ^ n ^ 
+                           "' violates standalone declaration"));
+                   en # set_debugging_mode debug;
+                   en # open_entity true lex_id;
+                   self # manager # push_entity en;
+                   en # next_token;
+               | PERef n   -> 
+                   let en = dtd # par_entity n in
+                   en # set_debugging_mode debug;
+                   en # open_entity force_parameter_entity_parsing lex_id;
+                   self # manager # push_entity en;
+                   en # next_token;
+
+          (* Convert LineEnd to CharData *)
+               | LineEnd s -> 
+                   if normalize_newline then 
+                     CharData "\n"
+                   else
+                     CharData s
+
+          (* Also normalize CDATA sections *)
+               | Cdata value as cd ->
+                   if normalize_newline then 
+                     Cdata(normalize_line_separators lexerset value)
+                   else
+                     cd
+
+          (* If there are CRLF sequences in a PI value, normalize them, too *)
+               | PI(name,value) as pi ->
+                   if normalize_newline then
+                     PI(name, normalize_line_separators lexerset value)
+                   else
+                     pi
+         
+          (* Attribute values: If they are already normalized, they are turned
+          * into Attval_nl_normalized. This is detected by other code.
+          *)
+               | Attval value as av ->
+                   if normalize_newline then
+                     av
+                   else
+                     Attval_nl_normalized value
+
+          (* Another CRLF normalization case: Unparsed_string *)
+               | Unparsed_string value as ustr ->
+                   if normalize_newline then
+                     Unparsed_string(normalize_line_separators lexerset value)
+                   else
+                     ustr
+                     
+          (* These tokens require that the entity_id parameter is set: *)
+               | Doctype _      -> Doctype       (self :> entity_id)
+               | Doctype_rangle _ -> Doctype_rangle (self :> entity_id)
+               | Dtd_begin _    -> Dtd_begin     (self :> entity_id)
+               | Dtd_end _      -> Dtd_end       (self :> entity_id)
+               | Decl_element _ -> Decl_element  (self :> entity_id)
+               | Decl_attlist _ -> Decl_attlist  (self :> entity_id)
+               | Decl_entity _  -> Decl_entity   (self :> entity_id)
+               | Decl_notation _ -> Decl_notation  (self :> entity_id)
+               | Decl_rangle _  -> Decl_rangle   (self :> entity_id)
+               | Lparen _       -> Lparen        (self :> entity_id)
+               | Rparen _       -> Rparen        (self :> entity_id)
+               | RparenPlus _   -> RparenPlus    (self :> entity_id)
+               | RparenStar _   -> RparenStar    (self :> entity_id)
+               | RparenQmark _  -> RparenQmark   (self :> entity_id)
+               | Conditional_begin _ -> Conditional_begin (self :> entity_id)
+               | Conditional_body _  -> Conditional_body  (self :> entity_id)
+               | Conditional_end _   -> Conditional_end   (self :> entity_id)
+               | Tag_beg (n,_)  -> Tag_beg (n, (self :> entity_id))
+               | Tag_end (n,_)  -> Tag_end (n, (self :> entity_id))
+
+          (* End of file: *)
+
+               | Eof       -> 
+                   if debug then begin
+                     prerr_endline ("- Entity " ^ name ^ " # handle_eof");
+                     let tok = self # handle_eof in
+                     prerr_endline ("- Entity " ^ name ^ " # handle_eof: returns " ^ string_of_tok tok);
+                     tok
+                   end
+                   else
+                     self # handle_eof;
+                   
+          (* The default case. *)
+
+               | _         -> 
+                    tok
+
+           in
+           if at_bof && tok <> Eof
+           then begin
+             if debug then
+               prerr_endline ("- Entity " ^ name ^ " # handle_bof");
+             self # handle_bof tok'
+           end
+           else
+             tok'
+         end
+
+
+    (* 'handle_bof' and 'handle_eof' can be used as hooks. Behaviour:
+     *
+     * - Normally, the first token t is read in, and 'handle_bof t' is
+     *   called. The return value of this method is what is returned to
+     *   the user.
+     * - If the EOF has been reached, 'handle_eof' is called. 
+     * - BUT: If the first token is already EOF, 'handle_eof' is called
+     *   ONLY, and 'handle_bof' is NOT called.
+     *
+     * The default implementations:
+     * - handle_bof: does nothing
+     * - handle_eof: Pops the previous entity from the stack, switches back
+     *   to this entity, and returns the next token of this entity.
+     *)
+
+
+    method private handle_bof tok =
+      tok
+
+
+    method private handle_eof =
+      let mng = self # manager in
+      begin try
+       mng # pop_entity;
+       let next_lex_id = self # close_entity in
+       let en = mng # current_entity in
+       en # set_lex_id next_lex_id;
+       en # next_token
+      with
+         Stack.Empty ->
+           (* The outermost entity is at EOF *)
+           Eof
+      end
+
+
+    method next_ignored_token =
+        (* used after <![ IGNORE *)
+
+      (* TODO: Do we need a test on deferred tokens here? *)
+
+        let this_line = line
+        and this_column = column in
+       let this_pos = pos in
+       let tok, lex_id' = lexerset.scan_ignored_section lexbuf in
+       if debug then
+         prerr_endline ("- Entity " ^ name ^ ": " ^ string_of_tok tok ^ " (Ignored)");
+       let n_lines, n_columns = count_lines (Lexing.lexeme lexbuf) in
+       line <- this_line + n_lines;
+       column <- if n_lines = 0 then this_column + n_columns else n_columns;
+       pos <- Lexing.lexeme_end lexbuf;
+       match tok with
+         | Conditional_begin _ -> Conditional_begin (self :> entity_id)
+         | Conditional_end _   -> Conditional_end   (self :> entity_id)
+         | _                   -> tok
+
+
+    method process_xmldecl pl =
+      (* The parser calls this method just after the XML declaration
+       * <?xml ...?> has been detected.
+       * 'pl': This is the argument of the PI_xml token.
+       *)
+      if debug then
+       prerr_endline ("- Entity " ^ name ^ " # process_xmldecl");
+      prolog <- Some pl;
+      prolog_pairs <- decode_xml_pi pl;
+      if check_text_declaration then
+       check_text_xml_pi prolog_pairs;
+      begin
+       try
+         let e = List.assoc "encoding" prolog_pairs in
+         self # set_encoding e
+       with
+           Not_found ->
+             self # set_encoding ""
+      end;
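+    (* Illustrative example, not part of the original source: for the
+     * declaration  <?xml version="1.0" encoding="ISO-8859-1"?>  the
+     * decoded pairs are
+     *   [ "version", "1.0"; "encoding", "ISO-8859-1" ]
+     * and set_encoding is invoked with "ISO-8859-1".
+     *)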
+
+
+    method process_missing_xmldecl =
+      (* The parser calls this method if the XML declaration is missing *)
+      if debug then
+       prerr_endline ("- Entity " ^ name ^ " # process_missing_xmldecl");
+      self # set_encoding ""
+
+
+    (* Methods for NDATA entities only: *)
+    method ext_id = (assert false : ext_id)
+    method notation = (assert false : string)
+
+  end
+;;
+
+
+class ndata_entity the_name the_ext_id the_notation init_encoding =
+  object (self)
+    (* An NDATA entity is very restricted; more or less you can only find out
+     * its external ID and its notation.
+     *)
+
+    val mutable name = the_name
+    val mutable ext_id = the_ext_id
+    val mutable notation = the_notation
+    val encoding = (init_encoding : rep_encoding)
+
+    method name = (name : string)
+    method ext_id = (ext_id : ext_id)
+    method notation = (notation : string)
+
+    method is_ndata = true
+
+    method encoding = encoding
+
+
+    val mutable counts_as_external = false
+
+    method counts_as_external = counts_as_external
+        (* Whether the entity counts as external (for the standalone check). *)
+
+    method set_counts_as_external =
+      counts_as_external <- true
+
+
+    method set_manager (m : < current_entity : entity; 
+                             pop_entity : unit;
+                             push_entity : entity -> unit >) = 
+      ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+         : unit )
+
+    method set_lex_id (id : lexers) =
+      ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+         : unit )
+
+    method line =
+      ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+         : int )
+
+    method column =
+      ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+         : int )
+
+    method full_name =
+      ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+         : string )
+
+    method private set_encoding (_:string) =
+      assert false
+
+    method xml_declaration = (None : (string*string) list option)
+
+    method set_debugging_mode (_:bool) = ()
+
+    method open_entity (_:bool) (_:lexers) =
+      ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+         : unit )
+
+    method close_entity =
+      ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+         : lexers )
+
+    method replacement_text =
+      ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+         : (string * bool) )
+
+    method lexbuf =
+      ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+         : Lexing.lexbuf )
+
+    method next_token =
+      ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+         : token )
+
+    method next_ignored_token =
+      ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+         : token )
+
+    method process_xmldecl (pl:prolog_token list) =
+      ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+         : unit )
+
+    method process_missing_xmldecl =
+      ( raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+         : unit )
+
+  end
+;;
+
+
+class external_entity the_resolver the_dtd the_name the_warner the_ext_id
+                      the_p_special_empty_entities
+                     init_errors_with_line_numbers
+                     init_encoding
+  =
+  object (self)
+    inherit entity
+              the_dtd the_name the_warner init_errors_with_line_numbers
+             init_encoding
+            as super
+
+    (* An external entity gets the lexbuf that is used as character source
+     * from a resolver.
+     * Furthermore, a Begin_entity token is inserted before the first token,
+     * and an End_entity token is inserted before Eof. This is always done,
+     * regardless of the argument 'force_parsing' of the method
+     * 'open_entity'.
+     *
+     * 'the_p_internal_subset': see class internal_entity
+     * 'the_p_special_empty_entities': if true, a Begin_entity/End_entity
+     * brace is left out if the entity is otherwise empty.
+     *)
+
+    val resolver = (the_resolver : resolver)
+    val ext_id = (the_ext_id : ext_id)
+
+    val p_special_empty_entities = (the_p_special_empty_entities : bool)
+
+    val mutable resolver_is_open = false
+      (* Track if the resolver is open. This is also used to find recursive
+       * references of entities.
+       *)
+
+    val mutable state = At_beginning
+
+    initializer
+      counts_as_external <- true;
+
+
+    method private set_encoding e =
+      assert resolver_is_open;
+      resolver # change_encoding e
+
+
+    method full_name =
+      name ^
+      match ext_id with
+         System s    -> " = SYSTEM \"" ^ s ^ "\""
+       | Public(p,s) -> " = PUBLIC \"" ^ p ^ "\" \"" ^ s ^ "\""
+       | Anonymous   -> " = ANONYMOUS"
+
+
+    method open_entity force_parsing init_lex_id =
+      (* Note that external entities are always parsed, i.e. Begin_entity
+       * and End_entity tokens embrace the inner tokens to ensure that
+       * the entity is only referenced where the syntax allows it.
+       *)
+      if resolver_is_open then
+       raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
+      let lex = 
+       try
+         resolver # open_in ext_id 
+       with
+           Pxp_reader.Not_competent ->
+             raise(Error ("No input method available for this external entity: " ^ 
+                       self # full_name))
+         | Pxp_reader.Not_resolvable Not_found ->
+             raise(Error ("Unable to open the external entity: " ^ 
+                          self # full_name))
+         | Pxp_reader.Not_resolvable e ->
+             raise(Error ("Unable to open the external entity: " ^ 
+                          self # full_name ^ "; reason: " ^ 
+                          string_of_exn e))
+      in
+      resolver_is_open <- true;
+      lexbuf  <- lex;
+      prolog  <- None;
+      lex_id  <- init_lex_id;
+      state <- At_beginning;
+      line <- 1;
+      column <- 0;
+      pos <- 0;
+      last_token <- Bof;
+      normalize_newline <- true;
+
+
+    method private handle_bof tok =
+      (* This hook is only called if the stream is not empty. *)
+      deferred_token <- Some [ tok ];
+      state <- Inserted_begin_entity;
+      Begin_entity
+
+
+    method private handle_eof =
+      (* This hook is called if the end of the stream is reached *)
+      match state with
+         At_beginning ->
+           (* This is only possible if the stream is empty. *)
+           if p_special_empty_entities then begin
+             (* Continue immediately with the next token *)
+             state <- At_end;
+             super # handle_eof
+           end
+           else begin
+             (* Insert Begin_entity / End_entity *)
+             deferred_token <- Some [ End_entity ];
+             state <- At_end;
+             Begin_entity;
+             (* After these two tokens have been processed, the lexer
+              * is called again, and it will return another Eof.
+              *)
+           end
+       | Inserted_begin_entity ->
+           (* Insert End_entity, too. *)
+           state <- At_end;
+           End_entity;
+       | At_end ->
+           (* Continue with the next token: *)
+           super # handle_eof
+
+
+    method close_entity =
+      if not resolver_is_open then
+       failwith ("External entity " ^ name ^ " not open");
+      resolver # close_in;
+      resolver_is_open <- false;
+      lex_id
+
+
+    method replacement_text =
+      (* Return the replacement text of the entity. The method used for this
+       * is more or less the same as for internal entities; i.e. character
+       * and parameter entities are resolved immediately. In addition to that,
+       * external entities may begin with an "xml" processing instruction
+       * which is considered not to be part of the replacement text.
+       *)
+      if resolver_is_open then
+       raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
+      let lex = resolver # open_in ext_id in
+      resolver_is_open <- true;
+      lexbuf  <- lex;
+      prolog  <- None;
+      (* arbitrary:    lex_id  <- init_lex_id; *)
+      state <- At_beginning;
+      line <- 1;
+      column <- 0;
+      pos <- 0;
+      last_token <- Bof;
+      (* First check if the first token of 'lex' is <?xml...?> *)
+      begin match lexerset.scan_only_xml_decl lex with
+         PI_xml pl ->
+           self # process_xmldecl pl
+       | Eof ->
+           (* This only means that the first token was not <?xml...?>;
+            * the "Eof" token represents the empty string.
+            *)
+           self # process_missing_xmldecl
+       | _ ->
+           (* Must not happen. *)
+           assert false
+      end;
+      (* Then create the replacement text. *)
+      let rec scan_and_expand () =
+       match lexerset.scan_dtd_string lexbuf with
+           ERef n -> "&" ^ n ^ ";" ^ scan_and_expand()
+         | CRef(-1) -> "\n" ^ scan_and_expand()
+         | CRef(-2) -> "\n" ^ scan_and_expand()
+         | CRef(-3) -> "\n" ^ scan_and_expand()
+         | CRef k -> character encoding warner k ^ scan_and_expand()
+         | CharData x -> x ^ scan_and_expand()
+         | PERef n ->
+             let en = dtd # par_entity n in
+             let (x,_) = en # replacement_text in
+             x ^ scan_and_expand()
+         | Eof ->
+             ""
+         | _ ->
+             assert false
+      in
+      let rtext = scan_and_expand() in
+      resolver # close_in;
+      resolver_is_open <- false;
+      rtext, true
+       (* TODO:
+        * - The replaced text is not parsed [VALIDATION WEAKNESS]
+        *)
+  end
+;;
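(* Editor's sketch, not part of this commit: the Begin_entity/End_entity
 * bracketing described in the comment at the top of 'external_entity',
 * written as a plain function over an already lexed token list.  The
 * names 'bracket_entity_tokens', 'p_special_empty_entities' and 'toks'
 * are chosen for the example only.
 *)
let bracket_entity_tokens ~p_special_empty_entities toks =
  match toks with
      [] when p_special_empty_entities ->
        (* an otherwise empty entity gets no brace at all *)
        []
    | _ ->
        (* all other entities are embraced by Begin_entity ... End_entity *)
        Begin_entity :: (toks @ [ End_entity ])

(* For instance:
 *   bracket_entity_tokens ~p_special_empty_entities:false []
 *     yields [Begin_entity; End_entity], while
 *   bracket_entity_tokens ~p_special_empty_entities:true []
 *     yields [].
 *)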
+
+
+class document_entity  the_resolver the_dtd the_name the_warner the_ext_id
+                       init_errors_with_line_numbers
+                      init_encoding
+  =
+  object (self)
+    inherit external_entity  the_resolver the_dtd the_name the_warner
+                             the_ext_id false init_errors_with_line_numbers
+                            init_encoding
+
+    (* A document entity is an external entity that does not allow
+     * conditional sections, and that enforces proper nesting of internal
+     * parameter entities.
+     *)
+
+    initializer
+    force_parameter_entity_parsing <- true;
+    check_text_declaration <- false;
+
+    method counts_as_external = false
+      (* Document entities never count as external! *)
+  end
+;;
+
+
+class internal_entity the_dtd the_name the_warner the_literal_value
+                      the_p_internal_subset init_errors_with_line_numbers
+                      init_is_parameter_entity
+                     init_encoding
+  =
+  (* An internal entity uses a "literal entity value" as character source.
+   * This value is first expanded and preprocessed, i.e. character and
+   * parameter references are expanded.
+   *
+   * 'the_p_internal_subset': indicates that the entity is declared in the
+   * internal subset. Such entity declarations are not allowed to contain
+   * references to parameter entities.
+   * 'init_is_parameter_entity': whether this is a parameter entity or not
+   *)
+
+  object (self)
+    inherit entity
+              the_dtd the_name the_warner init_errors_with_line_numbers
+             init_encoding
+           as super
+
+    val p_internal_subset = the_p_internal_subset
+
+    val mutable replacement_text = ""
+    val mutable contains_external_references = false
+    val mutable p_parsed_actually = false
+    val mutable is_open = false
+    val mutable state = At_beginning
+    val mutable is_parameter_entity = init_is_parameter_entity
+
+
+    initializer
+    let lexbuf = Lexing.from_string the_literal_value in
+    let rec scan_and_expand () =
+      match lexerset.scan_dtd_string lexbuf with
+         ERef n -> "&" ^ n ^ ";" ^ scan_and_expand()
+       | CRef(-1) -> "\r\n" ^ scan_and_expand()
+       | CRef(-2) -> "\r" ^ scan_and_expand()
+       | CRef(-3) -> "\n" ^ scan_and_expand()
+       | CRef k -> character encoding warner k ^ scan_and_expand()
+       | CharData x -> x ^ scan_and_expand()
+       | PERef n ->
+           if p_internal_subset then
+             raise(WF_error("Restriction of the internal subset: parameter entity not allowed here"));
+           let en = dtd # par_entity n in
+           let (x, extref) = en # replacement_text in
+           contains_external_references <-
+             contains_external_references or extref;
+           x ^ scan_and_expand()
+       | Eof ->
+           ""
+       | _ ->
+           assert false
+    in
+    is_open <- true;
+    replacement_text <- scan_and_expand();
+    is_open <- false;
+    normalize_newline <- false;
+    counts_as_external <- false;
+
+
+    method process_xmldecl (pl:prolog_token list) =
+      raise(Validation_error("The encoding cannot be changed in internal entities"))
+
+
+    method process_missing_xmldecl =
+      ()
+
+
+    method private set_encoding e =
+      (* Ignored if e = "" *)
+      assert(e = "");
+
+
+    method open_entity force_parsing init_lex_id =
+      if is_open then
+       raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
+
+      p_parsed_actually <- force_parsing;
+      lexbuf  <- Lexing.from_string 
+                  (if is_parameter_entity then
+                     (" " ^ replacement_text ^ " ")
+                   else
+                     replacement_text);
+      prolog  <- None;
+      lex_id  <- init_lex_id;
+      state <- At_beginning;
+      is_open <- true;
+      line <- 1;
+      column <- 0;
+      pos <- 0;
+      last_token <- Eof;
+
+
+    method private handle_bof tok =
+      (* This hook is only called if the stream is not empty. *)
+      if p_parsed_actually then begin
+       deferred_token <- Some [ tok ];
+       state <- Inserted_begin_entity;
+       Begin_entity
+      end
+      else begin
+       state <- At_end;
+       tok
+      end
+
+
+    method private handle_eof =
+      (* This hook is called if the end of the stream is reached *)
+      match state with
+         At_beginning ->
+           (* This is only possible if the stream is empty. *)
+           if p_parsed_actually then begin
+             (* Insert Begin_entity / End_entity *)
+             deferred_token <- Some [ End_entity ];
+             state <- At_end;
+             Begin_entity;
+             (* After these two tokens have been processed, the lexer
+              * is called again, and it will return another Eof.
+              *)
+           end
+           else begin
+             (* Continue immediately with the next token *)
+             state <- At_end;
+             super # handle_eof
+           end
+       | Inserted_begin_entity ->
+           (* Insert End_entity, too. *)
+           state <- At_end;
+           End_entity;
+       | At_end ->
+           (* Continue with the next token: *)
+           super # handle_eof
+
+
+    method close_entity =
+      if not is_open then
+       failwith ("Internal entity " ^ name ^ " not open");
+      is_open <- false;
+      lex_id
+
+
+    method replacement_text =
+      if is_open then
+       raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
+      replacement_text, contains_external_references
+  end
+;;
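(* Editor's sketch, not part of this commit: how an internal general
 * entity might be created and queried.  'dtd' and 'warner' stand for an
 * already existing DTD object and warner; the entity name, the literal
 * value and the expected result are assumptions based on the expansion
 * rules of the initializer above (character references are expanded at
 * once, general entity references are kept verbatim).
 *)
let demo_internal_entity dtd warner =
  let e =
    new internal_entity
      dtd "greeting" warner
      "Hi&#33; &unexpanded;"   (* the literal entity value *)
      false                    (* not declared in the internal subset *)
      true                     (* errors with line numbers *)
      false                    (* a general, not a parameter entity *)
      `Enc_iso88591
  in
  let text, _refers_to_externals = e # replacement_text in
  text
  (* expected to be "Hi! &unexpanded;" *)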
+
+(**********************************************************************)
+
+(* An 'entity_manager' is a stack of entities, where the topmost entity
+ * is the currently active entity, the second entity is the entity that
+ * referred to the active entity, and so on.
+ *
+ * The entity_manager can communicate with the currently active entity.
+ *
+ * The entity_manager provides an interface for the parser; the functions
+ * returning the current token and the next token are exported.
+ *)
+
+class entity_manager (init_entity : entity) =
+  object (self)
+    val mutable entity_stack = Stack.create()
+    val mutable current_entity = init_entity
+    val mutable current_entity's_full_name = lazy (init_entity # full_name)
+                                  
+    val mutable yy_get_next_ref = ref (fun () -> assert false)
+
+    initializer
+      init_entity # set_manager (self :> 
+                                < current_entity : entity; 
+                                  pop_entity : unit;
+                                  push_entity : entity -> unit >
+                               );
+      yy_get_next_ref := (fun () -> init_entity # next_token)
+
+    method push_entity e =
+      e # set_manager (self :> 
+                      < current_entity : entity; 
+                        pop_entity : unit;
+                        push_entity : entity -> unit >
+                     );
+      Stack.push (current_entity, current_entity's_full_name) entity_stack;
+      current_entity <- e;
+      current_entity's_full_name <- lazy (e # full_name);
+      yy_get_next_ref := (fun () -> e # next_token);
+
+    method pop_entity =
+      (* May raise Stack.Empty *)
+      let e, e_name = Stack.pop entity_stack in
+      current_entity <- e;
+      current_entity's_full_name <- e_name;
+      yy_get_next_ref := (fun () -> e # next_token);
+
+
+
+    method position_string =
+      (* Gets a string describing the position of the last token;
+       * includes an entity backtrace
+       *)
+      let b = Buffer.create 200 in
+      Buffer.add_string b
+       ("In entity " ^ current_entity # full_name
+        ^ ", at line " ^ string_of_int (current_entity # line)
+        ^ ", position " ^ string_of_int (current_entity # column)
+        ^ ":\n");
+      Stack.iter
+       (fun (e, e_name) ->
+          Buffer.add_string b 
+            ("Called from entity " ^ Lazy.force e_name
+             ^ ", line " ^ string_of_int (e # line)
+             ^  ", position " ^ string_of_int (e # column)
+             ^ ":\n");
+       )
+       entity_stack;
+      Buffer.contents b
+
+
+    method position =
+      (* Returns the triple (full_name, line, column) of the last token *)
+      Lazy.force current_entity's_full_name, 
+      current_entity # line,
+      current_entity # column
+
+
+    method current_entity_counts_as_external =
+      (* Whether the current entity counts as external to the main
+       * document for the purpose of stand-alone checks.
+       *)
+      (* TODO: improve performance *)
+      let is_external = ref false in
+      let check (e, _) =
+       if e # counts_as_external then begin
+         is_external := true;
+       end;
+      in
+      check (current_entity,());
+      Stack.iter check entity_stack;
+      !is_external
+
+
+    method current_entity  = current_entity
+
+    method yy_get_next_ref = yy_get_next_ref
+
+  end
+;;
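(* Editor's sketch, not part of this commit: the intended use of
 * 'entity_manager' as described in the comment above it.  'doc_entity'
 * stands for an already created and opened document entity; a real
 * parser would additionally push referenced entities and pop them again
 * when they are finished.
 *)
let demo_entity_manager (doc_entity : entity) =
  let mgr = new entity_manager doc_entity in
  (* The parser pulls tokens through the shared function reference: *)
  let next_token () = !(mgr # yy_get_next_ref) () in
  let _first = next_token () in
  (* Error messages can be decorated with an entity backtrace: *)
  prerr_endline (mgr # position_string)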
+
+      
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.6  2000/07/14 13:55:00  gerd
+ *     Cosmetic changes.
+ *
+ * Revision 1.5  2000/07/09 17:51:50  gerd
+ *     Entities return now the beginning of a token as its
+ * position.
+ *     New method 'position' for entity_manager.
+ *
+ * Revision 1.4  2000/07/09 01:05:04  gerd
+ *     Exported methods 'ext_id' and 'notation' anyway.
+ *
+ * Revision 1.3  2000/07/08 16:28:05  gerd
+ *     Updated: Exception 'Not_resolvable' is taken into account.
+ *
+ * Revision 1.2  2000/07/04 22:12:47  gerd
+ *     Update: Case ext_id = Anonymous.
+ *     Update: Handling of the exception Not_competent when reading
+ * from a resolver.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_entity.ml:
+ *
+ * Revision 1.27  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.26  2000/05/28 17:24:55  gerd
+ *     Bugfixes.
+ *
+ * Revision 1.25  2000/05/27 19:23:32  gerd
+ *     The entities store whether they count as external with
+ * respect to the standalone check: New methods counts_as_external
+ * and set_counts_as_external.
+ *     The entity manager can find out whether the current
+ * entity counts as external: method current_entity_counts_as_external.
+ *
+ * Revision 1.24  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.23  2000/05/14 21:51:24  gerd
+ *     Change: Whitespace is handled by the grammar, and no longer
+ * by the entity.
+ *
+ * Revision 1.22  2000/05/14 17:50:54  gerd
+ *     Updates because of changes in the token type.
+ *
+ * Revision 1.21  2000/05/09 00:02:44  gerd
+ *     Conditional sections are now recognized by the parser.
+ * There seem some open questions; see the TODO comments!
+ *
+ * Revision 1.20  2000/05/08 21:58:22  gerd
+ *     Introduced entity_manager as communication object between
+ * the parser and the currently active entity.
+ *     New hooks handle_bof and handle_eof.
+ *     Removed "delegated entities". The entity manager contains
+ * the stack of open entities.
+ *     Changed the way Begin_entity and End_entity are inserted.
+ * This is now done by handle_bof and handle_eof.
+ *     The XML declaration is no longer detected by the entity.
+ * This is now done by the parser.
+ *
+ * Revision 1.19  2000/05/01 15:18:44  gerd
+ *     Improved CRLF handling in the replacement text of entities.
+ *     Changed one error message.
+ *
+ * Revision 1.18  2000/04/30 18:18:39  gerd
+ *     Bugfixes: The conversion of CR and CRLF to LF is now hopefully
+ * done right. The new variable "normalize_newline" indicates whether
+ * normalization must happen for that type of entity. The normalization
+ * is actually carried out separately for every token that needs it.
+ *
+ * Revision 1.17  2000/03/13 23:42:38  gerd
+ *     Removed the resolver classes, and put them into their
+ * own module (Markup_reader).
+ *
+ * Revision 1.16  2000/02/22 01:06:58  gerd
+ *     Bugfix: Resolvers are properly re-initialized. This bug caused
+ * that entities could not be referenced twice in the same document.
+ *
+ * Revision 1.15  2000/01/20 20:54:11  gerd
+ *     New config.errors_with_line_numbers.
+ *
+ * Revision 1.14  2000/01/08 18:59:03  gerd
+ *     Corrected the string resolver.
+ *
+ * Revision 1.13  1999/09/01 22:58:23  gerd
+ *     Method warn_not_latin1 raises Illegal_character if the character
+ * does not match the Char production.
+ *     External entities that are not document entities check if the
+ * <?xml...?> declaration at the beginning matches the TextDecl production.
+ *     Method xml_declaration has type ... list option, not ... list.
+ *     Tag_beg and Tag_end now carry an entity_id with them.
+ *     The code to check empty entities has changed. That the Begin_entity/
+ * End_entity pair is not to be added must be explicitly turned on. See the
+ * description of empty entity handling in design.txt.
+ *     In internal subsets entity declarations are not allowed to refer
+ * to parameter entities. The internal_entity class can do this now.
+ *     The p_parsed parameter of internal_entity has gone. It was simply
+ * superfluous.
+ *
+ * Revision 1.12  1999/09/01 16:24:13  gerd
+ *     The method replacement_text returns the text as described for
+ * "included in literal". The former behaviour has been dropped to include
+ * a leading and a trailing space character for parameter entities.
+ *     Bugfix: When general entities are included, they are always parsed.
+ *
+ * Revision 1.11  1999/08/31 19:13:31  gerd
+ *     Added checks on proper PE nesting. The idea is that tokens such
+ * as Decl_element and Decl_rangle carry an entity ID with them. This ID
+ * is simply an object of type < >, i.e. you can only test on identity.
+ * The lexer always produces tokens with a dummy ID because it does not
+ * know which entity is the current one. The entity layer replaces the dummy
+ * ID with the actual ID. The parser checks that the IDs of pairs such as
+ * Decl_element and Decl_rangle are the same; otherwise a Validation_error
+ * is produced.
+ *
+ * Revision 1.10  1999/08/19 01:06:41  gerd
+ *     Improved error messages: external entities print their
+ * ext id, too
+ *
+ * Revision 1.9  1999/08/15 20:35:48  gerd
+ *     Improved error messages.
+ *     Before the tokens Plus, Star, Qmark space is not allowed any longer.
+ *     Detection of recursive entity references is a bit cleaner.
+ *
+ * Revision 1.8  1999/08/15 15:33:44  gerd
+ *     Revised whitespace checking: At certain positions there must be
+ * white space. These checks cannot be part of the lexer, as %entity; counts
+ * as white space. They cannot be part of the yacc parser because one look-ahead
+ * token would not suffice if we did that. So these checks must be done by the
+ * entity layer. Luckily, the rules are simple: There are simply a number of
+ * token pairs between which white space must occur independently of where
+ * these tokens have been found. Two variables, "space_seen", and "last_token"
+ * have been added in order to check these rules.
+ *
+ * Revision 1.7  1999/08/15 00:41:06  gerd
+ *     The [ token of conditional sections is now allowed to occur
+ * in a different entity.
+ *
+ * Revision 1.6  1999/08/15 00:29:02  gerd
+ *     The method "attlist_replacement_text" has gone. There is now a
+ * more general "replacement_text" method that computes the replacement
+ * text for both internal and external entities. Additionally, this method
+ * returns whether references to external entities have been resolved;
+ * this is checked in the cases where formerly "attlist_replacement_text"
+ * was used as it is not allowed everywhere.
+ *     Entities have a new slot "need_spaces" that indicates that the
+ * next token must be white space or a parameter reference. The problem
+ * was that "<!ATTLIST%e;" is legal because when including parameter
+ * entities white space is added implicitly. Formerly, the white space
+ * was expected by the underlying lexer; now the lexer does not check
+ * anymore that "<!ATTLIST" is followed by white space because the lexer
+ * cannot handle parameter references. Because of this, the check on
+ * white space must be done by the entity.
+ *
+ * Revision 1.5  1999/08/14 22:57:19  gerd
+ *     It is allowed that external entities are empty because the
+ * empty string is well-parsed for both declarations and contents. Empty
+ * entities can be referenced anywhere because the references are replaced
+ * by nothing. Because of this, the Begin_entity...End_entity brace is only
+ * inserted if the entity is non-empty. (Otherwise references to empty
+ * entities would not be allowed anywhere.)
+ *     As a consequence, the grammar has been changed such that a
+ * single Eof is equivalent to Begin_entity,End_entity without content.
+ *
+ * Revision 1.4  1999/08/14 22:11:19  gerd
+ *         Several objects have now a "warner" as argument which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ *     Previously, the resolvers had features in order to warn about
+ * such characters; this has been removed.
+ *     UTF-8 streams can be read even if they contain characters
+ * that cannot be represented by 16 bits.
+ *     The buffering used in the resolvers is now solved in a
+ * cleaner way; the number of characters that are expected to be read
+ * from a source can be limited. This removes a bug with UTF-16 streams
+ * that previously led to wrong exceptions; and the buffering is more
+ * efficient, too.
+ *
+ * Revision 1.3  1999/08/11 14:58:53  gerd
+ *     Some more names for encodings are allowed, such as "utf8" instead
+ * of the standard name "UTF-8".
+ *     'resolve_as_file' interprets relative file names as relative to
+ * the "parent" resolver.
+ *
+ * Revision 1.2  1999/08/10 21:35:07  gerd
+ *     The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ *     TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1  1999/08/10 00:35:51  gerd
+ *     Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_lexer_types.ml b/helm/DEVEL/pxp/pxp/pxp_lexer_types.ml
new file mode 100644 (file)
index 0000000..988e9d0
--- /dev/null
@@ -0,0 +1,248 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+type lexers =
+    Document
+  | Document_type
+  | Content
+  | Within_tag
+  | Declaration
+  | Content_comment
+  | Decl_comment
+  | Document_comment
+  | Ignored_section
+
+
+type prolog_token =
+    Pro_name of string
+  | Pro_eq                  (* "=" *)
+  | Pro_string of string    (* "..." or '...' *)
+  | Pro_eof
+
+
+type entity_id = < >
+  (* The class without properties; but you can still compare if two objects
+   * are the same.
+   *)
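(* Editor's sketch, not part of this commit: entity_id values carry no
 * data and are compared by physical identity only.
 *)
let _entity_id_identity_example () =
  let id1 = (object end : entity_id) in
  let id2 = (object end : entity_id) in
  assert (id1 == id1);          (* the same object *)
  assert (not (id1 == id2))     (* two distinct dummy ids *)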
+
+type token = 
+  | Begin_entity             (* Beginning of entity *)
+  | End_entity               (* End of entity *)
+  | Comment_begin            (* <!-- *)
+  | Comment_material of string (* within a comment *)
+  | Comment_end              (* --> *)
+  | Ignore                   (* ignored whitespace *)
+  | Eq                       (* = *)
+  | Rangle                   (* > as tag delimiter *)
+  | Rangle_empty             (* /> as tag delimiter *)
+  | Percent                  (* % followed by space in declaration *)
+  | Plus                     (* + in declaration *)
+  | Star                     (* * in declaration *)
+  | Bar                      (* | in declaration *)
+  | Comma                    (* , in declaration *)
+  | Qmark                    (* ? in declaration *)
+  | Pcdata                   (* #PCDATA in declaration *)
+  | Required                 (* #REQUIRED in declaration *)
+  | Implied                  (* #IMPLIED in declaration *)
+  | Fixed                    (* #FIXED in declaration *)
+  | Bof                      (* A marker for 'beginning of file' *)
+  | Eof                      (* End of file *)
+  | Conditional_begin of entity_id  (* <![ in declaration *)
+  | Conditional_body  of entity_id  (* [ in declaration *)
+  | Conditional_end   of entity_id  (* ]]> in declaration *)
+  | Doctype        of entity_id  (* <!DOCTYPE *)
+  | Doctype_rangle of entity_id  (* > as DOCTYPE delimiter *)
+  | Dtd_begin      of entity_id  (* '[' after DOCTYPE *)
+  | Dtd_end        of entity_id  (* ']' *)
+  | Decl_element   of entity_id  (* <!ELEMENT *)
+  | Decl_attlist   of entity_id  (* <!ATTLIST *)
+  | Decl_entity    of entity_id  (* <!ENTITY *)
+  | Decl_notation  of entity_id  (* <!NOTATION *)
+  | Decl_rangle    of entity_id  (* > *)
+  | Lparen         of entity_id  (* ( in declaration *)
+  | Rparen         of entity_id  (* ) in declaration *)
+  | RparenPlus     of entity_id  (* )+ in declaration *)
+  | RparenStar     of entity_id  (* )* in declaration *)
+  | RparenQmark    of entity_id  (* )? in declaration *)
+      
+  | Tag_beg of (string*entity_id)     (* <name *)
+  | Tag_end of (string*entity_id)     (* </name *)
+
+  | PI        of (string*string)      (* <?name ... ?> *)
+  | PI_xml    of (prolog_token list)  (* <?xml ...?> *)
+  | Cdata     of string               (* <![CDATA[...]]> *)
+  | CRef      of int                  (* &#digits; *)
+  | ERef      of string               (* &name; *)
+  | PERef     of string               (* %name; *)
+  | CharData  of string             (* any characters not otherwise matching *)
+  | LineEnd   of string
+  | Name      of string               (* name *)
+  | Nametoken of string               (* nmtoken but not name *)
+  | Attval    of string           (* attribute value; may contain entity refs *)
+  | Attval_nl_normalized of string
+  | Unparsed_string      of string    (* "data" or 'data' *)
+      
+
+(**********************************************************************)
+(* debugging *)
+
+let string_of_tok tok =
+  match tok with
+    Begin_entity -> "Begin_entity"
+  | End_entity -> "End_entity"
+  | Doctype _ -> "Doctype"
+  | Doctype_rangle _ -> "Doctype_rangle"
+  | Comment_begin -> "Comment_begin"
+  | Comment_end -> "Comment_end"
+  | Comment_material _ -> "Comment_material"
+  | Rangle -> "Rangle"
+  | Rangle_empty -> "Rangle_empty"
+  | Ignore -> "Ignore"
+  | Eq -> "Eq"
+  | Dtd_begin _ -> "Dtd_begin"
+  | Dtd_end _ -> "Dtd_end"
+  | Conditional_begin _ -> "Conditional_begin"
+  | Conditional_body _ -> "Conditional_body"
+  | Conditional_end _ -> "Conditional_end"
+  | Percent -> "Percent"
+  | Lparen _ -> "Lparen"
+  | Rparen _ -> "Rparen"
+  | Plus -> "Plus"
+  | Star -> "Star"
+  | Bar -> "Bar"
+  | Comma -> "Comma"
+  | Qmark -> "Qmark"
+  | Pcdata -> "Pcdata"
+  | Required -> "Required"
+  | Implied -> "Implied"
+  | Fixed -> "Fixed"
+  | Decl_element _ -> "Decl_element"
+  | Decl_attlist _ -> "Decl_attlist"
+  | Decl_entity _ -> "Decl_entity"
+  | Decl_notation _ -> "Decl_notation"
+  | Decl_rangle _ -> "Decl_rangle"
+  | RparenPlus _ -> "RparenPlus"
+  | RparenStar _ -> "RparenStar"
+  | RparenQmark _ -> "RparenQmark"
+  | Bof -> "Bof"
+  | Eof -> "Eof"
+  | PI _ -> "PI"
+  | PI_xml _ -> "PI_xml"
+  | Tag_beg _ -> "Tag_beg"
+  | Tag_end _ -> "Tag_end"
+  | Cdata _ -> "Cdata"
+  | CRef _ -> "CRef"
+  | ERef _ -> "ERef"
+  | PERef _ -> "PERef"
+  | CharData _ -> "CharData"
+  | Name _ -> "Name" 
+  | Nametoken _ -> "Nametoken" 
+  | Attval _ -> "Attval" 
+  | Attval_nl_normalized _ -> "Attval_nl_normalized"
+  | Unparsed_string _ -> "Unparsed_string" 
+  | LineEnd _ -> "LineEnd"
+
+
+type lexer_set =
+    { lex_encoding         : Pxp_types.rep_encoding;
+      scan_document        : Lexing.lexbuf -> (token * lexers);
+      scan_content         : Lexing.lexbuf -> (token * lexers);
+      scan_within_tag      : Lexing.lexbuf -> (token * lexers);
+      scan_document_type   : Lexing.lexbuf -> (token * lexers);
+      scan_declaration     : Lexing.lexbuf -> (token * lexers);
+      scan_content_comment : Lexing.lexbuf -> (token * lexers);
+      scan_decl_comment    : Lexing.lexbuf -> (token * lexers);
+      scan_document_comment: Lexing.lexbuf -> (token * lexers);
+      scan_ignored_section : Lexing.lexbuf -> (token * lexers);
+      scan_xml_pi          : Lexing.lexbuf -> prolog_token;
+      scan_dtd_string      : Lexing.lexbuf -> token;
+      scan_content_string  : Lexing.lexbuf -> token;
+      scan_name_string     : Lexing.lexbuf -> token;
+      scan_only_xml_decl   : Lexing.lexbuf -> token;
+      scan_for_crlf        : Lexing.lexbuf -> token;
+    }
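(* Editor's sketch, not part of this commit: a lexer_set bundles one
 * scanner per lexical context.  The loop below tokenizes character data
 * content with whatever lexer_set it is given; it deliberately stays in
 * the content scanner and ignores the returned lexer id, which a real
 * parser would use to switch contexts.
 *)
let demo_scan_content (ls : lexer_set) (s : string) =
  let lexbuf = Lexing.from_string s in
  let rec loop acc =
    match ls.scan_content lexbuf with
        (Eof, _) -> List.rev acc
      | (tok, _) -> loop (tok :: acc)
  in
  loop []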
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/08/18 20:14:31  gerd
+ *     Comment -> Comment_begin, Comment_material, Comment_end.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_lexer_types.ml:
+ *
+ * Revision 1.6  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.5  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.4  2000/05/14 17:45:36  gerd
+ *     Bugfix.
+ *
+ * Revision 1.3  2000/05/14 17:35:12  gerd
+ *     Conditional_begin, _end, and _body have an entity_id.
+ *
+ * Revision 1.2  2000/05/08 21:59:06  gerd
+ *     New token Bof (beginning of file).
+ *
+ * Revision 1.1  2000/05/06 23:21:49  gerd
+ *     Initial revision.
+ *
+ *
+ * ======================================================================
+ *
+ * DERIVED FROM REVISION 1.4 of markup_lexer_types_shadow.ml
+ *
+ * Revision 1.4  2000/04/30 18:19:04  gerd
+ *     Added new tokens.
+ *
+ * Revision 1.3  1999/08/31 19:13:31  gerd
+ *     Added checks on proper PE nesting. The idea is that tokens such
+ * as Decl_element and Decl_rangle carry an entity ID with them. This ID
+ * is simply an object of type < >, i.e. you can only test on identity.
+ * The lexer always produces tokens with a dummy ID because it does not
+ * know which entity is the current one. The entity layer replaces the dummy
+ * ID with the actual ID. The parser checks that the IDs of pairs such as
+ * Decl_element and Decl_rangle are the same; otherwise a Validation_error
+ * is produced.
+ *
+ * Revision 1.2  1999/08/10 21:35:08  gerd
+ *     The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ *     TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1  1999/08/10 00:35:51  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_lexer_types.mli b/helm/DEVEL/pxp/pxp/pxp_lexer_types.mli
new file mode 100644 (file)
index 0000000..9e7c2d8
--- /dev/null
@@ -0,0 +1,188 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+type lexers =
+    Document
+  | Document_type
+  | Content
+  | Within_tag
+  | Declaration
+  | Content_comment
+  | Decl_comment
+  | Document_comment
+  | Ignored_section
+
+
+type prolog_token =
+    Pro_name of string
+  | Pro_eq                  (* "=" *)
+  | Pro_string of string    (* "..." or '...' *)
+  | Pro_eof
+
+type entity_id = < >
+  (* The class without properties; but you can still compare if two objects
+   * are the same.
+   *)
+
+type token = 
+  | Begin_entity             (* Beginning of entity *)
+  | End_entity               (* End of entity *)
+  | Comment_begin            (* <!-- *)
+  | Comment_material of string (* within a comment *)
+  | Comment_end              (* --> *)
+  | Ignore                   (* ignored whitespace *)
+  | Eq                       (* = *)
+  | Rangle                   (* > as tag delimiter *)
+  | Rangle_empty             (* /> as tag delimiter *)
+  | Percent                  (* % followed by space in declaration *)
+  | Plus                     (* + in declaration *)
+  | Star                     (* * in declaration *)
+  | Bar                      (* | in declaration *)
+  | Comma                    (* , in declaration *)
+  | Qmark                    (* ? in declaration *)
+  | Pcdata                   (* #PCDATA in declaration *)
+  | Required                 (* #REQUIRED in declaration *)
+  | Implied                  (* #IMPLIED in declaration *)
+  | Fixed                    (* #FIXED in declaration *)
+  | Bof                      (* A marker for 'beginning of file' *)
+  | Eof                      (* End of file *)
+  | Conditional_begin of entity_id  (* <![ in declaration *)
+  | Conditional_body  of entity_id  (* [ in declaration *)
+  | Conditional_end   of entity_id  (* ]]> in declaration *)
+  | Doctype        of entity_id  (* <!DOCTYPE *)
+  | Doctype_rangle of entity_id  (* > as DOCTYPE delimiter *)
+  | Dtd_begin      of entity_id  (* '[' after DOCTYPE *)
+  | Dtd_end        of entity_id  (* ']' *)
+  | Decl_element   of entity_id  (* <!ELEMENT *)
+  | Decl_attlist   of entity_id  (* <!ATTLIST *)
+  | Decl_entity    of entity_id  (* <!ENTITY *)
+  | Decl_notation  of entity_id  (* <!NOTATION *)
+  | Decl_rangle    of entity_id  (* > *)
+  | Lparen         of entity_id  (* ( in declaration *)
+  | Rparen         of entity_id  (* ) in declaration *)
+  | RparenPlus     of entity_id  (* )+ in declaration *)
+  | RparenStar     of entity_id  (* )* in declaration *)
+  | RparenQmark    of entity_id  (* )? in declaration *)
+      
+  | Tag_beg of (string*entity_id)     (* <name *)
+  | Tag_end of (string*entity_id)     (* </name *)
+
+  | PI        of (string*string)      (* <?name ... ?> *)
+  | PI_xml    of (prolog_token list)  (* <?xml ...?> *)
+  | Cdata     of string               (* <![CDATA[...]]> *)
+  | CRef      of int                  (* &#digits; *)
+  | ERef      of string               (* &name; *)
+  | PERef     of string               (* %name; *)
+  | CharData  of string             (* any characters not otherwise matching *)
+  | LineEnd   of string
+  | Name      of string               (* name *)
+  | Nametoken of string               (* nmtoken but not name *)
+  | Attval    of string           (* attribute value; may contain entity refs *)
+  | Attval_nl_normalized of string
+  | Unparsed_string      of string    (* "data" or 'data' *)
+      
+
+val string_of_tok : token -> string
+
+
+type lexer_set =
+    { lex_encoding         : Pxp_types.rep_encoding;
+      scan_document        : Lexing.lexbuf -> (token * lexers);
+      scan_content         : Lexing.lexbuf -> (token * lexers);
+      scan_within_tag      : Lexing.lexbuf -> (token * lexers);
+      scan_document_type   : Lexing.lexbuf -> (token * lexers);
+      scan_declaration     : Lexing.lexbuf -> (token * lexers);
+      scan_content_comment : Lexing.lexbuf -> (token * lexers);
+      scan_decl_comment    : Lexing.lexbuf -> (token * lexers);
+      scan_document_comment: Lexing.lexbuf -> (token * lexers);
+      scan_ignored_section : Lexing.lexbuf -> (token * lexers);
+      scan_xml_pi          : Lexing.lexbuf -> prolog_token;
+      scan_dtd_string      : Lexing.lexbuf -> token;
+      scan_content_string  : Lexing.lexbuf -> token;
+      scan_name_string     : Lexing.lexbuf -> token;
+      scan_only_xml_decl   : Lexing.lexbuf -> token;
+      scan_for_crlf        : Lexing.lexbuf -> token;
+    }
+
+(* lexer_set: Every internal encoding has its own set of lexer functions *)
+
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/08/18 20:14:31  gerd
+ *     Comment -> Comment_begin, Comment_material, Comment_end.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_lexer_types.mli:
+ *
+ * Revision 1.5  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.4  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.3  2000/05/14 17:35:12  gerd
+ *     Conditional_begin, _end, and _body have an entity_id.
+ *
+ * Revision 1.2  2000/05/08 21:59:17  gerd
+ *         New token Bof (beginning of file).
+ *
+ * Revision 1.1  2000/05/06 23:21:49  gerd
+ *     Initial revision.
+ *
+ *
+ * ======================================================================
+ *
+ * DERIVED FROM REVISION 1.3 of markup_lexer_types_shadow.mli
+ *
+ * Revision 1.3  1999/08/31 19:13:31  gerd
+ *     Added checks on proper PE nesting. The idea is that tokens such
+ * as Decl_element and Decl_rangle carry an entity ID with them. This ID
+ * is simply an object of type < >, i.e. you can only test on identity.
+ * The lexer always produces tokens with a dummy ID because it does not
+ * know which entity is the current one. The entity layer replaces the dummy
+ * ID with the actual ID. The parser checks that the IDs of pairs such as
+ * Decl_element and Decl_rangle are the same; otherwise a Validation_error
+ * is produced.
+ *
+ * Revision 1.2  1999/08/10 21:35:09  gerd
+ *     The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ *     TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1  1999/08/10 00:35:51  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_lexers.ml b/helm/DEVEL/pxp/pxp/pxp_lexers.ml
new file mode 100644 (file)
index 0000000..ce6e7b3
--- /dev/null
@@ -0,0 +1,90 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+open Pxp_types
+open Pxp_lexer_types
+
+let lexer_set_iso88591 = 
+  { lex_encoding         = `Enc_iso88591;
+    scan_document        = Pxp_lex_document_iso88591.scan_document;
+    scan_content         = Pxp_lex_content_iso88591.scan_content;
+    scan_within_tag      = Pxp_lex_within_tag_iso88591.scan_within_tag;
+    scan_document_type   = Pxp_lex_document_type_iso88591.
+                            scan_document_type;
+    scan_declaration     = Pxp_lex_declaration_iso88591.scan_declaration;
+    scan_content_comment  = Pxp_lex_misc_iso88591.scan_content_comment;
+    scan_decl_comment     = Pxp_lex_misc_iso88591.scan_decl_comment;
+    scan_document_comment = Pxp_lex_misc_iso88591.scan_document_comment;
+    scan_ignored_section = Pxp_lex_name_string_iso88591.
+                             scan_ignored_section;
+    scan_xml_pi          = Pxp_lex_misc_iso88591.scan_xml_pi;
+    scan_dtd_string      = Pxp_lex_dtd_string_iso88591.scan_dtd_string;
+    scan_content_string  = Pxp_lex_content_string_iso88591.
+                            scan_content_string;
+    scan_name_string     = Pxp_lex_name_string_iso88591.scan_name_string;
+    scan_only_xml_decl   = Pxp_lex_misc_iso88591.scan_only_xml_decl;
+    scan_for_crlf        = Pxp_lex_misc_iso88591.scan_for_crlf;
+  }
+;;
+
+
+let lexer_set_utf8 = ref None
+;;
+
+
+let init_utf8 ls =
+  lexer_set_utf8 := Some ls
+;;
+
+
+let get_lexer_set enc =
+  match enc with
+      `Enc_iso88591 -> lexer_set_iso88591
+    | `Enc_utf8 ->
+       ( match !lexer_set_utf8 with
+             None ->
+               failwith ("Pxp_lexers: UTF-8 lexers not initialized")
+           | Some ls ->
+               ls
+       )
+    | _ ->
+       failwith ("Pxp_lexers: This type of internal encoding is not supported")
+;;
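(* Editor's sketch, not part of this commit: looking up the lexer set for
 * the internal encoding and scanning a single name.  The input string is
 * made up, and that it lexes as a Name token is an assumption about the
 * name-string scanner.
 *)
let _demo_get_lexer_set () =
  let ls = get_lexer_set `Enc_iso88591 in
  let lexbuf = Lexing.from_string "chapter" in
  match ls.scan_name_string lexbuf with
      Name n -> prerr_endline ("scanned name: " ^ n)
    | _      -> prerr_endline "not a name"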
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.4  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.3  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2  2000/05/23 00:09:44  gerd
+ *     The UTF-8 lexer set is no longer initialized here. It is done
+ * in the new module Pxp_utf8. Reason: You can link without UTF-8 support.
+ *
+ * Revision 1.1  2000/05/20 20:30:50  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_lexers.mli b/helm/DEVEL/pxp/pxp/pxp_lexers.mli
new file mode 100644 (file)
index 0000000..d8eabf6
--- /dev/null
@@ -0,0 +1,51 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+open Pxp_types
+open Pxp_lexer_types
+
+val get_lexer_set : rep_encoding -> lexer_set
+  (* Return the set of lexer functions that is able to handle the passed
+   * encoding.
+   *)
+
+val init_utf8 : lexer_set -> unit
+  (* Internally used. *)
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.4  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.3  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2  2000/05/23 00:09:44  gerd
+ *     The UTF-8 lexer set is no longer initialized here. It is done
+ * in the new module Pxp_utf8. Reason: You can link without UTF-8 support.
+ *
+ * Revision 1.1  2000/05/20 20:30:50  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_reader.ml b/helm/DEVEL/pxp/pxp/pxp_reader.ml
new file mode 100644 (file)
index 0000000..83add26
--- /dev/null
@@ -0,0 +1,730 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_types;;
+exception Not_competent;;
+exception Not_resolvable of exn;;
+
+class type resolver =
+  object
+    method init_rep_encoding : rep_encoding -> unit
+    method init_warner : collect_warnings -> unit
+    method rep_encoding : rep_encoding
+    method open_in : ext_id -> Lexing.lexbuf
+    method close_in : unit
+    method close_all : unit
+    method change_encoding : string -> unit
+    method clone : resolver
+  end
+;;
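(* Editor's sketch, not part of this commit: the protocol a caller follows
 * with any object of the class type 'resolver' above.  The parser does
 * these steps itself; Anonymous is used because no external id is known
 * in this example, and drop_warnings simply discards warnings.
 *)
let demo_resolver_protocol (r : resolver) =
  r # init_rep_encoding `Enc_iso88591;
  r # init_warner (new drop_warnings);
  let lexbuf = r # open_in Anonymous in   (* may raise Not_competent *)
  (* ... hand 'lexbuf' over to the lexers ... *)
  ignore lexbuf;
  r # close_in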
+
+
+class virtual resolve_general 
+ =
+  object (self)
+    val mutable internal_encoding = `Enc_utf8
+
+    val mutable encoding = `Enc_utf8
+    val mutable encoding_requested = false
+
+    val mutable warner = new drop_warnings
+
+    val mutable enc_initialized = false
+    val mutable wrn_initialized = false
+
+    val mutable clones = []
+
+    method init_rep_encoding e =
+      internal_encoding <- e;
+      enc_initialized <- true;
+
+    method init_warner w =
+      warner <- w;
+      wrn_initialized <- true;
+
+    method rep_encoding = (internal_encoding :> rep_encoding)
+
+(*
+    method clone =
+      ( {< encoding = `Enc_utf8;
+          encoding_requested = false;
+       >}
+       : # resolver :> resolver )
+*)
+
+    method private warn (k:int) =
+      (* Called if a character that cannot be represented has been found.
+       * k is the character code.
+       *)
+       if k < 0xd800 or (k >= 0xe000 & k <= 0xfffd) or
+          (k >= 0x10000 & k <= 0x10ffff) then begin
+            warner # warn ("Code point cannot be represented: " ^ string_of_int k);
+          end
+       else
+         raise (WF_error("Code point " ^ string_of_int k ^ 
+                   " outside the accepted range of code points"))
+
+
+    method private autodetect s =
+      (* s must be at least 4 bytes long. The slot 'encoding' is set to
+       * `Enc_utf16 if a UTF-16/UCS-2 byte order mark (big or little
+       * endian) is found, and to `Enc_utf8 otherwise; the exact byte
+       * order is detected later by Netconversion.recode.
+       *)
+      if String.length s < 4 then
+       encoding <- `Enc_utf8
+      else if String.sub s 0 2 = "\254\255" then
+       encoding <- `Enc_utf16
+         (* Note: Netconversion.recode will detect the big endianness, too *)
+      else if String.sub s 0 2 = "\255\254" then
+       encoding <- `Enc_utf16
+         (* Note: Netconversion.recode will detect the little endianness, too *)
+      else
+       encoding <- `Enc_utf8
+
+
+    method private virtual next_string : string -> int -> int -> int
+    method private virtual init_in : ext_id -> unit
+    method virtual close_in : unit
+
+    method close_all =
+      List.iter (fun r -> r # close_in) clones
+
+    method open_in xid =
+      assert(enc_initialized && wrn_initialized);
+
+      encoding <- `Enc_utf8;
+      encoding_requested <- false;
+      self # init_in xid;         (* may raise Not_competent *)
+      (* init_in: may already set 'encoding' *)
+
+      let buffer_max = 512 in
+      let buffer = String.make buffer_max ' ' in
+      let buffer_len = ref 0 in
+      let buffer_end = ref false in
+      let fillup () =
+       if not !buffer_end & !buffer_len < buffer_max then begin
+         let l =
+           self # next_string buffer !buffer_len (buffer_max - !buffer_len) in
+         if l = 0 then
+           buffer_end := true
+         else begin
+           buffer_len := !buffer_len + l
+         end
+       end
+      in
+      let consume n =
+       let l = !buffer_len - n in
+       String.blit buffer n buffer 0 l;
+       buffer_len := l
+      in
+
+      fillup();
+      if not encoding_requested then self # autodetect buffer;
+
+      Lexing.from_function
+       (fun s n ->
+          (* TODO: if encoding = internal_encoding, it is possible to
+           * avoid copying buffer to s because s can be directly used
+           * as buffer.
+           *)
+
+          fillup();
+          if !buffer_len = 0 then
+            0
+          else begin
+            let m_in  = !buffer_len in
+            let m_max = if encoding_requested then n else 1 in
+            let n_in, n_out, encoding' =
+              if encoding = (internal_encoding : rep_encoding :> encoding) &&
+                 encoding_requested
+              then begin
+                (* Special case encoding = internal_encoding *)
+                String.blit buffer 0 s 0 m_in;
+                m_in, m_in, encoding
+              end
+              else
+                Netconversion.recode
+                  ~in_enc:encoding
+                  ~in_buf:buffer
+                  ~in_pos:0
+                  ~in_len:m_in
+                  ~out_enc:(internal_encoding : rep_encoding :> encoding)
+                  ~out_buf:s
+                  ~out_pos:0
+                  ~out_len:n
+                  ~max_chars:m_max
+                  ~subst:(fun k -> self # warn k; "")
+            in
+            if n_in = 0 then
+              (* An incomplete character at the end of the stream: *)
+              raise Netconversion.Malformed_code;
+              (* failwith "Badly encoded character"; *)
+            encoding <- encoding';
+            consume n_in;
+            assert(n_out <> 0);
+            n_out
+          end)
+
+    method change_encoding enc =
+      if not encoding_requested then begin
+       if enc <> "" then begin
+         match Netconversion.encoding_of_string enc with
+             `Enc_utf16 ->
+               (match encoding with
+                    (`Enc_utf16_le | `Enc_utf16_be) -> ()
+                  | `Enc_utf16 -> assert false
+                  | _ ->
+                      raise(WF_error "Encoding of data stream and encoding declaration mismatch")
+               )
+           | e ->
+               encoding <- e
+       end;
+       (* else: the autodetected encoding counts *)
+       encoding_requested <- true;
+      end;
+  end
+;;
+
+
+class resolve_read_any_channel ?(auto_close=true) ~channel_of_id =
+  object (self)
+    inherit resolve_general as super
+
+    val f_open = channel_of_id
+    val mutable current_channel = None
+    val auto_close = auto_close
+
+    method private init_in (id:ext_id) =
+      if current_channel <> None then
+       failwith "Pxp_reader.resolve_read_any_channel # init_in";
+      let ch, enc_opt = f_open id in       (* may raise Not_competent *)
+      begin match enc_opt with
+         None     -> ()
+       | Some enc -> encoding <- enc; encoding_requested <- true
+      end;
+      current_channel <- Some ch;
+
+    method private next_string s ofs len =
+      match current_channel with
+         None -> failwith "Pxp_reader.resolve_read_any_channel # next_string"
+       | Some ch ->
+           input ch s ofs len
+
+    method close_in =
+      match current_channel with
+         None -> ()
+       | Some ch ->
+           if auto_close then close_in ch;
+           current_channel <- None
+
+    method clone =
+      let c = new resolve_read_any_channel 
+               ?auto_close:(Some auto_close) f_open in
+      c # init_rep_encoding internal_encoding;
+      c # init_warner warner;
+      clones <- c :: clones;
+      (c :> resolver)
+
+  end
+;;
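(* Editor's sketch, not part of this commit: a resolve_read_any_channel
 * that accepts exactly one SYSTEM identifier and reads it from a local
 * file.  The file name is invented for the example; every other ext_id
 * raises Not_competent so that another resolver may take over.
 *)
let demo_channel_resolver () =
  new resolve_read_any_channel
    ?auto_close:(Some true)                (* close the channel automatically *)
    (function
         System "sample.dtd" -> (open_in_bin "sample.dtd", None)
       | _                   -> raise Not_competent)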
+
+
+class resolve_read_this_channel1 is_stale ?id ?fixenc ?auto_close ch =
+
+  let getchannel = ref (fun xid -> assert false) in
+
+  object (self)
+    inherit resolve_read_any_channel 
+              ?auto_close:auto_close 
+             (fun xid -> !getchannel xid)
+             as super
+
+    val mutable is_stale = is_stale
+      (* The channel can only be read once. To prevent the channel from
+       * being opened several times, the flag 'is_stale' is set after the
+       * first time.
+       *)
+
+    val fixid = id
+    val fixenc = fixenc
+    val fixch = ch
+
+    initializer
+      getchannel := self # getchannel
+
+    method private getchannel xid =
+      begin match fixid with
+         None -> ()
+       | Some bound_xid -> 
+           if xid <> bound_xid then raise Not_competent
+      end;
+      ch, fixenc
+
+    method private init_in (id:ext_id) =
+      if is_stale then
+       raise Not_competent
+      else begin
+       super # init_in id;
+       is_stale <- true
+      end
+
+    method close_in =
+      current_channel <- None
+
+    method clone =
+      let c = new resolve_read_this_channel1 
+               is_stale 
+               ?id:fixid ?fixenc:fixenc ?auto_close:(Some auto_close) fixch
+      in
+      c # init_rep_encoding internal_encoding;
+      c # init_warner warner;
+      clones <- c :: clones;
+      (c :> resolver)
+
+  end
+;;
+
+
+class resolve_read_this_channel =
+  resolve_read_this_channel1 false
+;;
+
+
+class resolve_read_any_string ~string_of_id =
+  object (self)
+    inherit resolve_general as super
+
+    val f_open = string_of_id
+    val mutable current_string = None
+    val mutable current_pos    = 0
+
+    method private init_in (id:ext_id) =
+      if current_string <> None then
+       failwith "Pxp_reader.resolve_read_any_string # init_in";
+      let s, enc_opt = f_open id in       (* may raise Not_competent *)
+      begin match enc_opt with
+         None     -> ()
+       | Some enc -> encoding <- enc; encoding_requested <- true
+      end;
+      current_string <- Some s;
+      current_pos    <- 0;
+
+    method private next_string s ofs len =
+      match current_string with
+         None -> failwith "Pxp_reader.resolve_read_any_string # next_string"
+       | Some str ->
+           let l = min len (String.length str - current_pos) in
+           String.blit str current_pos s ofs l;
+           current_pos <- current_pos + l;
+           l
+
+    method close_in =
+      match current_string with
+         None -> ()
+       | Some _ ->
+           current_string <- None
+
+    method clone =
+      let c = new resolve_read_any_string f_open in
+      c # init_rep_encoding internal_encoding;
+      c # init_warner warner;
+      clones <- c :: clones;
+      (c :> resolver)
+  end
+;;
+
+
+class resolve_read_this_string1 is_stale ?id ?fixenc str =
+
+  let getstring = ref (fun xid -> assert false) in
+
+  object (self)
+    inherit resolve_read_any_string (fun xid -> !getstring xid) as super
+
+    val is_stale = is_stale
+      (* For several reasons, it is not allowed to open a clone of the
+       * resolver a second time while the original resolver is still open.
+       *)
+
+    val fixid = id
+    val fixenc = fixenc
+    val fixstr = str
+
+    initializer
+      getstring := self # getstring
+
+    method private getstring xid =
+      begin match fixid with
+         None -> ()
+       | Some bound_xid -> 
+           if xid <> bound_xid then raise Not_competent
+      end;
+      fixstr, fixenc
+
+
+    method private init_in (id:ext_id) =
+      if is_stale then
+       raise Not_competent
+      else
+       super # init_in id
+
+    method clone =
+      let c = new resolve_read_this_string1 
+               (is_stale or current_string <> None) 
+               ?id:fixid ?fixenc:fixenc fixstr
+      in
+      c # init_rep_encoding internal_encoding;
+      c # init_warner warner;
+      clones <- c :: clones;
+      (c :> resolver)
+  end
+;;
+
+
+class resolve_read_this_string =
+  resolve_read_this_string1 false
+;;
+
+
+class resolve_read_url_channel 
+  ?(base_url = Neturl.null_url)
+  ?auto_close
+  ~url_of_id
+  ~channel_of_url 
+
+  : resolver
+  =
+
+  let getchannel = ref (fun xid -> assert false) in
+
+  object (self)
+    inherit resolve_read_any_channel 
+              ?auto_close:auto_close 
+             (fun xid -> !getchannel xid) 
+             as super
+
+    val base_url = base_url
+    val mutable own_url = Neturl.null_url
+
+    val url_of_id = url_of_id
+    val channel_of_url = channel_of_url
+
+
+    initializer
+      getchannel := self # getchannel
+
+    method private getchannel xid =
+      let rel_url = url_of_id xid in    (* may raise Not_competent *)
+
+      try
+       (* Now compute the absolute URL: *)
+       let abs_url = Neturl.apply_relative_url base_url rel_url in
+                      (* may raise Malformed_URL *)
+
+       (* Simple check whether 'abs_url' is really absolute: *)
+       if not(Neturl.url_provides ~scheme:true abs_url) 
+       then raise Not_competent;
+
+       own_url <- abs_url;
+        (* FIXME: Copy 'abs_url' ? *)
+
+       (* Get and return the channel: *)
+       channel_of_url abs_url            (* may raise Not_competent *)
+      with
+         Neturl.Malformed_URL -> raise (Not_resolvable Neturl.Malformed_URL)
+       | Not_competent        -> raise (Not_resolvable Not_found)
+
+    method clone =
+      let c = 
+       new resolve_read_url_channel 
+         ?base_url:(Some own_url) 
+         ?auto_close:(Some auto_close)
+         ~url_of_id:url_of_id 
+         ~channel_of_url:channel_of_url
+      in
+      c # init_rep_encoding internal_encoding;
+      c # init_warner warner;
+      clones <- c :: clones;
+      (c :> resolve_read_url_channel)
+  end
+;;
+
+
+type spec = [ `Not_recognized | `Allowed | `Required ]
+
+class resolve_as_file
+  ?(file_prefix = (`Allowed :> spec))
+  ?(host_prefix = (`Allowed :> spec))
+  ?(system_encoding = `Enc_utf8) 
+  ?url_of_id:passed_url_of_id
+  ?channel_of_url:passed_channel_of_url
+  ()
+  =
+
+  let url_syntax =
+    let enable_if =
+      function
+         `Not_recognized  -> Neturl.Url_part_not_recognized
+       | `Allowed         -> Neturl.Url_part_allowed
+       | `Required        -> Neturl.Url_part_required
+    in
+    { Neturl.null_url_syntax with
+       Neturl.url_enable_scheme = enable_if file_prefix;
+       Neturl.url_enable_host   = enable_if host_prefix;
+       Neturl.url_enable_path   = Neturl.Url_part_required;
+       Neturl.url_accepts_8bits = true;
+    } 
+  in
+
+  let base_url_syntax = 
+    { Neturl.null_url_syntax with
+       Neturl.url_enable_scheme = Neturl.Url_part_required;
+       Neturl.url_enable_host   = Neturl.Url_part_allowed;
+       Neturl.url_enable_path   = Neturl.Url_part_required;
+       Neturl.url_accepts_8bits = true;
+    } 
+  in
+
+  let default_base_url =
+    Neturl.make_url
+      ~scheme: "file"
+      ~host:   ""
+      ~path:   (Neturl.split_path (Sys.getcwd() ^ "/"))
+      base_url_syntax
+  in
+
+  let file_url_of_id xid =
+    let file_url_of_sysname sysname =
+      (* By convention, we can assume that sysname is a URL conforming
+       * to RFC 1738 with the exception that it may contain non-ASCII
+       * UTF-8 characters. 
+       *)
+      try
+       Neturl.url_of_string url_syntax sysname 
+          (* may raise Malformed_URL *)
+      with
+         Neturl.Malformed_URL -> raise Not_competent
+    in
+    let url =
+      match xid with
+         Anonymous          -> raise Not_competent
+       | Public (_,sysname) -> if sysname <> "" then file_url_of_sysname sysname
+                                                 else raise Not_competent
+       | System sysname     -> file_url_of_sysname sysname
+    in
+    let scheme =
+      try Neturl.url_scheme url with Not_found -> "file" in
+    let host =
+      try Neturl.url_host url with Not_found -> "" in
+    
+    if scheme <> "file" then raise Not_competent;
+    if host <> "" && host <> "localhost" then raise Not_competent;
+    
+    url
+  in
+
+  let channel_of_file_url url =
+    try
+      let path_utf8 =
+       try Neturl.join_path (Neturl.url_path ~encoded:false url)
+       with Not_found -> raise Not_competent
+      in
+      
+      let path = 
+       Netconversion.recode_string
+         ~in_enc:  `Enc_utf8
+         ~out_enc: system_encoding
+         path_utf8 in
+        (* May raise Bad_character_stream *)
+      
+      open_in_bin path, None
+       (* May raise Sys_error *)
+
+    with
+      | Netconversion.Malformed_code -> assert false
+           (* should not happen *)
+
+  in
+
+  let url_of_id id =
+    match passed_url_of_id with
+       None -> 
+         file_url_of_id id
+      | Some f -> 
+         begin 
+           try f id
+           with 
+               Not_competent -> file_url_of_id id
+         end
+  in
+
+  let channel_of_url url =
+    match passed_channel_of_url with
+       None -> 
+         channel_of_file_url url
+      | Some f -> 
+         begin 
+           try f url
+           with 
+               Not_competent -> channel_of_file_url url
+         end
+  in
+  
+  resolve_read_url_channel 
+    ~base_url:       default_base_url
+    ~auto_close:     true
+    ~url_of_id:      url_of_id
+    ~channel_of_url: channel_of_url
+;;
+
+
+class combine ?prefer rl =
+  object (self)
+    val prefered_resolver = prefer
+    val resolvers = (rl : resolver list)
+    val mutable internal_encoding = `Enc_utf8
+    val mutable warner = new drop_warnings
+    val mutable active_resolver = None
+    val mutable clones = []
+
+    method init_rep_encoding enc =
+      List.iter
+       (fun r -> r # init_rep_encoding enc)
+       rl;
+      internal_encoding <- enc
+
+    method init_warner w =
+      List.iter
+       (fun r -> r # init_warner w)
+       rl;
+      warner <- w;
+
+    method rep_encoding = internal_encoding
+      (* CAUTION: This may not be the truth! *)
+
+    method open_in xid =
+      let rec find_competent_resolver rl =
+       match rl with
+           r :: rl' ->
+             begin try 
+               r, (r # open_in xid)
+             with
+                 Not_competent -> find_competent_resolver rl'
+             end;
+         | [] ->
+             raise Not_competent
+      in
+
+      if active_resolver <> None then failwith "Pxp_reader.combine # open_in";
+      let r, lb = 
+       match prefered_resolver with
+           None ->   find_competent_resolver resolvers 
+         | Some r -> find_competent_resolver (r :: resolvers)
+      in
+      active_resolver <- Some r;
+      lb
+
+    method close_in =
+      match active_resolver with
+         None   -> ()
+       | Some r -> r # close_in;
+                   active_resolver <- None
+
+    method close_all =
+      List.iter (fun r -> r # close_in) clones
+
+    method change_encoding (enc:string) =
+      match active_resolver with
+         None   -> failwith "Pxp_reader.combine # change_encoding"
+       | Some r -> r # change_encoding enc
+
+    method clone =
+      let c =
+       match active_resolver with
+           None   -> 
+             new combine ?prefer:None (List.map (fun q -> q # clone) resolvers)
+         | Some r -> 
+             let r' = r # clone in
+             new combine 
+               ?prefer:(Some r')
+               (List.map 
+                  (fun q -> if q == r then r' else q # clone) 
+                  resolvers)
+      in
+      c # init_rep_encoding internal_encoding;
+      c # init_warner warner;
+      clones <- c :: clones;
+      c
+  end
+
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.9  2000/08/14 22:24:55  gerd
+ *     Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.8  2000/07/16 18:31:09  gerd
+ *     The exception Illegal_character has been dropped.
+ *
+ * Revision 1.7  2000/07/09 15:32:01  gerd
+ *     Fix in resolve_this_channel, resolve_this_string
+ *
+ * Revision 1.6  2000/07/09 01:05:33  gerd
+ *     New methode 'close_all' that closes the clones, too.
+ *
+ * Revision 1.5  2000/07/08 16:24:56  gerd
+ *     Introduced the exception 'Not_resolvable' to indicate that
+ * 'combine' should not try the next resolver of the list.
+ *
+ * Revision 1.4  2000/07/06 23:04:46  gerd
+ *     Quick fix for 'combine': The active resolver is "prefered",
+ * but the other resolvers are also used.
+ *
+ * Revision 1.3  2000/07/06 21:43:45  gerd
+ *     Fix: Public(_,name) is now treated as System(name) if
+ * name is non-empty.
+ *
+ * Revision 1.2  2000/07/04 22:13:30  gerd
+ *     Implemented the new API rev. 1.2 of pxp_reader.mli.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_reader.ml:
+ *
+ * Revision 1.3  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1  2000/03/13 23:41:44  gerd
+ *     Initial revision; this code was formerly part of Markup_entity.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_reader.mli b/helm/DEVEL/pxp/pxp/pxp_reader.mli
new file mode 100644 (file)
index 0000000..27a3680
--- /dev/null
@@ -0,0 +1,388 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_types;;
+
+exception Not_competent;;
+  (* Raised by the 'open_in' method if the object does not know how to 
+   * handle the passed external ID.
+   *)
+
+exception Not_resolvable of exn;;
+  (* Indicates that one resolver was competent, but there was an error
+   * while resolving the external ID. The passed exception explains the
+   * reason.
+   * Not_resolvable(Not_found) serves as indicator for an unknown reason.
+   *)
+
+
+(* The class type 'resolver' is the official type of all "resolvers". 
+ * Resolvers take file names (or better, external identifiers) and 
+ * return lexbufs, scanning the file for tokens. Resolvers may be
+ * cloned, and clones can interpret relative file names relative to
+ * their creator.
+ *
+ * Example of the latter:
+ *
+ * Resolver r reads from file:/dir/f1.xml
+ *
+ * <tag>some XML text
+ * &e;                       -----> Entity e is bound to "subdir/f2.xml"
+ * </tag>                           Step (1): let r' = "clone of r"
+ *                                  Step (2): open file "subdir/f2.xml"
+ *
+ * r' must still know the directory of the file r is reading, otherwise
+ * it would not be able to resolve "subdir/f2.xml" = "file:/dir/subdir/f2.xml".
+ *
+ * Actually, this example can be coded as:
+ *
+ * let r = new resolve_as_file in
+ * let lbuf = r # open_in "file:/dir/f1.xml" in
+ * ... read from lbuf ...
+ * let r' = r # clone in
+ * let lbuf' = r' # open_in "subdir/f2.xml" in
+ * ... read from lbuf' ...
+ * r' # close_in;
+ * ... read from lbuf ...
+ * r # close_in;
+ *)
+
+class type resolver =
+  object
+    (* A resolver can open an input source, and returns this source as
+     * Lexing.lexbuf.
+     *
+     * After creating a resolver, one must invoke the two methods
+     * init_rep_encoding and init_warner to set the internal encoding of 
+     * strings and the warner object, respectively. This is normally
+     * done by the parsing functions in Pxp_yacc.
+     * It is not necessary to invoke these two methods for a fresh
+     * clone.
+     *
+     * It is possible that the character encoding of the source and the
+     * internal encoding of the parser are different. To cope with this,
+     * one of the tasks of the resolver is to recode the characters of
+     * the input source into the internal character encoding.
+     *
+     * Note that there are several ways of determining the encoding of the
+     * input: (1) It is possible that the transport protocol (e.g. HTTP)
+     * transmits the encoding, and (2) it is possible to inspect the beginning
+     * of the file, and to analyze:
+     * (2.1) The first two bytes indicate whether UTF-16 is used
+     * (2.2) Otherwise, one can assume that an ASCII-compatible character
+     *       set is used. It is now possible to read the XML declaration
+     *       <?xml ... encoding="xyz" ...?>. The encoding found here is
+     *       to be used.
+     * (2.3) If the XML declaration is missing, the encoding is UTF-8.
+     * The resolver needs only to distinguish between cases (1), (2.1),
+     * and the rest.
+     * The details of analyzing whether (2.2) or (2.3) applies are programmed 
+     * elsewhere, and the resolver will be told the result (see below).
+     *
+     * A resolver is like a file: it must be opened before one can work
+     * with it, and it should be closed after all operations on it have been
+     * done. The method 'open_in' is called with the external ID as argument
+     * and it must return the lexbuf reading from the external resource.
+     * The method 'close_in' does not require an argument.
+     *
+     * It is allowed to re-open a resolver after it has been closed. It is
+     * forbidden to open a resolver again while it is open.
+     * It is allowed to close a resolver several times: If 'close_in' is
+     * invoked while the resolver is already closed, nothing happens.
+     *
+     * The method 'open_in' may raise Not_competent to indicate that this
+     * resolver is not able to open this type of IDs.
+     *
+     * The method 'change_encoding' is called from the parser after the
+     * analysis of case (2) has been done; the argument is either the
+     * string name of the encoding, or the empty string to indicate
+     * that no XML declaration was found. It is guaranteed that 
+     * 'change_encoding' is invoked after only a few tokens of the 
+     * file. The resolver should react as follows:
+     * - If case (1) applies:   Ignore the encoding passed to 'change_encoding'.
+     * - If case (2.1) applies: The encoding passed to 'change_encoding' must
+     *                          be compatible with UTF-16. This should be
+     *                          checked, and violations should be reported.
+     * - Else:                  If the passed encoding is "", assume UTF-8.
+     *                          Otherwise, assume the passed encoding.
+     *
+     * The following rule helps to synchronize the lexbuf with the encoding:
+     * If the resolver has been opened, but 'change_encoding' has not yet
+     * been invoked, the lexbuf contains at most one character (which may
+     * be represented by multiple bytes); i.e. the lexbuf is created by
+     * Lexing.from_function, and the function puts only one character into
+     * the buffer at once.
+     * After 'change_encoding' has been invoked, there is no longer a limit
+     * on the lexbuf size.
+     *
+     * The reason for this rule is that you know exactly the character where
+     * the encoding changes to the encoding passed by 'change_encoding'.
+     *
+     * The method 'clone' may be invoked for open or closed resolvers.
+     * Basically, 'clone' returns a new resolver which is always closed.
+     * If the original resolver is closed, the clone is simply a clone.
+     * If the original resolver is open at the moment of cloning:
+     * If the clone is later opened for a relative system ID (i.e. relative
+     * URL), the clone must interpret this ID relative to the ID of the
+     * original resolver.
+     *)
+    method init_rep_encoding : rep_encoding -> unit
+    method init_warner : collect_warnings -> unit
+
+    method rep_encoding : rep_encoding
+
+    method open_in : ext_id -> Lexing.lexbuf
+      (* May raise Not_competent if the object does not know how to handle
+       * this ext_id.
+       *)
+    method close_in : unit
+    method change_encoding : string -> unit
+
+
+    (* Every resolver can be cloned. The clone does not inherit the connection
+     * with the external object, i.e. it is initially closed.
+     *)
+    method clone : resolver
+
+    method close_all : unit
+      (* Closes this resolver and every clone *)
+
+  end
+;;
+
+(* Note: resolve_general is no longer exported. In most cases, the classes
+ * resolve_read_any_channel or resolve_read_any_string are applicable, too,
+ * and much easier to configure.
+ *)
+
+
+(* The next classes are resolvers for concrete input sources. *)
+
+class resolve_read_this_channel : 
+  ?id:ext_id -> ?fixenc:encoding -> ?auto_close:bool -> 
+  in_channel -> resolver;;
+
+  (* Reads from the passed channel (it may even be a pipe). If the ~id
+   * argument is passed to the object, the created resolver accepts only
+   * this ID. Otherwise all IDs are accepted.
+   * Once the resolver has been cloned, it does not accept any ID. This
+   * means that this resolver cannot handle inner references to external
+   * entities. Note that you can combine this resolver with another resolver
+   * that can handle inner references (such as resolve_as_file); see
+   * class 'combine' below.
+   * If you pass the ~fixenc argument, the encoding of the channel is
+   * set to the passed value, regardless of any auto-recognition or
+   * any XML declaration.
+   * If ?auto_close = true (which is the default), the channel is
+   * closed after use. If ?auto_close = false, the channel is left open.
+   *)
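+
+  (* A minimal usage sketch showing how the ~fixenc and ~auto_close options
+   * might be passed (the file name and encoding are only illustrative):
+   *
+   *   let r = new resolve_read_this_channel
+   *             ~fixenc:`Enc_iso88591
+   *             ~auto_close:false
+   *             (open_in "sample.xml")
+   *)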
+
+
+class resolve_read_any_channel : 
+  ?auto_close:bool -> 
+  channel_of_id:(ext_id -> (in_channel * encoding option)) -> 
+  resolver;;
+
+  (* resolve_read_any_channel f_open:
+   * This resolver calls the function f_open to open a new channel for
+   * the passed ext_id. This function must either return the channel and
+   * the encoding, or it must fail with Not_competent.
+   * The function must return None as encoding if the default mechanism to
+   * recognize the encoding should be used. It must return Some e if it is
+   * already known that the encoding of the channel is e.
+   * If ?auto_close = true (which is the default), the channel is
+   * closed after use. If ?auto_close = false, the channel is left open.
+   *)
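+
+  (* A possible ~channel_of_id function (a sketch only; treating System IDs
+   * directly as local file names is an assumption made for brevity):
+   *
+   *   let r = new resolve_read_any_channel
+   *             ~channel_of_id:(fun xid ->
+   *                match xid with
+   *                    System name -> open_in name, None
+   *                  | _           -> raise Not_competent)
+   *)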
+
+
+class resolve_read_url_channel :
+  ?base_url:Neturl.url ->
+  ?auto_close:bool -> 
+  url_of_id:(ext_id -> Neturl.url) -> 
+  channel_of_url:(Neturl.url -> (in_channel * encoding option)) -> 
+    resolver;;
+
+  (* resolve_read_url_channel url_of_id channel_of_url:
+   *
+   * When this resolver gets an ID to read from, it calls the function
+   * ~url_of_id to get the corresponding URL. This URL may be a relative
+   * URL; however, a URL scheme must be used which contains a path.
+   * The resolver converts the URL to an absolute URL if necessary.
+   * The second function, ~channel_of_url, is fed with the absolute URL
+   * as input. This function opens the resource to read from, and returns
+   * the channel and the encoding of the resource.
+   *
+   * Both functions, ~url_of_id and ~channel_of_url, can raise
+   * Not_competent to indicate that the object is not able to read from
+   * the specified resource. However, there is a difference: A Not_competent
+   * from ~url_of_id is left as it is, but a Not_competent from ~channel_of_url
+   * is converted to Not_resolvable. So only ~url_of_id decides which URLs
+   * are accepted by the resolver and which not.
+   *
+   * The function ~channel_of_url must return None as encoding if the default 
+   * mechanism to recognize the encoding should be used. It must return
+   * Some e if it is already known that the encoding of the channel is e.
+   *
+   * If ?auto_close = true (which is the default), the channel is
+   * closed after use. If ?auto_close = false, the channel is left open.
+   * 
+   * Objects of this class contain a base URL relative to which relative
+   * URLs are interpreted. When creating a new object, you can specify
+   * the base URL by passing it as ~base_url argument. When an existing
+   * object is cloned, the base URL of the clone is the URL of the original
+   * object.
+   *
+   * Note that the term "base URL" has a strict definition in RFC 1808.
+   *)
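+
+  (* A sketch of how the two functions might be supplied. The use of
+   * Neturl.ip_url_syntax and the plain open_in are illustrative choices,
+   * not requirements:
+   *
+   *   let r = new resolve_read_url_channel
+   *             ~url_of_id:(fun xid ->
+   *                match xid with
+   *                    System name ->
+   *                      (try Neturl.url_of_string Neturl.ip_url_syntax name
+   *                       with Neturl.Malformed_URL -> raise Not_competent)
+   *                  | _ -> raise Not_competent)
+   *             ~channel_of_url:(fun url ->
+   *                open_in
+   *                  (Neturl.join_path (Neturl.url_path ~encoded:false url)),
+   *                None)
+   *)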
+
+
+class resolve_read_this_string : 
+  ?id:ext_id -> ?fixenc:encoding -> string -> resolver;;
+
+  (* Reads from the passed string. If the ~id
+   * argument is passed to the object, the created resolver accepts only
+   * this ID. Otherwise all IDs are accepted.
+   * Once the resolver has been cloned, it does not accept any ID. This
+   * means that this resolver cannot handle inner references to external
+   * entities. Note that you can combine this resolver with another resolver
+   * that can handle inner references (such as resolve_as_file); see
+   * class 'combine' below.
+   * If you pass the ~fixenc argument, the encoding of the string is
+   * set to the passed value, regardless of any auto-recognition or
+   * any XML declaration.
+   *)
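+
+  (* A minimal sketch (the document text is made up for illustration):
+   *
+   *   let r = new resolve_read_this_string
+   *             ~fixenc:`Enc_utf8
+   *             "<?xml version='1.0'?><sample>text</sample>"
+   *)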
+
+
+class resolve_read_any_string : 
+  string_of_id:(ext_id -> (string * encoding option)) -> resolver;;
+
+  (* resolve_read_any_string f_open:
+   * This resolver calls the function f_open to get the string for
+   * the passed ext_id. This function must either return the string and
+   * the encoding, or it must fail with Not_competent.
+   * The function must return None as encoding if the default mechanism to
+   * recognize the encoding should be used. It must return Some e if it is
+   * already known that the encoding of the string is e.
+   *)
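+
+  (* A possible ~string_of_id function (a sketch; the public ID and the
+   * returned DTD fragment are invented for illustration):
+   *
+   *   let r = new resolve_read_any_string
+   *             ~string_of_id:(fun xid ->
+   *                match xid with
+   *                    Public("-//SAMPLE//DTD Sample//EN", _) ->
+   *                      "<!ELEMENT sample (#PCDATA)>", Some `Enc_utf8
+   *                  | _ ->
+   *                      raise Not_competent)
+   *)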
+
+
+class resolve_as_file :
+  ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+  ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+  ?system_encoding:encoding ->
+  ?url_of_id:(ext_id -> Neturl.url) -> 
+  ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
+  unit -> 
+  resolver;;
+
+  (* Reads from the local file system. Every file name is interpreted as
+   * file name of the local file system, and the referred file is read.
+   *
+   * The full form of a file URL is: file://host/path, where
+   * 'host' specifies the host system where the file identified by 'path'
+   * resides. host = "" or host = "localhost" are accepted; other values
+   * will raise Not_competent. The standard for file URLs is 
+   * defined in RFC 1738.
+   *
+   * Option ~file_prefix: Specifies how the "file:" prefix of file names
+   * is handled:
+   * `Not_recognized:  The prefix is not recognized.
+   * `Allowed:         The prefix is allowed but not required (the default).
+   * `Required:        The prefix is required.
+   *
+   * Option ~host_prefix: Specifies how the "//host" phrase of file names
+   * is handled:
+   * `Not_recognized:  The phrase is not recognized.
+   * `Allowed:         The phrase is allowed but not required (the default).
+   * `Required:        The phrase is required.
+   *
+   * Option ~system_encoding: Specifies the encoding of file names of
+   * the local file system. Default: UTF-8.
+   *
+   * Options ~url_of_id, ~channel_of_url: Not for the end user!
+   *)
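+
+  (* A typical instantiation might look like this (a sketch; whether
+   * ISO-8859-1 is the right ~system_encoding depends on the local system):
+   *
+   *   let r = new resolve_as_file ~system_encoding:`Enc_iso88591 () in
+   *   let lexbuf = r # open_in (System "file:///dir/sample.xml") in
+   *   ... read from lexbuf ...
+   *   r # close_in
+   *)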
+
+
+class combine : ?prefer:resolver -> resolver list -> resolver;;
+  
+  (* Combines several resolver objects. If a concrete entity with an
+   * ext_id is to be opened, the combined resolver tries the contained
+   * resolvers in turn until a resolver accepts opening the entity
+   * (i.e. it does not raise Not_competent on open_in).
+   *
+   * Clones: If the 'clone' method is invoked before 'open_in', all contained
+   * resolvers are cloned and again combined. If the 'clone' method is 
+   * invoked after 'open_in' (i.e. while the resolver is open), only the
+   * active resolver is cloned.
+   *) 
+
+(* EXAMPLES OF RESOLVERS:
+ *
+ * let r1 = new resolve_as_file
+ *   - r1 can open all local files
+ *
+ * let r2 = new resolve_read_this_channel 
+ *            ~id:"file:/dir/f.xml" 
+ *            (open_in "/dir/f.xml")
+ *   - r2 can only read /dir/f.xml of the local file system. If this file
+ *     contains references to other files, r2 will fail
+ *
+ * let r3 = new combine [ r2; r1 ]
+ *   - r3 reads /dir/f.xml of the local file system by calling r2, and all
+ *     other files by calling r1
+ *)
+
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.5  2000/07/09 01:05:33  gerd
+ *     New methode 'close_all' that closes the clones, too.
+ *
+ * Revision 1.4  2000/07/08 16:24:56  gerd
+ *     Introduced the exception 'Not_resolvable' to indicate that
+ * 'combine' should not try the next resolver of the list.
+ *
+ * Revision 1.3  2000/07/06 23:04:46  gerd
+ *     Quick fix for 'combine': The active resolver is "prefered",
+ * but the other resolvers are also used.
+ *
+ * Revision 1.2  2000/07/04 22:06:49  gerd
+ *     MAJOR CHANGE: Complete redesign of the reader classes.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_reader.mli:
+ *
+ * Revision 1.3  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1  2000/03/13 23:41:54  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_types.ml b/helm/DEVEL/pxp/pxp/pxp_types.ml
new file mode 100644 (file)
index 0000000..e8a8eac
--- /dev/null
@@ -0,0 +1,212 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+type ext_id =
+    System of string
+  | Public of (string * string)
+  | Anonymous
+
+
+type dtd_id =
+    External of ext_id
+  | Derived of ext_id
+  | Internal
+;;
+
+type content_model_type =
+    Unspecified
+  | Empty
+  | Any
+  | Mixed of mixed_spec list
+  | Regexp of regexp_spec
+
+and mixed_spec =
+    MPCDATA
+  | MChild of string
+
+and regexp_spec =
+    Optional of regexp_spec
+  | Repeated of regexp_spec
+  | Repeated1 of regexp_spec
+  | Alt of regexp_spec list
+  | Seq of regexp_spec list
+  | Child of string
+;;
+
+
+type att_type =
+    A_cdata
+  | A_id
+  | A_idref
+  | A_idrefs
+  | A_entity
+  | A_entities
+  | A_nmtoken
+  | A_nmtokens
+  | A_notation of string list
+  | A_enum of string list
+;;
+
+
+type att_default =
+    D_required
+  | D_implied
+  | D_default of string  (* The default value is already expanded *)
+  | D_fixed of string    (* The default value is already expanded *)
+;;
+
+
+type att_value =
+    Value of string
+  | Valuelist of string list
+  | Implied_value
+;;
+
+
+class type collect_warnings =
+  object 
+    method warn : string -> unit
+  end
+;;
+
+
+class drop_warnings =
+  object 
+    method warn (w:string) = ()
+  end
+;;
+
+
+type encoding = Netconversion.encoding;;
+
+type rep_encoding =
+  (* The subset of 'encoding' that may be used for internal representation
+   * of strings.
+   *)
+  [  `Enc_utf8       (* UTF-8 *)
+  |  `Enc_iso88591   (* ISO-8859-1 *)
+  ]
+;;
+
+
+exception Validation_error of string
+
+exception WF_error of string
+
+exception Error of string
+
+exception Character_not_supported
+
+exception At of (string * exn)
+
+exception Undeclared
+
+
+let rec string_of_exn x0 =
+  match x0 with
+      At (s, x) ->
+        s ^ string_of_exn x
+    | Validation_error s ->
+        "ERROR (Validity constraint): "  ^ s
+    | WF_error s ->
+        "ERROR (Well-formedness constraint): " ^ s
+    | Error s ->
+       "ERROR: " ^ s
+    | Character_not_supported ->
+        "RESTRICTION: Character not supported"
+    | Netconversion.Malformed_code ->
+        "ERROR: Bad character stream"
+    | Undeclared ->
+        "INFORMATION: Undeclared"
+    | Parsing.Parse_error ->
+       "SYNTAX ERROR"
+    | _ ->
+        "Other exception: " ^ Printexc.to_string x0
+;;
+
+
+type output_stream =
+    Out_buffer of Buffer.t
+  | Out_channel of out_channel
+  | Out_function of (string -> int -> int -> unit)
+;;
+
+
+let write os str pos len =
+  match os with
+      Out_buffer b -> Buffer.add_substring b str pos len
+    | Out_channel ch -> output ch str pos len
+    | Out_function f -> f str pos len
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.7  2000/08/14 22:24:55  gerd
+ *     Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.6  2000/07/27 00:41:15  gerd
+ *     new 8 bit codes
+ *
+ * Revision 1.5  2000/07/16 18:31:09  gerd
+ *     The exception Illegal_character has been dropped.
+ *
+ * Revision 1.4  2000/07/14 21:25:27  gerd
+ *     Simplified the type 'collect_warnings'.
+ *
+ * Revision 1.3  2000/07/08 16:23:50  gerd
+ *     Added the exception 'Error'.
+ *
+ * Revision 1.2  2000/07/04 22:14:05  gerd
+ *     Implemented the changes of rev. 1.2 of pxp_types.mli.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_types.ml:
+ *
+ * Revision 1.7  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.6  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.5  2000/05/01 20:43:19  gerd
+ *     New type output_stream; new function 'write'.
+ *
+ * Revision 1.4  1999/09/01 16:25:35  gerd
+ *     Dropped Illegal_token and Content_not_allowed_here. WF_error can
+ * be used instead.
+ *
+ * Revision 1.3  1999/08/15 02:22:33  gerd
+ *     Added exception Undeclared.
+ *
+ * Revision 1.2  1999/08/14 22:14:58  gerd
+ *     New class "collect_warnings".
+ *
+ * Revision 1.1  1999/08/10 00:35:52  gerd
+ *     Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_types.mli b/helm/DEVEL/pxp/pxp/pxp_types.mli
new file mode 100644 (file)
index 0000000..e8b4711
--- /dev/null
@@ -0,0 +1,224 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+type ext_id =
+    System of string
+  | Public of (string * string)
+  | Anonymous
+
+  (* external identifiers are either "system identifiers" (filenames or URLs),
+   * or "public identifiers" Public(id,sysid) where "id" is the representation
+   * of the public ID, and "sysid" a fallback system ID, or the empty string.
+   *
+   * New in PXP: Sometimes the external ID is not known. This case can be
+   * referred to as Anonymous ID.
+   *
+   * Encoding: The identifiers are _always_ encoded as UTF8 strings,
+   * regardless of whether another encoding is configured for the parser.
+   * TODO: implement this
+   *)
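+
+  (* For illustration (the concrete identifiers are made up):
+   *
+   *   System "http://www.example.org/sample.dtd"
+   *   Public("-//SAMPLE//DTD Sample//EN", "sample.dtd")
+   *   Anonymous
+   *)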
+
+
+type dtd_id =
+    External of ext_id       (* DTD is completely external *)
+  | Derived of ext_id        (* DTD is derived from an external DTD *)
+  | Internal                 (* DTD is completely internal *)
+;;
+
+type content_model_type =
+    Unspecified              (* A specification of the model has not yet been
+                             * found
+                             *)
+  | Empty                    (* Nothing is allowed as content *)
+  | Any                      (* Everything is allowed as content *)
+  | Mixed of mixed_spec list (* The contents consist of elements and PCDATA 
+                             * in arbitrary order. What is allowed in
+                             * particular is given as mixed_spec.
+                             *)
+  | Regexp of regexp_spec    (* The contents are elements following this regular
+                             * expression
+                             *)
+
+and mixed_spec =
+    MPCDATA                  (* PCDATA children are allowed *)
+  | MChild of string         (* This kind of Element is allowed *)
+
+and regexp_spec =
+    Optional of regexp_spec  (* subexpression? *)
+  | Repeated of regexp_spec  (* subexpression* *)
+  | Repeated1 of regexp_spec (* subexpression+ *)
+  | Alt of regexp_spec list  (* subexpr1 | subexpr2 | ... | subexprN *)
+  | Seq of regexp_spec list  (* subexpr1 , subexpr2 , ... , subexprN *)
+  | Child of string          (* This kind of Element is allowed here *)
+;;
+
+
+type att_type =
+    A_cdata                    (* CDATA *)
+  | A_id                       (* ID *)
+  | A_idref                    (* IDREF *)
+  | A_idrefs                   (* IDREFS *)
+  | A_entity                   (* ENTITY *)
+  | A_entities                 (* ENTITIES *)
+  | A_nmtoken                  (* NMTOKEN *)
+  | A_nmtokens                 (* NMTOKENS *)
+  | A_notation of string list  (* NOTATION (name1 | name2 | ... | nameN) *)
+  | A_enum of string list      (* (name1 | name2 | ... | nameN) *)
+;;
+
+
+type att_default =
+    D_required           (* #REQUIRED *)
+  | D_implied            (* #IMPLIED *)
+  | D_default of string  (* <value> -- The value is already expanded *)
+  | D_fixed of string    (* FIXED <value> -- The value is already expanded *)
+;;
+
+
+type att_value =
+    Value of string           (* a single value *)
+  | Valuelist of string list  (* a list of values *)
+  | Implied_value             (* a value left out *)
+;;
+
+
+class type collect_warnings =
+  object 
+    method warn : string -> unit
+  end
+;;
+
+
+class drop_warnings : collect_warnings;;
+
+
+type encoding = Netconversion.encoding;;
+  (* We accept all encodings for character sets which are defined in
+   * Netconversion (package netstring).
+   *)
+
+type rep_encoding =
+  (* The subset of 'encoding' that may be used for internal representation
+   * of strings.
+   * Note: The following encodings are ASCII-compatible! This is an important
+   * property used throughout the whole PXP code.
+   *)
+  [ `Enc_utf8       (* UTF-8 *)
+  | `Enc_iso88591   (* ISO-8859-1 *)
+  ]
+;;
+
+
+exception Validation_error of string
+  (* Violation of a validity constraint *)
+
+exception WF_error of string
+  (* Violation of a well-formedness constraint *)
+
+exception Error of string
+  (* Other error *)
+
+exception Character_not_supported
+
+exception At of (string * exn)
+  (* The string is a description where the exn happened. The exn value can
+   * again be At(_,_) (for example, when an entity within an entity causes
+   * the error).
+   *)
+
+exception Undeclared
+  (* Indicates that no declaration is available and because of this every
+   * kind of usage is allowed.
+   *)
+
+val string_of_exn : exn -> string
+  (* Converts a Markup exception into a readable string *)
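+
+  (* For instance (a sketch; the location string is invented):
+   *
+   *   string_of_exn (At("In entity [sample]:\n", WF_error "tag mismatch"))
+   *     = "In entity [sample]:\nERROR (Well-formedness constraint): tag mismatch"
+   *)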
+
+
+type output_stream =
+    Out_buffer of Buffer.t
+  | Out_channel of out_channel
+  | Out_function of (string -> int -> int -> unit)
+
+val write : output_stream -> string -> int -> int -> unit
+  (* write os s pos len: Writes the string to the buffer/channel/stream *)
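+
+  (* For example (a minimal sketch):
+   *
+   *   let b = Buffer.create 80 in
+   *   let s = "<sample/>" in
+   *   write (Out_buffer b) s 0 (String.length s)
+   *)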
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.8  2000/08/14 22:24:55  gerd
+ *     Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.7  2000/07/27 00:41:15  gerd
+ *     new 8 bit codes
+ *
+ * Revision 1.6  2000/07/16 18:31:09  gerd
+ *     The exception Illegal_character has been dropped.
+ *
+ * Revision 1.5  2000/07/16 16:34:21  gerd
+ *     Updated comments.
+ *
+ * Revision 1.4  2000/07/14 21:25:27  gerd
+ *     Simplified the type 'collect_warnings'.
+ *
+ * Revision 1.3  2000/07/08 16:23:50  gerd
+ *     Added the exception 'Error'.
+ *
+ * Revision 1.2  2000/07/04 22:08:26  gerd
+ *     type ext_id: New variant Anonymous. - The System and Public
+ * variants are now encoded as UTF-8.
+ *     collect_warnings is now a class type only. New class
+ * drop_warnings.
+ *     New functions  encoding_of_string and string_of_encoding.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from Markup_types.mli:
+ *
+ * Revision 1.7  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.6  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.5  2000/05/01 20:43:25  gerd
+ *         New type output_stream; new function 'write'.
+ *
+ * Revision 1.4  1999/09/01 16:25:35  gerd
+ *     Dropped Illegal_token and Content_not_allowed_here. WF_error can
+ * be used instead.
+ *
+ * Revision 1.3  1999/08/15 02:22:40  gerd
+ *         Added exception Undeclared.
+ *
+ * Revision 1.2  1999/08/14 22:15:17  gerd
+ *         New class "collect_warnings".
+ *
+ * Revision 1.1  1999/08/10 00:35:52  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_utf8.ml b/helm/DEVEL/pxp/pxp/pxp_utf8.ml
new file mode 100644 (file)
index 0000000..f0a9462
--- /dev/null
@@ -0,0 +1,48 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types;;
+open Pxp_lexer_types;;
+
+Pxp_lexers.init_utf8 
+  { lex_encoding         = `Enc_utf8;
+    scan_document        = Pxp_lex_document_utf8.scan_document;
+    scan_content         = Pxp_lex_content_utf8.scan_content;
+    scan_within_tag      = Pxp_lex_within_tag_utf8.scan_within_tag;
+    scan_document_type   = Pxp_lex_document_type_utf8.
+                            scan_document_type;
+    scan_declaration     = Pxp_lex_declaration_utf8.scan_declaration;
+    scan_content_comment  = Pxp_lex_misc_utf8.scan_content_comment;
+    scan_decl_comment     = Pxp_lex_misc_utf8.scan_decl_comment;
+    scan_document_comment = Pxp_lex_misc_utf8.scan_document_comment;
+    scan_ignored_section = Pxp_lex_name_string_utf8.scan_ignored_section;
+    scan_xml_pi          = Pxp_lex_misc_utf8.scan_xml_pi;
+    scan_dtd_string      = Pxp_lex_dtd_string_utf8.scan_dtd_string;
+    scan_content_string  = Pxp_lex_content_string_utf8.
+                            scan_content_string;
+    scan_name_string     = Pxp_lex_name_string_utf8.scan_name_string;
+    scan_only_xml_decl   = Pxp_lex_misc_utf8.scan_only_xml_decl;
+    scan_for_crlf        = Pxp_lex_misc_utf8.scan_for_crlf;
+  }
+;;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.3  2000/06/04 20:31:44  gerd
+ *     Updated.
+ *
+ * Revision 1.2  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.1  2000/05/23 00:08:48  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_utf8.mli b/helm/DEVEL/pxp/pxp/pxp_utf8.mli
new file mode 100644 (file)
index 0000000..42cb033
--- /dev/null
@@ -0,0 +1,22 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* This is a module without interface. Its initialization part sets up
+ * the UTF-8 lexers.
+ * Link with this module if you want to use the UTF-8 lexers!
+ *)
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.1  2000/05/23 00:08:48  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_yacc.m2y b/helm/DEVEL/pxp/pxp/pxp_yacc.m2y
new file mode 100644 (file)
index 0000000..91de7cd
--- /dev/null
@@ -0,0 +1,2528 @@
+(* $Id$ -*- tuareg -*-
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Parsing
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_dtd
+open Pxp_entity
+open Pxp_document
+open Pxp_aux
+
+(* Some types from the interface definition: *)
+
+exception ID_not_unique
+
+class type [ 'ext ] index =
+object 
+  constraint 'ext = 'ext node #extension
+  method add : string -> 'ext node -> unit
+  method find : string -> 'ext node
+end
+
+
+type config =
+    { warner : collect_warnings;
+      errors_with_line_numbers : bool;
+      enable_pinstr_nodes : bool;
+      enable_super_root_node : bool;
+      enable_comment_nodes : bool;
+      encoding : rep_encoding;
+      recognize_standalone_declaration : bool;
+      store_element_positions : bool;
+      idref_pass : bool;
+      validate_by_dfa : bool;
+      accept_only_deterministic_models : bool;
+      debugging_mode : bool;
+    }
+
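+(* A value of type 'config' might be filled in as follows (a sketch only;
+ * these settings are illustrative, not official defaults):
+ *
+ *   let conf =
+ *     { warner = new drop_warnings;
+ *       errors_with_line_numbers = true;
+ *       enable_pinstr_nodes = false;
+ *       enable_super_root_node = false;
+ *       enable_comment_nodes = false;
+ *       encoding = `Enc_utf8;
+ *       recognize_standalone_declaration = true;
+ *       store_element_positions = true;
+ *       idref_pass = false;
+ *       validate_by_dfa = true;
+ *       accept_only_deterministic_models = true;
+ *       debugging_mode = false;
+ *     }
+ *)
+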
+type source =
+    Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
+  | ExtID of (ext_id * Pxp_reader.resolver)
+
+
+type start_symbol =
+    Ext_document
+  | Ext_declarations
+  | Ext_element
+
+
+type context =
+    { mutable current : unit -> token;  (* get the current token *)
+      mutable get_next : unit -> token; (* go on to the next token; return it *)
+      mutable current_token : token;    (* This is the current token *)
+      mutable manager : entity_manager; (* The entity manager *)
+    }
+
+
+let make_context entity_manager =
+  let c =
+    { current = (fun _ -> assert false);
+      get_next = (fun _ -> assert false);
+      current_token = Eof;
+      manager = entity_manager;
+    }
+  in
+  (* Note that the function which is stored in get_next_ref can be changed
+   * as a side-effect when an entity is opened or closed. The function in
+   * c.get_next must be programmed such that the current "get_next"
+   * function is always executed.
+   *)
+  let get_next_ref = entity_manager # yy_get_next_ref in
+  c.current  <- (fun () -> c.current_token);
+  c.get_next <- (fun () -> let tok = !get_next_ref() in
+                          c.current_token <- tok;
+                          tok);
+  ignore(c.get_next());
+  c
+;;
+
+
+let from_channel ?system_encoding ?id:init_id ?fixenc ch =
+
+  (* Reading from a channel works by modifying the algorithm of
+   * resolve_as_file.
+   *)
+
+  let url_syntax =      (* A syntax suitable for "file" URLs *)
+    { Neturl.null_url_syntax with
+       Neturl.url_enable_scheme = Neturl.Url_part_allowed;
+       Neturl.url_enable_host   = Neturl.Url_part_allowed;
+       Neturl.url_enable_path   = Neturl.Url_part_required;
+       Neturl.url_accepts_8bits = true;
+    } 
+  in
+
+  let an_url =
+    Neturl.make_url
+      ~scheme: "file"
+      ~host:   ""
+      ~path:   [ "" ]
+      url_syntax
+  in
+
+  let init_channel_done = ref false in
+    (* Whether the first access to this source has already happened. *)
+
+  (* The task of url_of_id is:
+   * - When it is called the first time, and no init_id is present,
+   *   the URL file:/// is passed back (an_url). This forces absolute
+   *   path names /path/dir/... to be interpreted as file path names.
+   *   (But relative path names will not work.)
+   * - If an init_id has been passed, we can assume that the opened URL
+   *   is exactly this init_id. By raising Not_competent it is indicated
+   *   that the standard method is to be used for the interpretation of
+   *   the URL.
+   * - Otherwise, the channel is already being read, and thus cannot be
+   *   opened again. (This case is handled in channel_of_url.)
+   *)
+
+  let url_of_id xid =
+    if !init_channel_done then begin
+      (* Use the normal way of determining the URL of the ID: *)
+      raise Pxp_reader.Not_competent
+    end
+    else begin
+      match init_id with
+         None -> 
+           an_url
+             (* If the channel is not associated with any URL: Simply pass 
+              * the URL file:/// back. 
+              *)
+       | Some the_init_id ->
+           assert (the_init_id = xid);
+           raise Pxp_reader.Not_competent
+             (* If the channel is associated with a URL, the corresponding
+              * ID must be passed when the first invocation happens.
+              *)
+    end
+  in
+
+  (* The task of channel_of_url:
+   * - If it is called the first time ("else"), the channel is returned
+   * - Otherwise, the channel is already being read, and thus cannot be
+   *   opened again. By raising Not_competent it is signaled that the
+   *   resolve_as_file object must not continue to open the URL.
+   *)
+
+  let channel_of_url url =
+    if !init_channel_done then
+      raise Pxp_reader.Not_competent
+    else begin
+      init_channel_done := true;
+      ch, fixenc
+    end
+  in
+
+  let r =
+    new Pxp_reader.resolve_as_file 
+      ?system_encoding:system_encoding
+      ~url_of_id:url_of_id
+      ~channel_of_url:channel_of_url
+      ()
+  in
+
+  let init_xid =
+    match init_id with
+       None   -> Anonymous
+      | Some id -> 
+         (* Note: 'id' may be illegal (malformed); in this case, the first
+          * invocation of url_of_id will raise Not_competent, and the 'open_in'
+          * method will fail.
+          *)
+         id
+  in
+
+  ExtID(init_xid, r)
+;;
+
+
+let from_file ?system_encoding utf8_filename =
+  
+  let r =
+    new Pxp_reader.resolve_as_file 
+      ?system_encoding:system_encoding
+      ()
+  in
+
+  let utf8_abs_filename =
+    if utf8_filename <> "" && utf8_filename.[0] = '/' then
+      utf8_filename
+    else
+      Sys.getcwd() ^ "/" ^ utf8_filename
+  in
+
+  let syntax = { Neturl.ip_url_syntax with Neturl.url_accepts_8bits = true } in
+  let url = Neturl.make_url 
+             ~scheme:"file" 
+             ~host:"localhost" 
+             ~path:(Neturl.split_path utf8_abs_filename) 
+             syntax
+  in
+
+  let xid = System (Neturl.string_of_url url) in
+    
+
+  ExtID(xid, r)
+;;
+
+
+let from_string ?fixenc s =
+  let r =
+    new Pxp_reader.resolve_read_this_string ?fixenc:fixenc s in
+  ExtID(Anonymous, r)
+;;
+
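+(* Examples of how 'source' values might be obtained from the functions
+ * above (a sketch; the file name and document text are illustrative):
+ *
+ *   let src1 = from_file "/dir/sample.xml"
+ *   let src2 = from_string ~fixenc:`Enc_utf8 "<?xml version='1.0'?><sample/>"
+ *   let src3 = from_channel ~id:(System "file:///dir/sample.xml")
+ *                (open_in "/dir/sample.xml")
+ *)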
+
+(**********************************************************************)
+
+class ['ext] parser_object
+  init_doc init_dtd init_extend_dtd init_config init_resolver init_spec 
+  init_process_xmldecl transform_dtd id_index
+  =
+  object (self)
+
+      (* Note that the 'ext parameter has been the motivation to make the
+       * parser a class.
+       *)
+
+    val mutable dtd = init_dtd
+       (* The DTD being parsed; or the DTD currently assumed *)
+
+    val extend_dtd = init_extend_dtd
+       (* Whether the DTD should be extended by ELEMENT, ATTLIST, and
+        * NOTATION declarations or not. (True for validating mode,
+        * false for well-formedness mode.)
+        *)
+
+    val transform_dtd = transform_dtd
+        (* A function transforming the DTD *)
+
+    val id_index = (id_index : 'ext index option)
+        (* The ID index or None *)
+
+    val process_xmldecl = init_process_xmldecl
+        (* Whether the XML declaration is parsed and the found XML version
+        * and standalone declaration are passed to 'doc'.
+        *)
+
+    val lexerset = Pxp_lexers.get_lexer_set (init_config.encoding)
+
+    val doc = init_doc
+        (* The current document *)
+
+    method doc = (doc : 'ext document)
+
+    val resolver = init_resolver
+        (* The resolver for external IDs *)
+
+    val config = init_config
+        (* The current configuration *)
+
+    val elstack = (Stack.create() : ('ext node * entity_id) Stack.t)
+       (* The element stack containing all open elements, i.e. elements that
+       * have been begun by a start tag but not yet finished by an end tag.
+       * If the parser sees a start tag, it creates the element and pushes it
+       * on top of this stack. If the parser recognizes an end tag, it pulls
+       * one element from the stack and checks if it has the same name as
+       * given with the end tag.
+       *
+       * At initialization time, a special element is pushed on the stack,
+       * the so-called super root. It is always the bottommost
+       * element of the stack, and serves as a guard.
+       * [See "initializer" below.]
+       *)
+                   
+    method current =
+        (* Get the top element of the element stack *)
+        try
+          fst(Stack.top elstack)
+       with
+           Stack.Empty -> assert false
+               (* Not possible, because the super root is always the element 
+                * at the bottom of the stack.
+                *)
+
+    val mutable n_tags_open = 0
+       (* Number of begin tags that have been parsed and whose corresponding
+        * end tags have not yet been parsed
+        *)
+
+    val mutable p_internal_subset = false
+        (* true while parsing the internal subset - there are some additional
+        * constraints for internal subsets, and because of this it must
+        * be known whether the current declaration is contained in the
+        * internal or external subset of the DTD.
+        *)
+
+    val mutable root = None
+        (* Contains the root element (topmost element) while it is being parsed
+        * and after it has been parsed.
+        * This variable is None before the root element is seen.
+        *)
+
+    method root = root
+
+    val spec = init_spec
+        (* A hashtable that contains exemplar objects for the various element
+        * types. If an element is parsed, the exemplar is looked up and
+        * "cloned" (by the "create" method)
+        *)
+
+    val mutable current_data = []
+       (* Collects character data. *)
+
+    method collect_data s =
+        (* Collects the character material 's' *)
+        current_data <- s :: current_data
+
+    method save_data =
+      (* Puts the material collected in 'current_data' into a new
+       * node, and appends this node as a new sub node to 'current'
+       *)
+      match current_data with
+         [] ->
+           ()
+       | [ str ] ->
+           if str <> "" then
+             self # current # add_node (create_data_node spec dtd str);
+           current_data <- []
+       | _ ->
+           let count = List.fold_left 
+                         (fun acc s -> acc + String.length s) 
+                         0
+                         current_data in
+           let str = String.create count in
+           let pos = ref count in
+           List.iter
+             (fun s ->
+                let l = String.length s in
+                pos := !pos - l;
+                String.blit
+                ~src:s
+                ~src_pos:0
+                ~dst:str
+                ~dst_pos:(!pos)
+                ~len:l
+             )
+             current_data;
+           assert(!pos = 0);
+           if str <> "" then
+             self # current # add_node (create_data_node spec dtd str);
+           current_data <- []
+             
+
+    method only_whitespace data =
+       (* Checks that the string "data" contains only whitespace. On failure,
+        * WF_error is raised.
+        *)
+      let lexbuf = Lexing.from_string data in
+      let t1 = lexerset.scan_name_string lexbuf in
+      if t1 <> Ignore then
+       raise(WF_error("Data not allowed here"));
+      let t2 = lexerset.scan_name_string lexbuf in
+      if t2 <> Eof then
+       raise(WF_error("Data not allowed here"));
+      ()
+
+    initializer
+      (* CHECKS: *)
+      if config.encoding <> dtd # encoding then
+       failwith("Encoding mismatch");
+
+      (* --- Initialize 'elstack': Push the super-root on the stack. *)
+      let super_root = 
+       if config.enable_super_root_node then
+         create_super_root_node spec dtd 
+       else
+         (* because spec may not contain an exemplar for the super root: *)
+         create_no_node spec dtd
+      in
+      (* Push the super root (or its substitute) onto the stack: *)
+      Stack.push (super_root, (self :> entity_id)) elstack;
+
+
+
+      (********* Here the method "parse" begins. The grammar below is
+       *         transformed to a local function of this method
+       *)
+
+      method parse context start_symbol =
+
+       let parse_ignored_section yy_current yy_get_next =
+         (* A special parser which should be used after <![IGNORE[.
+          * It parses until the corresponding ]]> is found.
+          *)
+
+          while yy_current() = Ignore do
+           ignore(yy_get_next());
+         done;
+
+         ( match yy_current() with
+             Conditional_body _ -> ()
+           | _                  -> raise Parsing.Parse_error;
+         );
+
+         let en = context.manager # current_entity in
+         let llev = ref 1 in
+         while !llev >= 1 do
+           let igntok = en # next_ignored_token in
+           (* next_ignored_token: uses a special lexer that only
+            * recognizes Conditional_begin and Conditional_end;
+            * other character combinations are ignored.
+            *)
+           (* NOTE: next_ignored_token works much like yy_get_next,
+            * but it does not set the current token!
+            *)
+           match igntok with
+               Conditional_begin _ ->
+                 llev := !llev + 1
+             | Conditional_end _ ->
+                 llev := !llev - 1;
+                 (* Because the loop may be exited now: *)
+                 context.current_token <- igntok;
+             | (End_entity | Eof) ->
+                 raise Parsing.Parse_error
+             | _ ->
+                 ()
+         done;
+         
+       in
+
+
+       let check_and_parse_xmldecl xmldecl =
+         if process_xmldecl then begin
+           let v, _, s = decode_doc_xml_pi (decode_xml_pi xmldecl) in
+           check_version_num v;
+           doc # init_xml_version v;
+           let v = match s with
+               None -> false
+             | Some "yes" -> true
+             | Some "no" -> false
+             | _ -> raise (WF_error("Illegal 'standalone' declaration"))
+           in
+           if config.recognize_standalone_declaration then 
+             dtd # set_standalone_declaration v
+         end
+       in
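+       (* Editorial illustration: when process_xmldecl is true and the
+        * declaration is
+        *   <?xml version="1.0" standalone="yes"?>
+        * the function above records the version string "1.0" in the document
+        * object and, if config.recognize_standalone_declaration is set,
+        * calls dtd # set_standalone_declaration true. A missing standalone
+        * pseudo-attribute or the value "no" yields false; any other value
+        * raises WF_error.
+        *)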
+
+       let recode_utf8 s =
+         (* Recode 's' to UTF-8 *)
+         if config.encoding = `Enc_utf8 then
+           s   (* No recoding necessary *)
+         else
+           Netconversion.recode_string 
+             ~in_enc:(config.encoding :> encoding) ~out_enc:`Enc_utf8 s
+       in
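+       (* Editorial illustration (a sketch; `Enc_iso88591 is assumed to be
+        * one of the encodings supported by Netconversion): if the parser
+        * runs with config.encoding = `Enc_iso88591, a literal such as
+        * "caf\233" is converted by recode_utf8 into its UTF-8 form, roughly
+        *
+        *   Netconversion.recode_string
+        *     ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 "caf\233"
+        *)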
+
+       
+%%
+
+/* The following grammar looks similar to ocamlyacc grammars, but 
+ * ocamlyacc is actually not used to transform the grammar into a parser. 
+ * Instead, the parser generator m2parsergen is applied.
+ *
+ * The format of the grammar is different (see m2parsergen/README), 
+ * but I hope that you can understand most features immediately. 
+ *
+ * The type of the parser is different: m2parsergen creates a top-down
+ * parser while ocamlyacc generates a LALR(1) parser.
+ *
+ * The way the generated code is called is different: ocamlyacc produces
+ * lots of top-level definitions whereas m2parsergen generates only 
+ * a local let-in-phrase. This is explained in the already mentioned
+ * README file.
+ */ 
+
+/* See Pxp_types.ml for comments on the various tokens */
+
+%token Begin_entity
+%token End_entity
+%token Comment_begin
+%token Comment_end
+%token Ignore
+%token Eq
+%token Rangle
+%token Rangle_empty
+%token <> Conditional_begin
+%token <> Conditional_body
+%token <> Conditional_end
+%token Percent
+%token Plus
+%token Star
+%token Bar
+%token Comma
+%token Qmark
+%token Pcdata
+%token Required
+%token Implied
+%token Fixed
+%token Eof
+
+%token <> Comment_material
+%token <> Doctype
+%token <> Doctype_rangle
+%token <> Dtd_begin
+%token <> Dtd_end
+%token <> Decl_element
+%token <> Decl_attlist
+%token <> Decl_entity
+%token <> Decl_notation
+%token <> Decl_rangle
+%token <> Lparen
+%token <> Rparen
+%token <> RparenPlus
+%token <> RparenStar
+%token <> RparenQmark
+
+%token <> Tag_beg
+%token <> Tag_end
+
+%token <> PI
+%token <> PI_xml
+%token <> Cdata
+%token <> CRef
+%token <> ERef
+%token <> PERef
+%token <> CharData
+%token <> LineEnd
+%token <> Name
+%token <> Nametoken
+%token <> Attval
+%token <> Attval_nl_normalized
+%token <> Unparsed_string
+
+/* START SYMBOLS:
+ *
+ * "ext_document":       parses a complete XML document (i.e. containing a
+ *                       <!DOCTYPE..> and an element)
+ * "ext_declarations":   parses an "external DTD subset", i.e. a sequence
+ *                       of declarations
+ * "ext_element":        parses a single element (no <!DOCTYPE...> allowed);
+ *                       the element needs not to be the root element of the
+ *                       DTD
+ *
+ * The functions corresponding to these symbols return always () because
+ * they only have side-effects.
+ */
+
+/* SOME GENERAL COMMENTS:
+ *
+ * The parser does not get its tokens from the lexers directly. Instead,
+ * an entity object sits between the parser and the lexers. This
+ * object already handles:
+ *
+ * - References to general and parameter entities. The token stream is
+ *   modified such that tokens automatically come from the referenced entities.
+ *   External parameter entities and all general entities are bracketed by
+ *   the two special tokens Begin_entity and End_entity. The parser must
+ *   check that these brackets are correctly nested.
+ */
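+
+/* Editorial illustration (not part of the grammar): when the content of an
+ * entity, say <!ENTITY x "<a/>">, is referenced via &x; in element content,
+ * the token stream seen by the parser looks roughly like
+ *
+ *   ... Begin_entity  Tag_beg("a", ...)  Rangle_empty  End_entity ...
+ *
+ * and the rules below (entity_ref, eref_xmldecl_then_rest) check that the
+ * two bracketing tokens are properly nested.
+ */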
+
+%%
+
+
+ext_document():
+  Begin_entity 
+  doc_xmldecl_then_misc_then_prolog_then_rest() End_entity
+    {{
+      if n_tags_open <> 0 then
+       raise(WF_error("Missing end tag"))
+    }}
+
+
+/* In the following rule, we must find out whether there is an XML declaration
+ * or not, and directly after that either "process_xmldecl" or 
+ * "process_missing_xmldecl" of the current entity must be called.
+ * AND IT MUST HAPPEN IMMEDIATELY! Because of this, the invocation is carried
+ * out in the "$" clause immediately following the first token.
+ *
+ * TODO: This is not enough. The first token may be a tag, and the tag
+ * may already contain non-ASCII characters. (But in this case, the resolvers
+ * assume UTF8, and they are right...)
+ */
+
+doc_xmldecl_then_misc_then_prolog_then_rest():
+  pl:PI_xml 
+  $ {{ context.manager # current_entity # process_xmldecl pl; 
+       check_and_parse_xmldecl pl;
+    }}
+  misc()* doc_prolog_then_rest()
+    {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+  misc() misc()* doc_prolog_then_rest()
+    {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+  doctypedecl() misc()* contents_start()
+    {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+  contents_start()
+    {{ () }}
+
+
+doc_prolog_then_rest():
+  doctypedecl() misc()* contents_start()
+    {{ () }}
+| contents_start() 
+    {{ () }}
+
+ext_element():
+  Begin_entity el_xmldecl_then_misc_then_rest() End_entity
+    {{
+      if n_tags_open <> 0 then
+       raise(WF_error("Missing end tag"))
+    }}
+
+
+/* See the comment for doc_xmldecl_then_misc_then_prolog_then_rest. */
+
+el_xmldecl_then_misc_then_rest():
+  pl:PI_xml
+  $ {{ context.manager # current_entity # process_xmldecl pl; }}
+  misc()* contents_start() 
+    {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+  misc() misc()* contents_start() 
+    {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+  contents_start()
+    {{ () }}
+
+
+ext_declarations():
+  /* Parses a sequence of declarations given by an entity. As side-effect,
+   * the parsed declarations are put into the dtd object.
+   */
+  Begin_entity decl_xmldecl_then_rest() 
+   {{ () }}
+| Eof
+   {{ () }}
+
+
+decl_xmldecl_then_rest():
+  /* Note: This rule is also called from declaration()! */
+  pl:PI_xml
+  $ {{ context.manager # current_entity # process_xmldecl pl; 
+    }}
+  declaration()* End_entity
+   {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+  declaration() declaration()* End_entity
+   {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+  End_entity
+   {{ () }}
+
+
+misc():
+  pi()
+    {{ () }}
+| data: CharData
+    /* In this context, the lexers sometimes do not recognize white space; 
+     * instead CharData tokens containing white space are delivered.
+     */
+    {{ self # only_whitespace data }}
+| Ignore
+    {{ () }}
+| comment()
+    {{ () }}
+
+
+/********************* DOCUMENT TYPE DECLARATION *************************/
+
+doctypedecl():
+  /* parses from <!DOCTYPE to >. As side-effect, first the declarations of
+   * the internal DTD (if any) are put into the dtd object, then the declarations
+   * of the external DTD (if any) are put into this DTD object.
+   */
+  doctype_entid:  Doctype 
+             ws:  Ignore Ignore*
+                  doctypedecl_material (doctype_entid)
+    {{ () }}
+  ? {{ match !yy_position with
+          "ws" -> raise(WF_error("Whitespace is missing after `DOCTYPE'"))
+        | _    -> raise(WF_error("Bad DOCTYPE declaration"))
+     }}
+
+
+/* TRICK: 
+ *   ws: Ignore? Ignore* 
+ * is meant seriously. The effect is that ws becomes a boolean variable
+ * which is true if there is an Ignore token and false otherwise.
+ * This construct is faster than just 
+ *   ws: Ignore*
+ * in which case ws becomes an integer variable containing the number of
+ * Ignore tokens. Counting the tokens is slower than merely checking whether
+ * at least one exists.
+ *
+ * We need to know whether there is an Ignore token (representing white
+ * space), because white space is only obligatory if an identifier for the
+ * external subset follows; this conditional syntax constraint is simply
+ * programmed in the body of the grammar rule.
+ */
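+
+/* Editorial illustration: for the input <!DOCTYPE r SYSTEM "r.dtd"> the
+ * variable ws of doctypedecl_material becomes true (white space follows the
+ * root name), whereas for <!DOCTYPE r> it becomes false, which is harmless
+ * because no external identifier follows.
+ */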
+
+doctypedecl_material(doctype_entid):
+  root_name:             Name
+  ws:                    Ignore? Ignore*
+  external_subset:       external_id()? 
+                         Ignore*
+  internal_subset:       internal_dtd()? 
+                         Ignore*
+  doctype_rangle_entid:  Doctype_rangle
+    {{ 
+      if doctype_entid != doctype_rangle_entid then
+       raise (Validation_error("Entities not properly nested with DOCTYPE declaration"));
+      dtd # set_root root_name;
+      begin match external_subset, internal_subset with
+         None, None      -> ()         (* no DTD means no ID *)
+       | None, Some _    -> dtd # set_id Internal
+       | Some id, None   -> dtd # set_id (External id)
+       | Some id, Some _ -> dtd # set_id (Derived id)
+      end;
+      (* Now read the external subset. Note that the internal subset has
+       * precedence and must therefore be read first.
+       *)
+      begin match external_subset with
+         None -> ()
+       | Some id ->
+           if not ws then
+             raise(WF_error("Whitespace is missing after `DOCTYPE " ^ 
+                            root_name ^ "'"));
+           let r' = resolver # clone in
+           let pobj =
+             new parser_object
+               (new document config.warner)
+               dtd
+               extend_dtd
+               config
+               r'
+               spec
+               process_xmldecl
+               (fun x -> x)
+               None
+           in
+           let en = new external_entity r' dtd "[dtd]"
+                        config.warner id false config.errors_with_line_numbers
+                        config.encoding
+           in
+           en # set_debugging_mode (config.debugging_mode);
+           let mgr = new entity_manager en in
+           en # open_entity true Declaration;
+           try
+             let context = make_context mgr in
+             pobj # parse context Ext_declarations;
+             ignore(en # close_entity);
+           with
+               error ->
+                 ignore(en # close_entity);
+                 r' # close_all;
+                 let pos = mgr # position_string in
+                 raise (At(pos, error))
+      end;
+      dtd # validate
+    }}
+  ? {{
+       match !yy_position with
+          "doctype_rangle_entid" -> raise(WF_error("`>' expected"))
+        | _                      -> raise(WF_error("Bad DOCTYPE declaration"))
+    }}
+
+/* Note that there are no keyword tokens for SYSTEM or PUBLIC, as these would
+ * be difficult to recognize in the lexical contexts. Because of this,
+ * SYSTEM/PUBLIC is parsed as a name, and the rule for everything after
+ * SYSTEM/PUBLIC is selected dynamically.
+ */
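+
+/* Editorial illustration (hypothetical identifiers): the external_id() rule
+ * below returns for example
+ *
+ *   SYSTEM "sample.dtd"                      -> System "sample.dtd"
+ *   PUBLIC "-//FOO//DTD sample//EN" "s.dtd"  -> Public("-//FOO//DTD sample//EN",
+ *                                                      "s.dtd")
+ *
+ * where both literals are recoded to UTF-8 by recode_utf8.
+ */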
+
+external_id():
+  tok:Name 
+  $ {{ 
+       let followup = 
+        match tok with
+            "SYSTEM" -> parse_system_id
+                           (* Apply the rule system_id (below) to parse the
+                           * rest of the ID 
+                           *)
+          | "PUBLIC" -> parse_public_id
+                           (* Apply the rule public_id (below) to parse the
+                           * rest of the ID 
+                           *)
+          | _        -> raise(WF_error("SYSTEM or PUBLIC expected"))
+       in
+     }}
+  ws:Ignore Ignore*
+  r:[followup]()
+    {{ r }}
+  ? {{ match !yy_position with
+          "ws" -> raise(WF_error("Whitespace is missing after " ^ tok))
+        | _    -> raise(WF_error("Bad SYSTEM or PUBLIC identifier"))
+    }}
+
+
+system_id():
+  str:Unparsed_string 
+    {{ System (recode_utf8 str) }}
+
+
+public_id():
+  str1: Unparsed_string 
+    ws: Ignore Ignore*
+  str2: Unparsed_string
+    {{ check_public_id str1;
+       Public(recode_utf8 str1, recode_utf8 str2)
+    }}
+  ? {{ match !yy_position with
+          "ws" -> raise(WF_error("Whitespace is missing between the literals of the PUBLIC identifier"))
+        | _    -> raise(WF_error("Bad PUBLIC identifier"))
+    }}
+
+
+/* The internal subset: "[" declaration* "]". While parsing the declarations
+ * the object variable p_internal_subset must be true; however, if there
+ * are entity references, this variable must be reset to false during
+ * the entity. (See the rule for "declaration" below.)
+ */
+
+internal_dtd():
+  dtd_begin_entid:    internal_dtd_begin() 
+                      declaration()* 
+  dtd_end_entid:      internal_dtd_end()
+    {{ 
+      if dtd_begin_entid != dtd_end_entid then
+       raise(Validation_error("Entities not properly nested with internal DTD subset"))
+    }}
+  ? {{ match !yy_position with
+          "dtd_end_entid" -> raise(WF_error("`]' expected"))
+        | _               -> raise(WF_error("Bad internal DTD subset"))
+    }}
+
+
+internal_dtd_begin():
+  Dtd_begin
+    {{ assert (not p_internal_subset);
+       p_internal_subset <- true }}
+
+
+internal_dtd_end():
+  Dtd_end
+    {{ assert p_internal_subset;
+       p_internal_subset <- false }}
+
+
+declaration():
+  /* Parses a single declaration (or processing instruction). As side-effect
+   * the parsed declaration is stored into the dtd object.
+   */
+  elementdecl()
+    {{ () }}
+| attlistdecl()
+    {{ () }}
+| entid:Decl_entity ws:Ignore Ignore* e:entitydecl(entid)
+    {{ () }}
+  ? {{ match !yy_position with
+          "ws" -> raise(WF_error("Whitespace is missing after ENTITY")) 
+        | "e"  -> raise(WF_error("Name or `%' expected"))
+        | _    -> raise(WF_error("Bad entity declaration"))
+    }}
+| notationdecl()
+    {{ () }}
+| pi: PI
+    {{ let target, value = pi in
+       let pi = new proc_instruction target value config.encoding in
+       dtd # add_pinstr pi
+    }}
+| Ignore
+    {{ () }}
+| Comment_begin Comment_material* ce:Comment_end
+    {{ () }}
+  ? {{ match !yy_position with
+          "ce" -> raise(WF_error("`-->' expected"))
+        | _    -> raise(WF_error("Bad comment"))
+    }}
+| Begin_entity
+  $ {{ (* Set 'p_internal_subset' to 'false' until the matching 'end_entity'
+       * rule is parsed. This allows unrestricted usage of parameter entities
+       * within declarations of internal entities.
+       *)
+       let old_p_internal_subset = p_internal_subset in
+       p_internal_subset <- false;
+    }}
+  decl_xmldecl_then_rest()
+    {{ (* Restore the old value of 'p_internal_subset'. *)
+       p_internal_subset <- old_p_internal_subset;
+       ()
+    }}
+| begin_entid:Conditional_begin
+  $ {{ (* Check whether conditional sections are allowed at this position. *)
+       if p_internal_subset then 
+        raise(WF_error("Restriction of the internal subset: Conditional sections not allowed"));
+     }}
+  Ignore*
+  cond:conditional_section()  end_entid:Conditional_end
+    {{ (* Check whether Conditional_begin and Conditional_end are in the same
+       * entity. (This restriction is explained in the file SPECS.)
+       *)
+       if begin_entid != end_entid then
+        raise(Validation_error("The first and the last token of conditional sections must be in the same entity (additional restriction of this parser)"));
+    }}
+  ? {{ match !yy_position with
+          "end_entid" -> raise(WF_error("`>]>' expected"))
+        | "cond"      -> raise(WF_error("INCLUDE or IGNORE expected"))
+        | _           -> raise(WF_error("Bad conditional section"))
+    }}
+
+/* The tokens INCLUDE/IGNORE are scanned as names, and the selection of the
+ * right parsing rule is dynamic.
+ * Note that parse_ignored_section is not defined by a grammar rule but
+ * by a conventional let-binding above.
+ */
+
+conditional_section():
+  include_or_ignore:Name
+  $ {{ let parsing_function =
+        match include_or_ignore with
+            "INCLUDE"  -> parse_included_section
+                           (* invoke rule "included_section" below *)
+          | "IGNORE"   -> parse_ignored_section
+                           (* invoke function "parse_ignored_section" *)
+          | _          -> raise(WF_error("INCLUDE or IGNORE expected"))
+       in
+    }}
+  [ parsing_function ] ()  
+    {{ () }}
+  ? {{ raise(WF_error("Bad conditional section")) }}
+
+included_section():
+  Conditional_body declaration()*  
+    {{ () }}
+| Ignore Ignore* Conditional_body declaration()*  
+    {{ () }}
+
+
+/*************************** ELEMENT DECLARATIONS ********************/
+
+elementdecl():
+  /* parses <!ELEMENT ... >. Puts the parsed element type as side-effect into
+   * dtd.
+   */
+  decl_element_entid:      Decl_element 
+  $ {{ let extdecl = context.manager # current_entity_counts_as_external in
+     }}
+  ws1:                     Ignore Ignore*
+  name:                    Name 
+  ws2:                     Ignore Ignore*
+  content_model:           contentspec() 
+                           Ignore*
+  decl_rangle_entid:       Decl_rangle
+    {{
+      if decl_element_entid != decl_rangle_entid then
+       raise (Validation_error "Entities not properly nested with ELEMENT declaration");
+      if extend_dtd then begin
+       let el = new dtd_element dtd name in
+       (* It is allowed that an <!ATTLIST...> precedes the corresponding
+        * <!ELEMENT...>. Because of this it is possible that there is already
+        * an element called 'name' in the DTD, in which case we only need to
+        * set the content model of this element.
+        *)
+       try
+         dtd # add_element el;
+         el # set_cm_and_extdecl content_model extdecl;
+       with
+           Not_found ->  (* means: there is already an element 'name' *)
+             let el' = dtd # element name in
+             el' # set_cm_and_extdecl content_model extdecl;
+              (* raises Validation_error if el' already has a content model *)
+      end
+    }}
+  ? {{ match !yy_position with
+          ("ws1"|"ws2")   -> raise(WF_error("Whitespace is missing"))
+        | "name"          -> raise(WF_error("The name of the element is expected here"))
+        | "content_model" -> raise(WF_error("Content model expression expected"))
+        | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
+        | _                   -> raise(WF_error("Bad element type declaration"))
+    }}
+
+contentspec():
+  /* parses a content model and returns it (type content_model_type) */
+  name: Name   /* EMPTY or ANY */
+    {{ match name with
+         "EMPTY" -> Empty
+       | "ANY"   -> Any
+       | _       -> raise(WF_error("EMPTY, ANY, or a subexpression expected"))
+    }}
+| entid:Lparen  Ignore*  term:mixed_or_regexp(entid)
+    {{ term }}
+  ? {{ raise(WF_error("Bad content model expression")) }}
+
+
+/* Many of the following rules have an lparen_entid argument. This is the
+ * internal ID of the entity containing the corresponding left parenthesis;
+ * by comparing it with the ID of the entity of the right parenthesis, the
+ * constraint that both parentheses must be in the same entity is enforced.
+ */
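+
+/* Editorial illustration (a hypothetical, editor-supplied example): with a
+ * parameter entity such as
+ *   <!ENTITY % start "(a|b">
+ * a declaration like <!ELEMENT x %start;)> would place the left and right
+ * parentheses in different entities; the comparison of the two entity ids
+ * then raises the Validation_error about improperly nested parentheses.
+ */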
+
+mixed_or_regexp(lparen_entid):
+  re: choice_or_seq(lparen_entid)
+    {{ Regexp re }}
+| m: mixed(lparen_entid)
+    {{ m }}
+
+
+multiplier():
+  /* returns one of the multiplier symbols (?,*,+) */
+  Plus
+    {{ Plus }}
+| Star
+    {{ Star }}
+| Qmark
+    {{ Qmark }}
+
+
+mixed (lparen_entid) :
+                   Pcdata 
+                   Ignore*
+  material:        mixed_alternatives_top()
+    {{ 
+      let rest, rparen_entid = material in
+      if lparen_entid != rparen_entid then
+       raise (Validation_error "Entities not properly nested with parentheses");
+      Mixed (MPCDATA :: rest)
+    }}
+  ? {{ raise(WF_error("Bad content model expression")) }}
+
+
+mixed_alternatives_top():
+  entid: Rparen
+    {{ [], entid }}
+| entid: RparenStar
+    {{ [], entid }}
+| Bar Ignore* name:Name Ignore* names:mixed_alternative()* entid:RparenStar
+    {{ 
+       (MChild name :: names), entid
+    }}
+  ? {{ match !yy_position with
+          "name"  -> raise(WF_error("Name expected"))
+        | "entid" -> raise(WF_error("`)*' expected"))
+        | _       -> raise(WF_error("Bad content model expression"))
+    }}
+
+
+mixed_alternative() :
+  Bar Ignore* name:Name Ignore*
+    {{ MChild name }}
+  ? {{ match !yy_position with
+          "name" -> raise(WF_error("Name expected"))
+        | _      -> raise(WF_error("Bad content model expression"))
+    }}
+
+
+
+choice_or_seq (lparen_entid):
+  /* parses either a regular expression, or a mixed expression. Returns
+   * Mixed spec or Regexp spec (content_model_type).
+   * Which kind of expression (regexp or mixed) is being read is recognized
+   * after the first subexpression has been parsed; the other subexpressions
+   * must be of the same kind.
+   */
+  re:     cp() 
+          Ignore*
+  factor: choice_or_seq_factor()
+    {{
+      let (finalmark,subexpr), rparen_entid = factor in
+      if lparen_entid != rparen_entid then
+       raise (Validation_error "Entities not properly nested with parentheses");
+      (* Check that the other subexpressions are "regexp", too, and
+       * merge them with the first.
+       *)
+      let re' =
+       match subexpr with
+           Alt []  ->  re
+         | Alt alt -> Alt (re :: alt)
+         | Seq seq -> Seq (re :: seq)
+         | _       -> assert false
+      in
+      (* Interpret the finalmark. *)
+      match finalmark with
+         Ignore -> re'
+       | Plus   -> Repeated1 re'
+       | Star   -> Repeated re'
+       | Qmark  -> Optional re'
+       | _      -> assert false
+    }}
+  ? {{ raise(WF_error("Bad content model expression")) }}
+
+choice_or_seq_factor():
+  /* Parses "|<subexpr>|...)" or ",<subexpr>,...)", both forms optionally
+   * followed by ?, *, or +.
+   * Returns ((finalmark, expr), rparen_entid), where
+   * - finalmark is the character after the right parenthesis or Ignore
+   * - expr is either
+   *   Alt []              meaning that only ")" has been found
+   *   Alt non_empty_list  meaning that the subexpressions are separated by '|'
+   *   Seq non_empty_list  meaning that the subexpressions are separated by ','
+   */
+  entid:Rparen
+    {{ (Ignore, Alt []), entid }}
+| entid:RparenPlus
+    {{ (Plus, Alt []), entid }}
+| entid:RparenStar
+    {{ (Star, Alt []), entid }}
+| entid:RparenQmark
+    {{ (Qmark, Alt []), entid }}
+| Bar Ignore* re:cp() Ignore* factor:choice_or_seq_factor()
+    {{ 
+      let (finalmark, subexpr), rparen_entid = factor in
+      begin match subexpr with
+         Alt []  -> (finalmark, (Alt [re])), rparen_entid
+       | Alt alt -> (finalmark, (Alt (re :: alt))), rparen_entid
+       | _       -> raise(WF_error("It is not allowed to mix alternatives and sequences"))
+      end
+    }}
+  ? {{ raise(WF_error("Bad content model expression")) }}
+| Comma Ignore* re:cp() Ignore* factor:choice_or_seq_factor()
+    {{
+      let (finalmark, subexpr), rparen_entid = factor in
+      begin match subexpr with
+         Alt []  -> (finalmark, (Seq [re])), rparen_entid
+       | Seq seq -> (finalmark, (Seq (re :: seq))), rparen_entid
+       | _       -> raise(WF_error("It is not allowed to mix alternatives and sequences"))
+      end
+    }}
+  ? {{ raise(WF_error("Bad content model expression")) }}
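+
+/* Editorial illustration: for the content model (a|b)* the rules above
+ * produce, step by step,
+ *   cp()                     -> Child "a"
+ *   choice_or_seq_factor()   -> (Star, Alt [Child "b"]), rparen_entid
+ *   choice_or_seq()          -> Repeated (Alt [Child "a"; Child "b"])
+ * and mixed_or_regexp finally wraps the result as Regexp (...).
+ */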
+
+cp():
+  /* parse either a name, or a parenthesized subexpression "(...)"  */
+  name:Name  m:multiplier()?
+    {{ match m with
+         None       -> Child name
+       | Some Plus  -> Repeated1 (Child name)
+       | Some Star  -> Repeated  (Child name)
+       | Some Qmark -> Optional  (Child name)
+       | _          -> assert false
+    }}
+  ? {{ raise(WF_error("Bad content model expression")) }}
+| entid:Lparen Ignore* m:choice_or_seq(entid)
+    {{ m }}
+  ? {{ raise(WF_error("Bad content model expression")) }}
+
+
+/********************* ATTRIBUTE LIST DECLARATION ***********************/
+
+attlistdecl():
+  /* parses <!ATTLIST ... >. Enters the attribute list in dtd as side-
+   * effect.
+   */
+  decl_attlist_entid: Decl_attlist 
+  $ {{ let extdecl = context.manager # current_entity_counts_as_external in
+    }}
+  ws1:                Ignore Ignore*
+  el_name:            Name 
+  ws:                 Ignore? Ignore*
+  factor:             attdef_factor()
+    {{ 
+      let at_list, decl_rangle_entid = factor in
+
+      if decl_attlist_entid != decl_rangle_entid then
+       raise (Validation_error "Entities not properly nested with ATTLIST declaration");
+
+      if not ws && at_list <> [] then begin
+       match at_list with
+           (name,_,_) :: _ ->
+             (* This is normally impossible, because the lexer demands 
+              * some other token between two names.
+              *)
+             raise(WF_error("Whitespace is missing before `" ^ name ^ "'"));
+         | _ -> assert false
+      end;
+
+      if extend_dtd then begin
+       let new_el = new dtd_element dtd el_name in
+       (* Note that it is allowed that <!ATTLIST...> precedes the corresponding
+        * <!ELEMENT...> declaration. In this case we already add the element
+        * declaration to the DTD but leave the content model unspecified.
+        *)
+       let el =
+         try
+           dtd # add_element new_el;
+           new_el
+         with
+             Not_found ->  (* already added *)
+               let old_el = dtd # element el_name in
+               if old_el # attribute_names <>  [] then
+                 config.warner # warn ("More than one ATTLIST declaration for element type `" ^
+                                       el_name ^ "'");
+               old_el
+       in
+       List.iter
+         (fun (a_name, a_type, a_default) ->
+            el # add_attribute a_name a_type a_default extdecl)
+         at_list
+      end
+    }}
+  ? {{ match !yy_position with
+          "ws1"     -> raise(WF_error("Whitespace is missing after ATTLIST"))
+        | "el_name" -> raise(WF_error("The name of the element is expected here"))
+        | "factor"  -> raise(WF_error("Another attribute name or `>' expected"))
+        | _         -> raise(WF_error("Bad attribute declaration"))
+    }}
+
+
+attdef_factor():
+  /* parses a list of triples <name> <type> <default value> and returns the
+   * list as (string * att_type * att_default) list.
+   */
+  attdef:attdef()   ws:Ignore?   Ignore*   factor:attdef_factor()
+    {{ 
+      let attdef_rest, decl_rangle_entid = factor in
+      if not ws && attdef_rest <> [] then begin
+       match attdef_rest with
+           (name,_,_) :: _ ->
+             raise(WF_error("Missing whitespace before `" ^ name ^ "'"));
+         | _ -> assert false
+      end;
+      (attdef :: attdef_rest), decl_rangle_entid }}
+  ? {{ match !yy_position with
+        | "factor"  -> raise(WF_error("Another attribute name or `>' expected"))
+        | _         -> raise(WF_error("Bad attribute declaration"))
+    }}
+| entid:Decl_rangle
+    {{ [], entid }}
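+
+/* Editorial illustration (hypothetical declaration): for
+ *   <!ATTLIST a x CDATA #IMPLIED y (p|q) "p">
+ * attdef_factor() yields the list
+ *   [ ("x", A_cdata, D_implied); ("y", A_enum ["p";"q"], D_default "p") ]
+ * together with the entity id of the closing Decl_rangle.
+ */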
+
+
+attdef():
+  /* Parses a single triple */
+  name:     Name 
+  ws1:      Ignore Ignore*
+  tp:       atttype() 
+  ws2:      Ignore Ignore*
+  default:  defaultdecl()
+    {{ (name,tp,default) }}
+  ? {{ match !yy_position with
+          ("ws1"|"ws2") -> raise(WF_error("Whitespace is missing"))
+        | "tp"          -> raise(WF_error("Type of attribute or `(' expected"))
+        | "default"     -> raise(WF_error("#REQUIRED, #IMPLIED, #FIXED or a string literal expected"))
+        | _             -> raise(WF_error("Bad attribute declaration"))
+    }}
+
+atttype():
+  /* Parses an attribute type and returns it as att_type. */
+  name:      Name 
+  $ {{ let followup = 
+        if name = "NOTATION" then 
+          parse_notation
+        else
+          parse_never
+       in
+     }}
+  nota:      [followup]()?
+    {{ 
+       match name with
+         "CDATA"    -> A_cdata
+       | "ID"       -> A_id
+       | "IDREF"    -> A_idref
+       | "IDREFS"   -> A_idrefs
+       | "ENTITY"   -> A_entity
+       | "ENTITIES" -> A_entities
+       | "NMTOKEN"  -> A_nmtoken
+       | "NMTOKENS" -> A_nmtokens
+       | "NOTATION" ->
+           (match nota with
+                None   -> raise(WF_error("Error in NOTATION type (perhaps missing whitespace after NOTATION?)"))
+              | Some n -> n
+            )
+       | _          -> raise(WF_error("One of CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, NOTATION, or a subexpression expected"))
+    }}
+  ? {{ raise(WF_error("Bad attribute declaration (perhaps missing whitespace after NOTATION)")) }}
+
+|         Lparen 
+          Ignore* 
+  name:   name_or_nametoken() 
+          Ignore* 
+  names:  nmtoken_factor()* 
+  rp:     Rparen
+    /* Enumeration */
+    {{ A_enum(name :: names) }}
+  ? {{ match !yy_position with
+          "name"  -> raise(WF_error("Name expected"))
+        | "names" -> raise(WF_error("`|' and more names expected, or `)'"))
+        | "rp"    -> raise(WF_error("`|' and more names expected, or `)'"))
+        | _       -> raise(WF_error("Bad enumeration type"))
+    }}
+
+
+never():
+  /* The always failing rule */
+  $ {{ raise Not_found; }}
+  Doctype   /* questionable */
+    {{ A_cdata    (* Does not matter *)
+    }}
+
+
+notation():
+         Ignore Ignore*
+  lp:    Lparen 
+         Ignore*
+  name:  Name 
+         Ignore* 
+  names: notation_factor()* 
+  rp:    Rparen
+    {{ A_notation(name :: names) }}
+  ? {{ match !yy_position with
+          "lp"    -> raise(WF_error("`(' expected"))
+        | "name"  -> raise(WF_error("Name expected"))
+        | "names" -> raise(WF_error("`|' and more names expected, or `)'"))
+        | "rp"    -> raise(WF_error("`|' and more names expected, or `)'"))
+        | _       -> raise(WF_error("Bad NOTATION type"))
+    }}
+
+
+notation_factor():
+  /* Parse "|<name>" and return the name */
+  Bar Ignore* name:Name Ignore*
+    {{ name }}
+  ? {{ match !yy_position with
+          "name" -> raise(WF_error("Name expected"))
+        | _      -> raise(WF_error("Bad NOTATION type"))
+    }}
+
+nmtoken_factor():
+  /* Parse "|<nmtoken>" and return the nmtoken */
+  Bar Ignore* n:name_or_nametoken() Ignore*
+    {{ n }}
+  ? {{ match !yy_position with
+          "n" -> raise(WF_error("Nametoken expected"))
+        | _   -> raise(WF_error("Bad enumeration type"))
+    }}
+
+
+name_or_nametoken():
+  n:Name      {{ n }}
+| n:Nametoken {{ n }}
+
+
+/* The default values must be expanded and normalized. This has been implemented
+ * by the function expand_attvalue.
+ */
+
+
+defaultdecl():
+  /* Parse the default value for an attribute and return it as att_default */
+  Required
+    {{ D_required }}
+| Implied
+    {{ D_implied }}
+| Fixed ws:Ignore Ignore* str:Unparsed_string
+    {{ D_fixed (expand_attvalue lexerset dtd str config.warner false) }}
+  ? {{ match !yy_position with
+          "ws"  -> raise(WF_error("Whitespace is missing after #FIXED"))
+        | "str" -> raise(WF_error("String literal expected"))
+        | _     -> raise(WF_error("Bad #FIXED default value"))
+    }}
+| str:Unparsed_string
+    {{ D_default (expand_attvalue lexerset dtd str config.warner false) }}
+
+
+/**************************** ENTITY DECLARATION ***********************/
+
+entitydecl(decl_entity_entid):
+  /* parses everything _after_ <!ENTITY until the matching >. The parsed 
+   * entity declaration is entered into the dtd object as side-effect.
+   */
+  name:               Name 
+  $ {{ let extdecl = context.manager # current_entity_counts_as_external in
+    }}
+  ws:                 Ignore Ignore* 
+  material:           entitydef() 
+                      Ignore*
+  decl_rangle_entid:  Decl_rangle     
+    /* A general entity */
+    {{
+       if decl_entity_entid != decl_rangle_entid then
+        raise (Validation_error "Entities not properly nested with ENTITY declaration");
+      let en =
+       (* Distinguish between
+         * - internal entities
+         * - external entities
+         * - NDATA (unparsed) entities
+         *)
+       match material with
+           (Some s, None,     None)   ->
+             new internal_entity dtd name config.warner s p_internal_subset
+                 config.errors_with_line_numbers false config.encoding
+         | (None,   Some xid, None)   ->
+             new external_entity (resolver # clone) dtd name config.warner
+                                  xid false config.errors_with_line_numbers
+                                 config.encoding
+
+         | (None,   Some xid, Some n) ->
+             (new ndata_entity name xid n config.encoding :> entity)
+         | _ -> assert false
+      in
+      dtd # add_gen_entity en extdecl
+    }}
+  ? {{ match !yy_position with
+          "ws"                -> raise(WF_error("Whitespace is missing"))
+        | "material"          -> raise(WF_error("String literal or identifier expected"))
+        | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
+        | _                   -> raise(WF_error("Bad entity declaration"))
+    }}
+
+|                     Percent 
+  $ {{ let extdecl = context.manager # current_entity_counts_as_external in
+    }}
+  ws1:                Ignore Ignore* 
+  name:               Name 
+  ws2:                Ignore Ignore* 
+  material:           pedef() 
+                      Ignore* 
+  decl_rangle_entid:  Decl_rangle
+    /* A parameter entity */
+    {{ 
+      if decl_entity_entid != decl_rangle_entid then
+        raise (Validation_error "Entities not properly nested with ENTITY declaration");
+      let en =
+       (* Distinguish between internal and external entities *)
+       match material with
+           (Some s, None)   ->
+             new internal_entity dtd name config.warner s p_internal_subset
+                 config.errors_with_line_numbers true config.encoding
+         | (None,   Some xid)   ->
+             new external_entity (resolver # clone) dtd name config.warner
+                                  xid true config.errors_with_line_numbers
+                                 config.encoding
+         | _ -> assert false
+      in
+
+      (* The following two lines ensure that even internal entities count
+       * as external (for the standalone check) if the declaration of
+       * the internal entity occurs in an external entity.
+       *)
+      if extdecl then
+       en # set_counts_as_external;
+
+      dtd # add_par_entity en;
+    }}
+  ? {{ match !yy_position with
+          ("ws1"|"ws2")       -> raise(WF_error("Whitespace is missing"))
+        | "material"          -> raise(WF_error("String literal or identifier expected"))
+        | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
+        | _                   -> raise(WF_error("Bad entity declaration"))
+    }}
+
+
+entitydef():
+  /* parses the definition value of a general entity. Returns either:
+   * - (Some s, None,   None)    meaning the definition of an internal entity
+   *                               with (literal) value s has been found
+   * - (None,   Some x, None)    meaning that an external parsed entity with
+   *                               external ID x has been found
+   * - (None,   Some x, Some n)  meaning that an unparsed entity with
+   *                               external ID x and notation n has been found
+   */
+  str:Unparsed_string
+    {{ Some str, None, None }}
+| id:external_id()   ws:Ignore?  Ignore*  decl:ndatadecl()?
+    {{  if not ws  && decl <> None then
+         raise(WF_error("Whitespace missing before `NDATA'"));
+       None, Some id, decl 
+    }}
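+
+/* Editorial illustration of the three result shapes described in the comment
+ * above:
+ *   <!ENTITY a "Hello">                   -> (Some "Hello", None, None)
+ *   <!ENTITY b SYSTEM "b.xml">            -> (None, Some (System "b.xml"), None)
+ *   <!ENTITY c SYSTEM "c.gif" NDATA gif>  -> (None, Some (System "c.gif"), Some "gif")
+ * (entitydef only parses the part after the entity name).
+ */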
+
+
+pedef():
+  /* parses the definition value of a parameter entity. Returns either:
+   * - (Some s, None)     meaning that the definition of an internal entity
+   *                        with (literal) value s has been found
+   * - (None,   Some x)   meaning that an external ID x has been found
+   */
+  str:Unparsed_string
+    {{ Some str, None }}
+| id:external_id()
+    {{ None, Some id }}
+
+
+ndatadecl():
+  /* Parses NDATA "name" and returns the name. The rule is applied as
+   * ndatadecl()? above, so a missing NDATA clause appears as None at the
+   * call site.
+   */
+  ndata:Name ws:Ignore Ignore* name:Name
+    {{ if ndata = "NDATA" then
+       name
+      else
+       raise(WF_error("NDATA expected"))
+    }}
+  ? {{ match !yy_position with
+          "ws"   -> raise(WF_error("Whitespace is missing after NDATA"))
+        | "name" -> raise(WF_error("Name expected"))
+        | _      -> raise(WF_error("Bad NDATA declaration"))
+    }}
+
+/**************************** NOTATION DECLARATION *******************/
+
+notationdecl():
+  /* parses <!NOTATION ... > and enters the notation declaration into the
+   * dtd object as side-effect
+   */
+  decl_notation_entid: Decl_notation 
+  ws1:                 Ignore Ignore*
+  name:                Name 
+  ws2:                 Ignore Ignore*
+  sys_or_public:       Name /* SYSTEM or PUBLIC */ 
+  ws3:                 Ignore Ignore*
+  str1:                Unparsed_string 
+  ws:                  Ignore? Ignore*
+  str2:                Unparsed_string? 
+                       Ignore*
+  decl_rangle_entid:   Decl_rangle
+    {{ 
+      if decl_notation_entid != decl_rangle_entid then
+       raise (Validation_error "Entities not properly nested with NOTATION declaration");
+      let xid =
+       (* Note that it is allowed that PUBLIC is followed by only one
+        * string literal
+        *)
+       match sys_or_public with
+           "SYSTEM" ->
+             if str2 <> None then raise(WF_error("SYSTEM must be followed only by one argument"));
+             System (recode_utf8 str1)
+         | "PUBLIC" ->
+             begin match str2 with
+                 None ->
+                   check_public_id str1;
+                   Public(recode_utf8 str1,"")
+               | Some p ->
+                   if not ws then
+                     raise(WF_error("Missing whitespace between the string literals of the `PUBLIC' id"));
+                   check_public_id str1;
+                   Public(recode_utf8 str1, recode_utf8 p)
+             end
+         | _ -> raise(WF_error("PUBLIC or SYSTEM expected"))
+      in
+      if extend_dtd then begin
+       let no = new dtd_notation name xid config.encoding in
+       dtd # add_notation no
+      end
+    }}
+  ? {{ match !yy_position with
+          ("ws1"|"ws2"|"ws3") -> raise(WF_error("Whitespace is missing"))
+        | "name"              -> raise(WF_error("Name expected"))
+        | "sys_or_public"     -> raise(WF_error("SYSTEM or PUBLIC expected"))
+        | ("str1"|"str2")     -> raise(WF_error("String literal expected"))
+        | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
+        | _                   -> raise(WF_error("Bad NOTATION declaration"))
+    }}
+
+/****************************** ELEMENTS **************************/
+
+/* In the following rules, the number of error rules is reduced to
+ * improve the performance of the parser.
+ */
+
+
+contents_start():
+  /* parses <element>...</element> misc*, i.e. exactly one element followed
+   * optionally by white space or processing instructions.
+   * The element is entered into the global variables as follows:
+   * - If elstack is non-empty, the parsed element is added as new child to
+   *   the top element of the stack.
+   * - If elstack is empty, the root_exemplar object is modified rather than
+   *   a new element being created. If additionally the variable root is
+   *   None, it is assigned Some root_exemplar.
+   * Note that the modification of the root_exemplar is done by the method
+   * internal_init.
+   * The reason why the root element is modified rather than newly created
+   * is a typing requirement. It must be possible that the class of the root
+   * is derived from the original class element_impl, i.e. the user must be
+   * able to add additional methods. If we created a new root object, we
+   * would have to denote to which class the new object belongs; the root
+   * would always be an 'element_impl' object (and not a derived object).
+   * If we instead cloned an  exemplar object and modified it by the
+   * "create" method, the root object would belong to the same class as the
+   * exemplar (good), but the type of the parsing function would always
+   * state that an 'element_impl' was created (because we can pass the new
+   * object only back via a global variable). The only solution is to
+   * modify the object that has been passed to the parsing function directly.
+   */
+  $ {{ dtd <- transform_dtd dtd; }}
+  start_tag() content()*
+    {{ () }}
+
+
+content():
+  /* parses: start tags, end tags, content, or processing
+   * instructions. That the tags are properly nested is dynamically checked.
+   * As result, recognized elements are added to their parent elements,
+   * content is added to the element containing it, and processing instructions
+   * are entered into the element embracing them. (All as side-effects.)
+   */
+  start_tag()
+    {{ () }}
+| end_tag()
+    {{ () }}
+| char_data()
+    {{ () }}
+| cref()
+    {{ () }}
+| pi()
+    {{ () }}
+| entity_ref()
+    {{ () }}
+| comment()
+    {{ () }}
+
+
+entity_ref():
+   Begin_entity eref_xmldecl_then_rest()
+    {{ if n_tags_open = 0 then
+       raise(WF_error("Entity reference not allowed here"))
+    }}
+
+
+/* See the comment for doc_xmldecl_then_misc_then_prolog_then_rest. */
+
+eref_xmldecl_then_rest():
+  pl:PI_xml
+  $ {{ context.manager # current_entity # process_xmldecl pl; 
+    }}
+  content()* End_entity
+    {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+  content() content()* End_entity
+    {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+  End_entity
+    {{ () }}
+
+
+start_tag():
+  /* parses <element attribute-values> or <element attribute-values/>.
+   *
+   * EFFECT: If elstack is non-empty, the element is added to the
+   * top element of the stack as new child, and the element
+   * is pushed on the stack. If elstack is empty, the root_exemplar is
+   * modified and gets the parsed name and attribute list. The root_exemplar
+   * is pushed on the stack. If additionally the variable root is empty, too,
+   * this variable is initialized.
+   * If the <element ... /> form has been parsed, no element is pushed
+   * on the stack.
+   */
+  tag:        Tag_beg
+    $ {{ let position =
+          if config.store_element_positions then
+            Some(context.manager # position)
+          else
+            None
+        in
+       }}
+  ws:         Ignore? Ignore*
+  attlist:    attribute()* 
+  emptiness:  start_tag_rangle()
+  /* Note: it is guaranteed that there is whitespace between Tag_beg and
+   * the name of the first attribute, because there must be some separator.
+   * So we need not check ws!
+   */
+    {{ 
+      let rec check_attlist al =
+       match al with
+           (nv1, num1) :: al' ->
+             if not num1 && al' <> [] then begin
+               match al with
+                   ((n1,_),_) :: ((n2,_),_) :: _ ->
+                     raise(WF_error("Whitespace is missing between attributes `" ^
+                                    n1 ^ "' and `" ^ n2 ^ "'"))
+                 | _ -> assert false
+             end;
+             check_attlist al'
+         | [] -> ()
+      in
+      check_attlist attlist;
+               
+      let name, tag_beg_entid = tag in
+      let attlist' = List.map (fun (nv,_) -> nv) attlist in
+      let d =
+       create_element_node ?position:position spec dtd name attlist' in
+
+      begin match id_index with
+         None -> ()
+       | Some idx ->
+           (* Put the ID attribute into the index, if present *)
+           begin try 
+             let v = d # id_attribute_value in  (* may raise Not_found *)
+             idx # add v d                      (* may raise ID_not_unique *)
+           with
+               Not_found ->
+                 (* No ID attribute *)
+                 ()
+             | ID_not_unique ->
+                 (* There is already an ID with the same value *)
+                 raise(Validation_error("ID not unique"))
+           end
+      end;
+
+      if n_tags_open = 0 then begin
+       if root = None then begin
+         (* We have found the begin tag of the root element. *)
+         if config.enable_super_root_node then begin
+           (* The user wants the super root instead of the real root.
+            * The real root element becomes the child of the super root.
+            *)
+           (* Assertion: self # current is the super root *)
+           assert (self # current # node_type = T_super_root);
+           root <- Some (self # current);
+           self # current # add_node d;
+           doc # init_root (self # current);
+         end
+         else begin
+           (* Normal behaviour: The user wants to get the real root. *)
+           root <- Some d;
+           doc # init_root d;
+         end;
+       end
+       else
+         (* We have found a second topmost element. This is illegal. *)
+         raise(WF_error("Document must consist of only one toplevel element"))
+      end
+      else begin
+       (* We have found some inner begin tag. *)
+       self # save_data;        (* Save outstanding data material first *)
+       self # current # add_node d
+      end;
+
+      if emptiness then
+       (* An empty tag like <a/>. *)
+       d # local_validate ~use_dfa:config.validate_by_dfa ()
+      else begin
+       (* A non-empty tag. *)
+       Stack.push (d, tag_beg_entid) elstack;
+       n_tags_open <- n_tags_open + 1;
+      end;
+    }}
+  ? {{ match !yy_position with
+          "attlist"   -> raise(WF_error("Bad attribute list"))
+        | "emptiness" -> raise(WF_error("`>' or `/>' expected"))
+        | _           -> raise(WF_error("Bad start tag"))
+    }}
+
+
+attribute():
+  /* Parses name="value"  */
+  n:Name Ignore* Eq Ignore* v:attval() ws:Ignore? Ignore*
+    {{ (n,v), ws }}
+
+
+attval():
+  v:Attval
+    {{ expand_attvalue lexerset dtd v config.warner true }}
+| v:Attval_nl_normalized
+    {{ expand_attvalue lexerset dtd v config.warner false }}
+
+
+start_tag_rangle():
+  Rangle       {{ false }}
+| Rangle_empty {{ true }}
+
+
+end_tag():
+  /* parses </element>.
+   * Pops the top element from the elstack and checks if it is the same
+   * element.
+   */
+  tag:Tag_end  Ignore*  Rangle
+    {{ let name, tag_end_entid = tag in
+       if n_tags_open = 0 then
+        raise(WF_error("End-tag without start-tag"));
+
+       self # save_data;        (* Save outstanding data material first *)
+
+       let x, tag_beg_entid = Stack.pop elstack in
+       let x_name =
+        match x # node_type with
+          | T_element n -> n
+          | _ -> assert false
+       in
+       if name <> x_name then
+        raise(WF_error("End-tag does not match start-tag"));
+       if tag_beg_entid != tag_end_entid then
+        raise(WF_error("End-tag not in the same entity as the start-tag"));
+       x # local_validate ~use_dfa:config.validate_by_dfa ();
+       
+       n_tags_open <- n_tags_open - 1;
+       
+       assert (n_tags_open >= 0);
+
+    }}
+
+char_data():
+  /* Parses any literal characters not otherwise matching, and adds the
+   * characters to the top element of elstack.
+   * If elstack is empty, it is assumed that there is no surrounding
+   * element, and any non-white space character is forbidden.
+   */
+  data:CharData
+    {{ 
+      if n_tags_open = 0 then
+       (* only white space is allowed *)
+       self # only_whitespace data
+      else
+       self # collect_data data
+          (* We collect the chardata material until the next end tag is
+          * reached. Then the collected material will be concatenated and
+          * stored as a single T_data node (see the end_tag rule above)
+          * using save_data.
+          *)
+    }}
+| data:Cdata
+    {{ 
+      if n_tags_open = 0 then
+       raise (WF_error("CDATA section not allowed here"));
+      self # collect_data data
+          (* Also collect CDATA material *)
+    }}
+
+cref():
+  /* Parses &#...; and adds the character to the top element of elstack. */
+  code:CRef
+    {{ 
+       if n_tags_open = 0 then
+        (* No surrounding element: character references are not allowed *)
+        raise(WF_error("Character reference not allowed here"));
+       self # collect_data (character config.encoding config.warner code)
+          (* Also collect character references *)
+    }}
+
+pi():
+  /* Parses <?...?> (but not <?xml white-space ... ?>).
+   * If there is a top element in elstack, the processing instruction is added
+   * to this element.
+   */
+  pi: PI
+    {{ 
+      let position =
+       if config.store_element_positions then
+         Some(context.manager # position)
+       else
+         None
+      in
+      let target,value = pi in
+
+      if n_tags_open = 0 && not config.enable_super_root_node
+      then
+       doc # add_pinstr (new proc_instruction target value config.encoding)
+      else begin
+       (* Special case: if processing instructions are processed inline,
+        * they are wrapped into T_pinstr nodes.
+        *)
+       if config.enable_pinstr_nodes then begin
+         self # save_data;        (* Save outstanding data material first *)
+         let pinstr = new proc_instruction target value config.encoding in
+         let wrapper = create_pinstr_node 
+                         ?position:position spec dtd pinstr in
+         wrapper # local_validate();                (* succeeds always   *)
+         self # current # add_node wrapper;
+       end
+       else
+         (* Normal behaviour: Add the PI to the parent element. *)
+         self # current # add_pinstr 
+                            (new proc_instruction target value config.encoding)
+      end
+    }}
+
+
+comment():
+  /* Parses <!-- ... -->
+   */
+  Comment_begin
+  $ {{ 
+      let position =
+       if config.enable_comment_nodes && config.store_element_positions then
+         Some(context.manager # position)
+       else
+         None
+      in
+    }}
+  mat: Comment_material*
+  ce: Comment_end
+    {{
+      if config.enable_comment_nodes then begin
+       self # save_data;        (* Save outstanding data material first *)
+       let comment_text = String.concat "" mat in
+       let wrapper = create_comment_node 
+                       ?position:position spec dtd comment_text in
+       wrapper # local_validate();                (* succeeds always   *)
+       self # current # add_node wrapper;
+      end
+    }}
+  ? {{ match !yy_position with
+        | "ce"  -> raise(WF_error("`-->' expected"))
+        | _     -> raise(WF_error("Bad comment"))
+    }}
+
+
+%%
+   (* The method "parse" continues here... *)
+
+   try
+     match start_symbol with
+        Ext_document ->
+          parse_ext_document context.current context.get_next 
+       | Ext_declarations ->
+          parse_ext_declarations context.current context.get_next 
+       | Ext_element ->
+          parse_ext_element context.current context.get_next
+   with
+       Not_found ->
+        raise Parsing.Parse_error
+
+  (*********** The method "parse" ends here *************)
+
+
+(**********************************************************************)
+
+(* Here ends the class definition: *)
+end
+;;
+
+(**********************************************************************)
+
+open Pxp_reader;;
+
+
+class default_ext =
+  object(self)
+    val mutable node = (None : ('a extension node as 'a) option)
+    method clone = {< >}
+    method node =
+      match node with
+         None ->
+           assert false
+       | Some n -> n
+    method set_node n =
+      node <- Some n
+  end
+;;
+
+
+let default_extension = new default_ext;;
+
+let default_spec =
+  make_spec_from_mapping
+    ~super_root_exemplar:      (new element_impl default_extension)
+    ~comment_exemplar:         (new element_impl default_extension)
+    ~default_pinstr_exemplar:  (new element_impl default_extension)
+    ~data_exemplar:            (new data_impl default_extension)
+    ~default_element_exemplar: (new element_impl default_extension)
+    ~element_mapping:          (Hashtbl.create 1)
+    ()
+;;
+
+
+let idref_pass id_index root =
+  let error t att value =
+    let name =
+      match t # node_type with
+         T_element name -> name
+       | _ -> assert false
+    in
+    let text =
+      "Attribute `" ^ att ^ "' of element `" ^ name ^ 
+      "' refers to unknown ID `" ^ value ^ "'" in
+    let pos_ent, pos_line, pos_col = t # position in
+    if pos_line = 0 then
+      raise(Validation_error text)
+    else
+      raise(At("In entity " ^ pos_ent ^ " at line " ^
+              string_of_int pos_line ^ ", position " ^ string_of_int pos_col ^
+              ":\n",
+              Validation_error text))
+  in
+    
+  let rec check_tree t =
+    let idref_atts = t # idref_attribute_names in
+    List.iter
+      (fun att ->
+        match t # attribute att with
+            Value s ->
+              begin try ignore(id_index # find s) with
+                  Not_found ->
+                    error t att s
+              end
+          | Valuelist l ->
+              List.iter
+                (fun s ->
+                   try ignore(id_index # find s) with
+                       Not_found ->
+                         error t att s
+                )
+                l
+          | Implied_value -> ()
+      )
+      idref_atts;
+    List.iter check_tree (t # sub_nodes)
+  in
+  check_tree root
+;;
+
+
+exception Return_DTD of dtd;;
+  (* Used by extract_dtd_from_document_entity to jump out of the parser *)
+
+
+let call_parser ~configuration:cfg 
+                ~source:src 
+               ~dtd 
+               ~extensible_dtd 
+               ~document:doc 
+               ~specification:spec 
+               ~process_xmldecl 
+               ~transform_dtd
+                ~(id_index : 'ext #index option)
+               ~use_document_entity
+                ~entry 
+               ~init_lexer =
+  let e = cfg.errors_with_line_numbers in
+  let w = cfg.warner in
+  let r, en =
+    match src with
+       Entity(m,r')  -> r', m dtd
+      | ExtID(xid,r') -> r', 
+                        if use_document_entity then
+                           new document_entity 
+                            r' dtd "[toplevel]" w xid e
+                             cfg.encoding
+                        else
+                           new external_entity 
+                            r' dtd "[toplevel]" w xid false e
+                             cfg.encoding
+  in
+  r # init_rep_encoding cfg.encoding;
+  r # init_warner w;
+  en # set_debugging_mode (cfg.debugging_mode);
+  let pobj =
+    new parser_object
+      doc
+      dtd
+      extensible_dtd
+      cfg
+      r
+      spec
+      process_xmldecl
+      transform_dtd
+      (id_index :> 'ext index option)
+  in
+  let mgr = new entity_manager en in
+  en # open_entity true init_lexer;
+  begin try
+    let context = make_context mgr in
+    pobj # parse context entry;
+    ignore(en # close_entity);
+  with
+      Return_DTD d ->
+       ignore(en # close_entity);
+       raise(Return_DTD d)
+    | error ->
+       ignore(en # close_entity);
+       r # close_all;
+       let pos = mgr # position_string in
+       raise (At(pos, error))
+  end;
+  if cfg.idref_pass then begin
+    match id_index with
+       None -> ()
+      | Some idx ->
+         ( match pobj # root with
+               None -> ()
+             | Some root ->
+                 idref_pass idx root;
+         )
+  end;
+  pobj
+
+
+let parse_dtd_entity cfg src =
+  (* Parse a DTD given as separate entity. *)
+  let dtd = new dtd cfg.warner cfg.encoding in
+  let doc = new document cfg.warner in
+  let pobj =
+    call_parser 
+      ~configuration:cfg 
+      ~source:src 
+      ~dtd:dtd 
+      ~extensible_dtd:true         (* Extend the DTD by parsed declarations *)
+      ~document:doc 
+      ~specification:default_spec 
+      ~process_xmldecl:false       (* The XML declaration is ignored 
+                                   * (except 'encoding') 
+                                   *)
+      ~transform_dtd:(fun x -> x)  (* Do not transform the DTD *)
+      ~id_index: None
+      ~use_document_entity:false
+      ~entry:Ext_declarations      (* Entry point of the grammar *)
+      ~init_lexer:Declaration      (* The initially used lexer *)
+  in
+  dtd # validate;
+  if cfg.accept_only_deterministic_models then dtd # only_deterministic_models;
+  dtd
+;;
+
+
+let parse_content_entity ?id_index cfg src dtd spec =
+  (* Parse an element given as separate entity *)
+  dtd # validate;            (* ensure that the DTD is valid *)
+  if cfg.accept_only_deterministic_models then dtd # only_deterministic_models;
+  let doc = new document cfg.warner in
+  let pobj =
+    call_parser
+      ~configuration:cfg 
+      ~source:src 
+      ~dtd:dtd 
+      ~extensible_dtd:true         (* Extend the DTD by parsed declarations *)
+      ~document:doc 
+      ~specification:spec 
+      ~process_xmldecl:false       (* The XML declaration is ignored 
+                                   * (except 'encoding') 
+                                   *)
+      ~transform_dtd:(fun x -> x)  (* Do not transform the DTD *)
+      ~id_index:(id_index :> 'ext index option)
+      ~use_document_entity:false
+      ~entry:Ext_element           (* Entry point of the grammar *)
+      ~init_lexer:Content          (* The initially used lexer *)
+  in
+  match pobj # root with
+      Some r -> r
+    | None -> raise(WF_error("No root element"))
+;;
+
+
+let parse_wfcontent_entity cfg src spec =
+  let dtd = new dtd cfg.warner cfg.encoding in
+  dtd # allow_arbitrary;
+  let doc = new document cfg.warner in
+  let pobj =
+    call_parser
+      ~configuration:cfg 
+      ~source:src 
+      ~dtd:dtd 
+      ~extensible_dtd:false        (* Do not extend the DTD *)
+      ~document:doc 
+      ~specification:spec 
+      ~process_xmldecl:false       (* The XML declaration is ignored 
+                                   * (except 'encoding') 
+                                   *)
+      ~transform_dtd:(fun x -> x)  (* Do not transform the DTD *)
+      ~id_index:None
+      ~use_document_entity:false
+      ~entry:Ext_element           (* Entry point of the grammar *)
+      ~init_lexer:Content          (* The initially used lexer *)
+  in
+  match pobj # root with
+      Some r -> r
+    | None -> raise(WF_error("No root element"))
+;;
+
+
+let iparse_document_entity ?(transform_dtd = (fun x -> x)) 
+                           ?id_index
+                           cfg0 src spec p_wf =
+  (* Parse an element given as separate entity *)
+  (* p_wf: 'true' if in well-formedness mode, 'false' if in validating mode *)
+  let cfg = { cfg0 with
+               recognize_standalone_declaration = 
+                   cfg0.recognize_standalone_declaration && (not p_wf) 
+            } in
+  let dtd = new dtd cfg.warner cfg.encoding in
+  if p_wf then
+    dtd # allow_arbitrary;
+  let doc = new document cfg.warner in
+  let pobj =
+    call_parser 
+      ~configuration:cfg 
+      ~source:src 
+      ~dtd:dtd 
+      ~extensible_dtd:(not p_wf)   (* Extend the DTD by parsed declarations
+                                   * only if in validating mode
+                                   *)
+      ~document:doc 
+      ~specification:spec 
+      ~process_xmldecl:true        (* The XML declaration is processed *)
+                                   (* TODO: change to 'not p_wf' ? *)
+      ~transform_dtd:(fun dtd -> 
+                       let dtd' = transform_dtd dtd in
+                       if cfg.accept_only_deterministic_models then 
+                         dtd' # only_deterministic_models;
+                       dtd')
+
+      ~id_index:(id_index :> 'ext index option)
+      ~use_document_entity:true
+      ~entry:Ext_document          (* Entry point of the grammar *)
+      ~init_lexer:Document         (* The initially used lexer *)
+  in
+  pobj # doc
+;;
+
+
+let parse_document_entity ?(transform_dtd = (fun x -> x)) 
+                          ?id_index
+                          cfg src spec =
+  iparse_document_entity 
+    ~transform_dtd:transform_dtd 
+    ?id_index:(id_index : 'ext #index option :> 'ext index option)
+    cfg src spec false;;
+
+let parse_wfdocument_entity cfg src spec =
+  iparse_document_entity cfg src spec true;;
+
+let extract_dtd_from_document_entity cfg src =
+  let transform_dtd dtd = raise (Return_DTD dtd) in
+  try
+    let doc = parse_document_entity 
+               ~transform_dtd:transform_dtd
+               cfg
+               src
+               default_spec in
+    (* Should not happen: *)
+    doc # dtd
+  with
+      Return_DTD dtd ->
+       (* The normal case: *)
+       dtd
+;;
+
+
+let default_config =
+  let w = new drop_warnings in
+  { warner = w;
+    errors_with_line_numbers = true;
+    enable_pinstr_nodes = false;
+    enable_super_root_node = false;
+    enable_comment_nodes = false;
+    encoding = `Enc_iso88591;
+    recognize_standalone_declaration = true;
+    store_element_positions = true;
+    idref_pass = false;
+    validate_by_dfa = true;
+    accept_only_deterministic_models = true;
+    debugging_mode = false;
+  }
+
+
+class  [ 'ext ] hash_index =
+object 
+  constraint 'ext = 'ext node #extension
+  val ht = (Hashtbl.create 100 : (string, 'ext node) Hashtbl.t)
+  method add s n =
+    try
+      ignore(Hashtbl.find ht s);
+      raise ID_not_unique
+    with
+       Not_found ->
+         Hashtbl.add ht s n
+
+  method find s = Hashtbl.find ht s
+  method index = ht
+end
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:29  lpadovan
+ * Initial revision
+ *
+ * Revision 1.14  2000/08/26 23:23:14  gerd
+ *     Bug: from_file must not interpret the file name as URL path.
+ *     Bug: When PI and comment nodes are generated, the collected data
+ * material must be saved first.
+ *
+ * Revision 1.13  2000/08/19 21:30:03  gerd
+ *     Improved the error messages of the parser
+ *
+ * Revision 1.12  2000/08/18 20:16:25  gerd
+ *     Implemented that Super root nodes, pinstr nodes and comment
+ * nodes are included into the document tree.
+ *
+ * Revision 1.11  2000/08/14 22:24:55  gerd
+ *     Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.10  2000/07/23 02:16:33  gerd
+ *     Support for DFAs.
+ *
+ * Revision 1.9  2000/07/14 13:57:29  gerd
+ *     Added the id_index feature.
+ *
+ * Revision 1.8  2000/07/09 17:52:45  gerd
+ *     New implementation for current_data.
+ *     The position of elements is stored on demand.
+ *
+ * Revision 1.7  2000/07/09 01:00:35  gerd
+ *     Improvement: It is now guaranteed that only one data node
+ * is added for consecutive character material.
+ *
+ * Revision 1.6  2000/07/08 16:27:29  gerd
+ *     Cleaned up the functions calling the parser.
+ *     New parser argument: transform_dtd.
+ *     Implementations for 'extract_dtd_from_document_entity' and
+ * 'parse_wfcontent_entity'.
+ *
+ * Revision 1.5  2000/07/06 23:05:18  gerd
+ *     Initializations of resolvers were missing.
+ *
+ * Revision 1.4  2000/07/06 22:11:01  gerd
+ *     Fix: The creation of the non-virtual root element is protected
+ * in the same way as the virtual root element.
+ *
+ * Revision 1.3  2000/07/04 22:15:18  gerd
+ *     Change: Using the new resolver capabilities.
+ *     Still incomplete: the new extraction and parsing functions.
+ *
+ * Revision 1.2  2000/06/14 22:19:06  gerd
+ *     Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_yacc.m2y:
+ *
+ * Revision 1.9  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.8  2000/05/27 19:26:19  gerd
+ *     Change: The XML declaration is interpreted right after
+ * it has been parsed (no longer after the document): new function
+ * check_and_parse_xmldecl.
+ *     When elements, attributes, and entities are declared
+ * it is stored whether the declaration happens in an external
+ * entity (for the standalone check).
+ *     The option recognize_standalone_declaration is interpreted.
+ *
+ * Revision 1.7  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.6  2000/05/14 21:51:24  gerd
+ *     Change: Whitespace is handled by the grammar, and no longer
+ * by the entity.
+ *
+ * Revision 1.5  2000/05/14 17:50:54  gerd
+ *     Updates because of changes in the token type.
+ *
+ * Revision 1.4  2000/05/11 22:09:17  gerd
+ *     Fixed the remaining problems with conditional sections.
+ * This seems to be also a weakness of the XML spec!
+ *
+ * Revision 1.3  2000/05/09 00:02:44  gerd
+ *     Conditional sections are now recognized by the parser.
+ * There seem to be some open questions; see the TODO comments!
+ *
+ * Revision 1.2  2000/05/08 22:01:44  gerd
+ *     Introduced entity managers (see markup_entity.ml).
+ *     The XML declaration is now recognized by the parser. If such
+ * a declaration is found, the method process_xmldecl of the currently
+ * active entity is called. If the first token is not an XML declaration,
+ * the method process_missing_xmldecl is called instead.
+ *     Some minor changes.
+ *
+ * Revision 1.1  2000/05/06 23:21:49  gerd
+ *     Initial revision.
+ *
+ *     
+ * ======================================================================
+ *
+ * COPIED FROM REVISION 1.19 OF markup_yacc.mly
+ *
+ * Revision 1.19  2000/05/01 15:20:08  gerd
+ *     "End tag matches start tag" is checked before "End tag in the
+ * same entity as start tag".
+ *
+ * Revision 1.18  2000/04/30 18:23:08  gerd
+ *     Bigger change: Introduced the concept of virtual roots. First,
+ * this reduces the number of checks. Second, it makes it possible to
+ * return the virtual root to the caller instead of the real root (new
+ * config options 'virtual_root' and 'processing_instructions_inline').
+ *     Minor changes because of better CR/CRLF handling.
+ *
+ * Revision 1.17  2000/03/13 23:47:46  gerd
+ *     Updated because of interface changes. (See markup_yacc_shadow.mli
+ * rev. 1.8)
+ *
+ * Revision 1.16  2000/01/20 20:54:43  gerd
+ *     New config.errors_with_line_numbers.
+ *
+ * Revision 1.15  1999/12/17 22:27:58  gerd
+ *     Bugfix: The value of 'p_internal_subset' (an instance
+ * variable of the parser object) is set to true when the internal subset
+ * begins, and is set to false when this subset ends. The error was
+ * that references to external entities within this subset did not
+ * set 'p_internal_subset' to false; this is now corrected by introducing
+ * the 'p_internal_subset_stack'.
+ *     This is a typical example of how the code gets more and
+ * more complicated, and of how difficult it is to really understand
+ * what is going on.
+ *
+ * Revision 1.14  1999/11/09 22:23:37  gerd
+ *     Removed the invocation of "init_dtd" of the root document.
+ * This method is no longer available. The DTD is also passed to the
+ * document object by the root element, so nothing essential changes.
+ *
+ * Revision 1.13  1999/10/25 23:37:09  gerd
+ *     Bugfix: The warning "More than one ATTLIST declaration for element
+ * type ..." is only generated if an ATTLIST is found while there are already
+ * attributes for the element.
+ *
+ * Revision 1.12  1999/09/01 23:08:38  gerd
+ *     New frontend function: parse_wf_document. This simply uses
+ * a DTD that allows anything, and by the new parameter "extend_dtd" it is
+ * avoided that element, attlist, and notation declarations are added to this
+ * DTD. The idea is that this function simulates a well-formedness parser.
+ *     Tag_beg, Tag_end carry the entity_id. The "elstack" stores the
+ * entity_id of the stacked tag. This was necessary because otherwise there
+ * are examples that produce incorrectly nested elements.
+ *     p_internal_subset is a variable that stores whether the internal
+ * subset is being parsed. This is important because entity declarations in
+ * internal subsets are not allowed to contain parameter references.
+ *     It is checked if the "elstack" is empty after all has been parsed.
+ *     Processing instructions outside DTDs and outside elements are now
+ * added to the document.
+ *     The rules of mixed and regexp style content models have been
+ * separated. The code is now much simpler.
+ *     Entity references outside elements are detected and rejected.
+ *
+ * Revision 1.11  1999/09/01 16:26:08  gerd
+ *     Improved the quality of error messages.
+ *
+ * Revision 1.10  1999/08/31 19:13:31  gerd
+ *     Added checks on proper PE nesting. The idea is that tokens such
+ * as Decl_element and Decl_rangle carry an entity ID with them. This ID
+ * is simply an object of type < >, i.e. you can only test on identity.
+ * The lexer always produces tokens with a dummy ID because it does not
+ * know which entity is the current one. The entity layer replaces the dummy
+ * ID with the actual ID. The parser checks that the IDs of pairs such as
+ * Decl_element and Decl_rangle are the same; otherwise a Validation_error
+ * is produced.
+ *
+ * Revision 1.9  1999/08/15 20:42:01  gerd
+ *     Corrected a misleading message.
+ *
+ * Revision 1.8  1999/08/15 20:37:34  gerd
+ *     Improved error messages.
+ *     Bugfix: While parsing document entities, the subclass document_entity is
+ * now used instead of external_entity. The rules in document entities are a bit
+ * stronger.
+ *
+ * Revision 1.7  1999/08/15 14:03:59  gerd
+ *     Empty documents are not allowed.
+ *     "CDATA section not allowed here" is a WF_error, not a Validation_
+ * error.
+ *
+ * Revision 1.6  1999/08/15 02:24:19  gerd
+ *     Removed some grammar rules that were used for testing.
+ *     Documents without DTD can now have arbitrary elements (formerly
+ * they were not allowed to have any element).
+ *
+ * Revision 1.5  1999/08/14 22:57:20  gerd
+ *     It is allowed that external entities are empty because the
+ * empty string is well-parsed for both declarations and contents. Empty
+ * entities can be referenced anywhere because the references are replaced
+ * by nothing. Because of this, the Begin_entity...End_entity brace is only
+ * inserted if the entity is non-empty. (Otherwise references to empty
+ * entities would not be allowed anywhere.)
+ *     As a consequence, the grammar has been changed such that a
+ * single Eof is equivalent to Begin_entity,End_entity without content.
+ *
+ * Revision 1.4  1999/08/14 22:20:01  gerd
+ *         The "config" slot now has a component "warner", which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ *         Furthermore, there is a new component "debugging_mode".
+ *         Some Parse_error exceptions have been changed into Validation_error.
+ *         The interfaces of functions/classes imported from other modules
+ * have changed; the invocations have been adapted.
+ *         Contents may now contain CDATA sections (these had been forgotten before).
+ *
+ * Revision 1.3  1999/08/11 15:00:41  gerd
+ *     The Begin_entity ... End_entity brace is also possible in
+ * 'contents'.
+ *     The configuration passed to the parsing object contains always
+ * the resolver that is actually used.
+ *
+ * Revision 1.2  1999/08/10 21:35:12  gerd
+ *     The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ *     TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1  1999/08/10 00:35:52  gerd
+ *     Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/pxp_yacc.mli b/helm/DEVEL/pxp/pxp/pxp_yacc.mli
new file mode 100644 (file)
index 0000000..cb987a8
--- /dev/null
@@ -0,0 +1,488 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+(*$ markup-yacc.mli *)
+
+open Pxp_types
+open Pxp_dtd
+open Pxp_document
+
+exception ID_not_unique
+
+class type [ 'ext ] index =
+object 
+  (* The type of indexes over the ID attributes of the elements. This type
+   * is the minimum requirement needed by the parser to create such an index.
+   *)
+  constraint 'ext = 'ext node #extension
+  method add : string -> 'ext node -> unit
+    (* Add the passed node to the index. If there is already an ID with
+     * the passed string value, the exception ID_not_unique should be
+     * raised. (But the index is free also to accept several identical IDs.)
+     *)
+  method find : string -> 'ext node
+    (* Finds the node with the passed ID value, or raises Not_found *)
+end
+;;
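+
+(* Illustrative sketch: a throw-away implementation of this class type that
+ * keeps the IDs in an association list (the list-based storage is made up
+ * for the example; the hash_index class below is what is normally used).
+ *
+ *   class ['ext] assoc_index =
+ *     object
+ *       constraint 'ext = 'ext node #extension
+ *       val mutable entries = ([] : (string * 'ext node) list)
+ *       method add s n =
+ *         if List.mem_assoc s entries then raise ID_not_unique
+ *         else entries <- (s, n) :: entries
+ *       method find s = List.assoc s entries
+ *     end
+ *)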
+
+
+class [ 'ext ] hash_index : 
+object 
+  (* This is a simple implementation of 'index' using a hash table. *)
+  constraint 'ext = 'ext node #extension
+  method add : string -> 'ext node -> unit
+    (* See above. *)
+  method find : string -> 'ext node
+    (* See above. *)
+  method index : (string, 'ext node) Hashtbl.t
+    (* Returns the hash table. *)
+end
+;;
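+
+(* Illustrative sketch, assuming a file "doc.xml" whose DTD declares ID and
+ * IDREF attributes (both names are made up for the example): the parser
+ * fills the passed index while parsing, and idref_pass makes it check the
+ * references afterwards.
+ *
+ *   let idx = new hash_index in
+ *   let doc =
+ *     parse_document_entity
+ *       ~id_index:idx    (* a coercion to the class type 'index' may be
+ *                         * needed, e.g. (idx :> _ index) *)
+ *       { default_config with idref_pass = true }
+ *       (from_file "doc.xml")
+ *       default_spec
+ *   in
+ *   let n = idx # find "some-id" in   (* raises Not_found if absent *)
+ *   ...
+ *)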
+
+
+type config =
+    { warner : collect_warnings;
+         (* An object that collects warnings. *)
+
+      errors_with_line_numbers : bool;
+         (* Whether error messages contain line numbers or not. The parser
+         * is 10 to 20 per cent faster if line numbers are turned off;
+         * you get only byte positions in this case.
+         *)
+
+      enable_pinstr_nodes : bool;
+         (* true: turns on a special mode for processing instructions. Normally,
+         * you cannot determine the exact location of a PI; you only know
+         * in which element the PI occurs. This mode makes it possible
+         * to find out the exact location: Every PI is artificially wrapped
+         * by a special node with type T_pinstr. For example, if the XML text
+         * is <a><?x?><?y?></a>, the parser normally produces only an element
+         * object for "a", and puts the PIs "x" and "y" into it (without
+         * order). In this mode, the object "a" will contain two objects
+         * with type T_pinstr, and the first object will contain "x", and the
+         * second "y": the object tree looks like
+         * - Node with type = T_element "a"
+         *   - Node with type = T_pinstr "x"
+         *     + contains processing instruction "x"
+         *   - Node with type = T_pinstr "y"
+         *     + contains processing instruction "y"
+         *
+         * Notes:
+         * (1) In past versions of PXP this mode was called
+         *     processing_instructions_inline, and it produced nodes of
+         *     type T_element "-pi" instead of T_pinstr.
+         * (2) The T_pinstr nodes are created from the pinstr exemplars
+         *     in your spec
+         *     in your spec.
+         *)
+
+      enable_super_root_node : bool;
+         (* true: the topmost element of the XML tree is not the root element,
+         * but the so-called super root. The root element is a son of the
+         * super root. The super root is a node with type T_super_root.
+         * The following behaviour changes, too:
+         * - PIs occurring outside the root element and outside the DTD are
+         *   added to the super root instead of the document object
+         * - If enable_pinstr_nodes is also turned on, the PI wrappers
+         *   are added to the super root
+         *
+         * For example, the document
+         *   <?x?><a>y</a><?y?>
+         * is normally represented by:
+         * - document object
+         *   + contains PIs x and y
+         *   - reference to root node with type = T_element "a"
+         *     - node with type = T_data: contains "y"
+         * With enabled super root node:
+         * - document object
+         *   - reference to super root node with type = T_super_root
+         *     + contains PIs x and y
+         *     - root node with type = T_element "a"
+         *       - node with type = T_data: contains "y"
+         * If also enable_pinstr_nodes:
+         * - document object
+         *   - reference to super root node with type = T_super_root
+         *     - node with type = T_pinstr "x"
+         *       + contains PI "x"
+         *     - root node with type = T_element "a"
+         *       - node with type = T_data: contains "y"
+         *     - node with type = T_pinstr "y"
+         *       + contains PI "y"
+         * Notes:
+         * (1) In previous versions of PXP this mode was called
+         *     virtual_root, and it produced an additional node of type
+         *     T_element "-vr" instead of T_super_root.
+         * (2) The T_super_root node is created from the super root exemplar
+         *     in your spec.
+         *)
+
+      enable_comment_nodes : bool;
+         (* When enabled, comments are represented as nodes with type =
+         * T_comment.
+         * To access the contents of comments, use the method "comment"
+         * for the comment nodes. 
+         * These nodes behave like elements; however, they are normally
+         * empty and do not have attributes. Note that it is possible to
+         * add children to comment nodes and to set attributes, but it is
+         * strongly recommended not to do so. There are no checks on
+         * such abnormal use, because they would cost too
+         * much time, even when no comment nodes are generated at all.
+         *
+         * Comment nodes should be disabled unless you must parse a 
+         * third-party XML text which uses comments as another data
+         * container.
+         *
+         * The nodes of type T_comment are created from the comment exemplars
+         * in your spec.
+         *)
+
+      encoding : rep_encoding;
+        (* Specifies the encoding used for the *internal* representation
+        * of any character data.
+        * Note that the default is still Enc_iso88591.
+        *)
+
+      recognize_standalone_declaration : bool;
+        (* Whether the "standalone" declaration is recognized or not.
+        * This option does not have an effect on well-formedness parsing:
+        * in this case such declarations are never recognized.
+        *
+        * Recognizing the "standalone" declaration means that the 
+        * value of the declaration is scanned and passed to the DTD,
+        * and that the "standalone-check" is performed. 
+        *
+        * Standalone-check: If a document is flagged standalone='yes' 
+        * some additional constraints apply. The idea is that a parser
+        * without access to any external document subsets can still parse
+        * the document, and will still return the same values as the parser
+        * with such access. For example, if the DTD is external and if
+        * there are attributes with default values, it is checked that there
+        * is no element instance where these attributes are omitted - the
+        * parser would return the default value but this requires access to
+        * the external DTD subset.
+        *)
+
+      store_element_positions : bool;
+        (* Whether the file name, the line and the column of the
+        * beginning of elements are stored in the element nodes.
+        * This option may be useful to generate error messages.
+        * 
+        * Positions are only stored for:
+        * - Elements
+        * - Wrapped processing instructions (see enable_pinstr_nodes)
+        * For all other node types, no position is stored.
+        *
+        * You can access positions by the method "position" of nodes.
+        *)
+
+      idref_pass : bool;
+        (* Whether the parser does a second pass and checks that all
+        * IDREF and IDREFS attributes contain valid references.
+        * This option works only if an ID index is available. To create
+        * an ID index, pass an index object as id_index argument to the
+        * parsing functions (such as parse_document_entity; see below).
+        *
+        * "Second pass" does not mean that the XML text is again parsed;
+        * only the existing document tree is traversed, and the check
+        * on bad IDREF/IDREFS attributes is performed for every node.
+        *)
+
+      validate_by_dfa : bool;
+        (* If true, and if DFAs are available for validation, the DFAs will
+        * actually be used for validation.
+        * If false, or if no DFAs are available, the standard backtracking
+        * algorithm will be used.
+        * DFA = deterministic finite automaton.
+        *
+        * DFAs are only available if accept_only_deterministic_models is
+        * "true" (because in this case, it is relatively cheap to construct
+        * the DFAs). DFAs are a data structure which ensures that validation
+        * can always be performed in linear time.
+        *
+        * I strongly recommend using DFAs; however, there are examples
+        * for which validation by backtracking is faster.
+        *)
+
+      accept_only_deterministic_models : bool;
+        (* Whether only deterministic content models are accepted in DTDs. *)
+
+      (* The following options are not implemented, or only for internal
+       * use.
+       *)
+
+      debugging_mode : bool;
+    }
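+
+(* A minimal sketch of how a configuration is typically put together: start
+ * from default_config (declared below) and override individual fields with
+ * a record update. The chosen fields are just examples.
+ *
+ *   let my_config =
+ *     { default_config with
+ *         enable_comment_nodes = true;
+ *         store_element_positions = true;
+ *         idref_pass = true;      (* needs an id_index, see the parsing
+ *                                  * functions below *)
+ *     }
+ *)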
+
+
+type source =
+    Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
+  | ExtID of (ext_id * Pxp_reader.resolver)
+
+val from_channel : 
+      ?system_encoding:encoding -> ?id:ext_id -> ?fixenc:encoding -> 
+      in_channel -> source
+
+val from_string :
+      ?fixenc:encoding -> string -> source
+
+val from_file :
+      ?system_encoding:encoding -> string -> source
+
+(* Notes on sources (version 2):
+ *
+ * Sources specify where the XML text to parse comes from. Sources not only
+ * represent character streams, but also external IDs (i.e. SYSTEM or PUBLIC
+ * names), and they are interpreted according to a specific character encoding.
+ * A source should be associated with an external ID, because otherwise
+ * it is not known how to handle relative names.
+ *
+ * There are two primary sources, Entity and ExtID, and several functions
+ * for derived sources. First, explanations of the functions:
+ *
+ * from_channel: The XML text is read from an in_channel. By default, the
+ *   channel is not associated with an external ID, and it is impossible
+ *   to resolve relative SYSTEM IDs found in the document.
+ *   If the ?id argument is passed, it is assumed that the channel has this
+ *   external ID. If relative SYSTEM IDs occur in the document, they can
+ *   be interpreted; however, it is only possible to read from "file:"
+ *   IDs.
+ *   By default, the channel automatically detects the encoding. You can
+ *   set a fixed encoding by passing the ?fixenc argument.
+ *
+ * from_string: The XML text is read from a string.
+ *   It is impossible to read from any external entity whose reference is found
+ *   in the string.
+ *   By default, the encoding of the string is detected automatically. You can
+ *   set a fixed encoding by passing the ?fixenc argument.
+ *
+ * from_file: The XML text is read from the file whose file name is
+ *   passed to the function (as UTF-8 string).
+ *   Relative system IDs can be interpreted by this function.
+ *   The ?system_encoding argument specifies the character encoding used
+ *   for file names (sic!). By default, UTF-8 is assumed.
+ *
+ * Examples:
+ *
+ * from_file "/tmp/file.xml": 
+ *   reads from this file, which is assumed to have the ID 
+ *   SYSTEM "file://localhost/tmp/file.xml".
+ *
+ * let ch = open_in "/tmp/file.xml" in
+ * from_channel ~id:(System "file://localhost/tmp/file.xml") ch
+ *   This does the same, but uses a channel.
+ *
+ * from_channel ~id:(System "http://host/file.xml")
+ *              ch
+ *   reads from the channel ch, and it is assumed that the ID is
+ *   SYSTEM "http://host/file.xml". If there is any relative SYSTEM ID,
+ *   it will be interpreted relative to this location; however, there is
+ *   no way to read via HTTP.
+ *   If there is any "file:" SYSTEM ID, it is possible to read the file.
+ *
+ * The primary sources:
+ *
+ * - ExtID(x,r): The identifier x (either the SYSTEM or the PUBLIC name) of the
+ *   entity to read from is passed to the resolver, and the resolver finds
+ *   the entity and opens it.
+ *   The intention of this option is to allow customized
+ *   resolvers to interpret external identifiers without any restriction.
+ *   The Pxp_reader module contains several classes allowing the user to
+ *   compose such a customized resolver from predefined components.
+ *
+ *   ExtID is the interface of choice for own extensions to resolvers.
+ *
+ * - Entity(m,r): You can implement any behaviour by using a customized
+ *   entity class. Once the DTD object d is known that will be used during
+ *   parsing, the entity  e = m d  is determined and used together with the
+ *   resolver r.
+ *   This is only for hackers.
+ *)
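+
+(* Illustrative sketch: the two most common ways of building a source, and a
+ * plain well-formedness parse. The file name and the XML string are made up
+ * for the example.
+ *
+ *   let s1 = from_file "/tmp/file.xml" in
+ *   let s2 = from_string "<?xml version='1.0'?><doc>Hello</doc>" in
+ *   let d  = parse_wfdocument_entity default_config s2 default_spec in
+ *   ...
+ *)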
+
+
+
+val default_config : config
+  (* - Warnings are thrown away
+   * - Error messages will contain line numbers
+   * - Neither T_super_root nor T_pinstr nor T_comment nodes are generated
+   * - The internal encoding is ISO-8859-1
+   * - The standalone declaration is checked
+   * - Element positions are stored
+   * - The IDREF pass is left out
+   * - If available, DFAs are used for validation
+   * - Only deterministic content models are accepted
+   *) 
+
+val default_extension : ('a node extension) as 'a
+  (* A "null" extension; an extension that does not extend the functionality *)
+
+val default_spec : ('a node extension as 'a) spec
+  (* Specifies that you do not want to use extensions. *)
+
+val parse_dtd_entity : config -> source -> dtd
+  (* Parse an entity containing a DTD (external subset), and return this DTD. *)
+
+val extract_dtd_from_document_entity : config -> source -> dtd
+  (* Parses a closed document, i.e. a document beginning with <!DOCTYPE...>,
+   * and returns the DTD contained in the document.
+   * The parts of the document outside the DTD are actually not parsed,
+   * i.e. parsing stops when all declarations of the DTD have been read.
+   *)
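+
+(* Illustrative sketch, assuming the (made-up) files "doc.dtd" and "doc.xml":
+ * a DTD can either be parsed from a separate entity or extracted from a
+ * complete document.
+ *
+ *   let dtd1 = parse_dtd_entity default_config (from_file "doc.dtd") in
+ *   let dtd2 =
+ *     extract_dtd_from_document_entity default_config (from_file "doc.xml")
+ *   in
+ *   ...
+ *)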
+
+val parse_document_entity : 
+  ?transform_dtd:(dtd -> dtd) ->
+  ?id_index:('ext index) ->
+  config -> source -> 'ext spec -> 'ext document
+  (* Parse a closed document, i.e. a document beginning with <!DOCTYPE...>,
+   * and validate the contents of the document against the DTD contained
+   * and/or referenced in the document.
+   *
+   * If the optional argument ~transform_dtd is passed, the following 
+   * modification applies: After the DTD (both the internal and external
+   * subsets) has been parsed, the function ~transform_dtd is called,
+   * and the resulting DTD is actually used to validate the document.
+   *
+   * If the optional argument ~transform_dtd is missing, the parser
+   * behaves in the same way as if the identity were passed as ~transform_dtd.
+   *
+   * If the optional argument ~id_index is present, the parser adds
+   * any ID attribute to the passed index. An index is required to detect
+   * violations of the uniqueness of IDs.
+   *)
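+
+(* Illustrative sketch, assuming a made-up file "doc.xml": a validating parse
+ * in which the DTD is relaxed before it is used. allow_arbitrary is the DTD
+ * method that the well-formedness entry points of this module use internally
+ * to accept otherwise undeclared material.
+ *
+ *   let doc =
+ *     parse_document_entity
+ *       ~transform_dtd:(fun dtd -> dtd # allow_arbitrary; dtd)
+ *       default_config
+ *       (from_file "doc.xml")
+ *       default_spec
+ *   in
+ *   ...
+ *)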
+
+val parse_wfdocument_entity : 
+  config -> source -> 'ext spec -> 'ext document
+  (* Parse a closed document (see parse_document_entity), but do not
+   * validate it. Only checks on well-formedness are performed.
+   *)
+
+val parse_content_entity  : 
+  ?id_index:('ext index) ->
+  config -> source -> dtd -> 'ext spec -> 'ext node
+  (* Parse a file representing a well-formed fragment of a document. The
+   * fragment must be a single element (i.e. something like <a>...</a>;
+   * not a sequence like <a>...</a><b>...</b>). The element is validated
+   * against the passed DTD, but it is not checked whether the element is
+   * the root element specified in the DTD.
+   *
+   * If the optional argument ~id_index is present, the parser adds
+   * any ID attribute to the passed index. An index is required to detect
+   * violations of the uniqueness of IDs.
+   *)
+
+val parse_wfcontent_entity : 
+  config -> source -> 'ext spec -> 'ext node
+  (* Parse a file representing a well-formed fragment of a document
+   * (see parse_content_entity). The fragment is not validated, only
+   * checked for well-formedness.
+   *)
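+
+(* Illustrative sketch: parsing a single-element fragment without validation;
+ * the XML string is made up, and the node method "data" (from Pxp_document)
+ * is assumed to return the concatenated character data of the fragment.
+ *
+ *   let frag =
+ *     parse_wfcontent_entity default_config
+ *       (from_string "<a>some text</a>") default_spec
+ *   in
+ *   print_string (frag # data)
+ *)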
+  
+
+(*$-*)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:30  lpadovan
+ * Initial revision
+ *
+ * Revision 1.7  2000/08/18 20:15:43  gerd
+ *     Config options:
+ * - enable_super_root_nodes: new name for virtual_root
+ * - enable_pinstr_nodes: new name for processing_instructions_inline
+ * - enable_comment_nodes: new option
+ *     Updated comments for various options.
+ *
+ * Revision 1.6  2000/07/23 02:16:33  gerd
+ *     Support for DFAs.
+ *
+ * Revision 1.5  2000/07/14 13:57:29  gerd
+ *     Added the id_index feature.
+ *
+ * Revision 1.4  2000/07/09 17:52:54  gerd
+ *     New option store_element_positions.
+ *
+ * Revision 1.3  2000/07/08 16:26:21  gerd
+ *     Added the signatures of the functions
+ * 'extract_dtd_from_document_entity' and 'parse_wfcontent_entity'.
+ * Updated the signature of 'parse_document_entity': New optional
+ * argument 'transform_dtd'.
+ *     Updated the comments.
+ *
+ * Revision 1.2  2000/07/04 22:09:03  gerd
+ *     MAJOR CHANGE: Redesign of the interface (not yet complete).
+ *
+ * Revision 1.1  2000/05/29 23:48:38  gerd
+ *     Changed module names:
+ *             Markup_aux          into Pxp_aux
+ *             Markup_codewriter   into Pxp_codewriter
+ *             Markup_document     into Pxp_document
+ *             Markup_dtd          into Pxp_dtd
+ *             Markup_entity       into Pxp_entity
+ *             Markup_lexer_types  into Pxp_lexer_types
+ *             Markup_reader       into Pxp_reader
+ *             Markup_types        into Pxp_types
+ *             Markup_yacc         into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_yacc.mli:
+ *
+ * Revision 1.4  2000/05/29 21:14:57  gerd
+ *     Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.3  2000/05/27 19:24:01  gerd
+ *     New option: recognize_standalone_declaration.
+ *
+ * Revision 1.2  2000/05/20 20:31:40  gerd
+ *     Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1  2000/05/06 23:21:49  gerd
+ *     Initial revision.
+ *
+ * Revision 1.9  2000/04/30 18:23:38  gerd
+ *     New config options 'processing_instructions_inline' and
+ * 'virtual_root'.
+ *
+ * Revision 1.8  2000/03/13 23:46:46  gerd
+ *     Change: The 'resolver' component of the 'config' type has
+ * disappeared. Instead, there is a new resolver component in the Entity
+ * and ExtID values of 'source'. I hope that this makes clearer that the
+ * resolver has only an effect if used together with Entity and ExtID
+ * sources.
+ *     Change: The Entity value can now return the entity dependent
+ * on the DTD that is going to be used.
+ *
+ * Revision 1.7  2000/02/22 02:32:02  gerd
+ *     Updated.
+ *
+ * Revision 1.6  2000/02/22 01:52:45  gerd
+ *     Added documentation.
+ *
+ * Revision 1.5  2000/01/20 20:54:43  gerd
+ *     New config.errors_with_line_numbers.
+ *
+ * Revision 1.4  1999/09/01 23:09:10  gerd
+ *     New function parse_wf_entity that simulates a well-formedness
+ * parser.
+ *
+ * Revision 1.3  1999/09/01 16:26:36  gerd
+ *     Added an empty line. This is *really* a big change.
+ *
+ * Revision 1.2  1999/08/14 22:20:27  gerd
+ *         The "config" slot now has a component "warner", which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ *         Furthermore, there is a new component "debugging_mode".
+ *
+ * Revision 1.1  1999/08/10 00:35:52  gerd
+ *     Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/rtests/MISSING_TESTS b/helm/DEVEL/pxp/pxp/rtests/MISSING_TESTS
new file mode 100644 (file)
index 0000000..386830b
--- /dev/null
@@ -0,0 +1,6 @@
+- Conditional sections:
+
+  Conditional_begin and Conditional_end must be in the same entity.
+
+- NDATA: check whether ENTITY attributes refer only to declared
+  NDATA entities
diff --git a/helm/DEVEL/pxp/pxp/rtests/Makefile b/helm/DEVEL/pxp/pxp/rtests/Makefile
new file mode 100644 (file)
index 0000000..653aaf2
--- /dev/null
@@ -0,0 +1,32 @@
+.PHONY: all
+all:
+       $(MAKE) -C reader
+       $(MAKE) -C write
+       $(MAKE) -C codewriter
+       $(MAKE) -C canonxml
+       $(MAKE) -C negative
+
+.PHONY: clean
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.cmxa
+
+.PHONY: CLEAN
+CLEAN: clean
+       $(MAKE) -C reader clean
+       $(MAKE) -C write clean
+       $(MAKE) -C codewriter clean
+       $(MAKE) -C canonxml clean
+       $(MAKE) -C negative clean
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~
+       rm -f dumpfiles
+       $(MAKE) -C reader distclean
+       $(MAKE) -C write distclean
+       $(MAKE) -C codewriter distclean
+       $(MAKE) -C canonxml distclean
+       $(MAKE) -C negative distclean
+
+dumpfiles: dumpfiles.ml
+       ocamlc -o dumpfiles dumpfiles.ml
diff --git a/helm/DEVEL/pxp/pxp/rtests/README b/helm/DEVEL/pxp/pxp/rtests/README
new file mode 100644 (file)
index 0000000..5c56b7a
--- /dev/null
@@ -0,0 +1,16 @@
+----------------------------------------------------------------------
+(Anti) Regression tests
+----------------------------------------------------------------------
+
+- To build the tests, "markup" must already be compiled in the parent
+  directory (..).
+  Do "make" to start the compilation.
+
+- To run the tests:
+  ./run
+
+- Program dumpfiles: Do "make dumpfiles" to create it.
+  It takes XML file names on the command line, and writes a Latex
+  document on stdout. The document shows the contents of all files.
+  EXAMPLE:
+  $ ./dumpfiles canonxml/data_jclark_valid/ext-sa/*.* >x.tex
+  $ latex x
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/Makefile b/helm/DEVEL/pxp/pxp/rtests/canonxml/Makefile
new file mode 100644 (file)
index 0000000..afbefb3
--- /dev/null
@@ -0,0 +1,27 @@
+# make test_canonxml:   make bytecode executable
+# make clean:          remove intermediate files (in this directory)
+# make CLEAN:           remove intermediate files (recursively)
+# make distclean:      remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+OCAMLPATH=../..
+
+test_canonxml: test_canonxml.ml
+       ocamlfind ocamlc -g -custom -o test_canonxml -package .,str -linkpkg test_canonxml.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa out.xml
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~
+       rm -f test_canonxml
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/README b/helm/DEVEL/pxp/pxp/rtests/canonxml/README
new file mode 100644 (file)
index 0000000..c4c3303
--- /dev/null
@@ -0,0 +1,20 @@
+----------------------------------------------------------------------
+Regression test "canonxml":
+----------------------------------------------------------------------
+
+- An XML file is parsed, and the contents are printed in a canonical
+  format.
+
+- The output is compared with a reference file. The test is only
+  passed if the output and the reference are equal.
+
+- Test data "data_jclark_valid":
+  Contains the samples by James Clark that are valid. The subdirectories:
+  - sa:           standalone documents
+  - not-sa:       non-standalone document (with external DTD)
+  - ext-sa:       non-standalone document (with other external entity)
+
+  Tests that do not pass have been moved into the *-problems directories.
+  The reason is typically that characters outside the Latin 1 character set
+  have been used.
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/001.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/001.ent
new file mode 100644 (file)
index 0000000..0b7088e
--- /dev/null
@@ -0,0 +1 @@
+Data
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/001.xml
new file mode 100644 (file)
index 0000000..e4cc432
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "001.ent">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/002.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/002.ent
new file mode 100644 (file)
index 0000000..45f6d8e
--- /dev/null
@@ -0,0 +1 @@
+Data
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/002.xml
new file mode 100644 (file)
index 0000000..2ee5988
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "002.ent">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/003.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/003.ent
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/003.xml
new file mode 100644 (file)
index 0000000..407a4a1
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "003.ent">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/004.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/004.ent
new file mode 100644 (file)
index 0000000..0b7088e
--- /dev/null
@@ -0,0 +1 @@
+Data
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/004.xml
new file mode 100644 (file)
index 0000000..c3cdbd0
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "004.ent">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/005.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/005.ent
new file mode 100644 (file)
index 0000000..c6e97f8
--- /dev/null
@@ -0,0 +1 @@
+<e/><e/><e/>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/005.xml
new file mode 100644 (file)
index 0000000..66b8a91
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (e*)>
+<!ELEMENT e EMPTY>
+<!ENTITY e SYSTEM "005.ent">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/006.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/006.ent
new file mode 100644 (file)
index 0000000..2846b57
--- /dev/null
@@ -0,0 +1,4 @@
+Data
+<e/>
+More data
+<e/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/006.xml
new file mode 100644 (file)
index 0000000..b8f42b4
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA|e)*>
+<!ELEMENT e EMPTY>
+<!ENTITY e SYSTEM "006.ent">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/007.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/007.ent
new file mode 100644 (file)
index 0000000..ab1d696
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/007.ent differ
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/007.xml
new file mode 100644 (file)
index 0000000..50416ed
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "007.ent">
+]>
+<doc>X&e;Z</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/008.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/008.ent
new file mode 100644 (file)
index 0000000..c6ca61f
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/008.ent differ
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/008.xml
new file mode 100644 (file)
index 0000000..565f947
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "008.ent">
+]>
+<doc>X&e;Z</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/009.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/009.ent
new file mode 100644 (file)
index 0000000..8b13789
--- /dev/null
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/009.xml
new file mode 100644 (file)
index 0000000..8119aa0
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "009.ent">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/010.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/010.ent
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/010.xml
new file mode 100644 (file)
index 0000000..5c19ba2
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "010.ent">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/011.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/011.ent
new file mode 100644 (file)
index 0000000..cf7711b
--- /dev/null
@@ -0,0 +1 @@
+xyzzy
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/011.xml
new file mode 100644 (file)
index 0000000..c43795c
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e PUBLIC "a not very interesting file" "011.ent">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/012.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/012.ent
new file mode 100644 (file)
index 0000000..8eb1fb9
--- /dev/null
@@ -0,0 +1 @@
+&e4;
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/012.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/012.xml
new file mode 100644 (file)
index 0000000..42d538f
--- /dev/null
@@ -0,0 +1,9 @@
+<!DOCTYPE doc [
+<!ENTITY e1 "&e2;">
+<!ENTITY e2 "&e3;">
+<!ENTITY e3 SYSTEM "012.ent">
+<!ENTITY e4 "&e5;">
+<!ENTITY e5 "(e5)">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&e1;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/013.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/013.ent
new file mode 100644 (file)
index 0000000..7f25c50
--- /dev/null
@@ -0,0 +1 @@
+<e/>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/013.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/013.xml
new file mode 100644 (file)
index 0000000..e7f221f
--- /dev/null
@@ -0,0 +1,10 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (e)>
+<!ELEMENT e (#PCDATA)>
+<!ATTLIST e
+  a1 CDATA "a1 default"
+  a2 NMTOKENS "a2 default"
+>
+<!ENTITY x SYSTEM "013.ent">
+]>
+<doc>&x;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/014.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/014.ent
new file mode 100644 (file)
index 0000000..470fd6f
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/014.ent differ
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/014.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/014.xml
new file mode 100644 (file)
index 0000000..6b068d7
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "014.ent">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/001.xml
new file mode 100644 (file)
index 0000000..0a7acf8
--- /dev/null
@@ -0,0 +1 @@
+<doc>Data&#10;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/002.xml
new file mode 100644 (file)
index 0000000..d4a445e
--- /dev/null
@@ -0,0 +1 @@
+<doc>Data</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/003.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/004.xml
new file mode 100644 (file)
index 0000000..0a7acf8
--- /dev/null
@@ -0,0 +1 @@
+<doc>Data&#10;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/005.xml
new file mode 100644 (file)
index 0000000..6e293aa
--- /dev/null
@@ -0,0 +1 @@
+<doc><e></e><e></e><e></e></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/006.xml
new file mode 100644 (file)
index 0000000..04b6fc8
--- /dev/null
@@ -0,0 +1 @@
+<doc>Data&#10;<e></e>&#10;More data&#10;<e></e>&#10;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/007.xml
new file mode 100644 (file)
index 0000000..ab2a74c
--- /dev/null
@@ -0,0 +1 @@
+<doc>XYZ</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/008.xml
new file mode 100644 (file)
index 0000000..ab2a74c
--- /dev/null
@@ -0,0 +1 @@
+<doc>XYZ</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/009.xml
new file mode 100644 (file)
index 0000000..a79dff6
--- /dev/null
@@ -0,0 +1 @@
+<doc>&#10;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/010.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/011.xml
new file mode 100644 (file)
index 0000000..bf275ad
--- /dev/null
@@ -0,0 +1 @@
+<doc>xyzzy&#10;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/012.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/012.xml
new file mode 100644 (file)
index 0000000..81a251c
--- /dev/null
@@ -0,0 +1 @@
+<doc>(e5)</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/013.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/013.xml
new file mode 100644 (file)
index 0000000..524d94e
--- /dev/null
@@ -0,0 +1 @@
+<doc><e a1="a1 default" a2="a2 default"></e></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/014.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/ext-sa/out/014.xml
new file mode 100644 (file)
index 0000000..71c6dc3
--- /dev/null
@@ -0,0 +1 @@
+<doc>data</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/001.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/001.ent
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/001.xml
new file mode 100644 (file)
index 0000000..8419329
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc SYSTEM "001.ent" [
+<!ELEMENT doc EMPTY>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/002.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/002.ent
new file mode 100644 (file)
index 0000000..8b13789
--- /dev/null
@@ -0,0 +1 @@
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/002.xml
new file mode 100644 (file)
index 0000000..f497ac8
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc SYSTEM "002.ent" [
+<!ELEMENT doc EMPTY>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003-1.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003-1.ent
new file mode 100644 (file)
index 0000000..f7af6e8
--- /dev/null
@@ -0,0 +1,3 @@
+<!ELEMENT doc EMPTY>
+<!ENTITY % e SYSTEM "003-2.ent">
+<!ATTLIST doc a1 CDATA %e; "v1">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003-2.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003-2.ent
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/003.xml
new file mode 100644 (file)
index 0000000..465dafe
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "003-1.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004-1.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004-1.ent
new file mode 100644 (file)
index 0000000..a988ade
--- /dev/null
@@ -0,0 +1,4 @@
+<!ELEMENT doc EMPTY>
+<!ENTITY % e1 SYSTEM "004-2.ent">
+<!ENTITY % e2 "%e1;">
+%e1;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004-2.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004-2.ent
new file mode 100644 (file)
index 0000000..f2ed894
--- /dev/null
@@ -0,0 +1 @@
+<!ATTLIST doc a1 CDATA "value">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/004.xml
new file mode 100644 (file)
index 0000000..95c9a92
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "004-1.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005-1.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005-1.ent
new file mode 100644 (file)
index 0000000..6e224b5
--- /dev/null
@@ -0,0 +1,3 @@
+<!ELEMENT doc EMPTY>
+<!ENTITY % e SYSTEM "005-2.ent">
+%e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005-2.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005-2.ent
new file mode 100644 (file)
index 0000000..8611eaf
--- /dev/null
@@ -0,0 +1 @@
+<!ATTLIST doc a1 CDATA "v1">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/005.xml
new file mode 100644 (file)
index 0000000..9b87d9e
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "005-1.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/006.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/006.ent
new file mode 100644 (file)
index 0000000..c9f9cfe
--- /dev/null
@@ -0,0 +1,2 @@
+<!ELEMENT doc EMPTY>
+<!ATTLIST doc a1 CDATA "w1" a2 CDATA "w2">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/006.xml
new file mode 100644 (file)
index 0000000..3b9f3d3
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc SYSTEM "006.ent" [
+<!ATTLIST doc a1 CDATA "v1">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/007.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/007.ent
new file mode 100644 (file)
index 0000000..a26a45e
--- /dev/null
@@ -0,0 +1,2 @@
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/007.xml
new file mode 100644 (file)
index 0000000..1c5bc80
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "007.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/008.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/008.ent
new file mode 100644 (file)
index 0000000..a26a45e
--- /dev/null
@@ -0,0 +1,2 @@
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/008.xml
new file mode 100644 (file)
index 0000000..0138562
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc PUBLIC "whatever" "008.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/009.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/009.ent
new file mode 100644 (file)
index 0000000..a26a45e
--- /dev/null
@@ -0,0 +1,2 @@
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/009.xml
new file mode 100644 (file)
index 0000000..a000855
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc PUBLIC "whatever" "009.ent" [
+<!ATTLIST doc a2 CDATA "v2">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/010.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/010.ent
new file mode 100644 (file)
index 0000000..e4c75bb
--- /dev/null
@@ -0,0 +1,2 @@
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v2">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/010.xml
new file mode 100644 (file)
index 0000000..0cbf093
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc SYSTEM "010.ent" [
+<!ATTLIST doc a1 CDATA "v1">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/011.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/011.ent
new file mode 100644 (file)
index 0000000..a26a45e
--- /dev/null
@@ -0,0 +1,2 @@
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/011.xml
new file mode 100644 (file)
index 0000000..a105cc5
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ENTITY % e SYSTEM "011.ent">
+%e;
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/012.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/012.ent
new file mode 100644 (file)
index 0000000..2714b33
--- /dev/null
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/012.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/012.xml
new file mode 100644 (file)
index 0000000..832359c
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ENTITY % e SYSTEM "012.ent">
+%e;
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/013.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/013.ent
new file mode 100644 (file)
index 0000000..0fce054
--- /dev/null
@@ -0,0 +1,4 @@
+<!ELEMENT doc (#PCDATA)>
+<![ INCLUDE [
+<!ATTLIST doc a1 CDATA "v1">
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/013.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/013.xml
new file mode 100644 (file)
index 0000000..0430cc9
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "013.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/014.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/014.ent
new file mode 100644 (file)
index 0000000..827e12e
--- /dev/null
@@ -0,0 +1,4 @@
+<!ELEMENT doc (#PCDATA)>
+<![ %e; [
+<!ATTLIST doc a1 CDATA "v1">
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/014.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/014.xml
new file mode 100644 (file)
index 0000000..8ce35a3
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc SYSTEM "014.ent" [
+<!ENTITY % e "INCLUDE">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/015.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/015.ent
new file mode 100644 (file)
index 0000000..9089b1c
--- /dev/null
@@ -0,0 +1,5 @@
+<!ELEMENT doc (#PCDATA)>
+<![ %e; [
+<!ATTLIST doc a1 CDATA "v1">
+]]>
+<!ATTLIST doc a2 CDATA "v2">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/015.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/015.xml
new file mode 100644 (file)
index 0000000..2e0103e
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc SYSTEM "015.ent" [
+<!ENTITY % e "IGNORE">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/016.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/016.ent
new file mode 100644 (file)
index 0000000..7a11d00
--- /dev/null
@@ -0,0 +1,4 @@
+<!ELEMENT doc (#PCDATA)>
+<![%e;[
+<!ATTLIST doc a1 CDATA "v1">
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/016.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/016.xml
new file mode 100644 (file)
index 0000000..4340c9e
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc SYSTEM "016.ent" [
+<!ENTITY % e "INCLUDE">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/017.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/017.ent
new file mode 100644 (file)
index 0000000..6718e5c
--- /dev/null
@@ -0,0 +1,3 @@
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "<!ATTLIST doc a1 CDATA 'v1'>">
+%e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/017.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/017.xml
new file mode 100644 (file)
index 0000000..e2d218a
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "017.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/018.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/018.ent
new file mode 100644 (file)
index 0000000..ecaa135
--- /dev/null
@@ -0,0 +1,3 @@
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "'v1'">
+<!ATTLIST doc a1 CDATA %e;>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/018.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/018.xml
new file mode 100644 (file)
index 0000000..e27f48b
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "018.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/019.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/019.ent
new file mode 100644 (file)
index 0000000..7d56007
--- /dev/null
@@ -0,0 +1,3 @@
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "'v1'">
+<!ATTLIST doc a1 CDATA%e;>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/019.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/019.xml
new file mode 100644 (file)
index 0000000..f9e9301
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "019.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/020.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/020.ent
new file mode 100644 (file)
index 0000000..c681b50
--- /dev/null
@@ -0,0 +1,3 @@
+<!ENTITY % e "doc">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST%e;a1 CDATA "v1">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/020.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/020.xml
new file mode 100644 (file)
index 0000000..e009e38
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "020.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/021.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/021.ent
new file mode 100644 (file)
index 0000000..22d7134
--- /dev/null
@@ -0,0 +1,3 @@
+<!ENTITY % e "doc a1 CDATA">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST %e; "v1">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/021.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/021.xml
new file mode 100644 (file)
index 0000000..77789b6
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "021.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/022.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/022.ent
new file mode 100644 (file)
index 0000000..13bab79
--- /dev/null
@@ -0,0 +1,3 @@
+<!ENTITY % e "INCLUDE[">
+<!ELEMENT doc (#PCDATA)>
+<![ %e; <!ATTLIST doc a1 CDATA "v1"> ]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/022.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/022.xml
new file mode 100644 (file)
index 0000000..62bad5d
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "022.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/023.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/023.ent
new file mode 100644 (file)
index 0000000..4498d84
--- /dev/null
@@ -0,0 +1,5 @@
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e1 "do">
+<!ENTITY % e2 "c">
+<!ENTITY % e3 "%e1;%e2;">
+<!ATTLIST %e3; a1 CDATA "v1">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/023.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/023.xml
new file mode 100644 (file)
index 0000000..2a8d4d9
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "023.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/024.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/024.ent
new file mode 100644 (file)
index 0000000..02c1878
--- /dev/null
@@ -0,0 +1,4 @@
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e1 "'v1'">
+<!ENTITY % e2 'a1 CDATA %e1;'>
+<!ATTLIST doc %e2;>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/024.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/024.xml
new file mode 100644 (file)
index 0000000..2121cae
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "024.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/025.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/025.ent
new file mode 100644 (file)
index 0000000..d0ee124
--- /dev/null
@@ -0,0 +1,5 @@
+<!ELEMENT doc EMPTY>
+<!ENTITY % e "x">
+<!ENTITY % e "y">
+<!ENTITY % v "'%e;'">
+<!ATTLIST doc a1 CDATA %v;>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/025.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/025.xml
new file mode 100644 (file)
index 0000000..8d5bf3e
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "025.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/026.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/026.ent
new file mode 100644 (file)
index 0000000..1e35698
--- /dev/null
@@ -0,0 +1 @@
+<!ATTLIST doc a1 CDATA "w1">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/026.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/026.xml
new file mode 100644 (file)
index 0000000..2b320cc
--- /dev/null
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!ELEMENT doc ANY>
+<!ENTITY % e SYSTEM "026.ent">
+%e;
+<!ATTLIST doc a1 CDATA "x1" a2 CDATA "x2">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/027.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/027.ent
new file mode 100644 (file)
index 0000000..36a5466
--- /dev/null
@@ -0,0 +1,2 @@
+<!ENTITY % e "">
+<!ELEMENT doc (#PCDATA %e;)>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/027.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/027.xml
new file mode 100644 (file)
index 0000000..cab657f
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "027.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/028.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/028.ent
new file mode 100644 (file)
index 0000000..0b84263
--- /dev/null
@@ -0,0 +1,2 @@
+<!ELEMENT doc (#PCDATA)>
+<![INCLUDE[<!ATTLIST doc a1 CDATA "v1">]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/028.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/028.xml
new file mode 100644 (file)
index 0000000..70a6cf2
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "028.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/029.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/029.ent
new file mode 100644 (file)
index 0000000..6ba25e7
--- /dev/null
@@ -0,0 +1,3 @@
+<!ELEMENT doc (#PCDATA)>
+<![IGNORE[<!ATTLIST doc a1 CDATA "v1">]]>
+<!ATTLIST doc a1 CDATA "v2">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/029.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/029.xml
new file mode 100644 (file)
index 0000000..4e69c41
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "029.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/030.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/030.ent
new file mode 100644 (file)
index 0000000..f623441
--- /dev/null
@@ -0,0 +1,3 @@
+<!ELEMENT doc (#PCDATA)>
+<![IGNORE[]]>
+<![INCLUDE[]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/030.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/030.xml
new file mode 100644 (file)
index 0000000..376e84c
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "030.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031-1.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031-1.ent
new file mode 100644 (file)
index 0000000..8fd57b7
--- /dev/null
@@ -0,0 +1,3 @@
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e SYSTEM "031-2.ent">
+<!ENTITY e "<![CDATA[%e;]]>">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031-2.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031-2.ent
new file mode 100644 (file)
index 0000000..8611eaf
--- /dev/null
@@ -0,0 +1 @@
+<!ATTLIST doc a1 CDATA "v1">
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/031.xml
new file mode 100644 (file)
index 0000000..5a94354
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "031-1.ent">
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/001.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/002.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/003.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/004.xml
new file mode 100644 (file)
index 0000000..bdc39e2
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="value"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/005.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/006.xml
new file mode 100644 (file)
index 0000000..d07627d
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1" a2="w2"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/007.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/008.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/009.xml
new file mode 100644 (file)
index 0000000..7293fb6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1" a2="v2"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/010.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/011.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/012.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/012.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/013.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/013.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/014.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/014.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/015.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/015.xml
new file mode 100644 (file)
index 0000000..131a32f
--- /dev/null
@@ -0,0 +1 @@
+<doc a2="v2"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/016.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/016.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/017.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/017.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/018.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/018.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/019.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/019.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/020.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/020.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/021.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/021.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/022.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/022.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/023.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/023.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/024.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/024.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/025.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/025.xml
new file mode 100644 (file)
index 0000000..eb3f967
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="x"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/026.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/026.xml
new file mode 100644 (file)
index 0000000..71c0202
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="w1" a2="x2"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/027.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/027.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/028.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/028.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/029.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/029.xml
new file mode 100644 (file)
index 0000000..7ac8b2b
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v2"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/030.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/030.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/031.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/not-sa/out/031.xml
new file mode 100644 (file)
index 0000000..03a6c3f
--- /dev/null
@@ -0,0 +1 @@
+<doc>&lt;!ATTLIST doc a1 CDATA &quot;v1&quot;&gt;&#10;</doc>
\ No newline at end of file
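Each file under data_jclark_valid/not-sa/out/ above is the expected canonical-XML form of the corresponding not-sa input: default attributes from the DTD are filled in, empty elements are expanded to start/end tag pairs, and the output carries no trailing newline (hence the "\ No newline at end of file" markers). A minimal golden-file comparison over one such pair might look like the OCaml sketch below; the file names and the shape of the harness are assumptions for illustration only, since the actual driver under rtests/canonxml is not part of this excerpt.

(* Hypothetical golden-file check: compare the canonical output produced for a
   test case against the expected out/NNN.xml file added in this commit.
   Illustrative sketch only; not the actual rtests/canonxml driver. *)
let read_file path =
  let ic = open_in_bin path in
  let len = in_channel_length ic in
  let data = really_input_string ic len in
  close_in ic;
  data

let check_case ~actual_file ~expected_file =
  if read_file actual_file = read_file expected_file
  then Printf.printf "OK   %s\n" expected_file
  else Printf.printf "FAIL %s (canonical output differs)\n" expected_file

let () =
  (* Example: the first not-sa pair from this commit; "canon_output/001.xml"
     stands in for whatever the parser under test emits. *)
  check_case
    ~actual_file:"canon_output/001.xml"
    ~expected_file:"data_jclark_valid/not-sa/out/001.xml"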
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/001.xml
new file mode 100644 (file)
index 0000000..d1e11b7
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/002.xml
new file mode 100644 (file)
index 0000000..671db91
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc ></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/003.xml
new file mode 100644 (file)
index 0000000..8130204
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc >
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/004.xml
new file mode 100644 (file)
index 0000000..c805282
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+]>
+<doc a1="v1"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/005.xml
new file mode 100644 (file)
index 0000000..9b203e7
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+]>
+<doc a1 = "v1"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/006.xml
new file mode 100644 (file)
index 0000000..13c9477
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+]>
+<doc a1='v1'></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/007.xml
new file mode 100644 (file)
index 0000000..1bb7d38
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#32;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/008.xml
new file mode 100644 (file)
index 0000000..e4b9ab7
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&amp;&lt;&gt;&quot;&apos;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/009.xml
new file mode 100644 (file)
index 0000000..1fbdc30
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#x20;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/010.xml
new file mode 100644 (file)
index 0000000..a964d28
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+]>
+<doc a1="v1" ></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/011.xml
new file mode 100644 (file)
index 0000000..8f99e5f
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED a2 CDATA #IMPLIED>
+]>
+<doc a1="v1" a2="v2"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/012.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/012.xml
new file mode 100644 (file)
index 0000000..7c07c88
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc : CDATA #IMPLIED>
+]>
+<doc :="v1"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/013.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/013.xml
new file mode 100644 (file)
index 0000000..1cf6401
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc _.-0123456789 CDATA #IMPLIED>
+]>
+<doc _.-0123456789="v1"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/014.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/014.xml
new file mode 100644 (file)
index 0000000..244ec80
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc abcdefghijklmnopqrstuvwxyz CDATA #IMPLIED>
+]>
+<doc abcdefghijklmnopqrstuvwxyz="v1"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/015.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/015.xml
new file mode 100644 (file)
index 0000000..011275b
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc ABCDEFGHIJKLMNOPQRSTUVWXYZ CDATA #IMPLIED>
+]>
+<doc ABCDEFGHIJKLMNOPQRSTUVWXYZ="v1"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/016.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/016.xml
new file mode 100644 (file)
index 0000000..d863f85
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><?pi?></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/017.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/017.xml
new file mode 100644 (file)
index 0000000..3ace15b
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><?pi some data ? > <??></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/018.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/018.xml
new file mode 100644 (file)
index 0000000..5b3bf06
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><![CDATA[<foo>]]></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/019.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/019.xml
new file mode 100644 (file)
index 0000000..1e935d4
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><![CDATA[<&]]></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/020.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/020.xml
new file mode 100644 (file)
index 0000000..e8525e6
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><![CDATA[<&]>]]]></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/021.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/021.xml
new file mode 100644 (file)
index 0000000..6923ee2
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><!-- a comment --></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/022.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/022.xml
new file mode 100644 (file)
index 0000000..a510a7f
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><!-- a comment ->--></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/023.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/023.xml
new file mode 100644 (file)
index 0000000..835e909
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/024.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/024.xml
new file mode 100644 (file)
index 0000000..9ba2f0e
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (foo)>
+<!ELEMENT foo (#PCDATA)>
+<!ENTITY e "&#60;foo></foo>">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/025.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/025.xml
new file mode 100644 (file)
index 0000000..6c78b2e
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (foo*)>
+<!ELEMENT foo (#PCDATA)>
+]>
+<doc><foo/><foo></foo></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/026.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/026.xml
new file mode 100644 (file)
index 0000000..4d71396
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (foo*)>
+<!ELEMENT foo EMPTY>
+]>
+<doc><foo/><foo></foo></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/027.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/027.xml
new file mode 100644 (file)
index 0000000..dcd3a9a
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (foo*)>
+<!ELEMENT foo ANY>
+]>
+<doc><foo/><foo></foo></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/028.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/028.xml
new file mode 100644 (file)
index 0000000..24e4ec0
--- /dev/null
@@ -0,0 +1,5 @@
+<?xml version="1.0"?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/029.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/029.xml
new file mode 100644 (file)
index 0000000..70e9ffb
--- /dev/null
@@ -0,0 +1,5 @@
+<?xml version='1.0'?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/030.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/030.xml
new file mode 100644 (file)
index 0000000..a171155
--- /dev/null
@@ -0,0 +1,5 @@
+<?xml version = "1.0"?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/031.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/031.xml
new file mode 100644 (file)
index 0000000..d97d137
--- /dev/null
@@ -0,0 +1,5 @@
+<?xml version='1.0' encoding="UTF-8"?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/032.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/032.xml
new file mode 100644 (file)
index 0000000..57ceada
--- /dev/null
@@ -0,0 +1,5 @@
+<?xml version='1.0' standalone='yes'?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/033.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/033.xml
new file mode 100644 (file)
index 0000000..e3ce40e
--- /dev/null
@@ -0,0 +1,5 @@
+<?xml version='1.0' encoding="UTF-8" standalone='yes'?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/034.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/034.xml
new file mode 100644 (file)
index 0000000..832e9d9
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/035.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/035.xml
new file mode 100644 (file)
index 0000000..a3500fd
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc />
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/036.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/036.xml
new file mode 100644 (file)
index 0000000..ceacd4d
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
+<?pi data?>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/037.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/037.xml
new file mode 100644 (file)
index 0000000..e3add8e
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
+<!-- comment -->
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/038.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/038.xml
new file mode 100644 (file)
index 0000000..087662d
--- /dev/null
@@ -0,0 +1,6 @@
+<!-- comment -->
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/039.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/039.xml
new file mode 100644 (file)
index 0000000..84c5b93
--- /dev/null
@@ -0,0 +1,5 @@
+<?pi data?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/040.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/040.xml
new file mode 100644 (file)
index 0000000..4482f2c
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+]>
+<doc a1="&quot;&lt;&amp;&gt;&apos;"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/041.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/041.xml
new file mode 100644 (file)
index 0000000..80bb2da
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+]>
+<doc a1="&#65;"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/042.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/042.xml
new file mode 100644 (file)
index 0000000..5cb4bbe
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#00000000000000000000000000000000065;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/043.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/043.xml
new file mode 100644 (file)
index 0000000..4774e5a
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ATTLIST doc a1 CDATA #IMPLIED>
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc a1="foo
+bar"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/044.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/044.xml
new file mode 100644 (file)
index 0000000..8321a14
--- /dev/null
@@ -0,0 +1,10 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (e*)>
+<!ELEMENT e EMPTY>
+<!ATTLIST e a1 CDATA "v1" a2 CDATA "v2" a3 CDATA #IMPLIED>
+]>
+<doc>
+<e a3="v3"/>
+<e a1="w1"/>
+<e a2="w2" a3="v3"/>
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/045.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/045.xml
new file mode 100644 (file)
index 0000000..2d70b32
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
+<!ATTLIST doc a1 CDATA "z1">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/046.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/046.xml
new file mode 100644 (file)
index 0000000..79e039b
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
+<!ATTLIST doc a2 CDATA "v2">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/047.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/047.xml
new file mode 100644 (file)
index 0000000..6dd4ddf
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>X
+Y</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/048.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/048.xml
new file mode 100644 (file)
index 0000000..e0c6caf
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>]</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/049.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/049.xml
new file mode 100644 (file)
index 0000000..8cec20d
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/049.xml differ
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/050.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/050.xml
new file mode 100644 (file)
index 0000000..00e7a78
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/050.xml differ
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/051.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/051.xml
new file mode 100644 (file)
index 0000000..1202e50
Binary files /dev/null and b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/051.xml differ
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/052.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/052.xml
new file mode 100644 (file)
index 0000000..69c9902
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>𐀀􏿽</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/053.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/053.xml
new file mode 100644 (file)
index 0000000..d8718a4
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ENTITY e "<e/>">
+<!ELEMENT doc (e)>
+<!ELEMENT e EMPTY>
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/054.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/054.xml
new file mode 100644 (file)
index 0000000..638693b
--- /dev/null
@@ -0,0 +1,10 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+
+
+<doc
+></doc
+>
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/055.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/055.xml
new file mode 100644 (file)
index 0000000..a1a2a7a
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<?pi  data?>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/056.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/056.xml
new file mode 100644 (file)
index 0000000..1aa0813
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#x0000000000000000000000000000000000000041;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/057.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/057.xml
new file mode 100644 (file)
index 0000000..796ec2d
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a*)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/058.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/058.xml
new file mode 100644 (file)
index 0000000..f0830bf
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ATTLIST doc a1 NMTOKENS #IMPLIED>
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc a1=" 1    2       "></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/059.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/059.xml
new file mode 100644 (file)
index 0000000..07604ef
--- /dev/null
@@ -0,0 +1,10 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (e*)>
+<!ELEMENT e EMPTY>
+<!ATTLIST e a1 CDATA #IMPLIED a2 CDATA #IMPLIED a3 CDATA #IMPLIED>
+]>
+<doc>
+<e a1="v1" a2="v2" a3="v3"/>
+<e a1="w1" a2="v2"/>
+<e a1="v1" a2="w2" a3="v3"/>
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/060.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/060.xml
new file mode 100644 (file)
index 0000000..fc20976
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>X&#10;Y</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/061.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/061.xml
new file mode 100644 (file)
index 0000000..65f6d4d
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#163;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/062.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/062.xml
new file mode 100644 (file)
index 0000000..5f4aab7
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#xe40;&#xe08;&#xe21;ส์</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/063.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/063.xml
new file mode 100644 (file)
index 0000000..a6dcdc6
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE เจมส์ [
+<!ELEMENT เจมส์ (#PCDATA)>
+]>
+<เจมส์></เจมส์>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/064.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/064.xml
new file mode 100644 (file)
index 0000000..c59acf8
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#x10000;&#x10FFFD;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/065.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/065.xml
new file mode 100644 (file)
index 0000000..1156e6d
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ENTITY e "&#60;">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/066.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/066.xml
new file mode 100644 (file)
index 0000000..f332ffc
--- /dev/null
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+<!-- 34 is double quote -->
+<!ENTITY e1 "&#34;">
+]>
+<doc a1="&e1;"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/067.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/067.xml
new file mode 100644 (file)
index 0000000..470ee63
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#13;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/068.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/068.xml
new file mode 100644 (file)
index 0000000..e1f87a9
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "&#13;">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/069.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/069.xml
new file mode 100644 (file)
index 0000000..8f4c458
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!NOTATION n PUBLIC "whatever">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/070.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/070.xml
new file mode 100644 (file)
index 0000000..61a6d28
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ENTITY % e "<!ELEMENT doc (#PCDATA)>">
+%e;
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/071.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/071.xml
new file mode 100644 (file)
index 0000000..b07de8c
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a ID #IMPLIED>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/072.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/072.xml
new file mode 100644 (file)
index 0000000..68d5170
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a IDREF #IMPLIED>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/073.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/073.xml
new file mode 100644 (file)
index 0000000..8466a10
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a IDREFS #IMPLIED>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/074.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/074.xml
new file mode 100644 (file)
index 0000000..d88455f
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a ENTITY #IMPLIED>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/075.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/075.xml
new file mode 100644 (file)
index 0000000..fd73101
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a ENTITIES #IMPLIED>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/076.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/076.xml
new file mode 100644 (file)
index 0000000..7f3d037
--- /dev/null
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a NOTATION (n1|n2) #IMPLIED>
+<!NOTATION n1 SYSTEM "http://www.w3.org/">
+<!NOTATION n2 SYSTEM "http://www.w3.org/">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/077.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/077.xml
new file mode 100644 (file)
index 0000000..fb7b9fa
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a (1|2) #IMPLIED>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/078.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/078.xml
new file mode 100644 (file)
index 0000000..6f8ce37
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #REQUIRED>
+]>
+<doc a="v"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/079.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/079.xml
new file mode 100644 (file)
index 0000000..b647d0d
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #FIXED "v">
+]>
+<doc a="v"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/080.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/080.xml
new file mode 100644 (file)
index 0000000..e644746
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #FIXED "v">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/081.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/081.xml
new file mode 100644 (file)
index 0000000..e17bb75
--- /dev/null
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a, b, c)>
+<!ELEMENT a (a?)>
+<!ELEMENT b (b*)>
+<!ELEMENT c (a | b)+>
+]>
+<doc><a/><b/><c><a/></c></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/082.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/082.xml
new file mode 100644 (file)
index 0000000..055e0c2
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ENTITY % e SYSTEM "e.dtd">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/083.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/083.xml
new file mode 100644 (file)
index 0000000..1451165
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ENTITY % e PUBLIC 'whatever' "e.dtd">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/084.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/084.xml
new file mode 100644 (file)
index 0000000..c36bba2
--- /dev/null
@@ -0,0 +1 @@
+<!DOCTYPE doc [<!ELEMENT doc (#PCDATA)>]><doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/085.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/085.xml
new file mode 100644 (file)
index 0000000..d173b8b
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "<foo>">
+<!ENTITY e "">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/086.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/086.xml
new file mode 100644 (file)
index 0000000..1f82c22
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "">
+<!ENTITY e "<foo>">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/087.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/087.xml
new file mode 100644 (file)
index 0000000..d3c7e39
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ENTITY e "<foo/&#62;">
+<!ELEMENT doc (foo)>
+<!ELEMENT foo EMPTY>
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/088.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/088.xml
new file mode 100644 (file)
index 0000000..7bd0e81
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "&lt;foo>">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/089.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/089.xml
new file mode 100644 (file)
index 0000000..e144c75
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ENTITY e "&#x10000;&#x10FFFD;&#x10FFFF;">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/090.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/090.xml
new file mode 100644 (file)
index 0000000..f349eef
--- /dev/null
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!ATTLIST e a NOTATION (n) #IMPLIED>
+<!ELEMENT doc (e)*>
+<!ELEMENT e (#PCDATA)>
+<!NOTATION n PUBLIC "whatever">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/091.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/091.xml
new file mode 100644 (file)
index 0000000..a779ce8
--- /dev/null
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!NOTATION n SYSTEM "http://www.w3.org/">
+<!ENTITY e SYSTEM "http://www.w3.org/" NDATA n>
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a ENTITY "e">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/092.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/092.xml
new file mode 100644 (file)
index 0000000..4e8eb6c
--- /dev/null
@@ -0,0 +1,10 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a)*>
+<!ELEMENT a EMPTY>
+]>
+<doc>
+<a/>
+    <a/>       <a/>
+
+
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/093.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/093.xml
new file mode 100644 (file)
index 0000000..300578e
--- /dev/null
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>
+
+
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/094.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/094.xml
new file mode 100644 (file)
index 0000000..09045d9
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ENTITY % e "foo">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "%e;">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/095.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/095.xml
new file mode 100644 (file)
index 0000000..f6e1287
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ATTLIST doc a1 CDATA #IMPLIED>
+<!ATTLIST doc a1 NMTOKENS #IMPLIED>
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc a1="1  2"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/096.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/096.xml
new file mode 100644 (file)
index 0000000..5fd635d
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ATTLIST doc a1 NMTOKENS " 1          2       ">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/097.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/097.ent
new file mode 100644 (file)
index 0000000..8bb4305
--- /dev/null
@@ -0,0 +1 @@
+<!ATTLIST doc a2 CDATA #IMPLIED>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/097.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/097.xml
new file mode 100644 (file)
index 0000000..74b636f
--- /dev/null
@@ -0,0 +1,8 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e SYSTEM "097.ent">
+<!ATTLIST doc a1 CDATA "v1">
+%e;
+<!ATTLIST doc a2 CDATA "v2">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/098.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/098.xml
new file mode 100644 (file)
index 0000000..881de4e
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><?pi x
+y?></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/099.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/099.xml
new file mode 100644 (file)
index 0000000..64db61f
--- /dev/null
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/100.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/100.xml
new file mode 100644 (file)
index 0000000..033855e
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ENTITY e PUBLIC ";!*#@$_%" "100.xml">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/101.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/101.xml
new file mode 100644 (file)
index 0000000..efd9efe
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "&#34;">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/102.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/102.xml
new file mode 100644 (file)
index 0000000..e034d04
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="&#34;"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/103.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/103.xml
new file mode 100644 (file)
index 0000000..6b21a13
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#60;doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/104.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/104.xml
new file mode 100644 (file)
index 0000000..6469bc6
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="x      y"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/105.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/105.xml
new file mode 100644 (file)
index 0000000..3acc97c
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="x&#9;y"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/106.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/106.xml
new file mode 100644 (file)
index 0000000..4f3e8c8
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="x&#10;y"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/107.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/107.xml
new file mode 100644 (file)
index 0000000..2a58153
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="x&#13;y"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/108.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/108.xml
new file mode 100644 (file)
index 0000000..938f287
--- /dev/null
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "
+">
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="x&e;y"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/109.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/109.xml
new file mode 100644 (file)
index 0000000..dcf2686
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a=""></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/110.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/110.xml
new file mode 100644 (file)
index 0000000..c6a3a87
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "&#13;&#10;">
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="x&e;y"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/111.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/111.xml
new file mode 100644 (file)
index 0000000..b4cdca6
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a NMTOKENS #IMPLIED>
+]>
+<doc a="&#32;x&#32;&#32;y&#32;"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/112.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/112.xml
new file mode 100644 (file)
index 0000000..7924e67
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a | b)>
+<!ELEMENT a (#PCDATA)>
+]>
+<doc><a></a></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/113.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/113.xml
new file mode 100644 (file)
index 0000000..51d2e6d
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST e a CDATA #IMPLIED>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/114.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/114.xml
new file mode 100644 (file)
index 0000000..f706b1d
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "<![CDATA[&foo;]]>">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/115.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/115.xml
new file mode 100644 (file)
index 0000000..cacac33
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e1 "&e2;">
+<!ENTITY e2 "v">
+]>
+<doc>&e1;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/116.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/116.xml
new file mode 100644 (file)
index 0000000..7a174cf
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><![CDATA[
+]]></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/117.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/117.xml
new file mode 100644 (file)
index 0000000..23c8406
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY rsqb "]">
+]>
+<doc>&rsqb;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/118.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/118.xml
new file mode 100644 (file)
index 0000000..068db63
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY rsqb "]]">
+]>
+<doc>&rsqb;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/119.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/119.xml
new file mode 100644 (file)
index 0000000..dafff48
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc ANY>
+]>
+<doc><!-- -á --></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/001.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/002.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/003.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/004.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/005.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/006.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/007.xml
new file mode 100644 (file)
index 0000000..97cf3e3
--- /dev/null
@@ -0,0 +1 @@
+<doc> </doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/008.xml
new file mode 100644 (file)
index 0000000..3ea232c
--- /dev/null
@@ -0,0 +1 @@
+<doc>&amp;&lt;&gt;&quot;'</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/009.xml
new file mode 100644 (file)
index 0000000..97cf3e3
--- /dev/null
@@ -0,0 +1 @@
+<doc> </doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/010.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/011.xml
new file mode 100644 (file)
index 0000000..7293fb6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1" a2="v2"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/012.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/012.xml
new file mode 100644 (file)
index 0000000..5a0c983
--- /dev/null
@@ -0,0 +1 @@
+<doc :="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/013.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/013.xml
new file mode 100644 (file)
index 0000000..c9c7ec5
--- /dev/null
@@ -0,0 +1 @@
+<doc _.-0123456789="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/014.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/014.xml
new file mode 100644 (file)
index 0000000..ac6b28f
--- /dev/null
@@ -0,0 +1 @@
+<doc abcdefghijklmnopqrstuvwxyz="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/015.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/015.xml
new file mode 100644 (file)
index 0000000..8e216eb
--- /dev/null
@@ -0,0 +1 @@
+<doc ABCDEFGHIJKLMNOPQRSTUVWXYZ="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/016.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/016.xml
new file mode 100644 (file)
index 0000000..4fc7692
--- /dev/null
@@ -0,0 +1 @@
+<doc><?pi ?></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/017.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/017.xml
new file mode 100644 (file)
index 0000000..3b9a2f8
--- /dev/null
@@ -0,0 +1 @@
+<doc><?pi some data ? > <??></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/018.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/018.xml
new file mode 100644 (file)
index 0000000..a547101
--- /dev/null
@@ -0,0 +1 @@
+<doc>&lt;foo&gt;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/019.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/019.xml
new file mode 100644 (file)
index 0000000..05d4e2f
--- /dev/null
@@ -0,0 +1 @@
+<doc>&lt;&amp;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/020.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/020.xml
new file mode 100644 (file)
index 0000000..95ae08a
--- /dev/null
@@ -0,0 +1 @@
+<doc>&lt;&amp;]&gt;]</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/021.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/021.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/022.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/022.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/023.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/023.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/024.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/024.xml
new file mode 100644 (file)
index 0000000..a9aa207
--- /dev/null
@@ -0,0 +1 @@
+<doc><foo></foo></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/025.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/025.xml
new file mode 100644 (file)
index 0000000..de0f566
--- /dev/null
@@ -0,0 +1 @@
+<doc><foo></foo><foo></foo></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/026.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/026.xml
new file mode 100644 (file)
index 0000000..de0f566
--- /dev/null
@@ -0,0 +1 @@
+<doc><foo></foo><foo></foo></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/027.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/027.xml
new file mode 100644 (file)
index 0000000..de0f566
--- /dev/null
@@ -0,0 +1 @@
+<doc><foo></foo><foo></foo></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/028.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/028.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/029.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/029.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/030.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/030.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/031.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/031.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/032.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/032.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/033.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/033.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/034.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/034.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/035.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/035.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/036.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/036.xml
new file mode 100644 (file)
index 0000000..2bcfb06
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc><?pi data?>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/037.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/037.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/038.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/038.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/039.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/039.xml
new file mode 100644 (file)
index 0000000..82d117d
--- /dev/null
@@ -0,0 +1 @@
+<?pi data?><doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/040.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/040.xml
new file mode 100644 (file)
index 0000000..d79cfe1
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="&quot;&lt;&amp;&gt;'"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/041.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/041.xml
new file mode 100644 (file)
index 0000000..6f2cd58
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="A"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/042.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/042.xml
new file mode 100644 (file)
index 0000000..f683039
--- /dev/null
@@ -0,0 +1 @@
+<doc>A</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/043.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/043.xml
new file mode 100644 (file)
index 0000000..e162b76
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="foo bar"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/044.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/044.xml
new file mode 100644 (file)
index 0000000..78028b7
--- /dev/null
@@ -0,0 +1 @@
+<doc>&#10;<e a1="v1" a2="v2" a3="v3"></e>&#10;<e a1="w1" a2="v2"></e>&#10;<e a1="v1" a2="w2" a3="v3"></e>&#10;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/045.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/045.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/046.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/046.xml
new file mode 100644 (file)
index 0000000..7293fb6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1" a2="v2"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/047.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/047.xml
new file mode 100644 (file)
index 0000000..b327ebd
--- /dev/null
@@ -0,0 +1 @@
+<doc>X&#10;Y</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/048.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/048.xml
new file mode 100644 (file)
index 0000000..ced7d02
--- /dev/null
@@ -0,0 +1 @@
+<doc>]</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/049.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/049.xml
new file mode 100644 (file)
index 0000000..7cc53f9
--- /dev/null
@@ -0,0 +1 @@
+<doc>£</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/050.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/050.xml
new file mode 100644 (file)
index 0000000..33703c7
--- /dev/null
@@ -0,0 +1 @@
+<doc>เจมส์</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/051.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/051.xml
new file mode 100644 (file)
index 0000000..cfeb5a5
--- /dev/null
@@ -0,0 +1 @@
+<เจมส์></เจมส์>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/052.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/052.xml
new file mode 100644 (file)
index 0000000..f5a0484
--- /dev/null
@@ -0,0 +1 @@
+<doc>ð€€ô¿½</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/053.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/053.xml
new file mode 100644 (file)
index 0000000..c408384
--- /dev/null
@@ -0,0 +1 @@
+<doc><e></e></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/054.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/054.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/055.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/055.xml
new file mode 100644 (file)
index 0000000..82d117d
--- /dev/null
@@ -0,0 +1 @@
+<?pi data?><doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/056.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/056.xml
new file mode 100644 (file)
index 0000000..f683039
--- /dev/null
@@ -0,0 +1 @@
+<doc>A</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/057.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/057.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/058.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/058.xml
new file mode 100644 (file)
index 0000000..f898cc8
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="1 2"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/059.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/059.xml
new file mode 100644 (file)
index 0000000..78028b7
--- /dev/null
@@ -0,0 +1 @@
+<doc>&#10;<e a1="v1" a2="v2" a3="v3"></e>&#10;<e a1="w1" a2="v2"></e>&#10;<e a1="v1" a2="w2" a3="v3"></e>&#10;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/060.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/060.xml
new file mode 100644 (file)
index 0000000..b327ebd
--- /dev/null
@@ -0,0 +1 @@
+<doc>X&#10;Y</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/061.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/061.xml
new file mode 100644 (file)
index 0000000..7cc53f9
--- /dev/null
@@ -0,0 +1 @@
+<doc>£</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/062.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/062.xml
new file mode 100644 (file)
index 0000000..33703c7
--- /dev/null
@@ -0,0 +1 @@
+<doc>เจมส์</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/063.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/063.xml
new file mode 100644 (file)
index 0000000..cfeb5a5
--- /dev/null
@@ -0,0 +1 @@
+<เจมส์></เจมส์>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/064.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/064.xml
new file mode 100644 (file)
index 0000000..f5a0484
--- /dev/null
@@ -0,0 +1 @@
+<doc>ð€€ô¿½</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/065.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/065.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/066.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/066.xml
new file mode 100644 (file)
index 0000000..7597d31
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="&quot;"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/067.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/067.xml
new file mode 100644 (file)
index 0000000..4bbdad4
--- /dev/null
@@ -0,0 +1 @@
+<doc>&#13;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/068.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/068.xml
new file mode 100644 (file)
index 0000000..4bbdad4
--- /dev/null
@@ -0,0 +1 @@
+<doc>&#13;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/069.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/069.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/070.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/070.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/071.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/071.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/072.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/072.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/073.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/073.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/074.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/074.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/075.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/075.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/076.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/076.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/077.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/077.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/078.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/078.xml
new file mode 100644 (file)
index 0000000..fcab0cd
--- /dev/null
@@ -0,0 +1 @@
+<doc a="v"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/079.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/079.xml
new file mode 100644 (file)
index 0000000..fcab0cd
--- /dev/null
@@ -0,0 +1 @@
+<doc a="v"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/080.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/080.xml
new file mode 100644 (file)
index 0000000..fcab0cd
--- /dev/null
@@ -0,0 +1 @@
+<doc a="v"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/081.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/081.xml
new file mode 100644 (file)
index 0000000..e356e7e
--- /dev/null
@@ -0,0 +1 @@
+<doc><a></a><b></b><c><a></a></c></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/082.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/082.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/083.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/083.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/084.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/084.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/085.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/085.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/086.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/086.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/087.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/087.xml
new file mode 100644 (file)
index 0000000..a9aa207
--- /dev/null
@@ -0,0 +1 @@
+<doc><foo></foo></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/088.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/088.xml
new file mode 100644 (file)
index 0000000..a547101
--- /dev/null
@@ -0,0 +1 @@
+<doc>&lt;foo&gt;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/089.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/089.xml
new file mode 100644 (file)
index 0000000..e01d86e
--- /dev/null
@@ -0,0 +1 @@
+<doc>ð€€ô¿½ô¿¿</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/090.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/090.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/091.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/091.xml
new file mode 100644 (file)
index 0000000..dd3bbed
--- /dev/null
@@ -0,0 +1 @@
+<doc a="e"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/092.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/092.xml
new file mode 100644 (file)
index 0000000..87269f7
--- /dev/null
@@ -0,0 +1 @@
+<doc>&#10;<a></a>&#10;    <a></a>&#9;<a></a>&#10;&#10;&#10;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/093.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/093.xml
new file mode 100644 (file)
index 0000000..631bfde
--- /dev/null
@@ -0,0 +1 @@
+<doc>&#10;&#10;&#10;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/094.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/094.xml
new file mode 100644 (file)
index 0000000..636ab47
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="%e;"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/095.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/095.xml
new file mode 100644 (file)
index 0000000..a20706e
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="1  2"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/096.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/096.xml
new file mode 100644 (file)
index 0000000..f898cc8
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="1 2"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/097.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/097.xml
new file mode 100644 (file)
index 0000000..e05cfe6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/098.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/098.xml
new file mode 100644 (file)
index 0000000..f6408de
--- /dev/null
@@ -0,0 +1,2 @@
+<doc><?pi x
+y?></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/099.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/099.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/100.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/100.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/101.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/101.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/102.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/102.xml
new file mode 100644 (file)
index 0000000..6e66b8d
--- /dev/null
@@ -0,0 +1 @@
+<doc a="&quot;"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/103.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/103.xml
new file mode 100644 (file)
index 0000000..96495d4
--- /dev/null
@@ -0,0 +1 @@
+<doc>&lt;doc&gt;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/104.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/104.xml
new file mode 100644 (file)
index 0000000..cc3def3
--- /dev/null
@@ -0,0 +1 @@
+<doc a="x y"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/105.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/105.xml
new file mode 100644 (file)
index 0000000..5aed3d6
--- /dev/null
@@ -0,0 +1 @@
+<doc a="x&#9;y"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/106.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/106.xml
new file mode 100644 (file)
index 0000000..1197d2f
--- /dev/null
@@ -0,0 +1 @@
+<doc a="x&#10;y"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/107.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/107.xml
new file mode 100644 (file)
index 0000000..288f23c
--- /dev/null
@@ -0,0 +1 @@
+<doc a="x&#13;y"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/108.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/108.xml
new file mode 100644 (file)
index 0000000..cc3def3
--- /dev/null
@@ -0,0 +1 @@
+<doc a="x y"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/109.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/109.xml
new file mode 100644 (file)
index 0000000..c43bdf9
--- /dev/null
@@ -0,0 +1 @@
+<doc a=""></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/110.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/110.xml
new file mode 100644 (file)
index 0000000..a92237b
--- /dev/null
@@ -0,0 +1 @@
+<doc a="x  y"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/111.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/111.xml
new file mode 100644 (file)
index 0000000..cc3def3
--- /dev/null
@@ -0,0 +1 @@
+<doc a="x y"></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/112.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/112.xml
new file mode 100644 (file)
index 0000000..c82f47b
--- /dev/null
@@ -0,0 +1 @@
+<doc><a></a></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/113.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/113.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/114.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/114.xml
new file mode 100644 (file)
index 0000000..8e0722a
--- /dev/null
@@ -0,0 +1 @@
+<doc>&amp;foo;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/115.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/115.xml
new file mode 100644 (file)
index 0000000..682b814
--- /dev/null
@@ -0,0 +1 @@
+<doc>v</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/116.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/116.xml
new file mode 100644 (file)
index 0000000..a79dff6
--- /dev/null
@@ -0,0 +1 @@
+<doc>&#10;</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/117.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/117.xml
new file mode 100644 (file)
index 0000000..ced7d02
--- /dev/null
@@ -0,0 +1 @@
+<doc>]</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/118.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/118.xml
new file mode 100644 (file)
index 0000000..31e37a9
--- /dev/null
@@ -0,0 +1 @@
+<doc>]]</doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/119.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_jclark_valid/sa/out/119.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/001.xml
new file mode 100644 (file)
index 0000000..b13acd2
--- /dev/null
@@ -0,0 +1,61 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ENTITY x PUBLIC "x" "" NDATA p>
+<!ENTITY y PUBLIC "x" "" NDATA p>
+<!ENTITY z PUBLIC "x" "" NDATA p>
+
+<!NOTATION p PUBLIC "image/gif">
+<!NOTATION q PUBLIC "image/jpeg">
+<!NOTATION r PUBLIC "image/png">
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el        
+          cdata     CDATA     #IMPLIED
+          id        ID        #IMPLIED
+          idref     IDREF     #IMPLIED
+          idrefs    IDREFS    #IMPLIED
+          entity    ENTITY    #IMPLIED
+          entities  ENTITIES  #IMPLIED
+          nmtoken   NMTOKEN   #IMPLIED
+          nmtokens  NMTOKENS  #IMPLIED
+          enum      (a|b|c)   #IMPLIED
+          notation  NOTATION (p|q|r)  #IMPLIED
+>
+
+<!ELEMENT any ANY>
+]>
+
+<any>
+  <el cdata="a b c"/>
+  <el cdata=" a b c "/>
+  <el cdata="  a  b  c  "/>
+  <el id="A"/>
+  <el id=" B "/>
+  <el id="  C  "/>
+  <el idref="C"/>
+  <el idref=" A "/>
+  <el idref="  B  "/>
+  <el idrefs="A B C"/>
+  <el idrefs=" A B C "/>
+  <el idrefs="  A  B  C  "/>
+  <el entity="x"/>
+  <el entity=" x "/>
+  <el entity="  x  "/>
+  <el entities="x y z"/>
+  <el entities=" x y z "/>
+  <el entities="  x  y  z  "/>
+  <el nmtoken="a"/>
+  <el nmtoken=" a "/>
+  <el nmtoken="  a  "/>
+  <el nmtokens="a b c"/>
+  <el nmtokens=" a b c "/>
+  <el nmtokens="  a  b  c  "/>
+  <el enum="a"/>
+  <el enum=" a "/>
+  <el enum="  a  "/>
+  <el notation="p"/>
+  <el notation=" p "/>
+  <el notation="  p  "/>
+</any>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/002.xml
new file mode 100644 (file)
index 0000000..759207a
--- /dev/null
@@ -0,0 +1,39 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el        
+          cdata     CDATA     #IMPLIED
+          nmtoken   NMTOKEN   #IMPLIED
+          nmtokens  NMTOKENS  #IMPLIED
+>
+<!ELEMENT any ANY>
+]>
+
+<any>
+  <el cdata="a
+b
+c      d
+e "/>
+  <el nmtoken=" a  "/>
+  <el nmtoken="
+a
+"/>
+  <el nmtoken="
+a
+"/>
+  <el nmtoken="        a       "/>
+  <el nmtokens=" a b c "/>
+  <el nmtokens="
+a
+b
+c
+"/>
+  <el nmtokens="
+a
+b
+c
+"/>
+  <el nmtokens="       a       b       c       "/>
+</any>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/003.xml
new file mode 100644 (file)
index 0000000..1335a77
--- /dev/null
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el        
+          cdata     CDATA     #IMPLIED
+          nmtoken   NMTOKEN   #IMPLIED
+          nmtokens  NMTOKENS  #IMPLIED
+>
+<!ELEMENT any ANY>
+
+<!ENTITY elinstance
+  '<el cdata="a
+b
+c      d
+e "/>
+  <el nmtoken=" a  "/>
+  <el nmtoken="
+a
+"/>
+  <el nmtoken="
+a
+"/>
+  <el nmtoken="        a       "/>
+  <el nmtokens=" a b c "/>
+  <el nmtokens="
+a
+b
+c
+"/>
+  <el nmtokens="
+a
+b
+c
+"/>
+  <el nmtokens="       a       b       c       "/>'>
+]>
+
+<any>&elinstance;</any>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/004.xml
new file mode 100644 (file)
index 0000000..7cad5de
--- /dev/null
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el        
+          cdata     CDATA     #IMPLIED
+>
+<!ELEMENT any ANY>
+
+<!ENTITY elinstance
+  '<el cdata="a&#13;
+b"/>'>
+]>
+
+<any>&elinstance;</any>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/005.xml
new file mode 100644 (file)
index 0000000..7f05469
--- /dev/null
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el        
+          cdata     CDATA     #IMPLIED
+          nmtoken   NMTOKEN   #IMPLIED
+          nmtokens  NMTOKENS  #IMPLIED
+>
+<!ELEMENT any ANY>
+]>
+
+<any>
+  <el cdata="a&#10;b&#13;c&#9;d&#13;&#10;e&#32;"/>
+  <el nmtoken="&#32;a&#32;"/>
+  <el nmtoken="&#10;a&#10;"/>
+  <el nmtoken="&#13;a&#13;"/>
+  <el nmtoken="&#9;a&#9;"/>
+  <el nmtokens="&#32;a&#32;b&#32;c&#32;"/>
+  <el nmtokens="&#10;a&#10;b&#10;c&#10;"/>
+  <el nmtokens="&#13;a&#13;b&#13;c&#13;"/>
+  <el nmtokens="&#9;a&#9;b&#9;c&#9;"/>
+</any>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/006.xml
new file mode 100644 (file)
index 0000000..8f1287c
--- /dev/null
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el        
+          nmtoken   NMTOKEN   #FIXED "a"
+          nmtokens  NMTOKENS  #FIXED "a b c"
+>
+<!ELEMENT any ANY>
+]>
+
+<any>
+  <el nmtoken="
+a
+"/>
+  <el nmtokens="
+a
+b
+c
+"/>
+</any>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/007.xml
new file mode 100644 (file)
index 0000000..9cecac2
--- /dev/null
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el        
+          nmtoken   NMTOKEN   #FIXED "
+a
+"
+          nmtokens  NMTOKENS  #FIXED "a
+b
+c"
+>
+<!ELEMENT any ANY>
+]>
+
+<any>
+  <el nmtoken="
+a
+"/>
+  <el nmtokens="
+a
+b
+c
+"/>
+</any>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/INDEX b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/INDEX
new file mode 100644 (file)
index 0000000..e8fccb0
--- /dev/null
@@ -0,0 +1,12 @@
+001.xml                tests whether additional white space in attribute value
+               is removed during normalization for every att type but
+               not for CDATA
+002.xml                tests whether TABs, CRs, LFs, and CRLFs are converted
+               to spaces (only for CDATA, NMTOKEN, NMTOKENS)
+003.xml                similar to 002.xml, but the attribute values occur
+               in internal entities
+004.xml                tests whether CRLF normalization happens only once
+005.xml                tests whether spaces, TABs, LFs, CRs, and CRLFs are correctly
+               processed if they are written as character references
+006.xml                tests whether normalization is done before #FIXED comparison
+007.xml                tests whether normalization is done before #FIXED comparison
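
The INDEX above summarizes the attribute-value normalization rules of XML 1.0, section 3.3.3, that tests 001.xml through 007.xml exercise. As a rough illustration only (plain OCaml over an already-parsed literal value, not PXP's actual code, and deliberately ignoring the character-reference exemption that 005.xml checks), the two steps look like this:

    (* Step 1: literal TAB, LF, CR and CRLF in an attribute value become a
       single space; a CRLF pair counts once (cf. 002.xml and 004.xml).
       Characters written as character references are exempt from this step
       (cf. 005.xml); that subtlety is not modelled here. *)
    let replace_ws s =
      let b = Buffer.create (String.length s) in
      let n = String.length s in
      let i = ref 0 in
      while !i < n do
        (match s.[!i] with
         | '\r' ->
             Buffer.add_char b ' ';
             if !i + 1 < n && s.[!i + 1] = '\n' then incr i
         | '\t' | '\n' -> Buffer.add_char b ' '
         | c -> Buffer.add_char b c);
        incr i
      done;
      Buffer.contents b

    (* Step 2: for every attribute type except CDATA, leading and trailing
       spaces are dropped and runs of spaces collapse to one (cf. 001.xml). *)
    let collapse s =
      String.split_on_char ' ' s
      |> List.filter (fun tok -> tok <> "")
      |> String.concat " "

    let normalize ~cdata v =
      let v = replace_ws v in
      if cdata then v else collapse v

So normalize ~cdata:false "  a  b  c  " gives "a b c" while the CDATA value keeps its spaces, which is exactly the difference visible between the cdata and nmtokens attributes of 001.xml and its canonical output out/001.xml; 006.xml and 007.xml additionally rely on the #FIXED default being compared against the value after this normalization.
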
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/001.xml
new file mode 100644 (file)
index 0000000..818ca6e
--- /dev/null
@@ -0,0 +1 @@
+<any>&#10;  <el cdata="a b c"></el>&#10;  <el cdata=" a b c "></el>&#10;  <el cdata="  a  b  c  "></el>&#10;  <el id="A"></el>&#10;  <el id="B"></el>&#10;  <el id="C"></el>&#10;  <el idref="C"></el>&#10;  <el idref="A"></el>&#10;  <el idref="B"></el>&#10;  <el idrefs="A B C"></el>&#10;  <el idrefs="A B C"></el>&#10;  <el idrefs="A B C"></el>&#10;  <el entity="x"></el>&#10;  <el entity="x"></el>&#10;  <el entity="x"></el>&#10;  <el entities="x y z"></el>&#10;  <el entities="x y z"></el>&#10;  <el entities="x y z"></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtokens="a b c"></el>&#10;  <el nmtokens="a b c"></el>&#10;  <el nmtokens="a b c"></el>&#10;  <el enum="a"></el>&#10;  <el enum="a"></el>&#10;  <el enum="a"></el>&#10;  <el notation="p"></el>&#10;  <el notation="p"></el>&#10;  <el notation="p"></el>&#10;</any>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/002.xml
new file mode 100644 (file)
index 0000000..5167ac3
--- /dev/null
@@ -0,0 +1 @@
+<any>&#10;  <el cdata="a b c d e "></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtokens="a b c"></el>&#10;  <el nmtokens="a b c"></el>&#10;  <el nmtokens="a b c"></el>&#10;  <el nmtokens="a b c"></el>&#10;</any>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/003.xml
new file mode 100644 (file)
index 0000000..782bcfb
--- /dev/null
@@ -0,0 +1 @@
+<any><el cdata="a b c d e "></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtokens="a b c"></el>&#10;  <el nmtokens="a b c"></el>&#10;  <el nmtokens="a b c"></el>&#10;  <el nmtokens="a b c"></el></any>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/004.xml
new file mode 100644 (file)
index 0000000..1b1c171
--- /dev/null
@@ -0,0 +1 @@
+<any><el cdata="a  b"></el></any>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/005.xml
new file mode 100644 (file)
index 0000000..1f722da
--- /dev/null
@@ -0,0 +1 @@
+<any>&#10;  <el cdata="a&#10;b&#13;c&#9;d&#13;&#10;e "></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtoken="a"></el>&#10;  <el nmtokens="a b c"></el>&#10;  <el nmtokens="a b c"></el>&#10;  <el nmtokens="a b c"></el>&#10;  <el nmtokens="a b c"></el>&#10;</any>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/006.xml
new file mode 100644 (file)
index 0000000..3aefc89
--- /dev/null
@@ -0,0 +1 @@
+<any>&#10;  <el nmtoken="a" nmtokens="a b c"></el>&#10;  <el nmtoken="a" nmtokens="a b c"></el>&#10;</any>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/att_normalization/out/007.xml
new file mode 100644 (file)
index 0000000..3aefc89
--- /dev/null
@@ -0,0 +1 @@
+<any>&#10;  <el nmtoken="a" nmtokens="a b c"></el>&#10;  <el nmtoken="a" nmtokens="a b c"></el>&#10;</any>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/001.xml
new file mode 100644 (file)
index 0000000..4523f1b
--- /dev/null
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE schema [
+<!ELEMENT schema ANY>
+<!ELEMENT element ANY>
+<!ATTLIST element minOccurs CDATA #IMPLIED>
+]>
+<schema>
+       <element minOccurs='0'/>
+       <element minOccurs='0'/>
+</schema>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/002+.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/002+.xml
new file mode 100644 (file)
index 0000000..81a22e2
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE x [
+<!ELEMENT x ANY>
+]>
+<x/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/003.xml
new file mode 100644 (file)
index 0000000..45cd7eb
--- /dev/null
@@ -0,0 +1,13 @@
+<!DOCTYPE a [
+<!ELEMENT a ANY>
+<?pi 0?>
+]>
+<?pi 1?>
+<a>
+  <?pi 2?>
+  <a>
+    <?pi 3?>
+  </a>
+  <?pi 4?>
+</a>
+<?pi 5?>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/INDEX b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/INDEX
new file mode 100644 (file)
index 0000000..56368c5
--- /dev/null
@@ -0,0 +1,12 @@
+This directory contains real regression tests, i.e. it is tested whether
+reported bugs have been fixed.
+
+001.xml                2000-08-26: Haruo's single quote bug. Attvalues delimited
+               by single quotes did not work for the UTF-8 lexer.
+002+.xml       2000-08-26: Haruo's file-names-are-not-URLs bug. from_file
+               interpreted the file name as URL-encoded string. "002+.xml"
+               because the "+" must not be decoded as space.
+003.xml                2000-08-26: Alain's bug that data nodes must not be merged
+               where PI nodes are created. In the "comments" directory
+               there is another test for the case that comments delimit
+               data material
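
The entry for 003.xml above describes the tree-level requirement that character data around a processing instruction must stay split into separate data nodes; out/003.xml shows how that document then prints in the canonical form used by every file under out/ in this commit, where &, <, >, " and literal TAB, LF and CR are written as references. A small sketch of that escaping step, again in plain OCaml and only an approximation of what the actual canonxml driver does:

    let canon_escape s =
      let b = Buffer.create (String.length s) in
      String.iter
        (fun c ->
           match c with
           | '&'  -> Buffer.add_string b "&amp;"
           | '<'  -> Buffer.add_string b "&lt;"
           | '"'  -> Buffer.add_string b "&quot;"
           | '>'  -> Buffer.add_string b "&gt;"
           | '\t' -> Buffer.add_string b "&#9;"
           | '\n' -> Buffer.add_string b "&#10;"
           | '\r' -> Buffer.add_string b "&#13;"
           | c    -> Buffer.add_char b c)
        s;
      Buffer.contents b

For instance canon_escape "X\nY" is "X&#10;Y", matching out/047.xml and out/060.xml, and the newlines between the processing instructions of 003.xml surface as the &#10; runs in out/003.xml.
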
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/001.xml
new file mode 100644 (file)
index 0000000..e9d83b3
--- /dev/null
@@ -0,0 +1 @@
+<schema>&#10;&#9;<element minOccurs="0"></element>&#10;&#9;<element minOccurs="0"></element>&#10;</schema>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/002+.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/002+.xml
new file mode 100644 (file)
index 0000000..794447b
--- /dev/null
@@ -0,0 +1 @@
+<x></x>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/bugfixes/out/003.xml
new file mode 100644 (file)
index 0000000..22bd710
--- /dev/null
@@ -0,0 +1 @@
+<?pi 1?><a>&#10;  <?pi 2?>&#10;  <a>&#10;    <?pi 3?>&#10;  </a>&#10;  <?pi 4?>&#10;</a><?pi 5?>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/001.xml
new file mode 100644 (file)
index 0000000..fcb4a00
--- /dev/null
@@ -0,0 +1,13 @@
+<!DOCTYPE a [
+<!ELEMENT a ANY>
+<!-- Comment 0 -->
+]>
+<!-- Comment 1 -->
+<a>
+  <!-- Comment -2 -->
+  <a>
+    <!-- Comment 3 -->
+  </a>
+  <!-- Comment 4 -->
+</a>
+<!-- Comment 5 -->
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/INDEX b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/INDEX
new file mode 100644 (file)
index 0000000..0df471f
--- /dev/null
@@ -0,0 +1 @@
+001            Checks whether enable_comment_nodes works
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/comments/out/001.xml
new file mode 100644 (file)
index 0000000..f03911d
--- /dev/null
@@ -0,0 +1 @@
+<!-- Comment 1 --><a>&#10;  <!-- Comment -2 -->&#10;  <a>&#10;    <!-- Comment 3 -->&#10;  </a>&#10;  <!-- Comment 4 -->&#10;</a><!-- Comment 5 -->
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/001.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/001.ent
new file mode 100644 (file)
index 0000000..fde4af2
--- /dev/null
@@ -0,0 +1,2 @@
+<!ELEMENT doc EMPTY>
+<![IGNORE[<!ATTLIST doc att CDATA #REQUIRED>]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/001.xml
new file mode 100644 (file)
index 0000000..02ef0bb
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "001.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/002.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/002.ent
new file mode 100644 (file)
index 0000000..19c0206
--- /dev/null
@@ -0,0 +1,4 @@
+<!ELEMENT doc EMPTY>
+<!-- Only a precondition check for test 003: the first ATTLIST counts -->
+<!ATTLIST doc att CDATA #IMPLIED>
+<!ATTLIST doc att CDATA #REQUIRED>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/002.xml
new file mode 100644 (file)
index 0000000..0c5372c
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "002.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/003.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/003.ent
new file mode 100644 (file)
index 0000000..90bda5f
--- /dev/null
@@ -0,0 +1,3 @@
+<!ELEMENT doc EMPTY>
+<![INCLUDE[<!ATTLIST doc att CDATA #IMPLIED>]]>
+<!ATTLIST doc att CDATA #REQUIRED>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/003.xml
new file mode 100644 (file)
index 0000000..c4b33e4
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "003.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/004.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/004.ent
new file mode 100644 (file)
index 0000000..8ab5697
--- /dev/null
@@ -0,0 +1,3 @@
+<!ELEMENT doc EMPTY>
+<!ENTITY % e "IGNORE">
+<![%e;[<!ATTLIST doc att CDATA #REQUIRED>]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/004.xml
new file mode 100644 (file)
index 0000000..740d173
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "004.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/005.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/005.ent
new file mode 100644 (file)
index 0000000..bd26a54
--- /dev/null
@@ -0,0 +1,4 @@
+<!ELEMENT doc EMPTY>
+<!ENTITY % e "INCLUDE">
+<![%e;[<!ATTLIST doc att CDATA #IMPLIED>]]>
+<!ATTLIST doc att CDATA #REQUIRED>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/005.xml
new file mode 100644 (file)
index 0000000..aa3a8f9
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "005.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/006.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/006.ent
new file mode 100644 (file)
index 0000000..7ff3cdd
--- /dev/null
@@ -0,0 +1,3 @@
+<!ELEMENT doc EMPTY>
+<![IGNORE[This is illegal here]]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/006.xml
new file mode 100644 (file)
index 0000000..bd2ee32
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "006.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/007.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/007.ent
new file mode 100644 (file)
index 0000000..180c6b0
--- /dev/null
@@ -0,0 +1,4 @@
+<!ELEMENT doc EMPTY>
+<!ENTITY % e "]]>">
+<![IGNORE[%e;]]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/007.xml
new file mode 100644 (file)
index 0000000..1c5bc80
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "007.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/008.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/008.ent
new file mode 100644 (file)
index 0000000..e36d707
--- /dev/null
@@ -0,0 +1,4 @@
+<!ELEMENT doc EMPTY>
+<![IGNORE[<!ENTITY e "]]>">]]>
+<![IGNORE[<!ENTITY e ']]>'>]]>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/008.xml
new file mode 100644 (file)
index 0000000..c140c0a
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "008.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/009.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/009.ent
new file mode 100644 (file)
index 0000000..617d3d9
--- /dev/null
@@ -0,0 +1,3 @@
+<!ELEMENT doc EMPTY>
+<![IGNORE[<!-- ]]> -->]]>
+<![IGNORE[x <!-- ]]> -->]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/009.xml
new file mode 100644 (file)
index 0000000..c75bbb6
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "009.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/010.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/010.ent
new file mode 100644 (file)
index 0000000..9a72698
--- /dev/null
@@ -0,0 +1,5 @@
+<!ELEMENT doc EMPTY>
+<![IGNORE[x <![IGNORE[xxx]]>]]>
+<![IGNORE[<![IGNORE[xxx]]>]]>
+<![IGNORE[x <![INCLUDE[xxx]]>]]>
+<![IGNORE[<![INCLUDE[xxx]]>]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/010.xml
new file mode 100644 (file)
index 0000000..93b5cf6
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "010.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/011.ent b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/011.ent
new file mode 100644 (file)
index 0000000..e24aad1
--- /dev/null
@@ -0,0 +1,6 @@
+<!ELEMENT doc EMPTY>
+<![INCLUDE[ <![INCLUDE[ <!ATTLIST doc att CDATA #IMPLIED> ]]> 
+            <![IGNORE[ xxx ]]> 
+]]>
+<!ATTLIST doc att CDATA #REQUIRED>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/011.xml
new file mode 100644 (file)
index 0000000..587ab5d
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "011.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/INDEX b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/INDEX
new file mode 100644 (file)
index 0000000..0360292
--- /dev/null
@@ -0,0 +1,20 @@
+001    IGNORE works: <![IGNORE[ ... ]]>
+002    [precondition for 003] The first ATTLIST declaration for the same
+       attribute counts
+003    INCLUDE works: <![INCLUDE[ ... ]]>
+004    IGNORE works: <![%e;[ ... ]]> with e="IGNORE"
+005    INCLUDE works: <![%e;[ ... ]]> with e="INCLUDE"
+006    IGNORE works: <![IGNORE[ ... ]]> ignoring a section that would
+       be illegal
+007    Within ignored sections, references to parameter entities are
+       not resolved.
+       NOTE: You cannot derive this directly from the XML spec, because a
+       precise definition of what "ignoring" means is missing. This property
+       is an interpretation of the statement about reliable parsing in
+       section 3.4.
+008    Ignored sections may contain string literals containing "]]>".
+       NOTE: same problem with XML spec as 007
+009    Ignored sections may contain comments containing "]]>".
+       NOTE: same problem with XML spec as 007
+010    Nested conditional sections with outermost IGNORE
+011    Nested conditional sections with outermost INCLUDE
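A minimal sketch (editorial note, not part of the committed files): the conditional-section tests above all reduce to the same check. If the <![IGNORE[...]]> section (or its %e;-expanded form) is really skipped, the #REQUIRED ATTLIST hidden inside it never takes effect, and the attribute-less <doc> element validates. Assuming only PXP calls that also appear in test_canonxml.ml and compile.ml later in this commit, and an illustrative file path, a driver for one case looks roughly like this:

    open Pxp_types;;
    open Pxp_yacc;;

    let () =
      try
        let doc =
          parse_document_entity default_config
            (from_file "data_valid/conditional/001.xml")  (* illustrative path *)
            default_spec
        in
        (* Reaching this point means validation succeeded, i.e. the REQUIRED
           attribute declaration inside the IGNORE section was skipped;
           serialize the document as compile.ml does. *)
        doc # write (Out_channel stdout) `Enc_utf8
      with
        e -> prerr_endline (string_of_exn e)
    ;;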
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/001.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/002.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/003.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/004.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/005.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/006.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/007.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/008.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/008.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/009.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/009.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/010.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/010.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/011.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/conditional/out/011.xml
new file mode 100644 (file)
index 0000000..7e8f183
--- /dev/null
@@ -0,0 +1 @@
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/001.xml
new file mode 100644 (file)
index 0000000..0915363
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE a [
+  <!ELEMENT a ANY>
+  <?pxp:dtd optional-element-and-notation-declarations?>
+]>
+<a><b/></a>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/002.xml
new file mode 100644 (file)
index 0000000..e993d3a
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE a [
+  <!ELEMENT a ANY>
+  <?pxp:dtd optional-element-and-notation-declarations?>
+]>
+<a><b att1="1" att2=" 1 2 3 "/></a>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/003.xml
new file mode 100644 (file)
index 0000000..c0bfad4
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE a [
+  <!ELEMENT a (b)>
+  <?pxp:dtd optional-element-and-notation-declarations?>
+]>
+<a><b/></a>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/004.xml
new file mode 100644 (file)
index 0000000..889604d
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE a [
+  <?pxp:dtd optional-element-and-notation-declarations?>
+]>
+<a><b/></a>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/005.xml
new file mode 100644 (file)
index 0000000..609962b
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE a [
+  <!ELEMENT a ANY>
+  <!ENTITY x SYSTEM "sample" NDATA m>
+  <?pxp:dtd optional-element-and-notation-declarations?>
+]>
+<a/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/006.xml
new file mode 100644 (file)
index 0000000..265d272
--- /dev/null
@@ -0,0 +1,7 @@
+<!DOCTYPE a [
+  <!ELEMENT a ANY>
+  <!ATTLIST a g ENTITY #IMPLIED>
+  <!ENTITY x SYSTEM "sample" NDATA m>
+  <?pxp:dtd optional-element-and-notation-declarations?>
+]>
+<a g="x"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/007.xml
new file mode 100644 (file)
index 0000000..56fce53
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE a [
+  <!ELEMENT a ANY>
+  <?pxp:dtd optional-attribute-declarations elements="a"?>
+]>
+<a x="y"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/INDEX b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/INDEX
new file mode 100644 (file)
index 0000000..21b68b9
--- /dev/null
@@ -0,0 +1,14 @@
+<?pxp:dtd optional-element-and-notation-declarations?>
+
+001.xml                Whether it works for undeclared elements
+002.xml                Whether it works for undeclared elements with attributes
+003.xml                Whether it works for undeclared elements referenced in declarations
+004.xml                Whether it works for undeclared root elements
+005.xml                Whether it works for undeclared notations
+006.xml                Whether it works for undeclared notations which are actually
+               referred to
+
+<?pxp:dtd optional-attribute-declarations?>
+
+007.xml                Whether it works for undeclared attributes of the listed elements
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/001.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/001.xml
new file mode 100644 (file)
index 0000000..61040a8
--- /dev/null
@@ -0,0 +1 @@
+<a><b></b></a>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/002.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/002.xml
new file mode 100644 (file)
index 0000000..d894fe8
--- /dev/null
@@ -0,0 +1 @@
+<a><b att1="1" att2=" 1 2 3 "></b></a>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/003.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/003.xml
new file mode 100644 (file)
index 0000000..61040a8
--- /dev/null
@@ -0,0 +1 @@
+<a><b></b></a>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/004.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/004.xml
new file mode 100644 (file)
index 0000000..61040a8
--- /dev/null
@@ -0,0 +1 @@
+<a><b></b></a>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/005.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/005.xml
new file mode 100644 (file)
index 0000000..7df325b
--- /dev/null
@@ -0,0 +1 @@
+<a></a>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/006.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/006.xml
new file mode 100644 (file)
index 0000000..f5d7bec
--- /dev/null
@@ -0,0 +1 @@
+<a g="x"></a>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/007.xml b/helm/DEVEL/pxp/pxp/rtests/canonxml/data_valid/optional_decls/out/007.xml
new file mode 100644 (file)
index 0000000..8685a8a
--- /dev/null
@@ -0,0 +1 @@
+<a x="y"></a>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/run_canonxml b/helm/DEVEL/pxp/pxp/rtests/canonxml/run_canonxml
new file mode 100755 (executable)
index 0000000..2c40807
--- /dev/null
@@ -0,0 +1,31 @@
+#! /bin/bash
+
+check_dir () {
+    dir="$1"
+    shift
+    xmlfiles=`cd $dir && echo *.xml`
+    for file in $xmlfiles; do
+        echo -n "File $dir/$file: "
+       ./test_canonxml "$@" "$dir/$file" >out.xml
+       if cmp out.xml "$dir/out/$file"; then
+           echo "OK"
+       else
+           echo "NOT OK"
+           read
+       fi
+    done
+}
+
+check_dir "data_valid/conditional"
+check_dir "data_valid/att_normalization"
+check_dir "data_valid/optional_decls"
+check_dir "data_valid/comments" -comments
+check_dir "data_valid/bugfixes"
+
+#check_dir "data_jclark_valid/sa-problems"
+#check_dir "data_jclark_valid/ext-sa-problems"
+check_dir "data_jclark_valid/sa"
+check_dir "data_jclark_valid/not-sa"
+check_dir "data_jclark_valid/ext-sa"
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/canonxml/test_canonxml.ml b/helm/DEVEL/pxp/pxp/rtests/canonxml/test_canonxml.ml
new file mode 100644 (file)
index 0000000..ef83a28
--- /dev/null
@@ -0,0 +1,239 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+let rec prerr_error e =
+  prerr_endline (string_of_exn e)
+;;
+
+class warner =
+  object 
+    method warn w =
+      prerr_endline ("WARNING: " ^ w)
+  end
+;;
+
+let outbuf = String.create 8192;;
+
+let output_utf8 config s =
+  match config.encoding  with
+      `Enc_utf8 ->
+       print_string s
+    | `Enc_iso88591 ->
+       for i = 0 to String.length s - 1 do
+         let c = Char.code(s.[i]) in
+         if c <= 127 then
+           print_char(Char.chr(c))
+         else begin
+           print_char(Char.chr(0xc0 lor (c lsr 6)));
+           print_char(Char.chr(0x80 lor (c land 0x3f)));
+         end
+       done
+    | _ -> assert false
+;;
+
+
+let re = Str.regexp "[&<>\"\009\010\013]";;
+
+let escaped s =
+  Str.global_substitute 
+    re
+    (fun _ ->
+       match Str.matched_string s with
+          "&"    -> "&amp;"
+        | "<"    -> "&lt;"
+        | ">"    -> "&gt;"
+        | "\""   -> "&quot;"
+        | "\009" -> "&#9;"
+        | "\010" -> "&#10;"
+        | "\013" -> "&#13;"
+        | _      -> assert false
+    )
+    s
+;;
+
+
+let rec output_xml config n =
+  match n # node_type with
+      T_super_root ->
+       n # iter_nodes (output_xml config)
+    | T_pinstr pi_name ->
+       let [ pi ] = n # pinstr pi_name in
+       output_utf8 config "<?";
+       output_utf8 config (pi # target);
+       output_utf8 config " ";
+       output_utf8 config (pi # value);
+       output_utf8 config "?>";
+    | T_element name ->
+       output_utf8 config "<";
+       output_utf8 config name;
+       let sorted_attnames = 
+         Sort.list ( <= ) (n # attribute_names) in
+       List.iter
+         (fun attname ->
+            match n # attribute attname with
+                Value v ->
+                  output_utf8 config " ";
+                  output_utf8 config attname;
+                  output_utf8 config "=\"";
+                  output_utf8 config (escaped v);
+                  output_utf8 config "\"";
+              | Valuelist vl ->
+                  let v = String.concat " " vl in
+                  output_utf8 config " ";
+                  output_utf8 config attname;
+                  output_utf8 config "=\"";
+                  output_utf8 config (escaped v);
+                  output_utf8 config "\"";
+              | Implied_value -> 
+                  ()
+         )
+         sorted_attnames;
+       output_utf8 config ">";
+       n # iter_nodes (output_xml config);
+       output_utf8 config "</";
+       output_utf8 config name;
+       output_utf8 config ">";
+    | T_data ->
+       let v = n # data in
+       output_utf8 config (escaped v)
+    | T_comment ->
+       let v =
+         match n # comment with
+             None -> assert false
+           | Some x -> x
+       in
+       output_utf8 config ("<!--" ^ v ^ "-->")
+    | _ -> 
+       assert false
+;;
+
+
+let parse debug wf iso88591 comments filename =
+  let spec =
+    let e = new element_impl default_extension in
+    e # keep_always_whitespace_mode;
+    make_spec_from_mapping
+      ~super_root_exemplar:      e
+      ~default_pinstr_exemplar:  e
+      ~comment_exemplar:         e
+      ~data_exemplar:            (new data_impl default_extension)
+      ~default_element_exemplar: e
+      ~element_mapping:          (Hashtbl.create 1)
+      ()
+  in
+  let config =
+      { default_config with 
+         warner = new warner;
+         debugging_mode = debug;
+         enable_pinstr_nodes = true;
+         enable_super_root_node = true;
+         enable_comment_nodes = comments;
+         encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
+         idref_pass = true;
+      }
+  in
+  try 
+    let parse_fn =
+      if wf then parse_wfdocument_entity 
+      else 
+       let index = new hash_index in
+       parse_document_entity 
+         ?transform_dtd:None 
+         ~id_index:(index :> 'ext index)
+    in
+    let tree =
+      parse_fn
+        config
+       (from_file filename)
+       spec 
+    in
+    output_xml config (tree # root)
+  with
+      e ->
+       error_happened := true;
+       prerr_error e
+;;
+
+
+let main() =
+  let debug = ref false in
+  let wf = ref false in
+  let iso88591 = ref false in
+  let comments = ref false in
+  let files = ref [] in
+  Arg.parse
+      [ "-d",   Arg.Set debug, 
+          "          turn debugging mode on";
+       "-wf",  Arg.Set wf,    
+            "         check only on well-formedness";
+       "-iso-8859-1", Arg.Set iso88591, 
+                    " use ISO-8859-1 as internal encoding instead of UTF-8";
+       "-comments", Arg.Set comments, 
+                 "   output comments, too";
+      ]
+      (fun x -> files := x :: !files)
+      "
+usage: test_canonxml [options] file ...
+
+List of options:";
+  files := List.rev !files;
+  List.iter (parse !debug !wf !iso88591 !comments) !files;
+;;
+
+
+main();
+if !error_happened then exit(1);;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:32  lpadovan
+ * Initial revision
+ *
+ * Revision 1.8  2000/08/17 00:51:57  gerd
+ *     Added -comments option to test enable_comment_nodes.
+ *
+ * Revision 1.7  2000/08/16 23:44:17  gerd
+ *     Updates because of changes of the PXP API.
+ *
+ * Revision 1.6  2000/07/14 14:56:55  gerd
+ *     Updated: warner.
+ *
+ * Revision 1.5  2000/07/14 14:17:58  gerd
+ *     Updated because of interface changes.
+ *
+ * Revision 1.4  2000/07/09 01:06:20  gerd
+ *     Updated.
+ *
+ * Revision 1.3  2000/06/04 20:31:03  gerd
+ *     Updates because of renamed PXP modules.
+ *
+ * Revision 1.2  2000/05/20 20:34:28  gerd
+ *     Changed for UTF-8 support.
+ *
+ * Revision 1.1  2000/04/30 20:13:01  gerd
+ *     Initial revision.
+ *
+ * Revision 1.3  1999/11/09 22:27:30  gerd
+ *     The program now returns an exit code of 1 if one of the
+ * XML files produces an error.
+ *
+ * Revision 1.2  1999/09/01 23:09:56  gerd
+ *     Added the option -wf that switches to well-formedness checking
+ * instead of validation.
+ *
+ * Revision 1.1  1999/08/14 22:20:53  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/rtests/codewriter/Makefile b/helm/DEVEL/pxp/pxp/rtests/codewriter/Makefile
new file mode 100644 (file)
index 0000000..bacc75c
--- /dev/null
@@ -0,0 +1,28 @@
+# make compile:         make bytecode executable
+#                       (bytecode only; there is no native-code target here)
+# make clean:          remove intermediate files (in this directory)
+# make CLEAN:           remove intermediate files (recursively)
+# make distclean:      remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+OCAMLPATH=../..
+
+compile: compile.ml
+       ocamlfind ocamlc -g -custom -o compile -package .,str -linkpkg compile.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa sample sample.ml out1 out2
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~
+       rm -f compile
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/codewriter/compile.ml b/helm/DEVEL/pxp/pxp/rtests/codewriter/compile.ml
new file mode 100644 (file)
index 0000000..1bd6e22
--- /dev/null
@@ -0,0 +1,131 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+let rec prerr_error e =
+  prerr_endline (string_of_exn e)
+;;
+
+
+class warner =
+  object 
+    method warn w =
+      prerr_endline ("WARNING: " ^ w)
+  end
+;;
+
+
+let compile in_filename out_filename print super_root pis comments =
+  let spec =
+    let e = new element_impl default_extension in
+    make_spec_from_mapping
+      ~super_root_exemplar:      e
+      ~default_pinstr_exemplar:  e
+      ~comment_exemplar:         e
+      ~data_exemplar:            (new data_impl default_extension)
+      ~default_element_exemplar: e
+      ~element_mapping:          (Hashtbl.create 1)
+      ()
+  in
+  let config =
+      { default_config with 
+         encoding = `Enc_utf8;
+         warner = new warner;
+         enable_super_root_node = super_root;
+         enable_pinstr_nodes = pis;
+         enable_comment_nodes = comments;
+      }
+  in
+  try 
+    let tree =
+      parse_document_entity
+        config
+       (from_file in_filename)
+       spec 
+    in
+    
+    let ch = open_out out_filename in
+    Pxp_codewriter.write_document ch tree;
+    output_string ch "(create_document (new Pxp_types.drop_warnings) Pxp_yacc.default_spec) # write (Pxp_types.Out_channel stdout) `Enc_utf8;;\n";
+    close_out ch;
+
+    if print then
+      tree # write (Out_channel stdout) `Enc_utf8;
+  with
+      e ->
+       error_happened := true;
+       prerr_error e
+;;
+
+
+let main() =
+  let in_file = ref "" in
+  let out_file = ref "" in
+  let print_file = ref false in
+  let super_root = ref false in
+  let pis = ref false in
+  let comments = ref false in
+  Arg.parse
+      [ "-in", (Arg.String (fun s -> in_file := s)),
+            " <file>      Set the XML file to read";
+       "-out", (Arg.String (fun s -> out_file := s)),
+            " <file>     Set the Ocaml file to write";
+       "-print", (Arg.Set print_file),
+              "          Print the XML file in standard form";
+       "-super-root", Arg.Set super_root,
+                   "     Generate a super root node";
+       "-pis", Arg.Set pis,
+            "            Generate wrapper nodes for processing instructions";
+       "-comments", Arg.Set comments,
+                 "       Generate nodes for comments";
+      ]
+      (fun x -> raise (Arg.Bad "Unexpected argument"))
+      "
+usage: compile [ options ]
+
+List of options:";
+  if !in_file = "" then begin
+    prerr_endline "No input file specified.";
+    exit 1
+  end;
+  if !out_file = "" then begin
+    prerr_endline "No output file specified.";
+    exit 1
+  end;
+  compile !in_file !out_file !print_file !super_root !pis !comments
+;;
+
+
+main();
+if !error_happened then exit(1);;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:35  lpadovan
+ * Initial revision
+ *
+ * Revision 1.4  2000/08/17 01:20:15  gerd
+ *     Update: Also tested whether super root nodes, pinstr nodes
+ * and comment nodes work.
+ *     Note: comment nodes are not fully tested yet.
+ *
+ * Revision 1.3  2000/08/16 23:44:19  gerd
+ *     Updates because of changes of the PXP API.
+ *
+ * Revision 1.2  2000/07/16 17:54:15  gerd
+ *     Updated because of PXP interface changes.
+ *
+ * Revision 1.1  2000/07/09 00:33:32  gerd
+ *     Initial revision.
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/rtests/codewriter/run_codewriter b/helm/DEVEL/pxp/pxp/rtests/codewriter/run_codewriter
new file mode 100755 (executable)
index 0000000..08e0a42
--- /dev/null
@@ -0,0 +1,3 @@
+#! /bin/sh
+
+./test_codewriter sample001.xml
diff --git a/helm/DEVEL/pxp/pxp/rtests/codewriter/sample001.xml b/helm/DEVEL/pxp/pxp/rtests/codewriter/sample001.xml
new file mode 100644 (file)
index 0000000..4516b71
--- /dev/null
@@ -0,0 +1,44 @@
+<!DOCTYPE a [
+
+<!ELEMENT a (b | (c, d)* | (e, f)+ | g?)>
+<!ELEMENT b (#PCDATA | a)*>
+<!ELEMENT c EMPTY>
+<!ELEMENT d ANY>
+<!ELEMENT e EMPTY>
+<!ELEMENT f EMPTY>
+<!ELEMENT g EMPTY>
+
+<!ATTLIST a u CDATA #IMPLIED
+            v NMTOKEN "huhu"
+            w (q|p)   #REQUIRED
+            x NOTATION (n1|n2) "n1"
+            y ENTITY #IMPLIED>
+
+<!NOTATION n1 SYSTEM "/bin/n1-processor">
+<!NOTATION n2 SYSTEM "/bin/n2-processor">
+
+<!ENTITY u1 SYSTEM "file-u1" NDATA n1>
+<!ENTITY u2 SYSTEM "file-u2" NDATA n2>
+
+<!-- comment 1 -->
+<?pi1 args ...?>
+]>
+
+<!-- comment 2 -->
+<a u="1" w="q" x="n2">
+  <!-- comment 3 -->
+  <b>
+    <?pi2 args ...?>
+    This is text!
+    <a w="p" y="u1">
+      <c/>
+      <d/>
+    </a>
+  </b>
+  <!-- comment 4 -->
+</a>
+
+<!-- comment 5 -->
+<?pi3 args ...?>
+<!-- comment 6 -->
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/codewriter/test_codewriter b/helm/DEVEL/pxp/pxp/rtests/codewriter/test_codewriter
new file mode 100755 (executable)
index 0000000..769b6b9
--- /dev/null
@@ -0,0 +1,18 @@
+#! /bin/sh
+
+set -e
+
+sample="$1"
+echo "Testing $sample:"
+./compile -in "$sample" -out "sample.ml" -print -super-root -pis -comments >"out1"
+echo "- code written to sample.ml, formatted data to out1"
+OCAMLPATH=../.. ocamlfind ocamlc -package . -linkpkg -custom sample.ml -o sample
+echo "- sample.ml compiled to sample"
+./sample >out2
+echo "- re-read data written to out2"
+if cmp out1 out2; then
+    echo "- out1 and out2 are identical! OK"
+else
+    echo "- out1 and out2 differ! FAILURE!"
+    exit 1
+fi
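For readers tracing the codewriter round trip: compile.ml above converts sample001.xml into OCaml source with Pxp_codewriter.write_document and then appends one extra statement, so the generated sample.ml ends roughly like this (quoting the output_string call in compile.ml; the appended line assumes Pxp_codewriter emits a create_document function into the same file):

    (create_document (new Pxp_types.drop_warnings) Pxp_yacc.default_spec)
      # write (Pxp_types.Out_channel stdout) `Enc_utf8;;

Running the compiled sample therefore re-serializes the reconstructed tree to out2, and the script above only has to compare that with out1, the serialization produced directly by compile -print.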
diff --git a/helm/DEVEL/pxp/pxp/rtests/dumpfiles.ml b/helm/DEVEL/pxp/pxp/rtests/dumpfiles.ml
new file mode 100644 (file)
index 0000000..d5f9e50
--- /dev/null
@@ -0,0 +1,56 @@
+
+
+let dump_file name =
+  let ch = open_in_bin name in
+  let len = in_channel_length ch in
+  let sin = String.create len in
+  really_input ch sin 0 len;
+  close_in ch;
+
+  Printf.printf "\\noindent\\begin{minipage}{5.5cm}\n";
+  (* Printf.printf "\\rule{5.5cm}{1pt}\n"; *)
+  Printf.printf "\\footnotesize\\bf File %s:\\\\\n" name;
+  Printf.printf "\\tt{}";
+  
+  for i = 0 to len - 1 do
+    match sin.[i] with
+       ('\000'..'\008'|'\011'|'\012'|'\014'..'\031'|'\127'..'\255') as c ->
+         Printf.printf "{\\sl (%02x)}\\linebreak[2]" (Char.code c)
+      | '\009' ->
+         Printf.printf "{\\sl HT}\\linebreak[3]"
+      | '\010' ->
+         Printf.printf "{\\sl LF}\\\\\n"
+      | '\013' ->
+         Printf.printf "{\\sl CR}";
+         if not(i < len - 1 && sin.[i+1] = '\010') then
+           Printf.printf "\\\\\n";
+      | ' ' ->
+         Printf.printf "\\symbol{32}\\linebreak[3]"
+
+      | ('"'|'#'|'$'|'%'|'&'|'-'|'<'|'>'|'['|'\\'|']'|'^'|'_'|'`'|
+        '{'|'|'|'}'|'~') as c ->
+         Printf.printf "\\symbol{%d}\\linebreak[2]" (Char.code c)
+      | c ->
+         print_char c;
+         print_string "\\linebreak[0]"
+  done;
+
+  Printf.printf "\\mbox{}\\\\\n";
+  Printf.printf "\\rule{5.5cm}{1pt}\n";
+  Printf.printf "\\end{minipage}\n"
+;;
+
+
+print_endline "\\documentclass[a4paper]{article}";
+print_endline "\\usepackage{multicol}";
+print_endline "\\begin{document}";
+print_endline "\\begin{multicols}{2}";
+for i = 1 to Array.length(Sys.argv)-1 do
+  dump_file Sys.argv.(i)
+done;
+print_endline "\\end{multicols}";
+print_endline "\\end{document}"
+;;
+
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/Makefile b/helm/DEVEL/pxp/pxp/rtests/negative/Makefile
new file mode 100644 (file)
index 0000000..2e8842c
--- /dev/null
@@ -0,0 +1,28 @@
+# make test_negative:   make bytecode executable
+# make clean:          remove intermediate files (in this directory)
+# make CLEAN:           remove intermediate files (recursively)
+# make distclean:      remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+OCAMLPATH=../..
+
+test_negative: test_negative.ml
+       ocamlfind ocamlc -custom -o test_negative -package .,str -linkpkg test_negative.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa current.out
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~
+       rm -f test_negative 
+
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/README b/helm/DEVEL/pxp/pxp/rtests/negative/README
new file mode 100644 (file)
index 0000000..cfdaf38
--- /dev/null
@@ -0,0 +1,25 @@
+----------------------------------------------------------------------
+Regression test "negative":
+----------------------------------------------------------------------
+
+- An erroneous XML file is parsed, and the error message is printed.
+
+- The output is compared with a reference file. The test is only
+  passed if the output and the reference are equal.
+
+- Test data "data_jclark_notwf":
+  Contains the samples by James Clark that are not well-formed. 
+  The subdirectories:
+  - sa:           standalone documents
+  - not-sa:       non-standalone documents (with an external DTD)
+  - ext-sa:       non-standalone documents (with other external entities)
+
+- Test data "data_jclark_invalid":
+  Contains the samples by James Clark that are invalid.
+
+- Tests that do not pass have been moved into the *-problems directories.
+  The reason is typically that they use characters outside the
+  Latin-1 character set.
+
+- Test data "data_notwf":
+  Contains our own tests with samples that are not well-formed.
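A rough sketch of the pattern this README describes (editorial note, not from the commit; the actual driver test_negative.ml is added elsewhere in this commit and may differ). It reuses only PXP calls that already appear in test_canonxml.ml above; the helper name expect_error and the file path are made up for illustration:

    open Pxp_types;;
    open Pxp_yacc;;

    (* Parse a file that is expected to be rejected and print the error text;
       the test then compares this text against the corresponding *.out file. *)
    let expect_error filename =
      try
        ignore
          (parse_document_entity default_config (from_file filename) default_spec);
        prerr_endline ("UNEXPECTED: " ^ filename ^ " was accepted")
      with
        e -> print_endline (string_of_exn e)
    ;;

    let () = expect_error "data_invalid/001.xml";;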
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/001.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/001.out
new file mode 100644 (file)
index 0000000..2d51ff6
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/001.xml", at line 5, position 3:
+ERROR (Validity constraint): The root element is `b' but is declared as `a
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/001.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/001.xml
new file mode 100644 (file)
index 0000000..9a346e7
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE a [
+<!ELEMENT a ANY>
+<!ELEMENT b ANY>
+]>
+<b>x</b>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/010.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/010.out
new file mode 100644 (file)
index 0000000..8a2f8d6
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/010.xml", at line 7, position 14:
+ERROR (Validity constraint): Attribute `id' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/010.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/010.xml
new file mode 100644 (file)
index 0000000..16e0e23
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el id ID #IMPLIED>
+]>
+
+<el id="100"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/011.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/011.out
new file mode 100644 (file)
index 0000000..a03a204
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/011.xml", at line 10, position 17:
+ERROR (Validity constraint): ID not unique
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/011.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/011.xml
new file mode 100644 (file)
index 0000000..6aa3091
--- /dev/null
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el id ID #IMPLIED>
+]>
+
+<any>
+  <el id="x100"/>
+  <el id="x100"/>
+</any>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/012.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/012.out
new file mode 100644 (file)
index 0000000..1870963
--- /dev/null
@@ -0,0 +1,3 @@
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/012.xml", at line 6, position 1:
+ERROR (Validity constraint): More than one ID attribute for element `el'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/012.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/012.xml
new file mode 100644 (file)
index 0000000..d5bd7fa
--- /dev/null
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el id1 ID #IMPLIED>
+<!ATTLIST el id2 ID #IMPLIED>
+]>
+
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/013.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/013.out
new file mode 100644 (file)
index 0000000..98092c3
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/013.xml", at line 5, position 1:
+ERROR (Validity constraint): ID attribute must be #IMPLIED or #REQUIRED; element `el'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/013.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/013.xml
new file mode 100644 (file)
index 0000000..7d321a4
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el id ID "a">
+]>
+
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/014.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/014.out
new file mode 100644 (file)
index 0000000..ec1247b
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/014.xml", at line 5, position 1:
+ERROR (Validity constraint): ID attribute must be #IMPLIED or #REQUIRED; element `el'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/014.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/014.xml
new file mode 100644 (file)
index 0000000..a56220d
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el id ID #FIXED "a">
+]>
+
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/015.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/015.out
new file mode 100644 (file)
index 0000000..18dd9c9
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/015.xml", at line 7, position 17:
+ERROR (Validity constraint): Attribute `idref' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/015.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/015.xml
new file mode 100644 (file)
index 0000000..d707f89
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el idref IDREF #IMPLIED>
+]>
+
+<el idref="100"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/016.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/016.out
new file mode 100644 (file)
index 0000000..4192286
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/016.xml", at line 7, position 22:
+ERROR (Validity constraint): Attribute `idrefs' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/016.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/016.xml
new file mode 100644 (file)
index 0000000..7ea9455
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el idrefs IDREFS #IMPLIED>
+]>
+
+<el idrefs="100 200"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/017.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/017.out
new file mode 100644 (file)
index 0000000..97115e6
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/017.xml" at line 12, position 2:
+ERROR (Validity constraint): Attribute `idref' of element `el' refers to unknown ID `a20'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/017.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/017.xml
new file mode 100644 (file)
index 0000000..da6e582
--- /dev/null
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el id ID #IMPLIED
+             idref IDREF #IMPLIED
+>
+]>
+
+<any>
+  <el id="a10"/>
+  <el idref="a20"/>
+</any>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/018.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/018.out
new file mode 100644 (file)
index 0000000..8d761c9
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/018.xml" at line 12, position 2:
+ERROR (Validity constraint): Attribute `idrefs' of element `el' refers to unknown ID `a20'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/018.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/018.xml
new file mode 100644 (file)
index 0000000..c3f011b
--- /dev/null
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el id ID #IMPLIED
+             idrefs IDREFS #IMPLIED
+>
+]>
+
+<any>
+  <el id="a10"/>
+  <el idrefs="a10 a20"/>
+</any>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/019.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/019.out
new file mode 100644 (file)
index 0000000..6617245
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/019.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/019.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/019.xml
new file mode 100644 (file)
index 0000000..e10ed94
--- /dev/null
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ENTITY ndata SYSTEM "" NDATA x>
+<!ELEMENT el EMPTY>
+<!ATTLIST el ent ENTITY #IMPLIED>
+]>
+
+<el ent="10"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/020.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/020.out
new file mode 100644 (file)
index 0000000..a95a6a7
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/020.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/020.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/020.xml
new file mode 100644 (file)
index 0000000..4f696dd
--- /dev/null
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ENTITY ndata SYSTEM "" NDATA x>
+<!ELEMENT el EMPTY>
+<!ATTLIST el ents ENTITIES #IMPLIED>
+]>
+
+<el ents="a 10"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/021.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/021.out
new file mode 100644 (file)
index 0000000..bb950c7
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/021.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/021.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/021.xml
new file mode 100644 (file)
index 0000000..64662ac
--- /dev/null
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ENTITY ndata SYSTEM "" NDATA x>
+<!ELEMENT el EMPTY>
+<!ATTLIST el ent ENTITY #IMPLIED>
+]>
+
+<el ent="x"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/022.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/022.out
new file mode 100644 (file)
index 0000000..2b8e091
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/022.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/022.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/022.xml
new file mode 100644 (file)
index 0000000..3e8fe89
--- /dev/null
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ENTITY ndata SYSTEM "" NDATA x>
+<!ELEMENT el EMPTY>
+<!ATTLIST el ents ENTITIES #IMPLIED>
+]>
+
+<el ents="ndata a"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/023.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/023.out
new file mode 100644 (file)
index 0000000..4775ce1
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/023.xml", at line 6, position 13:
+ERROR (Validity constraint): Attribute `nm' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/023.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/023.xml
new file mode 100644 (file)
index 0000000..acf09e1
--- /dev/null
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el nm NMTOKEN #IMPLIED>
+]>
+<el nm="[]"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/024.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/024.out
new file mode 100644 (file)
index 0000000..ad436a1
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/024.xml", at line 6, position 17:
+ERROR (Validity constraint): Attribute `nms' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/024.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/024.xml
new file mode 100644 (file)
index 0000000..cdeabbc
--- /dev/null
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el nms NMTOKENS #IMPLIED>
+]>
+<el nms="10 []"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/025.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/025.out
new file mode 100644 (file)
index 0000000..f832e5d
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/025.xml", at line 5, position 1:
+ERROR (Validity constraint): Default value for attribute `idref' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/025.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/025.xml
new file mode 100644 (file)
index 0000000..6b5531c
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el idref IDREF "100">
+]>
+
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/026.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/026.out
new file mode 100644 (file)
index 0000000..2e311ed
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/026.xml", at line 5, position 1:
+ERROR (Validity constraint): Default value for attribute `idrefs' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/026.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/026.xml
new file mode 100644 (file)
index 0000000..75757c4
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el idrefs IDREFS "100 200">
+]>
+
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/027.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/027.out
new file mode 100644 (file)
index 0000000..f753d46
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/027.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/027.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/027.xml
new file mode 100644 (file)
index 0000000..9c1fffb
--- /dev/null
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ENTITY ndata SYSTEM "" NDATA x>
+<!ELEMENT el EMPTY>
+<!ATTLIST el ent ENTITY "10">
+]>
+
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/028.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/028.out
new file mode 100644 (file)
index 0000000..b8c4c83
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/028.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/028.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/028.xml
new file mode 100644 (file)
index 0000000..2165803
--- /dev/null
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ENTITY ndata SYSTEM "" NDATA x>
+<!ELEMENT el EMPTY>
+<!ATTLIST el ents ENTITIES "a 10">
+]>
+
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/029.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/029.out
new file mode 100644 (file)
index 0000000..3e71466
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/029.xml", at line 5, position 1:
+ERROR (Validity constraint): Default value for attribute `nm' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/029.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/029.xml
new file mode 100644 (file)
index 0000000..a9cc68f
--- /dev/null
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el nm NMTOKEN "[]">
+]>
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/030.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/030.out
new file mode 100644 (file)
index 0000000..1c25ca8
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/030.xml", at line 5, position 1:
+ERROR (Validity constraint): Default value for attribute `nms' is lexically malformed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/030.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/030.xml
new file mode 100644 (file)
index 0000000..da8a26c
--- /dev/null
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el nms NMTOKENS "10 []">
+]>
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/031.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/031.out
new file mode 100644 (file)
index 0000000..07ff0ff
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/031.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `jpeg'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/031.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/031.xml
new file mode 100644 (file)
index 0000000..16d3b3b
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!NOTATION gif PUBLIC "image/gif">
+<!ELEMENT el EMPTY>
+<!ATTLIST el n NOTATION (gif|jpeg) #IMPLIED>
+]>
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/032.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/032.out
new file mode 100644 (file)
index 0000000..5a5b4b6
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/032.xml", at line 6, position 1:
+ERROR (Validity constraint): Illegal default value for attribute `n' in declaration for element `el'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/032.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/032.xml
new file mode 100644 (file)
index 0000000..f4e2d4d
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!NOTATION gif PUBLIC "image/gif">
+<!ELEMENT el EMPTY>
+<!ATTLIST el n NOTATION (gif) "jpeg">
+]>
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/033.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/033.out
new file mode 100644 (file)
index 0000000..de96bda
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/033.xml", at line 7, position 14:
+ERROR (Validity constraint): Attribute `n' does not match one of the declared notation names
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/033.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/033.xml
new file mode 100644 (file)
index 0000000..0ca58da
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!NOTATION gif PUBLIC "image/gif">
+<!ELEMENT el EMPTY>
+<!ATTLIST el n NOTATION (gif) #IMPLIED>
+]>
+<el n="jpeg"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/034.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/034.out
new file mode 100644 (file)
index 0000000..e1cca97
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/034.xml", at line 8, position 1:
+ERROR (Validity constraint): More than one NOTATION attribute for element `el'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/034.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/034.xml
new file mode 100644 (file)
index 0000000..10ee38f
--- /dev/null
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!NOTATION gif PUBLIC "image/gif">
+<!ELEMENT el EMPTY>
+<!ATTLIST el n NOTATION (gif) #IMPLIED
+             m NOTATION (gif) #IMPLIED
+>
+]>
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/035.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/035.out
new file mode 100644 (file)
index 0000000..9c3ea45
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/035.xml", at line 5, position 1:
+ERROR (Validity constraint): Illegal default value for attribute `enum' in declaration for element `el'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/035.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/035.xml
new file mode 100644 (file)
index 0000000..a42060c
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el enum (a|b|c) "d">
+]>
+
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/036.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/036.out
new file mode 100644 (file)
index 0000000..39e3f77
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/036.xml", at line 7, position 14:
+ERROR (Validity constraint): Attribute `enum' does not match one of the declared enumerator tokens
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/036.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/036.xml
new file mode 100644 (file)
index 0000000..12cf01d
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el enum (a|b|c) #IMPLIED>
+]>
+
+<el enum="d"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/037.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/037.out
new file mode 100644 (file)
index 0000000..afa614c
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/037.xml", at line 7, position 5:
+ERROR (Validity constraint): Required attribute `x' is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/037.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/037.xml
new file mode 100644 (file)
index 0000000..2619e60
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el x CDATA #REQUIRED>
+]>
+
+<el/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/038.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/038.out
new file mode 100644 (file)
index 0000000..902d7cd
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/038.xml", at line 7, position 13:
+ERROR (Validity constraint): Attribute `x' is fixed, but has here a different value
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/038.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/038.xml
new file mode 100644 (file)
index 0000000..2ad30db
--- /dev/null
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el x CDATA #FIXED "abc">
+]>
+
+<el x="def"/>
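Tests 037 and 038 cover default declarations: a #REQUIRED attribute must be specified on every instance of its element, and a #FIXED attribute, when specified, must equal the declared default. A valid counterpart (illustrative only, not part of this commit):

  <?xml version="1.0"?>
  <!DOCTYPE el [
  <!ELEMENT el EMPTY>
  <!ATTLIST el x CDATA #REQUIRED
               y CDATA #FIXED "abc">
  ]>
  <el x="some value" y="abc"/>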
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.ent
new file mode 100644 (file)
index 0000000..8c23f3e
--- /dev/null
@@ -0,0 +1 @@
+<!ATTLIST el v3 CDATA "ghi">
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.out
new file mode 100644 (file)
index 0000000..29296ae
--- /dev/null
@@ -0,0 +1,4 @@
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/060.xml", at line 17, position 12:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/060.xml
new file mode 100644 (file)
index 0000000..1cc0f2d
--- /dev/null
@@ -0,0 +1,19 @@
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el v1 CDATA "abc">
+<!ENTITY % declare_v2 '<!ATTLIST el v2 CDATA "def">'>
+%declare_v2;
+<!ENTITY % declare_v3 SYSTEM "060.ent">
+%declare_v3;
+]>
+
+<any>
+  <any><el v1="ABC" v2="DEF" v3="GHI"/></any>
+  <any><el v2="DEF" v3="GHI"/></any>
+  <any><el v3="GHI"/></any>
+  <any><el/></any>
+</any>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.ent
new file mode 100644 (file)
index 0000000..03674e9
--- /dev/null
@@ -0,0 +1,2 @@
+<!ENTITY % declare_v3 '<!ATTLIST el v3 CDATA "ghi">'>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.out
new file mode 100644 (file)
index 0000000..40d1d07
--- /dev/null
@@ -0,0 +1,4 @@
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/061.xml", at line 18, position 12:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/061.xml
new file mode 100644 (file)
index 0000000..226df78
--- /dev/null
@@ -0,0 +1,20 @@
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el v1 CDATA "abc">
+<!ENTITY % declare_v2 '<!ATTLIST el v2 CDATA "def">'>
+%declare_v2;
+<!ENTITY % declare_declare_v3 SYSTEM "061.ent">
+%declare_declare_v3;
+%declare_v3;
+]>
+
+<any>
+  <any><el v1="ABC" v2="DEF" v3="GHI"/></any>
+  <any><el v2="DEF" v3="GHI"/></any>
+  <any><el v3="GHI"/></any>
+  <any><el/></any>
+</any>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.ent
new file mode 100644 (file)
index 0000000..8c23f3e
--- /dev/null
@@ -0,0 +1 @@
+<!ATTLIST el v3 CDATA "ghi">
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.out
new file mode 100644 (file)
index 0000000..b026613
--- /dev/null
@@ -0,0 +1,4 @@
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/062.xml", at line 15, position 12:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/062.xml
new file mode 100644 (file)
index 0000000..458ac28
--- /dev/null
@@ -0,0 +1,17 @@
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any SYSTEM "062.ent" [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el v1 CDATA "abc">
+<!ENTITY % declare_v2 '<!ATTLIST el v2 CDATA "def">'>
+%declare_v2;
+]>
+
+<any>
+  <any><el v1="ABC" v2="DEF" v3="GHI"/></any>
+  <any><el v2="DEF" v3="GHI"/></any>
+  <any><el v3="GHI"/></any>
+  <any><el/></any>
+</any>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.ent
new file mode 100644 (file)
index 0000000..2d72317
--- /dev/null
@@ -0,0 +1 @@
+<!ENTITY e3 "ghi">
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.out
new file mode 100644 (file)
index 0000000..275ca29
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/063.xml", at line 15, position 2:
+ERROR (Validity constraint): Reference to entity `e3' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/063.xml
new file mode 100644 (file)
index 0000000..18b7061
--- /dev/null
@@ -0,0 +1,17 @@
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ENTITY e1 "abc">
+<!ENTITY % declare_e2 '<!ENTITY e2 "def">'>
+%declare_e2;
+<!ENTITY % declare_e3 SYSTEM "063.ent">
+%declare_e3;
+]>
+
+<any>
+  &e1;
+  &e2;
+  &e3;
+</any>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.ent
new file mode 100644 (file)
index 0000000..2d72317
--- /dev/null
@@ -0,0 +1 @@
+<!ENTITY e3 "ghi">
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.out
new file mode 100644 (file)
index 0000000..d825206
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/064.xml", at line 17, position 10:
+ERROR (Validity constraint): Reference to entity `e3' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/064.xml
new file mode 100644 (file)
index 0000000..4fb66cd
--- /dev/null
@@ -0,0 +1,19 @@
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ENTITY e1 "abc">
+<!ENTITY % declare_e2 '<!ENTITY e2 "def">'>
+%declare_e2;
+<!ENTITY % declare_e3 SYSTEM "064.ent">
+%declare_e3;
+<!ELEMENT el EMPTY>
+<!ATTLIST el att CDATA #IMPLIED>
+]>
+
+<any>
+  <el att="&e1;"/>
+  <el att="&e2;"/>
+  <el att="&e3;"/>
+</any>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.ent
new file mode 100644 (file)
index 0000000..2d72317
--- /dev/null
@@ -0,0 +1 @@
+<!ENTITY e3 "ghi">
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.out
new file mode 100644 (file)
index 0000000..e2591c6
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/065.xml", at line 13, position 24:
+ERROR (Validity constraint): Reference to entity `e3' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/065.xml
new file mode 100644 (file)
index 0000000..3a52645
--- /dev/null
@@ -0,0 +1,23 @@
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ENTITY e1 "abc">
+<!ENTITY % declare_e2 '<!ENTITY e2 "def">'>
+%declare_e2;
+<!ENTITY % declare_e3 SYSTEM "065.ent">
+%declare_e3;
+<!ELEMENT el EMPTY>
+<!ATTLIST el att1 CDATA "&e1;"
+             att2 CDATA "&e2;"
+             att3 CDATA "&e3;"
+>
+]>
+
+<any>
+  <el att1="1" att2="2" att3="3"/>
+  <el att2="2" att3="3"/>
+  <el att3="3"/>
+  <el/>
+</any>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.ent
new file mode 100644 (file)
index 0000000..136d73e
--- /dev/null
@@ -0,0 +1 @@
+<!ENTITY e3 SYSTEM "ghi" NDATA n3>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.out
new file mode 100644 (file)
index 0000000..d14209f
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/066.xml", at line 13, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `n3'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/066.xml
new file mode 100644 (file)
index 0000000..a1cd7a6
--- /dev/null
@@ -0,0 +1,20 @@
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ENTITY e1 SYSTEM "abc" NDATA n1>
+<!ENTITY % declare_e2 '<!ENTITY e2 SYSTEM "def" NDATA n2>'>
+%declare_e2;
+<!ENTITY % declare_e3 SYSTEM "066.ent">
+%declare_e3;
+<!ELEMENT el EMPTY>
+<!ATTLIST el att ENTITY #IMPLIED
+>
+]>
+
+<any>
+  <any><el att="e1"/></any>
+  <any><el att="e2"/></any>
+  <any><el att="e3"/></any>
+</any>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.ent
new file mode 100644 (file)
index 0000000..136d73e
--- /dev/null
@@ -0,0 +1 @@
+<!ENTITY e3 SYSTEM "ghi" NDATA n3>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.out
new file mode 100644 (file)
index 0000000..117eda9
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/067.xml", at line 15, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `n3'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/067.xml
new file mode 100644 (file)
index 0000000..bb4edd3
--- /dev/null
@@ -0,0 +1,23 @@
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ENTITY e1 SYSTEM "abc" NDATA n1>
+<!ENTITY % declare_e2 '<!ENTITY e2 SYSTEM "def" NDATA n2>'>
+%declare_e2;
+<!ENTITY % declare_e3 SYSTEM "067.ent">
+%declare_e3;
+<!ELEMENT el EMPTY>
+<!ATTLIST el att1 ENTITY "e1"
+             att2 ENTITY "e2"
+             att3 ENTITY "e3"
+>
+]>
+
+<any>
+  <any><el att1="e1" att2="e1" att3="e1"/></any>
+  <any><el att2="e1" att3="e1"/></any>
+  <any><el att3="e1"/></any>
+  <any><el/></any>
+</any>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.ent
new file mode 100644 (file)
index 0000000..5a19ad8
--- /dev/null
@@ -0,0 +1 @@
+<!ATTLIST el v3 NMTOKEN #IMPLIED>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.out
new file mode 100644 (file)
index 0000000..3942556
--- /dev/null
@@ -0,0 +1,4 @@
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/068.xml", at line 19, position 23:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/068.xml
new file mode 100644 (file)
index 0000000..31c29f8
--- /dev/null
@@ -0,0 +1,21 @@
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el v1 NMTOKEN #IMPLIED>
+<!ENTITY % declare_v2 '<!ATTLIST el v2 NMTOKEN #IMPLIED>'>
+%declare_v2;
+<!ENTITY % declare_v3 SYSTEM "068.ent">
+%declare_v3;
+]>
+
+<any>
+  <any><el v1="abc"/></any>
+  <any><el v2="abc"/></any>
+  <any><el v3="abc"/></any>
+  <any><el v1=" abc "/></any>
+  <any><el v2=" abc "/></any>
+  <any><el v3=" abc "/></any>
+</any>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.ent
new file mode 100644 (file)
index 0000000..4bad199
--- /dev/null
@@ -0,0 +1 @@
+<!ATTLIST el v3 NMTOKENS #IMPLIED>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.out
new file mode 100644 (file)
index 0000000..ca71500
--- /dev/null
@@ -0,0 +1,4 @@
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/069.xml", at line 19, position 27:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/069.xml
new file mode 100644 (file)
index 0000000..a0f1b70
--- /dev/null
@@ -0,0 +1,21 @@
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el v1 NMTOKENS #IMPLIED>
+<!ENTITY % declare_v2 '<!ATTLIST el v2 NMTOKENS #IMPLIED>'>
+%declare_v2;
+<!ENTITY % declare_v3 SYSTEM "069.ent">
+%declare_v3;
+]>
+
+<any>
+  <any><el v1="abc def"/></any>
+  <any><el v2="abc def"/></any>
+  <any><el v3="abc def"/></any>
+  <any><el v1=" abc def "/></any>
+  <any><el v2=" abc def "/></any>
+  <any><el v3=" abc def "/></any>
+</any>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.ent
new file mode 100644 (file)
index 0000000..1792bb9
--- /dev/null
@@ -0,0 +1 @@
+<!ELEMENT outer3 (inner)>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.out
new file mode 100644 (file)
index 0000000..4bbfb52
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/070.xml", at line 19, position 32:
+ERROR (Validity constraint): Element `outer3' violates standalone declaration because extra white space separates the sub elements
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/070.xml
new file mode 100644 (file)
index 0000000..85e400a
--- /dev/null
@@ -0,0 +1,21 @@
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT inner EMPTY>
+<!ELEMENT outer1 (inner)>
+<!ENTITY % declare_outer2 '<!ELEMENT outer2 (inner)>'>
+%declare_outer2;
+<!ENTITY % declare_outer3 SYSTEM "070.ent">
+%declare_outer3;
+]>
+
+<any>
+  <any><outer1><inner/></outer1></any>
+  <any><outer2><inner/></outer2></any>
+  <any><outer3><inner/></outer3></any>
+  <any><outer1><inner/> </outer1></any>
+  <any><outer2><inner/> </outer2></any>
+  <any><outer3><inner/> </outer3></any>
+</any>
+
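Tests 060 through 070 all probe the standalone document declaration: with standalone="yes", externally declared attribute defaults (060-062, 068-069), externally declared parsed general entities (063-065), externally declared unparsed entities (066-067) and element-content white space handling (070) must not affect what the processor hands to the application. A document that relies on such declarations has to either move them into the internal subset or say standalone="no". A counterpart of 060.xml that stays valid (illustrative only, not part of this commit, assuming the same 060.ent as above):

  <?xml version="1.0" standalone="no"?>
  <!DOCTYPE any [
  <!ELEMENT any ANY>
  <!ELEMENT el EMPTY>
  <!ATTLIST el v1 CDATA "abc">
  <!ENTITY % declare_v3 SYSTEM "060.ent">
  %declare_v3;
  ]>
  <any><el/></any>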
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/080.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/080.out
new file mode 100644 (file)
index 0000000..174afa0
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/080.xml", at line 4, position 0:
+ERROR (Validity constraint): The content model of element `b' is not deterministic
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/080.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/080.xml
new file mode 100644 (file)
index 0000000..8deeef4
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE a [
+<!ELEMENT a ANY>
+<!ELEMENT b ((a,b)|a+)>]>
+<a/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/081.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/081.out
new file mode 100644 (file)
index 0000000..1f3cfaa
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/081.xml", at line 4, position 0:
+ERROR (Validity constraint): The content model of element `b' is not deterministic
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/081.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/081.xml
new file mode 100644 (file)
index 0000000..293eeae
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE a [
+<!ELEMENT a ANY>
+<!ELEMENT b ((b|a+),a)>]>
+<a/>
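Tests 080 and 081 reject content models that are not deterministic: in ((a,b)|a+) a parser that has read one a cannot tell, without looking ahead, whether it belongs to (a,b) or to a+, and ((b|a+),a) has the same ambiguity between a+ and the trailing a. The language of 080's model can be written deterministically (illustrative rewrite, not part of this commit):

  <!DOCTYPE a [
  <!ELEMENT a ANY>
  <!ELEMENT b (a, (b | a*))>
  ]>
  <a/>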
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/INDEX b/helm/DEVEL/pxp/pxp/rtests/negative/data_invalid/INDEX
new file mode 100644 (file)
index 0000000..e2090fd
--- /dev/null
@@ -0,0 +1,75 @@
+----------------------------------------
+Root element
+----------------------------------------
+
+001.xml                Declared root element type matches actual root element type
+
+----------------------------------------
+Attributes
+----------------------------------------
+
+010.xml                ID attributes must match the Name production (not nmtoken)
+011.xml                ID attributes uniquely identify the element bearing them
+               *** TODO ***
+012.xml                No element type may have more than one ID attribute declared
+013.xml                No ID attribute may have a default value
+014.xml                No ID attribute may have a default value (#FIXED)
+015.xml                Attributes of type IDREF must match the Name production
+016.xml                Attributes of type IDREFS must match the Names production
+017.xml                Attributes of type IDREF must match the value of an ID
+               attribute
+               *** TODO ***
+018.xml                Attributes of type IDREFS must match the values of ID
+               attributes
+               *** TODO ***
+019.xml                Attributes of type ENTITY must match the Name production
+020.xml                Attributes of type ENTITIES must match the Names production
+021.xml                Attributes of type ENTITY must match an unparsed entity
+022.xml                Attributes of type ENTITIES must match unparsed entities
+023.xml                Attributes of type NMTOKEN must match the nmtoken production
+024.xml                Attributes of type NMTOKENS must match the nmtokens production
+025.xml                like 015.xml, but the default value is tested
+026.xml                like 016.xml, but the default value is tested
+027.xml                like 019.xml, but the default value is tested
+028.xml                like 020.xml, but the default value is tested
+029.xml                like 023.xml, but the default value is tested
+030.xml                like 024.xml, but the default value is tested
+031.xml                All notation names in the declaration must have been declared
+032.xml                Values of NOTATION type must match one declared value
+033.xml                Values of NOTATION type must match one declared value
+034.xml                Only one NOTATION attribute per element
+035.xml                Values of enum type must match one of the declared values
+036.xml                Values of enum type must match one of the declared values
+037.xml                missing #REQUIRED attribute
+038.xml                #FIXED attributes must match the declared default
+
+----------------------------------------
+Standalone declaration
+----------------------------------------
+
+060.xml                Externally declared default values are rejected
+061.xml                variant of 060.xml (internal entity within external entity)
+062.xml                variant of 060.xml (external subset of DTD)
+063.xml                Externally declared parsed general entities are rejected
+               (entity ref occurs in main text)
+064.xml                Externally declared parsed general entities are rejected
+               (entity ref occurs in attribute value)
+065.xml                Externally declared parsed general entities are rejected
+               (entity ref occurs in attribute default)
+               *** THINK ABOUT THIS CASE AGAIN ***
+066.xml                Externally declared unparsed entities are rejected
+               (entity ref occurs in attribute value)
+067.xml                Externally declared unparsed entities are rejected
+               (entity ref occurs in attribute default)
+068.xml                Externally declared NMTOKEN attributes require normal form
+069.xml                Externally declared NMTOKENS attributes require normal form
+070.xml                Externally declared elements with regexp content model
+               do not like extra white space
+
+----------------------------------------
+Deterministic models
+----------------------------------------
+
+080.xml                Non-deterministic content model ((a,b)|a+)
+081.xml                Non-deterministic content model ((b|a+),a)
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.ent
new file mode 100644 (file)
index 0000000..fea9c4e
--- /dev/null
@@ -0,0 +1,3 @@
+<!ELEMENT doc EMPTY>
+<!ENTITY % e "<!--">
+%e; -->
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.out
new file mode 100644 (file)
index 0000000..074f8ea
--- /dev/null
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/001.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "001.ent", at line 3, position 3:
+ERROR (Well-formedness constraint): `-->' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/001.xml
new file mode 100644 (file)
index 0000000..02ef0bb
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "001.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.ent
new file mode 100644 (file)
index 0000000..35b4309
--- /dev/null
@@ -0,0 +1,2 @@
+<!ENTITY % e "(#PCDATA">
+<!ELEMENT doc %e;)>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.out
new file mode 100644 (file)
index 0000000..2883a32
--- /dev/null
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/002.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "002.ent", at line 2, position 18:
+ERROR (Validity constraint): Entities not properly nested with parentheses
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/002.xml
new file mode 100644 (file)
index 0000000..0c5372c
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "002.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.ent
new file mode 100644 (file)
index 0000000..3610f12
--- /dev/null
@@ -0,0 +1,2 @@
+<!ENTITY % e "<!ELEMENT ">
+%e; doc (#PCDATA)>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.out
new file mode 100644 (file)
index 0000000..9125d01
--- /dev/null
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/003.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "003.ent", at line 2, position 17:
+ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/003.xml
new file mode 100644 (file)
index 0000000..c4b33e4
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "003.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.ent
new file mode 100644 (file)
index 0000000..3127263
--- /dev/null
@@ -0,0 +1,3 @@
+<!ENTITY % e1 "<!ELEMENT ">
+<!ENTITY % e2 ">">
+%e1; doc (#PCDATA) %e2;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.out
new file mode 100644 (file)
index 0000000..f7b93fc
--- /dev/null
@@ -0,0 +1,4 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/004.xml", at line 1, position 30:
+In entity e2, at line 1, position 1:
+Called from entity [dtd] = SYSTEM "004.ent", line 3, position 19:
+ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/004.xml
new file mode 100644 (file)
index 0000000..740d173
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "004.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/005.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/005.ent
new file mode 100644 (file)
index 0000000..3326c04
--- /dev/null
@@ -0,0 +1,2 @@
+<!ENTITY % e ">">
+<!ELEMENT doc (#PCDATA) %e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/005.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/005.out
new file mode 100644 (file)
index 0000000..d146659
--- /dev/null
@@ -0,0 +1,4 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/005.xml", at line 1, position 30:
+In entity e, at line 1, position 1:
+Called from entity [dtd] = SYSTEM "005.ent", line 2, position 24:
+ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/005.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/005.xml
new file mode 100644 (file)
index 0000000..aa3a8f9
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "005.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/006.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/006.ent
new file mode 100644 (file)
index 0000000..4bd7314
--- /dev/null
@@ -0,0 +1,2 @@
+<!ENTITY % e "(#PCDATA)>">
+<!ELEMENT doc %e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/006.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/006.out
new file mode 100644 (file)
index 0000000..fa7403c
--- /dev/null
@@ -0,0 +1,4 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/006.xml", at line 1, position 30:
+In entity e, at line 1, position 10:
+Called from entity [dtd] = SYSTEM "006.ent", line 2, position 14:
+ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/006.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_invalid/006.xml
new file mode 100644 (file)
index 0000000..bd2ee32
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "006.ent">
+<doc></doc>
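The data_jclark_invalid cases 001-006 all break the proper-nesting constraints for parameter entities: a comment, markup declaration or parenthesized group that starts inside one entity must end inside that same entity, so a parameter entity must not supply just the opening '<!--' or '<!ELEMENT' (001, 003, 004), an unbalanced '(' (002), or just the closing '>' (004-006). A properly nested counterpart of 003.ent (illustrative only, not part of this commit) lets the entity carry a complete, balanced content model instead:

  <!ENTITY % e "(#PCDATA)">
  <!ELEMENT doc %e; >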
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.ent
new file mode 100644 (file)
index 0000000..378a207
--- /dev/null
@@ -0,0 +1 @@
+&e;
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.out
new file mode 100644 (file)
index 0000000..57edec3
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/ext-sa/001.xml", at line 3, position 1:
+ERROR (Validity constraint): The root element is not declared
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/001.xml
new file mode 100644 (file)
index 0000000..673dc58
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "001.ent">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.ent
new file mode 100644 (file)
index 0000000..c7bec63
--- /dev/null
@@ -0,0 +1,3 @@
+<?xml version="1.0" standalone="yes"?>
+data
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.out
new file mode 100644 (file)
index 0000000..65e741d
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e = SYSTEM "002.ent", at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/ext-sa/002.xml", line 5, position 5:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/002.xml
new file mode 100644 (file)
index 0000000..2ee5988
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "002.ent">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.ent
new file mode 100644 (file)
index 0000000..a0d0d04
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0"?><?xml version="1.0"?>
+data
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.out
new file mode 100644 (file)
index 0000000..1d090c7
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e = SYSTEM "003.ent", at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/ext-sa/003.xml", line 5, position 5:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/ext-sa/003.xml
new file mode 100644 (file)
index 0000000..407a4a1
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "003.ent">
+]>
+<doc>&e;</doc>
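The ext-sa cases target the text declaration of an external parsed entity: it may occur only once, only at the very start, and it takes only version and encoding, never a standalone parameter (002.ent carries standalone="yes", 003.ent repeats the declaration). A well-formed external entity would begin like this (illustrative only, not part of this commit):

  <?xml version="1.0" encoding="UTF-8"?>
  data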
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.ent
new file mode 100644 (file)
index 0000000..b0292fc
--- /dev/null
@@ -0,0 +1,3 @@
+<![ INCLUDE [
+<!ELEMENT doc (#PCDATA)>
+]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.out
new file mode 100644 (file)
index 0000000..577cce2
--- /dev/null
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/001.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "001.ent", at line 3, position 0:
+ERROR (Well-formedness constraint): `>]>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/001.xml
new file mode 100644 (file)
index 0000000..02ef0bb
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "001.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/002.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/002.out
new file mode 100644 (file)
index 0000000..8cb6783
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 1:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/002.xml", line 4, position 0:
+ERROR (Well-formedness constraint): `]' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/002.xml
new file mode 100644 (file)
index 0000000..a0a538d
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "<?xml version='1.0' encoding='UTF-8'?>">
+%e;
+]>
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/003.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/003.ent
new file mode 100644 (file)
index 0000000..5b49337
--- /dev/null
@@ -0,0 +1,2 @@
+<!ELEMENT doc (#PCDATA)>
+<![ IGNORE [
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/003.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/003.out
new file mode 100644 (file)
index 0000000..9c5bd24
--- /dev/null
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/003.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "003.ent", at line 2, position 11:
+ERROR (Well-formedness constraint): Bad conditional section
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/003.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/003.xml
new file mode 100644 (file)
index 0000000..c4b33e4
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "003.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.ent
new file mode 100644 (file)
index 0000000..fcce6e0
--- /dev/null
@@ -0,0 +1,2 @@
+<!ELEMENT doc (#PCDATA)>
+<![ INCLUDE [
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.out
new file mode 100644 (file)
index 0000000..3c164bc
--- /dev/null
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/004.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "004.ent", at line 3, position 0:
+ERROR (Well-formedness constraint): `>]>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/004.xml
new file mode 100644 (file)
index 0000000..740d173
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "004.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.ent
new file mode 100644 (file)
index 0000000..aae8559
--- /dev/null
@@ -0,0 +1,2 @@
+<!ELEMENT doc (#PCDATA)>
+%e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.out
new file mode 100644 (file)
index 0000000..da38b03
--- /dev/null
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/005.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "005.ent", at line 2, position 0:
+ERROR (Well-formedness constraint): Reference to undeclared parameter entity `e'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/005.xml
new file mode 100644 (file)
index 0000000..aa3a8f9
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "005.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.ent
new file mode 100644 (file)
index 0000000..b60f41c
--- /dev/null
@@ -0,0 +1,3 @@
+<![INCLUDE
+<!ELEMENT doc (#PCDATA)>
+]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.out
new file mode 100644 (file)
index 0000000..edc29ea
--- /dev/null
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/006.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "006.ent", at line 2, position 0:
+ERROR (Well-formedness constraint): Bad conditional section
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/006.xml
new file mode 100644 (file)
index 0000000..bd2ee32
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "006.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.ent
new file mode 100644 (file)
index 0000000..62a92ed
--- /dev/null
@@ -0,0 +1,3 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.out
new file mode 100644 (file)
index 0000000..e8b088d
--- /dev/null
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/007.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "007.ent", at line 1, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/007.xml
new file mode 100644 (file)
index 0000000..1c5bc80
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "007.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.ent
new file mode 100644 (file)
index 0000000..11172a9
--- /dev/null
@@ -0,0 +1,2 @@
+<!ELEMENT doc ANY>
+<!ENTITY e "100%">
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.out
new file mode 100644 (file)
index 0000000..7370c42
--- /dev/null
@@ -0,0 +1,3 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/008.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "008.ent", at line 2, position 17:
+ERROR (Well-formedness constraint): The character '%' must be written as '&#37;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/not-sa/008.xml
new file mode 100644 (file)
index 0000000..c140c0a
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc SYSTEM "008.ent">
+<doc></doc>
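The not-sa group exercises external-subset syntax. Cases 001, 003, 004 and 006 concern conditional sections: the INCLUDE/IGNORE keyword must be followed by '[' and the section must be closed by ']]>' within the same entity; 002 and 005 again test parameter-entity nesting and undeclared parameter entities, 007 rejects a DOCTYPE declaration inside the external subset, and 008 a raw '%' in an entity value. Correctly formed conditional sections look like this (illustrative only, not part of this commit):

  <![INCLUDE[
  <!ELEMENT doc (#PCDATA)>
  ]]>
  <![IGNORE[
  <!ELEMENT doc ANY>
  ]]>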
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa-problems/140.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa-problems/140.xml
new file mode 100644 (file)
index 0000000..062b213
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "<&#x309a;></&#x309a;>">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa-problems/141.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa-problems/141.xml
new file mode 100644 (file)
index 0000000..6d864a3
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "<X&#xe5c;></X&#xe5c;>">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/001.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/001.out
new file mode 100644 (file)
index 0000000..bff9b82
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/001.xml", at line 3, position 0:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/001.xml
new file mode 100644 (file)
index 0000000..253fea6
--- /dev/null
@@ -0,0 +1,5 @@
+<doc>
+<doc
+?
+<a</a>
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/002.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/002.out
new file mode 100644 (file)
index 0000000..c6d2a49
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/002.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '&lt;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/002.xml
new file mode 100644 (file)
index 0000000..6ca801a
--- /dev/null
@@ -0,0 +1,4 @@
+<doc>
+<.doc></.doc>
+</doc>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/003.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/003.out
new file mode 100644 (file)
index 0000000..65fd8ef
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/003.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal token or character
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/003.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/003.xml
new file mode 100644 (file)
index 0000000..07a534d
--- /dev/null
@@ -0,0 +1 @@
+<doc><? ?></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/004.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/004.out
new file mode 100644 (file)
index 0000000..18d0d66
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/004.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/004.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/004.xml
new file mode 100644 (file)
index 0000000..f89e662
--- /dev/null
@@ -0,0 +1 @@
+<doc><?target some data></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/005.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/005.out
new file mode 100644 (file)
index 0000000..f5ece68
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/005.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/005.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/005.xml
new file mode 100644 (file)
index 0000000..16127dc
--- /dev/null
@@ -0,0 +1 @@
+<doc><?target some data?</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/006.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/006.out
new file mode 100644 (file)
index 0000000..06dd728
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/006.xml", at line 1, position 20:
+ERROR (Well-formedness constraint): Double hyphens are illegal inside comments
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/006.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/006.xml
new file mode 100644 (file)
index 0000000..789d0b8
--- /dev/null
@@ -0,0 +1 @@
+<doc><!-- a comment -- another --></doc>
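Case 006 rejects '--' inside a comment: two consecutive hyphens may only appear as part of the closing '-->' (case 027 further below covers the unterminated variant). A well-formed comment (illustrative only, not part of this commit):

  <doc><!-- a comment, another one --></doc>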
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/007.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/007.out
new file mode 100644 (file)
index 0000000..dccf064
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/007.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/007.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/007.xml
new file mode 100644 (file)
index 0000000..2b017e3
--- /dev/null
@@ -0,0 +1 @@
+<doc>&amp no refc</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/008.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/008.out
new file mode 100644 (file)
index 0000000..fc16241
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/008.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/008.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/008.xml
new file mode 100644 (file)
index 0000000..3117de4
--- /dev/null
@@ -0,0 +1 @@
+<doc>&.entity;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/009.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/009.out
new file mode 100644 (file)
index 0000000..74802e5
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/009.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/009.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/009.xml
new file mode 100644 (file)
index 0000000..62f9239
--- /dev/null
@@ -0,0 +1 @@
+<doc>&#RE;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/010.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/010.out
new file mode 100644 (file)
index 0000000..c7c12d0
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/010.xml", at line 1, position 7:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/010.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/010.xml
new file mode 100644 (file)
index 0000000..44a48f9
--- /dev/null
@@ -0,0 +1 @@
+<doc>A & B</doc>
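Cases 007-010 are about bare ampersands and malformed references in character data: '&' may only introduce an entity or character reference (a name or '#' plus digits, terminated by ';'), so a literal ampersand has to be written as '&amp;' or '&#38;'. A well-formed counterpart of 010.xml (illustrative only, not part of this commit):

  <doc>A &amp; B</doc>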
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/011.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/011.out
new file mode 100644 (file)
index 0000000..af9e50e
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/011.xml", at line 1, position 7:
+ERROR (Well-formedness constraint): Bad attribute list
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/011.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/011.xml
new file mode 100644 (file)
index 0000000..ae41758
--- /dev/null
@@ -0,0 +1 @@
+<doc a1></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/012.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/012.out
new file mode 100644 (file)
index 0000000..5447f51
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/012.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): Bad attribute list
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/012.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/012.xml
new file mode 100644 (file)
index 0000000..d4ffd74
--- /dev/null
@@ -0,0 +1 @@
+<doc a1=v1></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/013.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/013.out
new file mode 100644 (file)
index 0000000..701c4aa
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/013.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/013.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/013.xml
new file mode 100644 (file)
index 0000000..c436a25
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1'></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/014.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/014.out
new file mode 100644 (file)
index 0000000..c38c1d4
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/014.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): Attribute value contains character '<' literally
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/014.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/014.xml
new file mode 100644 (file)
index 0000000..0482941
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="<foo>"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/015.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/015.out
new file mode 100644 (file)
index 0000000..a7826ec
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/015.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): Bad attribute list
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/015.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/015.xml
new file mode 100644 (file)
index 0000000..6f6f32f
--- /dev/null
@@ -0,0 +1 @@
+<doc a1=></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/016.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/016.out
new file mode 100644 (file)
index 0000000..c7e489f
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/016.xml", at line 1, position 13:
+ERROR (Well-formedness constraint): `>' or `/>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/016.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/016.xml
new file mode 100644 (file)
index 0000000..6a69ddb
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="v1" "v2"></doc>
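Cases 011-016 cover attribute syntax in start-tags: every attribute needs a name, an '=', and a value enclosed in matching single or double quotes, the value must not contain a raw '<', and a second value cannot follow without its own name. A well-formed start-tag (illustrative only, not part of this commit):

  <doc a1="v1" a2='v2' a3="&lt;foo&gt;"></doc>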
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/017.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/017.out
new file mode 100644 (file)
index 0000000..9ad92fb
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/017.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/017.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/017.xml
new file mode 100644 (file)
index 0000000..9474c49
--- /dev/null
@@ -0,0 +1 @@
+<doc><![CDATA[</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/018.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/018.out
new file mode 100644 (file)
index 0000000..9fd2963
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/018.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/018.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/018.xml
new file mode 100644 (file)
index 0000000..cdbf8b3
--- /dev/null
@@ -0,0 +1 @@
+<doc><![CDATA [ stuff]]></doc>
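Cases 017 and 018 concern CDATA sections: the opening delimiter is the literal '<![CDATA[' with no space before the second '[', and the section must be terminated by ']]>'. A well-formed example (illustrative only, not part of this commit):

  <doc><![CDATA[ <stuff> & more ]]></doc>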
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/019.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/019.out
new file mode 100644 (file)
index 0000000..5e1846c
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/019.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '&lt;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/019.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/019.xml
new file mode 100644 (file)
index 0000000..7205628
--- /dev/null
@@ -0,0 +1 @@
+<doc></>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/020.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/020.out
new file mode 100644 (file)
index 0000000..8e64d37
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/020.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/020.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/020.xml
new file mode 100644 (file)
index 0000000..d672b1c
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="A & B"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/021.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/021.out
new file mode 100644 (file)
index 0000000..d6e979c
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/021.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/021.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/021.xml
new file mode 100644 (file)
index 0000000..c5d1e39
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="a&b"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/022.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/022.out
new file mode 100644 (file)
index 0000000..f16b0e4
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/022.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/022.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/022.xml
new file mode 100644 (file)
index 0000000..c47a4d6
--- /dev/null
@@ -0,0 +1 @@
+<doc a1="&#123:"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/023.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/023.out
new file mode 100644 (file)
index 0000000..dca9b94
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/023.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/023.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/023.xml
new file mode 100644 (file)
index 0000000..9d1ecd8
--- /dev/null
@@ -0,0 +1 @@
+<doc 12="34"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/024.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/024.out
new file mode 100644 (file)
index 0000000..a6cec01
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/024.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '&lt;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/024.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/024.xml
new file mode 100644 (file)
index 0000000..3e56727
--- /dev/null
@@ -0,0 +1,3 @@
+<doc>
+<123></123>
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/025.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/025.out
new file mode 100644 (file)
index 0000000..8992cdf
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/025.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The sequence ']]>' must be written as ']]&gt;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/025.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/025.xml
new file mode 100644 (file)
index 0000000..9ad4266
--- /dev/null
@@ -0,0 +1 @@
+<doc>]]></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/026.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/026.out
new file mode 100644 (file)
index 0000000..6f3ff05
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/026.xml", at line 1, position 6:
+ERROR (Well-formedness constraint): The sequence ']]>' must be written as ']]&gt;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/026.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/026.xml
new file mode 100644 (file)
index 0000000..5543609
--- /dev/null
@@ -0,0 +1 @@
+<doc>]]]></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/027.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/027.out
new file mode 100644 (file)
index 0000000..5274a84
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/027.xml", at line 4, position 0:
+ERROR (Well-formedness constraint): `-->' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/027.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/027.xml
new file mode 100644 (file)
index 0000000..0ae9fa6
--- /dev/null
@@ -0,0 +1,3 @@
+<doc>
+<!-- abc
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/028.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/028.out
new file mode 100644 (file)
index 0000000..2f4e3b3
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/028.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Illegal processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/028.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/028.xml
new file mode 100644 (file)
index 0000000..278098e
--- /dev/null
@@ -0,0 +1,4 @@
+<doc>
+<?a pi that is not closed
+</doc>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/029.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/029.out
new file mode 100644 (file)
index 0000000..8482668
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/029.xml", at line 1, position 9:
+ERROR (Well-formedness constraint): The sequence ']]>' must be written as ']]&gt;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/029.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/029.xml
new file mode 100644 (file)
index 0000000..d5e881d
--- /dev/null
@@ -0,0 +1 @@
+<doc>abc]]]>def</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/030.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/030.out
new file mode 100644 (file)
index 0000000..f4bdf54
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/030.xml", at line 1, position 18:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/030.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/030.xml
new file mode 100644 (file)
index 0000000..cadbbcc
--- /dev/null
@@ -0,0 +1 @@
+<doc>A form feed (\f) is not legal in data</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/031.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/031.out
new file mode 100644 (file)
index 0000000..7d6ed42
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/031.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/031.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/031.xml
new file mode 100644 (file)
index 0000000..3a3d82a
--- /dev/null
@@ -0,0 +1 @@
+<doc><?pi a form feed (\f) is not allowed in a pi?></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/032.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/032.out
new file mode 100644 (file)
index 0000000..ea5ebf7
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/032.xml", at line 1, position 23:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/032.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/032.xml
new file mode 100644 (file)
index 0000000..c9981ad
--- /dev/null
@@ -0,0 +1 @@
+<doc><!-- a form feed (\f) is not allowed in a comment --></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/033.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/033.out
new file mode 100644 (file)
index 0000000..61b6001
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/033.xml", at line 1, position 8:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/033.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/033.xml
new file mode 100644 (file)
index 0000000..ea1693c
--- /dev/null
@@ -0,0 +1 @@
+<doc>abc\edef</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/034.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/034.out
new file mode 100644 (file)
index 0000000..48ee600
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/034.xml", at line 1, position 4:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/034.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/034.xml
new file mode 100644 (file)
index 0000000..84841f4
--- /dev/null
@@ -0,0 +1 @@
+<doc\f>A form-feed is not white space or a name character</doc\f>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/035.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/035.out
new file mode 100644 (file)
index 0000000..9798306
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/035.xml", at line 1, position 7:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '&lt;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/035.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/035.xml
new file mode 100644 (file)
index 0000000..7032f9a
--- /dev/null
@@ -0,0 +1 @@
+<doc>1 < 2 but not in XML</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/036.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/036.out
new file mode 100644 (file)
index 0000000..63baee3
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/036.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Data not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/036.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/036.xml
new file mode 100644 (file)
index 0000000..b3259d0
--- /dev/null
@@ -0,0 +1,2 @@
+<doc></doc>
+Illegal data
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/037.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/037.out
new file mode 100644 (file)
index 0000000..bdeb907
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/037.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Character reference not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/037.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/037.xml
new file mode 100644 (file)
index 0000000..356448a
--- /dev/null
@@ -0,0 +1,2 @@
+<doc></doc>
+&#32;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/038.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/038.out
new file mode 100644 (file)
index 0000000..9c9acf3
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/038.xml", at line 1, position 29:
+ERROR (Well-formedness constraint): Attribute `x' occurs twice in element `doc'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/038.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/038.xml
new file mode 100644 (file)
index 0000000..0590e53
--- /dev/null
@@ -0,0 +1 @@
+<doc x="foo" y="bar" x="baz"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/039.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/039.out
new file mode 100644 (file)
index 0000000..3227cb0
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/039.xml", at line 1, position 12:
+ERROR (Well-formedness constraint): End-tag does not match start-tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/039.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/039.xml
new file mode 100644 (file)
index 0000000..971effa
--- /dev/null
@@ -0,0 +1 @@
+<doc><a></aa></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/040.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/040.out
new file mode 100644 (file)
index 0000000..e58e599
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/040.xml", at line 2, position 5:
+ERROR (Well-formedness constraint): Document must consist of only one toplevel element
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/040.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/040.xml
new file mode 100644 (file)
index 0000000..7591d8b
--- /dev/null
@@ -0,0 +1,2 @@
+<doc></doc>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/041.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/041.out
new file mode 100644 (file)
index 0000000..c90cfad
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/041.xml", at line 2, position 5:
+ERROR (Well-formedness constraint): Document must consist of only one toplevel element
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/041.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/041.xml
new file mode 100644 (file)
index 0000000..405efd3
--- /dev/null
@@ -0,0 +1,2 @@
+<doc/>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/042.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/042.out
new file mode 100644 (file)
index 0000000..ea78b5e
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/042.xml", at line 1, position 11:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/042.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/042.xml
new file mode 100644 (file)
index 0000000..1247cc8
--- /dev/null
@@ -0,0 +1 @@
+<doc/></doc/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/043.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/043.out
new file mode 100644 (file)
index 0000000..24860aa
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/043.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Data not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/043.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/043.xml
new file mode 100644 (file)
index 0000000..5cc527c
--- /dev/null
@@ -0,0 +1,2 @@
+<doc/>
+Illegal data
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/044.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/044.out
new file mode 100644 (file)
index 0000000..573b7c4
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/044.xml", at line 1, position 12:
+ERROR (Well-formedness constraint): Document must consist of only one toplevel element
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/044.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/044.xml
new file mode 100644 (file)
index 0000000..b71d062
--- /dev/null
@@ -0,0 +1 @@
+<doc/><doc/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/045.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/045.out
new file mode 100644 (file)
index 0000000..78a1ed3
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/045.xml", at line 2, position 2:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/045.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/045.xml
new file mode 100644 (file)
index 0000000..f7540e8
--- /dev/null
@@ -0,0 +1,4 @@
+<doc>
+<a/
+</doc>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/046.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/046.out
new file mode 100644 (file)
index 0000000..790f167
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/046.xml", at line 2, position 2:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/046.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/046.xml
new file mode 100644 (file)
index 0000000..d5d901e
--- /dev/null
@@ -0,0 +1,3 @@
+<doc>
+<a/</a>
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/047.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/047.out
new file mode 100644 (file)
index 0000000..6b2fd78
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/047.xml", at line 2, position 3:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/047.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/047.xml
new file mode 100644 (file)
index 0000000..00ae523
--- /dev/null
@@ -0,0 +1,3 @@
+<doc>
+<a / >
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/048.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/048.out
new file mode 100644 (file)
index 0000000..4d54039
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/048.xml", at line 3, position 0:
+ERROR (Well-formedness constraint): CDATA section not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/048.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/048.xml
new file mode 100644 (file)
index 0000000..9092ffa
--- /dev/null
@@ -0,0 +1,3 @@
+<doc>
+</doc>
+<![CDATA[]]>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/049.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/049.out
new file mode 100644 (file)
index 0000000..260dee4
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/049.xml", at line 3, position 15:
+ERROR (Well-formedness constraint): End-tag does not match start-tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/049.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/049.xml
new file mode 100644 (file)
index 0000000..e5a6b70
--- /dev/null
@@ -0,0 +1,4 @@
+<doc>
+<a><![CDATA[xyz]]]></a>
+<![CDATA[]]></a>
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/050.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/050.out
new file mode 100644 (file)
index 0000000..b813938
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/050.xml", at line 1, position 0:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/050.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/050.xml
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/051.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/051.out
new file mode 100644 (file)
index 0000000..15e4f65
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/051.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/051.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/051.xml
new file mode 100644 (file)
index 0000000..19f13de
--- /dev/null
@@ -0,0 +1,3 @@
+<!-- a comment -->
+<![CDATA[]]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/052.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/052.out
new file mode 100644 (file)
index 0000000..331a4c3
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/052.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Content not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/052.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/052.xml
new file mode 100644 (file)
index 0000000..b3fe376
--- /dev/null
@@ -0,0 +1,3 @@
+<!-- a comment -->
+&#32;
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/053.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/053.out
new file mode 100644 (file)
index 0000000..88a0dda
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/053.xml", at line 1, position 10:
+ERROR (Well-formedness constraint): End-tag does not match start-tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/053.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/053.xml
new file mode 100644 (file)
index 0000000..bf5c6d5
--- /dev/null
@@ -0,0 +1 @@
+<doc></DOC>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/054.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/054.out
new file mode 100644 (file)
index 0000000..e20db32
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/054.xml", at line 2, position 36:
+ERROR (Well-formedness constraint): Whitespace is missing between the literals of the PUBLIC identifier
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/054.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/054.xml
new file mode 100644 (file)
index 0000000..f4d24e2
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY foo PUBLIC "some public id">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/055.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/055.out
new file mode 100644 (file)
index 0000000..9d9f52d
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/055.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Illegal token or character
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/055.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/055.xml
new file mode 100644 (file)
index 0000000..ae922ee
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc [
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/056.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/056.out
new file mode 100644 (file)
index 0000000..ba062b0
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/056.xml", at line 1, position 14:
+ERROR (Well-formedness constraint): Content not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/056.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/056.xml
new file mode 100644 (file)
index 0000000..b4a32cb
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc -- a comment -- []>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/057.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/057.out
new file mode 100644 (file)
index 0000000..66ab6fa
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/057.xml", at line 2, position 22:
+ERROR (Well-formedness constraint): `>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/057.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/057.xml
new file mode 100644 (file)
index 0000000..af3b264
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "whatever" -- a comment -->
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/058.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/058.out
new file mode 100644 (file)
index 0000000..4a42cd2
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/058.xml", at line 3, position 21:
+ERROR (Well-formedness constraint): `|' and more names expected, or `)'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/058.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/058.xml
new file mode 100644 (file)
index 0000000..6b525cf
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 (foo,bar) #IMPLIED>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/059.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/059.out
new file mode 100644 (file)
index 0000000..a31204f
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/059.xml", at line 3, position 25:
+ERROR (Well-formedness constraint): #REQUIRED, #IMPLIED, #FIXED or a string literal expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/059.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/059.xml
new file mode 100644 (file)
index 0000000..4a5c956
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 NMTOKEN v1>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/060.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/060.out
new file mode 100644 (file)
index 0000000..295f543
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/060.xml", at line 3, position 21:
+ERROR (Well-formedness constraint): One of CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, NOTATION, or a subexpression expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/060.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/060.xml
new file mode 100644 (file)
index 0000000..3ddde34
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 NAME #IMPLIED>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/061.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/061.out
new file mode 100644 (file)
index 0000000..1545e27
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/061.xml", at line 2, position 28:
+ERROR (Well-formedness constraint): Whitespace is missing between the literals of the PUBLIC identifier
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/061.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/061.xml
new file mode 100644 (file)
index 0000000..d58093d
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e PUBLIC "whatever""e.ent">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/062.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/062.out
new file mode 100644 (file)
index 0000000..581d3bb
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/062.xml", at line 2, position 12:
+ERROR (Well-formedness constraint): Whitespace is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/062.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/062.xml
new file mode 100644 (file)
index 0000000..4f091e4
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY foo"some text">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/063.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/063.out
new file mode 100644 (file)
index 0000000..128b191
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/063.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Restriction of the internal subset: Conditional sections not allowed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/063.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/063.xml
new file mode 100644 (file)
index 0000000..f9bd03c
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<![INCLUDE[ ]]>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/064.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/064.out
new file mode 100644 (file)
index 0000000..ad7f21c
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/064.xml", at line 3, position 20:
+ERROR (Well-formedness constraint): Whitespace is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/064.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/064.xml
new file mode 100644 (file)
index 0000000..f8d5894
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST e a1 CDATA"foo">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/065.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/065.out
new file mode 100644 (file)
index 0000000..4675f75
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/065.xml", at line 3, position 16:
+ERROR (Well-formedness constraint): Whitespace is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/065.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/065.xml
new file mode 100644 (file)
index 0000000..29dc6e5
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1(foo|bar) #IMPLIED>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/066.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/066.out
new file mode 100644 (file)
index 0000000..0812fc4
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/066.xml", at line 3, position 26:
+ERROR (Well-formedness constraint): Whitespace is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/066.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/066.xml
new file mode 100644 (file)
index 0000000..04dbdb7
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 (foo|bar)#IMPLIED>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/067.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/067.out
new file mode 100644 (file)
index 0000000..4ed7110
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/067.xml", at line 3, position 22:
+ERROR (Well-formedness constraint): Whitespace is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/067.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/067.xml
new file mode 100644 (file)
index 0000000..de12579
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 (foo)"foo">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/068.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/068.out
new file mode 100644 (file)
index 0000000..186ea42
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/068.xml", at line 3, position 25:
+ERROR (Well-formedness constraint): Error in NOTATION type (perhaps missing whitespace after NOTATION?)
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/068.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/068.xml
new file mode 100644 (file)
index 0000000..9f4a009
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 NOTATION(foo) #IMPLIED>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/069.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/069.out
new file mode 100644 (file)
index 0000000..d508784
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/069.xml", at line 4, position 38:
+ERROR (Well-formedness constraint): Whitespace missing before `NDATA'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/069.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/069.xml
new file mode 100644 (file)
index 0000000..a3ac7ea
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!NOTATION eps SYSTEM "eps.exe">
+<!-- missing space before NDATA -->
+<!ENTITY foo SYSTEM "foo.eps"NDATA eps>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/070.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/070.out
new file mode 100644 (file)
index 0000000..d0813ed
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/070.xml", at line 1, position 40:
+ERROR (Well-formedness constraint): Double hyphens are illegal inside comments
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/070.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/070.xml
new file mode 100644 (file)
index 0000000..a3ec12f
--- /dev/null
@@ -0,0 +1,2 @@
+<!-- a comment ending with three dashes --->
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/071.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/071.out
new file mode 100644 (file)
index 0000000..01100f9
--- /dev/null
@@ -0,0 +1,5 @@
+In entity e3, at line 1, position 0:
+Called from entity e2, line 1, position 0:
+Called from entity e1, line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/071.xml", line 6, position 5:
+ERROR (Validity constraint): Recursive reference to entity `e1'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/071.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/071.xml
new file mode 100644 (file)
index 0000000..8fe3ef7
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ENTITY e1 "&e2;">
+<!ENTITY e2 "&e3;">
+<!ENTITY e3 "&e1;">
+]>
+<doc>&e1;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/072.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/072.out
new file mode 100644 (file)
index 0000000..5534c5c
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/072.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `foo'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/072.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/072.xml
new file mode 100644 (file)
index 0000000..65fd17c
--- /dev/null
@@ -0,0 +1 @@
+<doc>&foo;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/073.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/073.out
new file mode 100644 (file)
index 0000000..855179a
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/073.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `f'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/073.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/073.xml
new file mode 100644 (file)
index 0000000..cd61644
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "whatever">
+]>
+<doc>&f;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/074.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/074.out
new file mode 100644 (file)
index 0000000..5cf7301
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 5:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/074.xml", line 5, position 5:
+ERROR (Well-formedness constraint): End-tag not in the same entity as the start-tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/074.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/074.xml
new file mode 100644 (file)
index 0000000..dca3f11
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ENTITY e "</foo><foo>">
+]>
+<doc>
+<foo>&e;</foo>
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/075.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/075.out
new file mode 100644 (file)
index 0000000..ed842c9
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/075.xml", at line 6, position 7:
+ERROR (Well-formedness constraint): Recursive reference to general entity `e1'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/075.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/075.xml
new file mode 100644 (file)
index 0000000..9784de3
--- /dev/null
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!ENTITY e1 "&e2;">
+<!ENTITY e2 "&e3;">
+<!ENTITY e3 "&e1;">
+]>
+<doc a="&e1;"></doc>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/076.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/076.out
new file mode 100644 (file)
index 0000000..63a0d38
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/076.xml", at line 1, position 7:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `foo'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/076.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/076.xml
new file mode 100644 (file)
index 0000000..4064194
--- /dev/null
@@ -0,0 +1 @@
+<doc a="&foo;"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/077.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/077.out
new file mode 100644 (file)
index 0000000..6c854b7
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/077.xml", at line 4, position 7:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `bar'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/077.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/077.xml
new file mode 100644 (file)
index 0000000..36f3391
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY foo "&bar;">
+]>
+<doc a="&foo;"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/078.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/078.out
new file mode 100644 (file)
index 0000000..fffb553
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/078.xml", at line 3, position 22:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `foo'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/078.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/078.xml
new file mode 100644 (file)
index 0000000..e3af2f2
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA "&foo;">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/079.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/079.out
new file mode 100644 (file)
index 0000000..5469d59
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/079.xml", at line 6, position 22:
+ERROR (Well-formedness constraint): Recursive reference to general entity `e1'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/079.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/079.xml
new file mode 100644 (file)
index 0000000..c778a80
--- /dev/null
@@ -0,0 +1,8 @@
+<!DOCTYPE doc [
+<!ENTITY e1 "&e2;">
+<!ENTITY e2 "&e3;">
+<!ENTITY e3 "&e1;">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA "&e1;">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/080.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/080.out
new file mode 100644 (file)
index 0000000..aadb4b7
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/080.xml", at line 6, position 29:
+ERROR (Well-formedness constraint): Recursive reference to general entity `e1'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/080.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/080.xml
new file mode 100644 (file)
index 0000000..4949e99
--- /dev/null
@@ -0,0 +1,8 @@
+<!DOCTYPE doc [
+<!ENTITY e1 "&e2;">
+<!ENTITY e2 "&e3;">
+<!ENTITY e3 "&e1;">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #FIXED "&e1;">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/081.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/081.out
new file mode 100644 (file)
index 0000000..cdaf257
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/081.xml", at line 4, position 7:
+Other exception: Sys_error("/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/nul: No such file or directory")
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/081.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/081.xml
new file mode 100644 (file)
index 0000000..3be7e41
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "nul">
+]>
+<doc a="&e;"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/082.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/082.out
new file mode 100644 (file)
index 0000000..72a6b3a
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/082.xml", at line 4, position 22:
+Other exception: Sys_error("/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/nul: No such file or directory")
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/082.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/082.xml
new file mode 100644 (file)
index 0000000..f99640f
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "nul">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA "&e;">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/083.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/083.out
new file mode 100644 (file)
index 0000000..0744b43
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/083.xml", at line 4, position 5:
+ERROR (Validity constraint): Invalid reference to NDATA entity e
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/083.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/083.xml
new file mode 100644 (file)
index 0000000..8d6ff96
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "nul" NDATA n>
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/084.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/084.out
new file mode 100644 (file)
index 0000000..3a09c34
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/084.xml", at line 4, position 22:
+ERROR (Validity constraint): Invalid reference to NDATA entity e
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/084.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/084.xml
new file mode 100644 (file)
index 0000000..5b10fb9
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "nul" NDATA n>
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA "&e;">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/085.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/085.out
new file mode 100644 (file)
index 0000000..87ff19d
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/085.xml", at line 1, position 25:
+ERROR (Well-formedness constraint): Illegal character in PUBLIC identifier
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/085.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/085.xml
new file mode 100644 (file)
index 0000000..1d64364
--- /dev/null
@@ -0,0 +1,2 @@
+<!DOCTYPE doc PUBLIC "[" "null.ent">
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/086.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/086.out
new file mode 100644 (file)
index 0000000..63bb4a1
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/086.xml", at line 2, position 24:
+ERROR (Well-formedness constraint): Illegal character in PUBLIC identifier
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/086.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/086.xml
new file mode 100644 (file)
index 0000000..454893a
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY foo PUBLIC "[" "null.xml">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/087.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/087.out
new file mode 100644 (file)
index 0000000..4dc1495
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/087.xml", at line 2, position 36:
+ERROR (Well-formedness constraint): Illegal character in PUBLIC identifier
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/087.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/087.xml
new file mode 100644 (file)
index 0000000..01e8760
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!NOTATION foo PUBLIC "[" "null.ent">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/088.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/088.out
new file mode 100644 (file)
index 0000000..574049d
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/088.xml", at line 6, position 7:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/088.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/088.xml
new file mode 100644 (file)
index 0000000..a581de8
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+<!ENTITY e '"'>
+]>
+<doc a="&e;></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/089.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/089.out
new file mode 100644 (file)
index 0000000..6640fe9
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/089.xml", at line 2, position 32:
+ERROR (Well-formedness constraint): `>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/089.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/089.xml
new file mode 100644 (file)
index 0000000..a788aab
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY % foo SYSTEM "foo.xml" NDATA bar>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/090.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/090.out
new file mode 100644 (file)
index 0000000..82d8758
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 7:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/090.xml", line 4, position 5:
+ERROR (Well-formedness constraint): Attribute value contains character '<' literally
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/090.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/090.xml
new file mode 100644 (file)
index 0000000..f82c238
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "<foo a='&#60;'></foo>">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/091.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/091.out
new file mode 100644 (file)
index 0000000..9b96ad6
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/091.xml", at line 3, position 32:
+ERROR (Well-formedness constraint): `>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/091.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/091.xml
new file mode 100644 (file)
index 0000000..9601dcb
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!NOTATION n SYSTEM "n">
+<!ENTITY % foo SYSTEM "foo.xml" NDATA n>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/092.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/092.out
new file mode 100644 (file)
index 0000000..b32fb9a
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 7:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/092.xml", line 4, position 5:
+ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/092.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/092.xml
new file mode 100644 (file)
index 0000000..a867ecd
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "<foo a='&#38;'></foo>">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/093.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/093.out
new file mode 100644 (file)
index 0000000..eda559a
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/093.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/093.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/093.xml
new file mode 100644 (file)
index 0000000..a5f8638
--- /dev/null
@@ -0,0 +1 @@
+<doc>&#X58;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/094.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/094.out
new file mode 100644 (file)
index 0000000..0422cf8
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/094.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/094.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/094.xml
new file mode 100644 (file)
index 0000000..483ed52
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml VERSION="1.0"?>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/095.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/095.out
new file mode 100644 (file)
index 0000000..9f39293
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/095.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/095.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/095.xml
new file mode 100644 (file)
index 0000000..ba2cbe8
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml encoding="UTF-8" version="1.0"?>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/096.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/096.out
new file mode 100644 (file)
index 0000000..17a3a1b
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/096.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/096.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/096.xml
new file mode 100644 (file)
index 0000000..f41eaba
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0"encoding="UTF-8" ?>
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/097.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/097.out
new file mode 100644 (file)
index 0000000..3e512b4
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/097.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/097.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/097.xml
new file mode 100644 (file)
index 0000000..3b95065
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0' encoding="UTF-8" ?>
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/098.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/098.out
new file mode 100644 (file)
index 0000000..0e7f223
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/098.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/098.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/098.xml
new file mode 100644 (file)
index 0000000..9627acb
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0" version="1.0"?>
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/099.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/099.out
new file mode 100644 (file)
index 0000000..e7471f8
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/099.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/099.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/099.xml
new file mode 100644 (file)
index 0000000..02637f0
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0" valid="no" ?>
+<doc></doc>
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/100.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/100.out
new file mode 100644 (file)
index 0000000..c4ec738
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/100.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Illegal 'standalone' declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/100.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/100.xml
new file mode 100644 (file)
index 0000000..38beda8
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0" standalone="YES" ?>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/101.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/101.out
new file mode 100644 (file)
index 0000000..8c31631
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/101.xml", at line 1, position 0:
+Other exception: Failure("Netconversion.encoding_of_string: unknown encoding")
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/101.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/101.xml
new file mode 100644 (file)
index 0000000..6191a80
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding=" UTF-8"?>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/102.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/102.out
new file mode 100644 (file)
index 0000000..895c6b8
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/102.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML version string
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/102.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/102.xml
new file mode 100644 (file)
index 0000000..a4cde40
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0 " ?>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/103.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/103.out
new file mode 100644 (file)
index 0000000..86dda33
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/103.xml", at line 4, position 13:
+ERROR (Well-formedness constraint): End-tag does not match start-tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/103.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/103.xml
new file mode 100644 (file)
index 0000000..fc5d152
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "&#60;foo>">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/104.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/104.out
new file mode 100644 (file)
index 0000000..b3c9bbb
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/104.xml", at line 4, position 13:
+ERROR (Well-formedness constraint): End-tag not in the same entity as the start-tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/104.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/104.xml
new file mode 100644 (file)
index 0000000..b35b90e
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "<foo>">
+]>
+<doc>&e;</foo></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/105.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/105.out
new file mode 100644 (file)
index 0000000..146e980
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/105.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/105.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/105.xml
new file mode 100644 (file)
index 0000000..5f60551
--- /dev/null
@@ -0,0 +1,4 @@
+<?pi stuff?>
+<![CDATA[]]>
+<doc>
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/106.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/106.out
new file mode 100644 (file)
index 0000000..cec4501
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/106.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Content not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/106.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/106.xml
new file mode 100644 (file)
index 0000000..87c56d7
--- /dev/null
@@ -0,0 +1,2 @@
+<?pi data?>
+&#32;<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/107.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/107.out
new file mode 100644 (file)
index 0000000..8f6919d
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/107.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Restriction of the internal subset: Conditional sections not allowed
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/107.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/107.xml
new file mode 100644 (file)
index 0000000..2a69906
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<![CDATA[]]>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/108.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/108.out
new file mode 100644 (file)
index 0000000..bbca44b
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/108.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/108.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/108.xml
new file mode 100644 (file)
index 0000000..187b07f
--- /dev/null
@@ -0,0 +1,3 @@
+<doc>
+<![CDATA [  ]]>
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/109.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/109.out
new file mode 100644 (file)
index 0000000..bf1f79a
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/109.xml", at line 4, position 0:
+ERROR (Well-formedness constraint): Content not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/109.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/109.xml
new file mode 100644 (file)
index 0000000..33b1cf3
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "<doc></doc>">
+]>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/110.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/110.out
new file mode 100644 (file)
index 0000000..4176538
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/110.xml", at line 5, position 3:
+ERROR (Well-formedness constraint): Entity reference not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/110.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/110.xml
new file mode 100644 (file)
index 0000000..4d7bf99
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ENTITY e "">
+]>
+<doc></doc>
+&e;
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/111.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/111.out
new file mode 100644 (file)
index 0000000..be02f88
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/111.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/111.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/111.xml
new file mode 100644 (file)
index 0000000..530c6cc
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "foo='bar'">
+]>
+<doc &e;></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/112.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/112.out
new file mode 100644 (file)
index 0000000..5361f56
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/112.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/112.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/112.xml
new file mode 100644 (file)
index 0000000..13cfcc5
--- /dev/null
@@ -0,0 +1,3 @@
+<doc>
+<![cdata[data]]>
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/113.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/113.out
new file mode 100644 (file)
index 0000000..23c9284
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/113.xml", at line 2, position 18:
+ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/113.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/113.xml
new file mode 100644 (file)
index 0000000..899102b
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY % foo "&">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/114.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/114.out
new file mode 100644 (file)
index 0000000..06be1fd
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/114.xml", at line 2, position 16:
+ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/114.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/114.xml
new file mode 100644 (file)
index 0000000..32d6d07
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY foo "&">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/115.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/115.out
new file mode 100644 (file)
index 0000000..54a9241
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/115.xml", at line 4, position 7:
+ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/115.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/115.xml
new file mode 100644 (file)
index 0000000..af014a0
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "&#38;">
+]>
+<doc a="&e;"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/116.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/116.out
new file mode 100644 (file)
index 0000000..bebbe2c
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/116.xml", line 4, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/116.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/116.xml
new file mode 100644 (file)
index 0000000..ce37ca0
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "&#38;#9">
+]>
+<doc>&e;7;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/117.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/117.out
new file mode 100644 (file)
index 0000000..2b613d7
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/117.xml", line 4, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/117.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/117.xml
new file mode 100644 (file)
index 0000000..5ba4eb0
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "&#38;">
+]>
+<doc>&e;#97;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/118.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/118.out
new file mode 100644 (file)
index 0000000..fda0ee4
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/118.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/118.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/118.xml
new file mode 100644 (file)
index 0000000..49b4b8c
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "#">
+]>
+<doc>&&e;97;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/119.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/119.out
new file mode 100644 (file)
index 0000000..0c3a84a
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/119.xml", line 5, position 0:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/119.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/119.xml
new file mode 100644 (file)
index 0000000..7ee56be
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ENTITY e "&#38;">
+]>
+<doc>
+&e;#38;
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/120.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/120.out
new file mode 100644 (file)
index 0000000..4b1ff9c
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/120.xml", line 5, position 0:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/120.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/120.xml
new file mode 100644 (file)
index 0000000..ae8f55a
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ENTITY e "&#38;">
+]>
+<doc>
+&e;
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/121.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/121.out
new file mode 100644 (file)
index 0000000..1daf3a8
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/121.xml", at line 2, position 9:
+ERROR (Well-formedness constraint): Illegal token or character
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/121.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/121.xml
new file mode 100644 (file)
index 0000000..63ecbe4
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY #DEFAULT "default">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/122.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/122.out
new file mode 100644 (file)
index 0000000..8d65e69
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/122.xml", at line 2, position 27:
+ERROR (Well-formedness constraint): It is not allowed to mix alternatives and sequences
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/122.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/122.xml
new file mode 100644 (file)
index 0000000..e8a7082
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a, (b) | c)?>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/123.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/123.out
new file mode 100644 (file)
index 0000000..50f7364
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/123.xml", at line 2, position 22:
+ERROR (Well-formedness constraint): `>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/123.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/123.xml
new file mode 100644 (file)
index 0000000..f2dc633
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc ((doc?)))>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/124.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/124.out
new file mode 100644 (file)
index 0000000..9d1931c
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/124.xml", at line 2, position 19:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/124.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/124.xml
new file mode 100644 (file)
index 0000000..1abde7b
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (doc|#PCDATA)*>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/125.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/125.out
new file mode 100644 (file)
index 0000000..adb0e6e
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/125.xml", at line 2, position 16:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/125.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/125.xml
new file mode 100644 (file)
index 0000000..15519d4
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc ((#PCDATA))>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/126.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/126.out
new file mode 100644 (file)
index 0000000..8a93b34
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/126.xml", at line 2, position 22:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/126.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/126.xml
new file mode 100644 (file)
index 0000000..b6cdb0c
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)+>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/127.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/127.out
new file mode 100644 (file)
index 0000000..9deb084
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/127.xml", at line 2, position 22:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/127.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/127.xml
new file mode 100644 (file)
index 0000000..557df35
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)?>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/128.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/128.out
new file mode 100644 (file)
index 0000000..3fa460a
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/128.xml", at line 2, position 14:
+ERROR (Well-formedness constraint): EMPTY, ANY, or a subexpression expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/128.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/128.xml
new file mode 100644 (file)
index 0000000..e8f8543
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc CDATA>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/129.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/129.out
new file mode 100644 (file)
index 0000000..0762512
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/129.xml", at line 2, position 14:
+ERROR (Well-formedness constraint): Content model expression expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/129.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/129.xml
new file mode 100644 (file)
index 0000000..6471a8d
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc - - (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/130.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/130.out
new file mode 100644 (file)
index 0000000..6cd8d45
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/130.xml", at line 2, position 21:
+ERROR (Well-formedness constraint): `>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/130.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/130.xml
new file mode 100644 (file)
index 0000000..a4f0e86
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (doc?) +(foo)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/131.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/131.out
new file mode 100644 (file)
index 0000000..4bc40b0
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/131.xml", at line 2, position 21:
+ERROR (Well-formedness constraint): `>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/131.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/131.xml
new file mode 100644 (file)
index 0000000..783537f
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (doc?) -(foo)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/132.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/132.out
new file mode 100644 (file)
index 0000000..cf96a2b
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/132.xml", at line 2, position 41:
+ERROR (Well-formedness constraint): It is not allowed to mix alternatives and sequences
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/132.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/132.xml
new file mode 100644 (file)
index 0000000..00823ff
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a, (b, c), (d, (e, f) | g))?>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/133.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/133.out
new file mode 100644 (file)
index 0000000..9910ede
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/133.xml", at line 2, position 17:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/133.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/133.xml
new file mode 100644 (file)
index 0000000..d7444eb
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a *)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/134.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/134.out
new file mode 100644 (file)
index 0000000..ebb96aa
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/134.xml", at line 2, position 18:
+ERROR (Well-formedness constraint): `>' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/134.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/134.xml
new file mode 100644 (file)
index 0000000..78b1a59
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a) *>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/135.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/135.out
new file mode 100644 (file)
index 0000000..9a95cbd
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/135.xml", at line 2, position 17:
+ERROR (Well-formedness constraint): References to general entities not allowed in DTDs
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/135.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/135.xml
new file mode 100644 (file)
index 0000000..6e2421e
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a & b)?>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/136.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/136.out
new file mode 100644 (file)
index 0000000..3af635a
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/136.xml", at line 2, position 14:
+ERROR (Well-formedness constraint): EMPTY, ANY, or a subexpression expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/136.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/136.xml
new file mode 100644 (file)
index 0000000..a1a0b2e
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc O O (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/137.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/137.out
new file mode 100644 (file)
index 0000000..b5659d7
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/137.xml", at line 2, position 13:
+ERROR (Well-formedness constraint): Whitespace is missing
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/137.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/137.xml
new file mode 100644 (file)
index 0000000..de472bd
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc(#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/138.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/138.out
new file mode 100644 (file)
index 0000000..405657a
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/138.xml", at line 2, position 19:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/138.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/138.xml
new file mode 100644 (file)
index 0000000..d81dd54
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (doc*?)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/139.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/139.out
new file mode 100644 (file)
index 0000000..a376e2b
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/139.xml", at line 2, position 15:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/139.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/139.xml
new file mode 100644 (file)
index 0000000..2c6c92e
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc ()>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/140.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/140.out
new file mode 100644 (file)
index 0000000..abf07ef
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/140.xml", line 4, position 5:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '&lt;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/140.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/140.xml
new file mode 100644 (file)
index 0000000..062b213
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "<&#x309a;></&#x309a;>">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/141.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/141.out
new file mode 100644 (file)
index 0000000..cbd6185
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 2:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/141.xml", line 4, position 5:
+ERROR (Well-formedness constraint): Illegal inside tags
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/141.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/141.xml
new file mode 100644 (file)
index 0000000..6d864a3
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "<X&#xe5c;></X&#xe5c;>">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/142.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/142.out
new file mode 100644 (file)
index 0000000..88943b2
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/142.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 0 outside the accepted range of code points
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/142.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/142.xml
new file mode 100644 (file)
index 0000000..57517d2
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#0;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/143.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/143.out
new file mode 100644 (file)
index 0000000..d1b511b
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/143.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 31 outside the accepted range of code points
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/143.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/143.xml
new file mode 100644 (file)
index 0000000..52c25d7
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#31;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/144.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/144.out
new file mode 100644 (file)
index 0000000..d67fe5a
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/144.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 65535 outside the accepted range of code points
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/144.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/144.xml
new file mode 100644 (file)
index 0000000..0f98e23
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#xFFFF;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/145.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/145.out
new file mode 100644 (file)
index 0000000..4c79e18
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/145.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 55296 outside the accepted range of code points
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/145.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/145.xml
new file mode 100644 (file)
index 0000000..4909796
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#xD800;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/146.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/146.out
new file mode 100644 (file)
index 0000000..f90c91c
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/146.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 1114112 outside the accepted range of code points
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/146.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/146.xml
new file mode 100644 (file)
index 0000000..53e9875
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#x110000;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/147.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/147.out
new file mode 100644 (file)
index 0000000..41035fe
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/147.xml", at line 2, position 0:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/147.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/147.xml
new file mode 100644 (file)
index 0000000..93fa1ee
--- /dev/null
@@ -0,0 +1,3 @@
+
+<?xml version="1.0"?>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/148.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/148.out
new file mode 100644 (file)
index 0000000..c42a3e9
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/148.xml", at line 2, position 0:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/148.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/148.xml
new file mode 100644 (file)
index 0000000..a1623d5
--- /dev/null
@@ -0,0 +1,3 @@
+<!-- -->
+<?xml version="1.0"?>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/149.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/149.out
new file mode 100644 (file)
index 0000000..71e2832
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/149.xml", at line 3, position 0:
+ERROR (Well-formedness constraint): `]' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/149.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/149.xml
new file mode 100644 (file)
index 0000000..0632eb7
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<?xml version="1.0"?>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/150.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/150.out
new file mode 100644 (file)
index 0000000..160ef30
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/150.xml", at line 2, position 0:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/150.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/150.xml
new file mode 100644 (file)
index 0000000..e7c6e8c
--- /dev/null
@@ -0,0 +1,3 @@
+<doc>
+<?xml version="1.0"?>
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/151.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/151.out
new file mode 100644 (file)
index 0000000..444e8d6
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/151.xml", at line 3, position 0:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/151.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/151.xml
new file mode 100644 (file)
index 0000000..fd9616c
--- /dev/null
@@ -0,0 +1,3 @@
+<doc>
+</doc>
+<?xml version="1.0"?>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/152.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/152.out
new file mode 100644 (file)
index 0000000..211ee8a
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/152.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/152.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/152.xml
new file mode 100644 (file)
index 0000000..3245b2e
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml encoding="UTF-8"?>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/153.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/153.out
new file mode 100644 (file)
index 0000000..923e998
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/153.xml", line 5, position 5:
+SYNTAX ERROR
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/153.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/153.xml
new file mode 100644 (file)
index 0000000..07bd8df
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "<?xml encoding='UTF-8'?>">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/154.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/154.out
new file mode 100644 (file)
index 0000000..c8d7702
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/154.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Reserved processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/154.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/154.xml
new file mode 100644 (file)
index 0000000..f58969c
--- /dev/null
@@ -0,0 +1,2 @@
+<?XML version="1.0"?>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/155.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/155.out
new file mode 100644 (file)
index 0000000..36fc528
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/155.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Reserved processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/155.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/155.xml
new file mode 100644 (file)
index 0000000..87eccf0
--- /dev/null
@@ -0,0 +1,2 @@
+<?xmL version="1.0"?>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/156.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/156.out
new file mode 100644 (file)
index 0000000..88abf6a
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/156.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Reserved processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/156.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/156.xml
new file mode 100644 (file)
index 0000000..98e2c4b
--- /dev/null
@@ -0,0 +1,3 @@
+<doc>
+<?xMl version="1.0"?>
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/157.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/157.out
new file mode 100644 (file)
index 0000000..793da43
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/157.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Reserved processing instruction
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/157.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/157.xml
new file mode 100644 (file)
index 0000000..363821a
--- /dev/null
@@ -0,0 +1,3 @@
+<doc>
+<?xmL?>
+</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/158.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/158.out
new file mode 100644 (file)
index 0000000..1bfd00d
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/158.xml", at line 4, position 10:
+ERROR (Well-formedness constraint): Illegal token or character
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/158.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/158.xml
new file mode 100644 (file)
index 0000000..ebbeb51
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!NOTATION gif PUBLIC "image/gif" "">
+<!ATTLIST #NOTATION gif a1 CDATA #IMPLIED>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/159.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/159.out
new file mode 100644 (file)
index 0000000..83264a9
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/159.xml", at line 3, position 38:
+ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/159.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/159.xml
new file mode 100644 (file)
index 0000000..3a017ef
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "<![CDATA[Tim & Michael]]>">
+]>
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/160.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/160.out
new file mode 100644 (file)
index 0000000..1669390
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/160.xml", at line 4, position 18:
+ERROR (Well-formedness constraint): Restriction of the internal subset: parameter entity not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/160.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/160.xml
new file mode 100644 (file)
index 0000000..7e33116
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "">
+<!ENTITY foo "%e;">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/161.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/161.out
new file mode 100644 (file)
index 0000000..0d78a8d
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 9:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/161.xml", line 3, position 15:
+ERROR (Well-formedness constraint): Bad content model expression
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/161.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/161.xml
new file mode 100644 (file)
index 0000000..e256995
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ENTITY % e "#PCDATA">
+<!ELEMENT doc (%e;)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/162.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/162.out
new file mode 100644 (file)
index 0000000..f06c269
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/162.xml", at line 4, position 20:
+ERROR (Well-formedness constraint): Restriction of the internal subset: parameter entity not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/162.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/162.xml
new file mode 100644 (file)
index 0000000..d1336da
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e1 "">
+<!ENTITY % e2 "%e1;">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/163.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/163.out
new file mode 100644 (file)
index 0000000..b7fb7b0
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/163.xml", at line 5, position 0:
+ERROR (Well-formedness constraint): Content not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/163.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/163.xml
new file mode 100644 (file)
index 0000000..bb35a7b
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "">
+]>
+%e;
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/164.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/164.out
new file mode 100644 (file)
index 0000000..6151b95
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/164.xml", at line 4, position 2:
+ERROR (Well-formedness constraint): References to parameter entities not allowed here
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/164.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/164.xml
new file mode 100644 (file)
index 0000000..31da4ff
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "">
+] %e; >
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/165.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/165.out
new file mode 100644 (file)
index 0000000..981b8a0
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/165.xml", at line 2, position 8:
+ERROR (Well-formedness constraint): Whitespace is missing after ENTITY
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/165.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/165.xml
new file mode 100644 (file)
index 0000000..9b5198e
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ENTITY% e "">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/166.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/166.out
new file mode 100644 (file)
index 0000000..7ade465
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/166.xml", at line 1, position 0:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/166.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/166.xml
new file mode 100644 (file)
index 0000000..60f66f8
--- /dev/null
@@ -0,0 +1 @@
+<doc>ï¿¿</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/167.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/167.out
new file mode 100644 (file)
index 0000000..56d06ae
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/167.xml", at line 1, position 0:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/167.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/167.xml
new file mode 100644 (file)
index 0000000..fc536a1
--- /dev/null
@@ -0,0 +1 @@
+<doc>￾</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/168.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/168.out
new file mode 100644 (file)
index 0000000..41163ea
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/168.xml", at line 1, position 0:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/168.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/168.xml
new file mode 100644 (file)
index 0000000..ce8b4d7
--- /dev/null
@@ -0,0 +1 @@
+<doc>í €</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/169.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/169.out
new file mode 100644 (file)
index 0000000..623ad46
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/169.xml", at line 1, position 0:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/169.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/169.xml
new file mode 100644 (file)
index 0000000..6c1a0bf
--- /dev/null
@@ -0,0 +1 @@
+<doc>í°€</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/170.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/170.out
new file mode 100644 (file)
index 0000000..29011da
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/170.xml", at line 1, position 0:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/170.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/170.xml
new file mode 100644 (file)
index 0000000..6d02d89
--- /dev/null
@@ -0,0 +1 @@
+<doc>÷€€€</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/171.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/171.out
new file mode 100644 (file)
index 0000000..7ccaffa
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/171.xml", at line 1, position 5:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/171.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/171.xml
new file mode 100644 (file)
index 0000000..7fa118b
--- /dev/null
@@ -0,0 +1,2 @@
+<!-- ï¿¿ -->
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/172.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/172.out
new file mode 100644 (file)
index 0000000..8b7d0ab
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/172.xml", at line 1, position 0:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/172.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/172.xml
new file mode 100644 (file)
index 0000000..434f799
--- /dev/null
@@ -0,0 +1,2 @@
+<?pi ï¿¿?>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/173.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/173.out
new file mode 100644 (file)
index 0000000..25ef8fd
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/173.xml", at line 1, position 7:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/173.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/173.xml
new file mode 100644 (file)
index 0000000..ab5a447
--- /dev/null
@@ -0,0 +1 @@
+<doc a="ï¿¿"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/174.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/174.out
new file mode 100644 (file)
index 0000000..faec5b2
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/174.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/174.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/174.xml
new file mode 100644 (file)
index 0000000..b7f3db9
--- /dev/null
@@ -0,0 +1 @@
+<doc><![CDATA[ï¿¿]]></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/175.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/175.out
new file mode 100644 (file)
index 0000000..0f0e72c
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/175.xml", at line 3, position 18:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/175.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/175.xml
new file mode 100644 (file)
index 0000000..6d13a21
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "ï¿¿">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/176.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/176.out
new file mode 100644 (file)
index 0000000..b1940a1
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/176.xml", at line 5, position 0:
+ERROR (Well-formedness constraint): Missing end tag
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/176.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/176.xml
new file mode 100644 (file)
index 0000000..9c8e2e4
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/177.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/177.out
new file mode 100644 (file)
index 0000000..58d032c
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/177.xml", at line 4, position 6:
+ERROR: Bad character stream
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/177.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/177.xml
new file mode 100644 (file)
index 0000000..bde27a6
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>Aï¿¿</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/178.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/178.out
new file mode 100644 (file)
index 0000000..4f32440
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/178.xml", at line 5, position 7:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/178.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/178.xml
new file mode 100644 (file)
index 0000000..ba36a31
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="&#34;></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/179.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/179.out
new file mode 100644 (file)
index 0000000..3174804
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/179.xml", at line 2, position 11:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/179.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/179.xml
new file mode 100644 (file)
index 0000000..e59d926
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e "&#34;>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/180.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/180.out
new file mode 100644 (file)
index 0000000..15cabef
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/180.xml", at line 3, position 22:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `e'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/180.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/180.xml
new file mode 100644 (file)
index 0000000..d51b190
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA "&e;">
+<!ENTITY e "v">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/181.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/181.out
new file mode 100644 (file)
index 0000000..b9c2569
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/181.xml", line 5, position 5:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/181.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/181.xml
new file mode 100644 (file)
index 0000000..c438f1b
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ENTITY e "&#60;![CDATA[">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&e;]]></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/182.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/182.out
new file mode 100644 (file)
index 0000000..e54bfb9
--- /dev/null
@@ -0,0 +1,3 @@
+In entity e, at line 1, position 4:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/182.xml", line 5, position 5:
+ERROR (Well-formedness constraint): `-->' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/182.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/182.xml
new file mode 100644 (file)
index 0000000..106df72
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ENTITY e "&#60;!--">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&e;--></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/183.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/183.out
new file mode 100644 (file)
index 0000000..5d8c43d
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/183.xml", at line 2, position 28:
+ERROR (Well-formedness constraint): `)*' expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/183.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/183.xml
new file mode 100644 (file)
index 0000000..85ddfc8
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA | foo*)* >
+<!ELEMENT foo EMPTY>
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/184.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/184.out
new file mode 100644 (file)
index 0000000..3b62191
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/184.xml", at line 2, position 25:
+ERROR (Well-formedness constraint): Name expected
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/184.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/184.xml
new file mode 100644 (file)
index 0000000..f875392
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA | (foo))* >
+<!ELEMENT foo EMPTY>
+]>
+<doc></doc>
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.ent
new file mode 100644 (file)
index 0000000..fdd8077
--- /dev/null
@@ -0,0 +1 @@
+<!ELEMENT doc (#PCDATA)>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.out
new file mode 100644 (file)
index 0000000..53d52c7
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/185.xml", at line 3, position 5:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `e'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/185.xml
new file mode 100644 (file)
index 0000000..ea2f6d7
--- /dev/null
@@ -0,0 +1,3 @@
+<?xml version="1.0" standalone="yes"?>
+<!DOCTYPE doc SYSTEM "185.ent">
+<doc>&e;</doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/186.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/186.out
new file mode 100644 (file)
index 0000000..6fa931a
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/186.xml", at line 5, position 15:
+ERROR (Well-formedness constraint): Whitespace is missing between attributes `b' and `d'
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/186.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/186.xml
new file mode 100644 (file)
index 0000000..0bbacca
--- /dev/null
@@ -0,0 +1,5 @@
+<!DOCTYPE a [
+<!ELEMENT a EMPTY>
+<!ATTLIST a b CDATA #IMPLIED d CDATA #IMPLIED>
+]>
+<a b="c"d="e"/>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/null.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_jclark_notwf/sa/null.ent
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/001.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/001.out
new file mode 100644 (file)
index 0000000..8b203de
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_notwf/sa/001.xml", at line 4, position 7:
+ERROR (Validity constraint): Found reference to external entity in attribute value
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/001.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/001.xml
new file mode 100644 (file)
index 0000000..56b5100
--- /dev/null
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "null.ent">
+]>
+<doc a="&e;"></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/002.out b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/002.out
new file mode 100644 (file)
index 0000000..ea6c1f9
--- /dev/null
@@ -0,0 +1,2 @@
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_notwf/sa/002.xml", at line 4, position 22:
+ERROR (Validity constraint): Found reference to external entity in attribute value
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/002.xml b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/002.xml
new file mode 100644 (file)
index 0000000..f247879
--- /dev/null
@@ -0,0 +1,6 @@
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "null.ent">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA "&e;">
+]>
+<doc></doc>
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/null.ent b/helm/DEVEL/pxp/pxp/rtests/negative/data_notwf/sa/null.ent
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/run_negative b/helm/DEVEL/pxp/pxp/rtests/negative/run_negative
new file mode 100755 (executable)
index 0000000..3c58a4e
--- /dev/null
@@ -0,0 +1,117 @@
+#! /bin/bash
+
+# $Id$
+
+
+t=./test_negative
+
+init_test () {
+    # $1: Options for test_negative
+    # $2: Path to test record
+    options="$1"
+    input="$2"
+    output=`dirname $input`/`basename $input .xml`.out
+    if  [ -f "$output" ]; then
+       echo "Test $input already initialized; skipping"
+    else
+       $t $options "$input" >"$output"
+       echo Test $input initialized.
+    fi
+}
+
+
+check_test () {
+    # $1: Options for test_negative
+    # $2: Path to test record
+    options="$1"
+    input="$2"
+    output=`dirname $input`/`basename $input .xml`.out
+    $t $options "$input" >current.out
+    if [ -f "$output" ]; then
+       if cmp "$output" current.out; then
+           echo Test $input OK
+       else
+           echo Test $input FAILED!!!
+       fi
+    else
+       echo Test $input still uninitialized
+       echo - OUTPUT:
+       cat current.out
+    fi
+}
+
+
+for_directory () {
+    what="$1"
+    shift
+    options="$1"
+    shift
+    while [ $# -gt 0 ]; do
+       input="$1"
+       shift
+       if [ -f "$input" ]; then
+           $what "$options" "$input"
+       else
+           if [ -d "$input" ]; then
+               for ent in $input/*.xml; do
+                   for_directory $what "$options" $ent
+               done
+           else
+               echo "Not found: $input" >&2
+           fi
+       fi
+    done
+}
+
+
+usage () {
+    cat <<EOF >&2
+usage: $0 [ -init ] [ -wf ] file ... dir ...
+EOF
+    exit 1
+}
+
+
+action="check_test"
+options=""
+while true; do
+    case "x$1" in
+       x-init)
+           action="init_test"
+           shift
+           ;;
+       x-wf)
+           options="$options -wf"
+           shift
+           ;;
+       x-*)
+           usage
+           ;;
+       *)
+           break
+           ;;
+    esac
+done
+
+
+if [ $# -gt 0 ]; then
+    for_directory $action "$options" "$@"
+else
+    for_directory $action -wf \
+       data_jclark_notwf/ext-sa data_jclark_notwf/not-sa data_jclark_notwf/sa \
+       data_notwf/sa
+    for_directory $action "" \
+       data_jclark_invalid data_invalid
+fi
+
+# ======================================================================
+# $Log$
+# Revision 1.1  2000/11/17 09:57:33  lpadovan
+# Initial revision
+#
+# Revision 1.2  2000/05/01 16:23:39  gerd
+#      Added data_invalid.
+#
+# Revision 1.1  2000/05/01 15:58:50  gerd
+#      Initial revision.
+#
diff --git a/helm/DEVEL/pxp/pxp/rtests/negative/test_negative.ml b/helm/DEVEL/pxp/pxp/rtests/negative/test_negative.ml
new file mode 100644 (file)
index 0000000..13f049c
--- /dev/null
@@ -0,0 +1,105 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+let rec print_error e =
+  print_endline (string_of_exn e)
+;;
+
+class warner =
+  object 
+    method warn w =
+      print_endline ("WARNING: " ^ w)
+  end
+;;
+
+let parse debug wf iso88591 filename =
+  try 
+  let config =
+      { default_config with 
+         warner = new warner;
+          debugging_mode = debug;
+          encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
+         idref_pass = true;
+      }
+  in
+    let parse_fn =
+      if wf then parse_wfdocument_entity 
+      else 
+       let index = new hash_index in
+       parse_document_entity 
+         ?transform_dtd:None 
+         ~id_index:(index :> 'ext index)
+    in
+    let tree =
+      parse_fn
+       config
+       (from_file filename)
+       default_spec
+    in
+    print_endline "Parsed without error";
+  with
+      e ->
+       error_happened := true;
+       print_error e
+;;
+
+
+let main() =
+  let debug = ref false in
+  let wf = ref false in
+  let iso88591 = ref false in
+  let files = ref [] in
+  Arg.parse
+      [ "-d",   Arg.Set debug, "turn debugging mode on";
+       "-wf",  Arg.Set wf,    "check well-formedness only";
+        "-iso-8859-1", Arg.Set iso88591, "use ISO-8859-1 as internal encoding instead of UTF-8";
+      ]
+      (fun x -> files := x :: !files)
+      "
+usage: test_negative [options] file ...
+
+List of options:";
+  files := List.rev !files;
+  List.iter (parse !debug !wf !iso88591) !files;
+;;
+
+
+main();
+if !error_happened then exit(1);;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:33  lpadovan
+ * Initial revision
+ *
+ * Revision 1.6  2000/07/14 14:57:12  gerd
+ *     Updated: warner
+ *
+ * Revision 1.5  2000/07/14 14:20:11  gerd
+ *     Updated because of PXP interface changes.
+ *
+ * Revision 1.4  2000/07/09 01:49:09  gerd
+ *     Updated because of PXP interface changes.
+ *
+ * Revision 1.3  2000/06/04 20:31:21  gerd
+ *     Updates because of renamed PXP modules.
+ *
+ * Revision 1.2  2000/05/28 17:23:22  gerd
+ *     Updated.
+ *
+ * Revision 1.1  2000/05/01 15:58:50  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
diff --git a/helm/DEVEL/pxp/pxp/rtests/reader/Makefile b/helm/DEVEL/pxp/pxp/rtests/reader/Makefile
new file mode 100644 (file)
index 0000000..b1f204f
--- /dev/null
@@ -0,0 +1,31 @@
+# make validate:        make bytecode executable
+# make validate.opt:    make native executable
+# make clean:          remove intermediate files (in this directory)
+# make CLEAN:           remove intermediate files (recursively)
+# make distclean:      remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+OCAMLPATH=../..
+
+test_reader: test_reader.ml
+       ocamllex minilex.mll
+       ocamlfind ocamlc -custom -o test_reader -package .,unix,threads \
+               -linkpkg -thread -noautolink \
+               -g minilex.ml test_reader.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa minilex.ml
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~
+       rm -f test_reader
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/reader/minilex.mll b/helm/DEVEL/pxp/pxp/rtests/reader/minilex.mll
new file mode 100644 (file)
index 0000000..1c9fbec
--- /dev/null
@@ -0,0 +1,7 @@
+{ }
+rule nextchar = parse 
+    _ 
+      { Some (Lexing.lexeme lexbuf).[0] }
+  | eof
+      { None }
+{ }
diff --git a/helm/DEVEL/pxp/pxp/rtests/reader/t100.dat b/helm/DEVEL/pxp/pxp/rtests/reader/t100.dat
new file mode 100644 (file)
index 0000000..ad47100
--- /dev/null
@@ -0,0 +1 @@
+0123456789
\ No newline at end of file
diff --git a/helm/DEVEL/pxp/pxp/rtests/reader/test_reader.ml b/helm/DEVEL/pxp/pxp/rtests/reader/test_reader.ml
new file mode 100644 (file)
index 0000000..f01edf5
--- /dev/null
@@ -0,0 +1,455 @@
+open Pxp_reader;;
+open Pxp_types;;
+open Minilex;;
+
+let make_channel s =
+  (* Returns a channel reading the bytes from the string s *)
+  let rd, wr = Unix.pipe() in
+  let ch_rd = Unix.in_channel_of_descr rd in
+  let ch_wr = Unix.out_channel_of_descr wr in
+  ignore
+    (Thread.create
+       (fun () ->
+          output_string ch_wr s;
+          close_out ch_wr;
+       )
+       ()
+    );
+  ch_rd
+;;
+
+(**********************************************************************)
+
+let t001 () =
+  (* Reads from a string (without recoding it), checks the lexbuf size *)
+  let s = "0123456789abc" in
+  let r = new resolve_read_this_string s in
+  r # init_rep_encoding `Enc_iso88591;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = nextchar lb in
+  assert (c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
+   * now be at the end of the buffer indicating that the buffer is now
+   * empty.
+   *)
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  let c = nextchar lb in
+  assert (c = Some '9');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  r # change_encoding "";
+  let c = nextchar lb in
+  assert (c = Some 'a');
+  assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
+  ignore(nextchar lb);
+  let c = nextchar lb in
+  assert (c = Some 'c');
+  let c = nextchar lb in
+  assert (c = None);
+  r # close_in;
+  true
+;;
+
+
+let t002 () =
+  (* Like t001, but reads from a channel *)
+  let ch = make_channel "0123456789abc" in
+  let r = new resolve_read_this_channel ch in
+  r # init_rep_encoding `Enc_iso88591;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = nextchar lb in
+  assert (c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
+   * now be at the end of the buffer indicating that the buffer is now
+   * empty.
+   *)
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  ignore(nextchar lb);
+  let c = nextchar lb in
+  assert (c = Some '9');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  r # change_encoding "";
+  let c = nextchar lb in
+  assert (c = Some 'a');
+  assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
+  ignore(nextchar lb);
+  let c = nextchar lb in
+  assert (c = Some 'c');
+  let c = nextchar lb in
+  assert (c = None);
+  r # close_in;
+  true
+;;
+
+
+let t003 () =
+  (* Tests non-automatic encoding conversion from ISO-8859-1 to UTF-8 *)
+  let s = "0«»°áàâãäÃÀÂÃÄéèêëíìîïÃÌÎÃóòôõøöÓÒÔÕØÖúùûüýÿÃßç¡¿ñÑ" in
+  let r = new resolve_read_this_string ~fixenc:`Enc_iso88591 s in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = ref (nextchar lb) in
+  assert (!c = Some '0');
+  assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
+  (* Note: because we initialize the resolver with ~fixenc, the resolver can
+   * fill the buffer with more than one byte from the beginning.
+   *)
+  let u = ref "" in
+  while !c <> None do
+    ( match !c with
+         Some x -> u := !u ^ String.make 1 x
+       | None -> ()
+    );
+    c := nextchar lb
+  done;
+  r # close_in;
+  !u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
+;;
+
+
+let t004 () =
+  (* Tests non-automatic encoding conversion from UTF-8 to ISO-8859-1 *)
+  let s = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145" in
+  let r = new resolve_read_this_string ~fixenc:`Enc_utf8 s in
+  r # init_rep_encoding `Enc_iso88591;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = ref (nextchar lb) in
+  assert (!c = Some '0');
+  assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
+  (* Note: because we initialize the resolver with ~fixenc, the resolver can
+   * fill the buffer with more than one byte from the beginning.
+   *)
+  let u = ref "" in
+  while !c <> None do
+    ( match !c with
+         Some x -> u := !u ^ String.make 1 x
+       | None -> ()
+    );
+    c := nextchar lb
+  done;
+  r # close_in;
+  !u = "0«»°áàâãäÃÀÂÃÄéèêëíìîïÃÌÎÃóòôõøöÓÒÔÕØÖúùûüýÿÃßç¡¿ñÑ"
+;;
+
+
+let t005 () =
+  (* Tests automatic encoding conversion from UTF-8 to ISO-8859-1 *)
+  let s = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145" in
+  let r = new resolve_read_this_string s in
+  r # init_rep_encoding `Enc_iso88591;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = ref (nextchar lb) in
+  assert (!c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  let u = ref "" in
+  while !c <> None do
+    ( match !c with
+         Some x -> u := !u ^ String.make 1 x
+       | None -> ()
+    );
+    c := nextchar lb
+  done;
+  r # close_in;
+  !u = "0«»°áàâãäÃÀÂÃÄéèêëíìîïÃÌÎÃóòôõøöÓÒÔÕØÖúùûüýÿÃßç¡¿ñÑ"
+;;
+
+
+let t006 () =
+  (* Tests automatic encoding conversion from UTF-16-BE to UTF-8 
+   * This variant invokes change_encoding early.
+   *)
+  let s = "\254\255\0000\000«\000»\000°\000á\000à\000â\000ã\000ä\000Ã\000À\000Â\000Ã\000Ä\000é\000è\000ê\000ë\000í\000ì\000î\000ï\000Ã\000ÃŒ\000ÃŽ\000Ã\000ó\000ò\000ô\000õ\000ø\000ö\000Ó\000Ã’\000Ô\000Õ\000Ø\000Ö\000ú\000ù\000û\000ü\000ý\000ÿ\000Ã\000ß\000ç\000¡\000¿\000ñ\000Ñ" in
+  let r = new resolve_read_this_string s in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = ref (nextchar lb) in
+  assert (!c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  r # change_encoding "";
+  let u = ref "" in
+  while !c <> None do
+    ( match !c with
+         Some x -> u := !u ^ String.make 1 x
+       | None -> ()
+    );
+    c := nextchar lb
+  done;
+  r # close_in;
+  !u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
+;;
+
+
+let t007 () =
+  (* Tests automatic encoding conversion from UTF-16-BE to UTF-8 
+   * This variant does not invoke change_encoding
+   *)
+  let s = "\254\255\0000\000«\000»\000°\000á\000à\000â\000ã\000ä\000Ã\000À\000Â\000Ã\000Ä\000é\000è\000ê\000ë\000í\000ì\000î\000ï\000Ã\000ÃŒ\000ÃŽ\000Ã\000ó\000ò\000ô\000õ\000ø\000ö\000Ó\000Ã’\000Ô\000Õ\000Ø\000Ö\000ú\000ù\000û\000ü\000ý\000ÿ\000Ã\000ß\000ç\000¡\000¿\000ñ\000Ñ" in
+  let r = new resolve_read_this_string s in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = ref (nextchar lb) in
+  assert (!c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  let u = ref "" in
+  while !c <> None do
+    ( match !c with
+         Some x -> u := !u ^ String.make 1 x
+       | None -> ()
+    );
+    c := nextchar lb
+  done;
+  r # close_in;
+  !u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
+;;
+
+(**********************************************************************)
+
+let t100 () =
+  (* Reads from a file without recoding it *)
+  let r = new resolve_as_file () in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let cwd = Sys.getcwd() in
+  let lb = r # open_in (System ("file://localhost" ^ cwd ^ "/t100.dat")) in
+  let c = nextchar lb in
+  assert (c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
+   * now be at the end of the buffer indicating that the buffer is now
+   * empty.
+   *)
+  for i = 1 to 8 do
+    ignore(nextchar lb);
+  done;
+  let c = nextchar lb in
+  assert (c = Some '9');
+  r # close_in;
+  true
+;;
+
+let t101 () =
+  (* Reads from a file without recoding it *)
+  let r = new resolve_as_file () in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let cwd = Sys.getcwd() in
+  let lb = r # open_in (System ("//localhost" ^ cwd ^ "/t100.dat")) in
+  let c = nextchar lb in
+  assert (c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
+   * now be at the end of the buffer indicating that the buffer is now
+   * empty.
+   *)
+  for i = 1 to 8 do
+    ignore(nextchar lb);
+  done;
+  let c = nextchar lb in
+  assert (c = Some '9');
+  r # close_in;
+  true
+;;
+
+let t102 () =
+  (* Reads from a file without recoding it *)
+  let r = new resolve_as_file () in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let cwd = Sys.getcwd() in
+  let lb = r # open_in (System (cwd ^ "/t100.dat")) in
+  let c = nextchar lb in
+  assert (c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
+   * now be at the end of the buffer indicating that the buffer is now
+   * empty.
+   *)
+  for i = 1 to 8 do
+    ignore(nextchar lb);
+  done;
+  let c = nextchar lb in
+  assert (c = Some '9');
+  r # close_in;
+  true
+;;
+
+let t103 () =
+  (* Reads from a file without recoding it *)
+  let r = new resolve_as_file () in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in (System "t100.dat") in
+  let c = nextchar lb in
+  assert (c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
+   * now be at the end of the buffer indicating that the buffer is now
+   * empty.
+   *)
+  for i = 1 to 8 do
+    ignore(nextchar lb);
+  done;
+  let c = nextchar lb in
+  assert (c = Some '9');
+  r # close_in;
+  true
+;;
+
+(**********************************************************************)
+
+let t110 () =
+  (* Checks whether relative URLs are properly handled *)
+  let r = new resolve_as_file () in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in (System "t100.dat") in
+  let c = nextchar lb in
+  assert (c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
+   * now be at the end of the buffer indicating that the buffer is now
+   * empty.
+   *)
+  for i = 1 to 8 do
+    ignore(nextchar lb);
+  done;
+  let r' = r # clone in
+  let lb' = r' # open_in (System "t100.dat") in
+  let c = nextchar lb' in
+  assert (c = Some '0');
+  for i = 1 to 8 do
+    ignore(nextchar lb');
+  done;
+  let c = nextchar lb' in
+  assert (c = Some '9');
+  r' # close_in;
+  let c = nextchar lb in
+  assert (c = Some '9');
+  r # close_in;
+  true
+;;
+
+(**********************************************************************)
+(* Tests whether the encoding handling of System IDs is okay *)
+
+let t200 () =
+  (* Check the technique for the following tests:
+   * (Also checks 'combine' to some extent.)
+   *)
+  let r1 = new resolve_read_this_string
+            ~id:(System "b.xml")
+            ~fixenc:`Enc_iso88591
+            "ae" in
+  let r2 = new resolve_read_this_string
+            ~id:(System "a.xml")
+            ~fixenc:`Enc_iso88591
+            "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM 'b.xml'> ]> <a>&ae;</a>" in
+  let r = new combine [ r1; r2 ] in
+  (* It should now be possible to resolve &ae; *)
+  let _ =
+    Pxp_yacc.parse_document_entity 
+      { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_iso88591 }
+      (Pxp_yacc.ExtID(System "a.xml", r))
+      Pxp_yacc.default_spec
+  in
+  true
+;;
+
+
+let t201 () =
+  (* Check that System IDs are converted to UTF-8. rep_encoding = ISO-8859-1 *)
+  let r1 = new resolve_read_this_string
+            ~id:(System "\195\164.xml")      (* This is a UTF-8 "ä"! *)
+            ~fixenc:`Enc_iso88591
+            "ae" in
+  let r2 = new resolve_read_this_string
+            ~id:(System "a.xml")
+            ~fixenc:`Enc_iso88591
+            "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM 'ä.xml'> ]> <a>&ae;</a>" in
+  let r = new combine [ r1; r2 ] in
+  (* It should now be possible to resolve &ae; *)
+  let _ =
+    Pxp_yacc.parse_document_entity 
+      { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_iso88591 }
+      (Pxp_yacc.ExtID(System "a.xml", r))
+      Pxp_yacc.default_spec
+  in
+  true
+;;
+
+
+let t202 () =
+  (* Check that System IDs are converted to UTF-8. rep_encoding = UTF-8 *)
+  let r1 = new resolve_read_this_string
+            ~id:(System "\195\164.xml")
+            ~fixenc:`Enc_iso88591
+            "ae" in
+  let r2 = new resolve_read_this_string
+            ~id:(System "a.xml")
+            ~fixenc:`Enc_iso88591
+            "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM 'ä.xml'> ]> <a>&ae;</a>" in
+  let r = new combine [ r1; r2 ] in
+  (* It should now be possible to resolve &ae; *)
+  let _ =
+    Pxp_yacc.parse_document_entity 
+      { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_utf8 }
+      (Pxp_yacc.ExtID(System "a.xml", r))
+      Pxp_yacc.default_spec
+  in
+  true
+;;
+
+(**********************************************************************)
+
+let test f n =
+  try
+    print_string ("Reader test " ^ n);
+    flush stdout;
+    if f() then
+      print_endline " ok"
+    else
+      print_endline " FAILED!!!!";
+  with
+      error ->
+       print_endline (" FAILED: " ^ string_of_exn error)
+;;
+
+test t001 "001";;
+test t002 "002";;
+test t003 "003";;
+test t004 "004";;
+test t005 "005";;
+test t006 "006";;
+test t007 "007";;
+
+test t100 "100";;
+test t101 "101";;
+test t102 "102";;
+test t103 "103";;
+
+test t110 "110";;
+
+test t200 "200";;
+test t201 "201";;
+test t202 "202";;
diff --git a/helm/DEVEL/pxp/pxp/rtests/run b/helm/DEVEL/pxp/pxp/rtests/run
new file mode 100755 (executable)
index 0000000..11b573c
--- /dev/null
@@ -0,0 +1,9 @@
+#! /bin/sh
+
+set -e
+
+(cd reader && ./test_reader)
+(cd canonxml && ./run_canonxml)
+(cd write && ./run_write)
+(cd codewriter && ./run_codewriter)
+(cd negative && ./run_negative)
diff --git a/helm/DEVEL/pxp/pxp/rtests/write/Makefile b/helm/DEVEL/pxp/pxp/rtests/write/Makefile
new file mode 100644 (file)
index 0000000..634b272
--- /dev/null
@@ -0,0 +1,28 @@
+# make validate:        make bytecode executable
+# make validate.opt:    make native executable
+# make clean:          remove intermediate files (in this directory)
+# make CLEAN:           remove intermediate files (recursively)
+# make distclean:      remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+OCAMLPATH=../..
+
+test_write: test_write.ml
+       ocamlfind ocamlc -g -custom -o test_write -package .,str -linkpkg test_write.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+       rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa out1 out2 out3
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+       rm -f *~
+       rm -f test_write
+
diff --git a/helm/DEVEL/pxp/pxp/rtests/write/run_write b/helm/DEVEL/pxp/pxp/rtests/write/run_write
new file mode 100755 (executable)
index 0000000..1c43acb
--- /dev/null
@@ -0,0 +1,17 @@
+#! /bin/bash
+
+test_sample () {
+    file="$1"
+    echo -n "Testing $file... "
+    ./test_write -in "$file" >out1
+    ./test_write -in out1    >out2
+    ./test_write -in out2    >out3
+    if cmp out1 out3; then
+       echo "OK"
+    else
+       echo "FAILED"
+    fi
+}
+
+
+test_sample "sample001.xml"
diff --git a/helm/DEVEL/pxp/pxp/rtests/write/sample001.xml b/helm/DEVEL/pxp/pxp/rtests/write/sample001.xml
new file mode 100644 (file)
index 0000000..4973505
--- /dev/null
@@ -0,0 +1,37 @@
+<!DOCTYPE a [
+
+<!ELEMENT a (b | (c, d)* | (e, f)+ | g?)>
+<!ELEMENT b (#PCDATA | a)*>
+<!ELEMENT c EMPTY>
+<!ELEMENT d ANY>
+<!ELEMENT e EMPTY>
+<!ELEMENT f EMPTY>
+<!ELEMENT g EMPTY>
+
+<!ATTLIST a u CDATA #IMPLIED
+            v NMTOKEN "huhu"
+            w (q|p)   #REQUIRED
+            x NOTATION (n1|n2) "n1"
+            y ENTITY #IMPLIED>
+
+<!NOTATION n1 SYSTEM "/bin/n1-processor">
+<!NOTATION n2 SYSTEM "/bin/n2-processor">
+
+<!ENTITY u1 SYSTEM "file-u1" NDATA n1>
+<!ENTITY u2 SYSTEM "file-u2" NDATA n2>
+
+<?pi1 args ...?>
+]>
+
+<a u="1" w="q" x="n2">
+  <b>
+    <?pi2 args ...?>
+    This is text!
+    <a w="p" y="u1">
+      <c/>
+      <d/>
+    </a>
+  </b>
+</a>
+
+<?pi3 args ...?>
diff --git a/helm/DEVEL/pxp/pxp/rtests/write/test_write.ml b/helm/DEVEL/pxp/pxp/rtests/write/test_write.ml
new file mode 100644 (file)
index 0000000..48defd2
--- /dev/null
@@ -0,0 +1,94 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+let rec prerr_error e =
+  prerr_endline (string_of_exn e)
+;;
+
+class warner =
+  object 
+    method warn w =
+      prerr_endline ("WARNING: " ^ w)
+  end
+;;
+
+let parse_and_write in_filename =
+  let spec =
+    let e = new element_impl default_extension in
+    make_spec_from_mapping
+      ~super_root_exemplar:      e
+      ~default_pinstr_exemplar:  e
+      ~data_exemplar:            (new data_impl default_extension)
+      ~default_element_exemplar: e
+      ~element_mapping:          (Hashtbl.create 1)
+      ()
+  in
+  let config =
+      { default_config with 
+         warner = new warner;
+         enable_pinstr_nodes = true;
+         enable_super_root_node = true;
+         encoding = `Enc_utf8;
+      }
+  in
+  try 
+    let tree =
+      parse_document_entity
+        config
+       (from_file in_filename)
+       spec 
+    in
+    
+    tree # write (Out_channel stdout) `Enc_utf8;
+  with
+      e ->
+       error_happened := true;
+       prerr_error e
+;;
+
+
+let main() =
+  let in_file = ref "" in
+  Arg.parse
+      [ "-in", (Arg.String (fun s -> in_file := s)),
+            " <file>      Set the XML file to read";
+      ]
+      (fun x -> raise (Arg.Bad "Unexpected argument"))
+      "
+usage: test_write [ options ]
+
+List of options:";
+  if !in_file = "" then begin
+    prerr_endline "No input file specified.";
+    exit 1
+  end;
+  parse_and_write !in_file 
+;;
+
+
+main();
+if !error_happened then exit(1);;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:35  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/08/16 23:44:21  gerd
+ *     Updates because of changes of the PXP API.
+ *
+ * Revision 1.1  2000/07/16 17:50:39  gerd
+ *     Initial revision.
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/tools/collect_files b/helm/DEVEL/pxp/pxp/tools/collect_files
new file mode 100755 (executable)
index 0000000..d1770eb
--- /dev/null
@@ -0,0 +1,25 @@
+#! /bin/sh
+#
+# $Id$
+# ----------------------------------------------------------------------
+#
+# usage: collect_files file ...
+#
+# Prints the names of the files passed as arguments which actually
+# exist and are regular files.
+
+for x in "$@"; do
+    if [ -f "$x" ]; then
+       echo "$x"
+    fi
+done
+
+# ======================================================================
+#
+# $Log$
+# Revision 1.1  2000/11/17 09:57:35  lpadovan
+# Initial revision
+#
+# Revision 1.1  2000/07/27 21:07:26  gerd
+#      Initial revision.
+#
diff --git a/helm/DEVEL/pxp/pxp/tools/insert_variant b/helm/DEVEL/pxp/pxp/tools/insert_variant
new file mode 100755 (executable)
index 0000000..cb592bb
--- /dev/null
@@ -0,0 +1,105 @@
+#! /bin/sh
+# (*
+exec ocaml "$0" "$@"
+*) directory ".";;
+
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+let get_arg variant insert_line =
+  (* returns the argument of an "#insert" line *)
+  let s = ref "" in
+  for i = 8 to String.length insert_line - 1 do
+    match insert_line.[i] with
+       ' ' -> ()
+      | '*' ->
+         (* replace '*' with 'variant' *)
+         s := !s ^ variant
+      | c ->
+         s := !s ^ String.make 1 c
+  done;
+  !s
+;;
+
+
+let edit_file variant name =
+  let basename = Filename.chop_suffix name ".src" in
+  let mllname = basename ^ "_" ^ variant ^ ".mll" in
+  let chin = open_in name in
+  let chout = open_out mllname in
+  output_string chout "(* File generated by insert_variant; DO NOT EDIT! *)\n";
+  begin try
+    while true do
+      let line = input_line chin in
+      (* We do not have Str here. *)
+      if String.length line >= 8 & String.sub line 0 8 = "#insert " then begin
+       let insname = get_arg variant line in
+       (* Copy the file 'insname' to chout *)
+       let chcopy = open_in insname in
+       let n = in_channel_length chcopy in
+       let s = String.create n in
+       really_input chcopy s 0 n;
+       close_in chcopy;
+       output_string chout s;
+      end
+      else begin
+       output_string chout line;
+       output_char chout '\n';
+      end
+    done
+  with
+      End_of_file -> ()
+  end;
+  close_in chin;
+  close_out chout
+;;
+
+
+let main() =
+  let variant = ref "" in
+  let files = ref [] in
+  Arg.current := 0;          (* Because of an OCaml-3.00 bug *)
+  Arg.parse
+      [ "-variant", Arg.String (fun s -> variant := s),
+               "<name>  Set the variant (character encoding)";
+      ]
+      (fun s -> files := !files @ [s])
+      "insert_variant [ options ] file.src ...
+
+Reads the files, replaces each #insert line with the contents of the
+referenced file, and writes the result to file_variant.mll.
+
+A #insert line splices the specified file into the source. The
+asterisk (*) in its argument is replaced by the name of the variant.
+
+Options:
+";
+  
+  if !variant = "" then 
+    failwith "No variant specified!";
+
+  List.iter 
+    (fun name -> edit_file !variant name)
+    !files
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ * 
+ * $Log$
+ * Revision 1.1  2000/11/17 09:57:35  lpadovan
+ * Initial revision
+ *
+ * Revision 1.2  2000/05/20 21:14:33  gerd
+ *     Workaround for an OCaml 3.00 bug.
+ *
+ * Revision 1.1  2000/05/20 20:30:15  gerd
+ *     Initial revision.
+ *
+ * 
+ *)
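To illustrate the #insert mechanism described by the usage text above (the file
names here are hypothetical, not taken from the commit): if lexer.src contains
the line

    #insert chars_*.def

then

    ./insert_variant -variant utf8 lexer.src

writes lexer_utf8.mll, with that line replaced by the contents of chars_utf8.def;
running it again with -variant iso88591 would instead pull in chars_iso88591.def.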
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/.cvsignore b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/.cvsignore
new file mode 100644 (file)
index 0000000..deb5b7f
--- /dev/null
@@ -0,0 +1,4 @@
+*.cmo
+*.cmx
+*.cmi
+
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/Makefile b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/Makefile
new file mode 100644 (file)
index 0000000..504cfe5
--- /dev/null
@@ -0,0 +1,50 @@
+#(******************************************************)
+#(*    Claudio Sacerdoti Coen <sacerdot@cs.unibo.it>   *)
+#(*                   14/05/2000                       *)
+#(******************************************************)
+
+OCAMLC = ocamlc 
+OCAMLOPT = ocamlopt 
+OCAMLDEP = ocamldep
+OCAMLLEX = ocamllex
+OCAMLYACC = ocamlyacc
+
+all: ucs2_to_utf8
+opt: ucs2_to_utf8.opt
+
+DEPOBJS = ucs2_to_utf8.ml lexer.ml parser.ml parser.mli types.ml
+
+UCS2_TO_UTF8OBJS = types.cmo lexer.cmo parser.cmo ucs2_to_utf8.cmo
+UCS2_TO_UTF8OPTOBJS = types.cmx lexer.cmx parser.cmx ucs2_to_utf8.cmx
+
+lexer.ml:
+       $(OCAMLLEX) lexer.mll
+
+parser.ml:
+       $(OCAMLYACC) parser.mly
+
+parser.mli:
+       $(OCAMLYACC) parser.mly
+
+depend: lexer.ml parser.ml parser.mli
+       $(OCAMLDEP) $(DEPOBJS) > depend
+ucs2_to_utf8: $(UCS2_TO_UTF8OBJS)
+       $(OCAMLC) -o ucs2_to_utf8 $(UCS2_TO_UTF8OBJS)
+
+ucs2_to_utf8.opt: $(UCS2_TO_UTF8OPTOBJS)
+       $(OCAMLOPT) -o ucs2_to_utf8.opt $(UCS2_TO_UTF8OPTOBJS)
+
+.SUFFIXES: .ml .mli .cmo .cmi .cmx
+.ml.cmo:
+       $(OCAMLC) -c $<
+.mli.cmi:
+       $(OCAMLC) -c $<
+.ml.cmx:
+       $(OCAMLOPT) -c $<
+
+clean:
+       rm -f *.cm[iox] *.o lexer.ml parser.ml parser.mli \
+       ucs2_to_utf8 ucs2_to_utf8.opt
+
+include depend
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/README b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/README
new file mode 100644 (file)
index 0000000..d02ae8c
--- /dev/null
@@ -0,0 +1,15 @@
+(******************************************************)
+(*    Claudio Sacerdoti Coen <sacerdot@cs.unibo.it>   *)
+(*                   14/05/2000                       *)
+(******************************************************)
+
+How to compile: "make clean && make depend && make && make opt"
+
+Usage: "cat input.mll | ./ucs2_to_utf8 > output.mll"
+ where input.mll contains definitions of ucs2 regular expressions
+ and output.mll receives the same regular expressions recoded as utf8, in
+ the format expected by ocamllex
+
+ See input/input.mll for an example (the definitions are taken from
+ appendix B of the XML recommendation) and input/example.mll for a
+ smaller one.
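As an illustration of the transformation (not part of the commit; the definition
names and code-point ranges below are invented), a pair of ucs2 definitions such as

    let ascii_upper = [#x0041-#x005A] ;;
    let latin_cap   = [#x00C0-#x00D6] ;;

would be rewritten by the tool (following the printer in ucs2_to_utf8.ml further
below) into ocamllex definitions of roughly this shape:

    let ascii_upper =
       ['\065'-'\090']

    let latin_cap =
       '\195'['\128'-'\150']

Single-byte code points stay plain character ranges, while multi-byte code points
become concatenations of byte ranges.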
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/lexer.mll b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/lexer.mll
new file mode 100644 (file)
index 0000000..dfbeb5a
--- /dev/null
@@ -0,0 +1,43 @@
+{
+(******************************************************)
+(*    Claudio Sacerdoti Coen <sacerdot@cs.unibo.it>   *)
+(*                   14/05/2000                       *)
+(******************************************************)
+
+open Parser
+
+let comment_depth = ref 0;;
+
+let charint_of_lexeme l =
+ String.set l 0 '0' ;
+ int_of_string l
+;;
+}
+
+let digit = ['0'-'9']|['A'-'F']
+
+rule token =
+ parse
+    [' ' '\t' '\n']                           { token lexbuf }
+  | "let"                                     { LET }
+  | (['a'-'z']|'_')(['a'-'z']|['A'-'Z']|'_'|['0'-'9']|'\'')*
+                                              { IDENT (Lexing.lexeme lexbuf) }
+  | '='                                       { EQ }
+  | ";;"                                      { END_OF_LET }
+  | "|"                                       { PIPE }
+  | '['                                       { LBRACKET }
+  | ']'                                       { RBRACKET }
+  | '-'                                       { RANGE }
+  | "(*"                                      { incr comment_depth ;
+                                                comment lexbuf
+                                              }
+  | "#x" digit digit digit digit              { CHAR (charint_of_lexeme (Lexing.lexeme lexbuf)) }
+  | eof                                       { EOF }
+
+and comment =
+ parse
+    "(*" { incr comment_depth ; comment lexbuf }
+  | "*)" { decr comment_depth ;
+           if !comment_depth = 0 then token lexbuf else comment lexbuf
+         }
+  | _    { comment lexbuf }
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/parser.mly b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/parser.mly
new file mode 100644 (file)
index 0000000..2fba775
--- /dev/null
@@ -0,0 +1,40 @@
+/******************************************************/
+/*    Claudio Sacerdoti Coen <sacerdot@cs.unibo.it>   */
+/*                   14/05/2000                       */
+/******************************************************/
+
+%token <int>CHAR
+%token <string>IDENT
+%token LET
+%token EQ
+%token END_OF_LET
+%token RBRACKET
+%token PIPE
+%token LBRACKET
+%token RANGE
+%token EOF
+%start main
+%type <Types.definition list> main
+
+%%
+
+main:
+   EOF              { [] }
+ | declaration main { $1::$2 }
+;
+
+declaration:
+   LET IDENT EQ regexp END_OF_LET
+      { { Types.id = $2 ; Types.rel = $4 } }
+;
+
+regexp:
+   regexptoken PIPE regexp  { $1::$3 }
+ | regexptoken              { [$1] }
+;
+
+regexptoken:
+   CHAR                               { Types.Char $1 }
+ | LBRACKET CHAR RANGE CHAR RBRACKET  { Types.Interval ($2,$4) }
+ | IDENT                              { Types.Identifier $1 }
+;
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/types.ml b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/types.ml
new file mode 100644 (file)
index 0000000..e2da7dc
--- /dev/null
@@ -0,0 +1,13 @@
+(******************************************************)
+(*    Claudio Sacerdoti Coen <sacerdot@cs.unibo.it>   *)
+(*                   14/05/2000                       *)
+(******************************************************)
+
+type regexp =
+   Char of int
+ | Interval of int * int      (* lower bound, upper bound *)
+ | Identifier of string
+ | Concat of regexp list list (* concatenation of disjunctions *)
+;;
+
+type definition = { id : string ; rel : regexp list } ;;
diff --git a/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/ucs2_to_utf8.ml b/helm/DEVEL/pxp/pxp/tools/ucs2_to_utf8/ucs2_to_utf8.ml
new file mode 100644 (file)
index 0000000..1512d23
--- /dev/null
@@ -0,0 +1,215 @@
+(******************************************************)
+(*    Claudio Sacerdoti Coen <sacerdot@cs.unibo.it>   *)
+(*                   14/05/2000                       *)
+(******************************************************)
+
+(* Surrogate Pairs are not accepted in XML files (is it true???) *)
+exception SurrogatePairs;;
+
+(* Interval (n,m) where n > m *)
+exception InvalidInterval of int * int;;
+
+(* Given a ucs2 character code, returns it in utf8  *)
+(* (as a concatenation of characters)               *)
+let char_ucs2_to_utf8 =
+ function
+    n when n >= 0xD800 && n <= 0xDFFF -> raise SurrogatePairs
+  | n when n <= 0x007F -> Types.Char n
+  | n when n <= 0x07FF ->
+     Types.Concat
+      [[Types.Char (n lsr  6 land 0b00011111 lor 0b11000000)] ;
+       [Types.Char (n        land 0b00111111 lor 0b10000000)]]
+  | n ->
+     Types.Concat
+      [[Types.Char (n lsr 12 land 0b00001111 lor 0b11100000)] ;
+       [Types.Char (n lsr  6 land 0b00111111 lor 0b10000000)] ;
+       [Types.Char (n        land 0b00111111 lor 0b10000000)]]
+;;
+
+(*CSC: Two functions for debugging purposes only
+
+let char_ucs2_to_utf8 =
+ function
+    n when n >= 0xD800 && n <= 0xDFFF -> assert false
+  | n when n <= 0x007F -> [[n]]
+  | n when n <= 0x07FF ->
+     [[(n lsr  6 land 0b00011111 lor 0b11000000)] ;
+      [(n        land 0b00111111 lor 0b10000000)]]
+  | n ->
+     [[(n lsr 12 land 0b00001111 lor 0b11100000)] ;
+      [(n lsr  6 land 0b00111111 lor 0b10000000)] ;
+      [(n        land 0b00111111 lor 0b10000000)]]
+;;
+
+let rec bprint =
+ function
+    0 -> ""
+  | n -> bprint (n / 2) ^ string_of_int (n mod 2)
+;;
+*)
+
+(* A few useful functions *)
+let rec mklist e =
+ function
+    0 -> []
+  | n -> e::(mklist e (n - 1))
+;;
+
+let sup =
+ let t = Types.Char 0b10111111 in
+  function
+     1 -> t
+   | n -> Types.Concat (mklist [t] n)
+;;
+
+let rec inf =
+ let b = Types.Char 0b10000000 in
+  function
+     1 -> [[b]]
+   | n -> mklist [b] n
+;;
+
+let mysucc =
+ function
+    [Types.Char n] -> n + 1
+  | _ -> assert false
+;;
+
+let mypred =
+ function
+    [Types.Char n] -> n - 1
+  | _ -> assert false
+;;
+
+(* Given the two utf8-encoded extremes of a character-code interval,  *)
+(* both having the same encoding length, it returns the utf8 regular  *)
+(* expression matching all the characters in the interval             *)
+let rec same_length_ucs2_to_utf8 =
+ let module T = Types in
+  function
+     (T.Char n, T.Char m) when n = m -> [T.Char n]
+   | (T.Char n, T.Char m) -> [T.Interval (n,m)]
+   | (T.Concat [hen ; [tln]], T.Concat [hem ; [tlm]]) when hen = hem ->
+      [T.Concat [hen ; same_length_ucs2_to_utf8 (tln,tlm)]]
+   | (T.Concat [hen ; [tln]], T.Concat ([hem ; [tlm]] as e2)) ->
+      (T.Concat [hen ; same_length_ucs2_to_utf8 (tln,sup 1)]) ::
+      (let shen = mysucc hen
+       and phem = mypred hem in
+       let succhen = [T.Char shen] in
+        if succhen = hem then
+         same_length_ucs2_to_utf8 (T.Concat (succhen::(inf 1)), T.Concat e2)
+        else
+         (T.Concat [[T.Interval (shen, phem)] ;
+          [T.Interval (0b10000000,0b10111111)]])::
+           same_length_ucs2_to_utf8 (T.Concat (hem::(inf 1)), T.Concat e2)
+      )
+    (*same_length_ucs2_to_utf8 (T.Concat ((mysucc hen)::(inf 1)), T.Concat e2)*)
+   | (T.Concat (hen::tln), T.Concat (hem::tlm)) when hen = hem ->
+      [T.Concat [hen ; same_length_ucs2_to_utf8 (T.Concat tln, T.Concat tlm)]]
+   | (T.Concat (hen::tln), T.Concat ((hem::tlm) as e2)) ->
+      let n = List.length tln in
+       (T.Concat
+        [hen ; same_length_ucs2_to_utf8 (T.Concat tln,sup n)]) ::
+         (let shen = mysucc hen
+          and phem = mypred hem in
+          let succhen = [T.Char shen] in
+           if succhen = hem then
+            same_length_ucs2_to_utf8 (T.Concat (succhen::(inf n)), T.Concat e2)
+           else
+            (T.Concat [[T.Interval (shen, phem)] ;
+             [T.Interval (0b10000000,0b10111111)] ;
+             [T.Interval (0b10000000,0b10111111)]]
+            )::
+             same_length_ucs2_to_utf8 (T.Concat (hem::(inf n)), T.Concat e2)
+       )
+     (*same_length_ucs2_to_utf8 (T.Concat ((mysucc hen)::(inf n)),T.Concat e2)*)
+   | _ -> assert false
+;;
+
+(* Given an interval of ucs2 characters, splits *)
+(* it into subintervals whose extremes have the *)
+(* same utf8 encoding length and, for each such *)
+(* subinterval, calls same_length_ucs2_to_utf8  *)
+let rec seq_ucs2_to_utf8 =
+ function
+    (n,_) when n >= 0xD800 && n <= 0xDFFF -> raise SurrogatePairs
+  | (_,n) when n >= 0xD800 && n <= 0xDFFF -> raise SurrogatePairs
+  | (n,m) when n > m -> raise (InvalidInterval (n,m))
+  | (n,m) when n = m -> [char_ucs2_to_utf8 n]
+  | (n,m) when n <= 0x07F && m > 0x07F ->
+      (seq_ucs2_to_utf8 (n,0x07F)) @ (seq_ucs2_to_utf8 (0x080,m))
+  | (n,m) when n <= 0x07FF && m > 0x07FF ->
+      (seq_ucs2_to_utf8 (n,0x07FF)) @ (seq_ucs2_to_utf8 (0x0800,m))
+  | (n,m) ->
+      let utf8n = char_ucs2_to_utf8 n
+      and utf8m = char_ucs2_to_utf8 m in
+       same_length_ucs2_to_utf8 (utf8n,utf8m)
+;;
+
+(* Given a ucs2 regular expression, returns  *)
+(* the corresponding utf8 regular expression *)
+let ucs2_to_utf8 { Types.id = id ; Types.rel = rel } =
+ let rec aux re l2 =
+  match re with
+     Types.Char i -> char_ucs2_to_utf8 i :: l2
+   | Types.Interval (l,u) -> seq_ucs2_to_utf8 (l,u) @ l2
+   | Types.Identifier _ as i -> i :: l2
+   | Types.Concat rell ->
+      let foo rel = List.fold_right aux rel [] in
+       Types.Concat (List.map foo rell) :: l2
+ in
+  { Types.id = id ; Types.rel = List.fold_right aux rel [] }
+;;
+
+(* The function actually used to produce the output *)
+let output = print_string ;;
+
+(* padded_string_of_int i returns the string representing the        *)
+(* integer i (i < 256) using exactly 3 digits (example: 13 -> "013") *)
+let padded_string_of_int i =
+ if i < 10 then
+  "00" ^ string_of_int i
+ else if i < 100 then
+  "0" ^ string_of_int i
+ else
+  string_of_int i
+;;
+
+(* Two functions useful to print a definition *)
+let rec print_disjunction ?(first = true) =
+ function
+    [] -> ()
+  | he::tl ->
+     if not first then output " | " ;
+     print_re he ;
+     print_disjunction ~first:false tl
+and print_re =
+ function
+    Types.Char i -> output ("'\\" ^ padded_string_of_int i ^ "'")
+  | Types.Interval (l,u) ->
+     output ("['\\" ^ padded_string_of_int l ^ "'-'\\" ^
+      padded_string_of_int u ^ "']")
+  | Types.Identifier i -> output i
+  | Types.Concat rell ->
+     let foo rel =
+      if List.length rel > 1 then
+       (output "(" ; print_disjunction rel ; output ")")
+      else
+       print_disjunction rel
+     in
+      List.iter foo rell
+;;
+
+(* print_definition prints a definition in the format expected by ocamllex *)
+let print_definition { Types.id = id ; Types.rel = rel } =
+ output ("let " ^ id ^ " =\n   ") ;
+ print_disjunction rel ;
+ output "\n\n"
+;;
+
+(* main *)
+let _ =
+ let lexbuf = Lexing.from_channel stdin in
+  let ucs2_result = Parser.main Lexer.token lexbuf in
+   List.iter print_definition (List.map ucs2_to_utf8 ucs2_result)
+;;
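As a quick, illustrative sanity check of the encoding arithmetic above (not part
of the commit; it assumes types.ml and the functions of ucs2_to_utf8.ml are
available, e.g. in the OCaml toplevel, and the code point 0x00E9 and the range
0x0070-0x0085 are arbitrary examples):

    (* two-byte case: 0xE9 lsr 6 = 0x03 and 0x03 lor 0xC0 = 0xC3;
     *                0xE9 land 0x3F = 0x29 and 0x29 lor 0x80 = 0xA9 *)
    let () =
      (match char_ucs2_to_utf8 0x00E9 with
          Types.Concat [ [Types.Char 0xC3] ; [Types.Char 0xA9] ] ->
            print_endline "0x00E9 -> 0xC3 0xA9"
        | _ -> print_endline "unexpected");
      (* an interval crossing the 1-byte/2-byte boundary is split first, then
       * each piece is encoded between extremes of a common length *)
      match seq_ucs2_to_utf8 (0x0070, 0x0085) with
          [ Types.Interval (0x70, 0x7F) ;
            Types.Concat [ [Types.Char 0xC2] ; [Types.Interval (0x80, 0x85)] ] ] ->
            print_endline "0x0070-0x0085 -> ['\\112'-'\\127'] | '\\194'['\\128'-'\\133']"
        | _ -> print_endline "unexpected"
    ;;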