X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fpxp%2Fpxp_types.mli;fp=helm%2FDEVEL%2Fpxp%2Fpxp%2Fpxp_types.mli;h=e8b471170a450ee9363291d50f5c3366e3893a94;hb=c03d2c1fdab8d228cb88aaba5ca0f556318bebc5;hp=0000000000000000000000000000000000000000;hpb=758057e85325f94cd88583feb1fdf6b038e35055;p=helm.git diff --git a/helm/DEVEL/pxp/pxp/pxp_types.mli b/helm/DEVEL/pxp/pxp/pxp_types.mli new file mode 100644 index 000000000..e8b471170 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/pxp_types.mli @@ -0,0 +1,224 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * PXP: The polymorphic XML parser for Objective Caml. + * Copyright 1999 by Gerd Stolpmann. See LICENSE for details. + *) + + +type ext_id = + System of string + | Public of (string * string) + | Anonymous + + (* external identifiers are either "system identifiers" (filenames or URLs), + * or "public identifiers" Public(id,sysid) where "id" is the representation + * of the public ID, and "sysid" a fallback system ID, or the empty string. + * + * New in PXP: Sometimes the external ID is not known. This case can be + * referred to as Anonymous ID. + * + * Encoding: The identifiers are _always_ encoded as UTF8 strings, + * regardless of whether another encoding is configured for the parser. + * TODO: implement this + *) + + +type dtd_id = + External of ext_id (* DTD is completely external *) + | Derived of ext_id (* DTD is derived from an external DTD *) + | Internal (* DTD is completely internal *) +;; + +type content_model_type = + Unspecified (* A specification of the model has not yet been + * found + *) + | Empty (* Nothing is allowed as content *) + | Any (* Everything is allowed as content *) + | Mixed of mixed_spec list (* The contents consist of elements and PCDATA + * in arbitrary order. What is allowed in + * particular is given as mixed_spec. 
+ *) + | Regexp of regexp_spec (* The contents are elements following this regular + * expression + *) + +and mixed_spec = + MPCDATA (* PCDATA children are allowed *) + | MChild of string (* This kind of Element is allowed *) + +and regexp_spec = + Optional of regexp_spec (* subexpression? *) + | Repeated of regexp_spec (* subexpression* *) + | Repeated1 of regexp_spec (* subexpression+ *) + | Alt of regexp_spec list (* subexpr1 | subexpr2 | ... | subexprN *) + | Seq of regexp_spec list (* subexpr1 , subexpr2 , ... , subexprN *) + | Child of string (* This kind of Element is allowed here *) +;; + + +type att_type = + A_cdata (* CDATA *) + | A_id (* ID *) + | A_idref (* IDREF *) + | A_idrefs (* IDREFS *) + | A_entity (* ENTITY *) + | A_entities (* ENTITIES *) + | A_nmtoken (* NMTOKEN *) + | A_nmtokens (* NMTOKENS *) + | A_notation of string list (* NOTATION (name1 | name2 | ... | nameN) *) + | A_enum of string list (* (name1 | name2 | ... | nameN) *) +;; + + +type att_default = + D_required (* #REQUIRED *) + | D_implied (* #IMPLIED *) + | D_default of string (* -- The value is already expanded *) + | D_fixed of string (* #FIXED -- The value is already expanded *) +;; + + +type att_value = + Value of string (* a single value *) + | Valuelist of string list (* a list of values *) + | Implied_value (* a value left out *) +;; + + +class type collect_warnings = + object + method warn : string -> unit + end +;; + + +class drop_warnings : collect_warnings;; + + +type encoding = Netconversion.encoding;; + (* We accept all encodings for character sets which are defined in + * Netconversion (package netstring). + *) + +type rep_encoding = + (* The subset of 'encoding' that may be used for internal representation + * of strings. + * Note: The following encodings are ASCII-compatible! This is an important + * property used throughout the whole PXP code. 
+ *) + [ `Enc_utf8 (* UTF-8 *) + | `Enc_iso88591 (* ISO-8859-1 *) + ] +;; + + +exception Validation_error of string + (* Violation of a validity constraint *) + +exception WF_error of string + (* Violation of a well-formedness constraint *) + +exception Error of string + (* Other error *) + +exception Character_not_supported + +exception At of (string * exn) + (* The string is a description where the exn happened. The exn value can + * again be At(_,_) (for example, when an entity within an entity causes + * the error). + *) + +exception Undeclared + (* Indicates that no declaration is available and because of this every kind + * of usage is allowed. + *) + +val string_of_exn : exn -> string + (* Converts a PXP exception into a readable string *) + + +type output_stream = + Out_buffer of Buffer.t + | Out_channel of out_channel + | Out_function of (string -> int -> int -> unit) + +val write : output_stream -> string -> int -> int -> unit + (* write os s pos len: Writes the string to the buffer/channel/stream *) + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:29 lpadovan + * Initial revision + * + * Revision 1.8 2000/08/14 22:24:55 gerd + * Moved the module Pxp_encoding to the netstring package under + * the new name Netconversion. + * + * Revision 1.7 2000/07/27 00:41:15 gerd + * new 8 bit codes + * + * Revision 1.6 2000/07/16 18:31:09 gerd + * The exception Illegal_character has been dropped. + * + * Revision 1.5 2000/07/16 16:34:21 gerd + * Updated comments. + * + * Revision 1.4 2000/07/14 21:25:27 gerd + * Simplified the type 'collect_warnings'. + * + * Revision 1.3 2000/07/08 16:23:50 gerd + * Added the exception 'Error'. + * + * Revision 1.2 2000/07/04 22:08:26 gerd + * type ext_id: New variant Anonymous. - The System and Public + * variants are now encoded as UTF-8. + * collect_warnings is now a class type only. New class + * drop_warnings. 
+ * New functions encoding_of_string and string_of_encoding. + * + * Revision 1.1 2000/05/29 23:48:38 gerd + * Changed module names: + * Markup_aux into Pxp_aux + * Markup_codewriter into Pxp_codewriter + * Markup_document into Pxp_document + * Markup_dtd into Pxp_dtd + * Markup_entity into Pxp_entity + * Markup_lexer_types into Pxp_lexer_types + * Markup_reader into Pxp_reader + * Markup_types into Pxp_types + * Markup_yacc into Pxp_yacc + * See directory "compatibility" for (almost) compatible wrappers emulating + * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc. + * + * ====================================================================== + * Old logs from Markup_types.mli: + * + * Revision 1.7 2000/05/29 21:14:57 gerd + * Changed the type 'encoding' into a polymorphic variant. + * + * Revision 1.6 2000/05/20 20:31:40 gerd + * Big change: Added support for various encodings of the + * internal representation. + * + * Revision 1.5 2000/05/01 20:43:25 gerd + * New type output_stream; new function 'write'. + * + * Revision 1.4 1999/09/01 16:25:35 gerd + * Dropped Illegal_token and Content_not_allowed_here. WF_error can + * be used instead. + * + * Revision 1.3 1999/08/15 02:22:40 gerd + * Added exception Undeclared. + * + * Revision 1.2 1999/08/14 22:15:17 gerd + * New class "collect_warnings". + * + * Revision 1.1 1999/08/10 00:35:52 gerd + * Initial revision. + * + * + *)