--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(* The type 'document' represents parsed HTML documents.
+ * Element (name, args, subnodes): an element node for an element of
+ * type 'name' (i.e. written <name ...>...</name>) with arguments 'args'
+ * and subnodes 'subnodes' (the material within the element). The arguments
+ * are simply name/value pairs. Entity references (something like &xy;)
+ * occurring in the values are NOT resolved.
+ * Arguments without values (e.g. <select name="x" multiple>: here,
+ * "multiple" is such an argument) are represented as (name,name), i.e. the
+ * name is returned as the value.
+ * As argument names are case-insensitive, the names are all lowercase.
+ * Data s: a character data node. Again, entity references are kept
+ * verbatim and are not replaced by the text they stand for.
+ *)
+
+
+type document =
+ Element of (string * (string*string) list * document list)
+ | Data of string
+;;
+
+
+val no_end_tag : string list ref;;
+ (* List of tags which are always empty. This variable is pre-configured,
+ * but you may want to change it.
+ * It is important to know which elements are always empty, because HTML
+ * allows the end tag to be omitted for them. For example,
+ * <a><b>x</a> is parsed as
+ * Element("a",[],[ Element("b",[],[]); Data "x" ])
+ * if we know that "b" is an empty element, but it is wrongly parsed as
+ * Element("a",[],[ Element("b",[], [ Data "x"]) ])
+ * if "b" is actually empty but we do not know it.
+ * An example of such a tag is "br".
+ *)
+
+val special_tag : string list ref;;
+ (* List of tags with a special rule for recognizing the end.
+ * This variable is pre-configured, but you may want to change it.
+ * The special rule is that the metacharacters '<', '>' and so on lose
+ * their meaning within the element, and that only the corresponding
+ * end tag stops this kind of scanning. An example is the element
+ * "javascript". Inner elements are not recognized, and the element
+ * can only be ended by </javascript>. (Other elements are also ended
+ * if an enclosing element ends, e.g. "j" in <k><j></k>!)
+ *
+ * Note that comments are not recognized within special elements;
+ * comments are returned as character material.
+ *)
+
+val parse_string : string -> document list
+ (* Parses the HTML text held in a string; returns it as a list of 'document' nodes *)
+
+val parse_file : in_channel -> document list
+ (* Parses the HTML text read from an input channel; returns it as a list of 'document' nodes *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/03/03 01:07:25 gerd
+ * Initial revision.
+ *
+ *
+ *)