2 * ----------------------------------------------------------------------
9 Element of (string * (string*string) list * document list)
14 exception End_of_scan;;
17 let no_end_tag = (* empty HTML elements *)
34 let special_tag = (* other lexical rules *)
42 let rec parse_comment buf =
43 let t = scan_comment buf in
54 let rec parse_doctype buf =
55 let t = scan_doctype buf in
66 let parse_document buf =
67 let current_name = ref "" in
68 let current_atts = ref [] in
69 let current_subs = ref [] in
70 let stack = Stack.create() in
73 let rec next_no_space() =
74 match scan_element buf with
75 Space _ -> next_no_space()
79 let rec parse_atts_lookahead next =
83 begin match next_no_space() with
85 begin match next_no_space() with
87 (String.lowercase n, String.uppercase v) ::
88 parse_atts_lookahead (next_no_space())
90 (String.lowercase n,v) ::
91 parse_atts_lookahead (next_no_space())
99 parse_atts_lookahead (next_no_space())
104 (* <tag name> <==> <tag name="name"> *)
105 [ String.lowercase n, String.lowercase n ]
107 (* assume <tag name ... > <==> <tag name="name" ...> *)
108 ( String.lowercase n, String.lowercase n ) ::
109 parse_atts_lookahead next'
115 parse_atts_lookahead (next_no_space())
117 parse_atts_lookahead (next_no_space())
120 let rec parse_special name =
121 (* Parse until </name> *)
122 match scan_special buf with
127 "</" ^ n ^ parse_special name
131 s ^ parse_special name
137 let rec skip_element() =
139 match scan_element buf with
148 let rec parse_next() =
149 let t = scan_document buf in
158 let name = String.lowercase name in
159 if List.mem name !no_end_tag then begin
160 let atts = parse_atts() in
161 current_subs := (Element(name, atts, [])) :: !current_subs;
164 else if List.mem name !special_tag then begin
165 let atts = parse_atts() in
166 let data = parse_special name in
169 current_subs := (Element(name, atts, [Data data])) :: !current_subs;
173 let atts = parse_atts() in
174 Stack.push (!current_name, !current_atts, !current_subs) stack;
175 current_name := name;
176 current_atts := atts;
181 current_subs := (Data data) :: !current_subs;
183 | Lelementend name ->
184 let name = String.lowercase name in
187 (* Search the element to close on the stack: *)
188 let found = ref (name = !current_name) in
190 (fun (old_name, _, _) ->
191 if name = old_name then found := true)
193 (* If not found, the end tag is wrong. Simply ignore it. *)
197 (* Put the current element on to the stack: *)
198 Stack.push (!current_name, !current_atts, !current_subs) stack;
199 (* If found: Remove the elements from the stack, and append
200 * them to the previous element as sub elements
203 let old_name, old_atts, old_subs = Stack.pop stack in
204 (* or raise Stack.Empty *)
205 if old_name = name then
206 old_name, old_atts, old_subs
208 let older_name, older_atts, older_subs = remove() in
211 (Element (old_name, old_atts, List.rev old_subs) :: older_subs)
213 let old_name, old_atts, old_subs = remove() in
214 (* Remove one more element: the element containing the element
215 * currently being closed.
217 let new_name, new_atts, new_subs = Stack.pop stack in
218 current_name := new_name;
219 current_atts := new_atts;
220 current_subs := (Element (old_name, old_atts, List.rev old_subs))
232 List.rev !current_subs
235 (* Close all remaining elements: *)
236 Stack.push (!current_name, !current_atts, !current_subs) stack;
238 let old_name, old_atts, old_subs = Stack.pop stack in
239 (* or raise Stack.Empty *)
241 let older_name, older_atts, older_subs = remove() in
244 (Element (old_name, old_atts, List.rev old_subs) :: older_subs)
247 old_name, old_atts, old_subs
249 let name, atts, subs = remove() in
255 let buf = Lexing.from_string s in
261 let buf = Lexing.from_channel fd in
265 (* ======================================================================
269 * Revision 1.1 2000/11/17 09:57:28 lpadovan
272 * Revision 1.1 2000/03/03 01:07:25 gerd