6 (* Returns a channel reading the bytes from the string s *)
7 let rd, wr = Unix.pipe() in
8 let ch_rd = Unix.in_channel_of_descr rd in
9 let ch_wr = Unix.out_channel_of_descr wr in
13 output_string ch_wr s;
21 (**********************************************************************)
24 (* Reads from a string (without recoding it), checks the lexbuf size *)
25 let s = "0123456789abc" in
26 let r = new resolve_read_this_string s in
27 r # init_rep_encoding `Enc_iso88591;
28 r # init_warner (new drop_warnings);
29 let lb = r # open_in Anonymous in
30 let c = nextchar lb in
31 assert (c = Some '0');
32 assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
33 (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
34 * now be at the end of the buffer indicating that the buffer is now
45 let c = nextchar lb in
46 assert (c = Some '9');
47 assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
48 r # change_encoding "";
49 let c = nextchar lb in
50 assert (c = Some 'a');
51 assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
53 let c = nextchar lb in
54 assert (c = Some 'c');
55 let c = nextchar lb in
63 (* Like t001, but reads from a channel *)
64 let ch = make_channel "0123456789abc" in
65 let r = new resolve_read_this_channel ch in
66 r # init_rep_encoding `Enc_iso88591;
67 r # init_warner (new drop_warnings);
68 let lb = r # open_in Anonymous in
69 let c = nextchar lb in
70 assert (c = Some '0');
71 assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
72 (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
73 * now be at the end of the buffer indicating that the buffer is now
84 let c = nextchar lb in
85 assert (c = Some '9');
86 assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
87 r # change_encoding "";
88 let c = nextchar lb in
89 assert (c = Some 'a');
90 assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
92 let c = nextchar lb in
93 assert (c = Some 'c');
94 let c = nextchar lb in
102 (* Tests non-automatic encoding conversion from ISO-8859-1 to UTF-8 *)
103 let s = "0«»°áàâãäÁÀÂÃÄéèêëíìîïÍÌÎÏóòôõøöÓÒÔÕØÖúùûüýÿÝßç¡¿ñÑ" in
104 let r = new resolve_read_this_string ~fixenc:`Enc_iso88591 s in
105 r # init_rep_encoding `Enc_utf8;
106 r # init_warner (new drop_warnings);
107 let lb = r # open_in Anonymous in
108 let c = ref (nextchar lb) in
109 assert (!c = Some '0');
110 assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
111 (* Note: because we initialize the resolver with ~fixenc, the resolver can
112 * fill the buffer with more than one byte from the beginning.
117 Some x -> u := !u ^ String.make 1 x
123 !u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
128 (* Tests non-automatic encoding conversion from UTF-8 to ISO-8859-1 *)
129 let s = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145" in
130 let r = new resolve_read_this_string ~fixenc:`Enc_utf8 s in
131 r # init_rep_encoding `Enc_iso88591;
132 r # init_warner (new drop_warnings);
133 let lb = r # open_in Anonymous in
134 let c = ref (nextchar lb) in
135 assert (!c = Some '0');
136 assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
137 (* Note: because we initialize the resolver with ~fixenc, the resolver can
138 * fill the buffer with more than one byte from the beginning.
143 Some x -> u := !u ^ String.make 1 x
149 !u = "0«»°áàâãäÁÀÂÃÄéèêëíìîïÍÌÎÏóòôõøöÓÒÔÕØÖúùûüýÿÝßç¡¿ñÑ"
154 (* Tests automatic encoding conversion from UTF-8 to ISO-8859-1 *)
155 let s = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145" in
156 let r = new resolve_read_this_string s in
157 r # init_rep_encoding `Enc_iso88591;
158 r # init_warner (new drop_warnings);
159 let lb = r # open_in Anonymous in
160 let c = ref (nextchar lb) in
161 assert (!c = Some '0');
162 assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
166 Some x -> u := !u ^ String.make 1 x
172 !u = "0«»°áàâãäÁÀÂÃÄéèêëíìîïÍÌÎÏóòôõøöÓÒÔÕØÖúùûüýÿÝßç¡¿ñÑ"
177 (* Tests automatic encoding conversion from UTF-16-BE to UTF-8
178 * This variant invokes change_encoding early.
180 let s = "\254\255\0000\000«\000»\000°\000á\000à\000â\000ã\000ä\000Á\000À\000Â\000Ã\000Ä\000é\000è\000ê\000ë\000í\000ì\000î\000ï\000Í\000Ì\000Î\000Ï\000ó\000ò\000ô\000õ\000ø\000ö\000Ó\000Ò\000Ô\000Õ\000Ø\000Ö\000ú\000ù\000û\000ü\000ý\000ÿ\000Ý\000ß\000ç\000¡\000¿\000ñ\000Ñ" in
181 let r = new resolve_read_this_string s in
182 r # init_rep_encoding `Enc_utf8;
183 r # init_warner (new drop_warnings);
184 let lb = r # open_in Anonymous in
185 let c = ref (nextchar lb) in
186 assert (!c = Some '0');
187 assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
188 r # change_encoding "";
192 Some x -> u := !u ^ String.make 1 x
198 !u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
203 (* Tests automatic encoding conversion from UTF-16-BE to UTF-8
204 * This variant does not invoke change_encoding
206 let s = "\254\255\0000\000«\000»\000°\000á\000à\000â\000ã\000ä\000Á\000À\000Â\000Ã\000Ä\000é\000è\000ê\000ë\000í\000ì\000î\000ï\000Í\000Ì\000Î\000Ï\000ó\000ò\000ô\000õ\000ø\000ö\000Ó\000Ò\000Ô\000Õ\000Ø\000Ö\000ú\000ù\000û\000ü\000ý\000ÿ\000Ý\000ß\000ç\000¡\000¿\000ñ\000Ñ" in
207 let r = new resolve_read_this_string s in
208 r # init_rep_encoding `Enc_utf8;
209 r # init_warner (new drop_warnings);
210 let lb = r # open_in Anonymous in
211 let c = ref (nextchar lb) in
212 assert (!c = Some '0');
213 assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
217 Some x -> u := !u ^ String.make 1 x
223 !u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
226 (**********************************************************************)
229 (* Reads from a file without recoding it; the system ID is a complete file://localhost URL with an absolute path *)
230 let r = new resolve_as_file () in
231 r # init_rep_encoding `Enc_utf8;
232 r # init_warner (new drop_warnings);
233 let cwd = Sys.getcwd() in
234 let lb = r # open_in (System ("file://localhost" ^ cwd ^ "/t100.dat")) in
235 let c = nextchar lb in
236 assert (c = Some '0');
237 assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
238 (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
239 * now be at the end of the buffer indicating that the buffer is now
245 let c = nextchar lb in
246 assert (c = Some '9');
252 (* Reads from a file without recoding it; the system ID omits the URL scheme and starts with //localhost followed by an absolute path *)
253 let r = new resolve_as_file () in
254 r # init_rep_encoding `Enc_utf8;
255 r # init_warner (new drop_warnings);
256 let cwd = Sys.getcwd() in
257 let lb = r # open_in (System ("//localhost" ^ cwd ^ "/t100.dat")) in
258 let c = nextchar lb in
259 assert (c = Some '0');
260 assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
261 (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
262 * now be at the end of the buffer indicating that the buffer is now
268 let c = nextchar lb in
269 assert (c = Some '9');
275 (* Reads from a file without recoding it; the system ID is only the absolute path of t100.dat *)
276 let r = new resolve_as_file () in
277 r # init_rep_encoding `Enc_utf8;
278 r # init_warner (new drop_warnings);
279 let cwd = Sys.getcwd() in
280 let lb = r # open_in (System (cwd ^ "/t100.dat")) in
281 let c = nextchar lb in
282 assert (c = Some '0');
283 assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
284 (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
285 * now be at the end of the buffer indicating that the buffer is now
291 let c = nextchar lb in
292 assert (c = Some '9');
298 (* Reads from a file without recoding it; the system ID is the bare relative name t100.dat *)
299 let r = new resolve_as_file () in
300 r # init_rep_encoding `Enc_utf8;
301 r # init_warner (new drop_warnings);
302 let lb = r # open_in (System "t100.dat") in
303 let c = nextchar lb in
304 assert (c = Some '0');
305 assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
306 (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
307 * now be at the end of the buffer indicating that the buffer is now
313 let c = nextchar lb in
314 assert (c = Some '9');
319 (**********************************************************************)
322 (* Checks whether relative URLs are properly handled *)
323 let r = new resolve_as_file () in
324 r # init_rep_encoding `Enc_utf8;
325 r # init_warner (new drop_warnings);
326 let lb = r # open_in (System "t100.dat") in
327 let c = nextchar lb in
328 assert (c = Some '0');
329 assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
330 (* Note: the end of lb.lex_buffer is filled up, so lb.lex_curr_pos must
331 * now be at the end of the buffer indicating that the buffer is now
337 let r' = r # clone in
338 let lb' = r' # open_in (System "t100.dat") in
339 let c = nextchar lb' in
340 assert (c = Some '0');
342 ignore(nextchar lb');
344 let c = nextchar lb' in
345 assert (c = Some '9');
347 let c = nextchar lb in
348 assert (c = Some '9');
353 (**********************************************************************)
354 (* Tests whether the encoding handling of System IDs is okay *)
357 (* Check the technique for the following tests:
358 * (Also checks 'combine' to some extent.)
360 let r1 = new resolve_read_this_string
362 ~fixenc:`Enc_iso88591
364 let r2 = new resolve_read_this_string
366 ~fixenc:`Enc_iso88591
367 "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM 'b.xml'> ]> <a>&ae;</a>" in
368 let r = new combine [ r1; r2 ] in
369 (* It should now be possible to resolve &ae; *)
371 Pxp_yacc.parse_document_entity
372 { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_iso88591 }
373 (Pxp_yacc.ExtID(System "a.xml", r))
374 Pxp_yacc.default_spec
381 (* Check that System IDs are converted to UTF-8. rep_encoding = ISO-8859-1 *)
382 let r1 = new resolve_read_this_string
383 ~id:(System "\195\164.xml") (* This is a UTF-8 "ä"! *)
384 ~fixenc:`Enc_iso88591
386 let r2 = new resolve_read_this_string
388 ~fixenc:`Enc_iso88591
389 "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM 'ä.xml'> ]> <a>&ae;</a>" in
390 let r = new combine [ r1; r2 ] in
391 (* It should now be possible to resolve &ae; *)
393 Pxp_yacc.parse_document_entity
394 { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_iso88591 }
395 (Pxp_yacc.ExtID(System "a.xml", r))
396 Pxp_yacc.default_spec
403 (* Check that System IDs are converted to UTF-8. rep_encoding = UTF-8 *)
404 let r1 = new resolve_read_this_string
405 ~id:(System "\195\164.xml")
406 ~fixenc:`Enc_iso88591
408 let r2 = new resolve_read_this_string
410 ~fixenc:`Enc_iso88591
411 "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM 'ä.xml'> ]> <a>&ae;</a>" in
412 let r = new combine [ r1; r2 ] in
413 (* It should now be possible to resolve &ae; *)
415 Pxp_yacc.parse_document_entity
416 { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_utf8 }
417 (Pxp_yacc.ExtID(System "a.xml", r))
418 Pxp_yacc.default_spec
423 (**********************************************************************)
427 print_string ("Reader test " ^ n);
432 print_endline " FAILED!!!!";
435 print_endline (" FAILED: " ^ string_of_exn error)