2 * ----------------------------------------------------------------------
(* White space: TAB, LF, CR and SPACE. *)
let ws = [ '\t' '\n' '\r' ' ' ]
(* Decimal digits of the ASCII range. *)
let ascii_digit = [ '0'-'9' ]

(* Hexadecimal digits of the ASCII range, in either letter case.
 * The letters stop at 'f' / 'F': 'g' and 'h' are not hexadecimal digits
 * and must not be accepted here.
 *)
let ascii_hexdigit = [ '0'-'9' 'a'-'f' 'A'-'F' ]
(* One name character: a letter, a digit, a combining character, an
 * extender, or one of the four punctuation characters listed in the set.
 * letter, digit, combiningChar and extender are defined outside this
 * section.
 *)
let namechar =
  letter | digit | combiningChar | extender | [ '.' ':' '-' '_' ]
(* A name: a letter, '_' or ':' followed by any number of name characters. *)
let name = ( letter | [ '_' ':' ] ) namechar*
(* A name token: a non-empty run of name characters. *)
let nmtoken = namechar namechar*
18 (* Valid characters are:
19 * #9, #10, #13, #32-#xD7FF, #xE000-#xFFFD, #x10000-#x10FFFF
21 * #xD7FF as UTF-8 sequence:
22 * 1110xxxx 10xxxxxx 10xxxxxx
23 * 1110...D 10...7.. 10.F...F = ED 9F BF
25 * #xE000 as UTF-8 sequence:
26 * 1110xxxx 10xxxxxx 10xxxxxx
27 * 1110...E 10...0.. 10.0...0 = EE 80 80
 * UTF-8 sequence EF BE BF as character:
 * 1110xxxx 10xxxxxx 10xxxxxx
 * 1110...F 10111110 10111111 = #xFFBF
33 * #xFFFD as UTF-8 sequence:
34 * 1110xxxx 10xxxxxx 10xxxxxx
35 * 1110...F 10...F.. 10.F...D = EF BF BD
37 * #x010000 as UTF-8 sequence:
38 * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
39 * 111100.. 10.1...0 10...0.. 10.0...0 = F0 90 80 80
41 * #x10FFFF as UTF-8 sequence:
42 * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
43 * 111101.. 10.0...F 10...F.. 10.F...F = F4 8F BF BF
(* One well-formed UTF-8 multi-byte sequence, i.e. one character above #x7F.
 * Each alternative covers one contiguous range of code points:
 * - lead bytes C0 and C1, and E0 followed by 80-9F, are left out because
 *   they could only form overlong encodings of smaller code points;
 * - ED is limited to second bytes 80-9F, which leaves out #xD800-#xDFFF;
 * - EF BF BE and EF BF BF (#xFFFE and #xFFFF) are left out;
 * - F0 starts at second byte 90 and F4 stops at second byte 8F, so the
 *   four-byte forms cover exactly #x10000-#x10FFFF.
 *)
let non_ascii_character =
    ['\194'-'\223'] ['\128'-'\191']                     (* #x80-#x7FF *)
  | '\224' ['\160'-'\191'] ['\128'-'\191']              (* #x800-#xFFF *)
  | ['\225'-'\236'] ['\128'-'\191'] ['\128'-'\191']     (* #x1000-#xCFFF *)
  | '\237' ['\128'-'\159'] ['\128'-'\191']              (* #xD000-#xD7FF *)
  | '\238' ['\128'-'\191'] ['\128'-'\191']              (* #xE000-#xEFFF *)
  | '\239' ['\128'-'\190'] ['\128'-'\191']              (* #xF000-#xFFBF *)
  | '\239' '\191' ['\128'-'\189']                       (* #xFFC0-#xFFFD *)
  | '\240' ['\144'-'\191'] ['\128'-'\191'] ['\128'-'\191']
                                                        (* #x010000-#x03FFFF *)
  | ['\241'-'\243'] ['\128'-'\191'] ['\128'-'\191'] ['\128'-'\191']
                                                        (* #x040000-#x0FFFFF *)
  | '\244' ['\128'-'\143'] ['\128'-'\191'] ['\128'-'\191']
                                                        (* #x100000-#x10FFFF *)
62 [ '\009' '\010' '\013' '\032'-'\127' ]
(* Single-byte characters TAB, LF, CR and #x20-#x7F, leaving out '?'
 * ('?' = '\063').
 *)
let character_except_question_mark =
  [ '\t' '\n' '\r' ' '-'>' '@'-'\127' ]
(* Single-byte characters TAB, LF, CR and #x20-#x7F, leaving out '>'
 * ('>' = '\062').
 *)
let character_except_right_angle_bracket =
  [ '\t' '\n' '\r' ' '-'=' '?'-'\127' ]
(* Single-byte characters TAB, LF, CR and #x20-#x7F, leaving out '-'
 * ('-' = '\045').
 *)
let character_except_minus =
  [ '\t' '\n' '\r' ' '-',' '.'-'\127' ]
(* Any character except the double quote, '"' = '\034'.
 * The single-byte part stops at '\127'.  Characters above that are accepted
 * only as well-formed UTF-8 sequences through non_ascii_character, not as
 * arbitrary bytes '\128'-'\255'; this matches the other character classes
 * of this file.
 *)
let character_except_quot =
  [ '\009' '\010' '\013' '\032'-'\033' '\035'-'\127' ]
| non_ascii_character
(* Any character except the apostrophe, '\'' = '\039'.
 * The single-byte part stops at '\127'.  Characters above that are accepted
 * only as well-formed UTF-8 sequences through non_ascii_character, not as
 * arbitrary bytes '\128'-'\255'; this matches the other character classes
 * of this file.
 *)
let character_except_apos =
  [ '\009' '\010' '\013' '\032'-'\038' '\040'-'\127' ]
| non_ascii_character
(* Text of a processing instruction: a run of characters other than '?',
 * followed by any number of groups.  Each group is a '?', then one
 * character other than '>', then a further run of characters other
 * than '?'.
 * NOTE(review): the character following a '?' may itself be '?', and the
 * run after it may start with '>', so the sequence ??> is accepted by this
 * expression -- confirm that this is intended.
 *)
let pi_string =
  character_except_question_mark*
  ( '?' character_except_right_angle_bracket character_except_question_mark* )*
(* Text of a comment: every '-' must be followed by at least one character
 * other than '-', so the text neither contains two adjacent '-' nor ends
 * with '-'.
 *)
let comment_string =
  character_except_minus*
  ( '-' character_except_minus character_except_minus* )*
(* Ordinary character data: TAB, the single-byte characters #x20-#x7F
 * without '&' = '\038', '<' = '\060' and ']' = '\093', or any multi-byte
 * character.  LF and CR are not part of this class.
 *)
let normal_character =
    non_ascii_character
  | [ '\t' ' '-'%' '\''-';' '='-'\\' '^'-'\127' ]
(* Any character except ']' = '\093': TAB, LF, CR, the remaining
 * single-byte characters #x20-#x7F, or any multi-byte character.
 *)
let character_except_rbracket =
    non_ascii_character
  | [ '\t' '\n' '\r' ' '-'\\' '^'-'\127' ]
(* Any character except ']' = '\093' and '>' = '\062': TAB, LF, CR, the
 * remaining single-byte characters #x20-#x7F, or any multi-byte character.
 *)
let character_except_rbracket_rangle =
    non_ascii_character
  | [ '\t' '\n' '\r' ' '-'=' '?'-'\\' '^'-'\127' ]
118 character_except_rbracket*
119 ( "]" character_except_rbracket+ |
120 "]]" ']'* character_except_rbracket_rangle character_except_rbracket*
(* A single-byte character in #x20-#x7F other than '&' = '\038' and
 * '<' = '\060', or any multi-byte character.  TAB, LF and CR are not
 * part of this class.
 *)
let printable_character_except_amp_lt =
    non_ascii_character
  | [ ' '-'%' '\''-';' '='-'\127' ]
(* A single-byte character in #x20-#x7F other than '%' = '\037' and
 * '&' = '\038', or any multi-byte character.  TAB, LF and CR are not
 * part of this class.
 *)
let printable_character_except_amp_percent =
    non_ascii_character
  | [ ' '-'$' '\''-'\127' ]
(* Any character except '<' = '\060', ']' = '\093', '"' = '\034' and
 * '\'' = '\039': TAB, LF, CR, the remaining single-byte characters
 * #x20-#x7F, or any multi-byte character.
 *)
let character_except_special =
    non_ascii_character
  | [ '\t' '\n' '\r' ' '-'!' '#'-'&' '('-';' '='-'\\' '^'-'\127' ]
145 (* ======================================================================
149 * Revision 1.1 2000/11/17 09:57:32 lpadovan
152 * Revision 1.2 2000/08/26 19:58:08 gerd
153 * Bugfix in character_except_apos. The bug caused that attribute
154 * values delimited by ' could not be scanned at all.
156 * Revision 1.1 2000/05/20 20:33:25 gerd