| |
1 /* |
| |
2 The contents of this file are subject to the Mozilla Public License |
| |
3 Version 1.1 (the "License"); you may not use this file except in |
| |
4 compliance with the License. You may obtain a copy of the License at |
| |
5 http://www.mozilla.org/MPL/ |
| |
6 |
| |
7 Software distributed under the License is distributed on an "AS IS" |
| |
8 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the |
| |
9 License for the specific language governing rights and limitations |
| |
10 under the License. |
| |
11 |
| |
12 The Original Code is expat. |
| |
13 |
| |
14 The Initial Developer of the Original Code is James Clark. |
| |
15 Portions created by James Clark are Copyright (C) 1998, 1999 |
| |
16 James Clark. All Rights Reserved. |
| |
17 |
| |
18 Contributor(s): |
| |
19 |
| |
20 Alternatively, the contents of this file may be used under the terms |
| |
21 of the GNU General Public License (the "GPL"), in which case the |
| |
22 provisions of the GPL are applicable instead of those above. If you |
| |
23 wish to allow use of your version of this file only under the terms of |
| |
24 the GPL and not to allow others to use your version of this file under |
| |
25 the MPL, indicate your decision by deleting the provisions above and |
| |
26 replace them with the notice and other provisions required by the |
| |
27 GPL. If you do not delete the provisions above, a recipient may use |
| |
28 your version of this file under either the MPL or the GPL. |
| |
29 */ |
| |
30 |
| |
/* Fallback used when IS_INVALID_CHAR has not been defined before this point:
   no multi-byte sequence is ever reported as invalid. */
#ifndef IS_INVALID_CHAR
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif

/* Expands to the `case BT_LEADn:` arm of a switch over byte types.
     - fewer than n bytes left before `end`  -> return XML_TOK_PARTIAL_CHAR
     - IS_INVALID_CHAR rejects the sequence  -> *nextTokPtr = ptr,
                                                return XML_TOK_INVALID
     - otherwise                             -> ptr += n, break
   `enc` and `end` are NOT parameters: they are taken from the scope in which
   the macro is expanded.  n is token-pasted onto BT_LEAD, so it must be a
   plain token such as 2, 3 or 4; ptr must be a modifiable lvalue. */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - (ptr) < (n)) \
       return XML_TOK_PARTIAL_CHAR; \
     if (IS_INVALID_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     (ptr) += (n); \
     break;

/* Expands to every switch arm that deals with bytes which can never be
   accepted as-is: the three multi-byte lead cases above, followed by
   BT_NONXML, BT_MALFORM and BT_TRAIL, which store ptr in *nextTokPtr and
   return XML_TOK_INVALID.  The expansion ends in a return statement, so the
   next case label may follow it directly. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_NONXML: \
  case BT_MALFORM: \
  case BT_TRAIL: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
| |
55 |
| |
/* Expands to the `case BT_LEADn:` arm for an n-byte character inside a name.
     - fewer than n bytes left before end -> return XML_TOK_PARTIAL_CHAR
     - !IS_NAME_CHAR(enc, ptr, n)         -> *nextTokPtr = ptr,
                                             return XML_TOK_INVALID
     - otherwise                          -> ptr += n, break
   n is token-pasted onto BT_LEAD and forwarded as-is to IS_NAME_CHAR, so it
   must be a plain token such as 2, 3 or 4; ptr must be a modifiable lvalue. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if ((end) - (ptr) < (n)) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NAME_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     (ptr) += (n); \
     break;

/* Expands to every switch arm that accepts one name character and advances
   ptr past it.  BT_NONASCII is first validated with IS_NAME_CHAR_MINBPC and
   then shares the single-unit advance used for BT_NMSTRT, BT_HEX, BT_DIGIT,
   BT_NAME and BT_MINUS; multi-byte leads are handled by CHECK_NAME_CASE.
   Each accepting arm ends in `break`, so control leaves the enclosing switch
   with ptr on the following character. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
  case BT_DIGIT: \
  case BT_NAME: \
  case BT_MINUS: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
| |
83 |
| |
/* Expands to the `case BT_LEADn:` arm for an n-byte character that must be
   able to start a name.
     - fewer than n bytes left before end -> return XML_TOK_PARTIAL_CHAR
     - !IS_NMSTRT_CHAR(enc, ptr, n)       -> *nextTokPtr = ptr,
                                             return XML_TOK_INVALID
     - otherwise                          -> ptr += n, break
   n is token-pasted onto BT_LEAD and forwarded as-is to IS_NMSTRT_CHAR, so
   it must be a plain token such as 2, 3 or 4; ptr must be a modifiable
   lvalue. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if ((end) - (ptr) < (n)) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     (ptr) += (n); \
     break;

/* Expands to every switch arm that accepts one name-start character and
   advances ptr past it.  BT_NONASCII is first validated with
   IS_NMSTRT_CHAR_MINBPC and then shares the single-unit advance used for
   BT_NMSTRT and BT_HEX; multi-byte leads are handled by CHECK_NMSTRT_CASE.
   Each accepting arm ends in `break`, so control leaves the enclosing switch
   with ptr on the following character. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
| |
108 |
| |
/* Name-decoration hook: unless PREFIX has already been defined, identifiers
   wrapped in it are used unchanged. */
#if !defined(PREFIX)
#define PREFIX(ident) ident
#endif
| |
112 |
| |
113 /* ptr points to character following "<!-" */ |
| |
114 |
| |
115 static |
| |
116 int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, |
| |
117 const char **nextTokPtr) |
| |
118 { |
| |
119 if (ptr != end) { |
| |
120 if (!CHAR_MATCHES(enc, ptr, '-')) { |
| |
121 *nextTokPtr = ptr; |
| |
122 return XML_TOK_INVALID; |
| |
123 } |
| |
124 ptr += MINBPC(enc); |
| |
125 while (ptr != end) { |
| |
126 switch (BYTE_TYPE(enc, ptr)) { |
| |
127 INVALID_CASES(ptr, nextTokPtr) |
| |
128 case BT_MINUS: |
| |
129 if ((ptr += MINBPC(enc)) == end) |
| |
130 return XML_TOK_PARTIAL; |
| |
131 if (CHAR_MATCHES(enc, ptr, '-')) { |
| |
132 if ((ptr += MINBPC(enc)) == end) |
| |
133 return XML_TOK_PARTIAL; |
| |
134 if (!CHAR_MATCHES(enc, ptr, '>')) { |
| |
135 *nextTokPtr = ptr; |
| |
136 return XML_TOK_INVALID; |
| |
137 } |
| |
138 *nextTokPtr = ptr + MINBPC(enc); |
| |
139 return XML_TOK_COMMENT; |
| |
140 } |
| |
141 break; |
| |
142 default: |
| |
143 ptr += MINBPC(enc); |
| |
144 break; |
| |
145 } |
| |
146 } |
| |
147 } |
| |
148 return XML_TOK_PARTIAL; |
| |
149 } |
| |
150 |
| |
151 /* ptr points to character following "<!" */ |
| |
152 |
| |
153 static |
| |
154 int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, |
| |
155 const char **nextTokPtr) |
| |
156 { |
| |
157 if (ptr == end) |
| |
158 return XML_TOK_PARTIAL; |
| |
159 switch (BYTE_TYPE(enc, ptr)) { |
| |
160 case BT_MINUS: |
| |
161 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
162 case BT_LSQB: |
| |
163 *nextTokPtr = ptr + MINBPC(enc); |
| |
164 return XML_TOK_COND_SECT_OPEN; |
| |
165 case BT_NMSTRT: |
| |
166 case BT_HEX: |
| |
167 ptr += MINBPC(enc); |
| |
168 break; |
| |
169 default: |
| |
170 *nextTokPtr = ptr; |
| |
171 return XML_TOK_INVALID; |
| |
172 } |
| |
173 while (ptr != end) { |
| |
174 switch (BYTE_TYPE(enc, ptr)) { |
| |
175 case BT_PERCNT: |
| |
176 if (ptr + MINBPC(enc) == end) |
| |
177 return XML_TOK_PARTIAL; |
| |
178 /* don't allow <!ENTITY% foo "whatever"> */ |
| |
179 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { |
| |
180 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: |
| |
181 *nextTokPtr = ptr; |
| |
182 return XML_TOK_INVALID; |
| |
183 } |
| |
184 /* fall through */ |
| |
185 case BT_S: case BT_CR: case BT_LF: |
| |
186 *nextTokPtr = ptr; |
| |
187 return XML_TOK_DECL_OPEN; |
| |
188 case BT_NMSTRT: |
| |
189 case BT_HEX: |
| |
190 ptr += MINBPC(enc); |
| |
191 break; |
| |
192 default: |
| |
193 *nextTokPtr = ptr; |
| |
194 return XML_TOK_INVALID; |
| |
195 } |
| |
196 } |
| |
197 return XML_TOK_PARTIAL; |
| |
198 } |
| |
199 |
| |
200 static |
| |
201 int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) |
| |
202 { |
| |
203 int upper = 0; |
| |
204 *tokPtr = XML_TOK_PI; |
| |
205 if (end - ptr != MINBPC(enc)*3) |
| |
206 return 1; |
| |
207 switch (BYTE_TO_ASCII(enc, ptr)) { |
| |
208 case 'x': |
| |
209 break; |
| |
210 case 'X': |
| |
211 upper = 1; |
| |
212 break; |
| |
213 default: |
| |
214 return 1; |
| |
215 } |
| |
216 ptr += MINBPC(enc); |
| |
217 switch (BYTE_TO_ASCII(enc, ptr)) { |
| |
218 case 'm': |
| |
219 break; |
| |
220 case 'M': |
| |
221 upper = 1; |
| |
222 break; |
| |
223 default: |
| |
224 return 1; |
| |
225 } |
| |
226 ptr += MINBPC(enc); |
| |
227 switch (BYTE_TO_ASCII(enc, ptr)) { |
| |
228 case 'l': |
| |
229 break; |
| |
230 case 'L': |
| |
231 upper = 1; |
| |
232 break; |
| |
233 default: |
| |
234 return 1; |
| |
235 } |
| |
236 if (upper) |
| |
237 return 0; |
| |
238 *tokPtr = XML_TOK_XML_DECL; |
| |
239 return 1; |
| |
240 } |
| |
241 |
| |
242 /* ptr points to character following "<?" */ |
| |
243 |
| |
244 static |
| |
245 int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, |
| |
246 const char **nextTokPtr) |
| |
247 { |
| |
248 int tok; |
| |
249 const char *target = ptr; |
| |
250 if (ptr == end) |
| |
251 return XML_TOK_PARTIAL; |
| |
252 switch (BYTE_TYPE(enc, ptr)) { |
| |
253 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
| |
254 default: |
| |
255 *nextTokPtr = ptr; |
| |
256 return XML_TOK_INVALID; |
| |
257 } |
| |
258 while (ptr != end) { |
| |
259 switch (BYTE_TYPE(enc, ptr)) { |
| |
260 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
| |
261 case BT_S: case BT_CR: case BT_LF: |
| |
262 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
| |
263 *nextTokPtr = ptr; |
| |
264 return XML_TOK_INVALID; |
| |
265 } |
| |
266 ptr += MINBPC(enc); |
| |
267 while (ptr != end) { |
| |
268 switch (BYTE_TYPE(enc, ptr)) { |
| |
269 INVALID_CASES(ptr, nextTokPtr) |
| |
270 case BT_QUEST: |
| |
271 ptr += MINBPC(enc); |
| |
272 if (ptr == end) |
| |
273 return XML_TOK_PARTIAL; |
| |
274 if (CHAR_MATCHES(enc, ptr, '>')) { |
| |
275 *nextTokPtr = ptr + MINBPC(enc); |
| |
276 return tok; |
| |
277 } |
| |
278 break; |
| |
279 default: |
| |
280 ptr += MINBPC(enc); |
| |
281 break; |
| |
282 } |
| |
283 } |
| |
284 return XML_TOK_PARTIAL; |
| |
285 case BT_QUEST: |
| |
286 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
| |
287 *nextTokPtr = ptr; |
| |
288 return XML_TOK_INVALID; |
| |
289 } |
| |
290 ptr += MINBPC(enc); |
| |
291 if (ptr == end) |
| |
292 return XML_TOK_PARTIAL; |
| |
293 if (CHAR_MATCHES(enc, ptr, '>')) { |
| |
294 *nextTokPtr = ptr + MINBPC(enc); |
| |
295 return tok; |
| |
296 } |
| |
297 /* fall through */ |
| |
298 default: |
| |
299 *nextTokPtr = ptr; |
| |
300 return XML_TOK_INVALID; |
| |
301 } |
| |
302 } |
| |
303 return XML_TOK_PARTIAL; |
| |
304 } |
| |
305 |
| |
306 |
| |
307 static |
| |
308 int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, |
| |
309 const char **nextTokPtr) |
| |
310 { |
| |
311 int i; |
| |
312 /* CDATA[ */ |
| |
313 if (end - ptr < 6 * MINBPC(enc)) |
| |
314 return XML_TOK_PARTIAL; |
| |
315 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { |
| |
316 if (!CHAR_MATCHES(enc, ptr, "CDATA["[i])) { |
| |
317 *nextTokPtr = ptr; |
| |
318 return XML_TOK_INVALID; |
| |
319 } |
| |
320 } |
| |
321 *nextTokPtr = ptr; |
| |
322 return XML_TOK_CDATA_SECT_OPEN; |
| |
323 } |
| |
324 |
| |
325 static |
| |
326 int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, |
| |
327 const char **nextTokPtr) |
| |
328 { |
| |
329 if (ptr == end) |
| |
330 return XML_TOK_NONE; |
| |
331 if (MINBPC(enc) > 1) { |
| |
332 size_t n = end - ptr; |
| |
333 if (n & (MINBPC(enc) - 1)) { |
| |
334 n &= ~(MINBPC(enc) - 1); |
| |
335 if (n == 0) |
| |
336 return XML_TOK_PARTIAL; |
| |
337 end = ptr + n; |
| |
338 } |
| |
339 } |
| |
340 switch (BYTE_TYPE(enc, ptr)) { |
| |
341 case BT_RSQB: |
| |
342 ptr += MINBPC(enc); |
| |
343 if (ptr == end) |
| |
344 return XML_TOK_PARTIAL; |
| |
345 if (!CHAR_MATCHES(enc, ptr, ']')) |
| |
346 break; |
| |
347 ptr += MINBPC(enc); |
| |
348 if (ptr == end) |
| |
349 return XML_TOK_PARTIAL; |
| |
350 if (!CHAR_MATCHES(enc, ptr, '>')) { |
| |
351 ptr -= MINBPC(enc); |
| |
352 break; |
| |
353 } |
| |
354 *nextTokPtr = ptr + MINBPC(enc); |
| |
355 return XML_TOK_CDATA_SECT_CLOSE; |
| |
356 case BT_CR: |
| |
357 ptr += MINBPC(enc); |
| |
358 if (ptr == end) |
| |
359 return XML_TOK_PARTIAL; |
| |
360 if (BYTE_TYPE(enc, ptr) == BT_LF) |
| |
361 ptr += MINBPC(enc); |
| |
362 *nextTokPtr = ptr; |
| |
363 return XML_TOK_DATA_NEWLINE; |
| |
364 case BT_LF: |
| |
365 *nextTokPtr = ptr + MINBPC(enc); |
| |
366 return XML_TOK_DATA_NEWLINE; |
| |
367 INVALID_CASES(ptr, nextTokPtr) |
| |
368 default: |
| |
369 ptr += MINBPC(enc); |
| |
370 break; |
| |
371 } |
| |
372 while (ptr != end) { |
| |
373 switch (BYTE_TYPE(enc, ptr)) { |
| |
374 #define LEAD_CASE(n) \ |
| |
375 case BT_LEAD ## n: \ |
| |
376 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
| |
377 *nextTokPtr = ptr; \ |
| |
378 return XML_TOK_DATA_CHARS; \ |
| |
379 } \ |
| |
380 ptr += n; \ |
| |
381 break; |
| |
382 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
| |
383 #undef LEAD_CASE |
| |
384 case BT_NONXML: |
| |
385 case BT_MALFORM: |
| |
386 case BT_TRAIL: |
| |
387 case BT_CR: |
| |
388 case BT_LF: |
| |
389 case BT_RSQB: |
| |
390 *nextTokPtr = ptr; |
| |
391 return XML_TOK_DATA_CHARS; |
| |
392 default: |
| |
393 ptr += MINBPC(enc); |
| |
394 break; |
| |
395 } |
| |
396 } |
| |
397 *nextTokPtr = ptr; |
| |
398 return XML_TOK_DATA_CHARS; |
| |
399 } |
| |
400 |
| |
401 /* ptr points to character following "</" */ |
| |
402 |
| |
403 static |
| |
404 int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, |
| |
405 const char **nextTokPtr) |
| |
406 { |
| |
407 if (ptr == end) |
| |
408 return XML_TOK_PARTIAL; |
| |
409 switch (BYTE_TYPE(enc, ptr)) { |
| |
410 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
| |
411 default: |
| |
412 *nextTokPtr = ptr; |
| |
413 return XML_TOK_INVALID; |
| |
414 } |
| |
415 while (ptr != end) { |
| |
416 switch (BYTE_TYPE(enc, ptr)) { |
| |
417 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
| |
418 case BT_S: case BT_CR: case BT_LF: |
| |
419 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
| |
420 switch (BYTE_TYPE(enc, ptr)) { |
| |
421 case BT_S: case BT_CR: case BT_LF: |
| |
422 break; |
| |
423 case BT_GT: |
| |
424 *nextTokPtr = ptr + MINBPC(enc); |
| |
425 return XML_TOK_END_TAG; |
| |
426 default: |
| |
427 *nextTokPtr = ptr; |
| |
428 return XML_TOK_INVALID; |
| |
429 } |
| |
430 } |
| |
431 return XML_TOK_PARTIAL; |
| |
432 #ifdef XML_NS |
| |
433 case BT_COLON: |
| |
434 /* no need to check qname syntax here, since end-tag must match exactly */ |
| |
435 ptr += MINBPC(enc); |
| |
436 break; |
| |
437 #endif |
| |
438 case BT_GT: |
| |
439 *nextTokPtr = ptr + MINBPC(enc); |
| |
440 return XML_TOK_END_TAG; |
| |
441 default: |
| |
442 *nextTokPtr = ptr; |
| |
443 return XML_TOK_INVALID; |
| |
444 } |
| |
445 } |
| |
446 return XML_TOK_PARTIAL; |
| |
447 } |
| |
448 |
| |
449 /* ptr points to character following "&#X" */ |
| |
450 |
| |
451 static |
| |
452 int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, |
| |
453 const char **nextTokPtr) |
| |
454 { |
| |
455 if (ptr != end) { |
| |
456 switch (BYTE_TYPE(enc, ptr)) { |
| |
457 case BT_DIGIT: |
| |
458 case BT_HEX: |
| |
459 break; |
| |
460 default: |
| |
461 *nextTokPtr = ptr; |
| |
462 return XML_TOK_INVALID; |
| |
463 } |
| |
464 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
| |
465 switch (BYTE_TYPE(enc, ptr)) { |
| |
466 case BT_DIGIT: |
| |
467 case BT_HEX: |
| |
468 break; |
| |
469 case BT_SEMI: |
| |
470 *nextTokPtr = ptr + MINBPC(enc); |
| |
471 return XML_TOK_CHAR_REF; |
| |
472 default: |
| |
473 *nextTokPtr = ptr; |
| |
474 return XML_TOK_INVALID; |
| |
475 } |
| |
476 } |
| |
477 } |
| |
478 return XML_TOK_PARTIAL; |
| |
479 } |
| |
480 |
| |
481 /* ptr points to character following "&#" */ |
| |
482 |
| |
483 static |
| |
484 int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, |
| |
485 const char **nextTokPtr) |
| |
486 { |
| |
487 if (ptr != end) { |
| |
488 if (CHAR_MATCHES(enc, ptr, 'x')) |
| |
489 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
490 switch (BYTE_TYPE(enc, ptr)) { |
| |
491 case BT_DIGIT: |
| |
492 break; |
| |
493 default: |
| |
494 *nextTokPtr = ptr; |
| |
495 return XML_TOK_INVALID; |
| |
496 } |
| |
497 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
| |
498 switch (BYTE_TYPE(enc, ptr)) { |
| |
499 case BT_DIGIT: |
| |
500 break; |
| |
501 case BT_SEMI: |
| |
502 *nextTokPtr = ptr + MINBPC(enc); |
| |
503 return XML_TOK_CHAR_REF; |
| |
504 default: |
| |
505 *nextTokPtr = ptr; |
| |
506 return XML_TOK_INVALID; |
| |
507 } |
| |
508 } |
| |
509 } |
| |
510 return XML_TOK_PARTIAL; |
| |
511 } |
| |
512 |
| |
513 /* ptr points to character following "&" */ |
| |
514 |
| |
515 static |
| |
516 int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, |
| |
517 const char **nextTokPtr) |
| |
518 { |
| |
519 if (ptr == end) |
| |
520 return XML_TOK_PARTIAL; |
| |
521 switch (BYTE_TYPE(enc, ptr)) { |
| |
522 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
| |
523 case BT_NUM: |
| |
524 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
525 default: |
| |
526 *nextTokPtr = ptr; |
| |
527 return XML_TOK_INVALID; |
| |
528 } |
| |
529 while (ptr != end) { |
| |
530 switch (BYTE_TYPE(enc, ptr)) { |
| |
531 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
| |
532 case BT_SEMI: |
| |
533 *nextTokPtr = ptr + MINBPC(enc); |
| |
534 return XML_TOK_ENTITY_REF; |
| |
535 default: |
| |
536 *nextTokPtr = ptr; |
| |
537 return XML_TOK_INVALID; |
| |
538 } |
| |
539 } |
| |
540 return XML_TOK_PARTIAL; |
| |
541 } |
| |
542 |
| |
543 /* ptr points to character following first character of attribute name */ |
| |
544 |
| |
545 static |
| |
546 int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, |
| |
547 const char **nextTokPtr) |
| |
548 { |
| |
549 #ifdef XML_NS |
| |
550 int hadColon = 0; |
| |
551 #endif |
| |
552 while (ptr != end) { |
| |
553 switch (BYTE_TYPE(enc, ptr)) { |
| |
554 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
| |
555 #ifdef XML_NS |
| |
556 case BT_COLON: |
| |
557 if (hadColon) { |
| |
558 *nextTokPtr = ptr; |
| |
559 return XML_TOK_INVALID; |
| |
560 } |
| |
561 hadColon = 1; |
| |
562 ptr += MINBPC(enc); |
| |
563 if (ptr == end) |
| |
564 return XML_TOK_PARTIAL; |
| |
565 switch (BYTE_TYPE(enc, ptr)) { |
| |
566 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
| |
567 default: |
| |
568 *nextTokPtr = ptr; |
| |
569 return XML_TOK_INVALID; |
| |
570 } |
| |
571 break; |
| |
572 #endif |
| |
573 case BT_S: case BT_CR: case BT_LF: |
| |
574 for (;;) { |
| |
575 int t; |
| |
576 |
| |
577 ptr += MINBPC(enc); |
| |
578 if (ptr == end) |
| |
579 return XML_TOK_PARTIAL; |
| |
580 t = BYTE_TYPE(enc, ptr); |
| |
581 if (t == BT_EQUALS) |
| |
582 break; |
| |
583 switch (t) { |
| |
584 case BT_S: |
| |
585 case BT_LF: |
| |
586 case BT_CR: |
| |
587 break; |
| |
588 default: |
| |
589 *nextTokPtr = ptr; |
| |
590 return XML_TOK_INVALID; |
| |
591 } |
| |
592 } |
| |
593 /* fall through */ |
| |
594 case BT_EQUALS: |
| |
595 { |
| |
596 int open; |
| |
597 #ifdef XML_NS |
| |
598 hadColon = 0; |
| |
599 #endif |
| |
600 for (;;) { |
| |
601 |
| |
602 ptr += MINBPC(enc); |
| |
603 if (ptr == end) |
| |
604 return XML_TOK_PARTIAL; |
| |
605 open = BYTE_TYPE(enc, ptr); |
| |
606 if (open == BT_QUOT || open == BT_APOS) |
| |
607 break; |
| |
608 switch (open) { |
| |
609 case BT_S: |
| |
610 case BT_LF: |
| |
611 case BT_CR: |
| |
612 break; |
| |
613 default: |
| |
614 *nextTokPtr = ptr; |
| |
615 return XML_TOK_INVALID; |
| |
616 } |
| |
617 } |
| |
618 ptr += MINBPC(enc); |
| |
619 /* in attribute value */ |
| |
620 for (;;) { |
| |
621 int t; |
| |
622 if (ptr == end) |
| |
623 return XML_TOK_PARTIAL; |
| |
624 t = BYTE_TYPE(enc, ptr); |
| |
625 if (t == open) |
| |
626 break; |
| |
627 switch (t) { |
| |
628 INVALID_CASES(ptr, nextTokPtr) |
| |
629 case BT_AMP: |
| |
630 { |
| |
631 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); |
| |
632 if (tok <= 0) { |
| |
633 if (tok == XML_TOK_INVALID) |
| |
634 *nextTokPtr = ptr; |
| |
635 return tok; |
| |
636 } |
| |
637 break; |
| |
638 } |
| |
639 case BT_LT: |
| |
640 *nextTokPtr = ptr; |
| |
641 return XML_TOK_INVALID; |
| |
642 default: |
| |
643 ptr += MINBPC(enc); |
| |
644 break; |
| |
645 } |
| |
646 } |
| |
647 ptr += MINBPC(enc); |
| |
648 if (ptr == end) |
| |
649 return XML_TOK_PARTIAL; |
| |
650 switch (BYTE_TYPE(enc, ptr)) { |
| |
651 case BT_S: |
| |
652 case BT_CR: |
| |
653 case BT_LF: |
| |
654 break; |
| |
655 case BT_SOL: |
| |
656 goto sol; |
| |
657 case BT_GT: |
| |
658 goto gt; |
| |
659 default: |
| |
660 *nextTokPtr = ptr; |
| |
661 return XML_TOK_INVALID; |
| |
662 } |
| |
663 /* ptr points to closing quote */ |
| |
664 for (;;) { |
| |
665 ptr += MINBPC(enc); |
| |
666 if (ptr == end) |
| |
667 return XML_TOK_PARTIAL; |
| |
668 switch (BYTE_TYPE(enc, ptr)) { |
| |
669 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
| |
670 case BT_S: case BT_CR: case BT_LF: |
| |
671 continue; |
| |
672 case BT_GT: |
| |
673 gt: |
| |
674 *nextTokPtr = ptr + MINBPC(enc); |
| |
675 return XML_TOK_START_TAG_WITH_ATTS; |
| |
676 case BT_SOL: |
| |
677 sol: |
| |
678 ptr += MINBPC(enc); |
| |
679 if (ptr == end) |
| |
680 return XML_TOK_PARTIAL; |
| |
681 if (!CHAR_MATCHES(enc, ptr, '>')) { |
| |
682 *nextTokPtr = ptr; |
| |
683 return XML_TOK_INVALID; |
| |
684 } |
| |
685 *nextTokPtr = ptr + MINBPC(enc); |
| |
686 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; |
| |
687 default: |
| |
688 *nextTokPtr = ptr; |
| |
689 return XML_TOK_INVALID; |
| |
690 } |
| |
691 break; |
| |
692 } |
| |
693 break; |
| |
694 } |
| |
695 default: |
| |
696 *nextTokPtr = ptr; |
| |
697 return XML_TOK_INVALID; |
| |
698 } |
| |
699 } |
| |
700 return XML_TOK_PARTIAL; |
| |
701 } |
| |
702 |
| |
703 /* ptr points to character following "<" */ |
| |
704 |
| |
705 static |
| |
706 int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, |
| |
707 const char **nextTokPtr) |
| |
708 { |
| |
709 #ifdef XML_NS |
| |
710 int hadColon; |
| |
711 #endif |
| |
712 if (ptr == end) |
| |
713 return XML_TOK_PARTIAL; |
| |
714 switch (BYTE_TYPE(enc, ptr)) { |
| |
715 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
| |
716 case BT_EXCL: |
| |
717 if ((ptr += MINBPC(enc)) == end) |
| |
718 return XML_TOK_PARTIAL; |
| |
719 switch (BYTE_TYPE(enc, ptr)) { |
| |
720 case BT_MINUS: |
| |
721 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
722 case BT_LSQB: |
| |
723 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
724 } |
| |
725 *nextTokPtr = ptr; |
| |
726 return XML_TOK_INVALID; |
| |
727 case BT_QUEST: |
| |
728 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
729 case BT_SOL: |
| |
730 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
731 default: |
| |
732 *nextTokPtr = ptr; |
| |
733 return XML_TOK_INVALID; |
| |
734 } |
| |
735 #ifdef XML_NS |
| |
736 hadColon = 0; |
| |
737 #endif |
| |
738 /* we have a start-tag */ |
| |
739 while (ptr != end) { |
| |
740 switch (BYTE_TYPE(enc, ptr)) { |
| |
741 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
| |
742 #ifdef XML_NS |
| |
743 case BT_COLON: |
| |
744 if (hadColon) { |
| |
745 *nextTokPtr = ptr; |
| |
746 return XML_TOK_INVALID; |
| |
747 } |
| |
748 hadColon = 1; |
| |
749 ptr += MINBPC(enc); |
| |
750 if (ptr == end) |
| |
751 return XML_TOK_PARTIAL; |
| |
752 switch (BYTE_TYPE(enc, ptr)) { |
| |
753 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
| |
754 default: |
| |
755 *nextTokPtr = ptr; |
| |
756 return XML_TOK_INVALID; |
| |
757 } |
| |
758 break; |
| |
759 #endif |
| |
760 case BT_S: case BT_CR: case BT_LF: |
| |
761 { |
| |
762 ptr += MINBPC(enc); |
| |
763 while (ptr != end) { |
| |
764 switch (BYTE_TYPE(enc, ptr)) { |
| |
765 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
| |
766 case BT_GT: |
| |
767 goto gt; |
| |
768 case BT_SOL: |
| |
769 goto sol; |
| |
770 case BT_S: case BT_CR: case BT_LF: |
| |
771 ptr += MINBPC(enc); |
| |
772 continue; |
| |
773 default: |
| |
774 *nextTokPtr = ptr; |
| |
775 return XML_TOK_INVALID; |
| |
776 } |
| |
777 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); |
| |
778 } |
| |
779 return XML_TOK_PARTIAL; |
| |
780 } |
| |
781 case BT_GT: |
| |
782 gt: |
| |
783 *nextTokPtr = ptr + MINBPC(enc); |
| |
784 return XML_TOK_START_TAG_NO_ATTS; |
| |
785 case BT_SOL: |
| |
786 sol: |
| |
787 ptr += MINBPC(enc); |
| |
788 if (ptr == end) |
| |
789 return XML_TOK_PARTIAL; |
| |
790 if (!CHAR_MATCHES(enc, ptr, '>')) { |
| |
791 *nextTokPtr = ptr; |
| |
792 return XML_TOK_INVALID; |
| |
793 } |
| |
794 *nextTokPtr = ptr + MINBPC(enc); |
| |
795 return XML_TOK_EMPTY_ELEMENT_NO_ATTS; |
| |
796 default: |
| |
797 *nextTokPtr = ptr; |
| |
798 return XML_TOK_INVALID; |
| |
799 } |
| |
800 } |
| |
801 return XML_TOK_PARTIAL; |
| |
802 } |
| |
803 |
| |
804 static |
| |
805 int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, |
| |
806 const char **nextTokPtr) |
| |
807 { |
| |
808 if (ptr == end) |
| |
809 return XML_TOK_NONE; |
| |
810 if (MINBPC(enc) > 1) { |
| |
811 size_t n = end - ptr; |
| |
812 if (n & (MINBPC(enc) - 1)) { |
| |
813 n &= ~(MINBPC(enc) - 1); |
| |
814 if (n == 0) |
| |
815 return XML_TOK_PARTIAL; |
| |
816 end = ptr + n; |
| |
817 } |
| |
818 } |
| |
819 switch (BYTE_TYPE(enc, ptr)) { |
| |
820 case BT_LT: |
| |
821 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
822 case BT_AMP: |
| |
823 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
824 case BT_CR: |
| |
825 ptr += MINBPC(enc); |
| |
826 if (ptr == end) |
| |
827 return XML_TOK_TRAILING_CR; |
| |
828 if (BYTE_TYPE(enc, ptr) == BT_LF) |
| |
829 ptr += MINBPC(enc); |
| |
830 *nextTokPtr = ptr; |
| |
831 return XML_TOK_DATA_NEWLINE; |
| |
832 case BT_LF: |
| |
833 *nextTokPtr = ptr + MINBPC(enc); |
| |
834 return XML_TOK_DATA_NEWLINE; |
| |
835 case BT_RSQB: |
| |
836 ptr += MINBPC(enc); |
| |
837 if (ptr == end) |
| |
838 return XML_TOK_TRAILING_RSQB; |
| |
839 if (!CHAR_MATCHES(enc, ptr, ']')) |
| |
840 break; |
| |
841 ptr += MINBPC(enc); |
| |
842 if (ptr == end) |
| |
843 return XML_TOK_TRAILING_RSQB; |
| |
844 if (!CHAR_MATCHES(enc, ptr, '>')) { |
| |
845 ptr -= MINBPC(enc); |
| |
846 break; |
| |
847 } |
| |
848 *nextTokPtr = ptr; |
| |
849 return XML_TOK_INVALID; |
| |
850 INVALID_CASES(ptr, nextTokPtr) |
| |
851 default: |
| |
852 ptr += MINBPC(enc); |
| |
853 break; |
| |
854 } |
| |
855 while (ptr != end) { |
| |
856 switch (BYTE_TYPE(enc, ptr)) { |
| |
857 #define LEAD_CASE(n) \ |
| |
858 case BT_LEAD ## n: \ |
| |
859 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
| |
860 *nextTokPtr = ptr; \ |
| |
861 return XML_TOK_DATA_CHARS; \ |
| |
862 } \ |
| |
863 ptr += n; \ |
| |
864 break; |
| |
865 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
| |
866 #undef LEAD_CASE |
| |
867 case BT_RSQB: |
| |
868 if (ptr + MINBPC(enc) != end) { |
| |
869 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ']')) { |
| |
870 ptr += MINBPC(enc); |
| |
871 break; |
| |
872 } |
| |
873 if (ptr + 2*MINBPC(enc) != end) { |
| |
874 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), '>')) { |
| |
875 ptr += MINBPC(enc); |
| |
876 break; |
| |
877 } |
| |
878 *nextTokPtr = ptr + 2*MINBPC(enc); |
| |
879 return XML_TOK_INVALID; |
| |
880 } |
| |
881 } |
| |
882 /* fall through */ |
| |
883 case BT_AMP: |
| |
884 case BT_LT: |
| |
885 case BT_NONXML: |
| |
886 case BT_MALFORM: |
| |
887 case BT_TRAIL: |
| |
888 case BT_CR: |
| |
889 case BT_LF: |
| |
890 *nextTokPtr = ptr; |
| |
891 return XML_TOK_DATA_CHARS; |
| |
892 default: |
| |
893 ptr += MINBPC(enc); |
| |
894 break; |
| |
895 } |
| |
896 } |
| |
897 *nextTokPtr = ptr; |
| |
898 return XML_TOK_DATA_CHARS; |
| |
899 } |
| |
900 |
| |
901 /* ptr points to character following "%" */ |
| |
902 |
| |
903 static |
| |
904 int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, |
| |
905 const char **nextTokPtr) |
| |
906 { |
| |
907 if (ptr == end) |
| |
908 return XML_TOK_PARTIAL; |
| |
909 switch (BYTE_TYPE(enc, ptr)) { |
| |
910 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
| |
911 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: |
| |
912 *nextTokPtr = ptr; |
| |
913 return XML_TOK_PERCENT; |
| |
914 default: |
| |
915 *nextTokPtr = ptr; |
| |
916 return XML_TOK_INVALID; |
| |
917 } |
| |
918 while (ptr != end) { |
| |
919 switch (BYTE_TYPE(enc, ptr)) { |
| |
920 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
| |
921 case BT_SEMI: |
| |
922 *nextTokPtr = ptr + MINBPC(enc); |
| |
923 return XML_TOK_PARAM_ENTITY_REF; |
| |
924 default: |
| |
925 *nextTokPtr = ptr; |
| |
926 return XML_TOK_INVALID; |
| |
927 } |
| |
928 } |
| |
929 return XML_TOK_PARTIAL; |
| |
930 } |
| |
931 |
| |
932 static |
| |
933 int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, |
| |
934 const char **nextTokPtr) |
| |
935 { |
| |
936 if (ptr == end) |
| |
937 return XML_TOK_PARTIAL; |
| |
938 switch (BYTE_TYPE(enc, ptr)) { |
| |
939 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
| |
940 default: |
| |
941 *nextTokPtr = ptr; |
| |
942 return XML_TOK_INVALID; |
| |
943 } |
| |
944 while (ptr != end) { |
| |
945 switch (BYTE_TYPE(enc, ptr)) { |
| |
946 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
| |
947 case BT_CR: case BT_LF: case BT_S: |
| |
948 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: |
| |
949 *nextTokPtr = ptr; |
| |
950 return XML_TOK_POUND_NAME; |
| |
951 default: |
| |
952 *nextTokPtr = ptr; |
| |
953 return XML_TOK_INVALID; |
| |
954 } |
| |
955 } |
| |
956 return XML_TOK_PARTIAL; |
| |
957 } |
| |
958 |
| |
959 static |
| |
960 int PREFIX(scanLit)(int open, const ENCODING *enc, |
| |
961 const char *ptr, const char *end, |
| |
962 const char **nextTokPtr) |
| |
963 { |
| |
964 while (ptr != end) { |
| |
965 int t = BYTE_TYPE(enc, ptr); |
| |
966 switch (t) { |
| |
967 INVALID_CASES(ptr, nextTokPtr) |
| |
968 case BT_QUOT: |
| |
969 case BT_APOS: |
| |
970 ptr += MINBPC(enc); |
| |
971 if (t != open) |
| |
972 break; |
| |
973 if (ptr == end) |
| |
974 return XML_TOK_PARTIAL; |
| |
975 *nextTokPtr = ptr; |
| |
976 switch (BYTE_TYPE(enc, ptr)) { |
| |
977 case BT_S: case BT_CR: case BT_LF: |
| |
978 case BT_GT: case BT_PERCNT: case BT_LSQB: |
| |
979 return XML_TOK_LITERAL; |
| |
980 default: |
| |
981 return XML_TOK_INVALID; |
| |
982 } |
| |
983 default: |
| |
984 ptr += MINBPC(enc); |
| |
985 break; |
| |
986 } |
| |
987 } |
| |
988 return XML_TOK_PARTIAL; |
| |
989 } |
| |
990 |
| |
991 static |
| |
992 int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, |
| |
993 const char **nextTokPtr) |
| |
994 { |
| |
995 int tok; |
| |
996 if (ptr == end) |
| |
997 return XML_TOK_NONE; |
| |
998 if (MINBPC(enc) > 1) { |
| |
999 size_t n = end - ptr; |
| |
1000 if (n & (MINBPC(enc) - 1)) { |
| |
1001 n &= ~(MINBPC(enc) - 1); |
| |
1002 if (n == 0) |
| |
1003 return XML_TOK_PARTIAL; |
| |
1004 end = ptr + n; |
| |
1005 } |
| |
1006 } |
| |
1007 switch (BYTE_TYPE(enc, ptr)) { |
| |
1008 case BT_QUOT: |
| |
1009 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
1010 case BT_APOS: |
| |
1011 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
1012 case BT_LT: |
| |
1013 { |
| |
1014 ptr += MINBPC(enc); |
| |
1015 if (ptr == end) |
| |
1016 return XML_TOK_PARTIAL; |
| |
1017 switch (BYTE_TYPE(enc, ptr)) { |
| |
1018 case BT_EXCL: |
| |
1019 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
1020 case BT_QUEST: |
| |
1021 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
1022 case BT_NMSTRT: |
| |
1023 case BT_HEX: |
| |
1024 case BT_NONASCII: |
| |
1025 case BT_LEAD2: |
| |
1026 case BT_LEAD3: |
| |
1027 case BT_LEAD4: |
| |
1028 *nextTokPtr = ptr - MINBPC(enc); |
| |
1029 return XML_TOK_INSTANCE_START; |
| |
1030 } |
| |
1031 *nextTokPtr = ptr; |
| |
1032 return XML_TOK_INVALID; |
| |
1033 } |
| |
1034 case BT_CR: |
| |
1035 if (ptr + MINBPC(enc) == end) |
| |
1036 return XML_TOK_TRAILING_CR; |
| |
1037 /* fall through */ |
| |
1038 case BT_S: case BT_LF: |
| |
1039 for (;;) { |
| |
1040 ptr += MINBPC(enc); |
| |
1041 if (ptr == end) |
| |
1042 break; |
| |
1043 switch (BYTE_TYPE(enc, ptr)) { |
| |
1044 case BT_S: case BT_LF: |
| |
1045 break; |
| |
1046 case BT_CR: |
| |
1047 /* don't split CR/LF pair */ |
| |
1048 if (ptr + MINBPC(enc) != end) |
| |
1049 break; |
| |
1050 /* fall through */ |
| |
1051 default: |
| |
1052 *nextTokPtr = ptr; |
| |
1053 return XML_TOK_PROLOG_S; |
| |
1054 } |
| |
1055 } |
| |
1056 *nextTokPtr = ptr; |
| |
1057 return XML_TOK_PROLOG_S; |
| |
1058 case BT_PERCNT: |
| |
1059 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
1060 case BT_COMMA: |
| |
1061 *nextTokPtr = ptr + MINBPC(enc); |
| |
1062 return XML_TOK_COMMA; |
| |
1063 case BT_LSQB: |
| |
1064 *nextTokPtr = ptr + MINBPC(enc); |
| |
1065 return XML_TOK_OPEN_BRACKET; |
| |
1066 case BT_RSQB: |
| |
1067 ptr += MINBPC(enc); |
| |
1068 if (ptr == end) |
| |
1069 return XML_TOK_PARTIAL; |
| |
1070 if (CHAR_MATCHES(enc, ptr, ']')) { |
| |
1071 if (ptr + MINBPC(enc) == end) |
| |
1072 return XML_TOK_PARTIAL; |
| |
1073 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), '>')) { |
| |
1074 *nextTokPtr = ptr + 2*MINBPC(enc); |
| |
1075 return XML_TOK_COND_SECT_CLOSE; |
| |
1076 } |
| |
1077 } |
| |
1078 *nextTokPtr = ptr; |
| |
1079 return XML_TOK_CLOSE_BRACKET; |
| |
1080 case BT_LPAR: |
| |
1081 *nextTokPtr = ptr + MINBPC(enc); |
| |
1082 return XML_TOK_OPEN_PAREN; |
| |
1083 case BT_RPAR: |
| |
1084 ptr += MINBPC(enc); |
| |
1085 if (ptr == end) |
| |
1086 return XML_TOK_PARTIAL; |
| |
1087 switch (BYTE_TYPE(enc, ptr)) { |
| |
1088 case BT_AST: |
| |
1089 *nextTokPtr = ptr + MINBPC(enc); |
| |
1090 return XML_TOK_CLOSE_PAREN_ASTERISK; |
| |
1091 case BT_QUEST: |
| |
1092 *nextTokPtr = ptr + MINBPC(enc); |
| |
1093 return XML_TOK_CLOSE_PAREN_QUESTION; |
| |
1094 case BT_PLUS: |
| |
1095 *nextTokPtr = ptr + MINBPC(enc); |
| |
1096 return XML_TOK_CLOSE_PAREN_PLUS; |
| |
1097 case BT_CR: case BT_LF: case BT_S: |
| |
1098 case BT_GT: case BT_COMMA: case BT_VERBAR: |
| |
1099 case BT_RPAR: |
| |
1100 *nextTokPtr = ptr; |
| |
1101 return XML_TOK_CLOSE_PAREN; |
| |
1102 } |
| |
1103 *nextTokPtr = ptr; |
| |
1104 return XML_TOK_INVALID; |
| |
1105 case BT_VERBAR: |
| |
1106 *nextTokPtr = ptr + MINBPC(enc); |
| |
1107 return XML_TOK_OR; |
| |
1108 case BT_GT: |
| |
1109 *nextTokPtr = ptr + MINBPC(enc); |
| |
1110 return XML_TOK_DECL_CLOSE; |
| |
1111 case BT_NUM: |
| |
1112 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
1113 #define LEAD_CASE(n) \ |
| |
1114 case BT_LEAD ## n: \ |
| |
1115 if (end - ptr < n) \ |
| |
1116 return XML_TOK_PARTIAL_CHAR; \ |
| |
1117 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ |
| |
1118 ptr += n; \ |
| |
1119 tok = XML_TOK_NAME; \ |
| |
1120 break; \ |
| |
1121 } \ |
| |
1122 if (IS_NAME_CHAR(enc, ptr, n)) { \ |
| |
1123 ptr += n; \ |
| |
1124 tok = XML_TOK_NMTOKEN; \ |
| |
1125 break; \ |
| |
1126 } \ |
| |
1127 *nextTokPtr = ptr; \ |
| |
1128 return XML_TOK_INVALID; |
| |
1129 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
| |
1130 #undef LEAD_CASE |
| |
1131 case BT_NMSTRT: |
| |
1132 case BT_HEX: |
| |
1133 tok = XML_TOK_NAME; |
| |
1134 ptr += MINBPC(enc); |
| |
1135 break; |
| |
1136 case BT_DIGIT: |
| |
1137 case BT_NAME: |
| |
1138 case BT_MINUS: |
| |
1139 #ifdef XML_NS |
| |
1140 case BT_COLON: |
| |
1141 #endif |
| |
1142 tok = XML_TOK_NMTOKEN; |
| |
1143 ptr += MINBPC(enc); |
| |
1144 break; |
| |
1145 case BT_NONASCII: |
| |
1146 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { |
| |
1147 ptr += MINBPC(enc); |
| |
1148 tok = XML_TOK_NAME; |
| |
1149 break; |
| |
1150 } |
| |
1151 if (IS_NAME_CHAR_MINBPC(enc, ptr)) { |
| |
1152 ptr += MINBPC(enc); |
| |
1153 tok = XML_TOK_NMTOKEN; |
| |
1154 break; |
| |
1155 } |
| |
1156 /* fall through */ |
| |
1157 default: |
| |
1158 *nextTokPtr = ptr; |
| |
1159 return XML_TOK_INVALID; |
| |
1160 } |
| |
1161 while (ptr != end) { |
| |
1162 switch (BYTE_TYPE(enc, ptr)) { |
| |
1163 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
| |
1164 case BT_GT: case BT_RPAR: case BT_COMMA: |
| |
1165 case BT_VERBAR: case BT_LSQB: case BT_PERCNT: |
| |
1166 case BT_S: case BT_CR: case BT_LF: |
| |
1167 *nextTokPtr = ptr; |
| |
1168 return tok; |
| |
1169 #ifdef XML_NS |
| |
1170 case BT_COLON: |
| |
1171 ptr += MINBPC(enc); |
| |
1172 switch (tok) { |
| |
1173 case XML_TOK_NAME: |
| |
1174 if (ptr == end) |
| |
1175 return XML_TOK_PARTIAL; |
| |
1176 tok = XML_TOK_PREFIXED_NAME; |
| |
1177 switch (BYTE_TYPE(enc, ptr)) { |
| |
1178 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
| |
1179 default: |
| |
1180 tok = XML_TOK_NMTOKEN; |
| |
1181 break; |
| |
1182 } |
| |
1183 break; |
| |
1184 case XML_TOK_PREFIXED_NAME: |
| |
1185 tok = XML_TOK_NMTOKEN; |
| |
1186 break; |
| |
1187 } |
| |
1188 break; |
| |
1189 #endif |
| |
1190 case BT_PLUS: |
| |
1191 if (tok == XML_TOK_NMTOKEN) { |
| |
1192 *nextTokPtr = ptr; |
| |
1193 return XML_TOK_INVALID; |
| |
1194 } |
| |
1195 *nextTokPtr = ptr + MINBPC(enc); |
| |
1196 return XML_TOK_NAME_PLUS; |
| |
1197 case BT_AST: |
| |
1198 if (tok == XML_TOK_NMTOKEN) { |
| |
1199 *nextTokPtr = ptr; |
| |
1200 return XML_TOK_INVALID; |
| |
1201 } |
| |
1202 *nextTokPtr = ptr + MINBPC(enc); |
| |
1203 return XML_TOK_NAME_ASTERISK; |
| |
1204 case BT_QUEST: |
| |
1205 if (tok == XML_TOK_NMTOKEN) { |
| |
1206 *nextTokPtr = ptr; |
| |
1207 return XML_TOK_INVALID; |
| |
1208 } |
| |
1209 *nextTokPtr = ptr + MINBPC(enc); |
| |
1210 return XML_TOK_NAME_QUESTION; |
| |
1211 default: |
| |
1212 *nextTokPtr = ptr; |
| |
1213 return XML_TOK_INVALID; |
| |
1214 } |
| |
1215 } |
| |
1216 return XML_TOK_PARTIAL; |
| |
1217 } |
| |
1218 |
| |
1219 static |
| |
1220 int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, |
| |
1221 const char **nextTokPtr) |
| |
1222 { |
| |
1223 const char *start; |
| |
1224 if (ptr == end) |
| |
1225 return XML_TOK_NONE; |
| |
1226 start = ptr; |
| |
1227 while (ptr != end) { |
| |
1228 switch (BYTE_TYPE(enc, ptr)) { |
| |
1229 #define LEAD_CASE(n) \ |
| |
1230 case BT_LEAD ## n: ptr += n; break; |
| |
1231 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
| |
1232 #undef LEAD_CASE |
| |
1233 case BT_AMP: |
| |
1234 if (ptr == start) |
| |
1235 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
1236 *nextTokPtr = ptr; |
| |
1237 return XML_TOK_DATA_CHARS; |
| |
1238 case BT_LT: |
| |
1239 /* this is for inside entity references */ |
| |
1240 *nextTokPtr = ptr; |
| |
1241 return XML_TOK_INVALID; |
| |
1242 case BT_LF: |
| |
1243 if (ptr == start) { |
| |
1244 *nextTokPtr = ptr + MINBPC(enc); |
| |
1245 return XML_TOK_DATA_NEWLINE; |
| |
1246 } |
| |
1247 *nextTokPtr = ptr; |
| |
1248 return XML_TOK_DATA_CHARS; |
| |
1249 case BT_CR: |
| |
1250 if (ptr == start) { |
| |
1251 ptr += MINBPC(enc); |
| |
1252 if (ptr == end) |
| |
1253 return XML_TOK_TRAILING_CR; |
| |
1254 if (BYTE_TYPE(enc, ptr) == BT_LF) |
| |
1255 ptr += MINBPC(enc); |
| |
1256 *nextTokPtr = ptr; |
| |
1257 return XML_TOK_DATA_NEWLINE; |
| |
1258 } |
| |
1259 *nextTokPtr = ptr; |
| |
1260 return XML_TOK_DATA_CHARS; |
| |
1261 case BT_S: |
| |
1262 if (ptr == start) { |
| |
1263 *nextTokPtr = ptr + MINBPC(enc); |
| |
1264 return XML_TOK_ATTRIBUTE_VALUE_S; |
| |
1265 } |
| |
1266 *nextTokPtr = ptr; |
| |
1267 return XML_TOK_DATA_CHARS; |
| |
1268 default: |
| |
1269 ptr += MINBPC(enc); |
| |
1270 break; |
| |
1271 } |
| |
1272 } |
| |
1273 *nextTokPtr = ptr; |
| |
1274 return XML_TOK_DATA_CHARS; |
| |
1275 } |
| |
1276 |
| |
1277 static |
| |
1278 int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, |
| |
1279 const char **nextTokPtr) |
| |
1280 { |
| |
1281 const char *start; |
| |
1282 if (ptr == end) |
| |
1283 return XML_TOK_NONE; |
| |
1284 start = ptr; |
| |
1285 while (ptr != end) { |
| |
1286 switch (BYTE_TYPE(enc, ptr)) { |
| |
1287 #define LEAD_CASE(n) \ |
| |
1288 case BT_LEAD ## n: ptr += n; break; |
| |
1289 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
| |
1290 #undef LEAD_CASE |
| |
1291 case BT_AMP: |
| |
1292 if (ptr == start) |
| |
1293 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
1294 *nextTokPtr = ptr; |
| |
1295 return XML_TOK_DATA_CHARS; |
| |
1296 case BT_PERCNT: |
| |
1297 if (ptr == start) |
| |
1298 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
| |
1299 *nextTokPtr = ptr; |
| |
1300 return XML_TOK_DATA_CHARS; |
| |
1301 case BT_LF: |
| |
1302 if (ptr == start) { |
| |
1303 *nextTokPtr = ptr + MINBPC(enc); |
| |
1304 return XML_TOK_DATA_NEWLINE; |
| |
1305 } |
| |
1306 *nextTokPtr = ptr; |
| |
1307 return XML_TOK_DATA_CHARS; |
| |
1308 case BT_CR: |
| |
1309 if (ptr == start) { |
| |
1310 ptr += MINBPC(enc); |
| |
1311 if (ptr == end) |
| |
1312 return XML_TOK_TRAILING_CR; |
| |
1313 if (BYTE_TYPE(enc, ptr) == BT_LF) |
| |
1314 ptr += MINBPC(enc); |
| |
1315 *nextTokPtr = ptr; |
| |
1316 return XML_TOK_DATA_NEWLINE; |
| |
1317 } |
| |
1318 *nextTokPtr = ptr; |
| |
1319 return XML_TOK_DATA_CHARS; |
| |
1320 default: |
| |
1321 ptr += MINBPC(enc); |
| |
1322 break; |
| |
1323 } |
| |
1324 } |
| |
1325 *nextTokPtr = ptr; |
| |
1326 return XML_TOK_DATA_CHARS; |
| |
1327 } |
| |
1328 |
| |
1329 static |
| |
1330 int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, |
| |
1331 const char **badPtr) |
| |
1332 { |
| |
1333 ptr += MINBPC(enc); |
| |
1334 end -= MINBPC(enc); |
| |
1335 for (; ptr != end; ptr += MINBPC(enc)) { |
| |
1336 switch (BYTE_TYPE(enc, ptr)) { |
| |
1337 case BT_DIGIT: |
| |
1338 case BT_HEX: |
| |
1339 case BT_MINUS: |
| |
1340 case BT_APOS: |
| |
1341 case BT_LPAR: |
| |
1342 case BT_RPAR: |
| |
1343 case BT_PLUS: |
| |
1344 case BT_COMMA: |
| |
1345 case BT_SOL: |
| |
1346 case BT_EQUALS: |
| |
1347 case BT_QUEST: |
| |
1348 case BT_CR: |
| |
1349 case BT_LF: |
| |
1350 case BT_SEMI: |
| |
1351 case BT_EXCL: |
| |
1352 case BT_AST: |
| |
1353 case BT_PERCNT: |
| |
1354 case BT_NUM: |
| |
1355 #ifdef XML_NS |
| |
1356 case BT_COLON: |
| |
1357 #endif |
| |
1358 break; |
| |
1359 case BT_S: |
| |
1360 if (CHAR_MATCHES(enc, ptr, '\t')) { |
| |
1361 *badPtr = ptr; |
| |
1362 return 0; |
| |
1363 } |
| |
1364 break; |
| |
1365 case BT_NAME: |
| |
1366 case BT_NMSTRT: |
| |
1367 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) |
| |
1368 break; |
| |
1369 default: |
| |
1370 switch (BYTE_TO_ASCII(enc, ptr)) { |
| |
1371 case 0x24: /* $ */ |
| |
1372 case 0x40: /* @ */ |
| |
1373 break; |
| |
1374 default: |
| |
1375 *badPtr = ptr; |
| |
1376 return 0; |
| |
1377 } |
| |
1378 break; |
| |
1379 } |
| |
1380 } |
| |
1381 return 1; |
| |
1382 } |
| |
1383 |
| |
1384 /* This must only be called for a well-formed start-tag or empty element tag. |
| |
1385 Returns the number of attributes. Pointers to the first attsMax attributes |
| |
1386 are stored in atts. */ |
| |
1387 |
| |
1388 static |
| |
1389 int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, |
| |
1390 int attsMax, ATTRIBUTE *atts) |
| |
1391 { |
| |
1392 enum { other, inName, inValue } state = inName; |
| |
1393 int nAtts = 0; |
| |
1394 int open; |
| |
1395 |
| |
1396 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { |
| |
1397 switch (BYTE_TYPE(enc, ptr)) { |
| |
1398 #define START_NAME \ |
| |
1399 if (state == other) { \ |
| |
1400 if (nAtts < attsMax) { \ |
| |
1401 atts[nAtts].name = ptr; \ |
| |
1402 atts[nAtts].normalized = 1; \ |
| |
1403 } \ |
| |
1404 state = inName; \ |
| |
1405 } |
| |
1406 #define LEAD_CASE(n) \ |
| |
1407 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; |
| |
1408 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
| |
1409 #undef LEAD_CASE |
| |
1410 case BT_NONASCII: |
| |
1411 case BT_NMSTRT: |
| |
1412 case BT_HEX: |
| |
1413 START_NAME |
| |
1414 break; |
| |
1415 #undef START_NAME |
| |
1416 case BT_QUOT: |
| |
1417 if (state != inValue) { |
| |
1418 if (nAtts < attsMax) |
| |
1419 atts[nAtts].valuePtr = ptr + MINBPC(enc); |
| |
1420 state = inValue; |
| |
1421 open = BT_QUOT; |
| |
1422 } |
| |
1423 else if (open == BT_QUOT) { |
| |
1424 state = other; |
| |
1425 if (nAtts < attsMax) |
| |
1426 atts[nAtts].valueEnd = ptr; |
| |
1427 nAtts++; |
| |
1428 } |
| |
1429 break; |
| |
1430 case BT_APOS: |
| |
1431 if (state != inValue) { |
| |
1432 if (nAtts < attsMax) |
| |
1433 atts[nAtts].valuePtr = ptr + MINBPC(enc); |
| |
1434 state = inValue; |
| |
1435 open = BT_APOS; |
| |
1436 } |
| |
1437 else if (open == BT_APOS) { |
| |
1438 state = other; |
| |
1439 if (nAtts < attsMax) |
| |
1440 atts[nAtts].valueEnd = ptr; |
| |
1441 nAtts++; |
| |
1442 } |
| |
1443 break; |
| |
1444 case BT_AMP: |
| |
1445 if (nAtts < attsMax) |
| |
1446 atts[nAtts].normalized = 0; |
| |
1447 break; |
| |
1448 case BT_S: |
| |
1449 if (state == inName) |
| |
1450 state = other; |
| |
1451 else if (state == inValue |
| |
1452 && nAtts < attsMax |
| |
1453 && atts[nAtts].normalized |
| |
1454 && (ptr == atts[nAtts].valuePtr |
| |
1455 || BYTE_TO_ASCII(enc, ptr) != ' ' |
| |
1456 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ' ' |
| |
1457 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) |
| |
1458 atts[nAtts].normalized = 0; |
| |
1459 break; |
| |
1460 case BT_CR: case BT_LF: |
| |
1461 /* This case ensures that the first attribute name is counted |
| |
1462 Apart from that we could just change state on the quote. */ |
| |
1463 if (state == inName) |
| |
1464 state = other; |
| |
1465 else if (state == inValue && nAtts < attsMax) |
| |
1466 atts[nAtts].normalized = 0; |
| |
1467 break; |
| |
1468 case BT_GT: |
| |
1469 case BT_SOL: |
| |
1470 if (state != inValue) |
| |
1471 return nAtts; |
| |
1472 break; |
| |
1473 default: |
| |
1474 break; |
| |
1475 } |
| |
1476 } |
| |
1477 /* not reached */ |
| |
1478 } |
| |
1479 |
| |
1480 static |
| |
1481 int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) |
| |
1482 { |
| |
1483 int result = 0; |
| |
1484 /* skip &# */ |
| |
1485 ptr += 2*MINBPC(enc); |
| |
1486 if (CHAR_MATCHES(enc, ptr, 'x')) { |
| |
1487 for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { |
| |
1488 int c = BYTE_TO_ASCII(enc, ptr); |
| |
1489 switch (c) { |
| |
1490 case '0': case '1': case '2': case '3': case '4': |
| |
1491 case '5': case '6': case '7': case '8': case '9': |
| |
1492 result <<= 4; |
| |
1493 result |= (c - '0'); |
| |
1494 break; |
| |
1495 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': |
| |
1496 result <<= 4; |
| |
1497 result += 10 + (c - 'A'); |
| |
1498 break; |
| |
1499 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': |
| |
1500 result <<= 4; |
| |
1501 result += 10 + (c - 'a'); |
| |
1502 break; |
| |
1503 } |
| |
1504 if (result >= 0x110000) |
| |
1505 return -1; |
| |
1506 } |
| |
1507 } |
| |
1508 else { |
| |
1509 for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { |
| |
1510 int c = BYTE_TO_ASCII(enc, ptr); |
| |
1511 result *= 10; |
| |
1512 result += (c - '0'); |
| |
1513 if (result >= 0x110000) |
| |
1514 return -1; |
| |
1515 } |
| |
1516 } |
| |
1517 return checkCharRefNumber(result); |
| |
1518 } |
| |
1519 |
| |
1520 static |
| |
1521 int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end) |
| |
1522 { |
| |
1523 switch ((end - ptr)/MINBPC(enc)) { |
| |
1524 case 2: |
| |
1525 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), 't')) { |
| |
1526 switch (BYTE_TO_ASCII(enc, ptr)) { |
| |
1527 case 'l': |
| |
1528 return '<'; |
| |
1529 case 'g': |
| |
1530 return '>'; |
| |
1531 } |
| |
1532 } |
| |
1533 break; |
| |
1534 case 3: |
| |
1535 if (CHAR_MATCHES(enc, ptr, 'a')) { |
| |
1536 ptr += MINBPC(enc); |
| |
1537 if (CHAR_MATCHES(enc, ptr, 'm')) { |
| |
1538 ptr += MINBPC(enc); |
| |
1539 if (CHAR_MATCHES(enc, ptr, 'p')) |
| |
1540 return '&'; |
| |
1541 } |
| |
1542 } |
| |
1543 break; |
| |
1544 case 4: |
| |
1545 switch (BYTE_TO_ASCII(enc, ptr)) { |
| |
1546 case 'q': |
| |
1547 ptr += MINBPC(enc); |
| |
1548 if (CHAR_MATCHES(enc, ptr, 'u')) { |
| |
1549 ptr += MINBPC(enc); |
| |
1550 if (CHAR_MATCHES(enc, ptr, 'o')) { |
| |
1551 ptr += MINBPC(enc); |
| |
1552 if (CHAR_MATCHES(enc, ptr, 't')) |
| |
1553 return '"'; |
| |
1554 } |
| |
1555 } |
| |
1556 break; |
| |
1557 case 'a': |
| |
1558 ptr += MINBPC(enc); |
| |
1559 if (CHAR_MATCHES(enc, ptr, 'p')) { |
| |
1560 ptr += MINBPC(enc); |
| |
1561 if (CHAR_MATCHES(enc, ptr, 'o')) { |
| |
1562 ptr += MINBPC(enc); |
| |
1563 if (CHAR_MATCHES(enc, ptr, 's')) |
| |
1564 return '\''; |
| |
1565 } |
| |
1566 } |
| |
1567 break; |
| |
1568 } |
| |
1569 } |
| |
1570 return 0; |
| |
1571 } |
| |
1572 |
| |
1573 static |
| |
1574 int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) |
| |
1575 { |
| |
1576 for (;;) { |
| |
1577 switch (BYTE_TYPE(enc, ptr1)) { |
| |
1578 #define LEAD_CASE(n) \ |
| |
1579 case BT_LEAD ## n: \ |
| |
1580 if (*ptr1++ != *ptr2++) \ |
| |
1581 return 0; |
| |
1582 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) |
| |
1583 #undef LEAD_CASE |
| |
1584 /* fall through */ |
| |
1585 if (*ptr1++ != *ptr2++) |
| |
1586 return 0; |
| |
1587 break; |
| |
1588 case BT_NONASCII: |
| |
1589 case BT_NMSTRT: |
| |
1590 #ifdef XML_NS |
| |
1591 case BT_COLON: |
| |
1592 #endif |
| |
1593 case BT_HEX: |
| |
1594 case BT_DIGIT: |
| |
1595 case BT_NAME: |
| |
1596 case BT_MINUS: |
| |
1597 if (*ptr2++ != *ptr1++) |
| |
1598 return 0; |
| |
1599 if (MINBPC(enc) > 1) { |
| |
1600 if (*ptr2++ != *ptr1++) |
| |
1601 return 0; |
| |
1602 if (MINBPC(enc) > 2) { |
| |
1603 if (*ptr2++ != *ptr1++) |
| |
1604 return 0; |
| |
1605 if (MINBPC(enc) > 3) { |
| |
1606 if (*ptr2++ != *ptr1++) |
| |
1607 return 0; |
| |
1608 } |
| |
1609 } |
| |
1610 } |
| |
1611 break; |
| |
1612 default: |
| |
1613 if (MINBPC(enc) == 1 && *ptr1 == *ptr2) |
| |
1614 return 1; |
| |
1615 switch (BYTE_TYPE(enc, ptr2)) { |
| |
1616 case BT_LEAD2: |
| |
1617 case BT_LEAD3: |
| |
1618 case BT_LEAD4: |
| |
1619 case BT_NONASCII: |
| |
1620 case BT_NMSTRT: |
| |
1621 #ifdef XML_NS |
| |
1622 case BT_COLON: |
| |
1623 #endif |
| |
1624 case BT_HEX: |
| |
1625 case BT_DIGIT: |
| |
1626 case BT_NAME: |
| |
1627 case BT_MINUS: |
| |
1628 return 0; |
| |
1629 default: |
| |
1630 return 1; |
| |
1631 } |
| |
1632 } |
| |
1633 } |
| |
1634 /* not reached */ |
| |
1635 } |
| |
1636 |
| |
1637 static |
| |
1638 int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char *ptr2) |
| |
1639 { |
| |
1640 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { |
| |
1641 if (!CHAR_MATCHES(enc, ptr1, *ptr2)) |
| |
1642 return 0; |
| |
1643 } |
| |
1644 switch (BYTE_TYPE(enc, ptr1)) { |
| |
1645 case BT_LEAD2: |
| |
1646 case BT_LEAD3: |
| |
1647 case BT_LEAD4: |
| |
1648 case BT_NONASCII: |
| |
1649 case BT_NMSTRT: |
| |
1650 #ifdef XML_NS |
| |
1651 case BT_COLON: |
| |
1652 #endif |
| |
1653 case BT_HEX: |
| |
1654 case BT_DIGIT: |
| |
1655 case BT_NAME: |
| |
1656 case BT_MINUS: |
| |
1657 return 0; |
| |
1658 default: |
| |
1659 return 1; |
| |
1660 } |
| |
1661 } |
| |
1662 |
| |
1663 static |
| |
1664 int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) |
| |
1665 { |
| |
1666 const char *start = ptr; |
| |
1667 for (;;) { |
| |
1668 switch (BYTE_TYPE(enc, ptr)) { |
| |
1669 #define LEAD_CASE(n) \ |
| |
1670 case BT_LEAD ## n: ptr += n; break; |
| |
1671 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
| |
1672 #undef LEAD_CASE |
| |
1673 case BT_NONASCII: |
| |
1674 case BT_NMSTRT: |
| |
1675 #ifdef XML_NS |
| |
1676 case BT_COLON: |
| |
1677 #endif |
| |
1678 case BT_HEX: |
| |
1679 case BT_DIGIT: |
| |
1680 case BT_NAME: |
| |
1681 case BT_MINUS: |
| |
1682 ptr += MINBPC(enc); |
| |
1683 break; |
| |
1684 default: |
| |
1685 return ptr - start; |
| |
1686 } |
| |
1687 } |
| |
1688 } |
| |
1689 |
| |
1690 static |
| |
1691 const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr) |
| |
1692 { |
| |
1693 for (;;) { |
| |
1694 switch (BYTE_TYPE(enc, ptr)) { |
| |
1695 case BT_LF: |
| |
1696 case BT_CR: |
| |
1697 case BT_S: |
| |
1698 ptr += MINBPC(enc); |
| |
1699 break; |
| |
1700 default: |
| |
1701 return ptr; |
| |
1702 } |
| |
1703 } |
| |
1704 } |
| |
1705 |
| |
1706 static |
| |
1707 void PREFIX(updatePosition)(const ENCODING *enc, |
| |
1708 const char *ptr, |
| |
1709 const char *end, |
| |
1710 POSITION *pos) |
| |
1711 { |
| |
1712 while (ptr != end) { |
| |
1713 switch (BYTE_TYPE(enc, ptr)) { |
| |
1714 #define LEAD_CASE(n) \ |
| |
1715 case BT_LEAD ## n: \ |
| |
1716 ptr += n; \ |
| |
1717 break; |
| |
1718 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
| |
1719 #undef LEAD_CASE |
| |
1720 case BT_LF: |
| |
1721 pos->columnNumber = (unsigned)-1; |
| |
1722 pos->lineNumber++; |
| |
1723 ptr += MINBPC(enc); |
| |
1724 break; |
| |
1725 case BT_CR: |
| |
1726 pos->lineNumber++; |
| |
1727 ptr += MINBPC(enc); |
| |
1728 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) |
| |
1729 ptr += MINBPC(enc); |
| |
1730 pos->columnNumber = (unsigned)-1; |
| |
1731 break; |
| |
1732 default: |
| |
1733 ptr += MINBPC(enc); |
| |
1734 break; |
| |
1735 } |
| |
1736 pos->columnNumber++; |
| |
1737 } |
| |
1738 } |
| |
1739 |
| |
1740 #undef DO_LEAD_CASE |
| |
1741 #undef MULTIBYTE_CASES |
| |
1742 #undef INVALID_CASES |
| |
1743 #undef CHECK_NAME_CASE |
| |
1744 #undef CHECK_NAME_CASES |
| |
1745 #undef CHECK_NMSTRT_CASE |
| |
1746 #undef CHECK_NMSTRT_CASES |