libpurple/purplemarkup.c

changeset 40564
2c5b4dc2e86a
child 40578
4118acc90778
equal deleted inserted replaced
40563:9ce322b6f2d1 40564:2c5b4dc2e86a
1 /*
2 * Purple - Internet Messaging Library
3 * Copyright (C) Pidgin Developers <devel@pidgin.im>
4 *
5 * Purple is the legal property of its developers, whose names are too numerous
6 * to list here. Please refer to the COPYRIGHT file distributed with this
7 * source distribution.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, see <https://www.gnu.org/licenses/>.
21 */
22
23 #include "purplemarkup.h"
24
25 #include "util.h"
26
27 /*
28 * This function is stolen from glib's gmarkup.c and modified to not
29 * replace ' with &apos;
30 */
31 static void append_escaped_text(GString *str,
32 const gchar *text, gssize length)
33 {
34 const gchar *p;
35 const gchar *end;
36 gunichar c;
37
38 p = text;
39 end = text + length;
40
41 while (p != end)
42 {
43 const gchar *next;
44 next = g_utf8_next_char (p);
45
46 switch (*p)
47 {
48 case '&':
49 g_string_append (str, "&amp;");
50 break;
51
52 case '<':
53 g_string_append (str, "&lt;");
54 break;
55
56 case '>':
57 g_string_append (str, "&gt;");
58 break;
59
60 case '"':
61 g_string_append (str, "&quot;");
62 break;
63
64 default:
65 c = g_utf8_get_char (p);
66 if ((0x1 <= c && c <= 0x8) ||
67 (0xb <= c && c <= 0xc) ||
68 (0xe <= c && c <= 0x1f) ||
69 (0x7f <= c && c <= 0x84) ||
70 (0x86 <= c && c <= 0x9f))
71 g_string_append_printf (str, "&#x%x;", c);
72 else
73 g_string_append_len (str, p, next - p);
74 break;
75 }
76
77 p = next;
78 }
79 }
80
81 /* This function is stolen from glib's gmarkup.c */
82 gchar *purple_markup_escape_text(const gchar *text, gssize length)
83 {
84 GString *str;
85
86 g_return_val_if_fail(text != NULL, NULL);
87
88 if (length < 0)
89 length = strlen(text);
90
91 /* prealloc at least as long as original text */
92 str = g_string_sized_new(length);
93 append_escaped_text(str, text, length);
94
95 return g_string_free(str, FALSE);
96 }
97
98 const char *
99 purple_markup_unescape_entity(const char *text, int *length)
100 {
101 const char *pln;
102 int len;
103
104 if (!text || *text != '&')
105 return NULL;
106
107 #define IS_ENTITY(s) (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1)))
108
109 if(IS_ENTITY("&amp;"))
110 pln = "&";
111 else if(IS_ENTITY("&lt;"))
112 pln = "<";
113 else if(IS_ENTITY("&gt;"))
114 pln = ">";
115 else if(IS_ENTITY("&nbsp;"))
116 pln = " ";
117 else if(IS_ENTITY("&copy;"))
118 pln = "\302\251"; /* or use g_unichar_to_utf8(0xa9); */
119 else if(IS_ENTITY("&quot;"))
120 pln = "\"";
121 else if(IS_ENTITY("&reg;"))
122 pln = "\302\256"; /* or use g_unichar_to_utf8(0xae); */
123 else if(IS_ENTITY("&apos;"))
124 pln = "\'";
125 else if(text[1] == '#' && (g_ascii_isxdigit(text[2]) || text[2] == 'x')) {
126 static char buf[7];
127 const char *start = text + 2;
128 char *end;
129 guint64 pound;
130 int base = 10;
131 int buflen;
132
133 if (*start == 'x') {
134 base = 16;
135 start++;
136 }
137
138 pound = g_ascii_strtoull(start, &end, base);
139 if (pound == 0 || pound > INT_MAX || *end != ';') {
140 return NULL;
141 }
142
143 len = (end - text) + 1;
144
145 buflen = g_unichar_to_utf8((gunichar)pound, buf);
146 buf[buflen] = '\0';
147 pln = buf;
148 }
149 else
150 return NULL;
151
152 if (length)
153 *length = len;
154 return pln;
155 }
156
157 char *
158 purple_markup_get_css_property(const gchar *style,
159 const gchar *opt)
160 {
161 const gchar *css_str = style;
162 const gchar *css_value_start;
163 const gchar *css_value_end;
164 gchar *tmp;
165 gchar *ret;
166
167 g_return_val_if_fail(opt != NULL, NULL);
168
169 if (!css_str)
170 return NULL;
171
172 /* find the CSS property */
173 while (1)
174 {
175 /* skip whitespace characters */
176 while (*css_str && g_ascii_isspace(*css_str))
177 css_str++;
178 if (!g_ascii_isalpha(*css_str))
179 return NULL;
180 if (g_ascii_strncasecmp(css_str, opt, strlen(opt)))
181 {
182 /* go to next css property positioned after the next ';' */
183 while (*css_str && *css_str != '"' && *css_str != ';')
184 css_str++;
185 if(*css_str != ';')
186 return NULL;
187 css_str++;
188 }
189 else
190 break;
191 }
192
193 /* find the CSS value position in the string */
194 css_str += strlen(opt);
195 while (*css_str && g_ascii_isspace(*css_str))
196 css_str++;
197 if (*css_str != ':')
198 return NULL;
199 css_str++;
200 while (*css_str && g_ascii_isspace(*css_str))
201 css_str++;
202 if (*css_str == '\0' || *css_str == '"' || *css_str == ';')
203 return NULL;
204
205 /* mark the CSS value */
206 css_value_start = css_str;
207 while (*css_str && *css_str != '"' && *css_str != ';')
208 css_str++;
209 css_value_end = css_str - 1;
210
211 /* Removes trailing whitespace */
212 while (css_value_end > css_value_start && g_ascii_isspace(*css_value_end))
213 css_value_end--;
214
215 tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1);
216 ret = purple_unescape_html(tmp);
217 g_free(tmp);
218
219 return ret;
220 }
221
222 gboolean purple_markup_is_rtl(const char *html)
223 {
224 GData *attributes;
225 const gchar *start, *end;
226 gboolean res = FALSE;
227
228 if (purple_markup_find_tag("span", html, &start, &end, &attributes))
229 {
230 /* tmp is a member of attributes and is free with g_datalist_clear call */
231 const char *tmp = g_datalist_get_data(&attributes, "dir");
232 if (tmp && !g_ascii_strcasecmp(tmp, "RTL"))
233 res = TRUE;
234 if (!res)
235 {
236 tmp = g_datalist_get_data(&attributes, "style");
237 if (tmp)
238 {
239 char *tmp2 = purple_markup_get_css_property(tmp, "direction");
240 if (tmp2 && !g_ascii_strcasecmp(tmp2, "RTL"))
241 res = TRUE;
242 g_free(tmp2);
243 }
244
245 }
246 g_datalist_clear(&attributes);
247 }
248 return res;
249 }
250
251 gboolean
252 purple_markup_find_tag(const char *needle, const char *haystack,
253 const char **start, const char **end, GData **attributes)
254 {
255 GData *attribs;
256 const char *cur = haystack;
257 char *name = NULL;
258 gboolean found = FALSE;
259 gboolean in_tag = FALSE;
260 gboolean in_attr = FALSE;
261 const char *in_quotes = NULL;
262 size_t needlelen;
263
264 g_return_val_if_fail( needle != NULL, FALSE);
265 g_return_val_if_fail( *needle != '\0', FALSE);
266 g_return_val_if_fail( haystack != NULL, FALSE);
267 g_return_val_if_fail( start != NULL, FALSE);
268 g_return_val_if_fail( end != NULL, FALSE);
269 g_return_val_if_fail(attributes != NULL, FALSE);
270
271 needlelen = strlen(needle);
272 g_datalist_init(&attribs);
273
274 while (*cur && !found) {
275 if (in_tag) {
276 if (in_quotes) {
277 const char *close = cur;
278
279 while (*close && *close != *in_quotes)
280 close++;
281
282 /* if we got the close quote, store the value and carry on from *
283 * after it. if we ran to the end of the string, point to the NULL *
284 * and we're outta here */
285 if (*close) {
286 /* only store a value if we have an attribute name */
287 if (name) {
288 size_t len = close - cur;
289 char *val = g_strndup(cur, len);
290
291 g_datalist_set_data_full(&attribs, name, val, g_free);
292 g_free(name);
293 name = NULL;
294 }
295
296 in_quotes = NULL;
297 cur = close + 1;
298 } else {
299 cur = close;
300 }
301 } else if (in_attr) {
302 const char *close = cur;
303
304 while (*close && *close != '>' && *close != '"' &&
305 *close != '\'' && *close != ' ' && *close != '=')
306 close++;
307
308 /* if we got the equals, store the name of the attribute. if we got
309 * the quote, save the attribute and go straight to quote mode.
310 * otherwise the tag closed or we reached the end of the string,
311 * so we can get outta here */
312 switch (*close) {
313 case '"':
314 case '\'':
315 in_quotes = close;
316 /* fall through */
317 case '=':
318 {
319 size_t len = close - cur;
320
321 /* don't store a blank attribute name */
322 if (len) {
323 g_free(name);
324 name = g_ascii_strdown(cur, len);
325 }
326
327 in_attr = FALSE;
328 cur = close + 1;
329 }
330 break;
331 case ' ':
332 case '>':
333 in_attr = FALSE;
334 /* fall through */
335 default:
336 cur = close;
337 break;
338 }
339 } else {
340 switch (*cur) {
341 case ' ':
342 /* swallow extra spaces inside tag */
343 while (*cur && *cur == ' ') cur++;
344 in_attr = TRUE;
345 break;
346 case '>':
347 found = TRUE;
348 *end = cur;
349 break;
350 case '"':
351 case '\'':
352 in_quotes = cur;
353 /* fall through */
354 default:
355 cur++;
356 break;
357 }
358 }
359 } else {
360 /* if we hit a < followed by the name of our tag... */
361 if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) {
362 *start = cur;
363 cur = cur + needlelen + 1;
364
365 /* if we're pointing at a space or a >, we found the right tag. if *
366 * we're not, we've found a longer tag, so we need to skip to the *
367 * >, but not being distracted by >s inside quotes. */
368 if (*cur == ' ' || *cur == '>') {
369 in_tag = TRUE;
370 } else {
371 while (*cur && *cur != '"' && *cur != '\'' && *cur != '>') {
372 if (*cur == '"') {
373 cur++;
374 while (*cur && *cur != '"')
375 cur++;
376 } else if (*cur == '\'') {
377 cur++;
378 while (*cur && *cur != '\'')
379 cur++;
380 } else {
381 cur++;
382 }
383 }
384 }
385 } else {
386 cur++;
387 }
388 }
389 }
390
391 /* clean up any attribute name from a premature termination */
392 g_free(name);
393
394 if (found) {
395 *attributes = attribs;
396 } else {
397 *start = NULL;
398 *end = NULL;
399 *attributes = NULL;
400 }
401
402 return found;
403 }
404
405 struct purple_parse_tag {
406 char *src_tag;
407 char *dest_tag;
408 gboolean ignore;
409 };
410
/* ALLOW_TAG_ALT(x, y): if the input at `c` is an opening tag named x (a
 * string literal), copy it to the XHTML output under the name y and push it
 * on the open-tag stack.
 *
 * The macro expects these names in the expansion scope: `c` (current input
 * position), `tags` (GList of struct purple_parse_tag), `xhtml` and `plain`
 * (output GStrings, either may be NULL).
 *
 * Two forms are handled:
 *   - "<x " followed by attributes: quoted attribute values are re-escaped
 *     with g_markup_escape_text(); if the tag is never closed by '>' or a
 *     '<' appears in it outside quotes, only the leading '<' is emitted
 *     (escaped) and scanning resumes right after it.
 *   - "<x>" or "<x/>" without attributes.
 *
 * NOTE: Do not put `do {} while(0)` around this macro (as this is the method
 *       recommended in the GCC docs). It contains 'continue's that should
 *       affect the while-loop in purple_markup_html_to_xhtml and doing the
 *       above would break that.
 *       Also, remember to put braces in constructs that require them for
 *       multiple statements when using this macro.
 *
 * NOTE(review): the backslash test inside a quoted value looks at `*c`, which
 *       is always '<' at that point, so that branch never runs.  It was
 *       presumably meant to test the scan pointer; confirm the intent before
 *       changing it, because skipping a character there could step over the
 *       terminating NUL. */
#define ALLOW_TAG_ALT(x, y) \
	if (g_ascii_strncasecmp(c, "<" x " ", sizeof("<" x " ") - 1) == 0) { \
		/* scan_  = iterating over full tag \
		 * gt_    = > (end of tag) \
		 * quote_ = start of quoted bit \
		 * lt_    = < inside tag \
		 */ \
		const char *scan_ = c + (sizeof("<" x) - 1); \
		const char *gt_ = NULL; \
		const char *quote_ = NULL; \
		const char *lt_ = NULL; \
		GString *innards = g_string_new(""); \
		for (; scan_ && *scan_; scan_++) { \
			if (!quote_ && (*scan_ == '\"' || *scan_ == '\'')) { \
				quote_ = scan_; \
			} else if (quote_) { \
				if (*scan_ == *quote_) { /* end of quoted bit */ \
					char *unescaped = g_strndup(quote_ + 1, scan_ - quote_ - 1); \
					char *escaped = g_markup_escape_text(unescaped, -1); \
					g_string_append_printf(innards, "%c%s%c", *quote_, escaped, *quote_); \
					g_free(unescaped); \
					g_free(escaped); \
					quote_ = NULL; \
				} else if (*c == '\\') { \
					scan_++; \
				} \
			} else if (*scan_ == '<') { \
				lt_ = scan_; \
			} else if (*scan_ == '>') { \
				gt_ = scan_; \
				break; \
			} else { \
				g_string_append_c(innards, *scan_); \
			} \
		} \
		if (gt_ && !lt_) { /* got an end of tag and no other < earlier */ \
			if (gt_[-1] != '/') { \
				struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
				pt->src_tag = x; \
				pt->dest_tag = y; \
				tags = g_list_prepend(tags, pt); \
			} \
			if (xhtml) { \
				g_string_append(xhtml, "<" y); \
				g_string_append(xhtml, innards->str); \
				g_string_append_c(xhtml, '>'); \
			} \
			c = gt_ + 1; \
		} else { /* got end of tag with earlier < *or* didn't get anything */ \
			if (xhtml) \
				g_string_append(xhtml, "&lt;"); \
			if (plain) \
				g_string_append_c(plain, '<'); \
			c++; \
		} \
		g_string_free(innards, TRUE); \
		continue; \
	} \
	if (g_ascii_strncasecmp(c, "<" x, sizeof("<" x) - 1) == 0 && \
	    (c[sizeof("<" x) - 1] == '>' || \
	     g_ascii_strncasecmp(c + (sizeof("<" x) - 1), "/>", 2) == 0)) { \
		if (xhtml) \
			g_string_append(xhtml, "<" y); \
		c += sizeof("<" x) - 1; \
		if (*c != '/') { \
			struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
			pt->src_tag = x; \
			pt->dest_tag = y; \
			tags = g_list_prepend(tags, pt); \
			if (xhtml) \
				g_string_append_c(xhtml, '>'); \
		} else { \
			if (xhtml) \
				g_string_append(xhtml, "/>"); \
		} \
		c = strchr(c, '>') + 1; \
		continue; \
	}
/* Don't forget to check the note above for ALLOW_TAG_ALT. */
#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
495 void
496 purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
497 char **plain_out)
498 {
499 GString *xhtml = NULL;
500 GString *plain = NULL;
501 GString *url = NULL;
502 GString *cdata = NULL;
503 GList *tags = NULL, *tag;
504 const char *c = html;
505 char quote = '\0';
506
507 #define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
508 quote = *(ptr++); \
509 else \
510 quote = '\0';
511
512 #define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))
513
514 g_return_if_fail(xhtml_out != NULL || plain_out != NULL);
515
516 if(xhtml_out)
517 xhtml = g_string_new("");
518 if(plain_out)
519 plain = g_string_new("");
520
521 while(c && *c) {
522 if(*c == '<') {
523 if(*(c+1) == '/') { /* closing tag */
524 tag = tags;
525 while(tag) {
526 struct purple_parse_tag *pt = tag->data;
527 if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
528 c += strlen(pt->src_tag) + 3;
529 break;
530 }
531 tag = tag->next;
532 }
533 if(tag) {
534 while(tags) {
535 struct purple_parse_tag *pt = tags->data;
536 if(xhtml && !pt->ignore)
537 g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
538 if(plain && purple_strequal(pt->src_tag, "a")) {
539 /* if this is a link, we have to add the url to the plaintext, too */
540 if (cdata && url &&
541 (!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
542 g_utf8_collate(url->str + 7, cdata->str) != 0)))
543 g_string_append_printf(plain, " <%s>", g_strstrip(purple_unescape_html(url->str)));
544 if (cdata) {
545 g_string_free(cdata, TRUE);
546 cdata = NULL;
547 }
548
549 }
550 if(tags == tag)
551 break;
552 tags = g_list_delete_link(tags, tags);
553 g_free(pt);
554 }
555 g_free(tag->data);
556 tags = g_list_delete_link(tags, tag);
557 } else {
558 /* a closing tag we weren't expecting...
559 * we'll let it slide, if it's really a tag...if it's
560 * just a </ we'll escape it properly */
561 const char *end = c+2;
562 while(*end && g_ascii_isalpha(*end))
563 end++;
564 if(*end == '>') {
565 c = end+1;
566 } else {
567 if(xhtml)
568 xhtml = g_string_append(xhtml, "&lt;");
569 if(plain)
570 plain = g_string_append_c(plain, '<');
571 c++;
572 }
573 }
574 } else { /* opening tag */
575 ALLOW_TAG("blockquote");
576 ALLOW_TAG("cite");
577 ALLOW_TAG("div");
578 ALLOW_TAG("em");
579 ALLOW_TAG("h1");
580 ALLOW_TAG("h2");
581 ALLOW_TAG("h3");
582 ALLOW_TAG("h4");
583 ALLOW_TAG("h5");
584 ALLOW_TAG("h6");
585 /* we only allow html to start the message */
586 if(c == html) {
587 ALLOW_TAG("html");
588 }
589 ALLOW_TAG_ALT("i", "em");
590 ALLOW_TAG_ALT("italic", "em");
591 ALLOW_TAG("li");
592 ALLOW_TAG("ol");
593 ALLOW_TAG("p");
594 ALLOW_TAG("pre");
595 ALLOW_TAG("q");
596 ALLOW_TAG("span");
597 ALLOW_TAG("ul");
598
599
600 /* we skip <HR> because it's not legal in XHTML-IM. However,
601 * we still want to send something sensible, so we put a
602 * linebreak in its place. <BR> also needs special handling
603 * because putting a </BR> to close it would just be dumb. */
604 if((!g_ascii_strncasecmp(c, "<br", 3)
605 || !g_ascii_strncasecmp(c, "<hr", 3))
606 && (*(c+3) == '>' ||
607 !g_ascii_strncasecmp(c+3, "/>", 2) ||
608 !g_ascii_strncasecmp(c+3, " />", 3))) {
609 c = strchr(c, '>') + 1;
610 if(xhtml)
611 xhtml = g_string_append(xhtml, "<br/>");
612 if(plain && *c != '\n')
613 plain = g_string_append_c(plain, '\n');
614 continue;
615 }
616 if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
617 struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
618 if (*(c+2) == '>')
619 pt->src_tag = "b";
620 else if (*(c+2) == 'o')
621 pt->src_tag = "bold";
622 else
623 pt->src_tag = "strong";
624 pt->dest_tag = "span";
625 tags = g_list_prepend(tags, pt);
626 c = strchr(c, '>') + 1;
627 if(xhtml)
628 xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
629 continue;
630 }
631 if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
632 struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
633 pt->src_tag = *(c+2) == '>' ? "u" : "underline";
634 pt->dest_tag = "span";
635 tags = g_list_prepend(tags, pt);
636 c = strchr(c, '>') + 1;
637 if (xhtml)
638 xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
639 continue;
640 }
641 if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
642 struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
643 pt->src_tag = *(c+2) == '>' ? "s" : "strike";
644 pt->dest_tag = "span";
645 tags = g_list_prepend(tags, pt);
646 c = strchr(c, '>') + 1;
647 if(xhtml)
648 xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
649 continue;
650 }
651 if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
652 struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
653 pt->src_tag = "sub";
654 pt->dest_tag = "span";
655 tags = g_list_prepend(tags, pt);
656 c = strchr(c, '>') + 1;
657 if(xhtml)
658 xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
659 continue;
660 }
661 if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
662 struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
663 pt->src_tag = "sup";
664 pt->dest_tag = "span";
665 tags = g_list_prepend(tags, pt);
666 c = strchr(c, '>') + 1;
667 if(xhtml)
668 xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
669 continue;
670 }
671 if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
672 const char *p = c + 4;
673 GString *src = NULL, *alt = NULL;
674 #define ESCAPE(from, to) \
675 CHECK_QUOTE(from); \
676 while (VALID_CHAR(from)) { \
677 int len; \
678 if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
679 to = g_string_append(to, "&amp;"); \
680 else if (*from == '\'') \
681 to = g_string_append(to, "&apos;"); \
682 else \
683 to = g_string_append_c(to, *from); \
684 from++; \
685 }
686
687 while (*p && *p != '>') {
688 if (!g_ascii_strncasecmp(p, "src=", 4)) {
689 const char *q = p + 4;
690 if (src)
691 g_string_free(src, TRUE);
692 src = g_string_new("");
693 ESCAPE(q, src);
694 p = q;
695 } else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
696 const char *q = p + 4;
697 if (alt)
698 g_string_free(alt, TRUE);
699 alt = g_string_new("");
700 ESCAPE(q, alt);
701 p = q;
702 } else {
703 p++;
704 }
705 }
706 #undef ESCAPE
707 if ((c = strchr(p, '>')) != NULL)
708 c++;
709 else
710 c = p;
711 /* src and alt are required! */
712 if(src && xhtml)
713 g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
714 if(alt) {
715 if(plain)
716 plain = g_string_append(plain, purple_unescape_html(alt->str));
717 if(!src && xhtml)
718 xhtml = g_string_append(xhtml, alt->str);
719 g_string_free(alt, TRUE);
720 }
721 g_string_free(src, TRUE);
722 continue;
723 }
724 if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) {
725 const char *p = c + 2;
726 struct purple_parse_tag *pt;
727 while (*p && *p != '>') {
728 if (!g_ascii_strncasecmp(p, "href=", 5)) {
729 const char *q = p + 5;
730 if (url)
731 g_string_free(url, TRUE);
732 url = g_string_new("");
733 if (cdata)
734 g_string_free(cdata, TRUE);
735 cdata = g_string_new("");
736 CHECK_QUOTE(q);
737 while (VALID_CHAR(q)) {
738 int len;
739 if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
740 url = g_string_append(url, "&amp;");
741 else if (*q == '"')
742 url = g_string_append(url, "&quot;");
743 else
744 url = g_string_append_c(url, *q);
745 q++;
746 }
747 p = q;
748 } else {
749 p++;
750 }
751 }
752 if ((c = strchr(p, '>')) != NULL)
753 c++;
754 else
755 c = p;
756 pt = g_new0(struct purple_parse_tag, 1);
757 pt->src_tag = "a";
758 pt->dest_tag = "a";
759 tags = g_list_prepend(tags, pt);
760 if(xhtml)
761 g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : "");
762 continue;
763 }
764 #define ESCAPE(from, to) \
765 CHECK_QUOTE(from); \
766 while (VALID_CHAR(from)) { \
767 int len; \
768 if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
769 to = g_string_append(to, "&amp;"); \
770 else if (*from == '\'') \
771 to = g_string_append_c(to, '\"'); \
772 else \
773 to = g_string_append_c(to, *from); \
774 from++; \
775 }
776 if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
777 const char *p = c + 5;
778 GString *style = g_string_new("");
779 struct purple_parse_tag *pt;
780 while (*p && *p != '>') {
781 if (!g_ascii_strncasecmp(p, "back=", 5)) {
782 const char *q = p + 5;
783 GString *color = g_string_new("");
784 ESCAPE(q, color);
785 g_string_append_printf(style, "background: %s; ", color->str);
786 g_string_free(color, TRUE);
787 p = q;
788 } else if (!g_ascii_strncasecmp(p, "color=", 6)) {
789 const char *q = p + 6;
790 GString *color = g_string_new("");
791 ESCAPE(q, color);
792 g_string_append_printf(style, "color: %s; ", color->str);
793 g_string_free(color, TRUE);
794 p = q;
795 } else if (!g_ascii_strncasecmp(p, "face=", 5)) {
796 const char *q = p + 5;
797 GString *face = g_string_new("");
798 ESCAPE(q, face);
799 g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
800 g_string_free(face, TRUE);
801 p = q;
802 } else if (!g_ascii_strncasecmp(p, "size=", 5)) {
803 const char *q = p + 5;
804 int sz;
805 const char *size = "medium";
806 CHECK_QUOTE(q);
807 sz = atoi(q);
808 switch (sz)
809 {
810 case 1:
811 size = "xx-small";
812 break;
813 case 2:
814 size = "small";
815 break;
816 case 3:
817 size = "medium";
818 break;
819 case 4:
820 size = "large";
821 break;
822 case 5:
823 size = "x-large";
824 break;
825 case 6:
826 case 7:
827 size = "xx-large";
828 break;
829 default:
830 break;
831 }
832 g_string_append_printf(style, "font-size: %s; ", size);
833 p = q;
834 } else {
835 p++;
836 }
837 }
838 if ((c = strchr(p, '>')) != NULL)
839 c++;
840 else
841 c = p;
842 pt = g_new0(struct purple_parse_tag, 1);
843 pt->src_tag = "font";
844 pt->dest_tag = "span";
845 tags = g_list_prepend(tags, pt);
846 if(style->len && xhtml)
847 g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
848 else
849 pt->ignore = TRUE;
850 g_string_free(style, TRUE);
851 continue;
852 }
853 #undef ESCAPE
854 if (!g_ascii_strncasecmp(c, "<body ", 6)) {
855 const char *p = c + 6;
856 gboolean did_something = FALSE;
857 while (*p && *p != '>') {
858 if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
859 const char *q = p + 8;
860 struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
861 GString *color = g_string_new("");
862 CHECK_QUOTE(q);
863 while (VALID_CHAR(q)) {
864 color = g_string_append_c(color, *q);
865 q++;
866 }
867 if (xhtml)
868 g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
869 g_string_free(color, TRUE);
870 if ((c = strchr(p, '>')) != NULL)
871 c++;
872 else
873 c = p;
874 pt->src_tag = "body";
875 pt->dest_tag = "span";
876 tags = g_list_prepend(tags, pt);
877 did_something = TRUE;
878 break;
879 }
880 p++;
881 }
882 if (did_something) continue;
883 }
884 /* this has to come after the special case for bgcolor */
885 ALLOW_TAG("body");
886 if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
887 char *p = strstr(c + strlen("<!--"), "-->");
888 if(p) {
889 if(xhtml)
890 xhtml = g_string_append(xhtml, "<!--");
891 c += strlen("<!--");
892 continue;
893 }
894 }
895
896 if(xhtml)
897 xhtml = g_string_append(xhtml, "&lt;");
898 if(plain)
899 plain = g_string_append_c(plain, '<');
900 c++;
901 }
902 } else if(*c == '&') {
903 char buf[7];
904 const char *pln;
905 int len;
906
907 if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
908 len = 1;
909 g_snprintf(buf, sizeof(buf), "%c", *c);
910 pln = buf;
911 }
912 if(xhtml)
913 xhtml = g_string_append_len(xhtml, c, len);
914 if(plain)
915 plain = g_string_append(plain, pln);
916 if(cdata)
917 cdata = g_string_append_len(cdata, c, len);
918 c += len;
919 } else {
920 if(xhtml)
921 xhtml = g_string_append_c(xhtml, *c);
922 if(plain)
923 plain = g_string_append_c(plain, *c);
924 if(cdata)
925 cdata = g_string_append_c(cdata, *c);
926 c++;
927 }
928 }
929 if(xhtml) {
930 for (tag = tags; tag ; tag = tag->next) {
931 struct purple_parse_tag *pt = tag->data;
932 if(!pt->ignore)
933 g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
934 }
935 }
936 g_list_free(tags);
937 if(xhtml_out)
938 *xhtml_out = g_string_free(xhtml, FALSE);
939 if(plain_out)
940 *plain_out = g_string_free(plain, FALSE);
941 if(url)
942 g_string_free(url, TRUE);
943 if (cdata)
944 g_string_free(cdata, TRUE);
945 #undef CHECK_QUOTE
946 #undef VALID_CHAR
947 }
948
949 /* The following are probably reasonable changes:
950 * - \n should be converted to a normal space
951 * - in addition to <br>, <p> and <div> etc. should also be converted into \n
952 * - We want to turn </td>#whitespace<td> sequences into a single tab
953 * - We want to turn </tr>#whitespace<tr> sequences into a single \n
954 * - <script>...</script> and <style>...</style> should be completely removed
955 */
956
957 char *
958 purple_markup_strip_html(const char *str)
959 {
960 int i, j, k, entlen;
961 gboolean visible = TRUE;
962 gboolean closing_td_p = FALSE;
963 gchar *str2;
964 const gchar *cdata_close_tag = NULL, *ent;
965 gchar *href = NULL;
966 int href_st = 0;
967
968 if(!str)
969 return NULL;
970
971 str2 = g_strdup(str);
972
973 for (i = 0, j = 0; str2[i]; i++)
974 {
975 if (str2[i] == '<')
976 {
977 if (cdata_close_tag)
978 {
979 /* Note: Don't even assume any other tag is a tag in CDATA */
980 if (g_ascii_strncasecmp(str2 + i, cdata_close_tag,
981 strlen(cdata_close_tag)) == 0)
982 {
983 i += strlen(cdata_close_tag) - 1;
984 cdata_close_tag = NULL;
985 }
986 continue;
987 }
988 else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
989 {
990 str2[j++] = '\t';
991 visible = TRUE;
992 }
993 else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0)
994 {
995 closing_td_p = TRUE;
996 visible = FALSE;
997 }
998 else
999 {
1000 closing_td_p = FALSE;
1001 visible = TRUE;
1002 }
1003
1004 k = i + 1;
1005
1006 if(g_ascii_isspace(str2[k]))
1007 visible = TRUE;
1008 else if (str2[k])
1009 {
1010 /* Scan until we end the tag either implicitly (closed start
1011 * tag) or explicitly, using a sloppy method (i.e., < or >
1012 * inside quoted attributes will screw us up)
1013 */
1014 while (str2[k] && str2[k] != '<' && str2[k] != '>')
1015 {
1016 k++;
1017 }
1018
1019 /* If we've got an <a> tag with an href, save the address
1020 * to print later. */
1021 if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 &&
1022 g_ascii_isspace(str2[i+2]))
1023 {
1024 int st; /* start of href, inclusive [ */
1025 int end; /* end of href, exclusive ) */
1026 char delim = ' ';
1027 /* Find start of href */
1028 for (st = i + 3; st < k; st++)
1029 {
1030 if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0)
1031 {
1032 st += 5;
1033 if (str2[st] == '"' || str2[st] == '\'')
1034 {
1035 delim = str2[st];
1036 st++;
1037 }
1038 break;
1039 }
1040 }
1041 /* find end of address */
1042 for (end = st; end < k && str2[end] != delim; end++)
1043 {
1044 /* All the work is done in the loop construct above. */
1045 }
1046
1047 /* If there's an address, save it. If there was
1048 * already one saved, kill it. */
1049 if (st < k)
1050 {
1051 char *tmp;
1052 g_free(href);
1053 tmp = g_strndup(str2 + st, end - st);
1054 href = purple_unescape_html(tmp);
1055 g_free(tmp);
1056 href_st = j;
1057 }
1058 }
1059
1060 /* Replace </a> with an ascii representation of the
1061 * address the link was pointing to. */
1062 else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0)
1063 {
1064 size_t hrlen = strlen(href);
1065
1066 /* Only insert the href if it's different from the CDATA. */
1067 if ((hrlen != (gsize)(j - href_st) ||
1068 strncmp(str2 + href_st, href, hrlen)) &&
1069 (hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */
1070 strncmp(str2 + href_st, href + 7, hrlen - 7)))
1071 {
1072 str2[j++] = ' ';
1073 str2[j++] = '(';
1074 memmove(str2 + j, href, hrlen);
1075 j += hrlen;
1076 str2[j++] = ')';
1077 g_free(href);
1078 href = NULL;
1079 }
1080 }
1081
1082 /* Check for tags which should be mapped to newline (but ignore some of
1083 * the tags at the beginning of the text) */
1084 else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0
1085 || g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0
1086 || g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0
1087 || g_ascii_strncasecmp(str2 + i, "<li", 3) == 0
1088 || g_ascii_strncasecmp(str2 + i, "<div", 4) == 0))
1089 || g_ascii_strncasecmp(str2 + i, "<br", 3) == 0
1090 || g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0)
1091 {
1092 str2[j++] = '\n';
1093 }
1094 /* Check for tags which begin CDATA and need to be closed */
1095 else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0)
1096 {
1097 cdata_close_tag = "</script>";
1098 }
1099 else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0)
1100 {
1101 cdata_close_tag = "</style>";
1102 }
1103 /* Update the index and continue checking after the tag */
1104 i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k;
1105 continue;
1106 }
1107 }
1108 else if (cdata_close_tag)
1109 {
1110 continue;
1111 }
1112 else if (!g_ascii_isspace(str2[i]))
1113 {
1114 visible = TRUE;
1115 }
1116
1117 if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL)
1118 {
1119 while (*ent)
1120 str2[j++] = *ent++;
1121 i += entlen - 1;
1122 continue;
1123 }
1124
1125 if (visible)
1126 str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
1127 }
1128
1129 g_free(href);
1130
1131 str2[j] = '\0';
1132
1133 return str2;
1134 }
1135
1136 static gboolean
1137 badchar(char c)
1138 {
1139 switch (c) {
1140 case ' ':
1141 case ',':
1142 case '\0':
1143 case '\n':
1144 case '\r':
1145 case '<':
1146 case '>':
1147 case '"':
1148 return TRUE;
1149 default:
1150 return FALSE;
1151 }
1152 }
1153
1154 static gboolean
1155 badentity(const char *c)
1156 {
1157 if (!g_ascii_strncasecmp(c, "&lt;", 4) ||
1158 !g_ascii_strncasecmp(c, "&gt;", 4) ||
1159 !g_ascii_strncasecmp(c, "&quot;", 6)) {
1160 return TRUE;
1161 }
1162 return FALSE;
1163 }
1164
1165 static const char *
1166 process_link(GString *ret,
1167 const char *start, const char *c,
1168 int matchlen,
1169 const char *urlprefix,
1170 int inside_paren)
1171 {
1172 char *url_buf, *tmpurlbuf;
1173 const char *t;
1174
1175 for (t = c;; t++) {
1176 if (!badchar(*t) && !badentity(t))
1177 continue;
1178
1179 if (t - c == matchlen)
1180 break;
1181
1182 if (*t == ',' && *(t + 1) != ' ') {
1183 continue;
1184 }
1185
1186 if (t > start && *(t - 1) == '.')
1187 t--;
1188 if (t > start && *(t - 1) == ')' && inside_paren > 0)
1189 t--;
1190
1191 url_buf = g_strndup(c, t - c);
1192 tmpurlbuf = purple_unescape_html(url_buf);
1193 g_string_append_printf(ret, "<A HREF=\"%s%s\">%s</A>",
1194 urlprefix,
1195 tmpurlbuf, url_buf);
1196 g_free(tmpurlbuf);
1197 g_free(url_buf);
1198 return t;
1199 }
1200
1201 return c;
1202 }
1203
1204 char *
1205 purple_markup_linkify(const char *text)
1206 {
1207 const char *c, *t, *q = NULL;
1208 char *tmpurlbuf, *url_buf;
1209 gunichar g;
1210 gboolean inside_html = FALSE;
1211 int inside_paren = 0;
1212 GString *ret;
1213
1214 if (text == NULL)
1215 return NULL;
1216
1217 ret = g_string_new("");
1218
1219 c = text;
1220 while (*c) {
1221
1222 if(*c == '(' && !inside_html) {
1223 inside_paren++;
1224 ret = g_string_append_c(ret, *c);
1225 c++;
1226 }
1227
1228 if(inside_html) {
1229 if(*c == '>') {
1230 inside_html = FALSE;
1231 } else if(!q && (*c == '\"' || *c == '\'')) {
1232 q = c;
1233 } else if(q) {
1234 if(*c == *q)
1235 q = NULL;
1236 }
1237 } else if(*c == '<') {
1238 inside_html = TRUE;
1239 if (!g_ascii_strncasecmp(c, "<A", 2)) {
1240 while (1) {
1241 if (!g_ascii_strncasecmp(c, "/A>", 3)) {
1242 inside_html = FALSE;
1243 break;
1244 }
1245 ret = g_string_append_c(ret, *c);
1246 c++;
1247 if (!(*c))
1248 break;
1249 }
1250 }
1251 } else if (!g_ascii_strncasecmp(c, "http://", 7)) {
1252 c = process_link(ret, text, c, 7, "", inside_paren);
1253 } else if (!g_ascii_strncasecmp(c, "https://", 8)) {
1254 c = process_link(ret, text, c, 8, "", inside_paren);
1255 } else if (!g_ascii_strncasecmp(c, "ftp://", 6)) {
1256 c = process_link(ret, text, c, 6, "", inside_paren);
1257 } else if (!g_ascii_strncasecmp(c, "sftp://", 7)) {
1258 c = process_link(ret, text, c, 7, "", inside_paren);
1259 } else if (!g_ascii_strncasecmp(c, "file://", 7)) {
1260 c = process_link(ret, text, c, 7, "", inside_paren);
1261 } else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
1262 c = process_link(ret, text, c, 4, "http://", inside_paren);
1263 } else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
1264 c = process_link(ret, text, c, 4, "ftp://", inside_paren);
1265 } else if (!g_ascii_strncasecmp(c, "xmpp:", 5) && (c == text || badchar(c[-1]) || badentity(c-1))) {
1266 c = process_link(ret, text, c, 5, "", inside_paren);
1267 } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
1268 t = c;
1269 while (1) {
1270 if (badchar(*t) || badentity(t)) {
1271 char *d;
1272 if (t - c == 7) {
1273 break;
1274 }
1275 if (t > text && *(t - 1) == '.')
1276 t--;
1277 if ((d = strstr(c + 7, "?")) != NULL && d < t)
1278 url_buf = g_strndup(c + 7, d - c - 7);
1279 else
1280 url_buf = g_strndup(c + 7, t - c - 7);
1281 if (!purple_email_is_valid(url_buf)) {
1282 g_free(url_buf);
1283 break;
1284 }
1285 g_free(url_buf);
1286 url_buf = g_strndup(c, t - c);
1287 tmpurlbuf = purple_unescape_html(url_buf);
1288 g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
1289 tmpurlbuf, url_buf);
1290 g_free(url_buf);
1291 g_free(tmpurlbuf);
1292 c = t;
1293 break;
1294 }
1295 t++;
1296 }
1297 } else if (c != text && (*c == '@')) {
1298 int flag;
1299 GString *gurl_buf = NULL;
1300 const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0";
1301
1302 if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1)))
1303 flag = 0;
1304 else {
1305 flag = 1;
1306 gurl_buf = g_string_new("");
1307 }
1308
1309 t = c;
1310 while (flag) {
1311 /* iterate backwards grabbing the local part of an email address */
1312 g = g_utf8_get_char(t);
1313 if (badchar(*t) || (g >= 127) || (*t == '(') ||
1314 ((*t == ';') && ((t > (text+2) && (!g_ascii_strncasecmp(t - 3, "&lt;", 4) ||
1315 !g_ascii_strncasecmp(t - 3, "&gt;", 4))) ||
1316 (t > (text+4) && (!g_ascii_strncasecmp(t - 5, "&quot;", 6)))))) {
1317 /* local part will already be part of ret, strip it out */
1318 ret = g_string_truncate(ret, ret->len - (c - t));
1319 ret = g_string_append_unichar(ret, g);
1320 break;
1321 } else {
1322 g_string_prepend_unichar(gurl_buf, g);
1323 t = g_utf8_find_prev_char(text, t);
1324 if (t < text) {
1325 ret = g_string_assign(ret, "");
1326 break;
1327 }
1328 }
1329 }
1330
1331 t = g_utf8_find_next_char(c, NULL);
1332
1333 while (flag) {
1334 /* iterate forwards grabbing the domain part of an email address */
1335 g = g_utf8_get_char(t);
1336 if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) {
1337 char *d;
1338
1339 url_buf = g_string_free(gurl_buf, FALSE);
1340 gurl_buf = NULL;
1341
1342 /* strip off trailing periods */
1343 if (*url_buf) {
1344 for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
1345 *d = '\0';
1346 }
1347
1348 tmpurlbuf = purple_unescape_html(url_buf);
1349 if (purple_email_is_valid(tmpurlbuf)) {
1350 g_string_append_printf(ret, "<A HREF=\"mailto:%s\">%s</A>",
1351 tmpurlbuf, url_buf);
1352 } else {
1353 g_string_append(ret, url_buf);
1354 }
1355 g_free(url_buf);
1356 g_free(tmpurlbuf);
1357 c = t;
1358
1359 break;
1360 } else {
1361 g_string_append_unichar(gurl_buf, g);
1362 t = g_utf8_find_next_char(t, NULL);
1363 }
1364 }
1365
1366 if (gurl_buf) {
1367 g_string_free(gurl_buf, TRUE);
1368 }
1369 }
1370
1371 if(*c == ')' && !inside_html) {
1372 inside_paren--;
1373 ret = g_string_append_c(ret, *c);
1374 c++;
1375 }
1376
1377 if (*c == 0)
1378 break;
1379
1380 ret = g_string_append_c(ret, *c);
1381 c++;
1382
1383 }
1384 return g_string_free(ret, FALSE);
1385 }
1386
1387 char *purple_unescape_text(const char *in)
1388 {
1389 GString *ret;
1390 const char *c = in;
1391
1392 if (in == NULL)
1393 return NULL;
1394
1395 ret = g_string_new("");
1396 while (*c) {
1397 int len;
1398 const char *ent;
1399
1400 if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) {
1401 g_string_append(ret, ent);
1402 c += len;
1403 } else {
1404 g_string_append_c(ret, *c);
1405 c++;
1406 }
1407 }
1408
1409 return g_string_free(ret, FALSE);
1410 }
1411
1412 char *purple_unescape_html(const char *html)
1413 {
1414 GString *ret;
1415 const char *c = html;
1416
1417 if (html == NULL)
1418 return NULL;
1419
1420 ret = g_string_new("");
1421 while (*c) {
1422 int len;
1423 const char *ent;
1424
1425 if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) {
1426 g_string_append(ret, ent);
1427 c += len;
1428 } else if (!strncmp(c, "<br>", 4)) {
1429 g_string_append_c(ret, '\n');
1430 c += 4;
1431 } else {
1432 g_string_append_c(ret, *c);
1433 c++;
1434 }
1435 }
1436
1437 return g_string_free(ret, FALSE);
1438 }
1439
1440 char *
1441 purple_markup_slice(const char *str, guint x, guint y)
1442 {
1443 GString *ret;
1444 GQueue *q;
1445 guint z = 0;
1446 gboolean appended = FALSE;
1447 gunichar c;
1448 char *tag;
1449
1450 g_return_val_if_fail(str != NULL, NULL);
1451 g_return_val_if_fail(x <= y, NULL);
1452
1453 if (x == y)
1454 return g_strdup("");
1455
1456 ret = g_string_new("");
1457 q = g_queue_new();
1458
1459 while (*str && (z < y)) {
1460 c = g_utf8_get_char(str);
1461
1462 if (c == '<') {
1463 char *end = strchr(str, '>');
1464
1465 if (!end) {
1466 g_string_free(ret, TRUE);
1467 while ((tag = g_queue_pop_head(q)))
1468 g_free(tag);
1469 g_queue_free(q);
1470 return NULL;
1471 }
1472
1473 if (!g_ascii_strncasecmp(str, "<img ", 5)) {
1474 z += strlen("[Image]");
1475 } else if (!g_ascii_strncasecmp(str, "<br", 3)) {
1476 z += 1;
1477 } else if (!g_ascii_strncasecmp(str, "<hr>", 4)) {
1478 z += strlen("\n---\n");
1479 } else if (!g_ascii_strncasecmp(str, "</", 2)) {
1480 /* pop stack */
1481 char *tmp;
1482
1483 tmp = g_queue_pop_head(q);
1484 g_free(tmp);
1485 /* z += 0; */
1486 } else {
1487 /* push it unto the stack */
1488 char *tmp;
1489
1490 tmp = g_strndup(str, end - str + 1);
1491 g_queue_push_head(q, tmp);
1492 /* z += 0; */
1493 }
1494
1495 if (z >= x) {
1496 g_string_append_len(ret, str, end - str + 1);
1497 }
1498
1499 str = end;
1500 } else if (c == '&') {
1501 char *end = strchr(str, ';');
1502 if (!end) {
1503 g_string_free(ret, TRUE);
1504 while ((tag = g_queue_pop_head(q)))
1505 g_free(tag);
1506 g_queue_free(q);
1507
1508 return NULL;
1509 }
1510
1511 if (z >= x)
1512 g_string_append_len(ret, str, end - str + 1);
1513
1514 z++;
1515 str = end;
1516 } else {
1517 if (z == x && z > 0 && !appended) {
1518 GList *l = q->tail;
1519
1520 while (l) {
1521 tag = l->data;
1522 g_string_append(ret, tag);
1523 l = l->prev;
1524 }
1525 appended = TRUE;
1526 }
1527
1528 if (z >= x)
1529 g_string_append_unichar(ret, c);
1530 z++;
1531 }
1532
1533 str = g_utf8_next_char(str);
1534 }
1535
1536 while ((tag = g_queue_pop_head(q))) {
1537 char *name;
1538
1539 name = purple_markup_get_tag_name(tag);
1540 g_string_append_printf(ret, "</%s>", name);
1541 g_free(name);
1542 g_free(tag);
1543 }
1544
1545 g_queue_free(q);
1546 return g_string_free(ret, FALSE);
1547 }
1548
1549 char *
1550 purple_markup_get_tag_name(const char *tag)
1551 {
1552 int i;
1553 g_return_val_if_fail(tag != NULL, NULL);
1554 g_return_val_if_fail(*tag == '<', NULL);
1555
1556 for (i = 1; tag[i]; i++)
1557 if (tag[i] == '>' || tag[i] == ' ' || tag[i] == '/')
1558 break;
1559
1560 return g_strndup(tag+1, i-1);
1561 }

mercurial