libpurple/purplemarkup.c

changeset 40564
2c5b4dc2e86a
child 40578
4118acc90778
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libpurple/purplemarkup.c	Fri Oct 16 02:27:21 2020 -0500
@@ -0,0 +1,1561 @@
+/*
+ * Purple - Internet Messaging Library
+ * Copyright (C) Pidgin Developers <devel@pidgin.im>
+ *
+ * Purple is the legal property of its developers, whose names are too numerous
+ * to list here.  Please refer to the COPYRIGHT file distributed with this
+ * source distribution.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "purplemarkup.h"
+
+#include "util.h"
+
+/*
+ * Appends the first @length bytes of the UTF-8 string @text to @str,
+ * replacing &, <, > and " with their named entities and a handful of
+ * control characters with numeric character references.
+ *
+ * Derived from glib's gmarkup.c; the difference is that ' is left alone
+ * instead of being turned into &apos;
+ */
+static void
+append_escaped_text(GString *str, const gchar *text, gssize length)
+{
+	const gchar *cursor = text;
+	const gchar *const stop = text + length;
+
+	while (cursor != stop) {
+		const gchar *following = g_utf8_next_char(cursor);
+		const gchar *entity = NULL;
+
+		/* markup-significant ASCII characters get a named entity */
+		if (*cursor == '&')
+			entity = "&amp;";
+		else if (*cursor == '<')
+			entity = "&lt;";
+		else if (*cursor == '>')
+			entity = "&gt;";
+		else if (*cursor == '"')
+			entity = "&quot;";
+
+		if (entity != NULL) {
+			g_string_append(str, entity);
+		} else {
+			gunichar code = g_utf8_get_char(cursor);
+			gboolean needs_reference =
+				(0x1 <= code && code <= 0x8) ||
+				(0xb <= code && code <= 0xc) ||
+				(0xe <= code && code <= 0x1f) ||
+				(0x7f <= code && code <= 0x84) ||
+				(0x86 <= code && code <= 0x9f);
+
+			if (needs_reference) {
+				g_string_append_printf(str, "&#x%x;", code);
+			} else {
+				/* copy the whole (possibly multi-byte) character as is */
+				g_string_append_len(str, cursor, following - cursor);
+			}
+		}
+
+		cursor = following;
+	}
+}
+
+/*
+ * Returns a newly allocated copy of @text with markup characters escaped
+ * (see append_escaped_text()).  A negative @length means "use strlen(text)".
+ * Returns NULL (via g_return_val_if_fail) when @text is NULL.
+ *
+ * Derived from glib's gmarkup.c.
+ */
+gchar *purple_markup_escape_text(const gchar *text, gssize length)
+{
+	GString *escaped;
+	gssize nbytes = length;
+
+	g_return_val_if_fail(text != NULL, NULL);
+
+	if (nbytes < 0)
+		nbytes = strlen(text);
+
+	/* the escaped result is never shorter than the input, so start there */
+	escaped = g_string_sized_new(nbytes);
+	append_escaped_text(escaped, text, nbytes);
+
+	/* hand the character data to the caller, drop only the GString shell */
+	return g_string_free(escaped, FALSE);
+}
+
+/*
+ * Decodes the single HTML entity that @text starts with.
+ *
+ * text:   string expected to begin with '&'.
+ * length: optional out-parameter; on success receives the number of bytes
+ *         the entity occupies in @text (including '&' and ';').
+ *
+ * Returns the replacement text, or NULL when @text is NULL, does not start
+ * with '&', or does not start with a recognised entity.  For the named
+ * entities the result is a string literal; for numeric references
+ * (&#NNN; / &#xHHH;) it points into a function-local static buffer, so it
+ * is overwritten by the next numeric decode and must not be freed.
+ */
+const char *
+purple_markup_unescape_entity(const char *text, int *length)
+{
+	const char *pln;
+	int len;
+
+	if (!text || *text != '&')
+		return NULL;
+
+/* Case-insensitive prefix test; as a side effect stores the entity's byte
+ * length in len, which is what gets reported through *length below.
+ * Note that the macro is not #undef'd after use in this function. */
+#define IS_ENTITY(s)  (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1)))
+
+	if(IS_ENTITY("&amp;"))
+		pln = "&";
+	else if(IS_ENTITY("&lt;"))
+		pln = "<";
+	else if(IS_ENTITY("&gt;"))
+		pln = ">";
+	else if(IS_ENTITY("&nbsp;"))
+		pln = " ";
+	else if(IS_ENTITY("&copy;"))
+		pln = "\302\251";      /* or use g_unichar_to_utf8(0xa9); */
+	else if(IS_ENTITY("&quot;"))
+		pln = "\"";
+	else if(IS_ENTITY("&reg;"))
+		pln = "\302\256";      /* or use g_unichar_to_utf8(0xae); */
+	else if(IS_ENTITY("&apos;"))
+		pln = "\'";
+	else if(text[1] == '#' && (g_ascii_isxdigit(text[2]) || text[2] == 'x')) {
+		/* numeric character reference: decimal, or hex when prefixed by 'x' */
+		static char buf[7];
+		const char *start = text + 2;
+		char *end;
+		guint64 pound;
+		int base = 10;
+		int buflen;
+
+		if (*start == 'x') {
+			base = 16;
+			start++;
+		}
+
+		/* reject a zero value, anything above INT_MAX, and a reference that
+		 * is not terminated by ';' right after the digits */
+		pound = g_ascii_strtoull(start, &end, base);
+		if (pound == 0 || pound > INT_MAX || *end != ';') {
+			return NULL;
+		}
+
+		/* bytes consumed: everything from '&' through the ';' */
+		len = (end - text) + 1;
+
+		buflen = g_unichar_to_utf8((gunichar)pound, buf);
+		buf[buflen] = '\0';
+		pln = buf;
+	}
+	else
+		return NULL;
+
+	if (length)
+		*length = len;
+	return pln;
+}
+
+/*
+ * Extracts the value of the CSS property @opt from the inline style
+ * string @style (e.g. the contents of a style="..." attribute).
+ *
+ * style: "name: value; name: value" list; may be NULL.
+ * opt:   property name to look for, compared case-insensitively.
+ *
+ * Returns the value with surrounding whitespace removed and passed through
+ * purple_unescape_html(), or NULL when @style is NULL, the property is not
+ * present, or its value is empty.  Callers in this file release the result
+ * with g_free().
+ *
+ * A property only matches when its whole name equals @opt, i.e. the name is
+ * followed by optional whitespace and ':'.  A property that merely starts
+ * with @opt (such as "font-size" when looking for "font") is skipped and the
+ * search continues with the next property instead of giving up.
+ */
+char *
+purple_markup_get_css_property(const gchar *style,
+				const gchar *opt)
+{
+	const gchar *css_str = style;
+	const gchar *css_value_start;
+	const gchar *css_value_end;
+	gsize opt_len;
+	gchar *tmp;
+	gchar *ret;
+
+	g_return_val_if_fail(opt != NULL, NULL);
+
+	if (!css_str)
+		return NULL;
+
+	opt_len = strlen(opt);
+
+	/* find the CSS property */
+	while (1)
+	{
+		/* skip whitespace characters */
+		while (*css_str && g_ascii_isspace(*css_str))
+			css_str++;
+		if (!g_ascii_isalpha(*css_str))
+			return NULL;
+
+		if (!g_ascii_strncasecmp(css_str, opt, opt_len))
+		{
+			/* the name only counts as a match if the ':' separator
+			 * follows it, otherwise we matched a prefix of a longer name */
+			const gchar *after = css_str + opt_len;
+
+			while (*after && g_ascii_isspace(*after))
+				after++;
+			if (*after == ':')
+			{
+				css_str = after + 1;
+				break;
+			}
+		}
+
+		/* go to next css property positioned after the next ';' */
+		while (*css_str && *css_str != '"' && *css_str != ';')
+			css_str++;
+		if (*css_str != ';')
+			return NULL;
+		css_str++;
+	}
+
+	/* find the CSS value position in the string */
+	while (*css_str && g_ascii_isspace(*css_str))
+		css_str++;
+	if (*css_str == '\0' || *css_str == '"' || *css_str == ';')
+		return NULL;
+
+	/* mark the CSS value; it is at least one character long here */
+	css_value_start = css_str;
+	while (*css_str && *css_str != '"' && *css_str != ';')
+		css_str++;
+	css_value_end = css_str - 1;
+
+	/* Removes trailing whitespace */
+	while (css_value_end > css_value_start && g_ascii_isspace(*css_value_end))
+		css_value_end--;
+
+	tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1);
+	ret = purple_unescape_html(tmp);
+	g_free(tmp);
+
+	return ret;
+}
+
+/*
+ * Reports whether the first <span> tag in @html requests right-to-left
+ * text, either through dir="rtl" or through a "direction: rtl" entry in its
+ * style attribute (both compared case-insensitively).  Returns FALSE when
+ * no <span> tag is found.
+ */
+gboolean purple_markup_is_rtl(const char *html)
+{
+	GData *attrs;
+	const gchar *tag_begin, *tag_end;
+	const char *value;
+	gboolean rtl = FALSE;
+
+	if (!purple_markup_find_tag("span", html, &tag_begin, &tag_end, &attrs))
+		return FALSE;
+
+	/* value is owned by attrs and goes away with g_datalist_clear() */
+	value = g_datalist_get_data(&attrs, "dir");
+	if (value != NULL && g_ascii_strcasecmp(value, "RTL") == 0) {
+		rtl = TRUE;
+	} else {
+		value = g_datalist_get_data(&attrs, "style");
+		if (value != NULL) {
+			char *direction = purple_markup_get_css_property(value, "direction");
+
+			if (direction != NULL && g_ascii_strcasecmp(direction, "RTL") == 0)
+				rtl = TRUE;
+			g_free(direction);
+		}
+	}
+
+	g_datalist_clear(&attrs);
+	return rtl;
+}
+
+/*
+ * Finds the first opening tag named @needle in @haystack and collects its
+ * attributes.
+ *
+ * needle:     tag name without the angle bracket; matched case-insensitively
+ *             and only when followed by a space or '>'.
+ * haystack:   markup to search.
+ * start:      receives a pointer to the tag's '<'.
+ * end:        receives a pointer to the tag's closing '>'.
+ * attributes: receives a datalist mapping lower-cased attribute names to
+ *             newly allocated values (quoted values only).
+ *
+ * Returns TRUE when a complete tag was found; the caller then owns
+ * *attributes and must release it with g_datalist_clear().  Otherwise
+ * returns FALSE, sets *start, *end and *attributes to NULL, and frees
+ * anything collected from a tag that never closed.
+ */
+gboolean
+purple_markup_find_tag(const char *needle, const char *haystack,
+					 const char **start, const char **end, GData **attributes)
+{
+	GData *attribs;
+	const char *cur = haystack;
+	char *name = NULL;
+	gboolean found = FALSE;
+	gboolean in_tag = FALSE;
+	gboolean in_attr = FALSE;
+	const char *in_quotes = NULL;
+	size_t needlelen;
+
+	g_return_val_if_fail(    needle != NULL, FALSE);
+	g_return_val_if_fail(   *needle != '\0', FALSE);
+	g_return_val_if_fail(  haystack != NULL, FALSE);
+	g_return_val_if_fail(     start != NULL, FALSE);
+	g_return_val_if_fail(       end != NULL, FALSE);
+	g_return_val_if_fail(attributes != NULL, FALSE);
+
+	needlelen = strlen(needle);
+	g_datalist_init(&attribs);
+
+	while (*cur && !found) {
+		if (in_tag) {
+			if (in_quotes) {
+				const char *close = cur;
+
+				while (*close && *close != *in_quotes)
+					close++;
+
+				/* if we got the close quote, store the value and carry on from    *
+				 * after it. if we ran to the end of the string, point to the NULL *
+				 * and we're outta here */
+				if (*close) {
+					/* only store a value if we have an attribute name */
+					if (name) {
+						size_t len = close - cur;
+						char *val = g_strndup(cur, len);
+
+						g_datalist_set_data_full(&attribs, name, val, g_free);
+						g_free(name);
+						name = NULL;
+					}
+
+					in_quotes = NULL;
+					cur = close + 1;
+				} else {
+					cur = close;
+				}
+			} else if (in_attr) {
+				const char *close = cur;
+
+				while (*close && *close != '>' && *close != '"' &&
+						*close != '\'' && *close != ' ' && *close != '=')
+					close++;
+
+				/* if we got the equals, store the name of the attribute. if we got
+				 * the quote, save the attribute and go straight to quote mode.
+				 * otherwise the tag closed or we reached the end of the string,
+				 * so we can get outta here */
+				switch (*close) {
+				case '"':
+				case '\'':
+					in_quotes = close;
+					/* fall through */
+				case '=':
+					{
+						size_t len = close - cur;
+
+						/* don't store a blank attribute name */
+						if (len) {
+							g_free(name);
+							name = g_ascii_strdown(cur, len);
+						}
+
+						in_attr = FALSE;
+						cur = close + 1;
+					}
+					break;
+				case ' ':
+				case '>':
+					in_attr = FALSE;
+					/* fall through */
+				default:
+					cur = close;
+					break;
+				}
+			} else {
+				switch (*cur) {
+				case ' ':
+					/* swallow extra spaces inside tag */
+					while (*cur && *cur == ' ') cur++;
+					in_attr = TRUE;
+					break;
+				case '>':
+					found = TRUE;
+					*end = cur;
+					break;
+				case '"':
+				case '\'':
+					in_quotes = cur;
+					/* fall through */
+				default:
+					cur++;
+					break;
+				}
+			}
+		} else {
+			/* if we hit a < followed by the name of our tag... */
+			if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) {
+				*start = cur;
+				cur = cur + needlelen + 1;
+
+				/* if we're pointing at a space or a >, we found the right tag. if *
+				 * we're not, we've found a longer tag, so we need to skip to the  *
+				 * >, but not being distracted by >s inside quotes.                */
+				if (*cur == ' ' || *cur == '>') {
+					in_tag = TRUE;
+				} else {
+					while (*cur && *cur != '>') {
+						if (*cur == '"' || *cur == '\'') {
+							const char quote = *cur++;
+
+							while (*cur && *cur != quote)
+								cur++;
+							/* step over the closing quote, unless the
+							 * string ended inside the quoted value */
+							if (*cur)
+								cur++;
+						} else {
+							cur++;
+						}
+					}
+				}
+			} else {
+				cur++;
+			}
+		}
+	}
+
+	/* clean up any attribute name from a premature termination */
+	g_free(name);
+
+	if (found) {
+		*attributes = attribs;
+	} else {
+		/* attributes may have been collected from a tag that never
+		 * closed; nobody else will ever see them, so free them here */
+		g_datalist_clear(&attribs);
+
+		*start = NULL;
+		*end = NULL;
+		*attributes = NULL;
+	}
+
+	return found;
+}
+
+/* One entry on the stack of currently open tags kept by
+ * purple_markup_html_to_xhtml(). */
+struct purple_parse_tag {
+	char *src_tag;   /* tag name as it appears in the input HTML; only ever
+	                  * assigned string literals in the code shown here */
+	char *dest_tag;  /* tag name written to the XHTML output when closing */
+	gboolean ignore; /* TRUE when no opening tag was written, so no closing
+	                  * tag must be written either */
+};
+
+/* ALLOW_TAG_ALT(x, y): if the input at c is an opening tag named x, copy it
+         to the XHTML output renamed to y and 'continue' the enclosing loop.
+         x and y must be string literals (they are pasted onto "<").
+         The macro expects these names in the expanding scope: c (current
+         input position), xhtml and plain (output GStrings, may be NULL) and
+         tags (GList stack of struct purple_parse_tag).
+         Two forms are recognised: "<x " followed by attributes, whose quoted
+         values are re-escaped with g_markup_escape_text, and the bare forms
+         "<x>" and "<x/>". Unless the tag is self-closing, an entry is pushed
+         onto tags. If the attribute form contains another '<' before its '>'
+         or has no '>' at all, only the '<' is emitted (escaped) instead.
+   NOTE: Do not put `do {} while(0)` around this macro (as this is the method
+         recommended in the GCC docs). It contains 'continue's that should
+         affect the while-loop in purple_markup_html_to_xhtml and doing the
+         above would break that.
+         Also, remember to put braces in constructs that require them for
+         multiple statements when using this macro.
+   NOTE(review): inside the quoted-value branch the test `*c == '\\'` looks
+         like it was meant to read `*o`; c still points at the '<' that was
+         just matched, so the test can never be true - confirm intent. */
+#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
+						const char *o = c + strlen("<" x); \
+						const char *p = NULL, *q = NULL, *r = NULL; \
+						/* o = iterating over full tag \
+						 * p = > (end of tag) \
+						 * q = start of quoted bit \
+						 * r = < inside tag \
+						 */ \
+						GString *innards = g_string_new(""); \
+						while(o && *o) { \
+							if(!q && (*o == '\"' || *o == '\'') ) { \
+								q = o; \
+							} else if(q) { \
+								if(*o == *q) { /* end of quoted bit */ \
+									char *unescaped = g_strndup(q+1, o-q-1); \
+									char *escaped = g_markup_escape_text(unescaped, -1); \
+									g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
+									g_free(unescaped); \
+									g_free(escaped); \
+									q = NULL; \
+								} else if(*c == '\\') { \
+									o++; \
+								} \
+							} else if(*o == '<') { \
+								r = o; \
+							} else if(*o == '>') { \
+								p = o; \
+								break; \
+							} else { \
+								innards = g_string_append_c(innards, *o); \
+							} \
+							o++; \
+						} \
+						if(p && !r) { /* got an end of tag and no other < earlier */\
+							if(*(p-1) != '/') { \
+								struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
+								pt->src_tag = x; \
+								pt->dest_tag = y; \
+								tags = g_list_prepend(tags, pt); \
+							} \
+							if(xhtml) { \
+								xhtml = g_string_append(xhtml, "<" y); \
+								xhtml = g_string_append(xhtml, innards->str); \
+								xhtml = g_string_append_c(xhtml, '>'); \
+							} \
+							c = p + 1; \
+						} else { /* got end of tag with earlier < *or* didn't get anything */ \
+							if(xhtml) \
+								xhtml = g_string_append(xhtml, "&lt;"); \
+							if(plain) \
+								plain = g_string_append_c(plain, '<'); \
+							c++; \
+						} \
+						g_string_free(innards, TRUE); \
+						continue; \
+					} \
+					if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
+							(*(c+strlen("<" x)) == '>' || \
+							 !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
+						if(xhtml) \
+							xhtml = g_string_append(xhtml, "<" y); \
+						c += strlen("<" x); \
+						if(*c != '/') { \
+							struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
+							pt->src_tag = x; \
+							pt->dest_tag = y; \
+							tags = g_list_prepend(tags, pt); \
+							if(xhtml) \
+								xhtml = g_string_append_c(xhtml, '>'); \
+						} else { \
+							if(xhtml) \
+								xhtml = g_string_append(xhtml, "/>");\
+						} \
+						c = strchr(c, '>') + 1; \
+						continue; \
+					}
+/* Don't forget to check the note above for ALLOW_TAG_ALT. */
+#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
+/*
+ * Converts loosely formatted HTML into XHTML and/or plain text.
+ *
+ * html:      input markup; a NULL or empty string yields empty outputs.
+ * xhtml_out: if non-NULL, receives a newly allocated XHTML rendering in
+ *            which only an allow-list of tags survives (some renamed or
+ *            turned into styled <span>s), stray '<' is written as "&lt;"
+ *            and tags still open at the end of input are closed.
+ * plain_out: if non-NULL, receives a newly allocated plain-text rendering:
+ *            tags dropped, entities decoded, <br>/<hr> turned into newlines
+ *            and link targets appended as " <url>" when they differ from
+ *            the link text.
+ *
+ * At least one of xhtml_out and plain_out must be non-NULL.
+ *
+ * The open-tag stack (tags) holds heap-allocated struct purple_parse_tag
+ * entries; every exit path below frees the entries as well as the list.
+ */
+void
+purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
+						  char **plain_out)
+{
+	GString *xhtml = NULL;
+	GString *plain = NULL;
+	GString *url = NULL;
+	GString *cdata = NULL;
+	GList *tags = NULL, *tag;
+	const char *c = html;
+	char quote = '\0';
+
+/* Remembers an opening quote character (and steps over it) so that
+ * VALID_CHAR knows what terminates the attribute value. */
+#define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
+			quote = *(ptr++); \
+		else \
+			quote = '\0';
+
+/* True while ptr is still inside the attribute value: up to the matching
+ * quote, or up to a space or '>' when the value was not quoted. */
+#define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))
+
+	g_return_if_fail(xhtml_out != NULL || plain_out != NULL);
+
+	if(xhtml_out)
+		xhtml = g_string_new("");
+	if(plain_out)
+		plain = g_string_new("");
+
+	while(c && *c) {
+		if(*c == '<') {
+			if(*(c+1) == '/') { /* closing tag */
+				tag = tags;
+				while(tag) {
+					struct purple_parse_tag *pt = tag->data;
+					if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
+						c += strlen(pt->src_tag) + 3;
+						break;
+					}
+					tag = tag->next;
+				}
+				if(tag) {
+					/* close everything opened after (and including) the
+					 * matching tag, innermost first */
+					while(tags) {
+						struct purple_parse_tag *pt = tags->data;
+						if(xhtml && !pt->ignore)
+							g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
+						if(plain && purple_strequal(pt->src_tag, "a")) {
+							/* if this is a link, we have to add the url to the plaintext, too */
+							if (cdata && url &&
+									(!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
+									                                 g_utf8_collate(url->str + 7, cdata->str) != 0))) {
+								/* the unescaped copy is ours to free once it
+								 * has been appended to plain */
+								char *unescaped = purple_unescape_html(url->str);
+								g_string_append_printf(plain, " <%s>", g_strstrip(unescaped));
+								g_free(unescaped);
+							}
+							if (cdata) {
+								g_string_free(cdata, TRUE);
+								cdata = NULL;
+							}
+
+						}
+						if(tags == tag)
+							break;
+						tags = g_list_delete_link(tags, tags);
+						g_free(pt);
+					}
+					g_free(tag->data);
+					tags = g_list_delete_link(tags, tag);
+				} else {
+					/* a closing tag we weren't expecting...
+					 * we'll let it slide, if it's really a tag...if it's
+					 * just a </ we'll escape it properly */
+					const char *end = c+2;
+					while(*end && g_ascii_isalpha(*end))
+						end++;
+					if(*end == '>') {
+						c = end+1;
+					} else {
+						if(xhtml)
+							xhtml = g_string_append(xhtml, "&lt;");
+						if(plain)
+							plain = g_string_append_c(plain, '<');
+						c++;
+					}
+				}
+			} else { /* opening tag */
+				ALLOW_TAG("blockquote");
+				ALLOW_TAG("cite");
+				ALLOW_TAG("div");
+				ALLOW_TAG("em");
+				ALLOW_TAG("h1");
+				ALLOW_TAG("h2");
+				ALLOW_TAG("h3");
+				ALLOW_TAG("h4");
+				ALLOW_TAG("h5");
+				ALLOW_TAG("h6");
+				/* we only allow html to start the message */
+				if(c == html) {
+					ALLOW_TAG("html");
+				}
+				ALLOW_TAG_ALT("i", "em");
+				ALLOW_TAG_ALT("italic", "em");
+				ALLOW_TAG("li");
+				ALLOW_TAG("ol");
+				ALLOW_TAG("p");
+				ALLOW_TAG("pre");
+				ALLOW_TAG("q");
+				ALLOW_TAG("span");
+				ALLOW_TAG("ul");
+
+
+				/* we skip <HR> because it's not legal in XHTML-IM.  However,
+				 * we still want to send something sensible, so we put a
+				 * linebreak in its place. <BR> also needs special handling
+				 * because putting a </BR> to close it would just be dumb. */
+				if((!g_ascii_strncasecmp(c, "<br", 3)
+							|| !g_ascii_strncasecmp(c, "<hr", 3))
+						&& (*(c+3) == '>' ||
+							!g_ascii_strncasecmp(c+3, "/>", 2) ||
+							!g_ascii_strncasecmp(c+3, " />", 3))) {
+					c = strchr(c, '>') + 1;
+					if(xhtml)
+						xhtml = g_string_append(xhtml, "<br/>");
+					if(plain && *c != '\n')
+						plain = g_string_append_c(plain, '\n');
+					continue;
+				}
+				if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
+					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
+					if (*(c+2) == '>')
+						pt->src_tag = "b";
+					else if (*(c+2) == 'o')
+						pt->src_tag = "bold";
+					else
+						pt->src_tag = "strong";
+					pt->dest_tag = "span";
+					tags = g_list_prepend(tags, pt);
+					c = strchr(c, '>') + 1;
+					if(xhtml)
+						xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
+					continue;
+				}
+				if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
+					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
+					pt->src_tag = *(c+2) == '>' ? "u" : "underline";
+					pt->dest_tag = "span";
+					tags = g_list_prepend(tags, pt);
+					c = strchr(c, '>') + 1;
+					if (xhtml)
+						xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
+					continue;
+				}
+				if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
+					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
+					pt->src_tag = *(c+2) == '>' ? "s" : "strike";
+					pt->dest_tag = "span";
+					tags = g_list_prepend(tags, pt);
+					c = strchr(c, '>') + 1;
+					if(xhtml)
+						xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
+					continue;
+				}
+				if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
+					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
+					pt->src_tag = "sub";
+					pt->dest_tag = "span";
+					tags = g_list_prepend(tags, pt);
+					c = strchr(c, '>') + 1;
+					if(xhtml)
+						xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
+					continue;
+				}
+				if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
+					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
+					pt->src_tag = "sup";
+					pt->dest_tag = "span";
+					tags = g_list_prepend(tags, pt);
+					c = strchr(c, '>') + 1;
+					if(xhtml)
+						xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
+					continue;
+				}
+				if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
+					const char *p = c + 4;
+					GString *src = NULL, *alt = NULL;
+#define ESCAPE(from, to)        \
+		CHECK_QUOTE(from); \
+		while (VALID_CHAR(from)) { \
+			int len; \
+			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
+				to = g_string_append(to, "&amp;"); \
+			else if (*from == '\'') \
+				to = g_string_append(to, "&apos;"); \
+			else \
+				to = g_string_append_c(to, *from); \
+			from++; \
+		}
+
+					while (*p && *p != '>') {
+						if (!g_ascii_strncasecmp(p, "src=", 4)) {
+							const char *q = p + 4;
+							if (src)
+								g_string_free(src, TRUE);
+							src = g_string_new("");
+							ESCAPE(q, src);
+							p = q;
+						} else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
+							const char *q = p + 4;
+							if (alt)
+								g_string_free(alt, TRUE);
+							alt = g_string_new("");
+							ESCAPE(q, alt);
+							p = q;
+						} else {
+							p++;
+						}
+					}
+#undef ESCAPE
+					if ((c = strchr(p, '>')) != NULL)
+						c++;
+					else
+						c = p;
+					/* src and alt are required! */
+					if(src && xhtml)
+						g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
+					if(alt) {
+						if(plain) {
+							/* free the unescaped copy after appending it */
+							char *unescaped = purple_unescape_html(alt->str);
+							plain = g_string_append(plain, unescaped);
+							g_free(unescaped);
+						}
+						if(!src && xhtml)
+							xhtml = g_string_append(xhtml, alt->str);
+						g_string_free(alt, TRUE);
+					}
+					/* an <img> without src= leaves src NULL, which
+					 * g_string_free() must not be handed */
+					if(src)
+						g_string_free(src, TRUE);
+					continue;
+				}
+				if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) {
+					const char *p = c + 2;
+					struct purple_parse_tag *pt;
+					while (*p && *p != '>') {
+						if (!g_ascii_strncasecmp(p, "href=", 5)) {
+							const char *q = p + 5;
+							if (url)
+								g_string_free(url, TRUE);
+							url = g_string_new("");
+							if (cdata)
+								g_string_free(cdata, TRUE);
+							cdata = g_string_new("");
+							CHECK_QUOTE(q);
+							while (VALID_CHAR(q)) {
+								int len;
+								if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
+									url = g_string_append(url, "&amp;");
+								else if (*q == '"')
+									url = g_string_append(url, "&quot;");
+								else
+									url = g_string_append_c(url, *q);
+								q++;
+							}
+							p = q;
+						} else {
+							p++;
+						}
+					}
+					if ((c = strchr(p, '>')) != NULL)
+						c++;
+					else
+						c = p;
+					pt = g_new0(struct purple_parse_tag, 1);
+					pt->src_tag = "a";
+					pt->dest_tag = "a";
+					tags = g_list_prepend(tags, pt);
+					if(xhtml)
+						g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : "");
+					continue;
+				}
+#define ESCAPE(from, to)        \
+		CHECK_QUOTE(from); \
+		while (VALID_CHAR(from)) { \
+			int len; \
+			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
+				to = g_string_append(to, "&amp;"); \
+			else if (*from == '\'') \
+				to = g_string_append_c(to, '\"'); \
+			else \
+				to = g_string_append_c(to, *from); \
+			from++; \
+		}
+				if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
+					const char *p = c + 5;
+					GString *style = g_string_new("");
+					struct purple_parse_tag *pt;
+					while (*p && *p != '>') {
+						if (!g_ascii_strncasecmp(p, "back=", 5)) {
+							const char *q = p + 5;
+							GString *color = g_string_new("");
+							ESCAPE(q, color);
+							g_string_append_printf(style, "background: %s; ", color->str);
+							g_string_free(color, TRUE);
+							p = q;
+						} else if (!g_ascii_strncasecmp(p, "color=", 6)) {
+							const char *q = p + 6;
+							GString *color = g_string_new("");
+							ESCAPE(q, color);
+							g_string_append_printf(style, "color: %s; ", color->str);
+							g_string_free(color, TRUE);
+							p = q;
+						} else if (!g_ascii_strncasecmp(p, "face=", 5)) {
+							const char *q = p + 5;
+							GString *face = g_string_new("");
+							ESCAPE(q, face);
+							g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
+							g_string_free(face, TRUE);
+							p = q;
+						} else if (!g_ascii_strncasecmp(p, "size=", 5)) {
+							const char *q = p + 5;
+							int sz;
+							const char *size = "medium";
+							CHECK_QUOTE(q);
+							sz = atoi(q);
+							switch (sz)
+							{
+							case 1:
+							  size = "xx-small";
+							  break;
+							case 2:
+							  size = "small";
+							  break;
+							case 3:
+							  size = "medium";
+							  break;
+							case 4:
+							  size = "large";
+							  break;
+							case 5:
+							  size = "x-large";
+							  break;
+							case 6:
+							case 7:
+							  size = "xx-large";
+							  break;
+							default:
+							  break;
+							}
+							g_string_append_printf(style, "font-size: %s; ", size);
+							p = q;
+						} else {
+							p++;
+						}
+					}
+					if ((c = strchr(p, '>')) != NULL)
+						c++;
+					else
+						c = p;
+					pt = g_new0(struct purple_parse_tag, 1);
+					pt->src_tag = "font";
+					pt->dest_tag = "span";
+					tags = g_list_prepend(tags, pt);
+					if(style->len && xhtml)
+						g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
+					else
+						pt->ignore = TRUE; /* nothing opened, so nothing to close */
+					g_string_free(style, TRUE);
+					continue;
+				}
+#undef ESCAPE
+				if (!g_ascii_strncasecmp(c, "<body ", 6)) {
+					const char *p = c + 6;
+					gboolean did_something = FALSE;
+					while (*p && *p != '>') {
+						if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
+							const char *q = p + 8;
+							struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
+							GString *color = g_string_new("");
+							CHECK_QUOTE(q);
+							while (VALID_CHAR(q)) {
+								color = g_string_append_c(color, *q);
+								q++;
+							}
+							if (xhtml)
+								g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
+							g_string_free(color, TRUE);
+							if ((c = strchr(p, '>')) != NULL)
+								c++;
+							else
+								c = p;
+							pt->src_tag = "body";
+							pt->dest_tag = "span";
+							tags = g_list_prepend(tags, pt);
+							did_something = TRUE;
+							break;
+						}
+						p++;
+					}
+					if (did_something) continue;
+				}
+				/* this has to come after the special case for bgcolor */
+				ALLOW_TAG("body");
+				if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
+					char *p = strstr(c + strlen("<!--"), "-->");
+					if(p) {
+						if(xhtml)
+							xhtml = g_string_append(xhtml, "<!--");
+						c += strlen("<!--");
+						continue;
+					}
+				}
+
+				/* not a tag we know about: emit the '<' as literal text */
+				if(xhtml)
+					xhtml = g_string_append(xhtml, "&lt;");
+				if(plain)
+					plain = g_string_append_c(plain, '<');
+				c++;
+			}
+		} else if(*c == '&') {
+			char buf[7];
+			const char *pln;
+			int len;
+
+			/* an unrecognised '&' is passed through as a single character */
+			if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
+				len = 1;
+				g_snprintf(buf, sizeof(buf), "%c", *c);
+				pln = buf;
+			}
+			if(xhtml)
+				xhtml = g_string_append_len(xhtml, c, len);
+			if(plain)
+				plain = g_string_append(plain, pln);
+			if(cdata)
+				cdata = g_string_append_len(cdata, c, len);
+			c += len;
+		} else {
+			if(xhtml)
+				xhtml = g_string_append_c(xhtml, *c);
+			if(plain)
+				plain = g_string_append_c(plain, *c);
+			if(cdata)
+				cdata = g_string_append_c(cdata, *c);
+			c++;
+		}
+	}
+	/* close whatever is still open at the end of the input */
+	if(xhtml) {
+		for (tag = tags; tag ; tag = tag->next) {
+			struct purple_parse_tag *pt = tag->data;
+			if(!pt->ignore)
+				g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
+		}
+	}
+	/* the list owns its purple_parse_tag entries; free them with it */
+	g_list_free_full(tags, g_free);
+	if(xhtml_out)
+		*xhtml_out = g_string_free(xhtml, FALSE);
+	if(plain_out)
+		*plain_out = g_string_free(plain, FALSE);
+	if(url)
+		g_string_free(url, TRUE);
+	if (cdata)
+		g_string_free(cdata, TRUE);
+#undef CHECK_QUOTE
+#undef VALID_CHAR
+}
+
+/* The following are probably reasonable changes:
+ * - \n should be converted to a normal space
+ * - in addition to <br>, <p> and <div> etc. should also be converted into \n
+ * - We want to turn </td>#whitespace<td> sequences into a single tab
+ * - We want to turn </tr>#whitespace<tr> sequences into a single \n
+ * - <script>...</script> and <style>...</style> should be completely removed
+ */
+
+/*
+ * Returns a newly allocated plain-text version of @str with the HTML tags
+ * removed, or NULL when @str is NULL.
+ *
+ * The work is done in place on a copy of the input: i is the read index,
+ * j the write index.  Along the way:
+ *  - <br and </table> become '\n'; <p>, <tr, <hr, <li and <div do too,
+ *    unless nothing has been written yet;
+ *  - a <td that follows a </td> becomes '\t';
+ *  - everything between <script / <style and the matching closing tag is
+ *    dropped;
+ *  - entities known to purple_markup_unescape_entity() are decoded;
+ *  - whitespace characters are written as a single ' ' each;
+ *  - at </a>, " (href)" is appended when the saved href differs from the
+ *    link text.
+ */
+char *
+purple_markup_strip_html(const char *str)
+{
+	int i, j, k, entlen;
+	gboolean visible = TRUE;
+	gboolean closing_td_p = FALSE;
+	gchar *str2;
+	const gchar *cdata_close_tag = NULL, *ent;
+	gchar *href = NULL;
+	int href_st = 0;
+
+	if(!str)
+		return NULL;
+
+	str2 = g_strdup(str);
+
+	for (i = 0, j = 0; str2[i]; i++)
+	{
+		if (str2[i] == '<')
+		{
+			if (cdata_close_tag)
+			{
+				/* Note: Don't even assume any other tag is a tag in CDATA */
+				if (g_ascii_strncasecmp(str2 + i, cdata_close_tag,
+						strlen(cdata_close_tag)) == 0)
+				{
+					i += strlen(cdata_close_tag) - 1;
+					cdata_close_tag = NULL;
+				}
+				continue;
+			}
+			else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
+			{
+				str2[j++] = '\t';
+				visible = TRUE;
+			}
+			else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0)
+			{
+				/* hide whatever sits between this </td> and the next tag */
+				closing_td_p = TRUE;
+				visible = FALSE;
+			}
+			else
+			{
+				closing_td_p = FALSE;
+				visible = TRUE;
+			}
+
+			k = i + 1;
+
+			/* a '<' followed by whitespace is not a tag; it falls through
+			 * and is copied like any other character */
+			if(g_ascii_isspace(str2[k]))
+				visible = TRUE;
+			else if (str2[k])
+			{
+				/* Scan until we end the tag either implicitly (closed start
+				 * tag) or explicitly, using a sloppy method (i.e., < or >
+				 * inside quoted attributes will screw us up)
+				 */
+				while (str2[k] && str2[k] != '<' && str2[k] != '>')
+				{
+					k++;
+				}
+
+				/* If we've got an <a> tag with an href, save the address
+				 * to print later. */
+				if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 &&
+				    g_ascii_isspace(str2[i+2]))
+				{
+					int st; /* start of href, inclusive [ */
+					int end; /* end of href, exclusive ) */
+					char delim = ' ';
+					/* Find start of href */
+					for (st = i + 3; st < k; st++)
+					{
+						if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0)
+						{
+							st += 5;
+							if (str2[st] == '"' || str2[st] == '\'')
+							{
+								delim = str2[st];
+								st++;
+							}
+							break;
+						}
+					}
+					/* find end of address */
+					for (end = st; end < k && str2[end] != delim; end++)
+					{
+						/* All the work is done in the loop construct above. */
+					}
+
+					/* If there's an address, save it.  If there was
+					 * already one saved, kill it. */
+					if (st < k)
+					{
+						char *tmp;
+						g_free(href);
+						tmp = g_strndup(str2 + st, end - st);
+						href = purple_unescape_html(tmp);
+						g_free(tmp);
+						/* remember where the link text starts in the output */
+						href_st = j;
+					}
+				}
+
+				/* Replace </a> with an ascii representation of the
+				 * address the link was pointing to. */
+				else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0)
+				{
+					size_t hrlen = strlen(href);
+
+					/* Only insert the href if it's different from the CDATA. */
+					if ((hrlen != (gsize)(j - href_st) ||
+					     strncmp(str2 + href_st, href, hrlen)) &&
+					    (hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */
+					     strncmp(str2 + href_st, href + 7, hrlen - 7)))
+					{
+						str2[j++] = ' ';
+						str2[j++] = '(';
+						memmove(str2 + j, href, hrlen);
+						j += hrlen;
+						str2[j++] = ')';
+						g_free(href);
+						href = NULL;
+					}
+				}
+
+				/* Check for tags which should be mapped to newline (but ignore some of
+				 * the tags at the beginning of the text) */
+				else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0
+				              || g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0
+				              || g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0
+				              || g_ascii_strncasecmp(str2 + i, "<li", 3) == 0
+				              || g_ascii_strncasecmp(str2 + i, "<div", 4) == 0))
+				 || g_ascii_strncasecmp(str2 + i, "<br", 3) == 0
+				 || g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0)
+				{
+					str2[j++] = '\n';
+				}
+				/* Check for tags which begin CDATA and need to be closed */
+				else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0)
+				{
+					cdata_close_tag = "</script>";
+				}
+				else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0)
+				{
+					cdata_close_tag = "</style>";
+				}
+				/* Update the index and continue checking after the tag */
+				/* (when the scan stopped on '<' or the terminator, step
+				 * back one so the loop's i++ lands exactly on it) */
+				i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k;
+				continue;
+			}
+		}
+		else if (cdata_close_tag)
+		{
+			/* inside <script>/<style>: drop everything */
+			continue;
+		}
+		else if (!g_ascii_isspace(str2[i]))
+		{
+			visible = TRUE;
+		}
+
+		/* decode a recognised entity and skip past it in the input */
+		if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL)
+		{
+			while (*ent)
+				str2[j++] = *ent++;
+			i += entlen - 1;
+			continue;
+		}
+
+		if (visible)
+			str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
+	}
+
+	g_free(href);
+
+	str2[j] = '\0';
+
+	return str2;
+}
+
+/*
+ * Returns TRUE when @c is one of the characters that ends a link being
+ * scanned by process_link(): space, comma, NUL, newline, carriage return,
+ * '<', '>' or '"'.
+ */
+static gboolean
+badchar(char c)
+{
+	if (c == ' ' || c == ',' || c == '\0')
+		return TRUE;
+	if (c == '\n' || c == '\r')
+		return TRUE;
+	if (c == '<' || c == '>' || c == '"')
+		return TRUE;
+
+	return FALSE;
+}
+
+/*
+ * Returns TRUE when the text at @c begins, ignoring ASCII case, with one of
+ * the entities "&lt;", "&gt;" or "&quot;"; FALSE otherwise.
+ */
+static gboolean
+badentity(const char *c)
+{
+	if (g_ascii_strncasecmp(c, "&lt;", 4) == 0)
+		return TRUE;
+
+	if (g_ascii_strncasecmp(c, "&gt;", 4) == 0)
+		return TRUE;
+
+	if (g_ascii_strncasecmp(c, "&quot;", 6) == 0)
+		return TRUE;
+
+	return FALSE;
+}
+
+/*
+ * Wraps the URL that begins at @c in an anchor element and appends it to @ret.
+ *
+ * @ret:          buffer the "<A HREF=...>...</A>" element is appended to.
+ * @start:        lower bound for the look-behind done at the end of the URL
+ *                (the callers in this file pass the start of the whole text).
+ * @c:            first character of the URL.
+ * @matchlen:     length of the prefix that was recognized at @c; when the URL
+ *                ends right after that prefix nothing is appended.
+ * @urlprefix:    text placed in front of the URL inside HREF only.
+ * @inside_paren: when positive, a ')' directly before the end of the URL is
+ *                not made part of the link.
+ *
+ * The HREF value is the URL run through purple_unescape_html(); the link text
+ * is the URL exactly as it appears in the input.
+ *
+ * Returns the position just past the linkified text, or @c when no link was
+ * appended.
+ */
+static const char *
+process_link(GString *ret,
+		const char *start, const char *c,
+		int matchlen,
+		const char *urlprefix,
+		int inside_paren)
+{
+	const char *stop = c;
+	char *shown, *target;
+
+	/* Walk forward to the delimiter that really ends the URL. */
+	while (TRUE) {
+		if (badchar(*stop) || badentity(stop)) {
+			/* Only the prefix is there: not a link. */
+			if (stop - c == matchlen)
+				return c;
+
+			/* A comma ends the URL only when a space follows it. */
+			if (!(*stop == ',' && stop[1] != ' '))
+				break;
+		}
+		stop++;
+	}
+
+	/* Leave a trailing period, and a ')' that closes an open '(', outside. */
+	if (stop > start && stop[-1] == '.')
+		stop--;
+	if (stop > start && stop[-1] == ')' && inside_paren > 0)
+		stop--;
+
+	shown = g_strndup(c, stop - c);
+	target = purple_unescape_html(shown);
+	g_string_append_printf(ret, "<A HREF=\"%s%s\">%s</A>",
+			urlprefix,
+			target, shown);
+	g_free(target);
+	g_free(shown);
+
+	return stop;
+}
+
+/*
+ * Returns a newly allocated copy of @text in which URLs and e-mail addresses
+ * found outside of HTML tags are wrapped in <A HREF="..."> elements, or NULL
+ * when @text is NULL.
+ *
+ * Recognized are the prefixes http://, https://, ftp://, sftp://, file://,
+ * www. (linked as http://), ftp. (linked as ftp://), xmpp: and mailto:, as
+ * well as bare addresses around an '@'.  Existing <A ...>...</A> elements are
+ * copied through unchanged.
+ */
+char *
+purple_markup_linkify(const char *text)
+{
+	const char *c, *t, *q = NULL;
+	char *tmpurlbuf, *url_buf;
+	gunichar g;
+	gboolean inside_html = FALSE;
+	int inside_paren = 0;
+	GString *ret;
+
+	if (text == NULL)
+		return NULL;
+
+	ret = g_string_new("");
+
+	c = text;
+	while (*c) {
+
+		/* Track parenthesis depth so that a link written as "(http://x)"
+		 * does not swallow the closing parenthesis. */
+		if(*c == '(' && !inside_html) {
+			inside_paren++;
+			ret = g_string_append_c(ret, *c);
+			c++;
+		}
+
+		if(inside_html) {
+			/* q remembers the quote character that opened an attribute
+			 * value inside the current tag. */
+			if(*c == '>') {
+				inside_html = FALSE;
+			} else if(!q && (*c == '\"' || *c == '\'')) {
+				q = c;
+			} else if(q) {
+				if(*c == *q)
+					q = NULL;
+			}
+		} else if(*c == '<') {
+			inside_html = TRUE;
+			if (!g_ascii_strncasecmp(c, "<A", 2)) {
+				/* Copy an existing anchor verbatim up to its "/A>". */
+				while (1) {
+					if (!g_ascii_strncasecmp(c, "/A>", 3)) {
+						inside_html = FALSE;
+						break;
+					}
+					ret = g_string_append_c(ret, *c);
+					c++;
+					if (!(*c))
+						break;
+				}
+			}
+		} else if (!g_ascii_strncasecmp(c, "http://", 7)) {
+			c = process_link(ret, text, c, 7, "", inside_paren);
+		} else if (!g_ascii_strncasecmp(c, "https://", 8)) {
+			c = process_link(ret, text, c, 8, "", inside_paren);
+		} else if (!g_ascii_strncasecmp(c, "ftp://", 6)) {
+			c = process_link(ret, text, c, 6, "", inside_paren);
+		} else if (!g_ascii_strncasecmp(c, "sftp://", 7)) {
+			c = process_link(ret, text, c, 7, "", inside_paren);
+		} else if (!g_ascii_strncasecmp(c, "file://", 7)) {
+			c = process_link(ret, text, c, 7, "", inside_paren);
+		} else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
+			c = process_link(ret, text, c, 4, "http://", inside_paren);
+		} else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
+			c = process_link(ret, text, c, 4, "ftp://", inside_paren);
+		} else if (!g_ascii_strncasecmp(c, "xmpp:", 5) && (c == text || badchar(c[-1]) || badentity(c-1))) {
+			c = process_link(ret, text, c, 5, "", inside_paren);
+		} else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
+			t = c;
+			while (1) {
+				if (badchar(*t) || badentity(t)) {
+					char *d;
+					/* "mailto:" with nothing after it */
+					if (t - c == 7) {
+						break;
+					}
+					if (t > text && *(t - 1) == '.')
+						t--;
+					/* Validate only the address, without any "?query". */
+					if ((d = strstr(c + 7, "?")) != NULL && d < t)
+						url_buf = g_strndup(c + 7, d - c - 7);
+					else
+						url_buf = g_strndup(c + 7, t - c - 7);
+					if (!purple_email_is_valid(url_buf)) {
+						g_free(url_buf);
+						break;
+					}
+					g_free(url_buf);
+					url_buf = g_strndup(c, t - c);
+					tmpurlbuf = purple_unescape_html(url_buf);
+					g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
+							  tmpurlbuf, url_buf);
+					g_free(url_buf);
+					g_free(tmpurlbuf);
+					c = t;
+					break;
+				}
+				t++;
+			}
+		} else if (c != text && (*c == '@')) {
+			int flag;
+			GString *gurl_buf = NULL;
+			const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0";
+
+			/* flag is set only when both neighbours of '@' may belong to
+			 * an address. */
+			if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1)))
+				flag = 0;
+			else {
+				flag = 1;
+				gurl_buf = g_string_new("");
+			}
+
+			t = c;
+			while (flag) {
+				/* iterate backwards grabbing the local part of an email address */
+				g = g_utf8_get_char(t);
+				if (badchar(*t) || (g >= 127) || (*t == '(') ||
+					((*t == ';') && ((t > (text+2) && (!g_ascii_strncasecmp(t - 3, "&lt;", 4) ||
+				                                       !g_ascii_strncasecmp(t - 3, "&gt;", 4))) ||
+				                     (t > (text+4) && (!g_ascii_strncasecmp(t - 5, "&quot;", 6)))))) {
+					/* local part will already be part of ret, strip it out */
+					ret = g_string_truncate(ret, ret->len - (c - t));
+					ret = g_string_append_unichar(ret, g);
+					break;
+				} else {
+					g_string_prepend_unichar(gurl_buf, g);
+					t = g_utf8_find_prev_char(text, t);
+					/* g_utf8_find_prev_char() yields NULL once the start
+					 * of the text has been passed; test for that directly
+					 * instead of ordering a null pointer against text. */
+					if (t == NULL) {
+						ret = g_string_assign(ret, "");
+						break;
+					}
+				}
+			}
+
+			t = g_utf8_find_next_char(c, NULL);
+
+			while (flag) {
+				/* iterate forwards grabbing the domain part of an email address */
+				g = g_utf8_get_char(t);
+				if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) {
+					char *d;
+
+					url_buf = g_string_free(gurl_buf, FALSE);
+					gurl_buf = NULL;
+
+					/* strip off trailing periods */
+					if (*url_buf) {
+						for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
+							*d = '\0';
+					}
+
+					tmpurlbuf = purple_unescape_html(url_buf);
+					if (purple_email_is_valid(tmpurlbuf)) {
+						g_string_append_printf(ret, "<A HREF=\"mailto:%s\">%s</A>",
+								tmpurlbuf, url_buf);
+					} else {
+						g_string_append(ret, url_buf);
+					}
+					g_free(url_buf);
+					g_free(tmpurlbuf);
+					c = t;
+
+					break;
+				} else {
+					g_string_append_unichar(gurl_buf, g);
+					t = g_utf8_find_next_char(t, NULL);
+				}
+			}
+
+			if (gurl_buf) {
+				g_string_free(gurl_buf, TRUE);
+			}
+		}
+
+		if(*c == ')' && !inside_html) {
+			/* Never let an unbalanced ')' push the depth below zero, or a
+			 * later, properly parenthesized link would keep its ')'. */
+			if (inside_paren > 0)
+				inside_paren--;
+			ret = g_string_append_c(ret, *c);
+			c++;
+			/* Run the next character through the checks above instead of
+			 * copying it blindly: it may open a tag, a parenthesis or a
+			 * link. */
+			continue;
+		}
+
+		if (*c == 0)
+			break;
+
+		ret = g_string_append_c(ret, *c);
+		c++;
+
+	}
+	return g_string_free(ret, FALSE);
+}
+
+/*
+ * Returns a newly allocated copy of @in in which every entity recognized by
+ * purple_markup_unescape_entity() is replaced by the text that function
+ * returns for it; all other characters are copied unchanged.  Returns NULL
+ * when @in is NULL.
+ */
+char *purple_unescape_text(const char *in)
+{
+    GString *out;
+    const char *pos;
+
+    if (in == NULL)
+        return NULL;
+
+    out = g_string_new("");
+    for (pos = in; *pos != '\0'; ) {
+        int consumed;
+        const char *replacement = purple_markup_unescape_entity(pos, &consumed);
+
+        if (replacement == NULL) {
+            /* not an entity: copy the byte as it is */
+            g_string_append_c(out, *pos);
+            pos++;
+            continue;
+        }
+
+        g_string_append(out, replacement);
+        pos += consumed;
+    }
+
+    return g_string_free(out, FALSE);
+}
+
+/*
+ * Returns a newly allocated copy of @html in which every entity recognized by
+ * purple_markup_unescape_entity() is replaced by the text that function
+ * returns for it and every literal "<br>" (exact, lower-case match) becomes a
+ * newline; all other characters are copied unchanged.  Returns NULL when
+ * @html is NULL.
+ */
+char *purple_unescape_html(const char *html)
+{
+	GString *out;
+	const char *pos;
+
+	if (html == NULL)
+		return NULL;
+
+	out = g_string_new("");
+	for (pos = html; *pos != '\0'; ) {
+		int consumed;
+		const char *replacement = purple_markup_unescape_entity(pos, &consumed);
+
+		if (replacement != NULL) {
+			g_string_append(out, replacement);
+			pos += consumed;
+			continue;
+		}
+
+		if (strncmp(pos, "<br>", 4) == 0) {
+			g_string_append_c(out, '\n');
+			pos += 4;
+			continue;
+		}
+
+		/* neither an entity nor a line break: copy the byte as it is */
+		g_string_append_c(out, *pos);
+		pos++;
+	}
+
+	return g_string_free(out, FALSE);
+}
+
+/*
+ * Writes the opening tags still pending in @q to @ret, oldest first, the
+ * first time it is called; later calls do nothing.  @started records whether
+ * that has already happened.
+ */
+static void
+slice_emit_open_tags(GString *ret, GQueue *q, gboolean *started)
+{
+	GList *l;
+
+	if (*started)
+		return;
+
+	/* tags are pushed at the head, so the tail holds the outermost one */
+	for (l = q->tail; l != NULL; l = l->prev) {
+		const char *open_tag = l->data;
+		g_string_append(ret, open_tag);
+	}
+
+	*started = TRUE;
+}
+
+/*
+ * Returns a newly allocated string holding the part of the markup @str that
+ * lies between the positions @x (inclusive) and @y (exclusive).
+ *
+ * Positions count displayed items rather than bytes: a plain character and
+ * an entity count as 1, a tag starting with "<br" as 1, "<img " as
+ * strlen("[Image]"), "<hr>" as strlen("\n---\n"); every other tag counts
+ * as 0.
+ *
+ * Tags that were opened before the slice begins are written in front of the
+ * first sliced item, and tags still open when the slice ends are closed, so
+ * the result is balanced.  Tags that open and close again before anything is
+ * sliced are left out.
+ *
+ * Returns "" when @x equals @y, and NULL when @str is NULL, @x is greater
+ * than @y, a '<' has no matching '>' or an '&' has no following ';'.
+ */
+char *
+purple_markup_slice(const char *str, guint x, guint y)
+{
+	GString *ret;
+	GQueue *q;
+	guint z = 0;
+	gboolean started;
+	gunichar c;
+	char *tag;
+
+	g_return_val_if_fail(str != NULL, NULL);
+	g_return_val_if_fail(x <= y, NULL);
+
+	if (x == y)
+		return g_strdup("");
+
+	ret = g_string_new("");
+	q = g_queue_new();
+
+	/* A slice starting at 0 receives every tag as it is met, so there is
+	 * never anything pending to emit. */
+	started = (x == 0);
+
+	while (*str && (z < y)) {
+		c = g_utf8_get_char(str);
+
+		if (c == '<') {
+			char *end = strchr(str, '>');
+			gboolean visible = TRUE;
+
+			if (!end)
+				goto invalid;
+
+			if (!g_ascii_strncasecmp(str, "<img ", 5)) {
+				z += strlen("[Image]");
+			} else if (!g_ascii_strncasecmp(str, "<br", 3)) {
+				z += 1;
+			} else if (!g_ascii_strncasecmp(str, "<hr>", 4)) {
+				z += strlen("\n---\n");
+			} else if (!g_ascii_strncasecmp(str, "</", 2)) {
+				/* closing tag: forget the most recently opened one */
+				g_free(g_queue_pop_head(q));
+				visible = FALSE;
+			} else {
+				/* opening tag: remember it until it is closed */
+				g_queue_push_head(q, g_strndup(str, end - str + 1));
+				visible = FALSE;
+			}
+
+			if (z >= x) {
+				/* Only a tag that displays something can begin the
+				 * output; plain open/close tags met before that are
+				 * just tracked in the queue. */
+				if (visible)
+					slice_emit_open_tags(ret, q, &started);
+				if (started)
+					g_string_append_len(ret, str, end - str + 1);
+			}
+
+			str = end;
+		} else if (c == '&') {
+			char *end = strchr(str, ';');
+
+			if (!end)
+				goto invalid;
+
+			if (z >= x) {
+				slice_emit_open_tags(ret, q, &started);
+				g_string_append_len(ret, str, end - str + 1);
+			}
+
+			z++;
+			str = end;
+		} else {
+			if (z >= x) {
+				slice_emit_open_tags(ret, q, &started);
+				g_string_append_unichar(ret, c);
+			}
+			z++;
+		}
+
+		str = g_utf8_next_char(str);
+	}
+
+	/* Close whatever is still open, innermost first - but only if the
+	 * opening tags were actually written. */
+	while ((tag = g_queue_pop_head(q))) {
+		if (started) {
+			char *name = purple_markup_get_tag_name(tag);
+			g_string_append_printf(ret, "</%s>", name);
+			g_free(name);
+		}
+		g_free(tag);
+	}
+
+	g_queue_free(q);
+	return g_string_free(ret, FALSE);
+
+invalid:
+	g_string_free(ret, TRUE);
+	while ((tag = g_queue_pop_head(q)))
+		g_free(tag);
+	g_queue_free(q);
+	return NULL;
+}
+
+/*
+ * Returns a newly allocated copy of the element name in @tag, i.e. the text
+ * between the leading '<' and the first '>', ' ' or '/' (or the end of the
+ * string).  Returns NULL when @tag is NULL or does not start with '<'.
+ */
+char *
+purple_markup_get_tag_name(const char *tag)
+{
+	const char *name, *end;
+
+	g_return_val_if_fail(tag != NULL, NULL);
+	g_return_val_if_fail(*tag == '<', NULL);
+
+	name = tag + 1;
+	end = name;
+	while (*end != '\0' && *end != '>' && *end != ' ' && *end != '/')
+		end++;
+
+	return g_strndup(name, end - name);
+}

mercurial