Mon, 22 Aug 2022 21:40:04 -0500
Inline pidgin_make_scrollable
We need to change it for GTK4, and there are few enough callers that it can be inlined. Eventually, that code might live in a `.ui` file anyway.
Testing Done:
Compile only.
Reviewed at https://reviews.imfreedom.org/r/1615/
/*
 * Purple - Internet Messaging Library
 * Copyright (C) Pidgin Developers <devel@pidgin.im>
 *
 * Purple is the legal property of its developers, whose names are too numerous
 * to list here. Please refer to the COPYRIGHT file distributed with this
 * source distribution.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses/>.
 */

#include "purplemarkup.h"

#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include <glib.h>

#include "util.h"

/*
 * This function is stolen from glib's gmarkup.c and modified to not
 * replace ' with &apos;
 *
 * Appends the first `length` bytes of `text` to `str`, replacing &, <, > and
 * " with their named entities and a set of control characters with numeric
 * character references.
 */
static void
append_escaped_text(GString *str, const gchar *text, gssize length)
{
	const gchar *p = text;
	const gchar *end = text + length;

	/* Use '<' rather than '!=' so that a length which cuts a multi-byte
	 * sequence in half cannot make us step over `end` and run away. */
	while (p < end) {
		const gchar *next = g_utf8_next_char(p);
		gunichar c;

		if (next > end) {
			next = end;
		}

		switch (*p) {
		case '&':
			g_string_append(str, "&amp;");
			break;
		case '<':
			g_string_append(str, "&lt;");
			break;
		case '>':
			g_string_append(str, "&gt;");
			break;
		case '"':
			g_string_append(str, "&quot;");
			break;
		default:
			c = g_utf8_get_char(p);
			if ((0x1 <= c && c <= 0x8) ||
			    (0xb <= c && c <= 0xc) ||
			    (0xe <= c && c <= 0x1f) ||
			    (0x7f <= c && c <= 0x84) ||
			    (0x86 <= c && c <= 0x9f))
			{
				g_string_append_printf(str, "&#x%x;", c);
			} else {
				g_string_append_len(str, p, next - p);
			}
			break;
		}

		p = next;
	}
}

/*
 * This function is stolen from glib's gmarkup.c
 *
 * Returns a newly allocated copy of `text` with markup-significant characters
 * escaped (see append_escaped_text()). A negative `length` means "use
 * strlen(text)". Returns NULL (via g_return_val_if_fail) when `text` is NULL.
 */
gchar *
purple_markup_escape_text(const gchar *text, gssize length)
{
	GString *str;

	g_return_val_if_fail(text != NULL, NULL);

	if (length < 0) {
		length = strlen(text);
	}

	/* prealloc at least as long as original text */
	str = g_string_sized_new(length);
	append_escaped_text(str, text, length);

	return g_string_free(str, FALSE);
}

/*
 * Decodes the entity that starts at `text`.
 *
 * Recognizes a fixed set of named entities (case-insensitively) and numeric
 * references of the form &#NNN; / &#xHHH;. On success the replacement text is
 * returned and, if `length` is non-NULL, the number of source bytes the entity
 * occupies is stored in it. Returns NULL when `text` is NULL, does not start
 * with '&', or is not a recognized entity.
 *
 * The returned string must not be freed. For numeric references it points at
 * a function-local static buffer that is overwritten by the next such call.
 */
const char *
purple_markup_unescape_entity(const char *text, int *length)
{
	const char *pln;
	int len;

	if (!text || *text != '&') {
		return NULL;
	}

#define IS_ENTITY(s)  (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1)))

	if (IS_ENTITY("&amp;")) {
		pln = "&";
	} else if (IS_ENTITY("&lt;")) {
		pln = "<";
	} else if (IS_ENTITY("&gt;")) {
		pln = ">";
	} else if (IS_ENTITY("&nbsp;")) {
		pln = " ";
	} else if (IS_ENTITY("&copy;")) {
		pln = "\302\251";      /* or use g_unichar_to_utf8(0xa9); */
	} else if (IS_ENTITY("&quot;")) {
		pln = "\"";
	} else if (IS_ENTITY("&reg;")) {
		pln = "\302\256";      /* or use g_unichar_to_utf8(0xae); */
	} else if (IS_ENTITY("&apos;")) {
		pln = "\'";
	} else if (text[1] == '#' &&
	           (g_ascii_isxdigit(text[2]) || text[2] == 'x'))
	{
		static char buf[7];
		const char *start = text + 2;
		char *end;
		guint64 pound;
		int base = 10;
		int buflen;

		if (*start == 'x') {
			base = 16;
			start++;
		}

		pound = g_ascii_strtoull(start, &end, base);
		if (pound == 0 || pound > INT_MAX || *end != ';') {
			return NULL;
		}

		len = (end - text) + 1;

		buflen = g_unichar_to_utf8((gunichar)pound, buf);
		buf[buflen] = '\0';
		pln = buf;
	} else {
		return NULL;
	}

	if (length) {
		*length = len;
	}

	return pln;
}

/*
 * Extracts the value of the CSS property `opt` from the inline style string
 * `style`.
 *
 * Properties are matched case-insensitively by prefix and are expected to be
 * separated by ';'. A '"' ends the scan. The value has trailing whitespace
 * removed and is passed through purple_unescape_html(). Returns a newly
 * allocated string, or NULL when `style` is NULL or the property is not found
 * or has no value.
 */
char *
purple_markup_get_css_property(const gchar *style, const gchar *opt)
{
	const gchar *css_str = style;
	const gchar *css_value_start;
	const gchar *css_value_end;
	gchar *tmp;
	gchar *ret;

	g_return_val_if_fail(opt != NULL, NULL);

	if (!css_str) {
		return NULL;
	}

	/* find the CSS property */
	while (1) {
		/* skip whitespace characters */
		while (*css_str && g_ascii_isspace(*css_str)) {
			css_str++;
		}
		if (!g_ascii_isalpha(*css_str)) {
			return NULL;
		}
		if (g_ascii_strncasecmp(css_str, opt, strlen(opt))) {
			/* go to next css property positioned after the next ';' */
			while (*css_str && *css_str != '"' && *css_str != ';') {
				css_str++;
			}
			if (*css_str != ';') {
				return NULL;
			}
			css_str++;
		} else {
			break;
		}
	}

	/* find the CSS value position in the string */
	css_str += strlen(opt);
	while (*css_str && g_ascii_isspace(*css_str)) {
		css_str++;
	}
	if (*css_str != ':') {
		return NULL;
	}
	css_str++;
	while (*css_str && g_ascii_isspace(*css_str)) {
		css_str++;
	}
	if (*css_str == '\0' || *css_str == '"' || *css_str == ';') {
		return NULL;
	}

	/* mark the CSS value */
	css_value_start = css_str;
	while (*css_str && *css_str != '"' && *css_str != ';') {
		css_str++;
	}
	css_value_end = css_str - 1;

	/* Removes trailing whitespace */
	while (css_value_end > css_value_start &&
	       g_ascii_isspace(*css_value_end))
	{
		css_value_end--;
	}

	tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1);
	ret = purple_unescape_html(tmp);
	g_free(tmp);

	return ret;
}

/*
 * Reports whether the first <span> tag in `html` requests right-to-left text,
 * either through a dir="rtl" attribute or a "direction: rtl" style property
 * (both compared case-insensitively).
 */
gboolean
purple_markup_is_rtl(const char *html)
{
	GData *attributes;
	const gchar *start, *end;
	gboolean res = FALSE;

	if (purple_markup_find_tag("span", html, &start, &end, &attributes)) {
		/* tmp is a member of attributes and is free with g_datalist_clear call */
		const char *tmp = g_datalist_get_data(&attributes, "dir");

		if (tmp && !g_ascii_strcasecmp(tmp, "RTL")) {
			res = TRUE;
		}

		if (!res) {
			tmp = g_datalist_get_data(&attributes, "style");
			if (tmp) {
				char *tmp2 = purple_markup_get_css_property(tmp, "direction");

				if (tmp2 && !g_ascii_strcasecmp(tmp2, "RTL")) {
					res = TRUE;
				}
				g_free(tmp2);
			}
		}

		g_datalist_clear(&attributes);
	}

	return res;
}

/*
 * Finds the first complete occurrence of the tag `needle` in `haystack`.
 *
 * On success returns TRUE, points *start at the tag's '<', *end at its '>',
 * and stores the tag's attributes in *attributes as a datalist keyed by the
 * lower-cased attribute name with newly allocated string values; the caller
 * releases it with g_datalist_clear(). On failure returns FALSE and sets all
 * three outputs to NULL.
 */
gboolean
purple_markup_find_tag(const char *needle, const char *haystack,
                       const char **start, const char **end,
                       GData **attributes)
{
	GData *attribs;
	const char *cur = haystack;
	char *name = NULL;
	gboolean found = FALSE;
	gboolean in_tag = FALSE;
	gboolean in_attr = FALSE;
	const char *in_quotes = NULL;
	size_t needlelen;

	g_return_val_if_fail(    needle != NULL, FALSE);
	g_return_val_if_fail(   *needle != '\0', FALSE);
	g_return_val_if_fail(  haystack != NULL, FALSE);
	g_return_val_if_fail(     start != NULL, FALSE);
	g_return_val_if_fail(       end != NULL, FALSE);
	g_return_val_if_fail(attributes != NULL, FALSE);

	needlelen = strlen(needle);
	g_datalist_init(&attribs);

	while (*cur && !found) {
		if (in_tag) {
			if (in_quotes) {
				const char *close = cur;

				while (*close && *close != *in_quotes) {
					close++;
				}

				/* if we got the close quote, store the value and carry on from
				 * after it. if we ran to the end of the string, point to the
				 * NUL and we're outta here */
				if (*close) {
					/* only store a value if we have an attribute name */
					if (name) {
						size_t len = close - cur;
						char *val = g_strndup(cur, len);

						g_datalist_set_data_full(&attribs, name, val, g_free);
						g_free(name);
						name = NULL;
					}

					in_quotes = NULL;
					cur = close + 1;
				} else {
					cur = close;
				}
			} else if (in_attr) {
				const char *close = cur;

				while (*close && *close != '>' && *close != '"' &&
				       *close != '\'' && *close != ' ' && *close != '=')
				{
					close++;
				}

				/* if we got the equals, store the name of the attribute. if we got
				 * the quote, save the attribute and go straight to quote mode.
				 * otherwise the tag closed or we reached the end of the string,
				 * so we can get outta here */
				switch (*close) {
				case '"':
				case '\'':
					in_quotes = close;
					/* fall through */
				case '=':
					{
						size_t len = close - cur;

						/* don't store a blank attribute name */
						if (len) {
							g_free(name);
							name = g_ascii_strdown(cur, len);
						}

						in_attr = FALSE;
						cur = close + 1;
					}
					break;
				case ' ':
				case '>':
					in_attr = FALSE;
					/* fall through */
				default:
					cur = close;
					break;
				}
			} else {
				switch (*cur) {
				case ' ':
					/* swallow extra spaces inside tag */
					while (*cur && *cur == ' ') {
						cur++;
					}
					in_attr = TRUE;
					break;
				case '>':
					found = TRUE;
					*end = cur;
					break;
				case '"':
				case '\'':
					in_quotes = cur;
					/* fall through */
				default:
					cur++;
					break;
				}
			}
		} else {
			/* if we hit a < followed by the name of our tag... */
			if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) {
				*start = cur;
				cur = cur + needlelen + 1;

				/* if we're pointing at a space or a >, we found the right tag. if
				 * we're not, we've found a longer tag, so we need to skip to the
				 * >, but not being distracted by >s inside quotes. */
				if (*cur == ' ' || *cur == '>') {
					in_tag = TRUE;
				} else {
					while (*cur && *cur != '>') {
						if (*cur == '"' || *cur == '\'') {
							const char quote = *cur++;

							while (*cur && *cur != quote) {
								cur++;
							}
							/* step over the closing quote so it is not
							 * mistaken for a new opening quote */
							if (*cur) {
								cur++;
							}
						} else {
							cur++;
						}
					}
				}
			} else {
				cur++;
			}
		}
	}

	/* clean up any attribute name from a premature termination */
	g_free(name);

	if (found) {
		*attributes = attribs;
	} else {
		/* attributes may have been collected before the string ran out */
		g_datalist_clear(&attribs);

		*start = NULL;
		*end = NULL;
		*attributes = NULL;
	}

	return found;
}

/*
 * An open tag on the conversion stack of purple_markup_html_to_xhtml().
 * src_tag and dest_tag always point at string literals and are never freed.
 */
struct purple_parse_tag {
	char *src_tag;   /* tag name as it appears in the input */
	char *dest_tag;  /* tag name emitted in the XHTML output */
	gboolean ignore; /* TRUE when no opening tag was emitted, so no close is */
};

/* NOTE: Do not put `do {} while(0)` around this macro (as this is the method
         recommended in the GCC docs). It contains 'continue's that should
         affect the while-loop in purple_markup_html_to_xhtml and doing the
         above would break that.
         Also, remember to put braces in constructs that require them for
         multiple statements when using this macro.
         Backslashes inside quoted attribute values are not treated as escape
         characters. */
#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
						const char *o = c + strlen("<" x); \
						const char *p = NULL, *q = NULL, *r = NULL; \
						/* o = iterating over full tag \
						 * p = > (end of tag) \
						 * q = start of quoted bit \
						 * r = < inside tag \
						 */ \
						GString *innards = g_string_new(""); \
						while(o && *o) { \
							if(!q && (*o == '\"' || *o == '\'') ) { \
								q = o; \
							} else if(q) { \
								if(*o == *q) { /* end of quoted bit */ \
									char *unescaped = g_strndup(q+1, o-q-1); \
									char *escaped = g_markup_escape_text(unescaped, -1); \
									g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
									g_free(unescaped); \
									g_free(escaped); \
									q = NULL; \
								} \
							} else if(*o == '<') { \
								r = o; \
							} else if(*o == '>') { \
								p = o; \
								break; \
							} else { \
								innards = g_string_append_c(innards, *o); \
							} \
							o++; \
						} \
						if(p && !r) { /* got an end of tag and no other < earlier */\
							if(*(p-1) != '/') { \
								struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
								pt->src_tag = x; \
								pt->dest_tag = y; \
								tags = g_list_prepend(tags, pt); \
							} \
							if(xhtml) { \
								xhtml = g_string_append(xhtml, "<" y); \
								xhtml = g_string_append(xhtml, innards->str); \
								xhtml = g_string_append_c(xhtml, '>'); \
							} \
							c = p + 1; \
						} else { /* got end of tag with earlier < *or* didn't get anything */ \
							if(xhtml) \
								xhtml = g_string_append(xhtml, "&lt;"); \
							if(plain) \
								plain = g_string_append_c(plain, '<'); \
							c++; \
						} \
						g_string_free(innards, TRUE); \
						continue; \
					} \
					if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
							(*(c+strlen("<" x)) == '>' || \
							 !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
						if(xhtml) \
							xhtml = g_string_append(xhtml, "<" y); \
						c += strlen("<" x); \
						if(*c != '/') { \
							struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
							pt->src_tag = x; \
							pt->dest_tag = y; \
							tags = g_list_prepend(tags, pt); \
							if(xhtml) \
								xhtml = g_string_append_c(xhtml, '>'); \
						} else { \
							if(xhtml) \
								xhtml = g_string_append(xhtml, "/>");\
						} \
						c = strchr(c, '>') + 1; \
						continue; \
					}
/* Don't forget to check the note above for ALLOW_TAG_ALT. */
#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)

/*
 * Converts `html` into an XHTML rendering and/or a plain-text rendering.
 *
 * At least one of `xhtml_out` / `plain_out` must be non-NULL; each non-NULL
 * output receives a newly allocated string. A fixed list of tags is passed
 * through or mapped to <span>/<em> equivalents, unrecognized '<' characters
 * are escaped in the XHTML output, and any tags still open at the end of the
 * input are closed in the XHTML output.
 */
void
purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
                            char **plain_out)
{
	GString *xhtml = NULL;
	GString *plain = NULL;
	GString *url = NULL;
	GString *cdata = NULL;
	GList *tags = NULL, *tag;
	const char *c = html;
	char quote = '\0';

#define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
			quote = *(ptr++); \
		else \
			quote = '\0';

#define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))

	g_return_if_fail(xhtml_out != NULL || plain_out != NULL);

	if(xhtml_out)
		xhtml = g_string_new("");
	if(plain_out)
		plain = g_string_new("");

	while(c && *c) {
		if(*c == '<') {
			if(*(c+1) == '/') { /* closing tag */
				tag = tags;
				while(tag) {
					struct purple_parse_tag *pt = tag->data;
					if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
						c += strlen(pt->src_tag) + 3;
						break;
					}
					tag = tag->next;
				}
				if(tag) {
					/* close every tag opened after the one being closed, then
					 * the tag itself */
					while(tags) {
						struct purple_parse_tag *pt = tags->data;
						if(xhtml && !pt->ignore)
							g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
						if(plain && purple_strequal(pt->src_tag, "a")) {
							/* if this is a link, we have to add the url to the plaintext, too */
							if (cdata && url &&
									(!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
									                                 g_utf8_collate(url->str + 7, cdata->str) != 0))) {
								char *unescaped = purple_unescape_html(url->str);
								g_string_append_printf(plain, " <%s>", g_strstrip(unescaped));
								g_free(unescaped);
							}
							if (cdata) {
								g_string_free(cdata, TRUE);
								cdata = NULL;
							}

						}
						if(tags == tag)
							break;
						tags = g_list_delete_link(tags, tags);
						g_free(pt);
					}
					g_free(tag->data);
					tags = g_list_delete_link(tags, tag);
				} else {
					/* a closing tag we weren't expecting...
					 * we'll let it slide, if it's really a tag...if it's
					 * just a </ we'll escape it properly */
					const char *end = c+2;
					while(*end && g_ascii_isalpha(*end))
						end++;
					if(*end == '>') {
						c = end+1;
					} else {
						if(xhtml)
							xhtml = g_string_append(xhtml, "&lt;");
						if(plain)
							plain = g_string_append_c(plain, '<');
						c++;
					}
				}
			} else { /* opening tag */
				ALLOW_TAG("blockquote");
				ALLOW_TAG("cite");
				ALLOW_TAG("div");
				ALLOW_TAG("em");
				ALLOW_TAG("h1");
				ALLOW_TAG("h2");
				ALLOW_TAG("h3");
				ALLOW_TAG("h4");
				ALLOW_TAG("h5");
				ALLOW_TAG("h6");
				/* we only allow html to start the message */
				if(c == html) {
					ALLOW_TAG("html");
				}
				ALLOW_TAG_ALT("i", "em");
				ALLOW_TAG_ALT("italic", "em");
				ALLOW_TAG("li");
				ALLOW_TAG("ol");
				ALLOW_TAG("p");
				ALLOW_TAG("pre");
				ALLOW_TAG("q");
				ALLOW_TAG("span");
				ALLOW_TAG("ul");

				/* we skip <HR> because it's not legal in XHTML-IM.  However,
				 * we still want to send something sensible, so we put a
				 * linebreak in its place. <BR> also needs special handling
				 * because putting a </BR> to close it would just be dumb. */
				if((!g_ascii_strncasecmp(c, "<br", 3)
							|| !g_ascii_strncasecmp(c, "<hr", 3))
						&& (*(c+3) == '>' ||
							!g_ascii_strncasecmp(c+3, "/>", 2) ||
							!g_ascii_strncasecmp(c+3, " />", 3))) {
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<br/>");
					if(plain && *c != '\n')
						plain = g_string_append_c(plain, '\n');
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					if (*(c+2) == '>')
						pt->src_tag = "b";
					else if (*(c+2) == 'o')
						pt->src_tag = "bold";
					else
						pt->src_tag = "strong";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = *(c+2) == '>' ? "u" : "underline";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if (xhtml)
						xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = *(c+2) == '>' ? "s" : "strike";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "sub";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
					continue;
				}
				if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
					struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "sup";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					c = strchr(c, '>') + 1;
					if(xhtml)
						xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
					continue;
				}
				if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
					const char *p = c + 4;
					GString *src = NULL, *alt = NULL;
#define ESCAPE(from, to)        \
		CHECK_QUOTE(from); \
		while (VALID_CHAR(from)) { \
			int len; \
			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
				to = g_string_append(to, "&amp;"); \
			else if (*from == '\'') \
				to = g_string_append(to, "&apos;"); \
			else \
				to = g_string_append_c(to, *from); \
			from++; \
		}

					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "src=", 4)) {
							const char *q = p + 4;
							if (src)
								g_string_free(src, TRUE);
							src = g_string_new("");
							ESCAPE(q, src);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
							const char *q = p + 4;
							if (alt)
								g_string_free(alt, TRUE);
							alt = g_string_new("");
							ESCAPE(q, alt);
							p = q;
						} else {
							p++;
						}
					}
#undef ESCAPE
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					/* src and alt are required! */
					if(src && xhtml)
						g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
					if(alt) {
						if(plain) {
							char *unescaped = purple_unescape_html(alt->str);
							plain = g_string_append(plain, unescaped);
							g_free(unescaped);
						}
						if(!src && xhtml)
							xhtml = g_string_append(xhtml, alt->str);
						g_string_free(alt, TRUE);
					}
					/* an <img> without src= leaves src NULL */
					if(src)
						g_string_free(src, TRUE);
					continue;
				}
				if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) {
					const char *p = c + 2;
					struct purple_parse_tag *pt;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "href=", 5)) {
							const char *q = p + 5;
							if (url)
								g_string_free(url, TRUE);
							url = g_string_new("");
							if (cdata)
								g_string_free(cdata, TRUE);
							cdata = g_string_new("");
							CHECK_QUOTE(q);
							while (VALID_CHAR(q)) {
								int len;
								if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
									url = g_string_append(url, "&amp;");
								else if (*q == '"')
									url = g_string_append(url, "&quot;");
								else
									url = g_string_append_c(url, *q);
								q++;
							}
							p = q;
						} else {
							p++;
						}
					}
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "a";
					pt->dest_tag = "a";
					tags = g_list_prepend(tags, pt);
					if(xhtml)
						g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : "");
					continue;
				}
#define ESCAPE(from, to)        \
		CHECK_QUOTE(from); \
		while (VALID_CHAR(from)) { \
			int len; \
			if ((*from == '&') && (purple_markup_unescape_entity(from, &len) == NULL)) \
				to = g_string_append(to, "&amp;"); \
			else if (*from == '\'') \
				to = g_string_append_c(to, '\"'); \
			else \
				to = g_string_append_c(to, *from); \
			from++; \
		}
				if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
					const char *p = c + 5;
					GString *style = g_string_new("");
					struct purple_parse_tag *pt;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "back=", 5)) {
							const char *q = p + 5;
							GString *color = g_string_new("");
							ESCAPE(q, color);
							g_string_append_printf(style, "background: %s; ", color->str);
							g_string_free(color, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "color=", 6)) {
							const char *q = p + 6;
							GString *color = g_string_new("");
							ESCAPE(q, color);
							g_string_append_printf(style, "color: %s; ", color->str);
							g_string_free(color, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "face=", 5)) {
							const char *q = p + 5;
							GString *face = g_string_new("");
							ESCAPE(q, face);
							g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
							g_string_free(face, TRUE);
							p = q;
						} else if (!g_ascii_strncasecmp(p, "size=", 5)) {
							const char *q = p + 5;
							int sz;
							const char *size = "medium";
							CHECK_QUOTE(q);
							sz = atoi(q);
							switch (sz)
							{
							case 1:
							  size = "xx-small";
							  break;
							case 2:
							  size = "small";
							  break;
							case 3:
							  size = "medium";
							  break;
							case 4:
							  size = "large";
							  break;
							case 5:
							  size = "x-large";
							  break;
							case 6:
							case 7:
							  size = "xx-large";
							  break;
							default:
							  break;
							}
							g_string_append_printf(style, "font-size: %s; ", size);
							p = q;
						} else {
							p++;
						}
					}
					if ((c = strchr(p, '>')) != NULL)
						c++;
					else
						c = p;
					pt = g_new0(struct purple_parse_tag, 1);
					pt->src_tag = "font";
					pt->dest_tag = "span";
					tags = g_list_prepend(tags, pt);
					if(style->len && xhtml)
						g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
					else
						pt->ignore = TRUE;
					g_string_free(style, TRUE);
					continue;
				}
#undef ESCAPE
				if (!g_ascii_strncasecmp(c, "<body ", 6)) {
					const char *p = c + 6;
					gboolean did_something = FALSE;
					while (*p && *p != '>') {
						if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
							const char *q = p + 8;
							struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
							GString *color = g_string_new("");
							CHECK_QUOTE(q);
							while (VALID_CHAR(q)) {
								color = g_string_append_c(color, *q);
								q++;
							}
							if (xhtml)
								g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
							g_string_free(color, TRUE);
							if ((c = strchr(p, '>')) != NULL)
								c++;
							else
								c = p;
							pt->src_tag = "body";
							pt->dest_tag = "span";
							tags = g_list_prepend(tags, pt);
							did_something = TRUE;
							break;
						}
						p++;
					}
					if (did_something) continue;
				}
				/* this has to come after the special case for bgcolor */
				ALLOW_TAG("body");
				if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
					const char *p = strstr(c + strlen("<!--"), "-->");
					if(p) {
						if(xhtml)
							xhtml = g_string_append(xhtml, "&lt;!--");
						c += strlen("<!--");
						continue;
					}
				}

				if(xhtml)
					xhtml = g_string_append(xhtml, "&lt;");
				if(plain)
					plain = g_string_append_c(plain, '<');
				c++;
			}
		} else if(*c == '&') {
			char buf[7];
			const char *pln;
			int len;

			if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
				len = 1;
				g_snprintf(buf, sizeof(buf), "%c", *c);
				pln = buf;
			}
			if(xhtml)
				xhtml = g_string_append_len(xhtml, c, len);
			if(plain)
				plain = g_string_append(plain, pln);
			if(cdata)
				cdata = g_string_append_len(cdata, c, len);
			c += len;
		} else {
			if(xhtml)
				xhtml = g_string_append_c(xhtml, *c);
			if(plain)
				plain = g_string_append_c(plain, *c);
			if(cdata)
				cdata = g_string_append_c(cdata, *c);
			c++;
		}
	}
	if(xhtml) {
		for (tag = tags; tag ; tag = tag->next) {
			struct purple_parse_tag *pt = tag->data;
			if(!pt->ignore)
				g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
		}
	}
	/* free the remaining tag structs as well as the list cells */
	g_list_free_full(tags, g_free);
	if(xhtml_out)
		*xhtml_out = g_string_free(xhtml, FALSE);
	if(plain_out)
		*plain_out = g_string_free(plain, FALSE);
	if(url)
		g_string_free(url, TRUE);
	if (cdata)
		g_string_free(cdata, TRUE);
#undef CHECK_QUOTE
#undef VALID_CHAR
}

/* The following are probably reasonable changes:
 * - \n should be converted to a normal space
 * - in addition to <br>, <p> and <div> etc. should also be converted into \n
 * - We want to turn </td>#whitespace<td> sequences into a single tab
 * - We want to turn </tr>#whitespace<tr> sequences into a single \n
 * - <script>...</script> and <style>...</style> should be completely removed
 */

/*
 * Returns a newly allocated copy of `str` with HTML tags removed and
 * recognized entities decoded, or NULL when `str` is NULL.
 *
 * The copy is rewritten in place: `i` is the read index and `j` the write
 * index. Some tags are mapped to a newline or tab, the contents of <script>
 * and <style> are dropped, and the href of an <a> tag is appended in
 * parentheses after the link text when it differs from that text.
 */
char *
purple_markup_strip_html(const char *str)
{
	int i, j, k, entlen;
	gboolean visible = TRUE;
	gboolean closing_td_p = FALSE;
	gchar *str2;
	const gchar *cdata_close_tag = NULL, *ent;
	gchar *href = NULL;
	int href_st = 0;

	if(!str)
		return NULL;

	str2 = g_strdup(str);

	for (i = 0, j = 0; str2[i]; i++)
	{
		if (str2[i] == '<')
		{
			if (cdata_close_tag)
			{
				/* Note: Don't even assume any other tag is a tag in CDATA */
				if (g_ascii_strncasecmp(str2 + i, cdata_close_tag,
						strlen(cdata_close_tag)) == 0)
				{
					i += strlen(cdata_close_tag) - 1;
					cdata_close_tag = NULL;
				}
				continue;
			}
			else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
			{
				str2[j++] = '\t';
				visible = TRUE;
			}
			else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0)
			{
				closing_td_p = TRUE;
				visible = FALSE;
			}
			else
			{
				closing_td_p = FALSE;
				visible = TRUE;
			}

			k = i + 1;

			if(g_ascii_isspace(str2[k]))
				visible = TRUE;
			else if (str2[k])
			{
				/* Scan until we end the tag either implicitly (closed start
				 * tag) or explicitly, using a sloppy method (i.e., < or >
				 * inside quoted attributes will screw us up)
				 */
				while (str2[k] && str2[k] != '<' && str2[k] != '>')
				{
					k++;
				}

				/* If we've got an <a> tag with an href, save the address
				 * to print later. */
				if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 &&
				    g_ascii_isspace(str2[i+2]))
				{
					int st; /* start of href, inclusive [ */
					int end; /* end of href, exclusive ) */
					char delim = ' ';
					/* Find start of href */
					for (st = i + 3; st < k; st++)
					{
						if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0)
						{
							st += 5;
							if (str2[st] == '"' || str2[st] == '\'')
							{
								delim = str2[st];
								st++;
							}
							break;
						}
					}
					/* find end of address */
					for (end = st; end < k && str2[end] != delim; end++)
					{
						/* All the work is done in the loop construct above. */
					}

					/* If there's an address, save it.  If there was
					 * already one saved, kill it. */
					if (st < k)
					{
						char *tmp;
						g_free(href);
						tmp = g_strndup(str2 + st, end - st);
						href = purple_unescape_html(tmp);
						g_free(tmp);
						href_st = j;
					}
				}

				/* Replace </a> with an ascii representation of the
				 * address the link was pointing to. */
				else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0)
				{
					size_t hrlen = strlen(href);

					/* Only insert the href if it's different from the CDATA. */
					if ((hrlen != (gsize)(j - href_st) ||
					     strncmp(str2 + href_st, href, hrlen)) &&
					    (hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */
					     strncmp(str2 + href_st, href + 7, hrlen - 7)))
					{
						str2[j++] = ' ';
						str2[j++] = '(';
						memmove(str2 + j, href, hrlen);
						j += hrlen;
						str2[j++] = ')';
						g_free(href);
						href = NULL;
					}
				}

				/* Check for tags which should be mapped to newline (but ignore some of
				 * the tags at the beginning of the text) */
				else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0
				              || g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0
				              || g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0
				              || g_ascii_strncasecmp(str2 + i, "<li", 3) == 0
				              || g_ascii_strncasecmp(str2 + i, "<div", 4) == 0))
				 || g_ascii_strncasecmp(str2 + i, "<br", 3) == 0
				 || g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0)
				{
					str2[j++] = '\n';
				}
				/* Check for tags which begin CDATA and need to be closed */
				else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0)
				{
					cdata_close_tag = "</script>";
				}
				else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0)
				{
					cdata_close_tag = "</style>";
				}
				/* Update the index and continue checking after the tag */
				i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k;
				continue;
			}
		}
		else if (cdata_close_tag)
		{
			continue;
		}
		else if (!g_ascii_isspace(str2[i]))
		{
			visible = TRUE;
		}

		if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL)
		{
			while (*ent)
				str2[j++] = *ent++;
			i += entlen - 1;
			continue;
		}

		if (visible)
			str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
	}

	g_free(href);

	str2[j] = '\0';

	return str2;
}

/* Returns TRUE for characters that terminate a URL or e-mail address. */
static gboolean
badchar(char c)
{
	switch (c) {
	case ' ':
	case ',':
	case '\0':
	case '\n':
	case '\r':
	case '<':
	case '>':
	case '"':
		return TRUE;
	default:
		return FALSE;
	}
}

/* Returns TRUE when `c` starts with the escaped form of <, > or ". */
static gboolean
badentity(const char *c)
{
	if (!g_ascii_strncasecmp(c, "&lt;", 4) ||
		!g_ascii_strncasecmp(c, "&gt;", 4) ||
		!g_ascii_strncasecmp(c, "&quot;", 6)) {
		return TRUE;
	}
	return FALSE;
}

/*
 * Tries to turn the URL starting at `c` into an anchor appended to `ret`.
 *
 * `start` is the beginning of the whole text, `matchlen` the length of the
 * scheme/prefix that was matched at `c`, and `urlprefix` a string to put in
 * front of the URL inside the HREF. Returns the position just after the URL,
 * or `c` unchanged when nothing follows the matched prefix.
 */
static const char *
process_link(GString *ret, const char *start, const char *c, int matchlen,
             const char *urlprefix, int inside_paren)
{
	char *url_buf, *tmpurlbuf;
	const char *t;

	for (t = c;; t++) {
		if (!badchar(*t) && !badentity(t))
			continue;

		/* nothing but the prefix itself: not a link */
		if (t - c == matchlen)
			break;

		/* a comma only ends the URL when followed by a space */
		if (*t == ',' && *(t + 1) != ' ') {
			continue;
		}

		if (t > start && *(t - 1) == '.')
			t--;
		if (t > start && *(t - 1) == ')' && inside_paren > 0)
			t--;

		url_buf = g_strndup(c, t - c);
		tmpurlbuf = purple_unescape_html(url_buf);
		g_string_append_printf(ret, "<A HREF=\"%s%s\">%s</A>",
		                       urlprefix,
		                       tmpurlbuf, url_buf);
		g_free(tmpurlbuf);
		g_free(url_buf);
		return t;
	}

	return c;
}

/*
 * Returns a newly allocated copy of `text` in which URLs and e-mail addresses
 * found outside of HTML tags are wrapped in <A HREF="..."> anchors. Returns
 * NULL when `text` is NULL.
 */
char *
purple_markup_linkify(const char *text)
{
	const char *c, *t, *q = NULL;
	char *tmpurlbuf, *url_buf;
	gunichar g;
	gboolean inside_html = FALSE;
	int inside_paren = 0;
	GString *ret;

	if (text == NULL)
		return NULL;

	ret = g_string_new("");

	c = text;
	while (*c) {

		if(*c == '(' && !inside_html) {
			inside_paren++;
			ret = g_string_append_c(ret, *c);
			c++;
		}

		if(inside_html) {
			if(*c == '>') {
				inside_html = FALSE;
			} else if(!q && (*c == '\"' || *c == '\'')) {
				q = c;
			} else if(q) {
				if(*c == *q)
					q = NULL;
			}
		} else if(*c == '<') {
			inside_html = TRUE;
			if (!g_ascii_strncasecmp(c, "<A", 2)) {
				/* copy an existing anchor through untouched */
				while (1) {
					if (!g_ascii_strncasecmp(c, "/A>", 3)) {
						inside_html = FALSE;
						break;
					}
					ret = g_string_append_c(ret, *c);
					c++;
					if (!(*c))
						break;
				}
			}
		} else if (!g_ascii_strncasecmp(c, "http://", 7)) {
			c = process_link(ret, text, c, 7, "", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "https://", 8)) {
			c = process_link(ret, text, c, 8, "", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "ftp://", 6)) {
			c = process_link(ret, text, c, 6, "", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "sftp://", 7)) {
			c = process_link(ret, text, c, 7, "", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "file://", 7)) {
			c = process_link(ret, text, c, 7, "", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
			c = process_link(ret, text, c, 4, "http://", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
			c = process_link(ret, text, c, 4, "ftp://", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "xmpp:", 5) && (c == text || badchar(c[-1]) || badentity(c-1))) {
			c = process_link(ret, text, c, 5, "", inside_paren);
		} else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
			t = c;
			while (1) {
				if (badchar(*t) || badentity(t)) {
					char *d;
					if (t - c == 7) {
						break;
					}
					if (t > text && *(t - 1) == '.')
						t--;
					/* validate only the address part, without any ?query */
					if ((d = strstr(c + 7, "?")) != NULL && d < t)
						url_buf = g_strndup(c + 7, d - c - 7);
					else
						url_buf = g_strndup(c + 7, t - c - 7);
					if (!purple_email_is_valid(url_buf)) {
						g_free(url_buf);
						break;
					}
					g_free(url_buf);
					url_buf = g_strndup(c, t - c);
					tmpurlbuf = purple_unescape_html(url_buf);
					g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
							  tmpurlbuf, url_buf);
					g_free(url_buf);
					g_free(tmpurlbuf);
					c = t;
					break;
				}
				t++;
			}
		} else if (c != text && (*c == '@')) {
			int flag;
			GString *gurl_buf = NULL;
			const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0";

			if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1)))
				flag = 0;
			else {
				flag = 1;
				gurl_buf = g_string_new("");
			}

			t = c;
			while (flag) {
				/* iterate backwards grabbing the local part of an email address */
				g = g_utf8_get_char(t);
				if (badchar(*t) || (g >= 127) || (*t == '(') ||
					((*t == ';') && ((t > (text+2) && (!g_ascii_strncasecmp(t - 3, "&lt;", 4) ||
					                                   !g_ascii_strncasecmp(t - 3, "&gt;", 4))) ||
					                 (t > (text+4) && (!g_ascii_strncasecmp(t - 5, "&quot;", 6)))))) {
					/* local part will already be part of ret, strip it out */
					ret = g_string_truncate(ret, ret->len - (c - t));
					ret = g_string_append_unichar(ret, g);
					break;
				} else {
					g_string_prepend_unichar(gurl_buf, g);
					t = g_utf8_find_prev_char(text, t);
					/* NULL means we walked off the start of the text */
					if (t == NULL) {
						ret = g_string_assign(ret, "");
						break;
					}
				}
			}

			t = g_utf8_find_next_char(c, NULL);

			while (flag) {
				/* iterate forwards grabbing the domain part of an email address */
				g = g_utf8_get_char(t);
				if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) {
					char *d;

					url_buf = g_string_free(gurl_buf, FALSE);
					gurl_buf = NULL;

					/* strip off trailing periods */
					if (*url_buf) {
						for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
							*d = '\0';
					}

					tmpurlbuf = purple_unescape_html(url_buf);
					if (purple_email_is_valid(tmpurlbuf)) {
						g_string_append_printf(ret, "<A HREF=\"mailto:%s\">%s</A>",
								tmpurlbuf, url_buf);
					} else {
						g_string_append(ret, url_buf);
					}
					g_free(url_buf);
					g_free(tmpurlbuf);
					c = t;

					break;
				} else {
					g_string_append_unichar(gurl_buf, g);
					t = g_utf8_find_next_char(t, NULL);
				}
			}

			if (gurl_buf) {
				g_string_free(gurl_buf, TRUE);
			}
		}

		if(*c == ')' && !inside_html) {
			inside_paren--;
			ret = g_string_append_c(ret, *c);
			c++;
		}

		if (*c == 0)
			break;

		ret = g_string_append_c(ret, *c);
		c++;

	}
	return g_string_free(ret, FALSE);
}

/*
 * Returns a newly allocated copy of `in` with every entity recognized by
 * purple_markup_unescape_entity() decoded, or NULL when `in` is NULL.
 */
char *
purple_unescape_text(const char *in)
{
	GString *ret;
	const char *c = in;

	if (in == NULL)
		return NULL;

	ret = g_string_new("");
	while (*c) {
		int len;
		const char *ent;

		if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) {
			g_string_append(ret, ent);
			c += len;
		} else {
			g_string_append_c(ret, *c);
			c++;
		}
	}

	return g_string_free(ret, FALSE);
}

/*
 * Like purple_unescape_text(), but additionally turns each literal "<br>"
 * (matched case-sensitively) into a newline.
 */
char *
purple_unescape_html(const char *html)
{
	GString *ret;
	const char *c = html;

	if (html == NULL)
		return NULL;

	ret = g_string_new("");
	while (*c) {
		int len;
		const char *ent;

		if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) {
			g_string_append(ret, ent);
			c += len;
		} else if (!strncmp(c, "<br>", 4)) {
			g_string_append_c(ret, '\n');
			c += 4;
		} else {
			g_string_append_c(ret, *c);
			c++;
		}
	}

	return g_string_free(ret, FALSE);
}

/*
 * Returns a newly allocated slice of the markup `str` covering the visible
 * positions [x, y), counting each entity as one position and tags by their
 * rendered width. Tags open at the start of the slice are re-emitted and tags
 * still open at the end are closed. Returns NULL when an unterminated tag or
 * entity is encountered.
 */
char *
purple_markup_slice(const char *str, guint x, guint y)
{
	GString *ret;
	GQueue *q;
	guint z = 0;
	gboolean appended = FALSE;
	gunichar c;
	char *tag;

	g_return_val_if_fail(str != NULL, NULL);
	g_return_val_if_fail(x <= y, NULL);

	if (x == y)
		return g_strdup("");

	ret = g_string_new("");
	q = g_queue_new();

	while (*str && (z < y)) {
		c = g_utf8_get_char(str);

		if (c == '<') {
			const char *end = strchr(str, '>');

			if (!end) {
				g_string_free(ret, TRUE);
				while ((tag = g_queue_pop_head(q)))
					g_free(tag);
				g_queue_free(q);
				return NULL;
			}

			if (!g_ascii_strncasecmp(str, "<img ", 5)) {
				z += strlen("[Image]");
			} else if (!g_ascii_strncasecmp(str, "<br", 3)) {
				z += 1;
			} else if (!g_ascii_strncasecmp(str, "<hr>", 4)) {
				z += strlen("\n---\n");
			} else if (!g_ascii_strncasecmp(str, "</", 2)) {
				/* pop stack */
				char *tmp;

				tmp = g_queue_pop_head(q);
				g_free(tmp);
				/* z += 0; */
			} else {
				/* push it unto the stack */
				char *tmp;

				tmp = g_strndup(str, end - str + 1);
				g_queue_push_head(q, tmp);
				/* z += 0; */
			}

			if (z >= x) {
				g_string_append_len(ret, str, end - str + 1);
			}

			str = end;
		} else if (c == '&') {
			const char *end = strchr(str, ';');
			if (!end) {
				g_string_free(ret, TRUE);
				while ((tag = g_queue_pop_head(q)))
					g_free(tag);
				g_queue_free(q);

				return NULL;
			}

			if (z >= x)
				g_string_append_len(ret, str, end - str + 1);

			z++;
			str = end;
		} else {
			if (z == x && z > 0 && !appended) {
				/* re-open, outermost first, the tags that were opened
				 * before the slice started */
				GList *l = q->tail;

				while (l) {
					tag = l->data;
					g_string_append(ret, tag);
					l = l->prev;
				}
				appended = TRUE;
			}

			if (z >= x)
				g_string_append_unichar(ret, c);
			z++;
		}

		str = g_utf8_next_char(str);
	}

	while ((tag = g_queue_pop_head(q))) {
		char *name;

		name = purple_markup_get_tag_name(tag);
		g_string_append_printf(ret, "</%s>", name);
		g_free(name);
		g_free(tag);
	}

	g_queue_free(q);
	return g_string_free(ret, FALSE);
}

/*
 * Returns a newly allocated copy of the name of `tag`, i.e. the text between
 * the leading '<' and the first '>', ' ' or '/' (or the end of the string).
 * `tag` must be non-NULL and start with '<'.
 */
char *
purple_markup_get_tag_name(const char *tag)
{
	int i;

	g_return_val_if_fail(tag != NULL, NULL);
	g_return_val_if_fail(*tag == '<', NULL);

	for (i = 1; tag[i]; i++)
		if (tag[i] == '>' || tag[i] == ' ' || tag[i] == '/')
			break;

	return g_strndup(tag+1, i-1);
}