diff -r 463b4fa9f067 -r 6a6d2ef151e6 src/util.c --- a/src/util.c Wed Oct 18 16:28:51 2006 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3988 +0,0 @@ -/* - * @file util.h Utility Functions - * @ingroup core - * - * Gaim is the legal property of its developers, whose names are too numerous - * to list here. Please refer to the COPYRIGHT file distributed with this - * source distribution. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include "internal.h" - -#include "conversation.h" -#include "debug.h" -#include "notify.h" -#include "prpl.h" -#include "prefs.h" -#include "util.h" - -typedef struct -{ - void (*callback)(void *, const char *, size_t); - void *user_data; - - struct - { - char *user; - char *passwd; - char *address; - int port; - char *page; - - } website; - - char *url; - gboolean full; - char *user_agent; - gboolean http11; - char *request; - gsize request_written; - gboolean include_headers; - - int inpa; - - gboolean got_headers; - gboolean has_explicit_data_len; - char *webdata; - unsigned long len; - unsigned long data_len; - -} GaimFetchUrlData; - -static char custom_home_dir[MAXPATHLEN]; -static char home_dir[MAXPATHLEN]; - -GaimMenuAction * -gaim_menu_action_new(const char *label, GaimCallback callback, gpointer data, - GList *children) -{ - GaimMenuAction *act = g_new0(GaimMenuAction, 1); - act->label = g_strdup(label); - act->callback = callback; - act->data = data; - act->children = children; - return act; -} - -void -gaim_menu_action_free(GaimMenuAction *act) -{ - g_return_if_fail(act != NULL); - - g_free(act->label); - g_free(act); -} - -/************************************************************************** - * Base16 Functions - **************************************************************************/ -gchar * -gaim_base16_encode(const guchar *data, gsize len) -{ - int i; - gchar *ascii = NULL; - - g_return_val_if_fail(data != NULL, NULL); - g_return_val_if_fail(len > 0, NULL); - - ascii = g_malloc(len * 2 + 1); - - for (i = 0; i < len; i++) - snprintf(&ascii[i * 2], 3, "%02hhx", data[i]); - - return ascii; -} - -guchar * -gaim_base16_decode(const char *str, gsize *ret_len) -{ - int len, i, accumulator = 0; - guchar *data; - - g_return_val_if_fail(str != NULL, NULL); - - len = strlen(str); - - g_return_val_if_fail(strlen(str) > 0, 0); - g_return_val_if_fail(len % 2 > 0, 0); - - data = g_malloc(len / 2); - - for (i = 0; i < len; i++) - { - if ((i % 2) == 0) - accumulator = 0; - else - accumulator <<= 4; - - if (isdigit(str[i])) - accumulator |= str[i] - 48; - else - { - switch(str[i]) - { - case 'a': case 'A': accumulator |= 10; break; - case 'b': case 'B': accumulator |= 11; break; - case 'c': case 'C': accumulator |= 12; break; - case 'd': case 'D': accumulator |= 13; break; - case 'e': case 'E': accumulator |= 14; break; - case 'f': case 'F': accumulator |= 15; break; - } - } - - if (i % 2) - data[(i - 1) / 2] = accumulator; - } - - if (ret_len != NULL) - *ret_len = len / 2; - - return data; -} - -/************************************************************************** - * Base64 Functions - **************************************************************************/ -static const char alphabet[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - -static const char xdigits[] = - "0123456789abcdef"; - -gchar * -gaim_base64_encode(const guchar *data, gsize len) -{ - char *out, *rv; - - g_return_val_if_fail(data != NULL, NULL); - g_return_val_if_fail(len > 0, NULL); - - rv = out = g_malloc(((len/3)+1)*4 + 1); - - for (; len >= 3; len -= 3) - { - *out++ = alphabet[data[0] >> 2]; - *out++ = alphabet[((data[0] << 4) & 0x30) | (data[1] >> 4)]; - *out++ = alphabet[((data[1] << 2) & 0x3c) | (data[2] >> 6)]; - *out++ = alphabet[data[2] & 0x3f]; - data += 3; - } - - if (len > 0) - { - unsigned char fragment; - - *out++ = alphabet[data[0] >> 2]; - fragment = (data[0] << 4) & 0x30; - - if (len > 1) - fragment |= data[1] >> 4; - - *out++ = alphabet[fragment]; - *out++ = (len < 2) ? '=' : alphabet[(data[1] << 2) & 0x3c]; - *out++ = '='; - } - - *out = '\0'; - - return rv; -} - -guchar * -gaim_base64_decode(const char *str, gsize *ret_len) -{ - guchar *out = NULL; - char tmp = 0; - const char *c; - gint32 tmp2 = 0; - int len = 0, n = 0; - - g_return_val_if_fail(str != NULL, NULL); - - c = str; - - while (*c) { - if (*c >= 'A' && *c <= 'Z') { - tmp = *c - 'A'; - } else if (*c >= 'a' && *c <= 'z') { - tmp = 26 + (*c - 'a'); - } else if (*c >= '0' && *c <= 57) { - tmp = 52 + (*c - '0'); - } else if (*c == '+') { - tmp = 62; - } else if (*c == '/') { - tmp = 63; - } else if (*c == '\r' || *c == '\n') { - c++; - continue; - } else if (*c == '=') { - if (n == 3) { - out = g_realloc(out, len + 2); - out[len] = (guchar)(tmp2 >> 10) & 0xff; - len++; - out[len] = (guchar)(tmp2 >> 2) & 0xff; - len++; - } else if (n == 2) { - out = g_realloc(out, len + 1); - out[len] = (guchar)(tmp2 >> 4) & 0xff; - len++; - } - break; - } - tmp2 = ((tmp2 << 6) | (tmp & 0xff)); - n++; - if (n == 4) { - out = g_realloc(out, len + 3); - out[len] = (guchar)((tmp2 >> 16) & 0xff); - len++; - out[len] = (guchar)((tmp2 >> 8) & 0xff); - len++; - out[len] = (guchar)(tmp2 & 0xff); - len++; - tmp2 = 0; - n = 0; - } - c++; - } - - out = g_realloc(out, len + 1); - out[len] = 0; - - if (ret_len != NULL) - *ret_len = len; - - return out; -} - -/************************************************************************** - * Quoted Printable Functions (see RFC 2045). - **************************************************************************/ -guchar * -gaim_quotedp_decode(const char *str, gsize *ret_len) -{ - char *n, *new; - const char *end, *p; - - n = new = g_malloc(strlen (str) + 1); - end = str + strlen(str); - - for (p = str; p < end; p++, n++) { - if (*p == '=') { - if (p[1] == '\r' && p[2] == '\n') { /* 5.1 #5 */ - n -= 1; - p += 2; - } else if (p[1] == '\n') { /* fuzzy case for 5.1 #5 */ - n -= 1; - p += 1; - } else if (p[1] && p[2]) { - char *nibble1 = strchr(xdigits, tolower(p[1])); - char *nibble2 = strchr(xdigits, tolower(p[2])); - if (nibble1 && nibble2) { /* 5.1 #1 */ - *n = ((nibble1 - xdigits) << 4) | (nibble2 - xdigits); - p += 2; - } else { /* This should never happen */ - *n = *p; - } - } else { /* This should never happen */ - *n = *p; - } - } - else if (*p == '_') - *n = ' '; - else - *n = *p; - } - - *n = '\0'; - - if (ret_len != NULL) - *ret_len = n - new; - - /* Resize to take less space */ - /* new = realloc(new, n - new); */ - - return (guchar *)new; -} - -/************************************************************************** - * MIME Functions - **************************************************************************/ -char * -gaim_mime_decode_field(const char *str) -{ - /* - * This is wing's version, partially based on revo/shx's version - * See RFC2047 [which apparently obsoletes RFC1342] - */ - typedef enum { - state_start, state_equal1, state_question1, - state_charset, state_question2, - state_encoding, state_question3, - state_encoded_text, state_question4, state_equal2 = state_start - } encoded_word_state_t; - encoded_word_state_t state = state_start; - const char *cur, *mark; - const char *charset0 = NULL, *encoding0 = NULL, *encoded_text0 = NULL; - char *n, *new; - - /* token can be any CHAR (supposedly ISO8859-1/ISO2022), not just ASCII */ - #define token_char_p(c) \ - (c != ' ' && !iscntrl(c) && !strchr("()<>@,;:\"/[]?.=", c)) - - /* But encoded-text must be ASCII; alas, isascii() may not exist */ - #define encoded_text_char_p(c) \ - ((c & 0x80) == 0 && c != '?' && c != ' ' && isgraph(c)) - - #define RECOVER_MARKED_TEXT strncpy(n, mark, cur - mark + 1); \ - n += cur - mark + 1 - - g_return_val_if_fail(str != NULL, NULL); - - /* NOTE: Assuming that we need just strlen(str)+1 *may* be wrong. - * It would be wrong if one byte (in some unknown encoding) could - * expand to >=4 bytes of UTF-8; I don't know if there are such things. - */ - n = new = g_malloc(strlen(str) + 1); - - /* Here we will be looking for encoded words and if they seem to be - * valid then decode them. - * They are of this form: =?charset?encoding?text?= - */ - - for (cur = str, mark = NULL; *cur; cur += 1) { - switch (state) { - case state_equal1: - if (*cur == '?') { - state = state_question1; - } else { - RECOVER_MARKED_TEXT; - state = state_start; - } - break; - case state_question1: - if (token_char_p(*cur)) { - charset0 = cur; - state = state_charset; - } else { /* This should never happen */ - RECOVER_MARKED_TEXT; - state = state_start; - } - break; - case state_charset: - if (*cur == '?') { - state = state_question2; - } else if (!token_char_p(*cur)) { /* This should never happen */ - RECOVER_MARKED_TEXT; - state = state_start; - } - break; - case state_question2: - if (token_char_p(*cur)) { - encoding0 = cur; - state = state_encoding; - } else { /* This should never happen */ - RECOVER_MARKED_TEXT; - state = state_start; - } - break; - case state_encoding: - if (*cur == '?') { - state = state_question3; - } else if (!token_char_p(*cur)) { /* This should never happen */ - RECOVER_MARKED_TEXT; - state = state_start; - } - break; - case state_question3: - if (encoded_text_char_p(*cur)) { - encoded_text0 = cur; - state = state_encoded_text; - } else if (*cur == '?') { /* empty string */ - encoded_text0 = cur; - state = state_question4; - } else { /* This should never happen */ - RECOVER_MARKED_TEXT; - state = state_start; - } - break; - case state_encoded_text: - if (*cur == '?') { - state = state_question4; - } else if (!encoded_text_char_p(*cur)) { - RECOVER_MARKED_TEXT; - state = state_start; - } - break; - case state_question4: - if (*cur == '=') { /* Got the whole encoded-word */ - char *charset = g_strndup(charset0, encoding0 - charset0 - 1); - char *encoding = g_strndup(encoding0, encoded_text0 - encoding0 - 1); - char *encoded_text = g_strndup(encoded_text0, cur - encoded_text0 - 1); - guchar *decoded = NULL; - gsize dec_len; - if (g_ascii_strcasecmp(encoding, "Q") == 0) - decoded = gaim_quotedp_decode(encoded_text, &dec_len); - else if (g_ascii_strcasecmp(encoding, "B") == 0) - decoded = gaim_base64_decode(encoded_text, &dec_len); - else - decoded = NULL; - if (decoded) { - gsize len; - char *converted = g_convert((const gchar *)decoded, dec_len, "utf-8", charset, NULL, &len, NULL); - - if (converted) { - n = strncpy(n, converted, len) + len; - g_free(converted); - } - g_free(decoded); - } - g_free(charset); - g_free(encoding); - g_free(encoded_text); - state = state_equal2; /* Restart the FSM */ - } else { /* This should never happen */ - RECOVER_MARKED_TEXT; - state = state_start; - } - break; - default: - if (*cur == '=') { - mark = cur; - state = state_equal1; - } else { - /* Some unencoded text. */ - *n = *cur; - n += 1; - } - break; - } /* switch */ - } /* for */ - - if (state != state_start) { - RECOVER_MARKED_TEXT; - } - *n = '\0'; - - return new; -} - - -/************************************************************************** - * Date/Time Functions - **************************************************************************/ - -#ifdef _WIN32 -static long win32_get_tz_offset() { - TIME_ZONE_INFORMATION tzi; - DWORD ret; - long off = -1; - - if ((ret = GetTimeZoneInformation(&tzi)) != TIME_ZONE_ID_INVALID) - { - off = -(tzi.Bias * 60); - if (ret == TIME_ZONE_ID_DAYLIGHT) - off -= tzi.DaylightBias * 60; - } - - return off; -} -#endif - -#ifndef HAVE_STRFTIME_Z_FORMAT -static const char *get_tmoff(const struct tm *tm) -{ - static char buf[6]; - long off; - gint8 min; - gint8 hrs; - struct tm new_tm = *tm; - - mktime(&new_tm); - - if (new_tm.tm_isdst < 0) - g_return_val_if_reached(""); - -#ifdef _WIN32 - if ((off = win32_get_tz_offset()) == -1) - return ""; -#else -# ifdef HAVE_TM_GMTOFF - off = new_tm.tm_gmtoff; -# else -# ifdef HAVE_TIMEZONE - tzset(); - off = -timezone; -# endif /* HAVE_TIMEZONE */ -# endif /* !HAVE_TM_GMTOFF */ -#endif /* _WIN32 */ - - min = (off / 60) % 60; - hrs = ((off / 60) - min) / 60; - - if (g_snprintf(buf, sizeof(buf), "%+03d%02d", hrs, ABS(min)) > 5) - g_return_val_if_reached(""); - - return buf; -} -#endif - -/* Windows doesn't HAVE_STRFTIME_Z_FORMAT, but this seems clearer. -- rlaager */ -#if !defined(HAVE_STRFTIME_Z_FORMAT) || defined(_WIN32) -static size_t gaim_internal_strftime(char *s, size_t max, const char *format, const struct tm *tm) -{ - const char *start; - const char *c; - char *fmt = NULL; - - /* Yes, this is checked in gaim_utf8_strftime(), - * but better safe than sorry. -- rlaager */ - g_return_val_if_fail(format != NULL, 0); - - /* This is fairly efficient, and it only gets - * executed on Windows or if the underlying - * system doesn't support the %z format string, - * for strftime() so I think it's good enough. - * -- rlaager */ - for (c = start = format; *c ; c++) - { - if (*c != '%') - continue; - - c++; - -#ifndef HAVE_STRFTIME_Z_FORMAT - if (*c == 'z') - { - char *tmp = g_strdup_printf("%s%.*s%s", - fmt ? fmt : "", - c - start - 1, - start, - get_tmoff(tm)); - g_free(fmt); - fmt = tmp; - start = c + 1; - } -#endif -#ifdef _WIN32 - if (*c == 'Z') - { - char *tmp = g_strdup_printf("%s%.*s%s", - fmt ? fmt : "", - c - start - 1, - start, - wgaim_get_timezone_abbreviation(tm)); - g_free(fmt); - fmt = tmp; - start = c + 1; - } -#endif - } - - if (fmt != NULL) - { - size_t ret; - - if (*start) - { - char *tmp = g_strconcat(fmt, start, NULL); - g_free(fmt); - fmt = tmp; - } - - ret = strftime(s, max, fmt, tm); - g_free(fmt); - - return ret; - } - - return strftime(s, max, format, tm); -} -#else /* HAVE_STRFTIME_Z_FORMAT && !_WIN32 */ -#define gaim_internal_strftime strftime -#endif - -const char * -gaim_utf8_strftime(const char *format, const struct tm *tm) -{ - static char buf[128]; - char *locale; - GError *err = NULL; - int len; - char *utf8; - - g_return_val_if_fail(format != NULL, NULL); - - if (tm == NULL) - { - time_t now = time(NULL); - tm = localtime(&now); - } - - locale = g_locale_from_utf8(format, -1, NULL, NULL, &err); - if (err != NULL) - { - gaim_debug_error("util", "Format conversion failed in gaim_utf8_strftime(): %s", err->message); - g_error_free(err); - locale = g_strdup(format); - } - - /* A return value of 0 is either an error (in - * which case, the contents of the buffer are - * undefined) or the empty string (in which - * case, no harm is done here). */ - if ((len = gaim_internal_strftime(buf, sizeof(buf), locale, tm)) == 0) - { - g_free(locale); - return ""; - } - - g_free(locale); - - utf8 = g_locale_to_utf8(buf, len, NULL, NULL, &err); - if (err != NULL) - { - gaim_debug_error("util", "Result conversion failed in gaim_utf8_strftime(): %s", err->message); - g_error_free(err); - } - else - { - gaim_strlcpy(buf, utf8); - g_free(utf8); - } - - return buf; -} - -const char * -gaim_date_format_short(const struct tm *tm) -{ - return gaim_utf8_strftime("%x", tm); -} - -const char * -gaim_date_format_long(const struct tm *tm) -{ - return gaim_utf8_strftime(_("%x %X"), tm); -} - -const char * -gaim_date_format_full(const struct tm *tm) -{ - return gaim_utf8_strftime("%c", tm); -} - -const char * -gaim_time_format(const struct tm *tm) -{ - return gaim_utf8_strftime("%X", tm); -} - -time_t -gaim_time_build(int year, int month, int day, int hour, int min, int sec) -{ - struct tm tm; - - tm.tm_year = year - 1900; - tm.tm_mon = month - 1; - tm.tm_mday = day; - tm.tm_hour = hour; - tm.tm_min = min; - tm.tm_sec = sec >= 0 ? sec : time(NULL) % 60; - - return mktime(&tm); -} - -time_t -gaim_str_to_time(const char *timestamp, gboolean utc, - struct tm *tm, long *tz_off, const char **rest) -{ - time_t retval = 0; - struct tm *t; - const char *c = timestamp; - int year = 0; - long tzoff = GAIM_NO_TZ_OFF; - - time(&retval); - t = localtime(&retval); - - /* 4 digit year */ - if (sscanf(c, "%04d", &year) && year > 1900) - { - c += 4; - if (*c == '-') - c++; - t->tm_year = year - 1900; - } - - /* 2 digit month */ - if (!sscanf(c, "%02d", &t->tm_mon)) - { - if (rest != NULL && *c != '\0') - *rest = c; - return 0; - } - c += 2; - if (*c == '-' || *c == '/') - c++; - t->tm_mon -= 1; - - /* 2 digit day */ - if (!sscanf(c, "%02d", &t->tm_mday)) - { - if (rest != NULL && *c != '\0') - *rest = c; - return 0; - } - c += 2; - if (*c == '/') - { - c++; - - if (!sscanf(c, "%04d", &t->tm_year)) - { - if (rest != NULL && *c != '\0') - *rest = c; - return 0; - } - t->tm_year -= 1900; - } - else if (*c == 'T' || *c == '.') - { - c++; - /* we have more than a date, keep going */ - - /* 2 digit hour */ - if ((sscanf(c, "%02d:%02d:%02d", &t->tm_hour, &t->tm_min, &t->tm_sec) == 3 && (c = c + 8)) || - (sscanf(c, "%02d%02d%02d", &t->tm_hour, &t->tm_min, &t->tm_sec) == 3 && (c = c + 6))) - { - gboolean offset_positive = FALSE; - int tzhrs; - int tzmins; - - t->tm_isdst = -1; - - if (*c == '.' && *(c+1) >= '0' && *(c+1) <= '9') /* dealing with precision we don't care about */ - c += 4; - if (*c == '+') - offset_positive = TRUE; - if (((*c == '+' || *c == '-') && (c = c + 1)) && - ((sscanf(c, "%02d:%02d", &tzhrs, &tzmins) == 2 && (c = c + 5)) || - (sscanf(c, "%02d%02d", &tzhrs, &tzmins) == 2 && (c = c + 4)))) - { - tzoff = tzhrs*60*60 + tzmins*60; - if (offset_positive) - tzoff *= -1; - /* We don't want the C library doing DST calculations - * if we know the UTC offset already. */ - t->tm_isdst = 0; - } - - if (rest != NULL && *c != '\0') - { - if (*c == ' ') - c++; - if (*c != '\0') - *rest = c; - } - - if (tzoff != GAIM_NO_TZ_OFF || utc) - { -#if defined(_WIN32) - long sys_tzoff; -#endif - -#if defined(_WIN32) || defined(HAVE_TM_GMTOFF) || defined (HAVE_TIMEZONE) - if (tzoff == GAIM_NO_TZ_OFF) - tzoff = 0; -#endif - -#ifdef _WIN32 - if ((sys_tzoff = win32_get_tz_offset()) == -1) - tzoff = GAIM_NO_TZ_OFF; - else - tzoff += sys_tzoff; -#else -#ifdef HAVE_TM_GMTOFF - tzoff += t->tm_gmtoff; -#else -# ifdef HAVE_TIMEZONE - tzset(); /* making sure */ - tzoff -= timezone; -# endif -#endif -#endif /* _WIN32 */ - } - } - else - { - if (rest != NULL && *c != '\0') - *rest = c; - } - } - - if (tm != NULL) - { - *tm = *t; - tm->tm_isdst = -1; - mktime(tm); - } - - retval = mktime(t); - if (tzoff != GAIM_NO_TZ_OFF) - retval += tzoff; - - if (tz_off != NULL) - *tz_off = tzoff; - - return retval; -} - -/************************************************************************** - * Markup Functions - **************************************************************************/ -gboolean -gaim_markup_find_tag(const char *needle, const char *haystack, - const char **start, const char **end, GData **attributes) -{ - GData *attribs; - const char *cur = haystack; - char *name = NULL; - gboolean found = FALSE; - gboolean in_tag = FALSE; - gboolean in_attr = FALSE; - const char *in_quotes = NULL; - size_t needlelen; - - g_return_val_if_fail( needle != NULL, FALSE); - g_return_val_if_fail( *needle != '\0', FALSE); - g_return_val_if_fail( haystack != NULL, FALSE); - g_return_val_if_fail( *haystack != '\0', FALSE); - g_return_val_if_fail( start != NULL, FALSE); - g_return_val_if_fail( end != NULL, FALSE); - g_return_val_if_fail(attributes != NULL, FALSE); - - needlelen = strlen(needle); - g_datalist_init(&attribs); - - while (*cur && !found) { - if (in_tag) { - if (in_quotes) { - const char *close = cur; - - while (*close && *close != *in_quotes) - close++; - - /* if we got the close quote, store the value and carry on from * - * after it. if we ran to the end of the string, point to the NULL * - * and we're outta here */ - if (*close) { - /* only store a value if we have an attribute name */ - if (name) { - size_t len = close - cur; - char *val = g_strndup(cur, len); - - g_datalist_set_data_full(&attribs, name, val, g_free); - g_free(name); - name = NULL; - } - - in_quotes = NULL; - cur = close + 1; - } else { - cur = close; - } - } else if (in_attr) { - const char *close = cur; - - while (*close && *close != '>' && *close != '"' && - *close != '\'' && *close != ' ' && *close != '=') - close++; - - /* if we got the equals, store the name of the attribute. if we got - * the quote, save the attribute and go straight to quote mode. - * otherwise the tag closed or we reached the end of the string, - * so we can get outta here */ - switch (*close) { - case '"': - case '\'': - in_quotes = close; - case '=': - { - size_t len = close - cur; - - /* don't store a blank attribute name */ - if (len) { - if (name) - g_free(name); - name = g_ascii_strdown(cur, len); - } - - in_attr = FALSE; - cur = close + 1; - break; - } - case ' ': - case '>': - in_attr = FALSE; - default: - cur = close; - break; - } - } else { - switch (*cur) { - case ' ': - /* swallow extra spaces inside tag */ - while (*cur && *cur == ' ') cur++; - in_attr = TRUE; - break; - case '>': - found = TRUE; - *end = cur; - break; - case '"': - case '\'': - in_quotes = cur; - default: - cur++; - break; - } - } - } else { - /* if we hit a < followed by the name of our tag... */ - if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) { - *start = cur; - cur = cur + needlelen + 1; - - /* if we're pointing at a space or a >, we found the right tag. if * - * we're not, we've found a longer tag, so we need to skip to the * - * >, but not being distracted by >s inside quotes. */ - if (*cur == ' ' || *cur == '>') { - in_tag = TRUE; - } else { - while (*cur && *cur != '"' && *cur != '\'' && *cur != '>') { - if (*cur == '"') { - cur++; - while (*cur && *cur != '"') - cur++; - } else if (*cur == '\'') { - cur++; - while (*cur && *cur != '\'') - cur++; - } else { - cur++; - } - } - } - } else { - cur++; - } - } - } - - /* clean up any attribute name from a premature termination */ - if (name) - g_free(name); - - if (found) { - *attributes = attribs; - } else { - *start = NULL; - *end = NULL; - *attributes = NULL; - } - - return found; -} - -gboolean -gaim_markup_extract_info_field(const char *str, int len, GString *dest, - const char *start_token, int skip, - const char *end_token, char check_value, - const char *no_value_token, - const char *display_name, gboolean is_link, - const char *link_prefix, - GaimInfoFieldFormatCallback format_cb) -{ - const char *p, *q; - - g_return_val_if_fail(str != NULL, FALSE); - g_return_val_if_fail(dest != NULL, FALSE); - g_return_val_if_fail(start_token != NULL, FALSE); - g_return_val_if_fail(end_token != NULL, FALSE); - g_return_val_if_fail(display_name != NULL, FALSE); - - p = strstr(str, start_token); - - if (p == NULL) - return FALSE; - - p += strlen(start_token) + skip; - - if (p >= str + len) - return FALSE; - - if (check_value != '\0' && *p == check_value) - return FALSE; - - q = strstr(p, end_token); - - /* Trim leading blanks */ - while (*p != '\n' && g_ascii_isspace(*p)) { - p += 1; - } - - /* Trim trailing blanks */ - while (q > p && g_ascii_isspace(*(q - 1))) { - q -= 1; - } - - /* Don't bother with null strings */ - if (p == q) - return FALSE; - - if (q != NULL && (!no_value_token || - (no_value_token && strncmp(p, no_value_token, - strlen(no_value_token))))) - { - g_string_append_printf(dest, _("%s: "), display_name); - - if (is_link) - { - g_string_append(dest, "
"); - - if (link_prefix) - g_string_append(dest, link_prefix); - - g_string_append_len(dest, p, q - p); - g_string_append(dest, ""); - } - else - { - if (format_cb != NULL) - { - char *reformatted = format_cb(p, q - p); - g_string_append(dest, reformatted); - g_free(reformatted); - } - else - g_string_append_len(dest, p, q - p); - } - - g_string_append(dest, "
\n"); - - return TRUE; - } - - return FALSE; -} - -struct gaim_parse_tag { - char *src_tag; - char *dest_tag; - gboolean ignore; -}; - -#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \ - const char *o = c + strlen("<" x); \ - const char *p = NULL, *q = NULL, *r = NULL; \ - GString *innards = g_string_new(""); \ - while(o && *o) { \ - if(!q && (*o == '\"' || *o == '\'') ) { \ - q = o; \ - } else if(q) { \ - if(*o == *q) { \ - char *unescaped = g_strndup(q+1, o-q-1); \ - char *escaped = g_markup_escape_text(unescaped, -1); \ - g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \ - g_free(unescaped); \ - g_free(escaped); \ - q = NULL; \ - } else if(*c == '\\') { \ - o++; \ - } \ - } else if(*o == '<') { \ - r = o; \ - } else if(*o == '>') { \ - p = o; \ - break; \ - } else { \ - innards = g_string_append_c(innards, *o); \ - } \ - o++; \ - } \ - if(p && !r) { \ - if(*(p-1) != '/') { \ - struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \ - pt->src_tag = x; \ - pt->dest_tag = y; \ - tags = g_list_prepend(tags, pt); \ - } \ - xhtml = g_string_append(xhtml, "<" y); \ - c += strlen("<" x ); \ - xhtml = g_string_append(xhtml, innards->str); \ - xhtml = g_string_append_c(xhtml, '>'); \ - c = p + 1; \ - } else { \ - xhtml = g_string_append(xhtml, "<"); \ - plain = g_string_append_c(plain, '<'); \ - c++; \ - } \ - g_string_free(innards, TRUE); \ - continue; \ - } \ - if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \ - (*(c+strlen("<" x)) == '>' || \ - !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \ - xhtml = g_string_append(xhtml, "<" y); \ - c += strlen("<" x); \ - if(*c != '/') { \ - struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \ - pt->src_tag = x; \ - pt->dest_tag = y; \ - tags = g_list_prepend(tags, pt); \ - xhtml = g_string_append_c(xhtml, '>'); \ - } else { \ - xhtml = g_string_append(xhtml, "/>");\ - } \ - c = strchr(c, '>') + 1; \ - continue; \ - } -#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x) -void -gaim_markup_html_to_xhtml(const char *html, char **xhtml_out, - char **plain_out) -{ - GString *xhtml = g_string_new(""); - GString *plain = g_string_new(""); - GList *tags = NULL, *tag; - const char *c = html; - - while(c && *c) { - if(*c == '<') { - if(*(c+1) == '/') { /* closing tag */ - tag = tags; - while(tag) { - struct gaim_parse_tag *pt = tag->data; - if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') { - c += strlen(pt->src_tag) + 3; - break; - } - tag = tag->next; - } - if(tag) { - while(tags) { - struct gaim_parse_tag *pt = tags->data; - g_string_append_printf(xhtml, "", pt->dest_tag); - if(tags == tag) - break; - tags = g_list_remove(tags, pt); - g_free(pt); - } - g_free(tag->data); - tags = g_list_remove(tags, tag->data); - } else { - /* a closing tag we weren't expecting... - * we'll let it slide, if it's really a tag...if it's - * just a ') { - c = end+1; - } else { - xhtml = g_string_append(xhtml, "<"); - plain = g_string_append_c(plain, '<'); - c++; - } - } - } else { /* opening tag */ - ALLOW_TAG("a"); - ALLOW_TAG_ALT("b", "strong"); - ALLOW_TAG("blockquote"); - ALLOW_TAG_ALT("bold", "strong"); - ALLOW_TAG("cite"); - ALLOW_TAG("div"); - ALLOW_TAG("em"); - ALLOW_TAG("h1"); - ALLOW_TAG("h2"); - ALLOW_TAG("h3"); - ALLOW_TAG("h4"); - ALLOW_TAG("h5"); - ALLOW_TAG("h6"); - /* we only allow html to start the message */ - if(c == html) - ALLOW_TAG("html"); - ALLOW_TAG_ALT("i", "em"); - ALLOW_TAG_ALT("italic", "em"); - ALLOW_TAG("li"); - ALLOW_TAG("ol"); - ALLOW_TAG("p"); - ALLOW_TAG("pre"); - ALLOW_TAG("q"); - ALLOW_TAG("span"); - ALLOW_TAG("strong"); - ALLOW_TAG("ul"); - - /* we skip
because it's not legal in XHTML-IM. However, - * we still want to send something sensible, so we put a - * linebreak in its place.
also needs special handling - * because putting a
to close it would just be dumb. */ - if((!g_ascii_strncasecmp(c, "' || - !g_ascii_strncasecmp(c+3, "/>", 2) || - !g_ascii_strncasecmp(c+3, " />", 3))) { - c = strchr(c, '>') + 1; - xhtml = g_string_append(xhtml, "
"); - if(*c != '\n') - plain = g_string_append_c(plain, '\n'); - continue; - } - if(!g_ascii_strncasecmp(c, "", 3) || !g_ascii_strncasecmp(c, "", strlen(""))) { - struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); - pt->src_tag = *(c+2) == '>' ? "u" : "underline"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - c = strchr(c, '>') + 1; - xhtml = g_string_append(xhtml, ""); - continue; - } - if(!g_ascii_strncasecmp(c, "", 3) || !g_ascii_strncasecmp(c, "", strlen(""))) { - struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); - pt->src_tag = *(c+2) == '>' ? "s" : "strike"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - c = strchr(c, '>') + 1; - xhtml = g_string_append(xhtml, ""); - continue; - } - if(!g_ascii_strncasecmp(c, "", 5)) { - struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); - pt->src_tag = "sub"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - c = strchr(c, '>') + 1; - xhtml = g_string_append(xhtml, ""); - continue; - } - if(!g_ascii_strncasecmp(c, "", 5)) { - struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); - pt->src_tag = "sup"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - c = strchr(c, '>') + 1; - xhtml = g_string_append(xhtml, ""); - continue; - } - if(!g_ascii_strncasecmp(c, "' || *(c+5) == ' ')) { - const char *p = c; - GString *style = g_string_new(""); - struct gaim_parse_tag *pt; - while(*p && *p != '>') { - if(!g_ascii_strncasecmp(p, "back=", strlen("back="))) { - const char *q = p + strlen("back="); - GString *color = g_string_new(""); - if(*q == '\'' || *q == '\"') - q++; - while(*q && *q != '\"' && *q != '\'' && *q != ' ') { - color = g_string_append_c(color, *q); - q++; - } - g_string_append_printf(style, "background: %s; ", color->str); - g_string_free(color, TRUE); - p = q; - } else if(!g_ascii_strncasecmp(p, "color=", strlen("color="))) { - const char *q = p + strlen("color="); - GString *color = g_string_new(""); - if(*q == '\'' || *q == '\"') - q++; - while(*q && *q != '\"' && *q != '\'' && *q != ' ') { - color = g_string_append_c(color, *q); - q++; - } - g_string_append_printf(style, "color: %s; ", color->str); - g_string_free(color, TRUE); - p = q; - } else if(!g_ascii_strncasecmp(p, "face=", strlen("face="))) { - const char *q = p + strlen("face="); - gboolean space_allowed = FALSE; - GString *face = g_string_new(""); - if(*q == '\'' || *q == '\"') { - space_allowed = TRUE; - q++; - } - while(*q && *q != '\"' && *q != '\'' && (space_allowed || *q != ' ')) { - face = g_string_append_c(face, *q); - q++; - } - g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str)); - g_string_free(face, TRUE); - p = q; - } else if(!g_ascii_strncasecmp(p, "size=", strlen("size="))) { - const char *q = p + strlen("size="); - int sz; - const char *size = "medium"; - if(*q == '\'' || *q == '\"') - q++; - sz = atoi(q); - switch (sz) - { - case 1: - size = "xx-small"; - break; - case 2: - size = "x-small"; - break; - case 3: - size = "small"; - break; - case 4: - size = "medium"; - break; - case 5: - size = "large"; - break; - case 6: - size = "x-large"; - break; - case 7: - size = "xx-large"; - break; - default: - break; - } - g_string_append_printf(style, "font-size: %s; ", size); - p = q; - } - p++; - } - if ((c = strchr(c, '>')) != NULL) - c++; - else - c = p; - pt = g_new0(struct gaim_parse_tag, 1); - pt->src_tag = "font"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - if(style->len) - g_string_append_printf(xhtml, "", g_strstrip(style->str)); - else - pt->ignore = TRUE; - g_string_free(style, TRUE); - continue; - } - if(!g_ascii_strncasecmp(c, "", g_strstrip(color->str)); - g_string_free(color, TRUE); - if ((c = strchr(c, '>')) != NULL) - c++; - else - c = p; - pt->src_tag = "body"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - did_something = TRUE; - break; - } - p++; - } - if(did_something) continue; - } - /* this has to come after the special case for bgcolor */ - ALLOW_TAG("body"); - if(!g_ascii_strncasecmp(c, ""); - if(p) { - xhtml = g_string_append(xhtml, "