| 1 /* |
|
| 2 * nmrtf.c |
|
| 3 * |
|
| 4 * Copyright (c) 2004 Novell, Inc. All Rights Reserved. |
|
| 5 * |
|
| 6 * This program is free software; you can redistribute it and/or modify |
|
| 7 * it under the terms of the GNU General Public License as published by |
|
| 8 * the Free Software Foundation; version 2 of the License. |
|
| 9 * |
|
| 10 * This program is distributed in the hope that it will be useful, |
|
| 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
| 13 * GNU General Public License for more details. |
|
| 14 * |
|
| 15 * You should have received a copy of the GNU General Public License |
|
| 16 * along with this program; if not, write to the Free Software |
|
| 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA |
|
| 18 * |
|
| 19 */ |
|
| 20 |
|
| 21 /* This code was adapted from the sample RTF reader found here: |
|
| 22 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dnrtfspec/html/rtfspec.asp |
|
| 23 */ |
|
| 24 |
|
| 25 #include <glib.h> |
|
| 26 #include <stdlib.h> |
|
| 27 #include <stdio.h> |
|
| 28 #include <stddef.h> |
|
| 29 #include <ctype.h> |
|
| 30 #include <string.h> |
|
| 31 #include <purple.h> |
|
| 32 #include "nmrtf.h" |
|
| 33 |
|
/* Internal RTF parser error codes (returned as plain int status values) */
#define NMRTF_OK               0  /* Everything's fine! */
#define NMRTF_STACK_UNDERFLOW  1  /* Unmatched '}' */
#define NMRTF_STACK_OVERFLOW   2  /* Too many '{' -- memory exhausted */
#define NMRTF_UNMATCHED_BRACE  3  /* RTF ended during an open group. */
#define NMRTF_INVALID_HEX      4  /* invalid hex character found in data */
#define NMRTF_BAD_TABLE        5  /* RTF table (sym or prop) invalid */
#define NMRTF_ASSERTION        6  /* Assertion failure */
#define NMRTF_EOF              7  /* End of file reached while reading RTF */
#define NMRTF_CONVERT_ERROR    8  /* Error converting text */

/* Group nesting limit that is checked whenever a '{' is parsed */
#define NMRTF_MAX_DEPTH 256
|
| 46 |
|
/*
 * Parser state. The same type is used for the destination state (rds)
 * and for the internal state (ris) of the context.
 */
typedef enum {
	NMRTF_STATE_NORMAL,     /* characters are printed */
	NMRTF_STATE_SKIP,       /* characters are discarded */
	NMRTF_STATE_FONTTABLE,  /* characters are collected as font table entries */
	NMRTF_STATE_BIN,        /* reading the data that follows \bin */
	NMRTF_STATE_HEX         /* reading the two hex digits that follow \' */
} NMRtfState; /* Rtf State */
|
| 55 |
|
/* Property types that we care about */
typedef enum {
	NMRTF_PROP_FONT_IDX,      /* set by the "f" keyword */
	NMRTF_PROP_FONT_CHARSET,  /* set by the "fcharset" keyword */
	NMRTF_PROP_MAX            /* number of real properties above */
} NMRtfProperty;
|
| 63 |
|
/* Keywords that need dedicated handling rather than a table-driven action */
typedef enum {
	NMRTF_SPECIAL_BIN,      /* "bin" */
	NMRTF_SPECIAL_HEX,      /* "'"   */
	NMRTF_SPECIAL_UNICODE,  /* "u"   */
	NMRTF_SPECIAL_SKIP      /* "*"   */
} NMRtfSpecialKwd;
|
| 71 |
|
/* Destinations a keyword can switch the parser to */
typedef enum {
	NMRTF_DEST_FONTTABLE,  /* the font table ("fonttbl") */
	NMRTF_DEST_SKIP        /* a destination whose content is ignored */
} NMRtfDestinationType;
|
| 77 |
|
/* How the "action" field of a symbol table row is interpreted */
typedef enum {
	NMRTF_KWD_CHAR,  /* action is a character to output */
	NMRTF_KWD_DEST,  /* action is an NMRtfDestinationType */
	NMRTF_KWD_PROP,  /* action is an NMRtfProperty */
	NMRTF_KWD_SPEC   /* action is an NMRtfSpecialKwd */
} NMRtfKeywordType;
|
| 85 |
|
/*
 * Current character properties.
 *
 * All we care about for now is the font; bold, italic, underline, etc.
 * should be added here.
 */
typedef struct {
	int font_idx;      /* value of the last "f" keyword */
	int font_charset;  /* value of the last "fcharset" keyword */
} NMRtfCharProp;
|
| 95 |
|
| 96 typedef struct |
|
| 97 { |
|
| 98 NMRtfCharProp chp; |
|
| 99 NMRtfState rds; |
|
| 100 NMRtfState ris; |
|
| 101 } NMRtfStateSave; |
|
| 102 |
|
| 103 typedef struct |
|
| 104 { |
|
| 105 char *keyword; /* RTF keyword */ |
|
| 106 int default_val; /* default value to use */ |
|
| 107 gboolean pass_default; /* true to use default value from this table */ |
|
| 108 NMRtfKeywordType kwd_type; /* the type of the keyword */ |
|
| 109 int action; /* property type if the keyword represents a property */ |
|
| 110 /* destination type if the keyword represents a destination */ |
|
| 111 /* character to print if the keyword represents a character */ |
|
| 112 } NMRtfSymbol; |
|
| 113 |
|
| 114 |
|
/* One entry of the font table */
typedef struct {
	int number;   /* font index that was current when the entry was added */
	char *name;   /* heap copy of the font name, released with the entry */
	int charset;  /* charset number, mapped to an encoding name when converting text */
} NMRtfFont;
|
| 121 |
|
| 122 /* RTF Context */ |
|
| 123 struct _NMRtfContext |
|
| 124 { |
|
| 125 NMRtfState rds; /* destination state */ |
|
| 126 NMRtfState ris; /* internal state */ |
|
| 127 NMRtfCharProp chp; /* current character properties (ie. font, bold, italic, etc.) */ |
|
| 128 GSList *font_table; /* the font table */ |
|
| 129 GSList *saved; /* saved state stack */ |
|
| 130 int param; /* numeric parameter for the current keyword */ |
|
| 131 long bytes_to_skip; /* number of bytes to skip (after encountering \bin) */ |
|
| 132 int depth; /* how many groups deep are we */ |
|
| 133 gboolean skip_unknown; /* if true, skip any unknown destinations (this is set after encountering '\*') */ |
|
| 134 char *input; /* input string */ |
|
| 135 guchar nextch; /* next char in input */ |
|
| 136 gboolean nextch_available; /* nextch value is set */ |
|
| 137 GString *ansi; /* Temporary ansi text, will be convert/flushed to the output string */ |
|
| 138 GString *output; /* The plain text UTF8 string */ |
|
| 139 }; |
|
| 140 |
|
| 141 static int rtf_parse(NMRtfContext *ctx); |
|
| 142 static int rtf_push_state(NMRtfContext *ctx); |
|
| 143 static int rtf_pop_state(NMRtfContext *ctx); |
|
| 144 static NMRtfFont *rtf_get_font(NMRtfContext *ctx, int index); |
|
| 145 static int rtf_get_char(NMRtfContext *ctx, guchar *ch); |
|
| 146 static int rtf_unget_char(NMRtfContext *ctx, guchar ch); |
|
| 147 static int rtf_flush_data(NMRtfContext *ctx); |
|
| 148 static int rtf_parse_keyword(NMRtfContext *ctx); |
|
| 149 static int rtf_dispatch_control(NMRtfContext *ctx, char *keyword, int param, gboolean param_set); |
|
| 150 static int rtf_dispatch_char(NMRtfContext *ctx, guchar ch); |
|
| 151 static int rtf_dispatch_unicode_char(NMRtfContext *ctx, gunichar ch); |
|
| 152 static int rtf_print_char(NMRtfContext *ctx, guchar ch); |
|
| 153 static int rtf_print_unicode_char(NMRtfContext *ctx, gunichar ch); |
|
| 154 static int rtf_change_destination(NMRtfContext *ctx, NMRtfDestinationType dest); |
|
| 155 static int rtf_dispatch_special(NMRtfContext *ctx, NMRtfSpecialKwd special); |
|
| 156 static int rtf_apply_property(NMRtfContext *ctx, NMRtfProperty prop, int val); |
|
| 157 |
|
| 158 /* RTF parser tables */ |
|
| 159 |
|
| 160 /* Keyword descriptions */ |
|
| 161 NMRtfSymbol rtf_symbols[] = { |
|
| 162 /* keyword, default, pass_default, keyword_type, action */ |
|
| 163 {"fonttbl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_FONTTABLE}, |
|
| 164 {"f", 0, FALSE, NMRTF_KWD_PROP, NMRTF_PROP_FONT_IDX}, |
|
| 165 {"fcharset", 0, FALSE, NMRTF_KWD_PROP, NMRTF_PROP_FONT_CHARSET}, |
|
| 166 {"par", 0, FALSE, NMRTF_KWD_CHAR, 0x0a}, |
|
| 167 {"line", 0, FALSE, NMRTF_KWD_CHAR, 0x0a}, |
|
| 168 {"\0x0a", 0, FALSE, NMRTF_KWD_CHAR, 0x0a}, |
|
| 169 {"\0x0d", 0, FALSE, NMRTF_KWD_CHAR, 0x0a}, |
|
| 170 {"tab", 0, FALSE, NMRTF_KWD_CHAR, 0x09}, |
|
| 171 {"\r", 0, FALSE, NMRTF_KWD_CHAR, '\r'}, |
|
| 172 {"\n", 0, FALSE, NMRTF_KWD_CHAR, '\n'}, |
|
| 173 {"ldblquote",0, FALSE, NMRTF_KWD_CHAR, '"'}, |
|
| 174 {"rdblquote",0, FALSE, NMRTF_KWD_CHAR, '"'}, |
|
| 175 {"{", 0, FALSE, NMRTF_KWD_CHAR, '{'}, |
|
| 176 {"}", 0, FALSE, NMRTF_KWD_CHAR, '}'}, |
|
| 177 {"\\", 0, FALSE, NMRTF_KWD_CHAR, '\\'}, |
|
| 178 {"bin", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_BIN}, |
|
| 179 {"*", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_SKIP}, |
|
| 180 {"'", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_HEX}, |
|
| 181 {"u", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_UNICODE}, |
|
| 182 {"colortbl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 183 {"author", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 184 {"buptim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 185 {"comment", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 186 {"creatim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 187 {"doccomm", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 188 {"footer", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 189 {"footerf", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 190 {"footerl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 191 {"footerr", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 192 {"footnote", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 193 {"ftncn", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 194 {"ftnsep", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 195 {"ftnsepc", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 196 {"header", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 197 {"headerf", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 198 {"headerl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 199 {"headerr", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 200 {"info", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 201 {"keywords", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 202 {"operator", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 203 {"pict", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 204 {"printim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 205 {"private1", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 206 {"revtim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 207 {"rxe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 208 {"stylesheet", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 209 {"subject", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 210 {"tc", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 211 {"title", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 212 {"txe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 213 {"xe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP} |
|
| 214 }; |
|
| 215 int table_size = sizeof(rtf_symbols) / sizeof(NMRtfSymbol); |
|
| 216 |
|
| 217 NMRtfContext * |
|
| 218 nm_rtf_init(void) |
|
| 219 { |
|
| 220 NMRtfContext *ctx = g_new0(NMRtfContext, 1); |
|
| 221 ctx->nextch_available = FALSE; |
|
| 222 ctx->ansi = g_string_new(""); |
|
| 223 ctx->output = g_string_new(""); |
|
| 224 return ctx; |
|
| 225 } |
|
| 226 |
|
| 227 char * |
|
| 228 nm_rtf_strip_formatting(NMRtfContext *ctx, const char *input) |
|
| 229 { |
|
| 230 int status; |
|
| 231 |
|
| 232 ctx->input = (char *)input; |
|
| 233 status = rtf_parse(ctx); |
|
| 234 if (status == NMRTF_OK) |
|
| 235 return g_strdup(ctx->output->str); |
|
| 236 |
|
| 237 purple_debug_info("novell", "RTF parser failed with error code %d\n", status); |
|
| 238 return NULL; |
|
| 239 } |
|
| 240 |
|
| 241 static void |
|
| 242 nm_rtf_font_free(NMRtfFont *font) |
|
| 243 { |
|
| 244 g_return_if_fail(font != NULL); |
|
| 245 |
|
| 246 g_free(font->name); |
|
| 247 g_free(font); |
|
| 248 } |
|
| 249 |
|
| 250 void |
|
| 251 nm_rtf_deinit(NMRtfContext *ctx) |
|
| 252 { |
|
| 253 if (ctx) { |
|
| 254 g_slist_free_full(ctx->font_table, (GDestroyNotify)nm_rtf_font_free); |
|
| 255 g_slist_free_full(ctx->saved, g_free); |
|
| 256 g_string_free(ctx->ansi, TRUE); |
|
| 257 g_string_free(ctx->output, TRUE); |
|
| 258 g_free(ctx); |
|
| 259 } |
|
| 260 } |
|
| 261 |
|
| 262 static const char * |
|
| 263 get_current_encoding(NMRtfContext *ctx) |
|
| 264 { |
|
| 265 NMRtfFont *font; |
|
| 266 |
|
| 267 font = rtf_get_font(ctx, ctx->chp.font_idx); |
|
| 268 |
|
| 269 switch (font->charset) { |
|
| 270 case 0: |
|
| 271 return "CP1252"; |
|
| 272 case 77: |
|
| 273 return "MACINTOSH"; |
|
| 274 case 78: |
|
| 275 return "SJIS"; |
|
| 276 case 128: |
|
| 277 return "CP932"; |
|
| 278 case 129: |
|
| 279 return "CP949"; |
|
| 280 case 130: |
|
| 281 return "CP1361"; |
|
| 282 case 134: |
|
| 283 return "CP936"; |
|
| 284 case 136: |
|
| 285 return "CP950"; |
|
| 286 case 161: |
|
| 287 return "CP1253"; |
|
| 288 case 162: |
|
| 289 return "CP1254"; |
|
| 290 case 163: |
|
| 291 return "CP1258"; |
|
| 292 case 181: |
|
| 293 case 177: |
|
| 294 return "CP1255"; |
|
| 295 case 178: |
|
| 296 case 179: |
|
| 297 case 180: |
|
| 298 return "CP1256"; |
|
| 299 case 186: |
|
| 300 return "CP1257"; |
|
| 301 case 204: |
|
| 302 return "CP1251"; |
|
| 303 case 222: |
|
| 304 return "CP874"; |
|
| 305 case 238: |
|
| 306 return "CP1250"; |
|
| 307 case 254: |
|
| 308 return "CP437"; |
|
| 309 default: |
|
| 310 purple_debug_info("novell", "Unhandled font charset %d\n", font->charset); |
|
| 311 return "CP1252"; |
|
| 312 } |
|
| 313 } |
|
| 314 |
|
| 315 |
|
| 316 /* |
|
| 317 * Add an entry to the font table |
|
| 318 */ |
|
| 319 static int |
|
| 320 rtf_add_font_entry(NMRtfContext *ctx, int number, const char *name, int charset) |
|
| 321 { |
|
| 322 NMRtfFont *font = g_new0(NMRtfFont, 1); |
|
| 323 |
|
| 324 font->number = number; |
|
| 325 font->name = g_strdup(name); |
|
| 326 font->charset = charset; |
|
| 327 |
|
| 328 purple_debug_info("novell", "Adding font to table: #%d\t%s\t%d\n", |
|
| 329 font->number, font->name, font->charset); |
|
| 330 |
|
| 331 ctx->font_table = g_slist_append(ctx->font_table, font); |
|
| 332 |
|
| 333 return NMRTF_OK; |
|
| 334 } |
|
| 335 |
|
| 336 /* |
|
| 337 * Return the nth entry in the font table |
|
| 338 */ |
|
| 339 static NMRtfFont * |
|
| 340 rtf_get_font(NMRtfContext *ctx, int nth) |
|
| 341 { |
|
| 342 NMRtfFont *font; |
|
| 343 |
|
| 344 font = g_slist_nth_data(ctx->font_table, nth); |
|
| 345 |
|
| 346 return font; |
|
| 347 } |
|
| 348 |
|
| 349 /* |
|
| 350 * Step 1: |
|
| 351 * Isolate RTF keywords and send them to rtf_parse_keyword; |
|
| 352 * Push and pop state at the start and end of RTF groups; |
|
| 353 * Send text to rtf_dispatch_char for further processing. |
|
| 354 */ |
|
| 355 static int |
|
| 356 rtf_parse(NMRtfContext *ctx) |
|
| 357 { |
|
| 358 int status; |
|
| 359 guchar ch; |
|
| 360 guchar hex_byte = 0; |
|
| 361 int hex_count = 2; |
|
| 362 int len; |
|
| 363 |
|
| 364 if (ctx->input == NULL) |
|
| 365 return NMRTF_OK; |
|
| 366 |
|
| 367 while (rtf_get_char(ctx, &ch) == NMRTF_OK) { |
|
| 368 if (ctx->depth < 0) |
|
| 369 return NMRTF_STACK_UNDERFLOW; |
|
| 370 |
|
| 371 /* if we're parsing binary data, handle it directly */ |
|
| 372 if (ctx->ris == NMRTF_STATE_BIN) { |
|
| 373 if ((status = rtf_dispatch_char(ctx, ch)) != NMRTF_OK) |
|
| 374 return status; |
|
| 375 } else { |
|
| 376 switch (ch) { |
|
| 377 case '{': |
|
| 378 if (ctx->depth > NMRTF_MAX_DEPTH) |
|
| 379 return NMRTF_STACK_OVERFLOW; |
|
| 380 rtf_flush_data(ctx); |
|
| 381 if ((status = rtf_push_state(ctx)) != NMRTF_OK) |
|
| 382 return status; |
|
| 383 break; |
|
| 384 case '}': |
|
| 385 rtf_flush_data(ctx); |
|
| 386 |
|
| 387 /* for some reason there is always an unwanted '\par' at the end */ |
|
| 388 if (ctx->rds == NMRTF_STATE_NORMAL) { |
|
| 389 len = ctx->output->len; |
|
| 390 if (ctx->output->str[len-1] == '\n') |
|
| 391 ctx->output = g_string_truncate(ctx->output, len-1); |
|
| 392 } |
|
| 393 |
|
| 394 if ((status = rtf_pop_state(ctx)) != NMRTF_OK) |
|
| 395 return status; |
|
| 396 |
|
| 397 if (ctx->depth < 0) |
|
| 398 return NMRTF_STACK_OVERFLOW; |
|
| 399 break; |
|
| 400 case '\\': |
|
| 401 if ((status = rtf_parse_keyword(ctx)) != NMRTF_OK) |
|
| 402 return status; |
|
| 403 break; |
|
| 404 case 0x0d: |
|
| 405 case 0x0a: /* cr and lf are noise characters... */ |
|
| 406 break; |
|
| 407 default: |
|
| 408 if (ctx->ris == NMRTF_STATE_NORMAL) { |
|
| 409 if ((status = rtf_dispatch_char(ctx, ch)) != NMRTF_OK) |
|
| 410 return status; |
|
| 411 } else { /* parsing a hex encoded character */ |
|
| 412 if (ctx->ris != NMRTF_STATE_HEX) |
|
| 413 return NMRTF_ASSERTION; |
|
| 414 |
|
| 415 hex_byte = hex_byte << 4; |
|
| 416 if (isdigit(ch)) |
|
| 417 hex_byte += (char) ch - '0'; |
|
| 418 else { |
|
| 419 if (islower(ch)) { |
|
| 420 if (ch < 'a' || ch > 'f') |
|
| 421 return NMRTF_INVALID_HEX; |
|
| 422 hex_byte += (char) ch - 'a' + 10; |
|
| 423 } else { |
|
| 424 if (ch < 'A' || ch > 'F') |
|
| 425 return NMRTF_INVALID_HEX; |
|
| 426 hex_byte += (char) ch - 'A' + 10; |
|
| 427 } |
|
| 428 } |
|
| 429 hex_count--; |
|
| 430 if (hex_count == 0) { |
|
| 431 if ((status = rtf_dispatch_char(ctx, hex_byte)) != NMRTF_OK) |
|
| 432 return status; |
|
| 433 hex_count = 2; |
|
| 434 hex_byte = 0; |
|
| 435 ctx->ris = NMRTF_STATE_NORMAL; |
|
| 436 } |
|
| 437 } |
|
| 438 break; |
|
| 439 } |
|
| 440 } |
|
| 441 } |
|
| 442 if (ctx->depth < 0) |
|
| 443 return NMRTF_STACK_OVERFLOW; |
|
| 444 if (ctx->depth > 0) |
|
| 445 return NMRTF_UNMATCHED_BRACE; |
|
| 446 return NMRTF_OK; |
|
| 447 } |
|
| 448 |
|
| 449 /* |
|
| 450 * Push the current state onto stack |
|
| 451 */ |
|
| 452 static int |
|
| 453 rtf_push_state(NMRtfContext *ctx) |
|
| 454 { |
|
| 455 NMRtfStateSave *save = g_new0(NMRtfStateSave, 1); |
|
| 456 save->chp = ctx->chp; |
|
| 457 save->rds = ctx->rds; |
|
| 458 save->ris = ctx->ris; |
|
| 459 ctx->saved = g_slist_prepend(ctx->saved, save); |
|
| 460 ctx->ris = NMRTF_STATE_NORMAL; |
|
| 461 (ctx->depth)++; |
|
| 462 return NMRTF_OK; |
|
| 463 } |
|
| 464 |
|
| 465 /* |
|
| 466 * Restore the state at the top of the stack |
|
| 467 */ |
|
| 468 static int |
|
| 469 rtf_pop_state(NMRtfContext *ctx) |
|
| 470 { |
|
| 471 NMRtfStateSave *save_old; |
|
| 472 GSList *link_old; |
|
| 473 |
|
| 474 if (ctx->saved == NULL) |
|
| 475 return NMRTF_STACK_UNDERFLOW; |
|
| 476 |
|
| 477 save_old = ctx->saved->data; |
|
| 478 ctx->chp = save_old->chp; |
|
| 479 ctx->rds = save_old->rds; |
|
| 480 ctx->ris = save_old->ris; |
|
| 481 (ctx->depth)--; |
|
| 482 |
|
| 483 g_free(save_old); |
|
| 484 link_old = ctx->saved; |
|
| 485 ctx->saved = g_slist_delete_link(ctx->saved, link_old); |
|
| 486 return NMRTF_OK; |
|
| 487 } |
|
| 488 |
|
| 489 /* |
|
| 490 * Step 2: |
|
| 491 * Get a control word (and its associated value) and |
|
| 492 * dispatch the control. |
|
| 493 */ |
|
| 494 static int |
|
| 495 rtf_parse_keyword(NMRtfContext *ctx) |
|
| 496 { |
|
| 497 int status = NMRTF_OK; |
|
| 498 guchar ch; |
|
| 499 gboolean param_set = FALSE; |
|
| 500 gboolean is_neg = FALSE; |
|
| 501 int param = 0; |
|
| 502 char keyword[30]; |
|
| 503 char parameter[20]; |
|
| 504 gsize i; |
|
| 505 |
|
| 506 keyword[0] = '\0'; |
|
| 507 parameter[0] = '\0'; |
|
| 508 if ((status = rtf_get_char(ctx, &ch)) != NMRTF_OK) |
|
| 509 return status; |
|
| 510 |
|
| 511 if (!isalpha(ch)) { |
|
| 512 /* a control symbol; no delimiter. */ |
|
| 513 keyword[0] = (char) ch; |
|
| 514 keyword[1] = '\0'; |
|
| 515 return rtf_dispatch_control(ctx, keyword, 0, param_set); |
|
| 516 } |
|
| 517 |
|
| 518 /* parse keyword */ |
|
| 519 for (i = 0; isalpha(ch) && (i < sizeof(keyword) - 1); rtf_get_char(ctx, &ch)) { |
|
| 520 keyword[i] = (char) ch; |
|
| 521 i++; |
|
| 522 } |
|
| 523 keyword[i] = '\0'; |
|
| 524 |
|
| 525 /* check for '-' indicated a negative parameter value */ |
|
| 526 if (ch == '-') { |
|
| 527 is_neg = TRUE; |
|
| 528 if ((status = rtf_get_char(ctx, &ch)) != NMRTF_OK) |
|
| 529 return status; |
|
| 530 } |
|
| 531 |
|
| 532 /* check for numerical param */ |
|
| 533 if (isdigit(ch)) { |
|
| 534 |
|
| 535 param_set = TRUE; |
|
| 536 for (i = 0; isdigit(ch) && (i < sizeof(parameter) - 1); rtf_get_char(ctx, &ch)) { |
|
| 537 parameter[i] = (char) ch; |
|
| 538 i++; |
|
| 539 } |
|
| 540 parameter[i] = '\0'; |
|
| 541 |
|
| 542 ctx->param = param = atoi(parameter); |
|
| 543 if (is_neg) |
|
| 544 ctx->param = param = -param; |
|
| 545 } |
|
| 546 |
|
| 547 /* space after control is optional, put character back if it is not a space */ |
|
| 548 if (ch != ' ') |
|
| 549 rtf_unget_char(ctx, ch); |
|
| 550 |
|
| 551 return rtf_dispatch_control(ctx, keyword, param, param_set); |
|
| 552 } |
|
| 553 |
|
| 554 /* |
|
| 555 * Route the character to the appropriate destination |
|
| 556 */ |
|
| 557 static int |
|
| 558 rtf_dispatch_char(NMRtfContext *ctx, guchar ch) |
|
| 559 { |
|
| 560 if (ctx->ris == NMRTF_STATE_BIN && --(ctx->bytes_to_skip) <= 0) |
|
| 561 ctx->ris = NMRTF_STATE_NORMAL; |
|
| 562 |
|
| 563 switch (ctx->rds) { |
|
| 564 case NMRTF_STATE_SKIP: |
|
| 565 return NMRTF_OK; |
|
| 566 case NMRTF_STATE_NORMAL: |
|
| 567 return rtf_print_char(ctx, ch); |
|
| 568 case NMRTF_STATE_FONTTABLE: |
|
| 569 if (ch == ';') { |
|
| 570 rtf_add_font_entry(ctx, ctx->chp.font_idx, |
|
| 571 ctx->ansi->str, ctx->chp.font_charset); |
|
| 572 g_string_truncate(ctx->ansi, 0); |
|
| 573 } |
|
| 574 else { |
|
| 575 return rtf_print_char(ctx, ch); |
|
| 576 } |
|
| 577 return NMRTF_OK; |
|
| 578 default: |
|
| 579 return NMRTF_OK; |
|
| 580 } |
|
| 581 } |
|
| 582 |
|
| 583 /* Handle a unicode character */ |
|
| 584 static int |
|
| 585 rtf_dispatch_unicode_char(NMRtfContext *ctx, gunichar ch) |
|
| 586 { |
|
| 587 switch (ctx->rds) { |
|
| 588 case NMRTF_STATE_SKIP: |
|
| 589 return NMRTF_OK; |
|
| 590 case NMRTF_STATE_NORMAL: |
|
| 591 case NMRTF_STATE_FONTTABLE: |
|
| 592 return rtf_print_unicode_char(ctx, ch); |
|
| 593 default: |
|
| 594 return NMRTF_OK; |
|
| 595 } |
|
| 596 } |
|
| 597 |
|
| 598 /* |
|
| 599 * Output a character |
|
| 600 */ |
|
| 601 static int |
|
| 602 rtf_print_char(NMRtfContext *ctx, guchar ch) |
|
| 603 { |
|
| 604 |
|
| 605 ctx->ansi = g_string_append_c(ctx->ansi, ch); |
|
| 606 |
|
| 607 return NMRTF_OK; |
|
| 608 } |
|
| 609 |
|
| 610 /* |
|
| 611 * Output a unicode character |
|
| 612 */ |
|
| 613 static int |
|
| 614 rtf_print_unicode_char(NMRtfContext *ctx, gunichar ch) |
|
| 615 { |
|
| 616 char buf[7]; |
|
| 617 int num; |
|
| 618 |
|
| 619 /* convert and flush the ansi buffer to the utf8 buffer */ |
|
| 620 rtf_flush_data(ctx); |
|
| 621 |
|
| 622 /* convert the unicode character to utf8 and add directly to the output buffer */ |
|
| 623 num = g_unichar_to_utf8((gunichar) ch, buf); |
|
| 624 buf[num] = 0; |
|
| 625 purple_debug_info("novell", "converted unichar 0x%X to utf8 char %s\n", ch, buf); |
|
| 626 |
|
| 627 ctx->output = g_string_append(ctx->output, buf); |
|
| 628 return NMRTF_OK; |
|
| 629 } |
|
| 630 |
|
| 631 /* |
|
| 632 * Flush the output text |
|
| 633 */ |
|
| 634 static int |
|
| 635 rtf_flush_data(NMRtfContext *ctx) |
|
| 636 { |
|
| 637 int status = NMRTF_OK; |
|
| 638 char *conv_data = NULL; |
|
| 639 const char *enc = NULL; |
|
| 640 GError *gerror = NULL; |
|
| 641 |
|
| 642 if (ctx->rds == NMRTF_STATE_NORMAL && ctx->ansi->len > 0) { |
|
| 643 enc = get_current_encoding(ctx); |
|
| 644 conv_data = g_convert(ctx->ansi->str, ctx->ansi->len, "UTF-8", enc, |
|
| 645 NULL, NULL, &gerror); |
|
| 646 if (conv_data) { |
|
| 647 ctx->output = g_string_append(ctx->output, conv_data); |
|
| 648 g_free(conv_data); |
|
| 649 ctx->ansi = g_string_truncate(ctx->ansi, 0); |
|
| 650 } else { |
|
| 651 status = NMRTF_CONVERT_ERROR; |
|
| 652 purple_debug_info("novell", "failed to convert data! error code = %d msg = %s\n", |
|
| 653 gerror->code, gerror->message); |
|
| 654 } |
|
| 655 } |
|
| 656 g_error_free(gerror); |
|
| 657 |
|
| 658 return status; |
|
| 659 } |
|
| 660 |
|
| 661 /* |
|
| 662 * Handle a property change |
|
| 663 */ |
|
| 664 static int |
|
| 665 rtf_apply_property(NMRtfContext *ctx, NMRtfProperty prop, int val) |
|
| 666 { |
|
| 667 if (ctx->rds == NMRTF_STATE_SKIP) /* If we're skipping text, */ |
|
| 668 return NMRTF_OK; /* don't do anything. */ |
|
| 669 |
|
| 670 /* Need to flush any temporary data before a property change*/ |
|
| 671 rtf_flush_data(ctx); |
|
| 672 |
|
| 673 switch (prop) { |
|
| 674 case NMRTF_PROP_FONT_IDX: |
|
| 675 ctx->chp.font_idx = val; |
|
| 676 break; |
|
| 677 case NMRTF_PROP_FONT_CHARSET: |
|
| 678 ctx->chp.font_charset = val; |
|
| 679 break; |
|
| 680 default: |
|
| 681 return NMRTF_BAD_TABLE; |
|
| 682 } |
|
| 683 |
|
| 684 return NMRTF_OK; |
|
| 685 } |
|
| 686 |
|
| 687 /* |
|
| 688 * Step 3. |
|
| 689 * Search the table for keyword and evaluate it appropriately. |
|
| 690 * |
|
| 691 * Inputs: |
|
| 692 * keyword: The RTF control to evaluate. |
|
| 693 * param: The parameter of the RTF control. |
|
| 694 * param_set: TRUE if the control had a parameter; (that is, if param is valid) |
|
| 695 * FALSE if it did not. |
|
| 696 */ |
|
| 697 static int |
|
| 698 rtf_dispatch_control(NMRtfContext *ctx, char *keyword, int param, gboolean param_set) |
|
| 699 { |
|
| 700 int idx; |
|
| 701 |
|
| 702 for (idx = 0; idx < table_size; idx++) { |
|
| 703 if (purple_strequal(keyword, rtf_symbols[idx].keyword)) |
|
| 704 break; |
|
| 705 } |
|
| 706 |
|
| 707 if (idx == table_size) { |
|
| 708 if (ctx->skip_unknown) |
|
| 709 ctx->rds = NMRTF_STATE_SKIP; |
|
| 710 ctx->skip_unknown = FALSE; |
|
| 711 return NMRTF_OK; |
|
| 712 } |
|
| 713 |
|
| 714 /* found it! use kwd_type and action to determine what to do with it. */ |
|
| 715 ctx->skip_unknown = FALSE; |
|
| 716 switch (rtf_symbols[idx].kwd_type) { |
|
| 717 case NMRTF_KWD_PROP: |
|
| 718 if (rtf_symbols[idx].pass_default || !param_set) |
|
| 719 param = rtf_symbols[idx].default_val; |
|
| 720 return rtf_apply_property(ctx, rtf_symbols[idx].action, param); |
|
| 721 case NMRTF_KWD_CHAR: |
|
| 722 return rtf_dispatch_char(ctx, rtf_symbols[idx].action); |
|
| 723 case NMRTF_KWD_DEST: |
|
| 724 return rtf_change_destination(ctx, rtf_symbols[idx].action); |
|
| 725 case NMRTF_KWD_SPEC: |
|
| 726 return rtf_dispatch_special(ctx, rtf_symbols[idx].action); |
|
| 727 default: |
|
| 728 return NMRTF_BAD_TABLE; |
|
| 729 } |
|
| 730 return NMRTF_BAD_TABLE; |
|
| 731 } |
|
| 732 |
|
| 733 /* |
|
| 734 * Change to the destination specified. |
|
| 735 */ |
|
| 736 static int |
|
| 737 rtf_change_destination(NMRtfContext *ctx, NMRtfDestinationType type) |
|
| 738 { |
|
| 739 /* if we're skipping text, don't do anything */ |
|
| 740 if (ctx->rds == NMRTF_STATE_SKIP) |
|
| 741 return NMRTF_OK; |
|
| 742 |
|
| 743 switch (type) { |
|
| 744 case NMRTF_DEST_FONTTABLE: |
|
| 745 ctx->rds = NMRTF_STATE_FONTTABLE; |
|
| 746 g_string_truncate(ctx->ansi, 0); |
|
| 747 break; |
|
| 748 default: |
|
| 749 ctx->rds = NMRTF_STATE_SKIP; /* when in doubt, skip it... */ |
|
| 750 break; |
|
| 751 } |
|
| 752 return NMRTF_OK; |
|
| 753 } |
|
| 754 |
|
| 755 /* |
|
| 756 * Dispatch an RTF control that needs special processing |
|
| 757 */ |
|
| 758 static int |
|
| 759 rtf_dispatch_special(NMRtfContext *ctx, NMRtfSpecialKwd type) |
|
| 760 { |
|
| 761 int status = NMRTF_OK; |
|
| 762 guchar ch; |
|
| 763 |
|
| 764 if (ctx->rds == NMRTF_STATE_SKIP && type != NMRTF_SPECIAL_BIN) /* if we're skipping, and it's not */ |
|
| 765 return NMRTF_OK; /* the \bin keyword, ignore it. */ |
|
| 766 |
|
| 767 switch (type) { |
|
| 768 case NMRTF_SPECIAL_BIN: |
|
| 769 ctx->ris = NMRTF_STATE_BIN; |
|
| 770 ctx->bytes_to_skip = ctx->param; |
|
| 771 break; |
|
| 772 case NMRTF_SPECIAL_SKIP: |
|
| 773 ctx->skip_unknown = TRUE; |
|
| 774 break; |
|
| 775 case NMRTF_SPECIAL_HEX: |
|
| 776 ctx->ris = NMRTF_STATE_HEX; |
|
| 777 break; |
|
| 778 case NMRTF_SPECIAL_UNICODE: |
|
| 779 purple_debug_info("novell", "parsing unichar\n"); |
|
| 780 status = rtf_dispatch_unicode_char(ctx, ctx->param); |
|
| 781 /* Skip next char */ |
|
| 782 if (status == NMRTF_OK) |
|
| 783 status = rtf_get_char(ctx, &ch); |
|
| 784 break; |
|
| 785 default: |
|
| 786 status = NMRTF_BAD_TABLE; |
|
| 787 break; |
|
| 788 } |
|
| 789 |
|
| 790 return status; |
|
| 791 } |
|
| 792 |
|
| 793 /* |
|
| 794 * Get the next character from the input stream |
|
| 795 */ |
|
| 796 static int |
|
| 797 rtf_get_char(NMRtfContext *ctx, guchar *ch) |
|
| 798 { |
|
| 799 if (ctx->nextch_available) { |
|
| 800 *ch = ctx->nextch; |
|
| 801 ctx->nextch_available = FALSE; |
|
| 802 } else { |
|
| 803 *ch = *(ctx->input); |
|
| 804 ctx->input++; |
|
| 805 } |
|
| 806 |
|
| 807 if (*ch) |
|
| 808 return NMRTF_OK; |
|
| 809 else |
|
| 810 return NMRTF_EOF; |
|
| 811 } |
|
| 812 |
|
| 813 /* |
|
| 814 * Move a character back into the input stream |
|
| 815 */ |
|
| 816 static int |
|
| 817 rtf_unget_char(NMRtfContext *ctx, guchar ch) |
|
| 818 { |
|
| 819 ctx->nextch = ch; |
|
| 820 ctx->nextch_available = TRUE; |
|
| 821 return NMRTF_OK; |
|
| 822 } |
|