| 1 /* |
|
| 2 * nmrtf.c |
|
| 3 * |
|
| 4 * Copyright (c) 2004 Novell, Inc. All Rights Reserved. |
|
| 5 * |
|
| 6 * This program is free software; you can redistribute it and/or modify |
|
| 7 * it under the terms of the GNU General Public License as published by |
|
| 8 * the Free Software Foundation; version 2 of the License. |
|
| 9 * |
|
| 10 * This program is distributed in the hope that it will be useful, |
|
| 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
| 13 * GNU General Public License for more details. |
|
| 14 * |
|
| 15 * You should have received a copy of the GNU General Public License |
|
| 16 * along with this program; if not, write to the Free Software |
|
| 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
| 18 * |
|
| 19 */ |
|
| 20 |
|
| 21 /* This code was adapted from the sample RTF reader found here: |
|
| 22 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dnrtfspec/html/rtfspec.asp |
|
| 23 */ |
|
| 24 |
|
| 25 #include <glib.h> |
|
| 26 #include <stdlib.h> |
|
| 27 #include <stdio.h> |
|
| 28 #include <stddef.h> |
|
| 29 #include <ctype.h> |
|
| 30 #include <string.h> |
|
| 31 #include "nmrtf.h" |
|
| 32 #include "debug.h" |
|
| 33 |
|
/*
 * Status codes used internally by the RTF parser.
 * NMRTF_OK is success; every other value names a failure.
 */
#define NMRTF_OK               0  /* no error */
#define NMRTF_STACK_UNDERFLOW  1  /* unmatched '}' */
#define NMRTF_STACK_OVERFLOW   2  /* too many '{' -- memory exhausted */
#define NMRTF_UNMATCHED_BRACE  3  /* RTF ended while a group was still open */
#define NMRTF_INVALID_HEX      4  /* invalid hex character found in data */
#define NMRTF_BAD_TABLE        5  /* RTF table (sym or prop) invalid */
#define NMRTF_ASSERTION        6  /* assertion failure */
#define NMRTF_EOF              7  /* end of input reached while reading RTF */
#define NMRTF_CONVERT_ERROR    8  /* error converting text */

/* Group nesting limit checked by rtf_parse() when a '{' is read */
#define NMRTF_MAX_DEPTH 256
|
| 46 |
|
/*
 * Parser state.  The same enum is used for both the destination state
 * (rds) and the internal state (ris) of the context.
 */
typedef enum
{
	NMRTF_STATE_NORMAL = 0,	/* ordinary text */
	NMRTF_STATE_SKIP,	/* characters are discarded */
	NMRTF_STATE_FONTTABLE,	/* inside the font table destination */
	NMRTF_STATE_BIN,	/* consuming data that follows \bin */
	NMRTF_STATE_HEX		/* reading the hex digits that follow \' */
} NMRtfState;
|
| 55 |
|
/* Character properties tracked by the parser */
typedef enum
{
	NMRTF_PROP_FONT_IDX = 0,	/* \f: current font */
	NMRTF_PROP_FONT_CHARSET,	/* \fcharset: charset of a font */
	NMRTF_PROP_MAX			/* number of properties */
} NMRtfProperty;
|
| 63 |
|
/* Keywords that need dedicated handling in rtf_dispatch_special() */
typedef enum
{
	NMRTF_SPECIAL_BIN = 0,	/* \bin */
	NMRTF_SPECIAL_HEX,	/* \' */
	NMRTF_SPECIAL_UNICODE,	/* \u */
	NMRTF_SPECIAL_SKIP	/* \* */
} NMRtfSpecialKwd;
|
| 71 |
|
/* Destinations a keyword can switch the parser to */
typedef enum
{
	NMRTF_DEST_FONTTABLE = 0,	/* the font table */
	NMRTF_DEST_SKIP			/* a destination whose content is ignored */
} NMRtfDestinationType;
|
| 77 |
|
/* How rtf_dispatch_control() treats a keyword found in the symbol table */
typedef enum
{
	NMRTF_KWD_CHAR = 0,	/* emits a character */
	NMRTF_KWD_DEST,		/* changes the destination */
	NMRTF_KWD_PROP,		/* changes a property */
	NMRTF_KWD_SPEC		/* needs special processing */
} NMRtfKeywordType;
|
| 85 |
|
/*
 * Character properties.  Only the font is tracked for now; bold,
 * italic, underline, etc. would be added here.
 */
typedef struct _NMRTFCharProp
{
	int font_idx;		/* value of the last \f keyword */
	int font_charset;	/* value of the last \fcharset keyword */
} NMRtfCharProp;
|
| 95 |
|
| 96 typedef struct _NMRtfStateSave |
|
| 97 { |
|
| 98 NMRtfCharProp chp; |
|
| 99 NMRtfState rds; |
|
| 100 NMRtfState ris; |
|
| 101 } NMRtfStateSave; |
|
| 102 |
|
| 103 typedef struct _NMRtfSymbol |
|
| 104 { |
|
| 105 char *keyword; /* RTF keyword */ |
|
| 106 int default_val; /* default value to use */ |
|
| 107 gboolean pass_default; /* true to use default value from this table */ |
|
| 108 NMRtfKeywordType kwd_type; /* the type of the keyword */ |
|
| 109 int action; /* property type if the keyword represents a property */ |
|
| 110 /* destination type if the keyword represents a destination */ |
|
| 111 /* character to print if the keyword represents a character */ |
|
| 112 } NMRtfSymbol; |
|
| 113 |
|
| 114 |
|
/* One entry of the font table */
typedef struct _NMRtfFont
{
	int number;	/* font number the entry was declared with */
	char *name;	/* font name, owned by the entry */
	int charset;	/* \fcharset value declared for the font */
} NMRtfFont;
|
| 121 |
|
| 122 /* RTF Context */ |
|
| 123 struct _NMRtfContext |
|
| 124 { |
|
| 125 NMRtfState rds; /* destination state */ |
|
| 126 NMRtfState ris; /* internal state */ |
|
| 127 NMRtfCharProp chp; /* current character properties (ie. font, bold, italic, etc.) */ |
|
| 128 GSList *font_table; /* the font table */ |
|
| 129 GSList *saved; /* saved state stack */ |
|
| 130 int param; /* numeric parameter for the current keyword */ |
|
| 131 long bytes_to_skip; /* number of bytes to skip (after encountering \bin) */ |
|
| 132 int depth; /* how many groups deep are we */ |
|
| 133 gboolean skip_unknown; /* if true, skip any unknown destinations (this is set after encountering '\*') */ |
|
| 134 char *input; /* input string */ |
|
| 135 char nextch; /* next char in input */ |
|
| 136 GString *ansi; /* Temporary ansi text, will be convert/flushed to the output string */ |
|
| 137 GString *output; /* The plain text UTF8 string */ |
|
| 138 }; |
|
| 139 |
|
| 140 static int rtf_parse(NMRtfContext *ctx); |
|
| 141 static int rtf_push_state(NMRtfContext *ctx); |
|
| 142 static int rtf_pop_state(NMRtfContext *ctx); |
|
| 143 static NMRtfFont *rtf_get_font(NMRtfContext *ctx, int index); |
|
| 144 static int rtf_get_char(NMRtfContext *ctx, guchar *ch); |
|
| 145 static int rtf_unget_char(NMRtfContext *ctx, guchar ch); |
|
| 146 static int rtf_flush_data(NMRtfContext *ctx); |
|
| 147 static int rtf_parse_keyword(NMRtfContext *ctx); |
|
| 148 static int rtf_dispatch_control(NMRtfContext *ctx, char *keyword, int param, gboolean param_set); |
|
| 149 static int rtf_dispatch_char(NMRtfContext *ctx, guchar ch); |
|
| 150 static int rtf_dispatch_unicode_char(NMRtfContext *ctx, gunichar ch); |
|
| 151 static int rtf_print_char(NMRtfContext *ctx, guchar ch); |
|
| 152 static int rtf_print_unicode_char(NMRtfContext *ctx, gunichar ch); |
|
| 153 static int rtf_change_destination(NMRtfContext *ctx, NMRtfDestinationType dest); |
|
| 154 static int rtf_dispatch_special(NMRtfContext *ctx, NMRtfSpecialKwd special); |
|
| 155 static int rtf_apply_property(NMRtfContext *ctx, NMRtfProperty prop, int val); |
|
| 156 |
|
| 157 /* RTF parser tables */ |
|
| 158 |
|
| 159 /* Keyword descriptions */ |
|
| 160 NMRtfSymbol rtf_symbols[] = { |
|
| 161 /* keyword, default, pass_default, keyword_type, action */ |
|
| 162 {"fonttbl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_FONTTABLE}, |
|
| 163 {"f", 0, FALSE, NMRTF_KWD_PROP, NMRTF_PROP_FONT_IDX}, |
|
| 164 {"fcharset", 0, FALSE, NMRTF_KWD_PROP, NMRTF_PROP_FONT_CHARSET}, |
|
| 165 {"par", 0, FALSE, NMRTF_KWD_CHAR, 0x0a}, |
|
| 166 {"line", 0, FALSE, NMRTF_KWD_CHAR, 0x0a}, |
|
| 167 {"\0x0a", 0, FALSE, NMRTF_KWD_CHAR, 0x0a}, |
|
| 168 {"\0x0d", 0, FALSE, NMRTF_KWD_CHAR, 0x0a}, |
|
| 169 {"tab", 0, FALSE, NMRTF_KWD_CHAR, 0x09}, |
|
| 170 {"\r", 0, FALSE, NMRTF_KWD_CHAR, '\r'}, |
|
| 171 {"\n", 0, FALSE, NMRTF_KWD_CHAR, '\n'}, |
|
| 172 {"ldblquote",0, FALSE, NMRTF_KWD_CHAR, '"'}, |
|
| 173 {"rdblquote",0, FALSE, NMRTF_KWD_CHAR, '"'}, |
|
| 174 {"{", 0, FALSE, NMRTF_KWD_CHAR, '{'}, |
|
| 175 {"}", 0, FALSE, NMRTF_KWD_CHAR, '}'}, |
|
| 176 {"\\", 0, FALSE, NMRTF_KWD_CHAR, '\\'}, |
|
| 177 {"bin", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_BIN}, |
|
| 178 {"*", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_SKIP}, |
|
| 179 {"'", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_HEX}, |
|
| 180 {"u", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_UNICODE}, |
|
| 181 {"colortbl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 182 {"author", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 183 {"buptim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 184 {"comment", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 185 {"creatim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 186 {"doccomm", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 187 {"footer", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 188 {"footerf", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 189 {"footerl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 190 {"footerr", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 191 {"footnote", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 192 {"ftncn", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 193 {"ftnsep", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 194 {"ftnsepc", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 195 {"header", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 196 {"headerf", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 197 {"headerl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 198 {"headerr", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 199 {"info", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 200 {"keywords", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 201 {"operator", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 202 {"pict", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 203 {"printim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 204 {"private1", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 205 {"revtim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 206 {"rxe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 207 {"stylesheet", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 208 {"subject", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 209 {"tc", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 210 {"title", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 211 {"txe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
|
| 212 {"xe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP} |
|
| 213 }; |
|
| 214 int table_size = sizeof(rtf_symbols) / sizeof(NMRtfSymbol); |
|
| 215 |
|
| 216 NMRtfContext * |
|
| 217 nm_rtf_init() |
|
| 218 { |
|
| 219 NMRtfContext *ctx = g_new0(NMRtfContext, 1); |
|
| 220 ctx->nextch = -1; |
|
| 221 ctx->ansi = g_string_new(""); |
|
| 222 ctx->output = g_string_new(""); |
|
| 223 return ctx; |
|
| 224 } |
|
| 225 |
|
| 226 char * |
|
| 227 nm_rtf_strip_formatting(NMRtfContext *ctx, const char *input) |
|
| 228 { |
|
| 229 int status; |
|
| 230 |
|
| 231 ctx->input = (char *)input; |
|
| 232 status = rtf_parse(ctx); |
|
| 233 if (status == NMRTF_OK) |
|
| 234 return g_strdup(ctx->output->str); |
|
| 235 |
|
| 236 gaim_debug_info("novell", "RTF parser failed with error code %d", status); |
|
| 237 return NULL; |
|
| 238 } |
|
| 239 |
|
| 240 void |
|
| 241 nm_rtf_deinit(NMRtfContext *ctx) |
|
| 242 { |
|
| 243 GSList *node; |
|
| 244 NMRtfFont *font; |
|
| 245 NMRtfStateSave *save; |
|
| 246 |
|
| 247 if (ctx) { |
|
| 248 for (node = ctx->font_table; node; node = node->next) { |
|
| 249 font = node->data; |
|
| 250 g_free(font->name); |
|
| 251 g_free(font); |
|
| 252 node->data = NULL; |
|
| 253 } |
|
| 254 g_slist_free(ctx->font_table); |
|
| 255 for (node = ctx->saved; node; node = node->next) { |
|
| 256 save = node->data; |
|
| 257 g_free(save); |
|
| 258 node->data = NULL; |
|
| 259 } |
|
| 260 g_slist_free(ctx->saved); |
|
| 261 g_string_free(ctx->ansi, TRUE); |
|
| 262 g_string_free(ctx->output, TRUE); |
|
| 263 g_free(ctx); |
|
| 264 } |
|
| 265 } |
|
| 266 |
|
| 267 static const char * |
|
| 268 get_current_encoding(NMRtfContext *ctx) |
|
| 269 { |
|
| 270 NMRtfFont *font; |
|
| 271 |
|
| 272 font = rtf_get_font(ctx, ctx->chp.font_idx); |
|
| 273 |
|
| 274 switch (font->charset) { |
|
| 275 case 0: |
|
| 276 return "CP1252"; |
|
| 277 case 77: |
|
| 278 return "MACINTOSH"; |
|
| 279 case 78: |
|
| 280 return "SJIS"; |
|
| 281 case 128: |
|
| 282 return "CP932"; |
|
| 283 case 129: |
|
| 284 return "CP949"; |
|
| 285 case 130: |
|
| 286 return "CP1361"; |
|
| 287 case 134: |
|
| 288 return "CP936"; |
|
| 289 case 136: |
|
| 290 return "CP950"; |
|
| 291 case 161: |
|
| 292 return "CP1253"; |
|
| 293 case 162: |
|
| 294 return "CP1254"; |
|
| 295 case 163: |
|
| 296 return "CP1258"; |
|
| 297 case 181: |
|
| 298 case 177: |
|
| 299 return "CP1255"; |
|
| 300 case 178: |
|
| 301 case 179: |
|
| 302 case 180: |
|
| 303 return "CP1256"; |
|
| 304 case 186: |
|
| 305 return "CP1257"; |
|
| 306 case 204: |
|
| 307 return "CP1251"; |
|
| 308 case 222: |
|
| 309 return "CP874"; |
|
| 310 case 238: |
|
| 311 return "CP1250"; |
|
| 312 case 254: |
|
| 313 return "CP437"; |
|
| 314 default: |
|
| 315 gaim_debug_info("novell", "Unhandled font charset %d\n", font->charset); |
|
| 316 return "CP1252"; |
|
| 317 } |
|
| 318 return "CP1252"; |
|
| 319 } |
|
| 320 |
|
| 321 |
|
| 322 /* |
|
| 323 * Add an entry to the font table |
|
| 324 */ |
|
| 325 static int |
|
| 326 rtf_add_font_entry(NMRtfContext *ctx, int number, const char *name, int charset) |
|
| 327 { |
|
| 328 NMRtfFont *font = g_new0(NMRtfFont, 1); |
|
| 329 |
|
| 330 font->number = number; |
|
| 331 font->name = g_strdup(name); |
|
| 332 font->charset = charset; |
|
| 333 |
|
| 334 gaim_debug_info("novell", "Adding font to table: #%d\t%s\t%d\n", |
|
| 335 font->number, font->name, font->charset); |
|
| 336 |
|
| 337 ctx->font_table = g_slist_append(ctx->font_table, font); |
|
| 338 |
|
| 339 return NMRTF_OK; |
|
| 340 } |
|
| 341 |
|
| 342 /* |
|
| 343 * Return the nth entry in the font table |
|
| 344 */ |
|
| 345 static NMRtfFont * |
|
| 346 rtf_get_font(NMRtfContext *ctx, int nth) |
|
| 347 { |
|
| 348 NMRtfFont *font; |
|
| 349 |
|
| 350 font = g_slist_nth_data(ctx->font_table, nth); |
|
| 351 |
|
| 352 return font; |
|
| 353 } |
|
| 354 |
|
| 355 /* |
|
| 356 * Step 1: |
|
| 357 * Isolate RTF keywords and send them to rtf_parse_keyword; |
|
| 358 * Push and pop state at the start and end of RTF groups; |
|
| 359 * Send text to rtf_dispatch_char for further processing. |
|
| 360 */ |
|
| 361 static int |
|
| 362 rtf_parse(NMRtfContext *ctx) |
|
| 363 { |
|
| 364 int status; |
|
| 365 guchar ch; |
|
| 366 guchar hex_byte = 0; |
|
| 367 int hex_count = 2; |
|
| 368 int len; |
|
| 369 |
|
| 370 if (ctx->input == NULL) |
|
| 371 return NMRTF_OK; |
|
| 372 |
|
| 373 while (rtf_get_char(ctx, &ch) == NMRTF_OK) { |
|
| 374 if (ctx->depth < 0) |
|
| 375 return NMRTF_STACK_UNDERFLOW; |
|
| 376 |
|
| 377 /* if we're parsing binary data, handle it directly */ |
|
| 378 if (ctx->ris == NMRTF_STATE_BIN) { |
|
| 379 if ((status = rtf_dispatch_char(ctx, ch)) != NMRTF_OK) |
|
| 380 return status; |
|
| 381 } else { |
|
| 382 switch (ch) { |
|
| 383 case '{': |
|
| 384 if (ctx->depth > NMRTF_MAX_DEPTH) |
|
| 385 return NMRTF_STACK_OVERFLOW; |
|
| 386 rtf_flush_data(ctx); |
|
| 387 if ((status = rtf_push_state(ctx)) != NMRTF_OK) |
|
| 388 return status; |
|
| 389 break; |
|
| 390 case '}': |
|
| 391 rtf_flush_data(ctx); |
|
| 392 |
|
| 393 /* for some reason there is always an unwanted '\par' at the end */ |
|
| 394 if (ctx->rds == NMRTF_STATE_NORMAL) { |
|
| 395 len = ctx->output->len; |
|
| 396 if (ctx->output->str[len-1] == '\n') |
|
| 397 ctx->output = g_string_truncate(ctx->output, len-1); |
|
| 398 } |
|
| 399 |
|
| 400 if ((status = rtf_pop_state(ctx)) != NMRTF_OK) |
|
| 401 return status; |
|
| 402 |
|
| 403 if (ctx->depth < 0) |
|
| 404 return NMRTF_STACK_OVERFLOW; |
|
| 405 break; |
|
| 406 case '\\': |
|
| 407 if ((status = rtf_parse_keyword(ctx)) != NMRTF_OK) |
|
| 408 return status; |
|
| 409 break; |
|
| 410 case 0x0d: |
|
| 411 case 0x0a: /* cr and lf are noise characters... */ |
|
| 412 break; |
|
| 413 default: |
|
| 414 if (ctx->ris == NMRTF_STATE_NORMAL) { |
|
| 415 if ((status = rtf_dispatch_char(ctx, ch)) != NMRTF_OK) |
|
| 416 return status; |
|
| 417 } else { /* parsing a hex encoded character */ |
|
| 418 if (ctx->ris != NMRTF_STATE_HEX) |
|
| 419 return NMRTF_ASSERTION; |
|
| 420 |
|
| 421 hex_byte = hex_byte << 4; |
|
| 422 if (isdigit(ch)) |
|
| 423 hex_byte += (char) ch - '0'; |
|
| 424 else { |
|
| 425 if (islower(ch)) { |
|
| 426 if (ch < 'a' || ch > 'f') |
|
| 427 return NMRTF_INVALID_HEX; |
|
| 428 hex_byte += (char) ch - 'a' + 10; |
|
| 429 } else { |
|
| 430 if (ch < 'A' || ch > 'F') |
|
| 431 return NMRTF_INVALID_HEX; |
|
| 432 hex_byte += (char) ch - 'A' + 10; |
|
| 433 } |
|
| 434 } |
|
| 435 hex_count--; |
|
| 436 if (hex_count == 0) { |
|
| 437 if ((status = rtf_dispatch_char(ctx, hex_byte)) != NMRTF_OK) |
|
| 438 return status; |
|
| 439 hex_count = 2; |
|
| 440 hex_byte = 0; |
|
| 441 ctx->ris = NMRTF_STATE_NORMAL; |
|
| 442 } |
|
| 443 } |
|
| 444 break; |
|
| 445 } |
|
| 446 } |
|
| 447 } |
|
| 448 if (ctx->depth < 0) |
|
| 449 return NMRTF_STACK_OVERFLOW; |
|
| 450 if (ctx->depth > 0) |
|
| 451 return NMRTF_UNMATCHED_BRACE; |
|
| 452 return NMRTF_OK; |
|
| 453 } |
|
| 454 |
|
| 455 /* |
|
| 456 * Push the current state onto stack |
|
| 457 */ |
|
| 458 static int |
|
| 459 rtf_push_state(NMRtfContext *ctx) |
|
| 460 { |
|
| 461 NMRtfStateSave *save = g_new0(NMRtfStateSave, 1); |
|
| 462 save->chp = ctx->chp; |
|
| 463 save->rds = ctx->rds; |
|
| 464 save->ris = ctx->ris; |
|
| 465 ctx->saved = g_slist_prepend(ctx->saved, save); |
|
| 466 ctx->ris = NMRTF_STATE_NORMAL; |
|
| 467 (ctx->depth)++; |
|
| 468 return NMRTF_OK; |
|
| 469 } |
|
| 470 |
|
| 471 /* |
|
| 472 * Restore the state at the top of the stack |
|
| 473 */ |
|
| 474 static int |
|
| 475 rtf_pop_state(NMRtfContext *ctx) |
|
| 476 { |
|
| 477 NMRtfStateSave *save_old; |
|
| 478 GSList *link_old; |
|
| 479 |
|
| 480 if (ctx->saved == NULL) |
|
| 481 return NMRTF_STACK_UNDERFLOW; |
|
| 482 |
|
| 483 save_old = ctx->saved->data; |
|
| 484 ctx->chp = save_old->chp; |
|
| 485 ctx->rds = save_old->rds; |
|
| 486 ctx->ris = save_old->ris; |
|
| 487 (ctx->depth)--; |
|
| 488 |
|
| 489 g_free(save_old); |
|
| 490 link_old = ctx->saved; |
|
| 491 ctx->saved = g_slist_remove_link(ctx->saved, link_old); |
|
| 492 g_slist_free_1(link_old); |
|
| 493 return NMRTF_OK; |
|
| 494 } |
|
| 495 |
|
| 496 /* |
|
| 497 * Step 2: |
|
| 498 * Get a control word (and its associated value) and |
|
| 499 * dispatch the control. |
|
| 500 */ |
|
| 501 static int |
|
| 502 rtf_parse_keyword(NMRtfContext *ctx) |
|
| 503 { |
|
| 504 int status = NMRTF_OK; |
|
| 505 guchar ch; |
|
| 506 gboolean param_set = FALSE; |
|
| 507 gboolean is_neg = FALSE; |
|
| 508 int param = 0; |
|
| 509 char keyword[30]; |
|
| 510 char parameter[20]; |
|
| 511 int i; |
|
| 512 |
|
| 513 keyword[0] = '\0'; |
|
| 514 parameter[0] = '\0'; |
|
| 515 if ((status = rtf_get_char(ctx, &ch)) != NMRTF_OK) |
|
| 516 return status; |
|
| 517 |
|
| 518 if (!isalpha(ch)) { |
|
| 519 /* a control symbol; no delimiter. */ |
|
| 520 keyword[0] = (char) ch; |
|
| 521 keyword[1] = '\0'; |
|
| 522 return rtf_dispatch_control(ctx, keyword, 0, param_set); |
|
| 523 } |
|
| 524 |
|
| 525 /* parse keyword */ |
|
| 526 for (i = 0; isalpha(ch) && (i < sizeof(keyword) - 1); rtf_get_char(ctx, &ch)) { |
|
| 527 keyword[i] = (char) ch; |
|
| 528 i++; |
|
| 529 } |
|
| 530 keyword[i] = '\0'; |
|
| 531 |
|
| 532 /* check for '-' indicated a negative parameter value */ |
|
| 533 if (ch == '-') { |
|
| 534 is_neg = TRUE; |
|
| 535 if ((status = rtf_get_char(ctx, &ch)) != NMRTF_OK) |
|
| 536 return status; |
|
| 537 } |
|
| 538 |
|
| 539 /* check for numerical param */ |
|
| 540 if (isdigit(ch)) { |
|
| 541 |
|
| 542 param_set = TRUE; |
|
| 543 for (i = 0; isdigit(ch) && (i < sizeof(parameter) - 1); rtf_get_char(ctx, &ch)) { |
|
| 544 parameter[i] = (char) ch; |
|
| 545 i++; |
|
| 546 } |
|
| 547 parameter[i] = '\0'; |
|
| 548 |
|
| 549 ctx->param = param = atoi(parameter); |
|
| 550 if (is_neg) |
|
| 551 ctx->param = param = -param; |
|
| 552 } |
|
| 553 |
|
| 554 /* space after control is optional, put character back if it is not a space */ |
|
| 555 if (ch != ' ') |
|
| 556 rtf_unget_char(ctx, ch); |
|
| 557 |
|
| 558 return rtf_dispatch_control(ctx, keyword, param, param_set); |
|
| 559 } |
|
| 560 |
|
| 561 /* |
|
| 562 * Route the character to the appropriate destination |
|
| 563 */ |
|
| 564 static int |
|
| 565 rtf_dispatch_char(NMRtfContext *ctx, guchar ch) |
|
| 566 { |
|
| 567 if (ctx->ris == NMRTF_STATE_BIN && --(ctx->bytes_to_skip) <= 0) |
|
| 568 ctx->ris = NMRTF_STATE_NORMAL; |
|
| 569 |
|
| 570 switch (ctx->rds) { |
|
| 571 case NMRTF_STATE_SKIP: |
|
| 572 return NMRTF_OK; |
|
| 573 case NMRTF_STATE_NORMAL: |
|
| 574 return rtf_print_char(ctx, ch); |
|
| 575 case NMRTF_STATE_FONTTABLE: |
|
| 576 if (ch == ';') { |
|
| 577 rtf_add_font_entry(ctx, ctx->chp.font_idx, |
|
| 578 ctx->ansi->str, ctx->chp.font_charset); |
|
| 579 g_string_truncate(ctx->ansi, 0); |
|
| 580 } |
|
| 581 else { |
|
| 582 return rtf_print_char(ctx, ch); |
|
| 583 } |
|
| 584 return NMRTF_OK; |
|
| 585 default: |
|
| 586 return NMRTF_OK; |
|
| 587 } |
|
| 588 } |
|
| 589 |
|
| 590 /* Handle a unicode character */ |
|
| 591 static int |
|
| 592 rtf_dispatch_unicode_char(NMRtfContext *ctx, gunichar ch) |
|
| 593 { |
|
| 594 switch (ctx->rds) { |
|
| 595 case NMRTF_STATE_SKIP: |
|
| 596 return NMRTF_OK; |
|
| 597 case NMRTF_STATE_NORMAL: |
|
| 598 case NMRTF_STATE_FONTTABLE: |
|
| 599 return rtf_print_unicode_char(ctx, ch); |
|
| 600 default: |
|
| 601 return NMRTF_OK; |
|
| 602 } |
|
| 603 } |
|
| 604 |
|
| 605 /* |
|
| 606 * Output a character |
|
| 607 */ |
|
| 608 static int |
|
| 609 rtf_print_char(NMRtfContext *ctx, guchar ch) |
|
| 610 { |
|
| 611 |
|
| 612 ctx->ansi = g_string_append_c(ctx->ansi, ch); |
|
| 613 |
|
| 614 return NMRTF_OK; |
|
| 615 } |
|
| 616 |
|
| 617 /* |
|
| 618 * Output a unicode character |
|
| 619 */ |
|
| 620 static int |
|
| 621 rtf_print_unicode_char(NMRtfContext *ctx, gunichar ch) |
|
| 622 { |
|
| 623 char buf[7]; |
|
| 624 int num; |
|
| 625 |
|
| 626 /* convert and flush the ansi buffer to the utf8 buffer */ |
|
| 627 rtf_flush_data(ctx); |
|
| 628 |
|
| 629 /* convert the unicode character to utf8 and add directly to the output buffer */ |
|
| 630 num = g_unichar_to_utf8((gunichar) ch, buf); |
|
| 631 buf[num] = 0; |
|
| 632 gaim_debug_info("novell", "converted unichar 0x%X to utf8 char %s\n", ch, buf); |
|
| 633 |
|
| 634 ctx->output = g_string_append(ctx->output, buf); |
|
| 635 return NMRTF_OK; |
|
| 636 } |
|
| 637 |
|
| 638 /* |
|
| 639 * Flush the output text |
|
| 640 */ |
|
| 641 static int |
|
| 642 rtf_flush_data(NMRtfContext *ctx) |
|
| 643 { |
|
| 644 int status = NMRTF_OK; |
|
| 645 char *conv_data = NULL; |
|
| 646 const char *enc = NULL; |
|
| 647 GError *gerror = NULL; |
|
| 648 |
|
| 649 if (ctx->rds == NMRTF_STATE_NORMAL && ctx->ansi->len > 0) { |
|
| 650 enc = get_current_encoding(ctx); |
|
| 651 conv_data = g_convert(ctx->ansi->str, ctx->ansi->len, "UTF-8", enc, |
|
| 652 NULL, NULL, &gerror); |
|
| 653 if (conv_data) { |
|
| 654 ctx->output = g_string_append(ctx->output, conv_data); |
|
| 655 g_free(conv_data); |
|
| 656 ctx->ansi = g_string_truncate(ctx->ansi, 0); |
|
| 657 } else { |
|
| 658 status = NMRTF_CONVERT_ERROR; |
|
| 659 gaim_debug_info("novell", "failed to convert data! error code = %d msg = %s\n", |
|
| 660 gerror->code, gerror->message); |
|
| 661 g_free(gerror); |
|
| 662 } |
|
| 663 } |
|
| 664 |
|
| 665 return status; |
|
| 666 } |
|
| 667 |
|
| 668 /* |
|
| 669 * Handle a property change |
|
| 670 */ |
|
| 671 static int |
|
| 672 rtf_apply_property(NMRtfContext *ctx, NMRtfProperty prop, int val) |
|
| 673 { |
|
| 674 if (ctx->rds == NMRTF_STATE_SKIP) /* If we're skipping text, */ |
|
| 675 return NMRTF_OK; /* don't do anything. */ |
|
| 676 |
|
| 677 /* Need to flush any temporary data before a property change*/ |
|
| 678 rtf_flush_data(ctx); |
|
| 679 |
|
| 680 switch (prop) { |
|
| 681 case NMRTF_PROP_FONT_IDX: |
|
| 682 ctx->chp.font_idx = val; |
|
| 683 break; |
|
| 684 case NMRTF_PROP_FONT_CHARSET: |
|
| 685 ctx->chp.font_charset = val; |
|
| 686 break; |
|
| 687 default: |
|
| 688 return NMRTF_BAD_TABLE; |
|
| 689 } |
|
| 690 |
|
| 691 return NMRTF_OK; |
|
| 692 } |
|
| 693 |
|
| 694 /* |
|
| 695 * Step 3. |
|
| 696 * Search the table for keyword and evaluate it appropriately. |
|
| 697 * |
|
| 698 * Inputs: |
|
| 699 * keyword: The RTF control to evaluate. |
|
| 700 * param: The parameter of the RTF control. |
|
| 701 * param_set: TRUE if the control had a parameter; (that is, if param is valid) |
|
| 702 * FALSE if it did not. |
|
| 703 */ |
|
| 704 static int |
|
| 705 rtf_dispatch_control(NMRtfContext *ctx, char *keyword, int param, gboolean param_set) |
|
| 706 { |
|
| 707 int idx; |
|
| 708 |
|
| 709 for (idx = 0; idx < table_size; idx++) { |
|
| 710 if (strcmp(keyword, rtf_symbols[idx].keyword) == 0) |
|
| 711 break; |
|
| 712 } |
|
| 713 |
|
| 714 if (idx == table_size) { |
|
| 715 if (ctx->skip_unknown) |
|
| 716 ctx->rds = NMRTF_STATE_SKIP; |
|
| 717 ctx->skip_unknown = FALSE; |
|
| 718 return NMRTF_OK; |
|
| 719 } |
|
| 720 |
|
| 721 /* found it! use kwd_type and action to determine what to do with it. */ |
|
| 722 ctx->skip_unknown = FALSE; |
|
| 723 switch (rtf_symbols[idx].kwd_type) { |
|
| 724 case NMRTF_KWD_PROP: |
|
| 725 if (rtf_symbols[idx].pass_default || !param_set) |
|
| 726 param = rtf_symbols[idx].default_val; |
|
| 727 return rtf_apply_property(ctx, rtf_symbols[idx].action, param); |
|
| 728 case NMRTF_KWD_CHAR: |
|
| 729 return rtf_dispatch_char(ctx, rtf_symbols[idx].action); |
|
| 730 case NMRTF_KWD_DEST: |
|
| 731 return rtf_change_destination(ctx, rtf_symbols[idx].action); |
|
| 732 case NMRTF_KWD_SPEC: |
|
| 733 return rtf_dispatch_special(ctx, rtf_symbols[idx].action); |
|
| 734 default: |
|
| 735 return NMRTF_BAD_TABLE; |
|
| 736 } |
|
| 737 return NMRTF_BAD_TABLE; |
|
| 738 } |
|
| 739 |
|
| 740 /* |
|
| 741 * Change to the destination specified. |
|
| 742 */ |
|
| 743 static int |
|
| 744 rtf_change_destination(NMRtfContext *ctx, NMRtfDestinationType type) |
|
| 745 { |
|
| 746 /* if we're skipping text, don't do anything */ |
|
| 747 if (ctx->rds == NMRTF_STATE_SKIP) |
|
| 748 return NMRTF_OK; |
|
| 749 |
|
| 750 switch (type) { |
|
| 751 case NMRTF_DEST_FONTTABLE: |
|
| 752 ctx->rds = NMRTF_STATE_FONTTABLE; |
|
| 753 g_string_truncate(ctx->ansi, 0); |
|
| 754 break; |
|
| 755 default: |
|
| 756 ctx->rds = NMRTF_STATE_SKIP; /* when in doubt, skip it... */ |
|
| 757 break; |
|
| 758 } |
|
| 759 return NMRTF_OK; |
|
| 760 } |
|
| 761 |
|
| 762 /* |
|
| 763 * Dispatch an RTF control that needs special processing |
|
| 764 */ |
|
| 765 static int |
|
| 766 rtf_dispatch_special(NMRtfContext *ctx, NMRtfSpecialKwd type) |
|
| 767 { |
|
| 768 int status = NMRTF_OK; |
|
| 769 guchar ch; |
|
| 770 |
|
| 771 if (ctx->rds == NMRTF_STATE_SKIP && type != NMRTF_SPECIAL_BIN) /* if we're skipping, and it's not */ |
|
| 772 return NMRTF_OK; /* the \bin keyword, ignore it. */ |
|
| 773 |
|
| 774 switch (type) { |
|
| 775 case NMRTF_SPECIAL_BIN: |
|
| 776 ctx->ris = NMRTF_STATE_BIN; |
|
| 777 ctx->bytes_to_skip = ctx->param; |
|
| 778 break; |
|
| 779 case NMRTF_SPECIAL_SKIP: |
|
| 780 ctx->skip_unknown = TRUE; |
|
| 781 break; |
|
| 782 case NMRTF_SPECIAL_HEX: |
|
| 783 ctx->ris = NMRTF_STATE_HEX; |
|
| 784 break; |
|
| 785 case NMRTF_SPECIAL_UNICODE: |
|
| 786 gaim_debug_info("novell", "parsing unichar\n"); |
|
| 787 status = rtf_dispatch_unicode_char(ctx, ctx->param); |
|
| 788 /* Skip next char */ |
|
| 789 if (status == NMRTF_OK) |
|
| 790 status = rtf_get_char(ctx, &ch); |
|
| 791 break; |
|
| 792 default: |
|
| 793 status = NMRTF_BAD_TABLE; |
|
| 794 break; |
|
| 795 } |
|
| 796 |
|
| 797 return status; |
|
| 798 } |
|
| 799 |
|
| 800 /* |
|
| 801 * Get the next character from the input stream |
|
| 802 */ |
|
| 803 static int |
|
| 804 rtf_get_char(NMRtfContext *ctx, guchar *ch) |
|
| 805 { |
|
| 806 if (ctx->nextch >= 0) { |
|
| 807 *ch = ctx->nextch; |
|
| 808 ctx->nextch = -1; |
|
| 809 } |
|
| 810 else { |
|
| 811 *ch = *(ctx->input); |
|
| 812 ctx->input++; |
|
| 813 } |
|
| 814 |
|
| 815 if (*ch) |
|
| 816 return NMRTF_OK; |
|
| 817 else |
|
| 818 return NMRTF_EOF; |
|
| 819 } |
|
| 820 |
|
| 821 /* |
|
| 822 * Move a character back into the input stream |
|
| 823 */ |
|
| 824 static int |
|
| 825 rtf_unget_char(NMRtfContext *ctx, guchar ch) |
|
| 826 { |
|
| 827 ctx->nextch = ch; |
|
| 828 return NMRTF_OK; |
|
| 829 } |
|