| |
1 /* |
| |
2 * nmrtf.c |
| |
3 * |
| |
4 * Copyright (c) 2004 Novell, Inc. All Rights Reserved. |
| |
5 * |
| |
6 * This program is free software; you can redistribute it and/or modify |
| |
7 * it under the terms of the GNU General Public License as published by |
| |
8 * the Free Software Foundation; version 2 of the License. |
| |
9 * |
| |
10 * This program is distributed in the hope that it will be useful, |
| |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| |
13 * GNU General Public License for more details. |
| |
14 * |
| |
15 * You should have received a copy of the GNU General Public License |
| |
16 * along with this program; if not, write to the Free Software |
| |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| |
18 * |
| |
19 */ |
| |
20 |
| |
21 /* This code was adapted from the sample RTF reader found here: |
| |
22 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dnrtfspec/html/rtfspec.asp |
| |
23 */ |
| |
24 |
| |
25 #include <glib.h> |
| |
26 #include <stdlib.h> |
| |
27 #include <stdio.h> |
| |
28 #include <stddef.h> |
| |
29 #include <ctype.h> |
| |
30 #include <string.h> |
| |
31 #include "nmrtf.h" |
| |
32 #include "debug.h" |
| |
33 |
| |
/*
 * Status codes returned by the internal RTF parser routines.
 * NMRTF_OK (zero) means success; every other value identifies a failure.
 */

/* Everything's fine! */
#define NMRTF_OK               0
/* Unmatched '}' */
#define NMRTF_STACK_UNDERFLOW  1
/* Too many '{' -- memory exhausted */
#define NMRTF_STACK_OVERFLOW   2
/* RTF ended during an open group. */
#define NMRTF_UNMATCHED_BRACE  3
/* invalid hex character found in data */
#define NMRTF_INVALID_HEX      4
/* RTF table (sym or prop) invalid */
#define NMRTF_BAD_TABLE        5
/* Assertion failure */
#define NMRTF_ASSERTION        6
/* End of file reached while reading RTF */
#define NMRTF_EOF              7
/* Error converting text */
#define NMRTF_CONVERT_ERROR    8

/* Group nesting limit checked by the parser before opening another group. */
#define NMRTF_MAX_DEPTH        256
| |
46 |
| |
/*
 * Parser state values. The same enumeration is used for both the
 * destination state and the internal state kept in the parser context.
 */
typedef enum {
	NMRTF_STATE_NORMAL    = 0,
	NMRTF_STATE_SKIP      = 1,
	NMRTF_STATE_FONTTABLE = 2,
	NMRTF_STATE_BIN       = 3,
	NMRTF_STATE_HEX       = 4
} NMRtfState;
| |
55 |
| |
/*
 * Character properties the parser tracks.
 * NMRTF_PROP_MAX is the number of real properties, not a property itself.
 */
typedef enum {
	NMRTF_PROP_FONT_IDX     = 0,
	NMRTF_PROP_FONT_CHARSET = 1,
	NMRTF_PROP_MAX          = 2
} NMRtfProperty;
| |
63 |
| |
/* Keywords that need dedicated handling rather than a table-driven action. */
typedef enum {
	NMRTF_SPECIAL_BIN     = 0,
	NMRTF_SPECIAL_HEX     = 1,
	NMRTF_SPECIAL_UNICODE = 2,
	NMRTF_SPECIAL_SKIP    = 3
} NMRtfSpecialKwd;
| |
71 |
| |
/* Destinations a keyword can switch the parser to. */
typedef enum {
	NMRTF_DEST_FONTTABLE = 0,
	NMRTF_DEST_SKIP      = 1
} NMRtfDestinationType;
| |
77 |
| |
/* Category of a keyword-table entry; selects how its action is interpreted. */
typedef enum {
	NMRTF_KWD_CHAR = 0,
	NMRTF_KWD_DEST = 1,
	NMRTF_KWD_PROP = 2,
	NMRTF_KWD_SPEC = 3
} NMRtfKeywordType;
| |
85 |
| |
/*
 * Character properties in effect at the current parse position.
 * Only the font is tracked for now; bold, italic, underline, etc.
 * should be added here if they are ever needed.
 */
typedef struct _NMRTFCharProp {
	int font_idx;      /* value of the most recent font-index property */
	int font_charset;  /* value of the most recent font-charset property */
} NMRtfCharProp;
| |
95 |
| |
96 typedef struct _NMRtfStateSave |
| |
97 { |
| |
98 NMRtfCharProp chp; |
| |
99 NMRtfState rds; |
| |
100 NMRtfState ris; |
| |
101 } NMRtfStateSave; |
| |
102 |
| |
103 typedef struct _NMRtfSymbol |
| |
104 { |
| |
105 char *keyword; /* RTF keyword */ |
| |
106 int default_val; /* default value to use */ |
| |
107 gboolean pass_default; /* true to use default value from this table */ |
| |
108 NMRtfKeywordType kwd_type; /* the type of the keyword */ |
| |
109 int action; /* property type if the keyword represents a property */ |
| |
110 /* destination type if the keyword represents a destination */ |
| |
111 /* character to print if the keyword represents a character */ |
| |
112 } NMRtfSymbol; |
| |
113 |
| |
114 |
| |
/* One entry of the font table built while parsing. */
typedef struct _NMRtfFont {
	int number;   /* font number the entry was registered under */
	char *name;   /* font name */
	int charset;  /* charset value recorded for this font */
} NMRtfFont;
| |
121 |
| |
122 /* RTF Context */ |
| |
123 struct _NMRtfContext |
| |
124 { |
| |
125 NMRtfState rds; /* destination state */ |
| |
126 NMRtfState ris; /* internal state */ |
| |
127 NMRtfCharProp chp; /* current character properties (ie. font, bold, italic, etc.) */ |
| |
128 GSList *font_table; /* the font table */ |
| |
129 GSList *saved; /* saved state stack */ |
| |
130 int param; /* numeric parameter for the current keyword */ |
| |
131 long bytes_to_skip; /* number of bytes to skip (after encountering \bin) */ |
| |
132 int depth; /* how many groups deep are we */ |
| |
133 gboolean skip_unknown; /* if true, skip any unknown destinations (this is set after encountering '\*') */ |
| |
134 char *input; /* input string */ |
| |
135 char nextch; /* next char in input */ |
| |
136 GString *ansi; /* Temporary ansi text, will be convert/flushed to the output string */ |
| |
137 GString *output; /* The plain text UTF8 string */ |
| |
138 }; |
| |
139 |
| |
140 static int rtf_parse(NMRtfContext *ctx); |
| |
141 static int rtf_push_state(NMRtfContext *ctx); |
| |
142 static int rtf_pop_state(NMRtfContext *ctx); |
| |
143 static NMRtfFont *rtf_get_font(NMRtfContext *ctx, int index); |
| |
144 static int rtf_get_char(NMRtfContext *ctx, guchar *ch); |
| |
145 static int rtf_unget_char(NMRtfContext *ctx, guchar ch); |
| |
146 static int rtf_flush_data(NMRtfContext *ctx); |
| |
147 static int rtf_parse_keyword(NMRtfContext *ctx); |
| |
148 static int rtf_dispatch_control(NMRtfContext *ctx, char *keyword, int param, gboolean param_set); |
| |
149 static int rtf_dispatch_char(NMRtfContext *ctx, guchar ch); |
| |
150 static int rtf_dispatch_unicode_char(NMRtfContext *ctx, gunichar ch); |
| |
151 static int rtf_print_char(NMRtfContext *ctx, guchar ch); |
| |
152 static int rtf_print_unicode_char(NMRtfContext *ctx, gunichar ch); |
| |
153 static int rtf_change_destination(NMRtfContext *ctx, NMRtfDestinationType dest); |
| |
154 static int rtf_dispatch_special(NMRtfContext *ctx, NMRtfSpecialKwd special); |
| |
155 static int rtf_apply_property(NMRtfContext *ctx, NMRtfProperty prop, int val); |
| |
156 |
| |
157 /* RTF parser tables */ |
| |
158 |
| |
159 /* Keyword descriptions */ |
| |
160 NMRtfSymbol rtf_symbols[] = { |
| |
161 /* keyword, default, pass_default, keyword_type, action */ |
| |
162 {"fonttbl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_FONTTABLE}, |
| |
163 {"f", 0, FALSE, NMRTF_KWD_PROP, NMRTF_PROP_FONT_IDX}, |
| |
164 {"fcharset", 0, FALSE, NMRTF_KWD_PROP, NMRTF_PROP_FONT_CHARSET}, |
| |
165 {"par", 0, FALSE, NMRTF_KWD_CHAR, 0x0a}, |
| |
166 {"line", 0, FALSE, NMRTF_KWD_CHAR, 0x0a}, |
| |
167 {"\0x0a", 0, FALSE, NMRTF_KWD_CHAR, 0x0a}, |
| |
168 {"\0x0d", 0, FALSE, NMRTF_KWD_CHAR, 0x0a}, |
| |
169 {"tab", 0, FALSE, NMRTF_KWD_CHAR, 0x09}, |
| |
170 {"\r", 0, FALSE, NMRTF_KWD_CHAR, '\r'}, |
| |
171 {"\n", 0, FALSE, NMRTF_KWD_CHAR, '\n'}, |
| |
172 {"ldblquote",0, FALSE, NMRTF_KWD_CHAR, '"'}, |
| |
173 {"rdblquote",0, FALSE, NMRTF_KWD_CHAR, '"'}, |
| |
174 {"{", 0, FALSE, NMRTF_KWD_CHAR, '{'}, |
| |
175 {"}", 0, FALSE, NMRTF_KWD_CHAR, '}'}, |
| |
176 {"\\", 0, FALSE, NMRTF_KWD_CHAR, '\\'}, |
| |
177 {"bin", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_BIN}, |
| |
178 {"*", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_SKIP}, |
| |
179 {"'", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_HEX}, |
| |
180 {"u", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_UNICODE}, |
| |
181 {"colortbl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
182 {"author", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
183 {"buptim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
184 {"comment", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
185 {"creatim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
186 {"doccomm", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
187 {"footer", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
188 {"footerf", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
189 {"footerl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
190 {"footerr", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
191 {"footnote", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
192 {"ftncn", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
193 {"ftnsep", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
194 {"ftnsepc", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
195 {"header", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
196 {"headerf", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
197 {"headerl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
198 {"headerr", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
199 {"info", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
200 {"keywords", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
201 {"operator", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
202 {"pict", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
203 {"printim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
204 {"private1", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
205 {"revtim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
206 {"rxe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
207 {"stylesheet", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
208 {"subject", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
209 {"tc", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
210 {"title", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
211 {"txe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}, |
| |
212 {"xe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP} |
| |
213 }; |
| |
214 int table_size = sizeof(rtf_symbols) / sizeof(NMRtfSymbol); |
| |
215 |
| |
216 NMRtfContext * |
| |
217 nm_rtf_init() |
| |
218 { |
| |
219 NMRtfContext *ctx = g_new0(NMRtfContext, 1); |
| |
220 ctx->nextch = -1; |
| |
221 ctx->ansi = g_string_new(""); |
| |
222 ctx->output = g_string_new(""); |
| |
223 return ctx; |
| |
224 } |
| |
225 |
| |
226 char * |
| |
227 nm_rtf_strip_formatting(NMRtfContext *ctx, const char *input) |
| |
228 { |
| |
229 int status; |
| |
230 |
| |
231 ctx->input = (char *)input; |
| |
232 status = rtf_parse(ctx); |
| |
233 if (status == NMRTF_OK) |
| |
234 return g_strdup(ctx->output->str); |
| |
235 |
| |
236 gaim_debug_info("novell", "RTF parser failed with error code %d", status); |
| |
237 return NULL; |
| |
238 } |
| |
239 |
| |
240 void |
| |
241 nm_rtf_deinit(NMRtfContext *ctx) |
| |
242 { |
| |
243 GSList *node; |
| |
244 NMRtfFont *font; |
| |
245 NMRtfStateSave *save; |
| |
246 |
| |
247 if (ctx) { |
| |
248 for (node = ctx->font_table; node; node = node->next) { |
| |
249 font = node->data; |
| |
250 g_free(font->name); |
| |
251 g_free(font); |
| |
252 node->data = NULL; |
| |
253 } |
| |
254 g_slist_free(ctx->font_table); |
| |
255 for (node = ctx->saved; node; node = node->next) { |
| |
256 save = node->data; |
| |
257 g_free(save); |
| |
258 node->data = NULL; |
| |
259 } |
| |
260 g_slist_free(ctx->saved); |
| |
261 g_string_free(ctx->ansi, TRUE); |
| |
262 g_string_free(ctx->output, TRUE); |
| |
263 g_free(ctx); |
| |
264 } |
| |
265 } |
| |
266 |
| |
267 static const char * |
| |
268 get_current_encoding(NMRtfContext *ctx) |
| |
269 { |
| |
270 NMRtfFont *font; |
| |
271 |
| |
272 font = rtf_get_font(ctx, ctx->chp.font_idx); |
| |
273 |
| |
274 switch (font->charset) { |
| |
275 case 0: |
| |
276 return "CP1252"; |
| |
277 case 77: |
| |
278 return "MACINTOSH"; |
| |
279 case 78: |
| |
280 return "SJIS"; |
| |
281 case 128: |
| |
282 return "CP932"; |
| |
283 case 129: |
| |
284 return "CP949"; |
| |
285 case 130: |
| |
286 return "CP1361"; |
| |
287 case 134: |
| |
288 return "CP936"; |
| |
289 case 136: |
| |
290 return "CP950"; |
| |
291 case 161: |
| |
292 return "CP1253"; |
| |
293 case 162: |
| |
294 return "CP1254"; |
| |
295 case 163: |
| |
296 return "CP1258"; |
| |
297 case 181: |
| |
298 case 177: |
| |
299 return "CP1255"; |
| |
300 case 178: |
| |
301 case 179: |
| |
302 case 180: |
| |
303 return "CP1256"; |
| |
304 case 186: |
| |
305 return "CP1257"; |
| |
306 case 204: |
| |
307 return "CP1251"; |
| |
308 case 222: |
| |
309 return "CP874"; |
| |
310 case 238: |
| |
311 return "CP1250"; |
| |
312 case 254: |
| |
313 return "CP437"; |
| |
314 default: |
| |
315 gaim_debug_info("novell", "Unhandled font charset %d\n", font->charset); |
| |
316 return "CP1252"; |
| |
317 } |
| |
318 return "CP1252"; |
| |
319 } |
| |
320 |
| |
321 |
| |
322 /* |
| |
323 * Add an entry to the font table |
| |
324 */ |
| |
325 static int |
| |
326 rtf_add_font_entry(NMRtfContext *ctx, int number, const char *name, int charset) |
| |
327 { |
| |
328 NMRtfFont *font = g_new0(NMRtfFont, 1); |
| |
329 |
| |
330 font->number = number; |
| |
331 font->name = g_strdup(name); |
| |
332 font->charset = charset; |
| |
333 |
| |
334 gaim_debug_info("novell", "Adding font to table: #%d\t%s\t%d\n", |
| |
335 font->number, font->name, font->charset); |
| |
336 |
| |
337 ctx->font_table = g_slist_append(ctx->font_table, font); |
| |
338 |
| |
339 return NMRTF_OK; |
| |
340 } |
| |
341 |
| |
342 /* |
| |
343 * Return the nth entry in the font table |
| |
344 */ |
| |
345 static NMRtfFont * |
| |
346 rtf_get_font(NMRtfContext *ctx, int nth) |
| |
347 { |
| |
348 NMRtfFont *font; |
| |
349 |
| |
350 font = g_slist_nth_data(ctx->font_table, nth); |
| |
351 |
| |
352 return font; |
| |
353 } |
| |
354 |
| |
355 /* |
| |
356 * Step 1: |
| |
357 * Isolate RTF keywords and send them to rtf_parse_keyword; |
| |
358 * Push and pop state at the start and end of RTF groups; |
| |
359 * Send text to rtf_dispatch_char for further processing. |
| |
360 */ |
| |
361 static int |
| |
362 rtf_parse(NMRtfContext *ctx) |
| |
363 { |
| |
364 int status; |
| |
365 guchar ch; |
| |
366 guchar hex_byte = 0; |
| |
367 int hex_count = 2; |
| |
368 int len; |
| |
369 |
| |
370 if (ctx->input == NULL) |
| |
371 return NMRTF_OK; |
| |
372 |
| |
373 while (rtf_get_char(ctx, &ch) == NMRTF_OK) { |
| |
374 if (ctx->depth < 0) |
| |
375 return NMRTF_STACK_UNDERFLOW; |
| |
376 |
| |
377 /* if we're parsing binary data, handle it directly */ |
| |
378 if (ctx->ris == NMRTF_STATE_BIN) { |
| |
379 if ((status = rtf_dispatch_char(ctx, ch)) != NMRTF_OK) |
| |
380 return status; |
| |
381 } else { |
| |
382 switch (ch) { |
| |
383 case '{': |
| |
384 if (ctx->depth > NMRTF_MAX_DEPTH) |
| |
385 return NMRTF_STACK_OVERFLOW; |
| |
386 rtf_flush_data(ctx); |
| |
387 if ((status = rtf_push_state(ctx)) != NMRTF_OK) |
| |
388 return status; |
| |
389 break; |
| |
390 case '}': |
| |
391 rtf_flush_data(ctx); |
| |
392 |
| |
393 /* for some reason there is always an unwanted '\par' at the end */ |
| |
394 if (ctx->rds == NMRTF_STATE_NORMAL) { |
| |
395 len = ctx->output->len; |
| |
396 if (ctx->output->str[len-1] == '\n') |
| |
397 ctx->output = g_string_truncate(ctx->output, len-1); |
| |
398 } |
| |
399 |
| |
400 if ((status = rtf_pop_state(ctx)) != NMRTF_OK) |
| |
401 return status; |
| |
402 |
| |
403 if (ctx->depth < 0) |
| |
404 return NMRTF_STACK_OVERFLOW; |
| |
405 break; |
| |
406 case '\\': |
| |
407 if ((status = rtf_parse_keyword(ctx)) != NMRTF_OK) |
| |
408 return status; |
| |
409 break; |
| |
410 case 0x0d: |
| |
411 case 0x0a: /* cr and lf are noise characters... */ |
| |
412 break; |
| |
413 default: |
| |
414 if (ctx->ris == NMRTF_STATE_NORMAL) { |
| |
415 if ((status = rtf_dispatch_char(ctx, ch)) != NMRTF_OK) |
| |
416 return status; |
| |
417 } else { /* parsing a hex encoded character */ |
| |
418 if (ctx->ris != NMRTF_STATE_HEX) |
| |
419 return NMRTF_ASSERTION; |
| |
420 |
| |
421 hex_byte = hex_byte << 4; |
| |
422 if (isdigit(ch)) |
| |
423 hex_byte += (char) ch - '0'; |
| |
424 else { |
| |
425 if (islower(ch)) { |
| |
426 if (ch < 'a' || ch > 'f') |
| |
427 return NMRTF_INVALID_HEX; |
| |
428 hex_byte += (char) ch - 'a' + 10; |
| |
429 } else { |
| |
430 if (ch < 'A' || ch > 'F') |
| |
431 return NMRTF_INVALID_HEX; |
| |
432 hex_byte += (char) ch - 'A' + 10; |
| |
433 } |
| |
434 } |
| |
435 hex_count--; |
| |
436 if (hex_count == 0) { |
| |
437 if ((status = rtf_dispatch_char(ctx, hex_byte)) != NMRTF_OK) |
| |
438 return status; |
| |
439 hex_count = 2; |
| |
440 hex_byte = 0; |
| |
441 ctx->ris = NMRTF_STATE_NORMAL; |
| |
442 } |
| |
443 } |
| |
444 break; |
| |
445 } |
| |
446 } |
| |
447 } |
| |
448 if (ctx->depth < 0) |
| |
449 return NMRTF_STACK_OVERFLOW; |
| |
450 if (ctx->depth > 0) |
| |
451 return NMRTF_UNMATCHED_BRACE; |
| |
452 return NMRTF_OK; |
| |
453 } |
| |
454 |
| |
455 /* |
| |
456 * Push the current state onto stack |
| |
457 */ |
| |
458 static int |
| |
459 rtf_push_state(NMRtfContext *ctx) |
| |
460 { |
| |
461 NMRtfStateSave *save = g_new0(NMRtfStateSave, 1); |
| |
462 save->chp = ctx->chp; |
| |
463 save->rds = ctx->rds; |
| |
464 save->ris = ctx->ris; |
| |
465 ctx->saved = g_slist_prepend(ctx->saved, save); |
| |
466 ctx->ris = NMRTF_STATE_NORMAL; |
| |
467 (ctx->depth)++; |
| |
468 return NMRTF_OK; |
| |
469 } |
| |
470 |
| |
471 /* |
| |
472 * Restore the state at the top of the stack |
| |
473 */ |
| |
474 static int |
| |
475 rtf_pop_state(NMRtfContext *ctx) |
| |
476 { |
| |
477 NMRtfStateSave *save_old; |
| |
478 GSList *link_old; |
| |
479 |
| |
480 if (ctx->saved == NULL) |
| |
481 return NMRTF_STACK_UNDERFLOW; |
| |
482 |
| |
483 save_old = ctx->saved->data; |
| |
484 ctx->chp = save_old->chp; |
| |
485 ctx->rds = save_old->rds; |
| |
486 ctx->ris = save_old->ris; |
| |
487 (ctx->depth)--; |
| |
488 |
| |
489 g_free(save_old); |
| |
490 link_old = ctx->saved; |
| |
491 ctx->saved = g_slist_remove_link(ctx->saved, link_old); |
| |
492 g_slist_free_1(link_old); |
| |
493 return NMRTF_OK; |
| |
494 } |
| |
495 |
| |
496 /* |
| |
497 * Step 2: |
| |
498 * Get a control word (and its associated value) and |
| |
499 * dispatch the control. |
| |
500 */ |
| |
501 static int |
| |
502 rtf_parse_keyword(NMRtfContext *ctx) |
| |
503 { |
| |
504 int status = NMRTF_OK; |
| |
505 guchar ch; |
| |
506 gboolean param_set = FALSE; |
| |
507 gboolean is_neg = FALSE; |
| |
508 int param = 0; |
| |
509 char keyword[30]; |
| |
510 char parameter[20]; |
| |
511 int i; |
| |
512 |
| |
513 keyword[0] = '\0'; |
| |
514 parameter[0] = '\0'; |
| |
515 if ((status = rtf_get_char(ctx, &ch)) != NMRTF_OK) |
| |
516 return status; |
| |
517 |
| |
518 if (!isalpha(ch)) { |
| |
519 /* a control symbol; no delimiter. */ |
| |
520 keyword[0] = (char) ch; |
| |
521 keyword[1] = '\0'; |
| |
522 return rtf_dispatch_control(ctx, keyword, 0, param_set); |
| |
523 } |
| |
524 |
| |
525 /* parse keyword */ |
| |
526 for (i = 0; isalpha(ch) && (i < sizeof(keyword) - 1); rtf_get_char(ctx, &ch)) { |
| |
527 keyword[i] = (char) ch; |
| |
528 i++; |
| |
529 } |
| |
530 keyword[i] = '\0'; |
| |
531 |
| |
532 /* check for '-' indicated a negative parameter value */ |
| |
533 if (ch == '-') { |
| |
534 is_neg = TRUE; |
| |
535 if ((status = rtf_get_char(ctx, &ch)) != NMRTF_OK) |
| |
536 return status; |
| |
537 } |
| |
538 |
| |
539 /* check for numerical param */ |
| |
540 if (isdigit(ch)) { |
| |
541 |
| |
542 param_set = TRUE; |
| |
543 for (i = 0; isdigit(ch) && (i < sizeof(parameter) - 1); rtf_get_char(ctx, &ch)) { |
| |
544 parameter[i] = (char) ch; |
| |
545 i++; |
| |
546 } |
| |
547 parameter[i] = '\0'; |
| |
548 |
| |
549 ctx->param = param = atoi(parameter); |
| |
550 if (is_neg) |
| |
551 ctx->param = param = -param; |
| |
552 } |
| |
553 |
| |
554 /* space after control is optional, put character back if it is not a space */ |
| |
555 if (ch != ' ') |
| |
556 rtf_unget_char(ctx, ch); |
| |
557 |
| |
558 return rtf_dispatch_control(ctx, keyword, param, param_set); |
| |
559 } |
| |
560 |
| |
561 /* |
| |
562 * Route the character to the appropriate destination |
| |
563 */ |
| |
564 static int |
| |
565 rtf_dispatch_char(NMRtfContext *ctx, guchar ch) |
| |
566 { |
| |
567 if (ctx->ris == NMRTF_STATE_BIN && --(ctx->bytes_to_skip) <= 0) |
| |
568 ctx->ris = NMRTF_STATE_NORMAL; |
| |
569 |
| |
570 switch (ctx->rds) { |
| |
571 case NMRTF_STATE_SKIP: |
| |
572 return NMRTF_OK; |
| |
573 case NMRTF_STATE_NORMAL: |
| |
574 return rtf_print_char(ctx, ch); |
| |
575 case NMRTF_STATE_FONTTABLE: |
| |
576 if (ch == ';') { |
| |
577 rtf_add_font_entry(ctx, ctx->chp.font_idx, |
| |
578 ctx->ansi->str, ctx->chp.font_charset); |
| |
579 g_string_truncate(ctx->ansi, 0); |
| |
580 } |
| |
581 else { |
| |
582 return rtf_print_char(ctx, ch); |
| |
583 } |
| |
584 return NMRTF_OK; |
| |
585 default: |
| |
586 return NMRTF_OK; |
| |
587 } |
| |
588 } |
| |
589 |
| |
590 /* Handle a unicode character */ |
| |
591 static int |
| |
592 rtf_dispatch_unicode_char(NMRtfContext *ctx, gunichar ch) |
| |
593 { |
| |
594 switch (ctx->rds) { |
| |
595 case NMRTF_STATE_SKIP: |
| |
596 return NMRTF_OK; |
| |
597 case NMRTF_STATE_NORMAL: |
| |
598 case NMRTF_STATE_FONTTABLE: |
| |
599 return rtf_print_unicode_char(ctx, ch); |
| |
600 default: |
| |
601 return NMRTF_OK; |
| |
602 } |
| |
603 } |
| |
604 |
| |
605 /* |
| |
606 * Output a character |
| |
607 */ |
| |
608 static int |
| |
609 rtf_print_char(NMRtfContext *ctx, guchar ch) |
| |
610 { |
| |
611 |
| |
612 ctx->ansi = g_string_append_c(ctx->ansi, ch); |
| |
613 |
| |
614 return NMRTF_OK; |
| |
615 } |
| |
616 |
| |
617 /* |
| |
618 * Output a unicode character |
| |
619 */ |
| |
620 static int |
| |
621 rtf_print_unicode_char(NMRtfContext *ctx, gunichar ch) |
| |
622 { |
| |
623 char buf[7]; |
| |
624 int num; |
| |
625 |
| |
626 /* convert and flush the ansi buffer to the utf8 buffer */ |
| |
627 rtf_flush_data(ctx); |
| |
628 |
| |
629 /* convert the unicode character to utf8 and add directly to the output buffer */ |
| |
630 num = g_unichar_to_utf8((gunichar) ch, buf); |
| |
631 buf[num] = 0; |
| |
632 gaim_debug_info("novell", "converted unichar 0x%X to utf8 char %s\n", ch, buf); |
| |
633 |
| |
634 ctx->output = g_string_append(ctx->output, buf); |
| |
635 return NMRTF_OK; |
| |
636 } |
| |
637 |
| |
638 /* |
| |
639 * Flush the output text |
| |
640 */ |
| |
641 static int |
| |
642 rtf_flush_data(NMRtfContext *ctx) |
| |
643 { |
| |
644 int status = NMRTF_OK; |
| |
645 char *conv_data = NULL; |
| |
646 const char *enc = NULL; |
| |
647 GError *gerror = NULL; |
| |
648 |
| |
649 if (ctx->rds == NMRTF_STATE_NORMAL && ctx->ansi->len > 0) { |
| |
650 enc = get_current_encoding(ctx); |
| |
651 conv_data = g_convert(ctx->ansi->str, ctx->ansi->len, "UTF-8", enc, |
| |
652 NULL, NULL, &gerror); |
| |
653 if (conv_data) { |
| |
654 ctx->output = g_string_append(ctx->output, conv_data); |
| |
655 g_free(conv_data); |
| |
656 ctx->ansi = g_string_truncate(ctx->ansi, 0); |
| |
657 } else { |
| |
658 status = NMRTF_CONVERT_ERROR; |
| |
659 gaim_debug_info("novell", "failed to convert data! error code = %d msg = %s\n", |
| |
660 gerror->code, gerror->message); |
| |
661 g_free(gerror); |
| |
662 } |
| |
663 } |
| |
664 |
| |
665 return status; |
| |
666 } |
| |
667 |
| |
668 /* |
| |
669 * Handle a property change |
| |
670 */ |
| |
671 static int |
| |
672 rtf_apply_property(NMRtfContext *ctx, NMRtfProperty prop, int val) |
| |
673 { |
| |
674 if (ctx->rds == NMRTF_STATE_SKIP) /* If we're skipping text, */ |
| |
675 return NMRTF_OK; /* don't do anything. */ |
| |
676 |
| |
677 /* Need to flush any temporary data before a property change*/ |
| |
678 rtf_flush_data(ctx); |
| |
679 |
| |
680 switch (prop) { |
| |
681 case NMRTF_PROP_FONT_IDX: |
| |
682 ctx->chp.font_idx = val; |
| |
683 break; |
| |
684 case NMRTF_PROP_FONT_CHARSET: |
| |
685 ctx->chp.font_charset = val; |
| |
686 break; |
| |
687 default: |
| |
688 return NMRTF_BAD_TABLE; |
| |
689 } |
| |
690 |
| |
691 return NMRTF_OK; |
| |
692 } |
| |
693 |
| |
694 /* |
| |
695 * Step 3. |
| |
696 * Search the table for keyword and evaluate it appropriately. |
| |
697 * |
| |
698 * Inputs: |
| |
699 * keyword: The RTF control to evaluate. |
| |
700 * param: The parameter of the RTF control. |
| |
701 * param_set: TRUE if the control had a parameter; (that is, if param is valid) |
| |
702 * FALSE if it did not. |
| |
703 */ |
| |
704 static int |
| |
705 rtf_dispatch_control(NMRtfContext *ctx, char *keyword, int param, gboolean param_set) |
| |
706 { |
| |
707 int idx; |
| |
708 |
| |
709 for (idx = 0; idx < table_size; idx++) { |
| |
710 if (strcmp(keyword, rtf_symbols[idx].keyword) == 0) |
| |
711 break; |
| |
712 } |
| |
713 |
| |
714 if (idx == table_size) { |
| |
715 if (ctx->skip_unknown) |
| |
716 ctx->rds = NMRTF_STATE_SKIP; |
| |
717 ctx->skip_unknown = FALSE; |
| |
718 return NMRTF_OK; |
| |
719 } |
| |
720 |
| |
721 /* found it! use kwd_type and action to determine what to do with it. */ |
| |
722 ctx->skip_unknown = FALSE; |
| |
723 switch (rtf_symbols[idx].kwd_type) { |
| |
724 case NMRTF_KWD_PROP: |
| |
725 if (rtf_symbols[idx].pass_default || !param_set) |
| |
726 param = rtf_symbols[idx].default_val; |
| |
727 return rtf_apply_property(ctx, rtf_symbols[idx].action, param); |
| |
728 case NMRTF_KWD_CHAR: |
| |
729 return rtf_dispatch_char(ctx, rtf_symbols[idx].action); |
| |
730 case NMRTF_KWD_DEST: |
| |
731 return rtf_change_destination(ctx, rtf_symbols[idx].action); |
| |
732 case NMRTF_KWD_SPEC: |
| |
733 return rtf_dispatch_special(ctx, rtf_symbols[idx].action); |
| |
734 default: |
| |
735 return NMRTF_BAD_TABLE; |
| |
736 } |
| |
737 return NMRTF_BAD_TABLE; |
| |
738 } |
| |
739 |
| |
740 /* |
| |
741 * Change to the destination specified. |
| |
742 */ |
| |
743 static int |
| |
744 rtf_change_destination(NMRtfContext *ctx, NMRtfDestinationType type) |
| |
745 { |
| |
746 /* if we're skipping text, don't do anything */ |
| |
747 if (ctx->rds == NMRTF_STATE_SKIP) |
| |
748 return NMRTF_OK; |
| |
749 |
| |
750 switch (type) { |
| |
751 case NMRTF_DEST_FONTTABLE: |
| |
752 ctx->rds = NMRTF_STATE_FONTTABLE; |
| |
753 g_string_truncate(ctx->ansi, 0); |
| |
754 break; |
| |
755 default: |
| |
756 ctx->rds = NMRTF_STATE_SKIP; /* when in doubt, skip it... */ |
| |
757 break; |
| |
758 } |
| |
759 return NMRTF_OK; |
| |
760 } |
| |
761 |
| |
762 /* |
| |
763 * Dispatch an RTF control that needs special processing |
| |
764 */ |
| |
765 static int |
| |
766 rtf_dispatch_special(NMRtfContext *ctx, NMRtfSpecialKwd type) |
| |
767 { |
| |
768 int status = NMRTF_OK; |
| |
769 guchar ch; |
| |
770 |
| |
771 if (ctx->rds == NMRTF_STATE_SKIP && type != NMRTF_SPECIAL_BIN) /* if we're skipping, and it's not */ |
| |
772 return NMRTF_OK; /* the \bin keyword, ignore it. */ |
| |
773 |
| |
774 switch (type) { |
| |
775 case NMRTF_SPECIAL_BIN: |
| |
776 ctx->ris = NMRTF_STATE_BIN; |
| |
777 ctx->bytes_to_skip = ctx->param; |
| |
778 break; |
| |
779 case NMRTF_SPECIAL_SKIP: |
| |
780 ctx->skip_unknown = TRUE; |
| |
781 break; |
| |
782 case NMRTF_SPECIAL_HEX: |
| |
783 ctx->ris = NMRTF_STATE_HEX; |
| |
784 break; |
| |
785 case NMRTF_SPECIAL_UNICODE: |
| |
786 gaim_debug_info("novell", "parsing unichar\n"); |
| |
787 status = rtf_dispatch_unicode_char(ctx, ctx->param); |
| |
788 /* Skip next char */ |
| |
789 if (status == NMRTF_OK) |
| |
790 status = rtf_get_char(ctx, &ch); |
| |
791 break; |
| |
792 default: |
| |
793 status = NMRTF_BAD_TABLE; |
| |
794 break; |
| |
795 } |
| |
796 |
| |
797 return status; |
| |
798 } |
| |
799 |
| |
800 /* |
| |
801 * Get the next character from the input stream |
| |
802 */ |
| |
803 static int |
| |
804 rtf_get_char(NMRtfContext *ctx, guchar *ch) |
| |
805 { |
| |
806 if (ctx->nextch >= 0) { |
| |
807 *ch = ctx->nextch; |
| |
808 ctx->nextch = -1; |
| |
809 } |
| |
810 else { |
| |
811 *ch = *(ctx->input); |
| |
812 ctx->input++; |
| |
813 } |
| |
814 |
| |
815 if (*ch) |
| |
816 return NMRTF_OK; |
| |
817 else |
| |
818 return NMRTF_EOF; |
| |
819 } |
| |
820 |
| |
821 /* |
| |
822 * Move a character back into the input stream |
| |
823 */ |
| |
824 static int |
| |
825 rtf_unget_char(NMRtfContext *ctx, guchar ch) |
| |
826 { |
| |
827 ctx->nextch = ch; |
| |
828 return NMRTF_OK; |
| |
829 } |