Fri, 19 Mar 2004 17:34:33 +0000
[gaim-migrate @ 9206]
" You can once again show how evil you are by typing >:)
and getting it to render in spite of escaped HTML.
This patch changes around the parsing code to catch
smileys before eating just any HTML entity we bump into
on the street. We try to catch entities at the
beginning of smileys first, and if we're sure they're
not smileys, then we eat them for breakfast. The patch
also deals with eating any subsequent entities that may
appear in any smileys (like :-&) so we don't end up
with trailing leftovers. This patch description is
making me hungry.
FYI, I know this gtkimhtml is supposed to be not gaim
dependent, but both the gaim_* functions that were
preexisting and newly used in gtkimhtml code are all
non-gaim dependent utility functions from util.c, so I felt
their use was justified and acceptable." --Kevin Stange
committer: Luke Schierer <lschiere@pidgin.im>
| 7131 | 1 | /** |
| 2 | * @file xmlnode.c XML DOM functions | |
| 3 | * | |
| 4 | * gaim | |
| 5 | * | |
| 8046 | 6 | * Gaim is the legal property of its developers, whose names are too numerous |
| 7 | * to list here. Please refer to the COPYRIGHT file distributed with this | |
| 8 | * source distribution. | |
| 7131 | 9 | * |
| 10 | * This program is free software; you can redistribute it and/or modify | |
| 11 | * it under the terms of the GNU General Public License as published by | |
| 12 | * the Free Software Foundation; either version 2 of the License, or | |
| 13 | * (at your option) any later version. | |
| 14 | * | |
| 15 | * This program is distributed in the hope that it will be useful, | |
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 18 | * GNU General Public License for more details. | |
| 19 | * | |
| 20 | * You should have received a copy of the GNU General Public License | |
| 21 | * along with this program; if not, write to the Free Software | |
| 22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
| 23 | */ | |
| 24 | ||
| 25 | /* A lot of this code at least resembles the code in libxode, but since | |
| 26 | * libxode uses memory pools that we simply have no need for, I decided to | |
| 27 | * write my own stuff. Also, re-writing this lets me be as lightweight | |
| 28 | * as I want to be. Thank you libxode for giving me a good starting point */ | |
| 29 | ||
| 30 | #include "internal.h" | |
| 31 | ||
| 32 | #include <string.h> | |
| 33 | #include <glib.h> | |
| 34 | ||
| 35 | #include "xmlnode.h" | |
| 36 | ||
| 37 | static xmlnode* | |
| 8135 | 38 | new_node(const char *name, XMLNodeType type) |
| 7131 | 39 | { |
| 40 | xmlnode *node = g_new0(xmlnode, 1); | |
| 41 | if(name) | |
| 42 | node->name = g_strdup(name); | |
| 43 | node->type = type; | |
| 44 | ||
| 45 | return node; | |
| 46 | } | |
| 47 | ||
| 48 | xmlnode* | |
| 49 | xmlnode_new(const char *name) | |
| 50 | { | |
| 51 | g_return_val_if_fail(name != NULL, NULL); | |
| 52 | ||
| 8135 | 53 | return new_node(name, XMLNODE_TYPE_TAG); |
| 7131 | 54 | } |
| 55 | ||
| 56 | xmlnode *xmlnode_new_child(xmlnode *parent, const char *name) | |
| 57 | { | |
| 58 | xmlnode *node; | |
| 59 | ||
| 60 | g_return_val_if_fail(parent != NULL, NULL); | |
| 61 | g_return_val_if_fail(name != NULL, NULL); | |
| 62 | ||
| 8135 | 63 | node = new_node(name, XMLNODE_TYPE_TAG); |
| 7131 | 64 | |
| 65 | xmlnode_insert_child(parent, node); | |
| 66 | ||
| 67 | return node; | |
| 68 | } | |
| 69 | ||
| 70 | void | |
| 71 | xmlnode_insert_child(xmlnode *parent, xmlnode *child) | |
| 72 | { | |
| 73 | g_return_if_fail(parent != NULL); | |
| 74 | g_return_if_fail(child != NULL); | |
| 75 | ||
| 76 | child->parent = parent; | |
| 77 | ||
| 78 | if(parent->child) { | |
| 79 | xmlnode *x; | |
| 80 | for(x = parent->child; x->next; x = x->next); | |
| 81 | x->next = child; | |
| 82 | } else { | |
| 83 | parent->child = child; | |
| 84 | } | |
| 85 | } | |
| 86 | ||
| 87 | void | |
| 88 | xmlnode_insert_data(xmlnode *parent, const char *data, size_t size) | |
| 89 | { | |
| 90 | xmlnode *node; | |
| 91 | size_t real_size; | |
| 92 | ||
| 93 | g_return_if_fail(parent != NULL); | |
| 94 | g_return_if_fail(data != NULL); | |
| 95 | g_return_if_fail(size != 0); | |
| 96 | ||
| 97 | real_size = size == -1 ? strlen(data) : size; | |
| 98 | ||
| 8135 | 99 | node = new_node(NULL, XMLNODE_TYPE_DATA); |
| 7131 | 100 | |
| 101 | node->data = g_memdup(data, real_size); | |
| 102 | node->data_sz = real_size; | |
| 103 | ||
| 104 | xmlnode_insert_child(parent, node); | |
| 105 | } | |
| 106 | ||
| 107 | void | |
| 108 | xmlnode_remove_attrib(xmlnode *node, const char *attr) | |
| 109 | { | |
| 110 | xmlnode *attr_node, *sibling = NULL; | |
| 111 | ||
| 112 | g_return_if_fail(node != NULL); | |
| 113 | g_return_if_fail(attr != NULL); | |
| 114 | ||
| 115 | for(attr_node = node->child; attr_node; attr_node = attr_node->next) | |
| 116 | { | |
| 8135 | 117 | if(attr_node->type == XMLNODE_TYPE_ATTRIB && |
| 7131 | 118 | !strcmp(attr_node->name, attr)) { |
| 119 | if(node->child == attr_node) { | |
| 120 | node->child = attr_node->next; | |
| 121 | } else { | |
| 122 | sibling->next = attr_node->next; | |
| 123 | } | |
| 124 | xmlnode_free(attr_node); | |
| 125 | return; | |
| 126 | } | |
| 127 | sibling = attr_node; | |
| 128 | } | |
| 129 | } | |
| 130 | ||
| 131 | void | |
| 132 | xmlnode_set_attrib(xmlnode *node, const char *attr, const char *value) | |
| 133 | { | |
| 134 | xmlnode *attrib_node; | |
| 135 | ||
| 136 | g_return_if_fail(node != NULL); | |
| 137 | g_return_if_fail(attr != NULL); | |
| 138 | g_return_if_fail(value != NULL); | |
| 139 | ||
| 140 | xmlnode_remove_attrib(node, attr); | |
| 141 | ||
| 8135 | 142 | attrib_node = new_node(attr, XMLNODE_TYPE_ATTRIB); |
| 7131 | 143 | |
| 144 | attrib_node->data = g_strdup(value); | |
| 145 | ||
| 146 | xmlnode_insert_child(node, attrib_node); | |
| 147 | } | |
| 148 | ||
| 149 | const char* | |
| 150 | xmlnode_get_attrib(xmlnode *node, const char *attr) | |
| 151 | { | |
| 152 | xmlnode *x; | |
| 153 | ||
| 154 | g_return_val_if_fail(node != NULL, NULL); | |
| 155 | ||
| 156 | for(x = node->child; x; x = x->next) { | |
| 8135 | 157 | if(x->type == XMLNODE_TYPE_ATTRIB && !strcmp(attr, x->name)) { |
| 7131 | 158 | return x->data; |
| 159 | } | |
| 160 | } | |
| 161 | ||
| 162 | return NULL; | |
| 163 | } | |
| 164 | ||
| 165 | void xmlnode_free(xmlnode *node) | |
| 166 | { | |
| 167 | xmlnode *x, *y; | |
| 168 | ||
| 169 | g_return_if_fail(node != NULL); | |
| 170 | ||
| 171 | x = node->child; | |
| 172 | while(x) { | |
| 173 | y = x->next; | |
| 174 | xmlnode_free(x); | |
| 175 | x = y; | |
| 176 | } | |
| 177 | ||
| 178 | if(node->name) | |
| 179 | g_free(node->name); | |
| 180 | if(node->data) | |
| 181 | g_free(node->data); | |
| 182 | g_free(node); | |
| 183 | } | |
| 184 | ||
| 185 | xmlnode* | |
| 8262 | 186 | xmlnode_get_child_with_namespace(xmlnode *parent, const char *name, const char *ns) |
| 7131 | 187 | { |
| 188 | xmlnode *x, *ret = NULL; | |
| 189 | char **names; | |
| 190 | char *parent_name, *child_name; | |
| 191 | ||
| 192 | g_return_val_if_fail(parent != NULL, NULL); | |
| 193 | ||
| 194 | names = g_strsplit(name, "/", 2); | |
| 195 | parent_name = names[0]; | |
| 196 | child_name = names[1]; | |
| 197 | ||
| 198 | for(x = parent->child; x; x = x->next) { | |
| 8262 | 199 | const char *xmlns = NULL; |
| 200 | if(ns) | |
| 201 | xmlns = xmlnode_get_attrib(x, "xmlns"); | |
| 202 | ||
| 203 | if(x->type == XMLNODE_TYPE_TAG && name && !strcmp(parent_name, x->name) | |
| 204 | && (!ns || (xmlns && !strcmp(ns, xmlns)))) { | |
| 7131 | 205 | ret = x; |
| 206 | break; | |
| 207 | } | |
| 208 | } | |
| 209 | ||
| 210 | if(child_name && ret) | |
| 8262 | 211 | ret = xmlnode_get_child(ret, child_name); |
| 7131 | 212 | |
| 213 | g_strfreev(names); | |
| 214 | return ret; | |
| 215 | } | |
| 216 | ||
| 8262 | 217 | xmlnode* |
| 218 | xmlnode_get_child(xmlnode *parent, const char *name) | |
| 219 | { | |
| 220 | return xmlnode_get_child_with_namespace(parent, name, NULL); | |
| 221 | } | |
| 222 | ||
| 7131 | 223 | char * |
| 224 | xmlnode_get_data(xmlnode *node) | |
| 225 | { | |
| 226 | GString *str = NULL; | |
| 227 | char *ret = NULL; | |
| 228 | xmlnode *c; | |
| 229 | ||
| 230 | g_return_val_if_fail(node != NULL, NULL); | |
| 231 | ||
| 232 | ||
| 233 | for(c = node->child; c; c = c->next) { | |
| 8135 | 234 | if(c->type == XMLNODE_TYPE_DATA) { |
| 7131 | 235 | if(!str) |
| 236 | str = g_string_new(""); | |
| 237 | str = g_string_append_len(str, c->data, c->data_sz); | |
| 238 | } | |
| 239 | } | |
| 240 | ||
| 241 | if(str) { | |
| 242 | ret = str->str; | |
| 243 | g_string_free(str, FALSE); | |
| 244 | } | |
| 245 | ||
| 246 | return ret; | |
| 247 | } | |
| 248 | ||
| 7642 | 249 | char *xmlnode_to_str(xmlnode *node, int *len) |
| 7131 | 250 | { |
| 251 | char *ret; | |
| 252 | GString *text = g_string_new(""); | |
| 253 | xmlnode *c; | |
| 254 | char *node_name, *esc, *esc2; | |
| 255 | gboolean need_end = FALSE; | |
| 256 | ||
| 257 | node_name = g_markup_escape_text(node->name, -1); | |
| 258 | g_string_append_printf(text, "<%s", node_name); | |
| 259 | ||
| 260 | ||
| 261 | for(c = node->child; c; c = c->next) | |
| 262 | { | |
| 8135 | 263 | if(c->type == XMLNODE_TYPE_ATTRIB) { |
| 7131 | 264 | esc = g_markup_escape_text(c->name, -1); |
| 265 | esc2 = g_markup_escape_text(c->data, -1); | |
| 266 | g_string_append_printf(text, " %s='%s'", esc, esc2); | |
| 267 | g_free(esc); | |
| 268 | g_free(esc2); | |
| 8135 | 269 | } else if(c->type == XMLNODE_TYPE_TAG || c->type == XMLNODE_TYPE_DATA) { |
| 7131 | 270 | need_end = TRUE; |
| 271 | } | |
| 272 | } | |
| 273 | ||
| 274 | if(need_end) { | |
| 275 | text = g_string_append_c(text, '>'); | |
| 276 | ||
| 277 | for(c = node->child; c; c = c->next) | |
| 278 | { | |
| 8135 | 279 | if(c->type == XMLNODE_TYPE_TAG) { |
| 7642 | 280 | int esc_len; |
| 281 | esc = xmlnode_to_str(c, &esc_len); | |
| 282 | text = g_string_append_len(text, esc, esc_len); | |
| 7131 | 283 | g_free(esc); |
| 8135 | 284 | } else if(c->type == XMLNODE_TYPE_DATA) { |
| 7131 | 285 | esc = g_markup_escape_text(c->data, c->data_sz); |
| 7642 | 286 | text = g_string_append(text, esc); |
| 7131 | 287 | g_free(esc); |
| 288 | } | |
| 289 | } | |
| 290 | ||
| 291 | g_string_append_printf(text, "</%s>", node_name); | |
| 292 | } else { | |
| 293 | g_string_append_printf(text, "/>"); | |
| 294 | } | |
| 295 | ||
| 296 | g_free(node_name); | |
| 297 | ||
| 298 | ret = text->str; | |
| 7642 | 299 | if(len) |
| 300 | *len = text->len; | |
| 7131 | 301 | g_string_free(text, FALSE); |
| 302 | return ret; | |
| 303 | } | |
| 304 | ||
| 305 | struct _xmlnode_parser_data { | |
| 306 | xmlnode *current; | |
| 307 | }; | |
| 308 | ||
| 309 | static void | |
| 310 | xmlnode_parser_element_start(GMarkupParseContext *context, | |
| 311 | const char *element_name, const char **attrib_names, | |
| 312 | const char **attrib_values, gpointer user_data, GError **error) | |
| 313 | { | |
| 314 | struct _xmlnode_parser_data *xpd = user_data; | |
| 315 | xmlnode *node; | |
| 316 | int i; | |
| 317 | ||
| 318 | if(!element_name) { | |
| 319 | return; | |
| 320 | } else { | |
| 321 | if(xpd->current) | |
| 322 | node = xmlnode_new_child(xpd->current, element_name); | |
| 323 | else | |
| 324 | node = xmlnode_new(element_name); | |
| 325 | ||
| 326 | for(i=0; attrib_names[i]; i++) | |
| 327 | xmlnode_set_attrib(node, attrib_names[i], attrib_values[i]); | |
| 328 | ||
| 329 | xpd->current = node; | |
| 330 | } | |
| 331 | } | |
| 332 | ||
| 333 | static void | |
| 334 | xmlnode_parser_element_end(GMarkupParseContext *context, | |
| 335 | const char *element_name, gpointer user_data, GError **error) | |
| 336 | { | |
| 337 | struct _xmlnode_parser_data *xpd = user_data; | |
| 338 | ||
| 339 | if(!element_name || !xpd->current) | |
| 340 | return; | |
| 341 | ||
| 342 | if(xpd->current->parent) { | |
| 343 | if(!strcmp(xpd->current->name, element_name)) | |
| 344 | xpd->current = xpd->current->parent; | |
| 345 | } | |
| 346 | } | |
| 347 | ||
| 348 | static void | |
| 349 | xmlnode_parser_element_text(GMarkupParseContext *context, const char *text, | |
| 350 | gsize text_len, gpointer user_data, GError **error) | |
| 351 | { | |
| 352 | struct _xmlnode_parser_data *xpd = user_data; | |
| 353 | ||
| 354 | if(!xpd->current) | |
| 355 | return; | |
| 356 | ||
| 357 | if(!text || !text_len) | |
| 358 | return; | |
| 359 | ||
| 360 | xmlnode_insert_data(xpd->current, text, text_len); | |
| 361 | } | |
| 362 | ||
| 363 | static GMarkupParser xmlnode_parser = { | |
| 364 | xmlnode_parser_element_start, | |
| 365 | xmlnode_parser_element_end, | |
| 366 | xmlnode_parser_element_text, | |
| 367 | NULL, | |
| 368 | NULL | |
| 369 | }; | |
| 370 | ||
| 371 | ||
| 372 | xmlnode *xmlnode_from_str(const char *str, size_t size) | |
| 373 | { | |
| 374 | struct _xmlnode_parser_data *xpd = g_new0(struct _xmlnode_parser_data, 1); | |
| 375 | xmlnode *ret; | |
| 376 | GMarkupParseContext *context; | |
| 377 | size_t real_size = size == -1 ? strlen(str) : size; | |
| 378 | ||
| 379 | context = g_markup_parse_context_new(&xmlnode_parser, 0, xpd, NULL); | |
| 380 | ||
| 381 | if(!g_markup_parse_context_parse(context, str, real_size, NULL)) { | |
| 382 | while(xpd->current && xpd->current->parent) | |
| 383 | xpd->current = xpd->current->parent; | |
| 384 | if(xpd->current) | |
| 385 | xmlnode_free(xpd->current); | |
| 386 | xpd->current = NULL; | |
| 387 | } | |
| 388 | g_markup_parse_context_free(context); | |
| 389 | ||
| 390 | ret = xpd->current; | |
| 391 | g_free(xpd); | |
| 392 | return ret; | |
| 393 | } | |
| 8135 | 394 | |
| 395 | xmlnode *xmlnode_copy(xmlnode *src) | |
| 396 | { | |
| 397 | xmlnode *ret; | |
| 398 | xmlnode *child; | |
| 399 | xmlnode *sibling = NULL; | |
| 400 | ||
| 401 | if(!src) | |
| 402 | return NULL; | |
| 403 | ||
| 404 | ret = new_node(src->name, src->type); | |
| 405 | if(src->data) { | |
| 8167 | 406 | if(src->data_sz) { |
| 407 | ret->data = g_memdup(src->data, src->data_sz); | |
| 408 | ret->data_sz = src->data_sz; | |
| 409 | } else { | |
| 410 | ret->data = g_strdup(src->data); | |
| 411 | } | |
| 8135 | 412 | } |
| 413 | ||
| 414 | for(child = src->child; child; child = child->next) { | |
| 415 | if(sibling) { | |
| 416 | sibling->next = xmlnode_copy(child); | |
| 417 | sibling = sibling->next; | |
| 418 | } else { | |
| 419 | ret->child = xmlnode_copy(child); | |
| 420 | sibling = ret->child; | |
| 421 | } | |
| 422 | sibling->parent = ret; | |
| 423 | } | |
| 424 | ||
| 425 | return ret; | |
| 426 | } | |
| 427 | ||
| 428 | xmlnode *xmlnode_get_next_twin(xmlnode *node) { | |
| 429 | xmlnode *sibling; | |
| 8262 | 430 | const char *ns = xmlnode_get_attrib(node, "xmlns"); |
| 8135 | 431 | |
| 432 | g_return_val_if_fail(node != NULL, NULL); | |
| 433 | g_return_val_if_fail(node->type == XMLNODE_TYPE_TAG, NULL); | |
| 434 | ||
| 435 | for(sibling = node->next; sibling; sibling = sibling->next) { | |
| 8283 | 436 | const char *xmlns = NULL; |
| 8262 | 437 | if(ns) |
| 438 | xmlns = xmlnode_get_attrib(sibling, "xmlns"); | |
| 439 | ||
| 440 | if(sibling->type == XMLNODE_TYPE_TAG && !strcmp(node->name, sibling->name) && | |
| 441 | (!ns || (xmlns && !strcmp(ns, xmlns)))) | |
| 8135 | 442 | return sibling; |
| 443 | } | |
| 444 | ||
| 445 | return NULL; | |
| 446 | } | |
| 447 |