| 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA |
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA |
| 21 */ |
21 */ |
| 22 |
22 |
| 23 #include "trie.h" |
23 #include "trie.h" |
| 24 |
24 |
| |
25 #include <string.h> |
| |
26 |
| 25 #include "debug.h" |
27 #include "debug.h" |
| 26 #include "memorypool.h" |
28 #include "memorypool.h" |
| 27 |
29 |
| 28 #define PURPLE_TRIE_GET_PRIVATE(obj) \ |
30 #define PURPLE_TRIE_GET_PRIVATE(obj) \ |
| 29 (G_TYPE_INSTANCE_GET_PRIVATE((obj), PURPLE_TYPE_TRIE, PurpleTriePrivate)) |
31 (G_TYPE_INSTANCE_GET_PRIVATE((obj), PURPLE_TYPE_TRIE, PurpleTriePrivate)) |
| 169 } |
172 } |
| 170 } |
173 } |
| 171 |
174 |
| 172 /* Allocates a state and binds it to the parent. */ |
175 /* Allocates a state and binds it to the parent. */ |
| 173 static PurpleTrieState * |
176 static PurpleTrieState * |
| 174 purple_trie_state_new(PurpleTrie *trie, PurpleTrieState *parent, guchar letter) |
177 purple_trie_state_new(PurpleTrie *trie, PurpleTrieState *parent, guchar character) |
| 175 { |
178 { |
| 176 PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie); |
179 PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie); |
| 177 PurpleTrieState *state; |
180 PurpleTrieState *state; |
| 178 |
181 |
| 179 g_return_val_if_fail(priv != NULL, NULL); |
182 g_return_val_if_fail(priv != NULL, NULL); |
| 239 * index of all remaining words. Loop finishes when there are no words |
242 * index of all remaining words. Loop finishes when there are no words |
| 240 * longer than cur_len. */ |
243 * longer than cur_len. */ |
| 241 for (cur_len = 0; reclist != NULL; cur_len++) { |
244 for (cur_len = 0; reclist != NULL; cur_len++) { |
| 242 for (it = reclist; it; it = it->next) { |
245 for (it = reclist; it; it = it->next) { |
| 243 PurpleTrieRecord *rec = it->rec; |
246 PurpleTrieRecord *rec = it->rec; |
| 244 guchar letter = rec->word[cur_len]; |
247 guchar character = rec->word[cur_len]; |
| 245 PurpleTrieState *prefix = it->extra_data; |
248 PurpleTrieState *prefix = it->extra_data; |
| 246 PurpleTrieState *lon_suf_parent; |
249 PurpleTrieState *lon_suf_parent; |
| 247 |
250 |
| 248 /* the whole word is already added to the trie */ |
251 /* the whole word is already added to the trie */ |
| 249 if (letter == '\0') { |
252 if (character == '\0') { |
| 250 if (prefix->found_word == NULL) |
253 if (prefix->found_word == NULL) |
| 251 prefix->found_word = rec; |
254 prefix->found_word = rec; |
| 252 else { |
255 else { |
| 253 purple_debug_warning("trie", "found " |
256 purple_debug_warning("trie", "found " |
| 254 "a collision of \"%s\" words", |
257 "a collision of \"%s\" words", |
| 261 continue; |
264 continue; |
| 262 } |
265 } |
| 263 |
266 |
| 264 /* word's prefix is already in the trie, added by the |
267 /* word's prefix is already in the trie, added by the |
| 265 * other word */ |
268 * other word */ |
| 266 if (prefix->children && prefix->children[letter]) { |
269 if (prefix->children && prefix->children[character]) { |
| 267 it->extra_data = prefix->children[letter]; |
270 it->extra_data = prefix->children[character]; |
| 268 continue; |
271 continue; |
| 269 } |
272 } |
| 270 |
273 |
| 271 /* We need to create a new branch of trie. |
274 /* We need to create a new branch of trie. |
| 272 * prefix is now of length increased by one letter. |
275 * prefix is now of length increased by one character. |
| 273 */ |
276 */ |
| 274 prefix = purple_trie_state_new(trie, prefix, letter); |
277 prefix = purple_trie_state_new(trie, prefix, character); |
| 275 if (!prefix) { |
278 if (!prefix) { |
| 276 g_warn_if_reached(); |
279 g_warn_if_reached(); |
| 277 g_object_unref(reclist_mpool); |
280 g_object_unref(reclist_mpool); |
| 278 return FALSE; |
281 return FALSE; |
| 279 } |
282 } |
| 285 * in the (cur_len - 1) level of trie. */ |
288 * in the (cur_len - 1) level of trie. */ |
| 286 g_assert(prefix->longest_suffix == NULL); |
289 g_assert(prefix->longest_suffix == NULL); |
| 287 lon_suf_parent = prefix->parent->longest_suffix; |
290 lon_suf_parent = prefix->parent->longest_suffix; |
| 288 while (lon_suf_parent) { |
291 while (lon_suf_parent) { |
| 289 if (lon_suf_parent->children && |
292 if (lon_suf_parent->children && |
| 290 lon_suf_parent->children[letter]) |
293 lon_suf_parent->children[character]) |
| 291 { |
294 { |
| 292 prefix->longest_suffix = |
295 prefix->longest_suffix = lon_suf_parent-> |
| 293 lon_suf_parent->children[letter]; |
296 children[character]; |
| 294 break; |
297 break; |
| 295 } |
298 } |
| |
299 lon_suf_parent = lon_suf_parent->longest_suffix; |
| 296 } |
300 } |
| 297 if (prefix->longest_suffix == NULL) |
301 if (prefix->longest_suffix == NULL) |
| 298 prefix->longest_suffix = root; |
302 prefix->longest_suffix = root; |
| 299 } |
303 } |
| 300 } |
304 } |
| 303 |
307 |
| 304 return TRUE; |
308 return TRUE; |
| 305 } |
309 } |
| 306 |
310 |
| 307 /******************************************************************************* |
311 /******************************************************************************* |
| |
312 * Searching |
| |
313 ******************************************************************************/ |
| |
314 |
| |
315 gchar * |
| |
316 purple_trie_replace(PurpleTrie *trie, const gchar *src, |
| |
317 PurpleTrieReplaceCb replace_cb, gpointer user_data) |
| |
318 { |
| |
319 PurpleTriePrivate *priv = PURPLE_TRIE_GET_PRIVATE(trie); |
| |
320 PurpleTrieState *state; |
| |
321 gsize i = 0; |
| |
322 GString *out; |
| |
323 |
| |
324 if (src == NULL) |
| |
325 return NULL; |
| |
326 |
| |
327 g_return_val_if_fail(replace_cb != NULL, g_strdup(src)); |
| |
328 g_return_val_if_fail(priv != NULL, NULL); |
| |
329 |
| |
330 out = g_string_new(NULL); |
| |
331 state = priv->root_state; |
| |
332 while (src[i] != '\0') { |
| |
333 guchar character = src[i]; |
| |
334 |
| |
335 /* change state after processing a character */ |
| |
336 while (TRUE) { |
| |
337 /* Perfect fit - next character is the same, as the |
| |
338 * child of the prefix we reached so far. */ |
| |
339 if (state->children && state->children[character]) { |
| |
340 state = state->children[character]; |
| |
341 break; |
| |
342 } |
| |
343 |
| |
344 /* We reached root, that's a pity. */ |
| |
345 if (state == priv->root_state) |
| |
346 break; |
| |
347 |
| |
348 state = state->longest_suffix; |
| |
349 } |
| |
350 |
| |
351 /* if we reached a "found" state, let's process it */ |
| |
352 if (state->found_word) { |
| |
353 gboolean was_replaced; |
| |
354 |
| |
355 was_replaced = replace_cb(out, state->found_word->word, |
| |
356 state->found_word->data, user_data); |
| |
357 |
| |
358 if (was_replaced || priv->reset_on_match) { |
| |
359 i += state->found_word->word_len; |
| |
360 state = priv->root_state; |
| |
361 } else |
| |
362 i++; |
| |
363 } else |
| |
364 i++; |
| |
365 } |
| |
366 |
| |
367 return g_string_free(out, FALSE); |
| |
368 } |
| |
369 |
| |
370 |
| |
371 /******************************************************************************* |
| 308 * Records |
372 * Records |
| 309 ******************************************************************************/ |
373 ******************************************************************************/ |
| 310 |
374 |
| 311 void |
375 void |
| 312 purple_trie_add(PurpleTrie *trie, const gchar *word, gpointer data) |
376 purple_trie_add(PurpleTrie *trie, const gchar *word, gpointer data) |
| 323 purple_trie_states_cleanup(trie); |
387 purple_trie_states_cleanup(trie); |
| 324 |
388 |
| 325 rec = purple_memory_pool_alloc(priv->records_obj_mempool, |
389 rec = purple_memory_pool_alloc(priv->records_obj_mempool, |
| 326 sizeof(PurpleTrieRecord), sizeof(gpointer)); |
390 sizeof(PurpleTrieRecord), sizeof(gpointer)); |
| 327 rec->word = purple_memory_pool_strdup(priv->records_str_mempool, word); |
391 rec->word = purple_memory_pool_strdup(priv->records_str_mempool, word); |
| |
392 rec->word_len = strlen(word); |
| 328 rec->data = data; |
393 rec->data = data; |
| 329 |
394 |
| 330 priv->records = purple_record_list_prepend(priv->records_obj_mempool, |
395 priv->records = purple_record_list_prepend(priv->records_obj_mempool, |
| 331 priv->records, rec); |
396 priv->records, rec); |
| 332 |
397 |