Sat, 05 Apr 2014 16:31:19 +0200
Comments: PurpleTrie
--- a/doc/reference/libpurple/libpurple-docs.xml Sat Apr 05 14:19:33 2014 +0200 +++ b/doc/reference/libpurple/libpurple-docs.xml Sat Apr 05 16:31:19 2014 +0200 @@ -86,6 +86,7 @@ <xi:include href="xml/theme.xml" /> <xi:include href="xml/theme-loader.xml" /> <xi:include href="xml/theme-manager.xml" /> + <xi:include href="xml/trie.xml" /> <xi:include href="xml/sound-theme.xml" /> <xi:include href="xml/sound-theme-loader.xml" /> <xi:include href="xml/upnp.xml" />
--- a/libpurple/memorypool.h Sat Apr 05 14:19:33 2014 +0200 +++ b/libpurple/memorypool.h Sat Apr 05 16:31:19 2014 +0200 @@ -24,8 +24,9 @@ #define PURPLE_MEMORY_POOL_H /** * SECTION:memorypool + * @include:memorypool.h * @section_id: libpurple-memorypool - * @short_description: <filename>memorypool.h</filename> + * @short_description: a container for a large number of small chunks of memory * @title: Memory pools * * A #PurpleMemoryPool allows allocating many small objects within a single
--- a/libpurple/trie.c Sat Apr 05 14:19:33 2014 +0200 +++ b/libpurple/trie.c Sat Apr 05 16:31:19 2014 +0200 @@ -809,7 +809,10 @@ properties[PROP_RESET_ON_MATCH] = g_param_spec_boolean("reset-on-match", "Reset on match", "Determines, if the search state machine " "should be reset to the initial state on every match. This " - "ensures, that every match is distinct from each other.", TRUE, + "ensures, that every match is distinct from each other. " + "Please note, that it's not well-defined for a replace " + "operation, so it's better to leave this value default, unless " + "you perform only find operations.", TRUE, G_PARAM_READWRITE | G_PARAM_CONSTRUCT | G_PARAM_STATIC_STRINGS); g_object_class_install_properties(obj_class, PROP_LAST, properties);
--- a/libpurple/trie.h Sat Apr 05 14:19:33 2014 +0200 +++ b/libpurple/trie.h Sat Apr 05 16:31:19 2014 +0200 @@ -22,6 +22,32 @@ #ifndef PURPLE_TRIE_H #define PURPLE_TRIE_H +/** + * SECTION:trie + * @include:trie.h + * @section_id: libpurple-trie + * @short_description: a structure for linear-time text searching + * @title: Tries + * + * A #PurpleTrie is a structure for quick searching of multiple phrases within + * a text. It's intended for repeated searches of the same set of patterns + * within multiple source texts (or a single, big one). + * + * It's preparation time is <literal>O(p)</literal>, where <literal>p</literal> + * is the total length of searched phrases. In current implementation, the + * internal structure is invalidated after every modification of the + * #PurpleTrie's contents, so it's not efficient to do alternating modifications + * and searches. Search time does not depend on patterns being stored within + * a trie and is always <literal>O(n)</literal>, where <literal>n</literal> is + * the size of a text. + * + * Its main drawback is a significant memory usage - every internal trie node + * needs about 1kB of memory on 32-bit machine and 2kB on 64-bit. Fortunately, + * the trie grows slower when more words (with common prefixes) are added. + * We could avoid invalidating the whole tree when altering it, but it would + * require figuring out, how to update <literal>longest_suffix</literal> fields + * in satisfying time. + */ #include <glib-object.h> @@ -40,12 +66,22 @@ typedef struct _PurpleTrie PurpleTrie; typedef struct _PurpleTrieClass PurpleTrieClass; +/** + * PurpleTrie: + * + * The trie object instance. + */ struct _PurpleTrie { /*< private >*/ GObject parent_instance; }; +/** + * PurpleTrieClass: + * + * Base class for #PurpleTrie objects. + */ struct _PurpleTrieClass { /*< private >*/ @@ -59,25 +95,52 @@ /** * PurpleTrieReplaceCb: - * @out: Currently built output string, append replacement to it. - * @word: Found word. - * @word_data: A user data bound with this word, when added with - * purple_trie_add(). - * @user_data: A user supplied data passed when calling purple_trie_replace(). + * @out: currently built output string, append replacement to it. + * @word: found word. + * @word_data: the user data bound with this word, when added with + * #purple_trie_add. + * @user_data: the user supplied data passed when calling #purple_trie_replace. * - * A funtion called after every found matching substring to be replaced. + * A funtion called on every matching substring to be replaced. * - * Returns: %TRUE, if the word was replaced, %FALSE otherwise. In the second - * case you must not modify @out. + * If you decide to replace the word, append your text to @out and return %TRUE. + * Otherwise, you must not touch @out. In both cases, you must not do any + * operations on @out other than appending text to it. + * + * Returns: %TRUE if the word was replaced, %FALSE otherwise. */ typedef gboolean (*PurpleTrieReplaceCb)(GString *out, const gchar *word, gpointer word_data, gpointer user_data); +/** + * PurpleTrieFindCb: + * @word: found word. + * @word_data: the user data bound with this word, when added with + * #purple_trie_add. + * @user_data: the user data passed when calling #purple_trie_find. + * + * A function called on every matching substring. + * + * You can decide to count the match or not (for the total number of found + * words, that is returned by #purple_trie_find). In both cases you can + * obviously do some processing outside the #PurpleTrie object. + * + * If you decide to count the word and #PurpleTrie:reset-on-match property + * is set, no overlapping words will be found - the processing will skip after + * the end of this word. + * + * Returns: %TRUE if the word should be counter, %FALSE otherwise. + */ typedef gboolean (*PurpleTrieFindCb)(const gchar *word, gpointer word_data, gpointer user_data); G_BEGIN_DECLS +/** + * purple_trie_get_type: + * + * Returns: the #GType for a #PurpleTrie. + */ GType purple_trie_get_type(void); @@ -86,14 +149,14 @@ * * Creates a new trie. * - * Returns: The new #PurpleTrie. + * Returns: the new #PurpleTrie. */ PurpleTrie * purple_trie_new(void); /** * purple_trie_get_reset_on_match: - * @trie: The trie. + * @trie: the trie. * * Checks, if the trie will reset its internal state after every match. * @@ -104,60 +167,76 @@ /** * purple_trie_set_reset_on_match: - * @trie: The trie. + * @trie: the trie. * @reset: %TRUE, if trie should reset, %FALSE otherwise. * * Enables or disables a feature of resetting trie's state after every match. * When enabled, it will not search for overlapping matches. + * + * It's well defined for #purple_trie_find, but not for replace operations. + * Thus, for the latter, it's better to stay with this option enabled, because + * its behavior may be changed in future. */ void purple_trie_set_reset_on_match(PurpleTrie *trie, gboolean reset); /** * purple_trie_add: - * @trie: The trie. - * @word: The word. - * @data: The word-related data (may be %NULL). + * @trie: the trie. + * @word: the word. + * @data: the word-related data (may be %NULL). + * + * Adds a word to the trie. Current implementation doesn't allow for duplicates, + * so please avoid adding those. * - * Adds a word to the trie. + * Please note, that altering a trie invalidates its internal structure, so by + * the occasion of next search, it will be rebuilt. It's done in + * <literal>O(n)</literal>, where n is the total length of strings + * in #PurpleTrie. * - * Returns: %TRUE if succeeded, %FALSE otherwise (possibly on duplicate) + * Returns: %TRUE if succeeded, %FALSE otherwise. */ gboolean purple_trie_add(PurpleTrie *trie, const gchar *word, gpointer data); /** * purple_trie_remove: - * @trie: The trie. - * @word: The word. + * @trie: the trie. + * @word: the word. * * Removes a word from the trie. Depending on used memory pool, this may not * free allocated memory (that will be freed when destroying the whole - * collection), so use it wisely. + * collection), so use it wisely. See #purple_memory_pool_free. + * + * Please note, that altering a trie invalidates its internal structure. + * See #purple_trie_add. */ void purple_trie_remove(PurpleTrie *trie, const gchar *word); /** * purple_trie_get_size: - * @trie: The trie. + * @trie: the trie. * - * Returns: The number of elements stored in this trie. + * Returns the number of elements contained in the #PurpleTrie. + * + * Returns: the number of stored words in @trie. */ guint purple_trie_get_size(PurpleTrie *trie); /** * purple_trie_replace: - * @trie: The trie. - * @src: The source string. - * @replace_cb: The replacement function. - * @user_data: Custom data to be passed to @replace_cb. + * @trie: the trie. + * @src: the source string. + * @replace_cb: the replacement function. + * @user_data: custom data to be passed to @replace_cb. * * Processes @src string and replaces all occuriences of words added to @trie. - * It's O(strlen(src)), if replace_cb runs in O(strlen(word)). + * It's <literal>O(strlen(src))</literal>, if @replace_cb runs in + * <literal>O(strlen(word))</literal> and #PurpleTrie:reset-on-match is set. * - * Returns: resulting string. Must be g_free'd when you are done using it. + * Returns: resulting string. Must be #g_free'd when you are done using it. */ gchar * purple_trie_replace(PurpleTrie *trie, const gchar *src, @@ -165,19 +244,19 @@ /** * purple_trie_multi_replace: - * @tries: The list of tries. - * @src: The source string. - * @replace_cb: The replacement function. - * @user_data: Custom data to be passed to @replace_cb. + * @tries: the list of tries. + * @src: the source string. + * @replace_cb: the replacement function. + * @user_data: custom data to be passed to @replace_cb. * * Processes @src and replaces all occuriences of words added to tries in list * @tries. Entries added to tries on the beginning of the list have higher * priority, than ones added further. * - * Differend GSList's can be combined to possess common parts, so you can create + * Different #GSList's can be combined to possess common parts, so you can create * a "tree of tries". * - * Returns: resulting string. Must be g_free'd when you are done using it. + * Returns: resulting string. Must be #g_free'd when you are done using it. */ gchar * purple_trie_multi_replace(const GSList *tries, const gchar *src, @@ -185,17 +264,18 @@ /** * purple_trie_find: - * @trie: The trie. - * @src: The source string. - * @find_cb: The callback for found entries (may be %NULL). - * @user_data: Custom data to be passed to @find_cb. + * @trie: the trie. + * @src: the source string. + * @find_cb: the callback for found entries (may be %NULL). + * @user_data: custom data to be passed to @find_cb. * * Processes @src string and finds all occuriences of words added to @trie. - * It's O(strlen(src)), if find_cb runs in O(1). + * It's <literal>O(strlen(src))</literal>, if find_cb runs + * in <literal>O(1)</literal>. * * The word is counted as found if it's found and the callback returns %TRUE. * - * Returns: The number of found words. + * Returns: the number of found words. */ gulong purple_trie_find(PurpleTrie *trie, const gchar *src,