src/gtksourceiter.c

changeset 12465
59859abbe998
parent 12205
1b0471533cf5
--- a/src/gtksourceiter.c	Mon Dec 12 08:05:03 2005 +0000
+++ b/src/gtksourceiter.c	Mon Dec 12 08:08:07 2005 +0000
@@ -1,13 +1,15 @@
 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- 
- * @file gtksourceiter.h GTK+ Source iterator
- * @ingroup gtkui
- *
- *  gaim
+ *  gtksourceiter.c
  *
  *  Gaim is the legal property of its developers, whose names are too numerous
  *  to list here.  Please refer to the COPYRIGHT file distributed with this
  *  source distribution.
  *
+ *  The following copyright notice applies to this file:
+ *
+ *  Copyright (C) 2000 - 2005 Paolo Maggi 
+ *  Copyright (C) 2002, 2003 Jeroen Zwartepoorte
+ *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU Library General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
@@ -36,12 +38,36 @@
 
 #define GTK_TEXT_UNKNOWN_CHAR 0xFFFC
 
-static gchar *
+/* this function acts like g_utf8_offset_to_pointer() except that if it finds a
+ * decomposable character it consumes the decomposition length from the given
+ * offset.  So it's useful when the offset was calculated for the normalized
+ * version of str, but we need a pointer to str itself. */
+static const gchar *
+pointer_from_offset_skipping_decomp (const gchar *str, gint offset)
+{
+	gchar *casefold, *normal;
+	const gchar *p, *q;
+
+	p = str;
+	while (offset > 0)
+	{
+		q = g_utf8_next_char (p);
+		casefold = g_utf8_casefold (p, q - p);
+		normal = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
+		offset -= g_utf8_strlen (normal, -1);
+		g_free (casefold);
+		g_free (normal);
+		p = q;
+	}
+	return p;
+}
+
+static const gchar *
 g_utf8_strcasestr (const gchar *haystack, const gchar *needle)
 {
 	gsize needle_len;
 	gsize haystack_len;
-	gchar *ret = NULL;
+	const gchar *ret = NULL;
 	gchar *p;
 	gchar *casefold;
 	gchar *caseless_haystack;
@@ -51,7 +77,7 @@
 	g_return_val_if_fail (needle != NULL, NULL);
 
 	casefold = g_utf8_casefold (haystack, -1);
-	caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
+	caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
 	g_free (casefold);
 
 	needle_len = g_utf8_strlen (needle, -1);
@@ -77,7 +103,7 @@
 	{
 		if ((strncmp (p, needle, needle_len) == 0))
 		{
-			ret = g_utf8_offset_to_pointer (haystack, i);
+			ret = pointer_from_offset_skipping_decomp (haystack, i);
 			goto finally_1;
 		}
 
@@ -91,12 +117,12 @@
 	return ret;
 }
 
-static gchar *
+static const gchar *
 g_utf8_strrcasestr (const gchar *haystack, const gchar *needle)
 {
 	gsize needle_len;
 	gsize haystack_len;
-	gchar *ret = NULL;
+	const gchar *ret = NULL;
 	gchar *p;
 	gchar *casefold;
 	gchar *caseless_haystack;
@@ -106,7 +132,7 @@
 	g_return_val_if_fail (needle != NULL, NULL);
 
 	casefold = g_utf8_casefold (haystack, -1);
-	caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
+	caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
 	g_free (casefold);
 
 	needle_len = g_utf8_strlen (needle, -1);
@@ -124,16 +150,15 @@
 		goto finally_1;
 	}
 
-	haystack_len = strlen (caseless_haystack);
+	i = haystack_len - needle_len;
+	p = g_utf8_offset_to_pointer (caseless_haystack, i);
 	needle_len = strlen (needle);
-	p = (gchar *)caseless_haystack + haystack_len - needle_len;
-	i = haystack_len - needle_len;
 
 	while (p >= caseless_haystack)
 	{
-		if (strncasecmp (p, needle, needle_len) == 0)
+		if (strncmp (p, needle, needle_len) == 0)
 		{
-			ret = g_utf8_offset_to_pointer (haystack, i);
+			ret = pointer_from_offset_skipping_decomp (haystack, i);
 			goto finally_1;
 		}
 
@@ -164,11 +189,11 @@
 	g_return_val_if_fail (n2 > 0, FALSE);
 
 	casefold = g_utf8_casefold (s1, n1);
-	normalized_s1 = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
+	normalized_s1 = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
 	g_free (casefold);
 
 	casefold = g_utf8_casefold (s2, n2);
-	normalized_s2 = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
+	normalized_s2 = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
 	g_free (casefold);
 
 	len_s1 = strlen (normalized_s1);
@@ -190,7 +215,8 @@
 forward_chars_with_skipping (GtkTextIter *iter,
 			     gint         count,
 			     gboolean     skip_invisible,
-			     gboolean     skip_nontext)
+			     gboolean     skip_nontext,
+			     gboolean     skip_decomp)
 {
 	gint i;
 
@@ -202,15 +228,37 @@
 	{
 		gboolean ignored = FALSE;
 
+		/* minimal workaround to avoid the infinite loop of bug #168247.
+		 * It doesn't fix the problemjust the symptom...
+		 */
+		if (gtk_text_iter_is_end (iter))
+			return;
+
 		if (skip_nontext && gtk_text_iter_get_char (iter) == GTK_TEXT_UNKNOWN_CHAR)
 			ignored = TRUE;
 
 #if 0
 		if (!ignored && skip_invisible &&
-		    _gtk_text_btree_char_is_invisible (iter))
+		    /* _gtk_text_btree_char_is_invisible (iter)*/ FALSE)
 			ignored = TRUE;
 #endif
 
+		if (!ignored && skip_decomp)
+		{
+			/* being UTF8 correct sucks; this accounts for extra
+			   offsets coming from canonical decompositions of
+			   UTF8 characters (e.g. accented characters) which 
+			   g_utf8_normalize() performs */
+			gchar *normal;
+			gchar buffer[6];
+			gint buffer_len;
+
+			buffer_len = g_unichar_to_utf8 (gtk_text_iter_get_char (iter), buffer);
+			normal = g_utf8_normalize (buffer, buffer_len, G_NORMALIZE_NFD);
+			i -= (g_utf8_strlen (normal, -1) - 1);
+			g_free (normal);
+		}
+
 		gtk_text_iter_forward_char (iter);
 
 		if (!ignored)
@@ -292,18 +340,14 @@
 	/* If match start needs to be returned, set it to the
 	 * start of the search string.
 	 */
+	forward_chars_with_skipping (&next, offset, visible_only, !slice, FALSE);
 	if (match_start)
 	{
 		*match_start = next;
-
-		forward_chars_with_skipping (match_start, offset,
-					     visible_only, !slice);
 	}
 
 	/* Go to end of search string */
-	offset += g_utf8_strlen (*lines, -1);
-
-	forward_chars_with_skipping (&next, offset, visible_only, !slice);
+	forward_chars_with_skipping (&next, g_utf8_strlen (*lines, -1), visible_only, !slice, TRUE);
 
 	g_free (line_text);
 
@@ -389,19 +433,18 @@
 	/* Get offset to start of search string */
 	offset = g_utf8_strlen (line_text, found - line_text);
 
+	forward_chars_with_skipping (&next, offset, visible_only, !slice, FALSE);
+
 	/* If match start needs to be returned, set it to the
 	 * start of the search string.
 	 */
 	if (match_start)
 	{
 		*match_start = next;
-		gtk_text_iter_set_visible_line_offset (match_start, offset);
 	}
 
 	/* Go to end of search string */
-	offset += g_utf8_strlen (*lines, -1);
-
-	forward_chars_with_skipping (&next, offset, visible_only, !slice);
+	forward_chars_with_skipping (&next, g_utf8_strlen (*lines, -1), visible_only, !slice, TRUE);
 
 	g_free (line_text);
 
@@ -448,7 +491,7 @@
 			new_string[len] = 0;
 			casefold = g_utf8_casefold (new_string, -1);
 			g_free (new_string);
-			new_string = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
+			new_string = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
 			g_free (casefold);
 			string_list = g_slist_prepend (string_list, new_string);
 			n++;
@@ -461,7 +504,7 @@
 	{
 		n++;
 		casefold = g_utf8_casefold (string, -1);
-		new_string = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
+		new_string = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
 		g_free (casefold);
 		string_list = g_slist_prepend (string_list, new_string);
 	}
@@ -481,6 +524,12 @@
 
 /**
  * gtk_source_iter_forward_search:
+ * @iter: start of search.
+ * @str: a search string.
+ * @flags: flags affecting how the search is done.
+ * @match_start: return location for start of match, or %%NULL.
+ * @match_end: return location for end of match, or %%NULL.
+ * @limit: bound for the search, or %%NULL for the end of the buffer.
  * 
  * Searches forward for @str. Any match is returned by setting 
  * @match_start to the first character of the match and @match_end to the 
@@ -502,14 +551,8 @@
  * Same as gtk_text_iter_forward_search(), but supports case insensitive
  * searching.
  * 
- * @param iter start of search
- * @param str a search string
- * @param flags flags affecting how the search is done
- * @param match_start return location for start of match, or %NULL
- * @param match_end return location for end of match, or %NULL
- * @param limit bound for the search, or %NULL for the end of the buffer
- * @return returns whether a match was found
- */
+ * Return value: whether a match was found.
+ **/
 gboolean
 gtk_source_iter_forward_search (const GtkTextIter   *iter,
 				const gchar         *str,
@@ -580,8 +623,8 @@
 		if (lines_match (&search, (const gchar**)lines,
 				 visible_only, slice, &match, &end))
 		{
-			if (limit == NULL || (limit &&
-					      gtk_text_iter_compare (&end, limit) < 0))
+			if (limit == NULL ||
+			    (limit && gtk_text_iter_compare (&end, limit) <= 0))
 			{
 				retval = TRUE;
 
@@ -601,18 +644,18 @@
 
 /**
  * gtk_source_iter_backward_search:
+ * @iter: a #GtkTextIter where the search begins.
+ * @str: search string.
+ * @flags: bitmask of flags affecting the search.
+ * @match_start: return location for start of match, or %%NULL.
+ * @match_end: return location for end of match, or %%NULL.
+ * @limit: location of last possible @match_start, or %%NULL for start of buffer.
  * 
  * Same as gtk_text_iter_backward_search(), but supports case insensitive
  * searching.
  * 
- * @param iter a #GtkTextIter where the search begins
- * @param str search string
- * @param flags bitmask of flags affecting the search
- * @param match_start return location for start of match, or %NULL
- * @param match_end return location for end of match, or %NULL
- * @param limit location of last possible @match_start, or %NULL for start of buffer
- * @return returns whether a match was found
- */
+ * Return value: whether a match was found.
+ **/
 gboolean
 gtk_source_iter_backward_search (const GtkTextIter   *iter,
 				 const gchar         *str,

mercurial