search: skip combining diacritical marks in search operations
https://bugzilla.gnome.org/show_bug.cgi?id=648587
This commit is contained in:
parent
15cac0157c
commit
5308d12239
@ -738,7 +738,8 @@ normalize_terms (GSList *terms)
|
|||||||
for (iter = terms; iter; iter = iter->next)
|
for (iter = terms; iter; iter = iter->next)
|
||||||
{
|
{
|
||||||
const char *term = iter->data;
|
const char *term = iter->data;
|
||||||
normalized_terms = g_slist_prepend (normalized_terms, shell_util_normalize_and_casefold (term));
|
normalized_terms = g_slist_prepend (normalized_terms,
|
||||||
|
shell_util_normalize_casefold_and_unaccent (term));
|
||||||
}
|
}
|
||||||
return normalized_terms;
|
return normalized_terms;
|
||||||
}
|
}
|
||||||
|
@ -1319,16 +1319,16 @@ shell_app_init_search_data (ShellApp *app)
|
|||||||
|
|
||||||
appinfo = gmenu_tree_entry_get_app_info (app->entry);
|
appinfo = gmenu_tree_entry_get_app_info (app->entry);
|
||||||
name = g_app_info_get_name (G_APP_INFO (appinfo));
|
name = g_app_info_get_name (G_APP_INFO (appinfo));
|
||||||
app->casefolded_name = shell_util_normalize_and_casefold (name);
|
app->casefolded_name = shell_util_normalize_casefold_and_unaccent (name);
|
||||||
|
|
||||||
generic_name = g_desktop_app_info_get_generic_name (appinfo);
|
generic_name = g_desktop_app_info_get_generic_name (appinfo);
|
||||||
if (generic_name)
|
if (generic_name)
|
||||||
app->casefolded_generic_name = shell_util_normalize_and_casefold (generic_name);
|
app->casefolded_generic_name = shell_util_normalize_casefold_and_unaccent (generic_name);
|
||||||
else
|
else
|
||||||
app->casefolded_generic_name = NULL;
|
app->casefolded_generic_name = NULL;
|
||||||
|
|
||||||
exec = g_app_info_get_executable (G_APP_INFO (appinfo));
|
exec = g_app_info_get_executable (G_APP_INFO (appinfo));
|
||||||
normalized_exec = shell_util_normalize_and_casefold (exec);
|
normalized_exec = shell_util_normalize_casefold_and_unaccent (exec);
|
||||||
app->casefolded_exec = trim_exec_line (normalized_exec);
|
app->casefolded_exec = trim_exec_line (normalized_exec);
|
||||||
g_free (normalized_exec);
|
g_free (normalized_exec);
|
||||||
|
|
||||||
@ -1343,7 +1343,7 @@ shell_app_init_search_data (ShellApp *app)
|
|||||||
i = 0;
|
i = 0;
|
||||||
while (keywords[i])
|
while (keywords[i])
|
||||||
{
|
{
|
||||||
app->casefolded_keywords[i] = shell_util_normalize_and_casefold (keywords[i]);
|
app->casefolded_keywords[i] = shell_util_normalize_casefold_and_unaccent (keywords[i]);
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
app->casefolded_keywords[i] = NULL;
|
app->casefolded_keywords[i] = NULL;
|
||||||
|
@ -122,12 +122,90 @@ shell_util_normalize_and_casefold (const char *str)
|
|||||||
if (str == NULL)
|
if (str == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
/* NOTE: 'ALL' is equivalent to 'NFKD'. If this is ever updated, please
|
||||||
|
* update the unaccenting mechanism as well. */
|
||||||
normalized = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
|
normalized = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
|
||||||
result = g_utf8_casefold (normalized, -1);
|
result = g_utf8_casefold (normalized, -1);
|
||||||
g_free (normalized);
|
g_free (normalized);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Combining diacritical mark?
 *  Basic range: [0x0300,0x036F]
 *  Extended:    [0x1AB0,0x1AFF]
 *  Supplement:  [0x1DC0,0x1DFF]
 *  For Symbols: [0x20D0,0x20FF]
 *  Half marks:  [0xFE20,0xFE2F]
 *
 * Evaluates to non-zero when the UCS-4 code point @c lies in one of the
 * Unicode "Combining Diacritical Marks" blocks listed above.
 *
 * NOTE: the argument is evaluated several times, so only pass an
 * expression without side effects.
 */
#define IS_CDM_UCS4(c) (((c) >= 0x0300 && (c) <= 0x036F)  || \
                        ((c) >= 0x1AB0 && (c) <= 0x1AFF)  || \
                        ((c) >= 0x1DC0 && (c) <= 0x1DFF)  || \
                        ((c) >= 0x20D0 && (c) <= 0x20FF)  || \
                        ((c) >= 0xFE20 && (c) <= 0xFE2F))
|
||||||
|
|
||||||
|
/* Copied from tracker/src/libtracker-fts/tracker-parser-glib.c under the GPL
|
||||||
|
* Originally written by Aleksander Morgado <aleksander@gnu.org>
|
||||||
|
*/
|
||||||
|
char *
|
||||||
|
shell_util_normalize_casefold_and_unaccent (const char *str)
|
||||||
|
{
|
||||||
|
char *tmp;
|
||||||
|
gsize i = 0, j = 0, ilen;
|
||||||
|
|
||||||
|
if (str == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/* Get the NFKD-normalized and casefolded string */
|
||||||
|
tmp = shell_util_normalize_and_casefold (str);
|
||||||
|
ilen = strlen (tmp);
|
||||||
|
|
||||||
|
while (i < ilen)
|
||||||
|
{
|
||||||
|
gunichar unichar;
|
||||||
|
gchar *next_utf8;
|
||||||
|
gint utf8_len;
|
||||||
|
|
||||||
|
/* Get next character of the word as UCS4 */
|
||||||
|
unichar = g_utf8_get_char_validated (&tmp[i], -1);
|
||||||
|
|
||||||
|
/* Invalid UTF-8 character or end of original string. */
|
||||||
|
if (unichar == (gunichar) -1 ||
|
||||||
|
unichar == (gunichar) -2)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Find next UTF-8 character */
|
||||||
|
next_utf8 = g_utf8_next_char (&tmp[i]);
|
||||||
|
utf8_len = next_utf8 - &tmp[i];
|
||||||
|
|
||||||
|
if (IS_CDM_UCS4 ((guint32) unichar))
|
||||||
|
{
|
||||||
|
/* If the given unichar is a combining diacritical mark,
|
||||||
|
* just update the original index, not the output one */
|
||||||
|
i += utf8_len;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If already found a previous combining
|
||||||
|
* diacritical mark, indexes are different so
|
||||||
|
* need to copy characters. As output and input
|
||||||
|
* buffers may overlap, need to use memmove
|
||||||
|
* instead of memcpy */
|
||||||
|
if (i != j)
|
||||||
|
{
|
||||||
|
memmove (&tmp[j], &tmp[i], utf8_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Update both indexes */
|
||||||
|
i += utf8_len;
|
||||||
|
j += utf8_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Force proper string end */
|
||||||
|
tmp[j] = '\0';
|
||||||
|
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* shell_util_format_date:
|
* shell_util_format_date:
|
||||||
* @format: a strftime-style string format, as parsed by
|
* @format: a strftime-style string format, as parsed by
|
||||||
|
@ -20,6 +20,8 @@ int shell_util_get_week_start (void);
|
|||||||
|
|
||||||
char *shell_util_normalize_and_casefold (const char *str);
|
char *shell_util_normalize_and_casefold (const char *str);
|
||||||
|
|
||||||
|
char *shell_util_normalize_casefold_and_unaccent (const char *str);
|
||||||
|
|
||||||
char *shell_util_format_date (const char *format,
|
char *shell_util_format_date (const char *format,
|
||||||
gint64 time_ms);
|
gint64 time_ms);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user