diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 80be0f435ca0b8aa005e8e63f93ed31ef2b0447e..2b02d93e34901f86399a6880f6e505c072265c9a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -26,7 +26,7 @@ variables: meson vala desktop-file-utils enchant2-devel folks-devel gcr-devel glib2-devel gmime30-devel gnome-online-accounts-devel gspell-devel gsound-devel gtk3-devel iso-codes-devel json-glib-devel itstool - libappstream-glib-devel libgee-devel libhandy1-devel + libappstream-glib-devel libgee-devel libhandy1-devel libicu-devel libpeas-devel libsecret-devel libstemmer-devel libunwind-devel libxml2-devel libytnef-devel sqlite-devel webkitgtk4-devel FEDORA_TEST_DEPS: glibc-langpack-en gnutls-utils tar Xvfb xz @@ -37,9 +37,9 @@ variables: itstool libappstream-glib-dev libenchant-2-dev libfolks-dev libgcr-3-dev libgee-0.8-dev libglib2.0-dev libgmime-3.0-dev libgoa-1.0-dev libgspell-1-dev libgsound-dev libgtk-3-dev - libhandy-1-dev libjson-glib-dev libmessaging-menu-dev libpeas-dev - libsecret-1-dev libsqlite3-dev libstemmer-dev libunwind-dev - libwebkit2gtk-4.0-dev libxml2-dev libytnef0-dev + libhandy-1-dev libicu-dev libjson-glib-dev libmessaging-menu-dev + libpeas-dev libsecret-1-dev libsqlite3-dev libstemmer-dev + libunwind-dev libwebkit2gtk-4.0-dev libxml2-dev libytnef0-dev UBUNTU_TEST_DEPS: gnutls-bin librsvg2-common locales xauth xvfb fedora: diff --git a/BUILDING.md b/BUILDING.md index ff2c7e46ebc56a6196d428e213a2fc2d4d4e6f37..f63b104143547fbc0542368c17ec280a61a1b1ae 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -74,8 +74,10 @@ Building Geary requires the following major libraries and tools: See the `meson.build` file in the top-level directory for the complete list of required dependencies and minimum versions. -Geary also requires SQLite to be built with the compiler flag -`-DSQLITE_ENABLE_FTS3`. +Geary requires SQLite is built with both FTS3 and FTS5 support. Ensure +`--enable-fts5`, `-DSQLITE_ENABLE_FTS3` and +`-DSQLITE_ENABLE_FTS3_PARENTHESIS` are passed to the SQLite configure +script. All required libraries and tools are available from major Linux distribution's package repositories: @@ -91,8 +93,9 @@ sudo dnf install meson vala desktop-file-utils enchant2-devel \ gnome-online-accounts-devel gspell-devel gsound-devel \ gtk3-devel iso-codes-devel itstool json-glib-devel \ libappstream-glib-devel libgee-devel libhandy1-devel \ - libpeas-devel libsecret-devel libstemmer-devel libunwind-devel \ - libxml2-devel libytnef-devel sqlite-devel webkitgtk4-devel + libpeas-devel libsecret-devel libicu-devel libstemmer-devel \ + libunwind-devel libxml2-devel libytnef-devel sqlite-devel \ + webkitgtk4-devel ``` Installing dependencies on Ubuntu/Debian @@ -106,8 +109,8 @@ sudo apt-get install meson build-essential valac \ libappstream-glib-dev libenchant-2-dev libfolks-dev \ libgcr-3-dev libgee-0.8-dev libglib2.0-dev libgmime-3.0-dev \ libgoa-1.0-dev libgspell-1-dev libgsound-dev libgtk-3-dev \ - libjson-glib-dev libhandy-1-dev libpeas-dev libsecret-1-dev \ - libsqlite3-dev libstemmer-dev libunwind-dev \ + libjson-glib-dev libhandy-1-dev libicu-dev libpeas-dev \ + libsecret-1-dev libsqlite3-dev libstemmer-dev libunwind-dev \ libwebkit2gtk-4.0-dev libxml2-dev libytnef0-dev ``` diff --git a/bindings/vapi/icu-uc.vapi b/bindings/vapi/icu-uc.vapi new file mode 100644 index 0000000000000000000000000000000000000000..b470faff1adf8cc05391c39b8318043822be5ae4 --- /dev/null +++ b/bindings/vapi/icu-uc.vapi @@ -0,0 +1,95 @@ +// Based on icu-uc.vapi from the Dino project. + +[CCode (cprefix="u_")] +namespace Icu { + + [CCode (cname = "UChar")] + [IntegerType (rank = 5, min = 0, max = 65535)] + struct Char {} + + [CCode (cname = "UErrorCode", cprefix = "U_", cheader_filename = "unicode/utypes.h")] + enum ErrorCode { + ZERO_ERROR, + INVALID_CHAR_FOUND, + INDEX_OUTOFBOUNDS_ERROR, + BUFFER_OVERFLOW_ERROR, + STRINGPREP_PROHIBITED_ERROR, + UNASSIGNED_CODE_POINT_FOUND, + IDNA_STD3_ASCII_RULES_ERROR; + + [CCode (cname = "u_errorName")] + public unowned string errorName(); + + [CCode (cname = "U_SUCCESS")] + public bool is_success(); + + [CCode (cname = "U_FAILURE")] + public bool is_failure(); + } + + [CCode (cname = "UParseError", cprefix = "U_", cheader_filename = "unicode/parseerr.h")] + struct ParseError {} + + [CCode (cname = "UText", cprefix = "utext_", free_function = "utext_close", cheader_filename = "unicode/utext.h")] + [Compact] + class Text { + [CCode (cname="utext_openUTF8")] + public static Text open_utf8(Text* existing, [CCode (array_length_type = "int64_t")] uint8[] text, ref ErrorCode status); + } + + [CCode (cname = "UBreakIterator", cprefix = "ubrk_", free_function = "ubrk_close", cheader_filename = "unicode/ubrk.h")] + [Compact] + class BreakIterator { + + [CCode (cname = "UBRK_DONE")] + public const int32 DONE; + + [CCode (cname = "UBreakIteratorType", cprefix = "UBRK_")] + public enum Type { + CHARACTER, + WORD, + LINE, + SENTENCE; + } + + [CCode (cname = "UWordBreak", cprefix = "UBRK_WORD_")] + enum WordBreak { + NONE, + NONE_LIMIT, + NUMBER, + NUMBER_LIMIT, + LETTER, + LETTER_LIMIT, + KANA, + KANA_LIMIT, + IDEO, + IDEO_LIMIT; + } + + public static BreakIterator open(Type type, string locale, Char* text, int32 text_len, ref ErrorCode status); + + public int32 current { + [CCode (cname="ubrk_current")] get; + } + public int32 rule_status { + [CCode (cname="ubrk_getRuleStatus")] get; + } + + [CCode (cname="ubrk_isBoundary")] + public bool is_boundary(int32 offset); + + public int32 first(); + public int32 last(); + + public int32 next(); + public int32 previous(); + + public int32 proceeding(int32 offset); + public int32 following(int32 offset); + + [CCode (cname="ubrk_setUText")] + public void set_utext(Text text, ref ErrorCode status); + } + + +} \ No newline at end of file diff --git a/meson.build b/meson.build index 03533ec165e9b63a4832f0250be43c933470cb40..37b52709575a345f27b162493acde38f11e258c8 100644 --- a/meson.build +++ b/meson.build @@ -85,6 +85,7 @@ goa = dependency('goa-1.0') gsound = dependency('gsound') gspell = dependency('gspell-1') gthread = dependency('gthread-2.0', version: '>=' + target_glib) +icu_uc = dependency('icu-uc', version: '>=60') iso_codes = dependency('iso-codes') javascriptcoregtk = dependency('javascriptcoregtk-4.0', version: '>=' + target_webkit) json_glib = dependency('json-glib-1.0', version: '>= 1.0') @@ -110,6 +111,13 @@ webkit2gtk_web_extension = dependency('webkit2gtk-web-extension-4.0', version: ' # following libraries, but the declared dependency is what we actually # build against so we can include the custom VAPI correctly. +icu_uc = declare_dependency( + dependencies: [ + valac.find_library('icu-uc', dirs: [vapi_dir]), + cc.find_library('icuuc'), + ], +) + if libunwind_dep.found() # We need to add native lib to the search path for these so Flatpak # builds can find it. @@ -158,8 +166,10 @@ web_extensions_dir = client_lib_dir / 'web-extensions' # Ensure SQLite was built correctly if not cc.has_header_symbol('sqlite3.h', 'SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER', dependencies: sqlite) - error('SQLite3 is missing FTS3 tokenizer support. Please compile it with -DSQLITE_ENABLE_FTS3.\n' - + 'See https://bugzilla.gnome.org/show_bug.cgi?id=763203 for details.') + error('SQLite3 was not built with FTS3 support. See BUILDING.md for details.') +endif +if not cc.has_header_symbol('sqlite3.h', 'Fts5ExtensionApi', dependencies: sqlite) + error('SQLite3 was not built with FTS5 support. See BUILDING.md for details.') endif # diff --git a/po/POTFILES.in b/po/POTFILES.in index a5cbd6ee740e3e3251ee473c2d780ca7ac11a96a..1e476d6769950690d9b99219defcf6b3f0238c3b 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -211,6 +211,7 @@ src/engine/app/email-store/app-mark-operation.vala src/engine/common/common-contact-harvester.vala src/engine/common/common-contact-store-impl.vala src/engine/common/common-message-data.vala +src/engine/common/common-fts-search-query.vala src/engine/db/db.vala src/engine/db/db-connection.vala src/engine/db/db-context.vala @@ -267,9 +268,11 @@ src/engine/imap-db/imap-db-attachment.vala src/engine/imap-db/imap-db-database.vala src/engine/imap-db/imap-db-email-identifier.vala src/engine/imap-db/imap-db-folder.vala +src/engine/imap-db/imap-db-fts5-matches.c +src/engine/imap-db/imap-db-fts5-tokeniser.c src/engine/imap-db/imap-db-gc.vala src/engine/imap-db/imap-db-message-row.vala -src/engine/imap-db/imap-db-search-query.vala +src/engine/imap-db/imap-db-sqlite.c src/engine/imap-engine/gmail/imap-engine-gmail-account.vala src/engine/imap-engine/gmail/imap-engine-gmail-all-mail-folder.vala src/engine/imap-engine/gmail/imap-engine-gmail-drafts-folder.vala diff --git a/sql/meson.build b/sql/meson.build index 6bf30dcaf3971c89a219e52e02f9eb1660e5d003..c47972276f2df4250145d47780f6944ec2d6084b 100644 --- a/sql/meson.build +++ b/sql/meson.build @@ -28,6 +28,7 @@ sql_files = [ 'version-027.sql', 'version-028.sql', 'version-029.sql', + 'version-030.sql', ] install_data(sql_files, diff --git a/sql/version-030.sql b/sql/version-030.sql new file mode 100644 index 0000000000000000000000000000000000000000..4fbde30d77a506df773de79dd6ead44964329cc4 --- /dev/null +++ b/sql/version-030.sql @@ -0,0 +1,19 @@ +-- +-- Convert full-text search from FTS3/4 to FTS5 +-- + +DROP TABLE IF EXISTS MessageSearchTable; + +CREATE VIRTUAL TABLE MessageSearchTable USING fts5( + body, + attachments, + subject, + "from", + receivers, + cc, + bcc, + flags, + + tokenize="geary_tokeniser", + prefix="2,4,6,8,10" +) diff --git a/src/client/application/application-controller.vala b/src/client/application/application-controller.vala index 2237aa618bc52ce78eb4000a3515451bd3a2bb34..6aec77acfe4cf65109acc16f64e364f4919662eb 100644 --- a/src/client/application/application-controller.vala +++ b/src/client/application/application-controller.vala @@ -1088,7 +1088,7 @@ internal class Application.Controller : update_account_status(); // Stop any background processes - context.search.clear(); + context.search.clear_query(); context.contacts.close(); context.cancellable.cancel(); diff --git a/src/client/application/application-main-window.vala b/src/client/application/application-main-window.vala index d5255a4154109e2252330296a24b1b2e08df79de..d0f0ae21b147879a96ccb659e619149327295968 100644 --- a/src/client/application/application-main-window.vala +++ b/src/client/application/application-main-window.vala @@ -323,7 +323,6 @@ public class Application.MainWindow : private GLib.Cancellable action_update_cancellable = new GLib.Cancellable(); private GLib.Cancellable folder_open = new GLib.Cancellable(); - private GLib.Cancellable search_open = new GLib.Cancellable(); private Geary.TimeoutManager update_ui_timeout; private int64 update_ui_last = 0; @@ -968,30 +967,30 @@ public class Application.MainWindow : return closed; } - internal async void start_search(string query_text, bool is_interactive) { + internal void start_search(string query_text, bool is_interactive) { var context = get_selected_account_context(); if (context != null) { - // Stop any search in progress - this.search_open.cancel(); - var cancellable = this.search_open = new GLib.Cancellable(); - + // If the current folder is not the search folder, save it + // so it can be re-selected later when search is closed if (this.previous_non_search_folder == null && this.selected_folder != null && this.selected_folder.used_as != SEARCH) { this.previous_non_search_folder = this.selected_folder; } - var strategy = this.application.config.get_search_strategy(); try { - var query = yield context.account.new_search_query( - query_text, - strategy, - cancellable + var expr_factory = new Util.Email.SearchExpressionFactory( + this.application.config.get_search_strategy(), + context.account.information + ); + var query = context.account.new_search_query( + expr_factory.parse_query(query_text), + query_text ); this.folder_list.set_search( this.application.engine, context.search ); - yield context.search.search(query, cancellable); + context.search.update_query(query); } catch (GLib.Error error) { handle_error(context.account.information, error); } @@ -999,10 +998,8 @@ public class Application.MainWindow : } internal void stop_search(bool is_interactive) { - // Stop any search in progress - this.search_open.cancel(); - this.search_open = new GLib.Cancellable(); - + // If the search folder is current selected, deselect and + // re-select any previously selected folder if (this.selected_folder == null || this.selected_folder.used_as == SEARCH) { var to_select = this.previous_non_search_folder; @@ -1024,7 +1021,7 @@ public class Application.MainWindow : this.folder_list.remove_search(); foreach (var context in this.controller.get_account_contexts()) { - context.search.clear(); + context.search.clear_query(); } } @@ -2216,7 +2213,7 @@ public class Application.MainWindow : if (Geary.String.is_empty_or_whitespace(text)) { stop_search(true); } else { - this.start_search.begin(text, true); + this.start_search(text, true); } } diff --git a/src/client/conversation-viewer/conversation-viewer.vala b/src/client/conversation-viewer/conversation-viewer.vala index 74706f8ca33034845b5bc53ff475b6e36259ecad..4d895db4659457855cfa11a4b008cbaafdd93e80 100644 --- a/src/client/conversation-viewer/conversation-viewer.vala +++ b/src/client/conversation-viewer/conversation-viewer.vala @@ -297,9 +297,7 @@ public class ConversationViewer : Gtk.Stack, Geary.BaseInterface { // Highlight matching terms from find if active, otherwise // from the search folder if that's where we are at - Geary.SearchQuery? query = yield get_find_search_query( - conversation.base_folder.account, null - ); + var query = get_find_search_query(conversation.base_folder.account); if (query == null) { var search_folder = conversation.base_folder as Geary.App.SearchFolder; if (search_folder != null) { @@ -406,9 +404,8 @@ public class ConversationViewer : Gtk.Stack, Geary.BaseInterface { }); this.find_cancellable = cancellable; try { - Geary.SearchQuery? query = yield get_find_search_query( - list.conversation.base_folder.account, - cancellable + var query = get_find_search_query( + list.conversation.base_folder.account ); if (query != null) { yield list.search.highlight_matching_email(query, true); @@ -419,8 +416,7 @@ public class ConversationViewer : Gtk.Stack, Geary.BaseInterface { } } - private async Geary.SearchQuery? get_find_search_query(Geary.Account account, - GLib.Cancellable? cancellable) + private Geary.SearchQuery? get_find_search_query(Geary.Account account) throws GLib.Error { Geary.SearchQuery? query = null; if (this.conversation_find_bar.get_search_mode()) { @@ -429,11 +425,13 @@ public class ConversationViewer : Gtk.Stack, Geary.BaseInterface { // opening every message in the conversation as soon as // the user presses a key if (text.length >= 2) { - var strategy = this.config.get_search_strategy(); - query = yield account.new_search_query( - text, - strategy, - cancellable + var expr_factory = new Util.Email.SearchExpressionFactory( + this.config.get_search_strategy(), + account.information + ); + query = account.new_search_query( + expr_factory.parse_query(text), + text ); } } diff --git a/src/client/meson.build b/src/client/meson.build index aeea36d01c83e8192de57545c6cd3b27f6fc5701..e4726cebb9647c7a1463047c77b4a47b8c30c9a2 100644 --- a/src/client/meson.build +++ b/src/client/meson.build @@ -164,6 +164,7 @@ client_dependencies = [ goa, gspell, gtk, + icu_uc, javascriptcoregtk, json_glib, libhandy, diff --git a/src/client/util/util-email.vala b/src/client/util/util-email.vala index 4a1754a702312700bdcf91f6da7e206424866321..c94af33b85ee66f2c3c6f91baa8b1bcb549a626c 100644 --- a/src/client/util/util-email.vala +++ b/src/client/util/util-email.vala @@ -1,7 +1,9 @@ -/* Copyright 2016 Software Freedom Conservancy Inc. +/* + * Copyright 2016 Software Freedom Conservancy Inc. + * Copyright 2019-2021 Michael Gratton * * This software is licensed under the GNU Lesser General Public License - * (version 2.1 or later). See the COPYING file in this distribution. + * (version 2.1 or later). See the COPYING file in this distribution. */ namespace Util.Email { @@ -312,3 +314,594 @@ namespace Util.Email { } } + + +/** + * Parses a human-entered email query string as a query expression. + * + * @see Geary.SearchQuery.Term + */ +public class Util.Email.SearchExpressionFactory : Geary.BaseObject { + + + private const unichar OPERATOR_SEPARATOR = ':'; + private const string OPERATOR_TEMPLATE = "%s:%s"; + + + private delegate Geary.SearchQuery.Term? OperatorFactory( + string value, + bool is_quoted + ); + + + private class FactoryContext { + + + public unowned OperatorFactory factory; + + + public FactoryContext(OperatorFactory factory) { + this.factory = factory; + } + + } + + + private class Tokeniser { + + + [Flags] + private enum CharStatus { NONE, IN_WORD, END_WORD; } + + + // These characters are chosen for being commonly used to + // continue a single word (such as extended last names, + // i.e. "Lars-Eric") or in terms commonly searched for in an + // email client, i.e. unadorned mailbox addresses. Note that + // characters commonly used for wildcards or that would be + // interpreted as wildcards by SQLite are not included here. + private const unichar[] CONTINUATION_CHARS = { + '-', '_', '.', '@' + }; + + public bool has_next { + get { return (this.current_pos < this.query.length); } + } + + public bool is_at_word { + get { return CharStatus.IN_WORD in this.char_status[this.current_pos]; } + } + + public bool is_at_quote { + get { return (this.c == '"'); } + } + + public unichar current_character { get { return this.c; } } + + + private string query; + private int current_pos = -1; + private int next_pos = 0; + + private unichar c = 0; + private CharStatus[] char_status; + + + public Tokeniser(string query) { + this.query = query; + + // Break up search string into individual words and/or + // operators. Can't simply break on space or + // non-alphanumeric chars since some languages don't use + // spaces, so use ICU for its support for the Unicode UAX + // #29 word boundary spec and dictionary-based breaking + // for languages that do not use spaces for work breaks. + + this.char_status = new CharStatus[query.length + 1]; + + var icu_err = Icu.ErrorCode.ZERO_ERROR; + var icu_text = Icu.Text.open_utf8(null, this.query.data, ref icu_err); + var word_breaker = Icu.BreakIterator.open( + WORD, "en", null, -1, ref icu_err + ); + word_breaker.set_utext(icu_text, ref icu_err); + + int32 prev_index = 0; + var current_index = word_breaker.first(); + var status = 0; + while (current_index != Icu.BreakIterator.DONE) { + status = word_breaker.rule_status; + if (!(status >= Icu.BreakIterator.WordBreak.NONE && + status < Icu.BreakIterator.WordBreak.NONE_LIMIT)) { + for (int i = prev_index; i < current_index; i++) { + this.char_status[i] |= IN_WORD; + } + this.char_status[current_index] |= END_WORD; + } + + prev_index = current_index; + current_index = word_breaker.next(); + } + + consume_char(); + } + + public void consume_char() { + var current_pos = this.next_pos; + this.query.get_next_char(ref this.next_pos, out this.c); + this.current_pos = current_pos; + } + + public void skip_to_next() { + while (this.has_next && !this.is_at_quote && !this.is_at_word) { + consume_char(); + } + } + + public string consume_word() { + var start = this.current_pos; + consume_char(); + while (this.has_next && + this.c != OPERATOR_SEPARATOR && + (this.c in CONTINUATION_CHARS || + !(CharStatus.END_WORD in this.char_status[this.current_pos]))) { + consume_char(); + } + return this.query.slice(start, this.current_pos); + } + + public string consume_quote() { + consume_char(); // skip the leading quote + var start = this.current_pos; + var last_c = this.c; + while (this.has_next && (this.c != '"' || last_c == '\\')) { + consume_char(); + } + var quote = this.query.slice(start, this.current_pos); + consume_char(); // skip the trailing quote + return quote; + } + + } + + + public Geary.SearchQuery.Strategy default_strategy { get; private set; } + + public Geary.AccountInformation account { get; private set; } + + // Maps of localised search operator names and values to their + // internal forms + private Gee.Map text_operators = + new Gee.HashMap(); + private Gee.Map boolean_operators = + new Gee.HashMap(); + private Gee.Set search_op_to_me = new Gee.HashSet(); + private Gee.Set search_op_from_me = new Gee.HashSet(); + + + public SearchExpressionFactory(Geary.SearchQuery.Strategy default_strategy, + Geary.AccountInformation account) { + this.default_strategy = default_strategy; + this.account = account; + construct_factories(); + } + + /** Constructs a search expression from the given query string. */ + public Gee.List parse_query(string query) { + var operands = new Gee.LinkedList(); + var tokens = new Tokeniser(query); + while (tokens.has_next) { + if (tokens.is_at_word) { + Geary.SearchQuery.Term? op = null; + var word = tokens.consume_word(); + if (tokens.current_character == OPERATOR_SEPARATOR && + tokens.has_next) { + op = new_extended_operator(word, tokens); + } + if (op == null) { + op = new_text_all_operator(word, false); + } + operands.add(op); + } else if (tokens.is_at_quote) { + operands.add( + new_text_all_operator(tokens.consume_quote(), true) + ); + } else { + tokens.skip_to_next(); + } + } + + return operands; + } + + private Geary.SearchQuery.Term? new_extended_operator(string name, + Tokeniser tokens) { + Geary.SearchQuery.Term? op = null; + + // consume the ':' + tokens.consume_char(); + + bool is_quoted = false; + string? value = null; + if (tokens.is_at_word) { + value = tokens.consume_word(); + } else if (tokens.is_at_quote) { + value = tokens.consume_quote(); + is_quoted = true; + } + + FactoryContext? context = null; + if (value != null) { + context = this.text_operators[name]; + if (context == null) { + context = this.boolean_operators[ + OPERATOR_TEMPLATE.printf(name, value) + ]; + } + } + + if (context != null) { + op = context.factory(value, is_quoted); + } + + if (op == null) { + // Still no operator, so the name or value must have been + // invalid. Repair by treating each as separate ops, if + // present. + var term = ( + value == null + ? "%s:".printf(name) + : "%s:%s".printf(name, value) + ); + op = new_text_all_operator(term, false); + } + + return op; + } + + private inline Geary.SearchQuery.Strategy get_matching_strategy(bool is_quoted) { + return ( + is_quoted + ? Geary.SearchQuery.Strategy.EXACT + : this.default_strategy + ); + } + + private Gee.List get_account_addresses() { + Gee.List? mailboxes = + this.account.sender_mailboxes; + var addresses = new Gee.LinkedList(); + if (mailboxes != null) { + foreach (var mailbox in mailboxes) { + addresses.add(mailbox.address); + } + } + return addresses; + } + + private void construct_factories() { + // Maps of possibly translated search operator names and values + // to English/internal names and values. We include the + // English version anyway so that when translations provide a + // localised version of the operator names but have not also + // translated the user manual, the English version in the + // manual still works. + + // Text operators + /////////////////////////////////////////////////////////// + + FactoryContext attachment_name = new FactoryContext( + this.new_text_attachment_name_operator + ); + this.text_operators.set("attachment", attachment_name); + /// Translators: Can be typed in the search box like + /// "attachment:file.txt" to find messages with attachments + /// with a particular name. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.text_operators.set(C_("Search operator", "attachment"), + attachment_name); + + FactoryContext bcc = new FactoryContext(this.new_text_bcc_operator); + this.text_operators.set("bcc", bcc); + /// Translators: Can be typed in the search box like + /// "bcc:johndoe@example.com" to find messages bcc'd to a + /// particular person. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.text_operators.set(C_("Search operator", "bcc"), bcc); + + FactoryContext body = new FactoryContext(this.new_text_body_operator); + this.text_operators.set("body", body); + /// Translators: Can be typed in the search box like + /// "body:word" to find "word" only if it occurs in the body + /// of a message. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.text_operators.set(C_("Search operator", "body"), body); + + FactoryContext cc = new FactoryContext(this.new_text_cc_operator); + this.text_operators.set("cc", cc); + /// Translators: Can be typed in the search box like + /// "cc:johndoe@example.com" to find messages cc'd to a + /// particular person. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.text_operators.set(C_("Search operator", "cc"), cc); + + FactoryContext from = new FactoryContext(this.new_text_from_operator); + this.text_operators.set("from", from); + /// Translators: Can be typed in the search box like + /// "from:johndoe@example.com" to find messages from a + /// particular sender. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.text_operators.set(C_("Search operator", "from"), from); + + FactoryContext subject = new FactoryContext( + this.new_text_subject_operator + ); + this.text_operators.set("subject", subject); + /// Translators: Can be typed in the search box like + /// "subject:word" to find "word" only if it occurs in the + /// subject of a message. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.text_operators.set(C_("Search operator", "subject"), subject); + + FactoryContext to = new FactoryContext(this.new_text_to_operator); + this.text_operators.set("to", to); + /// Translators: Can be typed in the search box like + /// "to:johndoe@example.com" to find messages received by a + /// particular person. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.text_operators.set(C_("Search operator", "to"), to); + + /// Translators: Can be typed in the search box after "to:", + /// "cc:" and "bcc:" e.g.: "to:me". Matches conversations that + /// are addressed to the user. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.search_op_to_me.add( + C_("Search operator value - mail addressed to the user", "me") + ); + this.search_op_to_me.add("me"); + + /// Translators: Can be typed in the search box after "from:" + /// i.e.: "from:me". Matches conversations were sent by the + /// user. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + this.search_op_from_me.add( + C_("Search operator value - mail sent by the user", "me") + ); + this.search_op_from_me.add("me"); + + // Boolean operators + /////////////////////////////////////////////////////////// + + /// Translators: Can be typed in the search box like + /// "is:unread" to find messages that are read, unread, or + /// starred. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + string bool_is_name = C_("Search operator", "is"); + + /// Translators: Can be typed in the search box after "is:" + /// i.e.: "is:unread". Matches conversations that are flagged + /// unread. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + string bool_is_unread_value = C_("'is:' search operator value", "unread"); + + /// Translators: Can be typed in the search box after "is:" + /// i.e.: "is:read". Matches conversations that are flagged as + /// read. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + string bool_is_read_value = C_("'is:' search operator value", "read"); + + /// Translators: Can be typed in the search box after "is:" + /// i.e.: "is:starred". Matches conversations that are flagged + /// as starred. + /// + /// The translated string must be a single word (use '-', '_' + /// or similar to combine words into one), should be short, + /// and also match the translation in "search.page" of the + /// Geary User Guide. + string bool_is_starred_value = C_("'is:' search operator value", "starred"); + + FactoryContext is_unread = new FactoryContext( + this.new_boolean_unread_operator + ); + this.boolean_operators.set("is:unread", is_unread); + this.boolean_operators.set( + OPERATOR_TEMPLATE.printf( + bool_is_name, bool_is_unread_value + ), is_unread + ); + + FactoryContext is_read = new FactoryContext( + this.new_boolean_read_operator + ); + this.boolean_operators.set("is:read", is_read); + this.boolean_operators.set( + OPERATOR_TEMPLATE.printf( + bool_is_name, bool_is_read_value + ), is_read + ); + + FactoryContext is_starred = new FactoryContext( + this.new_boolean_starred_operator + ); + this.boolean_operators.set("is:starred", is_starred); + this.boolean_operators.set( + OPERATOR_TEMPLATE.printf( + bool_is_name, bool_is_starred_value + ), is_starred + ); + } + + private Geary.SearchQuery.Term? new_text_all_operator( + string value, bool is_quoted + ) { + return new Geary.SearchQuery.EmailTextTerm( + ALL, get_matching_strategy(is_quoted), value + ); + } + + private Geary.SearchQuery.Term? new_text_attachment_name_operator( + string value, bool is_quoted + ) { + return new Geary.SearchQuery.EmailTextTerm( + ATTACHMENT_NAME, get_matching_strategy(is_quoted), value + ); + } + + private Geary.SearchQuery.Term? new_text_bcc_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? op = null; + if (!is_quoted && value in this.search_op_to_me) { + op = new Geary.SearchQuery.EmailTextTerm.disjunction( + BCC, EXACT, get_account_addresses() + ); + } else { + op = new Geary.SearchQuery.EmailTextTerm( + BCC, EXACT, value + ); + } + return op; + } + + private Geary.SearchQuery.Term? new_text_body_operator( + string value, bool is_quoted + ) { + return new Geary.SearchQuery.EmailTextTerm( + BODY, get_matching_strategy(is_quoted), value + ); + } + + private Geary.SearchQuery.Term? new_text_cc_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? op = null; + if (!is_quoted && value in this.search_op_to_me) { + op = new Geary.SearchQuery.EmailTextTerm.disjunction( + CC, EXACT, get_account_addresses() + ); + } else { + op = new Geary.SearchQuery.EmailTextTerm( + CC, get_matching_strategy(is_quoted), value + ); + } + return op; + } + + private Geary.SearchQuery.Term? new_text_from_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? op = null; + if (!is_quoted && value in this.search_op_from_me) { + op = new Geary.SearchQuery.EmailTextTerm.disjunction( + FROM, EXACT, get_account_addresses() + ); + } else { + op = new Geary.SearchQuery.EmailTextTerm(FROM, EXACT, value); + } + return op; + } + + private Geary.SearchQuery.Term? new_text_subject_operator( + string value, bool is_quoted + ) { + return new Geary.SearchQuery.EmailTextTerm( + SUBJECT, get_matching_strategy(is_quoted), value + ); + } + + private Geary.SearchQuery.Term? new_text_to_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? op = null; + if (!is_quoted && value in this.search_op_to_me) { + op = new Geary.SearchQuery.EmailTextTerm.disjunction( + TO, EXACT, get_account_addresses() + ); + } else { + op = new Geary.SearchQuery.EmailTextTerm( + TO, EXACT, value + ); + } + return op; + } + + private Geary.SearchQuery.Term? new_boolean_unread_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? op = null; + if (!is_quoted) { + op = new Geary.SearchQuery.EmailFlagTerm(Geary.EmailFlags.UNREAD); + } + return op; + } + + private Geary.SearchQuery.Term? new_boolean_read_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? op = null; + if (!is_quoted) { + op = new Geary.SearchQuery.EmailFlagTerm(Geary.EmailFlags.UNREAD); + op.is_negated = true; + } + return op; + } + + private Geary.SearchQuery.Term? new_boolean_starred_operator( + string value, bool is_quoted + ) { + Geary.SearchQuery.Term? op = null; + if (!is_quoted) { + op = new Geary.SearchQuery.EmailFlagTerm(Geary.EmailFlags.FLAGGED); + } + return op; + } + +} diff --git a/src/engine/api/geary-account.vala b/src/engine/api/geary-account.vala index d81661cae05ff81dfa04fe39cb1b72c5e241fef3..bc887459b0735e92277c3e989cec023b6da01a41 100644 --- a/src/engine/api/geary-account.vala +++ b/src/engine/api/geary-account.vala @@ -512,23 +512,12 @@ public abstract class Geary.Account : BaseObject, Logging.Source { ) throws GLib.Error; /** - * Create a new {@link SearchQuery} for this {@link Account}. - * - * See {@link Geary.SearchQuery.Strategy} for more information about how its interpreted by the - * Engine. In particular, note that it's an advisory parameter only and may have no effect, - * especially on server searches. However, it may also have a dramatic effect on what search - * results are returned and so should be used with some caution. Whether this parameter is - * user-configurable, available through GSettings or another configuration mechanism, or simply - * baked into the caller's code is up to the caller. CONSERVATIVE is designed to be a good - * default. - * - * The resulting object can only be used with calls into this - * account instance. - */ - public abstract async SearchQuery new_search_query(string query, - SearchQuery.Strategy strategy, - GLib.Cancellable? cancellable) - throws GLib.Error; + * Create a new search query for this account. + */ + public abstract SearchQuery new_search_query( + Gee.List expression, + string text + ) throws GLib.Error; /** * Performs a search with the given query. Optionally, a list of folders not to search diff --git a/src/engine/api/geary-contact.vala b/src/engine/api/geary-contact.vala index 7da8e6ca0f12bf076f41e1c6d781d781dcd97662..63f43071b9b672b8efe9e20e212719fb15b10ea6 100644 --- a/src/engine/api/geary-contact.vala +++ b/src/engine/api/geary-contact.vala @@ -43,7 +43,7 @@ public class Geary.Contact : BaseObject { public string serialize() { string ret = ""; foreach (NamedFlag flag in list) { - ret += flag.serialize() + " "; + ret += flag.serialise() + " "; } return ret.strip(); diff --git a/src/engine/api/geary-named-flag.vala b/src/engine/api/geary-named-flag.vala index eefd8824ae85b7d8cd817cbc12b56b0e55750af8..7472de3ffab4b9ac16bdbf7345d97e3eba426599 100644 --- a/src/engine/api/geary-named-flag.vala +++ b/src/engine/api/geary-named-flag.vala @@ -28,7 +28,7 @@ public class Geary.NamedFlag : BaseObject, Gee.Hashable { return name.down().hash(); } - public string serialize() { + public string serialise() { return name; } @@ -36,4 +36,3 @@ public class Geary.NamedFlag : BaseObject, Gee.Hashable { return name; } } - diff --git a/src/engine/api/geary-named-flags.vala b/src/engine/api/geary-named-flags.vala index 56e486134a23c97deecbe35de95d48ec59993031..08fc1bad50586b427959d28e59f6942c66592df2 100644 --- a/src/engine/api/geary-named-flags.vala +++ b/src/engine/api/geary-named-flags.vala @@ -90,6 +90,17 @@ public class Geary.NamedFlags : BaseObject, Gee.Hashable { return Geary.String.stri_hash(to_string()); } + /** Formats the flags for serialising in the database. */ + public string serialise() { + var builder = new GLib.StringBuilder(); + foreach (NamedFlag flag in this.list) { + builder.append(flag.serialise()); + builder.append_c(' '); + } + return builder.str; + } + + /** Formats the flags for debugging. */ public string to_string() { string ret = "["; foreach (NamedFlag flag in list) { @@ -99,4 +110,3 @@ public class Geary.NamedFlags : BaseObject, Gee.Hashable { return ret + "]"; } } - diff --git a/src/engine/api/geary-search-query.vala b/src/engine/api/geary-search-query.vala index 0a15ff2ead251d61a75f5df1fb3583a9ac998249..d9603d66ae3a2e6a4ed58568723ebd65885b5e3b 100644 --- a/src/engine/api/geary-search-query.vala +++ b/src/engine/api/geary-search-query.vala @@ -1,6 +1,6 @@ /* - * Copyright 2016 Software Freedom Conservancy Inc. - * Copyright 2019 Michael Gratton + * Copyright © 2016 Software Freedom Conservancy Inc. + * Copyright © 2019-2020 Michael Gratton * * This software is licensed under the GNU Lesser General Public License * (version 2.1 or later). See the COPYING file in this distribution. @@ -13,10 +13,20 @@ * Account.new_search_query} and then passed to search methods on * {@link Account} or {@link App.SearchFolder}. * + * Actual search queries are specified by the given {@link + * expression}, which is a list of {@link Term}. The expression + * denotes the conjunction of all given terms, that is, each term is + * combined by a Boolean AND function. While the order of the terms is + * not important, the expression should attempt to reflect the + * free-text search query it was built from (if any). A more + * expressive language is not supported since it is designed to work + * with both the Engine's built-in full text search system as well as + * other server-based systems, including IMAP. + * * @see Account.new_search_query * @see Account.local_search_async * @see Account.get_search_matches_async - * @see App.SearchFolder.search + * @see App.SearchFolder.update_query */ public abstract class Geary.SearchQuery : BaseObject { @@ -54,36 +64,302 @@ public abstract class Geary.SearchQuery : BaseObject { /** * Search for all textual variants, i.e. "the sky's the limit." */ - HORIZON + HORIZON; + + + /** Determines if stemming may be used for an operator. */ + internal bool is_stemming_enabled() { + return this != EXACT; + } + + /** + * The minimum term length before stemming is allowed. + * + * This prevents short words that might be stemmed from being stemmed. + */ + internal int get_min_term_length_for_stemming() { + var min = 0; + switch (this) { + case EXACT: + min = int.MAX; + break; + case CONSERVATIVE: + min = 6; + break; + case AGGRESSIVE: + min = 4; + break; + case HORIZON: + min = 0; + break; + } + return min; + } + + /** + * Maximum difference in lengths between term and stemmed variant. + * + * This prevents long words from being stemmed to much shorter + * words (which creates opportunities for greedy matching). + */ + internal int get_max_difference_term_stem_lengths() { + var max = 0; + switch (this) { + case EXACT: + max = 0; + break; + case CONSERVATIVE: + max = 2; + break; + case AGGRESSIVE: + max = 4; + break; + case HORIZON: + max =int.MAX; + break; + } + return max; + } + + } + + + /** + * Parent class for terms that make up a search query's expression. + * + * @see SearchQuery.expression + */ + public abstract class Term : BaseObject { + + /** Determines opposite of the term is matched. */ + public bool is_negated { get; set; default = false; } + + /** Determines if this term is equal to another. */ + public virtual bool equal_to(Term other) { + return ( + this.is_negated == other.is_negated && + this.get_type() == other.get_type() + ); + } + + /** Returns a string representation, for debugging. */ + public abstract string to_string(); + + } + + /** + * A term that matches text properties of an email. + */ + public class EmailTextTerm : Term { + + + /** + * Supported text email properties that can be queried. + * + * @see EmailTextTerm + */ + public enum Property { + /** Search for a term in all supported properties. */ + ALL, + + /** Search for a term in the To field. */ + TO, + + /** Search for a term in the Cc field. */ + CC, + + /** Search for a term in the Bcc field. */ + BCC, + + /** Search for a term in the From field. */ + FROM, + + /** Search for a term in the email subject. */ + SUBJECT, + + /** Search for a term in the email body. */ + BODY, + + /** Search for a term in email attachment names. */ + ATTACHMENT_NAME; + } + + + /** The email property this term applies to. */ + public Property target { get; private set; } + + /** The strategy used for matching the given terms. */ + public Strategy matching_strategy { get; private set; } + + /** + * The strings to match against the given target. + * + * If more than one term is given, they are treated as the + * disjunction of all, that is they are combined using the + * Boolean OR function. + */ + public Gee.List terms { + get; private set; default = new Gee.ArrayList(); + } + + + public EmailTextTerm(Property target, + Strategy matching_strategy, + string term) { + this.target = target; + this.matching_strategy = matching_strategy; + this.terms.add(term); + } + + public EmailTextTerm.disjunction(Property target, + Strategy matching_strategy, + Gee.List terms) { + this.target = target; + this.matching_strategy = matching_strategy; + this.terms.add_all(terms); + } + + public override bool equal_to(Term other) { + if (this == other) { + return true; + } + if (!base.equal_to(other)) { + return false; + } + var text = (EmailTextTerm) other; + if (this.target != text.target || + this.matching_strategy != text.matching_strategy || + this.terms.size != text.terms.size) { + return false; + } + for (int i = 0; i < this.terms.size; i++) { + if (this.terms[i] != text.terms[i]) { + return false; + } + } + return true; + } + + public override string to_string() { + var builder = new GLib.StringBuilder(); + if (this.is_negated) { + builder.append_c('!'); + } + + builder.append( + ObjectUtils.to_enum_nick( + typeof(Property), this.target).up() + ); + builder.append_c(':'); + builder.append( + ObjectUtils.to_enum_nick( + typeof(Strategy), this.matching_strategy + ).up() + ); + builder.append_c('('); + + var iter = this.terms.iterator(); + if (iter.next()) { + builder.append(iter.get().to_string()); + } + while (iter.next()) { + builder.append_c(','); + builder.append(iter.get().to_string()); + } + builder.append_c(')'); + return builder.str; + } + } - /** The account that owns this query. */ - public Account owner { get; private set; } + /** + * A term that matches a given flag in an email. + */ + public class EmailFlagTerm : Term { + + + public NamedFlag value { get; private set; } + + + public EmailFlagTerm(NamedFlag value) { + this.value = value; + } + + public override bool equal_to(Term other) { + if (this == other) { + return true; + } + if (!base.equal_to(other)) { + return false; + } + return this.value.equal_to(((EmailFlagTerm) other).value); + } + + public override string to_string() { + return "%s(%s)".printf( + this.is_negated ? "!" : "", + this.value.to_string() + ); + } + + } + /** - * The original search text. + * A read-only list of search terms to be evaluated. * - * This is used mostly for debugging. + * Each given term is used in a conjunction, that is combined + * using a Boolean `AND` operator. */ - public string raw { get; private set; } + public Gee.List expression { get; private set; } + private Gee.List _rw_expression = new Gee.ArrayList(); /** - * The selected {@link Strategy} quality. + * The original search text, if any. + * + * This is used mostly for debugging. */ - public Strategy strategy { get; private set; } + public string raw { get; private set; } - protected SearchQuery(Account owner, - string raw, - Strategy strategy) { - this.owner = owner; + protected SearchQuery(Gee.Collection expression, + string raw) { + this._rw_expression.add_all(expression); + this.expression = this._rw_expression.read_only_view; this.raw = raw; - this.strategy = strategy; } + /** Determines if this query's expression is equal to another's. */ + public bool equal_to(SearchQuery other) { + if (this == other) { + return true; + } + if (this.expression.size != other.expression.size) { + return false; + } + for (int i = 0; i < this.expression.size; i++) { + if (!this.expression[i].equal_to(other.expression[i])) { + return false; + } + } + return true; + } + + /** Returns a string representation of this query, for debugging. */ public string to_string() { - return "\"%s\" (%s)".printf(this.raw, this.strategy.to_string()); + var builder = new GLib.StringBuilder(); + builder.append_printf("\"%s\": ", this.raw); + + var iter = this.expression.iterator(); + if (iter.next()) { + builder.append(iter.get().to_string()); + } + while (iter.next()) { + builder.append_c(','); + builder.append(iter.get().to_string()); + } + return builder.str; } -} +} diff --git a/src/engine/app/app-search-folder.vala b/src/engine/app/app-search-folder.vala index 0ad7552f2fb62dec9a95d9026eaf85290cd20286..a8fa9404eaa335c6041533ef87212be36e8a88a7 100644 --- a/src/engine/app/app-search-folder.vala +++ b/src/engine/app/app-search-folder.vala @@ -110,10 +110,10 @@ public class Geary.App.SearchFolder : new Gee.HashSet(); // The email present in the folder, sorted - private Gee.TreeSet contents; + private Gee.SortedSet entries; // Map of engine ids to search ids - private Gee.Map id_map; + private Gee.Map ids; private Nonblocking.Mutex result_mutex = new Nonblocking.Mutex(); @@ -131,7 +131,8 @@ public class Geary.App.SearchFolder : account.email_removed.connect(on_account_email_removed); account.email_locally_removed.connect(on_account_email_removed); - new_contents(); + this.entries = new_entry_set(); + this.ids = new_id_map(); // Always exclude emails that don't live anywhere from search // results. @@ -147,51 +148,40 @@ public class Geary.App.SearchFolder : } /** - * Executes the given query over the account's local email. + * Sets the current search query for the folder. * - * Calling this will block until the search is complete. + * Calling this method will start the search folder asynchronously + * in the background. If the given query is not equal to the + * existing query, the folder's contents will be updated to + * reflect the changed query. */ - public async void search(SearchQuery query, GLib.Cancellable? cancellable) - throws GLib.Error { - int result_mutex_token = yield result_mutex.claim_async(); - - clear(); + public void update_query(SearchQuery query) { + if (this.query == null || !this.query.equal_to(query)) { + this.executing.cancel(); + this.executing = new GLib.Cancellable(); - if (cancellable != null) { - GLib.Cancellable @internal = this.executing; - cancellable.cancelled.connect(() => { @internal.cancel(); }); - } - - this.query = query; - GLib.Error? error = null; - try { - yield do_search_async(null, null, this.executing); - } catch(Error e) { - error = e; - } - - result_mutex.release(ref result_mutex_token); - - if (error != null) { - throw error; + this.query = query; + this.update.begin(); } } /** * Cancels and clears the search query and results. * - * The {@link query} property will be cleared. + * The {@link query} property will be set to null. */ - public void clear() { + public void clear_query() { this.executing.cancel(); this.executing = new GLib.Cancellable(); - var old_ids = this.id_map; - new_contents(); + this.query = null; + var old_ids = this.ids; + + this.entries = new_entry_set(); + this.ids = new_id_map(); + notify_email_removed(old_ids.keys); notify_email_count_changed(0, REMOVED); - - this.query = null; } /** @@ -218,10 +208,16 @@ public class Geary.App.SearchFolder : Gee.Collection ids, GLib.Cancellable? cancellable = null) throws GLib.Error { + debug("Waiting for checking contains"); + int result_mutex_token = yield this.result_mutex.claim_async(cancellable); + var existing_ids = this.ids; + result_mutex.release(ref result_mutex_token); + + debug("Checking contains"); return Geary.traverse( ids ).filter( - (id) => this.id_map.has_key(id) + (id) => existing_ids.has_key(id) ).to_hash_set(); } @@ -232,17 +228,22 @@ public class Geary.App.SearchFolder : Folder.ListFlags flags, Cancellable? cancellable = null ) throws GLib.Error { - int result_mutex_token = yield result_mutex.claim_async(); + debug("Waiting to list email"); + int result_mutex_token = yield this.result_mutex.claim_async(cancellable); + var existing_entries = this.entries; + var existing_ids = this.ids; + result_mutex.release(ref result_mutex_token); + debug("Listing email"); var engine_ids = new Gee.LinkedList(); if (Folder.ListFlags.OLDEST_TO_NEWEST in flags) { EmailEntry? oldest = null; - if (!this.contents.is_empty) { + if (!existing_entries.is_empty) { if (initial_id == null) { - oldest = this.contents.last(); + oldest = existing_entries.last(); } else { - oldest = this.id_map.get(initial_id); + oldest = existing_ids.get(initial_id); if (oldest == null) { throw new EngineError.NOT_FOUND( @@ -251,13 +252,13 @@ public class Geary.App.SearchFolder : } if (!(Folder.ListFlags.INCLUDING_ID in flags)) { - oldest = contents.higher(oldest); + oldest = existing_entries.higher(oldest); } } } if (oldest != null) { var iter = ( - this.contents.iterator_at(oldest) as + existing_entries.iterator_at(oldest) as Gee.BidirIterator ); engine_ids.add(oldest.id); @@ -268,11 +269,11 @@ public class Geary.App.SearchFolder : } else { // Newest to oldest EmailEntry? newest = null; - if (!this.contents.is_empty) { + if (!existing_entries.is_empty) { if (initial_id == null) { - newest = this.contents.first(); + newest = existing_entries.first(); } else { - newest = this.id_map.get(initial_id); + newest = existing_ids.get(initial_id); if (newest == null) { throw new EngineError.NOT_FOUND( @@ -281,13 +282,13 @@ public class Geary.App.SearchFolder : } if (!(Folder.ListFlags.INCLUDING_ID in flags)) { - newest = contents.lower(newest); + newest = existing_entries.lower(newest); } } } if (newest != null) { var iter = ( - this.contents.iterator_at(newest) as + existing_entries.iterator_at(newest) as Gee.BidirIterator ); engine_ids.add(newest.id); @@ -311,8 +312,6 @@ public class Geary.App.SearchFolder : } } - result_mutex.release(ref result_mutex_token); - if (list_error != null) { throw list_error; } @@ -390,7 +389,7 @@ public class Geary.App.SearchFolder : private void require_id(EmailIdentifier id) throws EngineError.NOT_FOUND { - if (!this.id_map.has_key(id)) { + if (!this.ids.has_key(id)) { throw new EngineError.NOT_FOUND( "Id not found: %s", id.to_string() ); @@ -401,17 +400,114 @@ public class Geary.App.SearchFolder : Gee.Collection to_check ) { var available = new Gee.LinkedList(); - var id_map = this.id_map; + var ids = this.ids; var iter = to_check.iterator(); while (iter.next()) { var id = iter.get(); - if (id_map.has_key(id)) { + if (ids.has_key(id)) { available.add(id); } } return available; } + private async void append(Folder folder, + Gee.Collection ids) { + // Grab the cancellable before the lock so that if the current + // search is cancelled while waiting, this doesn't go and try + // to update the new results. + var cancellable = this.executing; + + debug("Waiting to append to search results"); + try { + int result_mutex_token = yield this.result_mutex.claim_async( + cancellable + ); + try { + if (!this.exclude_folders.contains(folder.path)) { + yield do_search_async(ids, null, cancellable); + } + } catch (GLib.Error error) { + this.account.report_problem( + new AccountProblemReport(this.account.information, error) + ); + } + + this.result_mutex.release(ref result_mutex_token); + } catch (GLib.IOError.CANCELLED mutex_err) { + // all good + } catch (GLib.Error mutex_err) { + warning("Error acquiring lock: %s", mutex_err.message); + } + } + + private async void update() { + // Grab the cancellable before the lock so that if the current + // search is cancelled while waiting, this doesn't go and try + // to update the new results. + var cancellable = this.executing; + + debug("Waiting to update search results"); + try { + int result_mutex_token = yield this.result_mutex.claim_async( + cancellable + ); + try { + yield do_search_async(null, null, cancellable); + } catch (GLib.Error error) { + this.account.report_problem( + new AccountProblemReport(this.account.information, error) + ); + } + + this.result_mutex.release(ref result_mutex_token); + } catch (GLib.IOError.CANCELLED mutex_err) { + // all good + } catch (GLib.Error mutex_err) { + warning("Error acquiring lock: %s", mutex_err.message); + } + } + + private async void remove(Folder folder, + Gee.Collection ids) { + + // Grab the cancellable before the lock so that if the current + // search is cancelled while waiting, this doesn't go and try + // to update the new results. + var cancellable = this.executing; + + debug("Waiting to remove from search results"); + try { + int result_mutex_token = yield this.result_mutex.claim_async( + cancellable + ); + + // Ensure this happens inside the lock so it is working with + // up-to-date data + var id_map = this.ids; + var relevant_ids = ( + traverse(ids) + .filter(id => id_map.has_key(id)) + .to_linked_list() + ); + if (relevant_ids.size > 0) { + try { + yield do_search_async(null, relevant_ids, cancellable); + } catch (GLib.Error error) { + this.account.report_problem( + new AccountProblemReport(this.account.information, error) + ); + } + } + + this.result_mutex.release(ref result_mutex_token); + } catch (GLib.IOError.CANCELLED mutex_err) { + // all good + } catch (GLib.Error mutex_err) { + warning("Error acquiring lock: %s", mutex_err.message); + } + } + // NOTE: you must call this ONLY after locking result_mutex_token. // If both *_ids parameters are null, the results of this search are // considered to be the full new set. If non-null, the results are @@ -422,11 +518,15 @@ public class Geary.App.SearchFolder : Gee.Collection? remove_ids, GLib.Cancellable? cancellable) throws GLib.Error { - var id_map = this.id_map; - var contents = this.contents; + debug("Processing search results"); + var entries = new_entry_set(); + var ids = new_id_map(); var added = new Gee.LinkedList(); var removed = new Gee.LinkedList(); + entries.add_all(this.entries); + ids.set_all(this.ids); + if (remove_ids == null) { // Adding email to the search, either searching all local // email if to_add is null, or adding only a matching @@ -463,24 +563,24 @@ public class Geary.App.SearchFolder : var hashed_results = new Gee.HashSet(); hashed_results.add_all(id_results); - var existing = id_map.map_iterator(); + var existing = ids.map_iterator(); while (existing.next()) { if (!hashed_results.contains(existing.get_key())) { var entry = existing.get_value(); existing.unset(); - contents.remove(entry); + entries.remove(entry); removed.add(entry.id); } } } foreach (var email in email_results) { - if (!id_map.has_key(email.id)) { + if (!ids.has_key(email.id)) { var entry = new EmailEntry( email.id, email.properties.date_received ); - contents.add(entry); - id_map.set(email.id, entry); + entries.add(entry); + ids.set(email.id, entry); added.add(email.id); } } @@ -489,89 +589,53 @@ public class Geary.App.SearchFolder : // Removing email, can just remove them directly foreach (var id in remove_ids) { EmailEntry entry; - if (id_map.unset(id, out entry)) { - contents.remove(entry); + if (ids.unset(id, out entry)) { + entries.remove(entry); removed.add(id); } } } - this._properties.set_total(this.contents.size); - - // Note that we probably shouldn't be firing these signals from inside - // our mutex lock. Keep an eye on it, and if there's ever a case where - // it might cause problems, it shouldn't be too hard to move the - // firings outside. - - Folder.CountChangeReason reason = CountChangeReason.NONE; - if (added.size > 0) { - // TODO: we'd like to be able to use APPENDED here when applicable, - // but because of the potential to append a thousand results at - // once and the ConversationMonitor's inability to handle that - // gracefully (#7464), we always use INSERTED for now. - notify_email_inserted(added); - reason |= Folder.CountChangeReason.INSERTED; - } - if (removed.size > 0) { - notify_email_removed(removed); - reason |= Folder.CountChangeReason.REMOVED; - } - if (reason != CountChangeReason.NONE) - notify_email_count_changed(this.contents.size, reason); - } - - private async void do_append(Folder folder, - Gee.Collection ids, - GLib.Cancellable? cancellable) - throws GLib.Error { - int result_mutex_token = yield result_mutex.claim_async(); - - GLib.Error? error = null; - try { - if (!this.exclude_folders.contains(folder.path)) { - yield do_search_async(ids, null, cancellable); - } - } catch (GLib.Error e) { - error = e; - } - - result_mutex.release(ref result_mutex_token); + if (!cancellable.is_cancelled()) { + this.entries = entries; + this.ids = ids; - if (error != null) - throw error; - } + this._properties.set_total(entries.size); - private async void do_remove(Folder folder, - Gee.Collection ids, - GLib.Cancellable? cancellable) - throws GLib.Error { - int result_mutex_token = yield result_mutex.claim_async(); + // Note that we probably shouldn't be firing these signals from inside + // our mutex lock. Keep an eye on it, and if there's ever a case where + // it might cause problems, it shouldn't be too hard to move the + // firings outside. - GLib.Error? error = null; - try { - var id_map = this.id_map; - var relevant_ids = ( - traverse(ids) - .filter(id => id_map.has_key(id)) - .to_linked_list() - ); - - if (relevant_ids.size > 0) { - yield do_search_async(null, relevant_ids, cancellable); + Folder.CountChangeReason reason = CountChangeReason.NONE; + if (removed.size > 0) { + notify_email_removed(removed); + reason |= Folder.CountChangeReason.REMOVED; + } + if (added.size > 0) { + // TODO: we'd like to be able to use APPENDED here + // when applicable, but because of the potential to + // append a thousand results at once and the + // ConversationMonitor's inability to handle that + // gracefully (#7464), we always use INSERTED for now. + notify_email_inserted(added); + reason |= Folder.CountChangeReason.INSERTED; } - } catch (GLib.Error e) { - error = e; + if (reason != CountChangeReason.NONE) { + notify_email_count_changed(this.entries.size, reason); + } + debug("Processing done, entries/ids: %d/%d", entries.size, ids.size); + } else { + debug("Processing cancelled, dropping entries/ids: %d/%d", entries.size, ids.size); } + } - result_mutex.release(ref result_mutex_token); - - if (error != null) - throw error; + private inline Gee.SortedSet new_entry_set() { + return new Gee.TreeSet(EmailEntry.compare_to); } - private inline void new_contents() { - this.contents = new Gee.TreeSet(EmailEntry.compare_to); - this.id_map = new Gee.HashMap(); + private inline Gee.Map new_id_map() { + return new Gee.HashMap(); } private void include_folder(Folder folder) { @@ -611,40 +675,14 @@ public class Geary.App.SearchFolder : private void on_email_locally_complete(Folder folder, Gee.Collection ids) { if (this.query != null) { - this.do_append.begin( - folder, ids, null, - (obj, res) => { - try { - this.do_append.end(res); - } catch (GLib.Error error) { - this.account.report_problem( - new AccountProblemReport( - this.account.information, error - ) - ); - } - } - ); + this.append.begin(folder, ids); } } private void on_account_email_removed(Folder folder, Gee.Collection ids) { if (this.query != null) { - this.do_remove.begin( - folder, ids, null, - (obj, res) => { - try { - this.do_remove.end(res); - } catch (GLib.Error error) { - this.account.report_problem( - new AccountProblemReport( - this.account.information, error - ) - ); - } - } - ); + this.remove.begin(folder, ids); } } diff --git a/src/engine/app/conversation-monitor/app-fill-window-operation.vala b/src/engine/app/conversation-monitor/app-fill-window-operation.vala index 9f34797b149f6fa4a1afbdaf3625acd657acf003..a7ca8b8cb186417ffab4c8ef007666aa81eaf32c 100644 --- a/src/engine/app/conversation-monitor/app-fill-window-operation.vala +++ b/src/engine/app/conversation-monitor/app-fill-window-operation.vala @@ -37,9 +37,16 @@ private class Geary.App.FillWindowOperation : ConversationOperation { num_to_load = MAX_FILL_COUNT; } - int loaded = yield this.monitor.load_by_id_async( - this.monitor.window_lowest, num_to_load, LOCAL_ONLY - ); + int loaded = 0; + + try { + loaded = yield this.monitor.load_by_id_async( + this.monitor.window_lowest, num_to_load, LOCAL_ONLY + ); + } catch (EngineError.NOT_FOUND err) { + debug("Stale FillWindowOperation: %s", err.message); + return; + } debug( "Filled %d of %d locally, window: %d, total: %d", @@ -61,9 +68,14 @@ private class Geary.App.FillWindowOperation : ConversationOperation { // Load the max amount if going to the trouble of talking // to the remote. num_to_load = MAX_FILL_COUNT; - loaded = yield this.monitor.load_by_id_async( - this.monitor.window_lowest, num_to_load, FORCE_UPDATE - ); + try { + loaded = yield this.monitor.load_by_id_async( + this.monitor.window_lowest, num_to_load, FORCE_UPDATE + ); + } catch (EngineError.NOT_FOUND err) { + debug("Stale FillWindowOperation: %s", err.message); + return; + } debug( "Filled %d of %d from the remote, window: %d, total: %d", diff --git a/src/engine/app/conversation-monitor/app-remove-operation.vala b/src/engine/app/conversation-monitor/app-remove-operation.vala index 8105d959e0e4dd8f857bb17bb9770c5d9cdaed45..f977e9023c0a545f4e512bcf61f82dd7573ffb18 100644 --- a/src/engine/app/conversation-monitor/app-remove-operation.vala +++ b/src/engine/app/conversation-monitor/app-remove-operation.vala @@ -35,13 +35,14 @@ private class Geary.App.RemoveOperation : BatchOperation { trimmed ); - - // Fire signals, clean up this.monitor.removed( removed, trimmed, (this.source_folder == this.monitor.base_folder) ? batch : null ); + + // Queue an update since many emails may have been removed + this.monitor.check_window_count(); } } diff --git a/src/engine/common/common-fts-search-query.vala b/src/engine/common/common-fts-search-query.vala new file mode 100644 index 0000000000000000000000000000000000000000..e8b73042acb7bbd61d6e15ff4e6f1ff93a6f569f --- /dev/null +++ b/src/engine/common/common-fts-search-query.vala @@ -0,0 +1,394 @@ +/* + * Copyright © 2016 Software Freedom Conservancy Inc. + * Copyright © 2019-2020 Michael Gratton . + * + * This software is licensed under the GNU Lesser General Public License + * (version 2.1 or later). See the COPYING file in this distribution. + */ + +/** + * A search query implementation that provides full-text search. + */ +internal class Geary.FtsSearchQuery : Geary.SearchQuery { + + + private const string EMAIL_TEXT_STEMMED_TERMS = "geary-stemmed-terms"; + + + internal bool has_stemmed_terms { get; private set; default = false; } + + private bool is_all_negated = true; + + private unowned SnowBall.Stemmer stemmer; + + + public FtsSearchQuery(Gee.List expression, + string raw, + SnowBall.Stemmer stemmer) { + base(expression, raw); + this.stemmer = stemmer; + + foreach (var term in this.expression) { + // Use this brittle form of type checking for performance + // (both here and further below in the class) - the Engine + // controls the Term hierarchy the needed assumptions can + // be made + if (term.get_type() == typeof(SearchQuery.EmailTextTerm)) { + // Pre-stem search terms up front since the stemmed + // form is needed in a few different places + var text = (SearchQuery.EmailTextTerm) term; + if (text.matching_strategy.is_stemming_enabled()) { + stem_search_terms(text); + } + } + if (!term.is_negated) { + this.is_all_negated = false; + } + } + } + + internal Db.Statement get_search_query( + Db.Connection cx, + string? search_ids_sql, + string? excluded_folder_ids_sql, + bool exclude_folderless, + int limit, + int offset + ) throws GLib.Error { + var sql = new GLib.StringBuilder(); + + // Select distinct since messages may exist in more than one + // folder. + sql.append(""" + SELECT DISTINCT mt.id + FROM MessageTable AS mt + INDEXED BY MessageTableInternalDateTimeTIndex"""); + + // If excluding folderless messages, an inner join on + // MessageLocationTable will cause them to be excluded + // automatically. Otherwise a left join always required to + // exclude messages marked for deletion, even if there are no + // folder exclusions. + if (exclude_folderless) { + sql.append(""" + INNER JOIN MessageLocationTable AS mlt ON mt.id = mlt.message_id"""); + } else { + sql.append(""" + LEFT JOIN MessageLocationTable AS mlt ON mt.id = mlt.message_id"""); + } + + var conditions_added = false; + sql.append(""" + WHERE"""); + + // Folder exclusions + if (excluded_folder_ids_sql != null) { + sql.append_printf( + " mlt.folder_id NOT IN (%s)", + excluded_folder_ids_sql + ); + conditions_added = true; + } + + // FTS match exclusions + if (!this.expression.is_empty) { + if (conditions_added) { + sql.append(" AND"); + } + sql.append( + this.is_all_negated + ? " mt.id NOT IN" + : " mt.id IN" + ); + sql.append( + " (SELECT mst.rowid FROM MessageSearchTable as mst WHERE " + ); + sql_add_term_conditions(sql, false); + sql.append_c(')'); + conditions_added = true; + } + + // Email id exclusions + if (!String.is_empty(search_ids_sql)) { + if (conditions_added) { + sql.append(" AND"); + } + sql.append(""" mt.id IN (%s)""".printf(search_ids_sql)); + } + + // Marked as deleted (but not folderless) exclusions + if (conditions_added) { + sql.append(" AND"); + } + sql.append(" mlt.remove_marker IN (0, null)"); + + // Ordering + sql.append(""" + ORDER BY mt.internaldate_time_t DESC"""); + + // Limit exclusions + if (limit > 0) { + sql.append(""" + LIMIT ? OFFSET ?"""); + } + + Db.Statement stmt = cx.prepare(sql.str); + int bind_index = sql_bind_term_conditions(stmt, 0); + if (limit > 0) { + stmt.bind_int(bind_index++, limit); + stmt.bind_int(bind_index++, offset); + } + + return stmt; + } + + internal Db.Statement get_match_query( + Db.Connection cx, + string? search_ids_sql + ) throws GLib.Error { + var sql = new GLib.StringBuilder(); + sql.append(""" + SELECT mst.rowid, geary_matches(MessageSearchTable) + FROM MessageSearchTable as mst + WHERE rowid IN ( + """); + sql.append(search_ids_sql); + sql.append(") AND "); + sql_add_term_conditions(sql, false); + + Db.Statement stmt = cx.prepare(sql.str); + sql_bind_term_conditions(stmt, 0); + return stmt; + } + + /** + * Applies stemming for the given term to a specific term value. + * + * Prior experience with the Snowball stemmer indicates it is too + * aggressive for our tastes when coupled with prefix-matching of + * all unquoted terms. See + * https://bugzilla.gnome.org/show_bug.cgi?id=713179 and + * https://bugzilla.gnome.org/show_bug.cgi?id=720361 + * + * This method is part of a larger strategy designed to dampen + * that aggressiveness without losing the benefits of stemming + * entirely: The database's FTS table uses no stemming, but + * libstemmer is used to generate stemmed search terms. + * Post-search processing is then to strip results which are too + * "greedy" due to prefix-matching the stemmed variant. + * + * Some heuristics are in place simply to determine if stemming + * should occur: + * + * # If stemming is unallowed, no stemming occurs. + * # If the term is < min. term length for stemming, no stemming + * occurs. + * # If the stemmer returns a stem that is the same as the + * original term, no stemming occurs. + * # If the difference between the stemmed word and the original + * term is more than maximum allowed, no stemming occurs. This + * works under the assumption that if the user has typed a long + * word, they do not want to "go back" to searching for a much + * shorter version of it. (For example, "accountancies" stems + * to "account"). + * + * Otherwise, the stem for the term is returned. + */ + private void stem_search_terms(SearchQuery.EmailTextTerm text) { + var stemmed_terms = new Gee.ArrayList(); + foreach (var term in text.terms) { + int term_length = term.length; + string? stemmed = null; + if (term_length > text.matching_strategy.get_min_term_length_for_stemming()) { + stemmed = this.stemmer.stem(term, term_length); + if (String.is_empty(stemmed) || + term == stemmed || + term_length - stemmed.length > + text.matching_strategy.get_max_difference_term_stem_lengths()) { + stemmed = null; + } + } + if (stemmed != null) { + this.has_stemmed_terms = true; + debug(@"Search term \"$term\" stemmed to \"$stemmed\""); + } else { + debug(@"Search term \"$term\" not stemmed"); + } + stemmed_terms.add(stemmed); + } + text.set_data(EMAIL_TEXT_STEMMED_TERMS, stemmed_terms); + } + + private bool sql_add_term_conditions(GLib.StringBuilder sql, + bool have_added_sql_condition) { + if (!this.expression.is_empty) { + if (have_added_sql_condition) { + sql.append(" AND"); + } + have_added_sql_condition = true; + sql.append(" MessageSearchTable MATCH '"); + + // Add all non-negated terms first, since NOT in FTS5 is a + // binary operator (not unary, FFS), and hence all negated + // clauses must follow a positive operator and a single + // NOT clause. For example, for positive terms a,c and + // negated terms b,d, the match value must be structured + // as: (a AND c) NOT (b AND d) + + var is_first_positive_term = true; + foreach (var term in this.expression) { + if (!term.is_negated) { + if (is_first_positive_term) { + sql.append(" ("); + } else { + sql.append(" AND"); + } + sql_add_term_condition(sql, term); + is_first_positive_term = false; + } + } + if (!is_first_positive_term) { + sql.append_c(')'); + } + + var is_first_negated_term = true; + foreach (var term in this.expression) { + if (term.is_negated) { + if (is_first_negated_term) { + // If all negated, there won't be any positive + // terms above, and the MATCH will be used as + // an exclusion instead, so the NOT operator + // is not required. + if (!this.is_all_negated) { + sql.append(" NOT ("); + } else { + sql.append(" ("); + } + } else { + sql.append(" AND"); + } + sql_add_term_condition(sql, term); + is_first_negated_term = false; + } + } + if (!is_first_negated_term) { + sql.append_c(')'); + } + + sql.append("'"); + } + return have_added_sql_condition; + } + + private inline void sql_add_term_condition(GLib.StringBuilder sql, + SearchQuery.Term term) { + if (term.get_type() == typeof(SearchQuery.EmailTextTerm)) { + sql_add_email_text_term_conditions((SearchQuery.EmailTextTerm) term, sql); + } else if (term.get_type() == typeof(SearchQuery.EmailFlagTerm)) { + sql.append(" ({flags} : \"' || ? || '\")"); + } + } + + private inline void sql_add_email_text_term_conditions(SearchQuery.EmailTextTerm text, + GLib.StringBuilder sql) { + var target = ""; + switch (text.target) { + case ALL: + target = ""; + break; + case TO: + target = "receivers"; + break; + case CC: + target = "cc"; + break; + case BCC: + target = "bcc"; + break; + case FROM: + target = "from"; + break; + case SUBJECT: + target = "subject"; + break; + case BODY: + target = "body"; + break; + case ATTACHMENT_NAME: + target = "attachments"; + break; + } + + sql.append(" ("); + + var values = text.terms; + var stemmed_values = text.get_data>( + EMAIL_TEXT_STEMMED_TERMS + ); + var is_first_disjunct = true; + for (int i = 0; i < values.size; i++) { + if (!is_first_disjunct) { + sql.append(" OR"); + } + if (target != "") { + sql.append_printf("{%s} :", target); + } + if (stemmed_values != null && stemmed_values[i] != null) { + // Original is not a prefix match, stemmed is + sql.append(" \"' || ? || '\" OR \"' || ? || '\"*"); + } else if (text.matching_strategy != EXACT) { + // A regular match, do a suffix match + sql.append(" \"' || ? || '\"*"); + } else { + // EXACT is not a prefix match + sql.append(" \"' || ? || '\""); + } + is_first_disjunct = false; + } + sql.append_c(')'); + } + + private int sql_bind_term_conditions(Db.Statement sql, + int index) + throws Geary.DatabaseError { + int next_index = index; + // Per sql_add_term_conditions, add all non-negated terms + // first before adding any negated terms. + foreach (var term in this.expression) { + if (!term.is_negated) { + next_index = sql_bind_term_condition(sql, term, next_index); + } + } + foreach (var term in this.expression) { + if (term.is_negated) { + next_index = sql_bind_term_condition(sql, term, next_index); + } + } + return next_index; + } + + private inline int sql_bind_term_condition(Db.Statement sql, + SearchQuery.Term term, + int index) + throws Geary.DatabaseError { + int next_index = index; + var type = term.get_type(); + if (type == typeof(SearchQuery.EmailTextTerm)) { + var text = (SearchQuery.EmailTextTerm) term; + var stemmed_terms = text.get_data>( + EMAIL_TEXT_STEMMED_TERMS + ); + for (int i = 0; i < text.terms.size; i++) { + sql.bind_string(next_index++, text.terms[i]); + if (stemmed_terms != null && stemmed_terms[i] != null) { + sql.bind_string(next_index++, stemmed_terms[i]); + } + } + } else if (type == typeof(SearchQuery.EmailFlagTerm)) { + var flag = (SearchQuery.EmailFlagTerm) term; + sql.bind_string(next_index++, flag.value.serialise()); + } + return next_index; + } + +} diff --git a/src/engine/imap-db/imap-db-account.vala b/src/engine/imap-db/imap-db-account.vala index 54522b90e71e95336a692d6bda5cb38599c068df..e9d3abc0d34b741b63df9461bb3a022aaef5b19d 100644 --- a/src/engine/imap-db/imap-db-account.vala +++ b/src/engine/imap-db/imap-db-account.vala @@ -543,36 +543,6 @@ private class Geary.ImapDB.Account : BaseObject { return (messages.size == 0 ? null : messages); } - private void sql_add_query_phrases(StringBuilder sql, Gee.HashMap query_phrases, - string operator, string columns, string condition) { - bool is_first_field = true; - foreach (string field in query_phrases.keys) { - if (!is_first_field) - sql.append_printf(""" - %s - SELECT %s - FROM MessageSearchTable - WHERE %s - MATCH ? - %s - """, operator, columns, field, condition); - else - sql.append_printf(" AND %s MATCH ?", field); - is_first_field = false; - } - } - - private int sql_bind_query_phrases(Db.Statement stmt, int start_index, - Gee.HashMap query_phrases) throws Geary.DatabaseError { - int i = start_index; - // This relies on the keys being returned in the same order every time - // from the same map. It might not be guaranteed, but I feel pretty - // confident it'll work unless you change the map in between. - foreach (string field in query_phrases.keys) - stmt.bind_string(i++, query_phrases.get(field)); - return i - start_index; - } - // Append each id in the collection to the StringBuilder, in a format // suitable for use in an SQL statement IN (...) clause. private void sql_append_ids(StringBuilder s, Gee.Iterable ids) { @@ -608,95 +578,41 @@ private class Geary.ImapDB.Account : BaseObject { } public async Gee.Collection? search_async(Geary.SearchQuery q, - int limit = 100, int offset = 0, Gee.Collection? folder_blacklist = null, + int limit = 100, int offset = 0, Gee.Collection? excluded_folders = null, Gee.Collection? search_ids = null, Cancellable? cancellable = null) throws Error { - debug("Search terms, offset/limit: %s %d/%d", - q.to_string(), offset, limit); + debug("Search query: %s", q.to_string()); check_open(); - ImapDB.SearchQuery query = check_search_query(q); - - Gee.HashMap query_phrases = query.get_query_phrases(); - Gee.Map removal_conditions = query.get_removal_conditions(); - if (query_phrases.size == 0 && removal_conditions.is_empty) - return null; - - foreach (string? field in query.get_fields()) { - debug(" - Field \"%s\" terms:", field); - foreach (SearchQuery.Term? term in query.get_search_terms(field)) { - if (term != null) { - debug(" - \"%s\": %s, %s", - term.original, - term.parsed, - term.stemmed - ); - debug(" SQL terms:"); - foreach (string sql in term.sql) { - debug(" - \"%s\"", sql); - } - } - } - } + FtsSearchQuery query = check_search_query(q); // Do this outside of transaction to catch invalid search ids up-front string? search_ids_sql = get_search_ids_sql(search_ids); - bool strip_greedy = query.should_strip_greedy_results(); Gee.List matching_ids = new Gee.LinkedList(); Gee.Map>? search_matches = null; - yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => { - string blacklisted_ids_sql = do_get_blacklisted_message_ids_sql( - folder_blacklist, cx, cancellable); - - // Every mutation of this query we could think of has been tried, - // and this version was found to minimize running time. We - // discovered that just doing a JOIN between the MessageTable and - // MessageSearchTable was causing a full table scan to order the - // results. When it's written this way, and we force SQLite to use - // the correct index (not sure why it can't figure it out on its - // own), it cuts the running time roughly in half of how it was - // before. The short version is: modify with extreme caution. See - // . - StringBuilder sql = new StringBuilder(); - sql.append(""" - SELECT id - FROM MessageTable - INDEXED BY MessageTableInternalDateTimeTIndex - """); - if (query_phrases.size != 0) { - sql.append(""" - WHERE id IN ( - SELECT docid - FROM MessageSearchTable - WHERE 1=1 - """); - sql_add_query_phrases(sql, query_phrases, "INTERSECT", "docid", ""); - sql.append(")"); - } else - sql.append(" WHERE 1=1"); - - if (blacklisted_ids_sql != "") - sql.append(" AND id NOT IN (%s)".printf(blacklisted_ids_sql)); - if (!Geary.String.is_empty(search_ids_sql)) - sql.append(" AND id IN (%s)".printf(search_ids_sql)); - sql.append(" ORDER BY internaldate_time_t DESC"); - if (limit > 0) - sql.append(" LIMIT ? OFFSET ?"); - - Db.Statement stmt = cx.prepare(sql.str); - int bind_index = sql_bind_query_phrases(stmt, 0, query_phrases); - if (limit > 0) { - stmt.bind_int(bind_index++, limit); - stmt.bind_int(bind_index++, offset); + yield db.exec_transaction_async(RO, (cx) => { + string? excluded_folder_ids_sql = null; + bool exclude_folderless = false; + if (excluded_folders != null) { + excluded_folder_ids_sql = do_get_excluded_folder_ids( + excluded_folders, cx, out exclude_folderless, cancellable + ); } - Gee.HashMap id_map = new Gee.HashMap( + var id_map = new Gee.HashMap( Collection.int64_hash_func, Collection.int64_equal_func); - + Db.Statement stmt = query.get_search_query( + cx, + search_ids_sql, + excluded_folder_ids_sql, + exclude_folderless, + limit, offset + ); + debug("Search SQL: %s", stmt.get_expanded_sql()); Db.Result result = stmt.exec(cancellable); while (!result.finished) { int64 message_id = result.int64_at(0); @@ -707,8 +623,7 @@ private class Geary.ImapDB.Account : BaseObject { result.next(cancellable); } - - if (strip_greedy && !id_map.is_empty) { + if (query.has_stemmed_terms && !id_map.is_empty) { search_matches = do_get_search_matches( cx, query, id_map, cancellable ); @@ -718,111 +633,13 @@ private class Geary.ImapDB.Account : BaseObject { }, cancellable); debug("Matching emails found: %d", matching_ids.size); - - if (!removal_conditions.is_empty) { - yield strip_removal_conditions( - query, matching_ids, removal_conditions, cancellable - ); - } - - if (strip_greedy && search_matches != null) { - strip_greedy_results(query, matching_ids, search_matches); - } - - debug("Final search matches: %d", matching_ids.size); return matching_ids.is_empty ? null : matching_ids; } - // Strip out from the given collection any email that matches the - // given removal conditions - private async void strip_removal_conditions(ImapDB.SearchQuery query, - Gee.Collection matches, - Gee.Map conditions, - GLib.Cancellable? cancellable = null) - throws GLib.Error { - Email.Field required_fields = Geary.Email.Field.FLAGS; - Gee.Iterator iter = matches.iterator(); - - yield db.exec_transaction_async(RO, (cx) => { - while (iter.next()) { - ImapDB.EmailIdentifier id = iter.get(); - MessageRow row = Geary.ImapDB.Folder.do_fetch_message_row( - cx, id.message_id, required_fields, null, cancellable - ); - Geary.EmailFlags? flags = row.get_generic_email_flags(); - if (flags != null) { - foreach (Gee.Map.Entry condition - in conditions.entries) { - if (flags.contains(condition.key) == condition.value) { - iter.remove(); - break; - } - } - } else { - iter.remove(); - } - } - return Db.TransactionOutcome.DONE; - }, cancellable - ); - } - - // Strip out from the given collection of matching ids and results - // for any search results that only contain a hit due to "greedy" - // matching of the stemmed variants on all search terms. - private void strip_greedy_results(SearchQuery query, - Gee.Collection matches, - Gee.Map> results) { - int prestripped_results = matches.size; - Gee.Iterator iter = matches.iterator(); - while (iter.next()) { - // For each matched string in this message, retain the message in the search results - // if it prefix-matches any of the straight-up parsed terms or matches a stemmed - // variant (with only max. difference in their lengths allowed, i.e. not a "greedy" - // match) - EmailIdentifier id = iter.get(); - bool good_match_found = false; - Gee.Set? result = results.get(id); - if (result != null) { - foreach (string match in result) { - foreach (SearchQuery.Term term in query.get_all_terms()) { - // if prefix-matches parsed term, then don't strip - if (match.has_prefix(term.parsed)) { - good_match_found = true; - break; - } - - // if prefix-matches stemmed term w/o doing so - // greedily, then don't strip - if (term.stemmed != null && match.has_prefix(term.stemmed)) { - int diff = match.length - term.stemmed.length; - if (diff <= query.max_difference_match_stem_lengths) { - good_match_found = true; - break; - } - } - } - } - - if (good_match_found) { - break; - } - } - - if (!good_match_found) { - iter.remove(); - matches.remove(id); - } - } - - debug("Stripped %d emails from search for [%s] due to greedy stem matching", - prestripped_results - matches.size, query.raw); - } - public async Gee.Set? get_search_matches_async(Geary.SearchQuery q, Gee.Collection ids, Cancellable? cancellable = null) throws Error { check_open(); - ImapDB.SearchQuery query = check_search_query(q); + FtsSearchQuery query = check_search_query(q); Gee.Set? search_matches = null; yield db.exec_transaction_async(Db.TransactionType.RO, (cx) => { @@ -831,13 +648,12 @@ private class Geary.ImapDB.Account : BaseObject { foreach (ImapDB.EmailIdentifier id in ids) id_map.set(id.message_id, id); - Gee.Map>? match_map = - do_get_search_matches(cx, query, id_map, cancellable); - if (match_map == null || match_map.size == 0) + Gee.Map>? match_map = null; + if (!id_map.is_empty) { + match_map = do_get_search_matches(cx, query, id_map, cancellable); + } + if (match_map == null || match_map.size == 0) { return Db.TransactionOutcome.DONE; - - if (query.should_strip_greedy_results()) { - strip_greedy_results(query, ids, match_map); } search_matches = new Gee.HashSet(); @@ -980,7 +796,7 @@ private class Geary.ImapDB.Account : BaseObject { // the order of seconds, so manually perform the operation var result = cx.prepare( - "SELECT docid FROM MessageSearchTable" + "SELECT rowid FROM MessageSearchTable" ).exec(cancellable); while (!result.finished) { search_ids.add(result.rowid_at(0)); @@ -1061,7 +877,8 @@ private class Geary.ImapDB.Account : BaseObject { Email.REQUIRED_FOR_MESSAGE | Email.Field.ORIGINATORS | Email.Field.RECEIVERS | - Email.Field.SUBJECT + Email.Field.SUBJECT | + Email.Field.FLAGS ); Email.Field db_fields; @@ -1218,68 +1035,6 @@ private class Geary.ImapDB.Account : BaseObject { return !result.finished; } - // Turn the collection of folder paths into actual folder ids. As a - // special case, if "folderless" or orphan emails are to be blacklisted, - // set the out bool to true. - private Gee.Collection do_get_blacklisted_folder_ids(Gee.Collection? folder_blacklist, - Db.Connection cx, out bool blacklist_folderless, Cancellable? cancellable) throws Error { - blacklist_folderless = false; - Gee.ArrayList ids = new Gee.ArrayList(); - - if (folder_blacklist != null) { - foreach (Geary.FolderPath? folder_path in folder_blacklist) { - if (folder_path == null) { - blacklist_folderless = true; - } else { - int64 id; - do_fetch_folder_id(cx, folder_path, true, out id, cancellable); - if (id != Db.INVALID_ROWID) - ids.add(id); - } - } - } - - return ids; - } - - // Return a parameterless SQL statement that selects any message ids that - // are in a blacklisted folder. This is used as a sub-select for the - // search query to omit results from blacklisted folders. - private string do_get_blacklisted_message_ids_sql(Gee.Collection? folder_blacklist, - Db.Connection cx, Cancellable? cancellable) throws Error { - bool blacklist_folderless; - Gee.Collection blacklisted_ids = do_get_blacklisted_folder_ids( - folder_blacklist, cx, out blacklist_folderless, cancellable); - - StringBuilder sql = new StringBuilder(); - if (blacklisted_ids.size > 0) { - sql.append(""" - SELECT message_id - FROM MessageLocationTable - WHERE remove_marker = 0 - AND folder_id IN ( - """); - sql_append_ids(sql, blacklisted_ids); - sql.append(")"); - - if (blacklist_folderless) - sql.append(" UNION "); - } - if (blacklist_folderless) { - sql.append(""" - SELECT id - FROM MessageTable - WHERE id NOT IN ( - SELECT message_id - FROM MessageLocationTable - WHERE remove_marker = 0 - ) - """); - } - - return sql.str; - } - // For a message row id, return a set of all folders it's in, or null if // it's not in any folders. private Gee.Set? @@ -1405,41 +1160,21 @@ private class Geary.ImapDB.Account : BaseObject { // Not using a MultiMap because when traversing want to process all values at once per iteration, // not per key-value - public Gee.Map>? do_get_search_matches(Db.Connection cx, - ImapDB.SearchQuery query, Gee.Map id_map, Cancellable? cancellable) - throws Error { - if (id_map.size == 0) - return null; - - Gee.HashMap query_phrases = query.get_query_phrases(); - if (query_phrases.size == 0) - return null; - - StringBuilder sql = new StringBuilder(); - sql.append(""" - SELECT docid, offsets(MessageSearchTable), * - FROM MessageSearchTable - WHERE docid IN ( - """); - sql_append_ids(sql, id_map.keys); - sql.append(")"); - - StringBuilder condition = new StringBuilder("AND docid IN ("); - sql_append_ids(condition, id_map.keys); - condition.append(")"); - sql_add_query_phrases(sql, query_phrases, "UNION", "docid, offsets(MessageSearchTable), *", - condition.str); - - Db.Statement stmt = cx.prepare(sql.str); - sql_bind_query_phrases(stmt, 0, query_phrases); - - Gee.Map> search_matches = new Gee.HashMap< - ImapDB.EmailIdentifier, Gee.Set>(); + public Gee.Map> do_get_search_matches( + Db.Connection cx, + FtsSearchQuery query, + Gee.Map id_map, + GLib.Cancellable? cancellable + ) throws GLib.Error { + var search_ids_sql = new GLib.StringBuilder(); + sql_append_ids(search_ids_sql, id_map.keys); - Db.Result result = stmt.exec(cancellable); + var search_matches = new Gee.HashMap>(); + Db.Result result = query.get_match_query( + cx, search_ids_sql.str + ).exec(cancellable); while (!result.finished) { int64 docid = result.rowid_at(0); - assert(id_map.has_key(docid)); ImapDB.EmailIdentifier id = id_map.get(docid); // XXX Avoid a crash when "database disk image is @@ -1451,30 +1186,12 @@ private class Geary.ImapDB.Account : BaseObject { continue; } - // offsets() function returns a list of 4 strings that are ints indicating position - // and length of match string in search table corpus - string[] offset_array = result.nonnull_string_at(1).split(" "); + var matches = new Gee.HashSet(); + matches.add_all_array(result.nonnull_string_at(1).split(",")); - Gee.Set matches = new Gee.HashSet(); - - int j = 0; - while (true) { - unowned string[] offset_string = offset_array[j:j+4]; - - int column = int.parse(offset_string[0]); - int byte_offset = int.parse(offset_string[2]); - int size = int.parse(offset_string[3]); - - unowned string text = result.nonnull_string_at(column + 2); - matches.add(text[byte_offset : byte_offset + size].down()); - - j += 4; - if (j >= offset_array.length) - break; - } - - if (search_matches.has_key(id)) + if (search_matches.has_key(id)) { matches.add_all(search_matches.get(id)); + } search_matches.set(id, matches); result.next(cancellable); @@ -1511,16 +1228,49 @@ private class Geary.ImapDB.Account : BaseObject { } } + // Turn the collection of folder paths into actual folder ids. As a + // special case, if "folderless" or orphan emails are to be excluded, + // set the out bool to true. + private string do_get_excluded_folder_ids( + Gee.Collection excluded_folder, + Db.Connection cx, + out bool exclude_folderless, + GLib.Cancellable? cancellable + ) throws GLib.Error { + exclude_folderless = false; + + var ids = new GLib.StringBuilder(); + var is_first = true; + foreach (Geary.FolderPath? folder_path in excluded_folder) { + if (folder_path == null) { + exclude_folderless = true; + } else { + int64 id; + do_fetch_folder_id(cx, folder_path, true, out id, cancellable); + if (id != Db.INVALID_ROWID) { + if (!is_first) { + ids.append_c(','); + } + ids.append(id.to_string()); + is_first = false; + } + } + } + + return ids.str; + } + private inline void check_open() throws GLib.Error { if (!this.db.is_open) { throw new EngineError.OPEN_REQUIRED("Database not open"); } } - private ImapDB.SearchQuery check_search_query(Geary.SearchQuery q) throws Error { - ImapDB.SearchQuery? query = q as ImapDB.SearchQuery; - if (query == null || query.account != this) + private FtsSearchQuery check_search_query(Geary.SearchQuery q) throws Error { + var query = q as FtsSearchQuery; + if (query == null) { throw new EngineError.BAD_PARAMETERS("Geary.SearchQuery not associated with %s", name); + } return query; } diff --git a/src/engine/imap-db/imap-db-database.vala b/src/engine/imap-db/imap-db-database.vala index cd428c89e5dfcfd712fcae8cf14832aa3acdac7e..45286f9b068255c3b12dc374d56d1aafa43c3290 100644 --- a/src/engine/imap-db/imap-db-database.vala +++ b/src/engine/imap-db/imap-db-database.vala @@ -7,6 +7,8 @@ [CCode (cname = "g_utf8_collate_key")] extern string utf8_collate_key(string data, ssize_t len); +extern int sqlite3_register_fts5_tokeniser(Sqlite.Database db); +extern int sqlite3_register_fts5_matches(Sqlite.Database db); extern int sqlite3_register_legacy_tokenizer(Sqlite.Database db); private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase { @@ -629,6 +631,15 @@ private class Geary.ImapDB.Database : Geary.Db.VersionedDatabase { sqlite3_register_legacy_tokenizer(cx.db); } + // Register custom FTS5 tokeniser that uses ICU to correctly + // segment at both Latin and on-Latin (e.g. CJK, Thai) word + // boundaries. + sqlite3_register_fts5_tokeniser(cx.db); + + // Register custom `geary_matches()` FTS5 function that + // obtains matching tokens from FTS queries. + sqlite3_register_fts5_matches(cx.db); + if (cx.db.create_function( UTF8_CASE_INSENSITIVE_FN, 1, // n args diff --git a/src/engine/imap-db/imap-db-folder.vala b/src/engine/imap-db/imap-db-folder.vala index 2c2d4015b116602db26d45c469a1540ee8e035b0..465508080ae6c3d452979d49ad1b78b8703d947d 100644 --- a/src/engine/imap-db/imap-db-folder.vala +++ b/src/engine/imap-db/imap-db-folder.vala @@ -1044,7 +1044,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { sql = new StringBuilder(); sql.append(""" DELETE FROM MessageSearchTable - WHERE docid IN ( + WHERE rowid IN ( """); sql.append(message_ids_sql_sublist.str); sql.append(")"); @@ -1712,6 +1712,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { string? from = email.from != null ? email.from.to_searchable_string() : null; string? cc = email.cc != null ? email.cc.to_searchable_string() : null; string? bcc = email.bcc != null ? email.bcc.to_searchable_string() : null; + string? flags = email.email_flags != null ? email.email_flags.serialise() : null; if (!Geary.String.is_empty(body) || !Geary.String.is_empty(attachments) || @@ -1719,12 +1720,13 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { !Geary.String.is_empty(from) || !Geary.String.is_empty(recipients) || !Geary.String.is_empty(cc) || - !Geary.String.is_empty(bcc)) { + !Geary.String.is_empty(bcc) || + !Geary.String.is_empty(flags)) { Db.Statement stmt = cx.prepare(""" INSERT INTO MessageSearchTable - (docid, body, attachment, subject, from_field, receivers, cc, bcc) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) + (rowid, body, attachments, subject, "from", receivers, cc, bcc, flags) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) """); stmt.bind_rowid(0, message_id); stmt.bind_string(1, body); @@ -1734,6 +1736,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { stmt.bind_string(5, recipients); stmt.bind_string(6, cc); stmt.bind_string(7, bcc); + stmt.bind_string(8, flags); stmt.exec_insert(cancellable); } @@ -1741,7 +1744,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { private static bool do_check_for_message_search_row(Db.Connection cx, int64 message_id, Cancellable? cancellable) throws Error { - Db.Statement stmt = cx.prepare("SELECT 'TRUE' FROM MessageSearchTable WHERE docid=?"); + Db.Statement stmt = cx.prepare("SELECT 'TRUE' FROM MessageSearchTable WHERE rowid=?"); stmt.bind_rowid(0, message_id); Db.Result result = stmt.exec(cancellable); @@ -1923,10 +1926,17 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { // TODO: Unroll loop private void do_set_email_flags(Db.Connection cx, Gee.Map map, Cancellable? cancellable) throws Error { - Db.Statement update_stmt = cx.prepare( - "UPDATE MessageTable SET flags=?, fields = fields | ? WHERE id=?"); + Db.Statement update_message = cx.prepare( + "UPDATE MessageTable SET flags = ?, fields = fields | ? WHERE id = ?" + ); + Db.Statement update_search = cx.prepare(""" + UPDATE MessageSearchTable SET flags = ? WHERE rowid = ? + """ + ); foreach (ImapDB.EmailIdentifier id in map.keys) { + // Find the email location + LocationIdentifier? location = do_get_location_for_id( cx, id, @@ -1940,6 +1950,8 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { ); } + // Update MessageTable + Geary.Imap.EmailFlags? flags = map.get(id) as Geary.Imap.EmailFlags; if (flags == null) { throw new EngineError.BAD_PARAMETERS( @@ -1947,12 +1959,18 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { ); } - update_stmt.reset(Db.ResetScope.CLEAR_BINDINGS); - update_stmt.bind_string(0, flags.message_flags.serialize()); - update_stmt.bind_int(1, Geary.Email.Field.FLAGS); - update_stmt.bind_rowid(2, id.message_id); + update_message.reset(Db.ResetScope.CLEAR_BINDINGS); + update_message.bind_string(0, flags.message_flags.serialize()); + update_message.bind_int(1, Geary.Email.Field.FLAGS); + update_message.bind_rowid(2, id.message_id); + update_message.exec(cancellable); + + // Update MessageSearchTable - update_stmt.exec(cancellable); + update_search.reset(Db.ResetScope.CLEAR_BINDINGS); + update_search.bind_string(0, flags.serialise()); + update_search.bind_rowid(1, id.message_id); + update_search.exec_insert(cancellable); } } @@ -2119,9 +2137,9 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { // existing data, then do a DELETE and INSERT. See Bug 772522. Db.Statement select = cx.prepare(""" - SELECT body, attachment, subject, from_field, receivers, cc, bcc + SELECT body, attachments, subject, "from", receivers, cc, bcc, flags FROM MessageSearchTable - WHERE docid=? + WHERE rowid=? """); select.bind_rowid(0, message_id); Db.Result row = select.exec(cancellable); @@ -2133,6 +2151,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { string? recipients = row.string_at(4); string? cc = row.string_at(5); string? bcc = row.string_at(6); + string? flags = row.string_at(7); if (new_fields.is_any_set(Geary.Email.REQUIRED_FOR_MESSAGE) && email.fields.is_all_set(Geary.Email.REQUIRED_FOR_MESSAGE)) { @@ -2165,16 +2184,22 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { bcc = email.bcc.to_searchable_string(); } + if (new_fields.is_any_set(Geary.Email.Field.FLAGS)) { + if (email.email_flags != null) { + flags = email.email_flags.serialise(); + } + } + Db.Statement del = cx.prepare( - "DELETE FROM MessageSearchTable WHERE docid=?" + "DELETE FROM MessageSearchTable WHERE rowid=?" ); del.bind_rowid(0, message_id); del.exec(cancellable); Db.Statement insert = cx.prepare(""" INSERT INTO MessageSearchTable - (docid, body, attachment, subject, from_field, receivers, cc, bcc) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) + (rowid, body, attachments, subject, "from", receivers, cc, bcc, flags) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) """); insert.bind_rowid(0, message_id); insert.bind_string(1, body); @@ -2184,6 +2209,7 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { insert.bind_string(5, recipients); insert.bind_string(6, cc); insert.bind_string(7, bcc); + insert.bind_string(8, flags); insert.exec_insert(cancellable); } @@ -2230,7 +2256,6 @@ private class Geary.ImapDB.Folder : BaseObject, Geary.ReferenceSemantics { ); post_fields |= Geary.Email.Field.FLAGS; - } } diff --git a/src/engine/imap-db/imap-db-fts5-matches.c b/src/engine/imap-db/imap-db-fts5-matches.c new file mode 100644 index 0000000000000000000000000000000000000000..a213cb4bb3a7716e663123166f23c9e2571e57f0 --- /dev/null +++ b/src/engine/imap-db/imap-db-fts5-matches.c @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2011 Nokia + * + * Author: Carlos Garnacho + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +/* + * Borrowed from the Tracker project (see: tracker-fts-tokenizer.c) + * and adapted for Geary by Michael Gratton . + */ + +#include +#include + +#define unused __attribute__((unused)) + + +typedef struct { + int start; + int end; +} Offset; + +static int +offsets_tokenizer_func (void *data, + unused int flags, + unused const char *token, + unused int n_token, + int start, + int end) +{ + GArray *offsets = data; + Offset offset = { 0 }; + offset.start = start; + offset.end = end; + g_array_append_val(offsets, offset); + return SQLITE_OK; +} + +static void +geary_matches (const Fts5ExtensionApi *api, + Fts5Context *fts_ctx, + sqlite3_context *ctx, + int n_args, + unused sqlite3_value **args) +{ + GString *str; + int rc, n_hits, i; + GArray *offsets = NULL; + gint cur_col = -1; + gboolean first = TRUE; + + if (n_args > 0) { + sqlite3_result_error(ctx, "Invalid argument count", -1); + return; + } + + rc = api->xInstCount(fts_ctx, &n_hits); + if (rc != SQLITE_OK) { + sqlite3_result_null(ctx); + return; + } + + str = g_string_new(NULL); + + for (i = 0; i < n_hits; i++) { + int phrase, col, n_token; + const char *text; + int length; + Offset offset; + + rc = api->xInst(fts_ctx, i, &phrase, &col, &n_token); + if (rc != SQLITE_OK) + break; + + if (first || cur_col != col) { + if (offsets) { + g_array_free(offsets, TRUE); + } + + rc = api->xColumnText(fts_ctx, col, &text, &length); + if (rc != SQLITE_OK) + break; + + offsets = g_array_new(FALSE, FALSE, sizeof(Offset)); + rc = api->xTokenize(fts_ctx, + text, + length, + offsets, + &offsets_tokenizer_func); + if (rc != SQLITE_OK) { + break; + } + + cur_col = col; + } + + first = FALSE; + + if (str->len != 0) { + g_string_append_c(str, ','); + } + + offset = g_array_index(offsets, Offset, n_token); + g_string_append_len(str, text + offset.start, offset.end - offset.start); + } + + if (offsets) { + g_array_free (offsets, TRUE); + } + + if (rc == SQLITE_OK) { + sqlite3_result_text (ctx, str->str, str->len, g_free); + g_string_free (str, FALSE); + } else { + sqlite3_result_error_code (ctx, rc); + g_string_free (str, TRUE); + } +} + +static fts5_api *get_fts5_api (sqlite3 *db) { + int rc = SQLITE_OK; + sqlite3_stmt *stmt; + fts5_api *api = NULL; + + rc = sqlite3_prepare_v2(db, "SELECT fts5(?1)", + -1, &stmt, 0); + if (rc != SQLITE_OK) { + return NULL; + } + + sqlite3_bind_pointer(stmt, 1, (void*) &api, "fts5_api_ptr", NULL); + sqlite3_step(stmt); + sqlite3_finalize(stmt); + + return api; +} + +gboolean sqlite3_register_fts5_matches(sqlite3 *db) { + fts5_api *api; + int rc = SQLITE_OK; + + api = get_fts5_api(db); + if (!api) { + return FALSE; + } + + rc = api->xCreateFunction(api, + "geary_matches", + NULL, + &geary_matches, + NULL); + + return (rc == SQLITE_OK) ? TRUE : FALSE; +} diff --git a/src/engine/imap-db/imap-db-fts5-tokeniser.c b/src/engine/imap-db/imap-db-fts5-tokeniser.c new file mode 100644 index 0000000000000000000000000000000000000000..0468fd5973401841f17f5ca4028506718850ccf8 --- /dev/null +++ b/src/engine/imap-db/imap-db-fts5-tokeniser.c @@ -0,0 +1,277 @@ +/* + * Copyright © 2020 Michael Gratton + * + * This software is licensed under the GNU Lesser General Public License + * (version 2.1 or later). See the COPYING file in this distribution. + */ + +#include +SQLITE_EXTENSION_INIT1 + +#include +#include +#include +#include +#include +#include "unicode/utf.h" +#include "unicode/utypes.h" + +#define unused __attribute__((unused)) + +// Full text search tokeniser for SQLite. This exists since SQLite's +// existing Unicode tokeniser doesn't work with languages that don't +// use spaces as word boundaries. +// +// When generating tokens, the follow process is applied to text using +// the ICU library: +// +// 1. ICU NFKC_Casefold normalisation, handles normalisation, case +// folding and removal of ignorable characters such as accents. +// +// 2. ICU word-boundary tokenisation, splits both on words at spaces +// and other punctuation, and also using a dictionary lookup for +// languages that do not use spaces (CJK, Thai, etc) +// +// Note: Since SQLite is single-threaded, it's safe to use single +// instances of ICU services for all calls for a single tokeniser. + +#define NORM_BUF_LEN 8 +#define TOKEN_BUF_LEN 8 + +typedef struct { + // Singleton object, threadsafe, does not need to be deleted. + const UNormalizer2 * norm; + + // Stateful object, not threadsafe, must be deleted. + UBreakIterator *iter; +} IcuTokeniser; + + +static int icu_create(unused void *context, + unused const char **args, + unused int n_args, + Fts5Tokenizer **ret) { + const UNormalizer2 *norm; + UBreakIterator *iter; + IcuTokeniser *tokeniser; + UErrorCode err = U_ZERO_ERROR; + + norm = unorm2_getNFKCCasefoldInstance(&err); + if (U_FAILURE(err)) { + g_warning("Error constructing ICU normaliser: %s", u_errorName(err)); + return SQLITE_ABORT; + } + + // The given locale doesn't matter here since it ICU doesn't + // (currently) use different rules for different word breaking + // languages that uses spaces as word boundaries, and uses + // dictionary look-ups for CJK and other scripts that don't. + iter = ubrk_open(UBRK_WORD, "en", NULL, 0, &err); + if (U_FAILURE(err)) { + g_warning("Error constructing ICU word-breaker: %s", u_errorName(err)); + ubrk_close(tokeniser->iter); + return SQLITE_ABORT; + } + + tokeniser = g_new0(IcuTokeniser, 1); + tokeniser->norm = norm; + tokeniser->iter = iter; + *ret = (Fts5Tokenizer *) tokeniser; + + return SQLITE_OK; +} + +static void icu_delete(Fts5Tokenizer *fts5_tokeniser) { + IcuTokeniser *tokeniser = (IcuTokeniser *) fts5_tokeniser; + + ubrk_close(tokeniser->iter); + g_free(tokeniser); +} + +static int icu_tokenise(Fts5Tokenizer *fts5_tokeniser, + void *context, + unused int flags, + const char *chars, + int32_t chars_len, + int (*token_callback)(void*, int, const char*, int, int, int)) { + int ret = SQLITE_OK; + IcuTokeniser *tokeniser = (IcuTokeniser *) fts5_tokeniser; + UErrorCode err = U_ZERO_ERROR; + + const UNormalizer2 *norm = tokeniser->norm; + GArray *wide_chars = NULL; + GArray *wide_offsets = NULL; + UChar *wide_data = NULL; + gsize wide_data_len_long = 0; + int32_t wide_data_len = 0; + + UChar norm_buf[NORM_BUF_LEN] = {0}; + + UBreakIterator *iter = tokeniser->iter; + int32_t start_index, current_index = 0; + char *token_buf = NULL; + int32_t token_buf_len = NORM_BUF_LEN; + + // Normalisation. + // + // SQLite needs the byte-index of tokens found in the chars, but + // ICU doesn't support UTF-8-based normalisation. So convert UTF-8 + // input to UTF-16 char-by-char and record the byte offsets for + // each, so that when converting back to UTF-8 the byte offsets + // can be determined. + + wide_chars = g_array_sized_new(FALSE, FALSE, sizeof(UChar), chars_len); + wide_offsets = g_array_sized_new(FALSE, FALSE, sizeof(int32_t), chars_len); + + for (int32_t byte_offset = 0; byte_offset < chars_len;) { + UChar wide_char; + int32_t norm_len; + int32_t start_byte_offset = byte_offset; + + U8_NEXT_OR_FFFD(chars, byte_offset, chars_len, wide_char); + norm_len = unorm2_normalize(norm, + &wide_char, 1, + norm_buf, NORM_BUF_LEN, + &err); + if (U_FAILURE(err)) { + g_warning("Token text normalisation failed"); + err = SQLITE_ABORT; + goto cleanup; + } + + // NFKC may decompose a single character into multiple + // characters, e.g. 'fi' into "fi", '…' into "...". + for (int i = 0; i < norm_len; i++) { + g_array_append_val(wide_chars, norm_buf[i]); + g_array_append_val(wide_offsets, start_byte_offset); + } + } + + // Word breaking. + // + // UTF-16 is passed to the tokeniser, hence its indexes are + // character-based. Use the offset array to convert those back to + // byte indexes for individual tokens. + + wide_data = (UChar *) g_array_steal(wide_chars, &wide_data_len_long); + wide_data_len = (int32_t) wide_data_len_long; + + ubrk_setText(iter, wide_data, wide_data_len, &err); + if (U_FAILURE(err)) { + err = SQLITE_ABORT; + g_warning("Setting word break iterator text failed"); + goto cleanup; + } + start_index = 0; + current_index = ubrk_first(iter); + token_buf = g_malloc0(sizeof(char) * token_buf_len); + while (current_index != UBRK_DONE && ret == SQLITE_OK) { + int32_t status = ubrk_getRuleStatus(iter); + int32_t token_char_len = current_index - start_index; + if (token_char_len > 0 && + !(status >= UBRK_WORD_NONE && status < UBRK_WORD_NONE_LIMIT) && + !(status >= UBRK_WORD_NUMBER && status < UBRK_WORD_NUMBER_LIMIT)) { + int32_t token_byte_len = 0; + int32_t token_byte_start = 0; + int32_t token_byte_end = 0; + + for (;;) { + u_strToUTF8WithSub(token_buf, token_buf_len, &token_byte_len, + wide_data + start_index, token_char_len, + 0xFFFD, NULL, + &err); + + if (U_SUCCESS(err)) { + break; + } else if (err == U_BUFFER_OVERFLOW_ERROR) { + token_buf_len *= 2; + token_buf = g_realloc(token_buf, sizeof(char) * token_buf_len); + err = U_ZERO_ERROR; + } else { + err = SQLITE_ABORT; + g_warning("Conversion to UTF-8 failed"); + goto cleanup; + } + } + + token_byte_start = g_array_index(wide_offsets, int32_t, start_index); + if (current_index < wide_data_len) { + token_byte_end = g_array_index(wide_offsets, int32_t, current_index); + } else { + token_byte_end = chars_len; + } + + ret = token_callback(context, + 0, + token_buf, + token_byte_len, + token_byte_start, + token_byte_end); + } + + start_index = current_index; + current_index = ubrk_next(iter); + } + + cleanup: + g_free(wide_data); + g_array_unref(wide_chars); + g_array_unref(wide_offsets); + g_free(token_buf); + + return ret; +} + +static fts5_api *get_fts5_api(sqlite3 *db) { + int rc = SQLITE_OK; + sqlite3_stmt *stmt; + fts5_api *api = NULL; + + rc = sqlite3_prepare_v2(db, "SELECT fts5(?1)", + -1, &stmt, 0); + if (rc != SQLITE_OK) { + return NULL; + } + + sqlite3_bind_pointer(stmt, 1, (void*) &api, "fts5_api_ptr", NULL); + sqlite3_step(stmt); + sqlite3_finalize(stmt); + + return api; +} + +static const fts5_tokenizer icu_tokeniser = { + icu_create, + icu_delete, + icu_tokenise +}; + +gboolean sqlite3_register_fts5_tokeniser(sqlite3 *db) { + fts5_api *api; + fts5_tokenizer *tokeniser = (fts5_tokenizer *) &icu_tokeniser; + int rc = SQLITE_OK; + + api = get_fts5_api(db); + if (!api) { + return FALSE; + } + + rc = api->xCreateTokenizer(api, + "geary_tokeniser", + NULL, + tokeniser, + NULL); + + return (rc == SQLITE_OK) ? TRUE : FALSE; +} + +// Entry point for external loadable library, required when using +// command line SQLite tool. The name of this function must match the +// name of the shared module. +int sqlite3_gearytokeniser_init(sqlite3 *db, + unused char **error_message, + const sqlite3_api_routines *api) { + g_info("Loading geary_tokeniser\n"); + SQLITE_EXTENSION_INIT2(api); + return sqlite3_register_fts5_tokeniser(db) ? SQLITE_OK : SQLITE_ABORT; +} diff --git a/src/engine/imap-db/imap-db-gc.vala b/src/engine/imap-db/imap-db-gc.vala index dd090ea8ef51b1ed3edeb3c2723edd962a49bd0c..44d268a5566de8f3067d4a86ca2acce694776f67 100644 --- a/src/engine/imap-db/imap-db-gc.vala +++ b/src/engine/imap-db/imap-db-gc.vala @@ -433,7 +433,7 @@ private class Geary.ImapDB.GC { stmt = cx.prepare(""" DELETE FROM MessageSearchTable - WHERE docid = ? + WHERE rowid = ? """); stmt.bind_rowid(0, message_id); diff --git a/src/engine/imap-db/imap-db-search-query.vala b/src/engine/imap-db/imap-db-search-query.vala deleted file mode 100644 index 968ac6eacc7d6873f63531566829d2ac013ecf81..0000000000000000000000000000000000000000 --- a/src/engine/imap-db/imap-db-search-query.vala +++ /dev/null @@ -1,720 +0,0 @@ -/* - * Copyright 2016 Software Freedom Conservancy Inc. - * Copyright 2019 Michael Gratton . - * - * This software is licensed under the GNU Lesser General Public License - * (version 2.1 or later). See the COPYING file in this distribution. - */ - -/** - * Internal implementation of {@link Geary.SearchQuery}. - */ -private class Geary.ImapDB.SearchQuery : Geary.SearchQuery { - - // These characters are chosen for being commonly used to continue a single word (such as - // extended last names, i.e. "Lars-Eric") or in terms commonly searched for in an email client, - // i.e. unadorned mailbox addresses. Note that characters commonly used for wildcards or that - // would be interpreted as wildcards by SQLite are not included here. - private const unichar[] SEARCH_TERM_CONTINUATION_CHARS = { '-', '_', '.', '@' }; - - // Search operator field names, eg: "to:foo@example.com" or "is:unread" - private const string SEARCH_OP_ATTACHMENT = "attachment"; - private const string SEARCH_OP_BCC = "bcc"; - private const string SEARCH_OP_BODY = "body"; - private const string SEARCH_OP_CC = "cc"; - private const string SEARCH_OP_FROM = "from_field"; - private const string SEARCH_OP_IS = "is"; - private const string SEARCH_OP_SUBJECT = "subject"; - private const string SEARCH_OP_TO = "receivers"; - - // Operators allowing finding mail addressed to "me" - private const string[] SEARCH_OP_TO_ME_FIELDS = { - SEARCH_OP_BCC, - SEARCH_OP_CC, - SEARCH_OP_TO, - }; - - // The addressable op value for "me" - private const string SEARCH_OP_ADDRESSABLE_VALUE_ME = "me"; - - // Search operator field values - private const string SEARCH_OP_VALUE_READ = "read"; - private const string SEARCH_OP_VALUE_STARRED = "starred"; - private const string SEARCH_OP_VALUE_UNREAD = "unread"; - - - /** - * Various associated state with a single term in a search query. - */ - internal class Term : GLib.Object { - - /** - * The original tokenized search term with minimal other processing performed. - * - * For example, punctuation might be removed, but no casefolding has occurred. - */ - public string original { get; private set; } - - /** - * The parsed tokenized search term. - * - * Casefolding and other normalizing text operations have been performed. - */ - public string parsed { get; private set; } - - /** - * The stemmed search term. - * - * Only used if stemming is being done ''and'' the stem is different than the {@link parsed} - * term. - */ - public string? stemmed { get; private set; } - - /** - * A list of terms ready for binding to an SQLite statement. - * - * This should include prefix operators and quotes (i.e. ["party"] or [party*]). These texts - * are guaranteed not to be null or empty strings. - */ - public Gee.List sql { get; private set; default = new Gee.ArrayList(); } - - /** - * Returns true if the {@link parsed} term is exact-match only (i.e. starts with quotes) and - * there is no {@link stemmed} variant. - */ - public bool is_exact { get { return parsed.has_prefix("\"") && stemmed == null; } } - - public Term(string original, string parsed, string? stemmed, string? sql_parsed, string? sql_stemmed) { - this.original = original; - this.parsed = parsed; - this.stemmed = stemmed; - - // for now, only two variations: the parsed string and the stemmed; since stem is usually - // shorter (and will be first in the OR statement), include it first - if (!String.is_empty(sql_stemmed)) - sql.add(sql_stemmed); - - if (!String.is_empty(sql_parsed)) - sql.add(sql_parsed); - } - } - - - // Maps of localised search operator names and values to their - // internal forms - private static Gee.HashMap search_op_names = - new Gee.HashMap(); - private static Gee.ArrayList search_op_to_me_values = - new Gee.ArrayList(); - private static Gee.ArrayList search_op_from_me_values = - new Gee.ArrayList(); - private static Gee.HashMap search_op_is_values = - new Gee.HashMap(); - - - static construct { - // Map of possibly translated search operator names and values - // to English/internal names and values. We include the - // English version anyway so that when translations provide a - // localised version of the operator names but have not also - // translated the user manual, the English version in the - // manual still works. - - // Can be typed in the search box like "attachment:file.txt" - // to find messages with attachments with a particular name. - // - // The translated string must be a single word (use '-', '_' - // or similar to combine words into one), should be short, and - // also match the translation in "search.page" of the Geary User - // Guide. - search_op_names.set(C_("Search operator", "attachment"), SEARCH_OP_ATTACHMENT); - // Can be typed in the search box like - // "bcc:johndoe@example.com" to find messages bcc'd to a - // particular person. - // - // The translated string must be a single word (use '-', '_' - // or similar to combine words into one), should be short, and - // also match the translation in "search.page" of the Geary User - // Guide. - search_op_names.set(C_("Search operator", "bcc"), SEARCH_OP_BCC); - // Can be typed in the search box like "body:word" to find - // "word" only if it occurs in the body of a message. - // - // The translated string must be a single word (use '-', '_' - // or similar to combine words into one), should be short, and - // also match the translation in "search.page" of the Geary User - // Guide. - search_op_names.set(C_("Search operator", "body"), SEARCH_OP_BODY); - // Can be typed in the search box like - // "cc:johndoe@example.com" to find messages cc'd to a - // particular person. - // - // The translated string must be a single word (use '-', '_' - // or similar to combine words into one), should be short, and - // also match the translation in "search.page" of the Geary User - // Guide. - search_op_names.set(C_("Search operator", "cc"), SEARCH_OP_CC); - // Can be typed in the search box like - // "from:johndoe@example.com" to find messages from a - // particular sender. - // - // The translated string must be a single word (use '-', '_' - // or similar to combine words into one), should be short, and - // also match the translation in "search.page" of the Geary User - // Guide. - search_op_names.set(C_("Search operator", "from"), SEARCH_OP_FROM); - // Can be typed in the search box like "is:unread" to find - // messages that are read, unread, or starred. - // - // The translated string must be a single word (use '-', '_' - // or similar to combine words into one), should be short, and - // also match the translation in "search.page" of the Geary User - // Guide. - search_op_names.set(C_("Search operator", "is"), SEARCH_OP_IS); - // Can be typed in the search box like "subject:word" to find - // "word" only if it occurs in the subject of a message. - // - // The translated string must be a single word (use '-', '_' - // or similar to combine words into one), should be short, and - // also match the translation in "search.page" of the Geary - // User Guide. - search_op_names.set(C_("Search operator", "subject"), SEARCH_OP_SUBJECT); - // Can be typed in the search box like - // "to:johndoe@example.com" to find messages received by a - // particular person. - // - // The translated string must be a single word (use '-', '_' - // or similar to combine words into one), should be short, and - // also match the translation in "search.page" of the Geary User - // Guide. - search_op_names.set(C_("Search operator", "to"), SEARCH_OP_TO); - - // And the English language versions - search_op_names.set("attachment", SEARCH_OP_ATTACHMENT); - search_op_names.set("bcc", SEARCH_OP_BCC); - search_op_names.set("body", SEARCH_OP_BODY); - search_op_names.set("cc", SEARCH_OP_CC); - search_op_names.set("from", SEARCH_OP_FROM); - search_op_names.set("is", SEARCH_OP_IS); - search_op_names.set("subject", SEARCH_OP_SUBJECT); - search_op_names.set("to", SEARCH_OP_TO); - - // Can be typed in the search box after "to:", "cc:" and - // "bcc:" e.g.: "to:me". Matches conversations that are - // addressed to the user. - // - // The translated string must be a single word (use '-', '_' - // or similar to combine words into one), should be short, and - // also match the translation in "search.page" of the Geary User - // Guide. - search_op_to_me_values.add( - C_("Search operator value - mail addressed to the user", "me") - ); - search_op_to_me_values.add(SEARCH_OP_ADDRESSABLE_VALUE_ME); - - // Can be typed in the search box after "from:" i.e.: - // "from:me". Matches conversations were sent by the user. - // - // The translated string must be a single word (use '-', '_' - // or similar to combine words into one), should be short, and - // also match the translation in "search.page" of the Geary User - // Guide. - search_op_from_me_values.add( - C_("Search operator value - mail sent by the user", "me") - ); - search_op_from_me_values.add(SEARCH_OP_ADDRESSABLE_VALUE_ME); - - // Can be typed in the search box after "is:" i.e.: - // "is:read". Matches conversations that are flagged as read. - // - // The translated string must be a single word (use '-', '_' - // or similar to combine words into one), should be short, and - // also match the translation in "search.page" of the Geary User - // Guide. - search_op_is_values.set( - C_("'is:' search operator value", "read"), SEARCH_OP_VALUE_READ - ); - // Can be typed in the search box after "is:" i.e.: - // "is:starred". Matches conversations that are flagged as - // starred. - // - // The translated string must be a single word (use '-', '_' - // or similar to combine words into one), should be short, and - // also match the translation in "search.page" of the Geary User - // Guide. - search_op_is_values.set( - C_("'is:' search operator value", "starred"), SEARCH_OP_VALUE_STARRED - ); - // Can be typed in the search box after "is:" i.e.: - // "is:unread". Matches conversations that are flagged unread. - // - // The translated string must be a single word (use '-', '_' - // or similar to combine words into one), should be short, and - // also match the translation in "search.page" of the Geary User - // Guide. - search_op_is_values.set( - C_("'is:' search operator value", "unread"), SEARCH_OP_VALUE_UNREAD - ); - search_op_is_values.set(SEARCH_OP_VALUE_READ, SEARCH_OP_VALUE_READ); - search_op_is_values.set(SEARCH_OP_VALUE_STARRED, SEARCH_OP_VALUE_STARRED); - search_op_is_values.set(SEARCH_OP_VALUE_UNREAD, SEARCH_OP_VALUE_UNREAD); - } - - - /** - * Associated {@link ImapDB.Account}. - */ - public weak ImapDB.Account account { get; private set; } - - /** - * Returns whether stemming may be used when exerting the search. - * - * Determined by {@link Geary.SearchQuery.Strategy} passed to the - * constructor. - */ - public bool allow_stemming { get; private set; } - - /** - * Minimum length of the term before stemming is allowed. - * - * This prevents short words that might be stemmed from being stemmed. - * - * Overridden by {@link allow_stemming}. Determined by the {@link - * Geary.SearchQuery.Strategy} passed to the constructor. - */ - public int min_term_length_for_stemming { get; private set; } - - - /** - * Maximum difference in lengths between term and stemmed variant. - * - * This prevents long words from being stemmed to much shorter - * words (which creates opportunities for greedy matching). - * - * Overridden by {@link allow_stemming}. Determined by the {@link - * Geary.SearchQuery.Strategy} passed to the constructor. - */ - public int max_difference_term_stem_lengths { get; private set; } - - /** - * Maximum difference in lengths between a matched word and the stemmed variant it matched - * against. - * - * This prevents long words being matched to short stem variants (which creates opportunities - * for greedy matching). - * - * Overridden by {@link allow_stemming}. Determined by the {@link - * Geary.SearchQuery.Strategy} passed to the constructor. - */ - public int max_difference_match_stem_lengths { get; private set; } - - // Maps search operator field names such as "to", "cc", "is" to - // their search term values. Note that terms without an operator - // are stored with null as the key. Not using a MultiMap because - // we (might) need a guarantee of order. - private Gee.HashMap> field_map - = new Gee.HashMap>(); - - // A list of all search terms, regardless of search op field name - private Gee.ArrayList all = new Gee.ArrayList(); - - private SnowBall.Stemmer stemmer; - - - public async SearchQuery(Geary.Account owner, - ImapDB.Account local, - string query, - Geary.SearchQuery.Strategy strategy, - GLib.Cancellable? cancellable) { - base(owner, query, strategy); - this.account = local; - this.stemmer = new SnowBall.Stemmer(find_appropriate_search_stemmer()); - - switch (strategy) { - case Strategy.EXACT: - allow_stemming = false; - min_term_length_for_stemming = int.MAX; - max_difference_term_stem_lengths = 0; - max_difference_match_stem_lengths = 0; - break; - - case Strategy.CONSERVATIVE: - allow_stemming = true; - min_term_length_for_stemming = 6; - max_difference_term_stem_lengths = 2; - max_difference_match_stem_lengths = 2; - break; - - case Strategy.AGGRESSIVE: - allow_stemming = true; - min_term_length_for_stemming = 4; - max_difference_term_stem_lengths = 4; - max_difference_match_stem_lengths = 3; - break; - - case Strategy.HORIZON: - allow_stemming = true; - min_term_length_for_stemming = 0; - max_difference_term_stem_lengths = int.MAX; - max_difference_match_stem_lengths = int.MAX; - break; - } - - yield prepare(cancellable); - } - - public Gee.Collection get_fields() { - return field_map.keys; - } - - public Gee.List? get_search_terms(string? field) { - return field_map.has_key(field) ? field_map.get(field) : null; - } - - public Gee.List? get_all_terms() { - return all; - } - - // For some searches, results are stripped if they're too - // "greedy", but this requires examining the matched text, which - // has an expense to fetch, so avoid doing so unless necessary - internal bool should_strip_greedy_results() { - // HORIZON strategy is configured in such a way to allow all - // stemmed variants to match, so don't do any stripping in - // that case - // - // If any of the search terms is exact-match (no prefix - // matching) or none have stemmed variants, then don't do - // stripping of "greedy" stemmed matching (because in both - // cases, there are none) - - bool strip_results = true; - if (this.strategy == Geary.SearchQuery.Strategy.HORIZON) - strip_results = false; - else if (traverse(this.all).any( - term => term.stemmed == null || term.is_exact)) { - strip_results = false; - } - return strip_results; - } - - internal Gee.Map get_removal_conditions() { - Gee.Map conditions = - new Gee.HashMap(); - foreach (string? field in this.field_map.keys) { - if (field == SEARCH_OP_IS) { - Gee.List? terms = get_search_terms(field); - foreach (Term term in terms) - if (term.parsed == SEARCH_OP_VALUE_READ) - conditions.set(new NamedFlag("UNREAD"), true); - else if (term.parsed == SEARCH_OP_VALUE_UNREAD) - conditions.set(new NamedFlag("UNREAD"), false); - else if (term.parsed == SEARCH_OP_VALUE_STARRED) - conditions.set(new NamedFlag("FLAGGED"), false); - } - } - return conditions; - } - - // Return a map of column -> phrase, to use as WHERE column MATCH 'phrase'. - internal Gee.HashMap get_query_phrases() { - Gee.HashMap phrases = new Gee.HashMap(); - foreach (string? field in field_map.keys) { - Gee.List? terms = get_search_terms(field); - if (terms == null || terms.size == 0 || field == "is") - continue; - - // Each Term is an AND but the SQL text within in are OR ... this allows for - // each user term to be AND but the variants of each term are or. So, if terms are - // [party] and [eventful] and stems are [parti] and [event], the search would be: - // - // (party* OR parti*) AND (eventful* OR event*) - // - // Obviously with stemming there's the possibility of the stemmed variant being nothing - // but a broader search of the original term (such as event* and eventful*) but do both - // to determine from each hit result which term caused the hit, and if it's too greedy - // a match of the stemmed variant, it can be stripped from the results. - // - // Note that this uses SQLite's "standard" query syntax for MATCH, where AND is implied - // (and would be treated as search term if included), parentheses are not allowed, and - // OR has a higher precedence than AND. So the above example in standard syntax is: - // - // party* OR parti* eventful* OR event* - StringBuilder builder = new StringBuilder(); - foreach (Term term in terms) { - if (term.sql.size == 0) - continue; - - if (term.is_exact) { - builder.append_printf("%s ", term.parsed); - } else { - bool is_first_sql = true; - foreach (string sql in term.sql) { - if (!is_first_sql) - builder.append(" OR "); - - builder.append_printf("%s ", sql); - is_first_sql = false; - } - } - } - - phrases.set(field ?? "MessageSearchTable", builder.str); - } - - return phrases; - } - - private async void prepare(GLib.Cancellable? cancellable) { - // A few goals here: - // 1) Append an * after every term so it becomes a prefix search - // (see ) - // 2) Strip out common words/operators that might get interpreted as - // search operators - // 3) Parse each word into a list of which field it applies to, so - // you can do "to:johndoe@example.com thing" (quotes excluded) - // to find messages to John containing the word thing - // We ignore everything inside quotes to give the user a way to - // override our algorithm here. The idea is to offer one search query - // syntax for Geary that we can use locally and via IMAP, etc. - - string quote_balanced = this.raw; - if (Geary.String.count_char(this.raw, '"') % 2 != 0) { - // Remove the last quote if it's not balanced. This has the - // benefit of showing decent results as you type a quoted phrase. - int last_quote = this.raw.last_index_of_char('"'); - assert(last_quote >= 0); - quote_balanced = this.raw.splice(last_quote, last_quote + 1, " "); - } - - string[] words = quote_balanced.split_set(" \t\r\n()%*\\"); - bool in_quote = false; - foreach (string s in words) { - string? field = null; - - s = s.strip(); - - int quotes = Geary.String.count_char(s, '"'); - if (!in_quote && quotes > 0) { - in_quote = true; - --quotes; - } - - Term? term; - if (in_quote) { - // HACK: this helps prevent a syntax error when the user types - // something like from:"somebody". If we ever properly support - // quotes after : we can get rid of this. - term = new Term(s, s, null, s.replace(":", " "), null); - } else { - string original = s; - - // Some common search phrases we don't respect and - // therefore don't want to fall through to search - // results - // XXX translate these - string lower = s.down(); - switch (lower) { - case "": - case "and": - case "or": - case "not": - case "near": - continue; - - default: - if (lower.has_prefix("near/")) - continue; - break; - } - - if (s.has_prefix("-")) - s = s.substring(1); - - if (s == "") - continue; - - // TODO: support quotes after : - string[] parts = s.split(":", 2); - if (parts.length > 1) - field = extract_field_from_token(parts, ref s); - - if (field == SEARCH_OP_IS) { - // s will have been de-translated - term = new Term(original, s, null, null, null); - } else { - // SQL MATCH syntax for parsed term - string? sql_s = "%s*".printf(s); - - // stem the word, but if stemmed and stem is - // simply shorter version of original term, only - // prefix-match search for it (i.e. avoid - // searching for [archive* OR archiv*] when that's - // the same as [archiv*]), otherwise search for - // both - string? stemmed = yield stem_search_term(s, cancellable); - - string? sql_stemmed = null; - if (stemmed != null) { - sql_stemmed = "%s*".printf(stemmed); - if (s.has_prefix(stemmed)) - sql_s = null; - } - - // if term contains continuation characters, treat - // as exact search to reduce effects of tokenizer - // splitting terms w/ punctuation in them - if (String.contains_any_char(s, SEARCH_TERM_CONTINUATION_CHARS)) - s = "\"%s\"".printf(s); - - term = new Term(original, s, stemmed, sql_s, sql_stemmed); - } - } - - if (in_quote && quotes % 2 != 0) - in_quote = false; - - // Finally, add the term - if (!this.field_map.has_key(field)) { - this.field_map.set(field, new Gee.ArrayList()); - } - this.field_map.get(field).add(term); - this.all.add(term); - } - } - - private string? extract_field_from_token(string[] parts, ref string token) { - string? field = null; - if (Geary.String.is_empty_or_whitespace(parts[1])) { - // User stopped at "field:", treat it as if they hadn't - // typed the ':' - token = parts[0]; - } else { - field = search_op_names.get(parts[0].down()); - if (field == SEARCH_OP_IS) { - string? value = search_op_is_values.get(parts[1].down()); - if (value != null) { - token = value; - } else { - // Unknown op value, pretend there is no search op - field = null; - } - } else if (field == SEARCH_OP_FROM && - parts[1].down() in search_op_from_me_values) { - // Search for all addresses on the account. Bug 768779 - token = this.account.account_information.primary_mailbox.address; - } else if (field in SEARCH_OP_TO_ME_FIELDS && - parts[1].down() in search_op_to_me_values) { - // Search for all addresses on the account. Bug 768779 - token = this.account.account_information.primary_mailbox.address; - } else if (field != null) { - token = parts[1]; - } - } - return field; - } - - /** - * Converts unquoted search terms into a stemmed search term. - * - * Prior experience with the Snowball stemmer indicates it is too - * aggressive for our tastes when coupled with prefix-matching of - * all unquoted terms (see - * https://bugzilla.gnome.org/show_bug.cgi?id=713179). - * - * This method is part of a larger strategy designed to dampen - * that aggressiveness without losing the benefits of stemming - * entirely: The database's FTS table uses no stemming, but - * libstemmer is used to generate stemmed search terms. - * Post-search processing is then to strip results which are too - * "greedy" due to prefix-matching the stemmed variant. - * - * Some heuristics are in place simply to determine if stemming should occur: - * - * # If stemming is unallowed, no stemming occurs. - * # If the term is < min. term length for stemming, no stemming occurs. - * # If the stemmer returns a stem that is the same as the original term, no stemming occurs. - * # If the difference between the stemmed word and the original term is more than - * maximum allowed, no stemming occurs. This works under the assumption that if - * the user has typed a long word, they do not want to "go back" to searching for a much - * shorter version of it. (For example, "accountancies" stems to "account"). - * - * Otherwise, the stem for the term is returned. - */ - private async string? stem_search_term(string term, - GLib.Cancellable? cancellable) { - if (!this.allow_stemming) - return null; - - int term_length = term.length; - if (term_length < this.min_term_length_for_stemming) - return null; - - string? stemmed = this.stemmer.stem(term, term.length); - if (String.is_empty(stemmed)) { - debug("Empty stemmed term returned for \"%s\"", term); - return null; - } - - // If same term returned, treat as non-stemmed - if (stemmed == term) - return null; - - // Don't search for stemmed words that are significantly shorter than the user's search term - if (term_length - stemmed.length > this.max_difference_term_stem_lengths) { - debug("Stemmed \"%s\" dropped searching for \"%s\": too much distance in terms", - stemmed, term); - - return null; - } - - debug("Search processing: term -> stem is \"%s\" -> \"%s\"", term, stemmed); - return stemmed; - } - - private string find_appropriate_search_stemmer() { - // Unfortunately, the stemmer library only accepts the full language - // name for the stemming algorithm. This translates between the user's - // preferred language ISO 639-1 code and our available stemmers. - // FIXME: the available list here is determined by what's included in - // src/sqlite3-unicodesn/CMakeLists.txt. We should pass that list in - // instead of hardcoding it here. - foreach (string l in Intl.get_language_names()) { - switch (l) { - case "ar": return "arabic"; - case "eu": return "basque"; - case "ca": return "catalan"; - case "da": return "danish"; - case "nl": return "dutch"; - case "en": return "english"; - case "fi": return "finnish"; - case "fr": return "french"; - case "de": return "german"; - case "el": return "greek"; - case "hi": return "hindi"; - case "hu": return "hungarian"; - case "id": return "indonesian"; - case "ga": return "irish"; - case "it": return "italian"; - case "lt": return "lithuanian"; - case "ne": return "nepali"; - case "no": return "norwegian"; - case "pt": return "portuguese"; - case "ro": return "romanian"; - case "ru": return "russian"; - case "sr": return "serbian"; - case "es": return "spanish"; - case "sv": return "swedish"; - case "ta": return "tamil"; - case "tr": return "turkish"; - } - } - - // Default to English because it seems to be on average the language - // most likely to be present in emails, regardless of the user's - // language setting. This is not an exact science, and search results - // should be ok either way in most cases. - return "english"; - } - -} diff --git a/src/engine/imap-engine/imap-engine-generic-account.vala b/src/engine/imap-engine/imap-engine-generic-account.vala index ef1ba7b4d16c1f4d3a536a71ccbd46fca3d70ea3..c3ae260385578f908800ea26acbe00284d96c0b0 100644 --- a/src/engine/imap-engine/imap-engine-generic-account.vala +++ b/src/engine/imap-engine/imap-engine-generic-account.vala @@ -60,6 +60,8 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account { private Gee.Map> special_search_names = new Gee.HashMap>(); + private SnowBall.Stemmer stemmer; + protected GenericAccount(AccountInformation config, ImapDB.Account local, @@ -107,6 +109,7 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account { this.db_vacuum_monitor = local.vacuum_monitor; compile_special_search_names(); + this.stemmer = new SnowBall.Stemmer(find_appropriate_search_stemmer()); } /** {@inheritDoc} */ @@ -572,13 +575,11 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account { } /** {@inheritDoc} */ - public override async SearchQuery new_search_query(string query, - SearchQuery.Strategy strategy, - GLib.Cancellable? cancellable) - throws GLib.Error { - return yield new ImapDB.SearchQuery( - this, local, query, strategy, cancellable - ); + public override SearchQuery new_search_query( + Gee.List expression, + string text + ) throws GLib.Error { + return new FtsSearchQuery(expression, text, this.stemmer); } public override async Gee.MultiMap? local_search_message_id_async( @@ -1063,6 +1064,49 @@ private abstract class Geary.ImapEngine.GenericAccount : Geary.Account { throw new EngineError.OPEN_REQUIRED("Account %s not opened", to_string()); } + private string find_appropriate_search_stemmer() { + // Unfortunately, the stemmer library only accepts the full + // language name for the stemming algorithm. This translates + // between the desktop sessions's preferred language ISO 639-1 + // code and the available stemmers. + // + // FIXME: the available list here is determined by what's + // included in libstemmer. We should pass that list in instead + // of hardcoding it here. + foreach (string l in Intl.get_language_names()) { + switch (l) { + case "ar": return "arabic"; + case "eu": return "basque"; + case "ca": return "catalan"; + case "da": return "danish"; + case "nl": return "dutch"; + case "en": return "english"; + case "fi": return "finnish"; + case "fr": return "french"; + case "de": return "german"; + case "el": return "greek"; + case "hi": return "hindi"; + case "hu": return "hungarian"; + case "id": return "indonesian"; + case "ga": return "irish"; + case "it": return "italian"; + case "lt": return "lithuanian"; + case "ne": return "nepali"; + case "no": return "norwegian"; + case "pt": return "portuguese"; + case "ro": return "romanian"; + case "ru": return "russian"; + case "sr": return "serbian"; + case "es": return "spanish"; + case "sv": return "swedish"; + case "ta": return "tamil"; + case "tr": return "turkish"; + } + } + + return "english"; + } + private void on_operation_error(AccountOperation op, Error error) { notify_service_problem(this.information.incoming, error); } diff --git a/src/engine/meson.build b/src/engine/meson.build index 992053eaa2cac019b11aeaa5233d10462c19b912..a29daf0b05f8010c4f8ba5d49bcd7faf0026a1f9 100644 --- a/src/engine/meson.build +++ b/src/engine/meson.build @@ -67,6 +67,7 @@ engine_vala_sources = files( 'common/common-contact-harvester.vala', 'common/common-contact-store-impl.vala', + 'common/common-fts-search-query.vala', 'common/common-message-data.vala', 'db/db.vala', @@ -176,9 +177,10 @@ engine_vala_sources = files( 'imap-db/imap-db-database.vala', 'imap-db/imap-db-email-identifier.vala', 'imap-db/imap-db-folder.vala', + 'imap-db/imap-db-fts5-matches.c', + 'imap-db/imap-db-fts5-tokeniser.c', 'imap-db/imap-db-gc.vala', 'imap-db/imap-db-message-row.vala', - 'imap-db/imap-db-search-query.vala', 'imap-db/imap-db-sqlite.c', 'imap-engine/imap-engine.vala', @@ -323,6 +325,7 @@ engine_dependencies = [ gio, glib, gmime, + icu_uc, libmath, libstemmer, libxml, @@ -336,10 +339,17 @@ endif engine_build_dir = meson.current_build_dir() +engine_c_args = geary_c_args +engine_vala_args = geary_vala_args + +# Suppress SQLite loadable module init code +engine_c_args += [ + '-D', 'SQLITE_CORE', +] + # Generate internal VAPI for unit testing. See Meson issue # https://github.com/mesonbuild/meson/issues/1781 for official # internal VAPI support. -engine_vala_args = geary_vala_args engine_vala_args += [ '--internal-header=@0@/geary-engine-internal.h'.format(engine_build_dir), '--internal-vapi=@0@/geary-engine-internal.vapi'.format(engine_build_dir) @@ -363,7 +373,7 @@ engine_lib = static_library('geary-engine', dependencies: engine_dependencies, include_directories: config_h_dir, vala_args: engine_vala_args, - c_args: geary_c_args, + c_args: engine_c_args, ) # Dummy target to tell Meson about the internal VAPI given the @@ -401,3 +411,14 @@ engine_internal_dep = declare_dependency( include_directories: include_directories('.'), sources: engine_internal_header_fixup ) + +# Compile a loadable library containing the custom tokeniser so SQLite +# command line app can still be used. +tokeniser_lib = shared_library('geary-tokeniser', + files('imap-db/imap-db-fts5-tokeniser.c'), + dependencies: [ glib, icu_uc, sqlite ], + c_args: [ + # Enable GLib structured logging + '-DG_LOG_USE_STRUCTURED', + ], +) diff --git a/src/meson.build b/src/meson.build index 7cbfa80a8118a207c9a541caa04da48a73299900..d2b018038514ee79c14d5cf95b6eca617450ca9c 100644 --- a/src/meson.build +++ b/src/meson.build @@ -49,8 +49,6 @@ geary_c_args = [ '-DGCK_API_SUBJECT_TO_CHANGE', '-DGCR_API_SUBJECT_TO_CHANGE', '-DGOA_API_IS_SUBJECT_TO_CHANGE', - '-DSQLITE_ENABLE_FTS4', - '-DSQLITE_ENABLE_FTS4_UNICODE61' ] subdir('engine') @@ -151,6 +149,7 @@ foreach dep : valadoc_dependencies valadoc_dep_args += '--pkg' valadoc_dep_args += dep.name() endforeach +valadoc_dep_args += [ '--pkg', 'icu-uc' ] valadoc_dep_args += [ '--pkg', 'libstemmer' ] valadoc_dep_args += [ '--pkg', 'posix' ] diff --git a/test/client/util/util-email-test.vala b/test/client/util/util-email-test.vala index fb3c365f5a61029c913f63d565766c8075730f3a..b3e45d7d7f3f4aa945944e8cd9b949e9d4a9074c 100644 --- a/test/client/util/util-email-test.vala +++ b/test/client/util/util-email-test.vala @@ -7,14 +7,45 @@ public class Util.Email.Test : TestCase { + + private Application.Configuration? config = null; + private Geary.AccountInformation? account = null; + + public Test() { - base("UtilEmailTest"); + base("Util.Email.Test"); add_test("null_originator", null_originator); add_test("from_originator", from_originator); add_test("sender_originator", sender_originator); add_test("reply_to_originator", reply_to_originator); add_test("reply_to_via_originator", reply_to_via_originator); add_test("plain_via_originator", plain_via_originator); + add_test("empty_search_query", empty_search_query); + add_test("plain_search_terms", plain_search_terms); + add_test("continuation_search_terms", continuation_search_terms); + add_test("i18n_search_terms", i18n_search_terms); + add_test("multiple_search_terms", multiple_search_terms); + add_test("quoted_search_terms", quoted_search_terms); + add_test("text_op_terms", text_op_terms); + add_test("text_op_single_me_terms", text_op_single_me_terms); + add_test("text_op_multiple_me_terms", text_op_multiple_me_terms); + add_test("boolean_op_terms", boolean_op_terms); + add_test("invalid_op_terms", invalid_op_terms); + } + + public override void set_up() { + this.config = new Application.Configuration(Application.Client.SCHEMA_ID); + this.account = new Geary.AccountInformation( + "test", + OTHER, + new Mock.CredentialsMediator(), + new Geary.RFC822.MailboxAddress("test", "test@example.com") + ); + } + + public override void tear_down() { + this.config = null; + this.account = null; } public void null_originator() throws GLib.Error { @@ -95,6 +126,337 @@ public class Util.Email.Test : TestCase { assert_equal(originator.address, "bot@example.com"); } + public void empty_search_query() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + assert_collection(test_article.parse_query("")).is_empty(); + } + + public void plain_search_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var simple1 = test_article.parse_query("hello"); + assert_collection(simple1).size(1); + assert_true(simple1[0] is Geary.SearchQuery.EmailTextTerm); + var text1 = simple1[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text1.target == ALL); + assert_true(text1.matching_strategy == CONSERVATIVE); + assert_collection(text1.terms).size(1).contains("hello"); + + var simple2 = test_article.parse_query("h"); + assert_collection(simple2).size(1); + assert_true(simple2[0] is Geary.SearchQuery.EmailTextTerm); + var text2 = simple2[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text2.terms).size(1).contains("h"); + + var simple3 = test_article.parse_query(" h"); + assert_collection(simple3).size(1); + assert_true(simple3[0] is Geary.SearchQuery.EmailTextTerm); + var text3 = simple3[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text3.terms).size(1).contains("h"); + + var simple4 = test_article.parse_query("h "); + assert_collection(simple4).size(1); + assert_true(simple4[0] is Geary.SearchQuery.EmailTextTerm); + var text4 = simple4[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text4.terms).size(1).contains("h"); + } + + public void continuation_search_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), + this.account + ); + + var simple1 = test_article.parse_query("hello-there"); + assert_collection(simple1).size(1); + assert_true(simple1[0] is Geary.SearchQuery.EmailTextTerm); + var text1 = simple1[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text1.terms).size(1).contains("hello-there"); + + var simple2 = test_article.parse_query("hello-"); + assert_collection(simple2).size(1); + assert_true(simple2[0] is Geary.SearchQuery.EmailTextTerm); + var text2 = simple2[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text2.terms).size(1).contains("hello-"); + + var simple3 = test_article.parse_query("test@example.com"); + assert_collection(simple2).size(1); + assert_true(simple3[0] is Geary.SearchQuery.EmailTextTerm); + var text3 = simple3[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text3.terms).size(1).contains("test@example.com"); + } + + public void i18n_search_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), + this.account + ); + + var thai = test_article.parse_query("ภาษาไทย"); + assert_collection(thai).size(2); + assert_true(thai[0] is Geary.SearchQuery.EmailTextTerm); + assert_true(thai[1] is Geary.SearchQuery.EmailTextTerm); + assert_collection( + ((Geary.SearchQuery.EmailTextTerm) thai[0]).terms + ).size(1).contains("ภาษา"); + assert_collection( + ((Geary.SearchQuery.EmailTextTerm) thai[1]).terms + ).size(1).contains("ไทย"); + + var chinese = test_article.parse_query("男子去"); + assert_collection(chinese).size(2); + assert_true(chinese[0] is Geary.SearchQuery.EmailTextTerm); + assert_true(chinese[1] is Geary.SearchQuery.EmailTextTerm); + assert_collection( + ((Geary.SearchQuery.EmailTextTerm) chinese[0]).terms + ).size(1).contains("男子"); + assert_collection( + ((Geary.SearchQuery.EmailTextTerm) chinese[1]).terms + ).size(1).contains("去"); + } + + public void multiple_search_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var multiple = test_article.parse_query("hello there"); + assert_collection(multiple).size(2); + assert_true(multiple[0] is Geary.SearchQuery.EmailTextTerm); + assert_true(multiple[1] is Geary.SearchQuery.EmailTextTerm); + assert_collection( + ((Geary.SearchQuery.EmailTextTerm) multiple[0]).terms + ).size(1).contains("hello"); + assert_collection( + ((Geary.SearchQuery.EmailTextTerm) multiple[1]).terms + ).size(1).contains("there"); + } + + public void quoted_search_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var simple1 = test_article.parse_query("\"hello\""); + assert_collection(simple1).size(1); + assert_true(simple1[0] is Geary.SearchQuery.EmailTextTerm); + var text1 = simple1[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text1.target == ALL); + assert_true(text1.matching_strategy == EXACT); + assert_collection(text1.terms).size(1).contains("hello"); + + var simple2 = test_article.parse_query("\"h\""); + assert_collection(simple2).size(1); + assert_true(simple2[0] is Geary.SearchQuery.EmailTextTerm); + var text2 = simple2[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text2.terms).size(1).contains("h"); + + var simple3 = test_article.parse_query(" \"h\""); + assert_collection(simple3).size(1); + assert_true(simple3[0] is Geary.SearchQuery.EmailTextTerm); + var text3 = simple3[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text3.terms).size(1).contains("h"); + + var simple4 = test_article.parse_query("\"h"); + assert_collection(simple4).size(1); + assert_true(simple4[0] is Geary.SearchQuery.EmailTextTerm); + var text4 = simple4[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text4.terms).size(1).contains("h"); + + var simple5 = test_article.parse_query("\"h\" "); + assert_collection(simple5).size(1); + assert_true(simple5[0] is Geary.SearchQuery.EmailTextTerm); + var text5 = simple5[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text5.terms).size(1).contains("h"); + + var simple6 = test_article.parse_query("\"hello there\""); + assert_collection(simple6).size(1); + assert_true(simple6[0] is Geary.SearchQuery.EmailTextTerm); + var text6 = simple6[0] as Geary.SearchQuery.EmailTextTerm; + assert_collection(text6.terms).size(1).contains("hello there"); + } + + public void text_op_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var simple_body = test_article.parse_query("body:hello"); + assert_collection(simple_body).size(1); + assert_true(simple_body[0] is Geary.SearchQuery.EmailTextTerm, "type"); + var text_body = simple_body[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_body.target == BODY, "target"); + assert_true(text_body.matching_strategy == CONSERVATIVE, "strategy"); + assert_collection(text_body.terms).size(1).contains("hello"); + + var simple_body_quoted = test_article.parse_query("body:\"hello\""); + assert_collection(simple_body_quoted).size(1); + assert_true(simple_body_quoted[0] is Geary.SearchQuery.EmailTextTerm); + var text_body_quoted = simple_body_quoted[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_body_quoted.target == BODY); + assert_true(text_body_quoted.matching_strategy == EXACT); + assert_collection(text_body_quoted.terms).size(1).contains("hello"); + + var simple_attach_name = test_article.parse_query("attachment:hello"); + assert_collection(simple_attach_name).size(1); + assert_true(simple_attach_name[0] is Geary.SearchQuery.EmailTextTerm); + var text_attch_name = simple_attach_name[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_attch_name.target == ATTACHMENT_NAME); + + var simple_bcc = test_article.parse_query("bcc:hello"); + assert_collection(simple_bcc).size(1); + assert_true(simple_bcc[0] is Geary.SearchQuery.EmailTextTerm); + var text_bcc = simple_bcc[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_bcc.target == BCC); + + var simple_cc = test_article.parse_query("cc:hello"); + assert_collection(simple_cc).size(1); + assert_true(simple_cc[0] is Geary.SearchQuery.EmailTextTerm); + var text_cc = simple_cc[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_cc.target == CC); + + var simple_from = test_article.parse_query("from:hello"); + assert_collection(simple_from).size(1); + assert_true(simple_from[0] is Geary.SearchQuery.EmailTextTerm); + var text_from = simple_from[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_from.target == FROM); + + var simple_subject = test_article.parse_query("subject:hello"); + assert_collection(simple_subject).size(1); + assert_true(simple_subject[0] is Geary.SearchQuery.EmailTextTerm); + var text_subject = simple_subject[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_subject.target == SUBJECT); + + var simple_to = test_article.parse_query("to:hello"); + assert_collection(simple_to).size(1); + assert_true(simple_to[0] is Geary.SearchQuery.EmailTextTerm); + var text_to = simple_to[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_to.target == TO); + } + + public void text_op_single_me_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var simple_to = test_article.parse_query("to:me"); + assert_collection(simple_to).size(1); + assert_true(simple_to[0] is Geary.SearchQuery.EmailTextTerm); + var text_to = simple_to[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_to.target == TO); + assert_true(text_to.matching_strategy == EXACT); + assert_collection(text_to.terms).size(1).contains("test@example.com"); + + var simple_cc = test_article.parse_query("cc:me"); + assert_collection(simple_cc).size(1); + assert_true(simple_cc[0] is Geary.SearchQuery.EmailTextTerm); + var text_cc = simple_cc[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_cc.target == CC); + assert_true(text_cc.matching_strategy == EXACT); + assert_collection(text_cc.terms).size(1).contains("test@example.com"); + + var simple_bcc = test_article.parse_query("bcc:me"); + assert_collection(simple_bcc).size(1); + assert_true(simple_bcc[0] is Geary.SearchQuery.EmailTextTerm); + var text_bcc = simple_bcc[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_bcc.target == BCC); + assert_true(text_bcc.matching_strategy == EXACT); + assert_collection(text_bcc.terms).size(1).contains("test@example.com"); + + var simple_from = test_article.parse_query("from:me"); + assert_collection(simple_from).size(1); + assert_true(simple_from[0] is Geary.SearchQuery.EmailTextTerm); + var text_from = simple_from[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_from.target == FROM); + assert_true(text_from.matching_strategy == EXACT); + assert_collection(text_from.terms).size(1).contains("test@example.com"); + } + + public void text_op_multiple_me_terms() throws GLib.Error { + this.account.append_sender( + new Geary.RFC822.MailboxAddress("test2", "test2@example.com") + ); + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var to = test_article.parse_query("to:me"); + assert_collection(to).size(1); + assert_true(to[0] is Geary.SearchQuery.EmailTextTerm); + var text_to = to[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text_to.target == TO); + assert_true(text_to.matching_strategy == EXACT); + assert_collection(text_to.terms).size(2).contains( + "test@example.com" + ).contains( + "test@example.com" + ); + } + + public void boolean_op_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var simple_unread = test_article.parse_query("is:unread"); + assert_true(simple_unread[0] is Geary.SearchQuery.EmailFlagTerm); + var bool_unread = simple_unread[0] as Geary.SearchQuery.EmailFlagTerm; + assert_true( + bool_unread.value.equal_to(Geary.EmailFlags.UNREAD), "unread flag" + ); + assert_false(bool_unread.is_negated, "unread negated"); + + var simple_read = test_article.parse_query("is:read"); + assert_true(simple_read[0] is Geary.SearchQuery.EmailFlagTerm); + var bool_read = simple_read[0] as Geary.SearchQuery.EmailFlagTerm; + assert_true( + bool_read.value.equal_to(Geary.EmailFlags.UNREAD), "read flag" + ); + assert_true(bool_read.is_negated, "read negated"); + + var simple_starred = test_article.parse_query("is:starred"); + assert_true(simple_starred[0] is Geary.SearchQuery.EmailFlagTerm); + var bool_starred = simple_starred[0] as Geary.SearchQuery.EmailFlagTerm; + assert_true( + bool_starred.value.equal_to(Geary.EmailFlags.FLAGGED), "starred flag" + ); + assert_false(bool_starred.is_negated, "starred negated"); + } + + public void invalid_op_terms() throws GLib.Error { + var test_article = new SearchExpressionFactory( + this.config.get_search_strategy(), this.account + ); + + var simple1 = test_article.parse_query("body:"); + assert_collection(simple1).size(1); + assert_true(simple1[0] is Geary.SearchQuery.EmailTextTerm); + var text1 = simple1[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text1.target == ALL); + assert_true(text1.matching_strategy == CONSERVATIVE); + assert_collection(text1.terms).size(1).contains("body:"); + + var simple2 = test_article.parse_query("blarg:"); + assert_collection(simple2).size(1); + assert_true(simple2[0] is Geary.SearchQuery.EmailTextTerm); + var text2 = simple2[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text2.target == ALL); + assert_true(text2.matching_strategy == CONSERVATIVE); + assert_collection(text2.terms).size(1).contains("blarg:"); + + var simple3 = test_article.parse_query("blarg:hello"); + assert_collection(simple3).size(1); + assert_true(simple3[0] is Geary.SearchQuery.EmailTextTerm); + var text3 = simple3[0] as Geary.SearchQuery.EmailTextTerm; + assert_true(text3.target == ALL); + assert_true(text3.matching_strategy == CONSERVATIVE); + assert_collection(text3.terms).size(1).contains("blarg:hello"); + } + private Geary.Email new_email(Geary.RFC822.MailboxAddress? from, Geary.RFC822.MailboxAddress? sender, Geary.RFC822.MailboxAddress? reply_to) diff --git a/test/engine/common/common-fts-search-query-test.vala b/test/engine/common/common-fts-search-query-test.vala new file mode 100644 index 0000000000000000000000000000000000000000..790f89954a8975ca031ea64f77ebf59456847595 --- /dev/null +++ b/test/engine/common/common-fts-search-query-test.vala @@ -0,0 +1,258 @@ +/* + * Copyright © 2020 Michael Gratton + * + * This software is licensed under the GNU Lesser General Public License + * (version 2.1 or later). See the COPYING file in this distribution. + */ + +public class Geary.FtsSearchQueryTest : TestCase { + + + private GLib.File? tmp_dir = null; + private Geary.AccountInformation? config = null; + private ImapDB.Account? account = null; + private SnowBall.Stemmer? stemmer = null; + + + public FtsSearchQueryTest() { + base("Geary.FtsSearchQueryTest"); + add_test("email_text_terms", email_text_terms); + add_test("email_text_terms_stemmed", email_text_terms_stemmed); + add_test("email_text_terms_specific", email_text_terms_specific); + add_test("email_text_terms_disjunction", email_text_terms_disjunction); + add_test("email_flag_terms", email_flag_terms); + } + + public override void set_up() throws GLib.Error { + this.tmp_dir = GLib.File.new_for_path( + GLib.DirUtils.make_tmp("geary-common-fts-search-query-test-XXXXXX") + ); + + this.config = new Geary.AccountInformation( + "test", + ServiceProvider.OTHER, + new Mock.CredentialsMediator(), + new Geary.RFC822.MailboxAddress(null, "test@example.com") + ); + + this.account = new ImapDB.Account( + config, + this.tmp_dir, + GLib.File.new_for_path(_SOURCE_ROOT_DIR).get_child("sql") + ); + this.account.open_async.begin( + null, + this.async_completion + ); + this.account.open_async.end(async_result()); + + this.stemmer = new SnowBall.Stemmer("english"); + + Db.Context.enable_sql_logging = true; + } + + public override void tear_down() throws GLib.Error { + Db.Context.enable_sql_logging = false; + + this.stemmer = null; + + this.account.close_async.begin( + null, + this.async_completion + ); + this.account.close_async.end(async_result()); + this.account = null; + this.config = null; + + delete_file(this.tmp_dir); + this.tmp_dir = null; + } + + public void email_text_terms() throws GLib.Error { + var single_all_term = new_search_query( + { new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "test")}, + "test" + ); + assert_queries(single_all_term); + + var multiple_all_term = new_search_query( + { + new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "foo"), + new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "bar") + }, + "foo bar" + ); + assert_queries(multiple_all_term); + + var all_to_term = new_search_query( + { + new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "foo"), + new Geary.SearchQuery.EmailTextTerm(TO, EXACT, "bar") + }, + "foo to:bar" + ); + assert_queries(all_to_term); + + var all_to_all_term = new_search_query( + { + new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "foo"), + new Geary.SearchQuery.EmailTextTerm(TO, EXACT, "bar"), + new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "baz") + }, + "foo to:bar baz" + ); + assert_queries(all_to_all_term); + } + + public void email_text_terms_stemmed() throws GLib.Error { + var single_all_term = new_search_query( + { new Geary.SearchQuery.EmailTextTerm(ALL, CONSERVATIVE, "universal")}, + "universal" + ); + assert_queries(single_all_term); + + var multiple_all_term = new_search_query( + { + new Geary.SearchQuery.EmailTextTerm(ALL, CONSERVATIVE, "universal"), + new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "bar") + }, + "universal bar" + ); + assert_queries(multiple_all_term); + + var all_to_term = new_search_query( + { + new Geary.SearchQuery.EmailTextTerm(ALL, CONSERVATIVE, "universal"), + new Geary.SearchQuery.EmailTextTerm(TO, EXACT, "bar") + }, + "universal to:bar" + ); + assert_queries(all_to_term); + } + + public void email_text_terms_specific() throws GLib.Error { + var single_term = new_search_query( + { new Geary.SearchQuery.EmailTextTerm(SUBJECT, EXACT, "test")}, + "subject:test" + ); + assert_queries(single_term); + + var missing_term = new_search_query( + { new Geary.SearchQuery.EmailTextTerm(SUBJECT, EXACT, "")}, + "subject:" + ); + assert_queries(missing_term); + + var conflicting_property = new_search_query( + { new Geary.SearchQuery.EmailTextTerm(ALL, EXACT, "subject:")}, + "subject:" + ); + assert_queries(conflicting_property); + + var conflicting_property_and_term = new_search_query( + { new Geary.SearchQuery.EmailTextTerm(SUBJECT, EXACT, "subject:")}, + "subject:subject:" + ); + assert_queries(conflicting_property_and_term); + } + + public void email_text_terms_disjunction() throws GLib.Error { + var multiple_all = new_search_query( + { + new Geary.SearchQuery.EmailTextTerm.disjunction( + ALL, EXACT, new Gee.ArrayList.wrap({ "foo", "bar" }) + ) + }, + "(foo|bar)" + ); + assert_queries(multiple_all); + + var multiple_subject = new_search_query( + { + new Geary.SearchQuery.EmailTextTerm.disjunction( + ALL, EXACT, new Gee.ArrayList.wrap({ "foo", "bar" }) + ) + }, + "subject:(foo|bar)" + ); + assert_queries(multiple_subject); + } + + public void email_flag_terms() throws GLib.Error { + var unread = new_search_query( + { new Geary.SearchQuery.EmailFlagTerm(Geary.EmailFlags.UNREAD)}, + "is:unread" + ); + assert_queries(unread); + + var read_term = new Geary.SearchQuery.EmailFlagTerm(Geary.EmailFlags.UNREAD); + read_term.is_negated = true; + var read = new_search_query({ read_term }, "is:read"); + assert_queries(read); + + var flagged = new_search_query( + { new Geary.SearchQuery.EmailFlagTerm(Geary.EmailFlags.FLAGGED)}, + "is:flagged" + ); + assert_queries(flagged); + } + + private FtsSearchQuery new_search_query(Geary.SearchQuery.Term[] ops, + string raw) + throws GLib.Error { + return new FtsSearchQuery( + new Gee.ArrayList.wrap(ops), + raw, + this.stemmer + ); + } + + private void assert_queries(FtsSearchQuery query) throws GLib.Error { + var search = query.get_search_query( + this.account.db.get_primary_connection(), + null, + null, + false, + 10, + 0 + ); + search.exec(null); + + var search_with_excluded_ids = query.get_search_query( + this.account.db.get_primary_connection(), + null, + "10,20,30,40", + false, + 10, + 0 + ); + search_with_excluded_ids.exec(null); + + var search_with_exclude_folderless = query.get_search_query( + this.account.db.get_primary_connection(), + null, + null, + true, + 10, + 0 + ); + search_with_exclude_folderless.exec(null); + + var search_with_both = query.get_search_query( + this.account.db.get_primary_connection(), + null, + "10,20,30,40", + true, + 10, + 0 + ); + search_with_both.exec(null); + + var match = query.get_match_query( + this.account.db.get_primary_connection(), + "" + ); + match.exec(null); + } + +} diff --git a/test/engine/imap-db/imap-db-database-test.vala b/test/engine/imap-db/imap-db-database-test.vala index 9d69ac19b10b4e84721c82865da0acd412f853e1..60c69ef4bceb4672ac7e77301a01a93596efcc68 100644 --- a/test/engine/imap-db/imap-db-database-test.vala +++ b/test/engine/imap-db/imap-db-database-test.vala @@ -106,7 +106,7 @@ class Geary.ImapDB.DatabaseTest : TestCase { ); db.open.end(async_result()); - assert_equal(db.get_schema_version(), 29, "Post-upgrade version"); + assert_equal(db.get_schema_version(), 30, "Post-upgrade version"); // Since schema v22 deletes the re-creates all attachments, // attachment 12 should no longer exist on the file system and diff --git a/test/meson.build b/test/meson.build index a50f12a6d09daf872dca254d736db2aff8139364..a4fe2c292a3af90fbee448da2f59dddd0b92c970 100644 --- a/test/meson.build +++ b/test/meson.build @@ -36,6 +36,7 @@ test_engine_sources = [ 'engine/app/app-conversation-set-test.vala', 'engine/common/common-contact-store-impl-test.vala', 'engine/common/common-contact-harvester-test.vala', + 'engine/common/common-fts-search-query-test.vala', 'engine/db/db-database-test.vala', 'engine/db/db-versioned-database-test.vala', 'engine/imap/command/imap-create-command-test.vala', diff --git a/test/mock/mock-account.vala b/test/mock/mock-account.vala index 173b2ecaeb64a83cc8860cda7c522d6502665c8f..f617f969304cd57f897d7243b6d8355006448d02 100644 --- a/test/mock/mock-account.vala +++ b/test/mock/mock-account.vala @@ -222,12 +222,11 @@ public class Mock.Account : Geary.Account, ); } - public override async Geary.SearchQuery - new_search_query(string raw, - Geary.SearchQuery.Strategy strategy, - GLib.Cancellable? cancellable) - throws GLib.Error { - return new SearchQuery(this, raw); + public override Geary.SearchQuery new_search_query( + Gee.List expression, + string text + ) throws GLib.Error { + return new SearchQuery(expression, text); } public override async Gee.Collection? diff --git a/test/mock/mock-search-query.vala b/test/mock/mock-search-query.vala index 6653f96deaa3c4ba56b5029b40e90ce7fd00459d..310cde7ef22b639d02810a0363b3cc230b04ec92 100644 --- a/test/mock/mock-search-query.vala +++ b/test/mock/mock-search-query.vala @@ -7,8 +7,9 @@ public class Mock.SearchQuery : Geary.SearchQuery { - internal SearchQuery(Geary.Account owner, string raw) { - base(owner, raw, Geary.SearchQuery.Strategy.EXACT); + internal SearchQuery(Gee.List expression, + string raw) { + base(expression, raw); } } diff --git a/test/test-engine.vala b/test/test-engine.vala index 0260ef6a8ea05f6897065a4e0daa11a03cceb159..e829b0bf5c84a5ea1dd90589f555d7696ca19713 100644 --- a/test/test-engine.vala +++ b/test/test-engine.vala @@ -73,6 +73,9 @@ int main(string[] args) { engine.add_suite(new Geary.ImapDB.EmailIdentifierTest().suite); engine.add_suite(new Geary.ImapDB.FolderTest().suite); + // Depends on ImapDB working + engine.add_suite(new Geary.FtsSearchQueryTest().suite); + engine.add_suite(new Geary.ImapEngine.AccountProcessorTest().suite); engine.add_suite(new Geary.ImapEngine.GenericAccountTest().suite);