diff options
Diffstat (limited to 'core/fulltext.cpp')
-rw-r--r-- | core/fulltext.cpp | 291 |
1 files changed, 291 insertions, 0 deletions
diff --git a/core/fulltext.cpp b/core/fulltext.cpp new file mode 100644 index 000000000..5ed0c9a17 --- /dev/null +++ b/core/fulltext.cpp @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "fulltext.h" +#include "dive.h" +#include "divesite.h" +#include "trip.h" +#include <QLocale> +#include <map> + +#ifndef SUBSURFACE_MOBILE + +// This class caches each dives words, so that we can unregister a dive from the full text search +struct full_text_cache { + std::vector<QString> words; +}; + +// The FullText-search class +class FullText { + std::map<QString, std::vector<dive *>> words; // Dives that belong to each word +public: + + void reload(); // Rebuild from current dive_table + void registerDive(struct dive *d); // Note: can be called repeatedly + void unregisterDive(struct dive *d); // Note: can be called repeatedly + void unregisterAll(); // Unregister all dives in the dive table + FullTextResult find(const FullTextQuery &q, StringFilterMode mode) const; // Find dives matchin all words. +private: + void registerWords(struct dive *d, const std::vector<QString> &w); + void unregisterWords(struct dive *d, const std::vector<QString> &w); + std::vector<dive *> findDives(const QString &s, StringFilterMode mode) const; // Find dives matching a given word. +}; + +// This class doesn't depend on any other objects, we might just initialize it at startup. +static FullText self; + +// C-interface functions + +extern "C" { + +void fulltext_register(struct dive *d) +{ + self.registerDive(d); +} + +void fulltext_unregister(struct dive *d) +{ + self.unregisterDive(d); +} + +void fulltext_unregister_all() +{ + self.unregisterAll(); +} + +void fulltext_reload() +{ + self.reload(); +} + +} // extern "C" + +// C++-only interface functions +FullTextResult fulltext_find_dives(const FullTextQuery &q, StringFilterMode mode) +{ + return self.find(q, mode); +} + +// Check whether a single dive matches the fulltext criterion +bool fulltext_dive_matches(const struct dive *d, const FullTextQuery &q, StringFilterMode mode) +{ + if (!q.doit()) + return true; + if (!d->full_text) + return false; + auto matchFunc = + mode == StringFilterMode::EXACT ? [](const QString &s1, const QString &s2) { return s1 == s2; } : + mode == StringFilterMode::STARTSWITH ? [](const QString &s1, const QString &s2) { return s1.startsWith(s2); } : + /* mode == StringFilterMode::SUBSTRING ? */ [](const QString &s1, const QString &s2) { return s1.contains(s2); }; + const std::vector<QString> &words = d->full_text->words; + for (const QString &search: q.words) { + if (std::any_of(words.begin(), words.end(), [&search,matchFunc](const QString &w) { return matchFunc(w, search); })) + return true; + } + return false; +} + +// Class implementation + +// Take a text and tokenize it into words. Normalize the words to upper case +// and add to a given list, if not already in list. +// We might think about limiting the lower size of words we store. +// Note: we convert to QString before tokenization because we rely in +// Qt's isPunct() function. +static void tokenize(QString s, std::vector<QString> &res) +{ + if (s.isEmpty()) + return; + + QLocale loc; + int size = s.size(); + int pos = 0; + while (pos < size) { + // Skip whitespace and punctuation + while (s[pos].isSpace() || s[pos].isPunct()) { + if (++pos >= size) + return; + } + int end = pos; + while (end < size && !s[end].isSpace() && !s[end].isPunct()) + ++end; + QString word = loc.toUpper(s.mid(pos, end - pos)); // Sad: Locale::toUpper can't use QStringRef - we have to copy the substring! + pos = end; + + if (find(res.begin(), res.end(), word) == res.end()) + res.push_back(word); + } +} + +// Get all words of a dive +static std::vector<QString> getWords(const dive *d) +{ + std::vector<QString> res; + tokenize(QString(d->notes), res); + tokenize(QString(d->divemaster), res); + tokenize(QString(d->buddy), res); + tokenize(QString(d->suit), res); + for (int i = 0; i < d->cylinders.nr; ++i) { + const cylinder_t &cyl = d->cylinders.cylinders[i]; + tokenize(QString(cyl.type.description), res); + } + for (int i = 0; i < d->weightsystems.nr; ++i) { + const weightsystem_t &ws = d->weightsystems.weightsystems[i]; + tokenize(QString(ws.description), res); + } + // TODO: We should tokenize all dive-sites and trips first and then + // take the tokens from a cache. + if (d->dive_site) + tokenize(d->dive_site->name, res); + // TODO: We should index trips separately! + if (d->divetrip) + tokenize(d->divetrip->location, res); + return res; +} + +void FullText::reload() +{ + words.clear(); + int i; + dive *d; + for_each_dive(i, d) + registerDive(d); +} + +void FullText::registerDive(struct dive *d) +{ + if (d->full_text) { + unregisterWords(d, d->full_text->words); + } else { + d->full_text = new full_text_cache; + } + d->full_text->words = getWords(d); + registerWords(d, d->full_text->words); +} + +void FullText::unregisterDive(struct dive *d) +{ + if (!d->full_text) + return; + unregisterWords(d, d->full_text->words); + delete d->full_text; + d->full_text = nullptr; +} + +void FullText::unregisterAll() +{ + int i; + dive *d; + for_each_dive(i, d) { + delete d->full_text; + d->full_text = nullptr; + } + words.clear(); +} + +// Register words of a dive. +void FullText::registerWords(struct dive *d, const std::vector<QString> &w) +{ + for (const QString &word: w) { + std::vector<dive *> &entry = words[word]; + if (std::find(entry.begin(), entry.end(), d) == entry.end()) + entry.push_back(d); + } +} + +// Unregister words of a dive. +void FullText::unregisterWords(struct dive *d, const std::vector<QString> &w) +{ + for (const QString &word: w) { + auto it = words.find(word); + if (it == words.end()) { + qWarning("FullText::unregisterWords: didn't find word '%s' in index!?", qPrintable(word)); + continue; + } + std::vector<dive *> &entry = it->second; + entry.erase(std::remove(entry.begin(), entry.end(), d)); + if (entry.empty()) + words.erase(it); + } +} + +// Add dives from second array to first, if not yet there +void combineDives(std::vector<dive *> &to, const std::vector<dive *> &from) +{ + for (dive *d: from) { + if (std::find(to.begin(), to.end(), d) == to.end()) + to.push_back(d); + } +} + +std::vector<dive *> FullText::findDives(const QString &s, StringFilterMode mode) const +{ + switch (mode) { + case StringFilterMode::EXACT: + default: { + // Try to access a single word + auto it = words.find(s); + if (it == words.end()) + return {}; + return it->second; + } + case StringFilterMode::STARTSWITH: { + // Find all words that start with a substring. We use the fact + // that these words must form a contiguous block, since the words are + // ordered lexicographically. + auto it = words.lower_bound(s); + if (it == words.end() || !it->first.startsWith(s)) + return {}; + std::vector<dive *> res = it->second; + ++it; + while (it != words.end() && it->first.startsWith(s)) { + combineDives(res, it->second); + ++it; + } + return res; + } + case StringFilterMode::SUBSTRING: { + // Find all words that contain a substring. Here, we have to check all words! + std::vector<dive *> res; + for (auto it = words.begin(); it != words.end(); ++it) { + if (it->first.contains(s)) + combineDives(res, it->second); + } + return res; + } + } +} + +FullTextResult FullText::find(const FullTextQuery &q, StringFilterMode mode) const +{ + if (q.words.empty()) + return FullTextResult(); + + std::vector<dive *> res = findDives(q.words[0], mode); + for (size_t i = 1; i < q.words.size(); ++i) { + std::vector<dive *> res2 = findDives(q.words[i], mode); + // Remove dives from res that are not in res2 + res.erase(std::remove_if(res.begin(), res.end(), + [&res2] (dive *d) { return std::find(res2.begin(), res2.end(), d) == res2.end(); }), res.end()); + } + + return { res }; +} + +FullTextQuery &FullTextQuery::operator=(const QString &s) +{ + words.clear(); + tokenize(s, words); + return *this; +} + +bool FullTextQuery::doit() const +{ + return !words.empty(); +} + +bool FullTextResult::dive_matches(const struct dive *d) const +{ + return std::find(dives.begin(), dives.end(), d) != dives.end(); +} + +#endif |