diff options
| author | 2020-02-05 14:03:54 +0700 | |
|---|---|---|
| committer | 2020-02-05 14:03:54 +0700 | |
| commit | 8dd66bc12ecae678c2f17819b298b60823806b95 (patch) | |
| tree | 7f40a05db0879ca50f38ca3df0440aa915192ac2 | |
| parent | Removed regex, implemented a stricter letter searching. (diff) | |
Made searching even stricter by searching from the start of each word
- Added regex back to sub and split on non-alphabetic characters.
- Now uses two pointers to move from word to word.
| -rw-r--r-- | bot/cogs/tags.py | 24 |
1 files changed, 13 insertions, 11 deletions
diff --git a/bot/cogs/tags.py b/bot/cogs/tags.py index eaf307569..54a51921c 100644 --- a/bot/cogs/tags.py +++ b/bot/cogs/tags.py @@ -1,4 +1,5 @@ import logging +import re import time from typing import Dict, List, Optional @@ -19,6 +20,8 @@ TEST_CHANNELS = ( Channels.helpers ) +REGEX_NON_ALPHABET = re.compile(r"[^a-z]", re.MULTILINE & re.IGNORECASE) + class Tags(Cog): """Save new tags and fetch existing tags.""" @@ -42,20 +45,19 @@ class Tags(Cog): @staticmethod def _fuzzy_search(search: str, target: str) -> int: """A simple scoring algorithm based on how many letters are found / total, with order in mind.""" - found, index = 0, 0 - _search = search.lower().replace(' ', '') - _targets = iter(target.lower()) + current, index = 0, 0 + _search = REGEX_NON_ALPHABET.sub('', search.lower()) + _targets = iter(REGEX_NON_ALPHABET.split(target.lower())) _target = next(_targets) try: - for letter in _search: - index = _target.find(letter, index) - while index == -1: - _target = next(_targets) - index = _target.find(letter) - found += 1 - except StopIteration: + while True: + while index < len(_target) and _search[current] == _target[index]: + current += 1 + index += 1 + index, _target = 0, next(_targets) + except (StopIteration, IndexError): pass - return found / len(_search) * 100 + return current / len(_search) * 100 def _get_suggestions(self, tag_name: str, thresholds: Optional[List[int]] = None) -> List[str]: """Return a list of suggested tags.""" |