Removed non-alphabetic characters from both search and tag_name when scoring.

- Added a regex to remove non-alphabetic characters (`[^a-z]` with `re.IGNORECASE`)
author: Shirayuki Nekomata <[email protected]> 2020-02-05 04:00:46 +0700
committer: Shirayuki Nekomata <[email protected]> 2020-02-05 04:00:46 +0700
commit: a38926fe797cdcc13d64d836776f56db09e9efd2 (patch)
tree: ae45759ea1f7e5adf03eab4c6948973944af4b6b
parent: Refactored _get_suggestions following Mark's suggestions about inefficiency. (diff)
1 files changed, 5 insertions, 2 deletions
diff --git a/bot/cogs/tags.py b/bot/cogs/tags.py
index 8d3586b19..0e8cf0278 100644
--- a/bot/cogs/tags.py
+++ b/bot/cogs/tags.py
@@ -1,4 +1,5 @@
 import logging
+import re
 import time
 from typing import Dict, List, Optional
 
@@ -19,6 +20,8 @@ TEST_CHANNELS = (
     Channels.helpers
 )
 
+REGEX_NON_ALPHABET = re.compile(r"[^a-z]", re.IGNORECASE & re.MULTILINE)
+
 
 class Tags(Cog):
     """Save new tags and fetch existing tags."""
@@ -43,8 +46,8 @@ class Tags(Cog):
     def _fuzzy_search(search: str, target: str) -> int:
         """A simple scoring algorithm based on how many letters are found / total, with order in mind."""
         found, index = 0, 0
-        _search = search.lower().replace(' ', '')
-        _target = target.lower().replace(' ', '')
+        _search = REGEX_NON_ALPHABET.sub('', search.lower())
+        _target = REGEX_NON_ALPHABET.sub('', target.lower())
         for letter in _search:
             index = _target.find(letter, index)
             if index == -1:
author	Shirayuki Nekomata <[email protected]>	2020-02-05 04:00:46 +0700
committer	Shirayuki Nekomata <[email protected]>	2020-02-05 04:00:46 +0700
commit	a38926fe797cdcc13d64d836776f56db09e9efd2 (patch)
tree	ae45759ea1f7e5adf03eab4c6948973944af4b6b
parent	Refactored _get_suggestions following Mark's suggestions about inefficiency. (diff)