diff options
| author | 2019-10-18 10:39:27 +0700 | |
|---|---|---|
| committer | 2019-10-18 10:39:27 +0700 | |
| commit | 8c2871f89846f2c34f52061323dc7503855266ea (patch) | |
| tree | 34ef3457570340c1dcc3d0aad7855ae029bcba79 | |
| parent | Fix rule alias. (#537) (diff) | |
Make it easier for user to search for tags
#### Closes #231
Applying the algorithm for `Needles and Haystack` to find and match tag in tags, for example:

This only applies to searching tag_name with more than 3 in length, and at least 80% of its letters are found, from left to right.
There are 3 levels of checking, stop at first found:
- Check if exact name ( case insensitive ) O(1) getting from a dictionary Dict[str, Tag]
- Check for all tags that has 100% matching via algorithm
- Check for all tags that has >= 80% matching
If there are more than one hit, it will be shown as suggestions:

In order to avoid api being called multiple times, I've implemented a cache to only refresh itself when the is a gap of more than 5 minutes from the last api call to get all tags.
Editing / Adding / Deleting tags will also modify the cache directly.
##### What about other solution like fuzzywuzzy?
fuzzywuzzy was considered for using, but from testing, it was giving much lower scores than expected:
Code used to test:
```py
from fuzzywuzzy import fuzz
def _fuzzy_search(search: str, target: str) -> bool:
found = 0
index = 0
_search = search.lower().replace(' ', '')
_target = target.lower().replace(' ', '')
for letter in _search:
index = _target.find(letter, index)
if index == -1:
break
found += index > 0
# return found / len(_search) * 100
return (
found / len(_search) * 100,
fuzz.ratio(search, target),
fuzz.partial_ratio(search, target)
)
tests = (
'this-is-gonna-be-fun',
'this-too-will-be-fun'
)
for test in tests:
print(test, '->', _fuzzy_search('this too fun', test))
```
Result from test:
```py
this-is-gonna-be-fun -> (30.0, 50, 50)
this-too-will-be-fun -> (90.0, 62, 58)
```
| -rw-r--r-- | bot/cogs/tags.py | 70 |
1 files changed, 61 insertions, 9 deletions
diff --git a/bot/cogs/tags.py b/bot/cogs/tags.py index cd70e783a..1aea97b37 100644 --- a/bot/cogs/tags.py +++ b/bot/cogs/tags.py @@ -9,7 +9,6 @@ from bot.converters import TagContentConverter, TagNameConverter from bot.decorators import with_role from bot.pagination import LinePaginator - log = logging.getLogger(__name__) TEST_CHANNELS = ( @@ -26,6 +25,44 @@ class Tags(Cog): self.bot = bot self.tag_cooldowns = {} + self._cache = {} + self._last_fetch = None + + async def _get_tags(self, is_forced: bool = False) -> None: + """Getting all tags.""" + # Refresh only when there's a more than 5m gap from last call. + if is_forced or not self._last_fetch or time.time() - self._last_fetch > 5 * 60: + tags = await self.bot.api_client.get('bot/tags') + self._cache = {tag['title'].lower(): tag for tag in tags} + + @staticmethod + def _fuzzy_search(search: str, target: str) -> bool: + found = 0 + index = 0 + _search = search.lower().replace(' ', '') + _target = target.lower().replace(' ', '') + for letter in _search: + index = _target.find(letter, index) + if index == -1: + break + found += index > 0 + return found / len(_search) * 100 + + def _get_suggestions(self, tag_name: str, score: int) -> list: + return sorted( + (tag for tag in self._cache.values() if Tags._fuzzy_search(tag_name, tag['title']) >= score), + key=lambda tag: Tags._fuzzy_search(tag_name, tag['title']), + reverse=True + ) + + async def _get_tag(self, tag_name: str) -> list: + """Get a specific tag.""" + await self._get_tags() + found = [self._cache.get(tag_name.lower(), None)] + if not found[0]: + return self._get_suggestions(tag_name, 100) or self._get_suggestions(tag_name, 80) + return found + @group(name='tags', aliases=('tag', 't'), invoke_without_command=True) async def tags_group(self, ctx: Context, *, tag_name: TagNameConverter = None) -> None: """Show all known tags, a single tag, or run a subcommand.""" @@ -59,17 +96,29 @@ class Tags(Cog): f"Cooldown ends in {time_left:.1f} seconds.") return + await self._get_tags() + if tag_name is not None: - tag = await self.bot.api_client.get(f'bot/tags/{tag_name}') - if ctx.channel.id not in TEST_CHANNELS: - self.tag_cooldowns[tag_name] = { - "time": time.time(), - "channel": ctx.channel.id - } - await ctx.send(embed=Embed.from_dict(tag['embed'])) + # tag = await self.bot.api_client.get(f'bot/tags/{tag_name}') + founds = await self._get_tag(tag_name) + + if len(founds) == 1: + tag = founds[0] + if ctx.channel.id not in TEST_CHANNELS: + self.tag_cooldowns[tag_name] = { + "time": time.time(), + "channel": ctx.channel.id + } + await ctx.send(embed=Embed.from_dict(tag['embed'])) + elif founds and len(tag_name) >= 3: + await ctx.send(embed=Embed( + title='Did you mean ...', + description='\n'.join(tag['title'] for tag in founds[:10]) + )) else: - tags = await self.bot.api_client.get('bot/tags') + # tags = await self.bot.api_client.get('bot/tags') + tags = self._cache.values() if not tags: await ctx.send(embed=Embed( description="**There are no tags in the database!**", @@ -105,6 +154,7 @@ class Tags(Cog): } await self.bot.api_client.post('bot/tags', json=body) + self._cache[tag_name.lower()] = await self.bot.api_client.get(f'bot/tags/{tag_name}') log.debug(f"{ctx.author} successfully added the following tag to our database: \n" f"tag_name: {tag_name}\n" @@ -134,6 +184,7 @@ class Tags(Cog): } await self.bot.api_client.patch(f'bot/tags/{tag_name}', json=body) + self._cache[tag_name.lower()] = await self.bot.api_client.get(f'bot/tags/{tag_name}') log.debug(f"{ctx.author} successfully edited the following tag in our database: \n" f"tag_name: {tag_name}\n" @@ -150,6 +201,7 @@ class Tags(Cog): async def delete_command(self, ctx: Context, *, tag_name: TagNameConverter) -> None: """Remove a tag from the database.""" await self.bot.api_client.delete(f'bot/tags/{tag_name}') + self._cache.pop(tag_name.lower(), None) log.debug(f"{ctx.author} successfully deleted the tag called '{tag_name}'") await ctx.send(embed=Embed( |