diff options
| author | 2019-10-18 10:39:27 +0700 | |
|---|---|---|
| committer | 2019-10-18 10:39:27 +0700 | |
| commit | 8c2871f89846f2c34f52061323dc7503855266ea (patch) | |
| tree | 34ef3457570340c1dcc3d0aad7855ae029bcba79 | |
| parent | Fix rule alias. (#537) (diff) | |
Make it easier for user to search for tags
#### Closes #231
Applies a `Needles and Haystack`-style algorithm to find and match a tag among the existing tags, for example:

This only applies when the searched tag_name is at least 3 characters long and at least 80% of its letters are found, in order from left to right.
There are 3 levels of checking, stopping at the first level that finds a match:
- Check for an exact name match (case-insensitive): an O(1) lookup in a dictionary Dict[str, Tag]
- Check for all tags that have a 100% match via the algorithm
- Check for all tags that have a >= 80% match
If there is more than one hit, the hits are shown as suggestions:

In order to avoid the API being called multiple times, I've implemented a cache that only refreshes itself when there is a gap of more than 5 minutes since the last API call to get all tags.
Editing / Adding / Deleting tags will also modify the cache directly.
##### What about other solutions like fuzzywuzzy?
fuzzywuzzy was considered, but in testing it gave much lower scores than expected:
Code used to test:
```py
from fuzzywuzzy import fuzz
def _fuzzy_search(search: str, target: str) -> tuple:
    """Score how well `search` matches `target` using three different methods.

    Case is ignored and spaces are removed for the custom score only; the
    fuzzywuzzy scorers receive the original, unmodified strings.

    Returns a 3-tuple of:
    - the custom left-to-right letter-matching percentage (float),
    - the result of `fuzz.ratio(search, target)`,
    - the result of `fuzz.partial_ratio(search, target)`.
    """
    found = 0
    index = 0
    _search = search.lower().replace(' ', '')
    _target = target.lower().replace(' ', '')
    for letter in _search:
        # Look for the letter at or after the position of the previous match,
        # so letters must appear in the target in the same order as in the search.
        index = _target.find(letter, index)
        if index == -1:
            # Stop at the first letter that cannot be found.
            break
        # NOTE: a match at position 0 of the target is not counted (`index > 0`).
        found += index > 0
    # A search with no letters left after stripping spaces has nothing to
    # match; score it 0 instead of dividing by zero.
    score = found / len(_search) * 100 if _search else 0.0
    return (
        score,
        fuzz.ratio(search, target),
        fuzz.partial_ratio(search, target)
    )
# The same search phrase is scored against every candidate tag name.
query = 'this too fun'
candidates = (
    'this-is-gonna-be-fun',
    'this-too-will-be-fun',
)
# Print each candidate next to the tuple of scores returned for it.
for candidate in candidates:
    print(candidate, '->', _fuzzy_search(query, candidate))
```
Result from test:
```py
this-is-gonna-be-fun -> (30.0, 50, 50)
this-too-will-be-fun -> (90.0, 62, 58)
```
| -rw-r--r-- | bot/cogs/tags.py | 70 | 
1 files changed, 61 insertions, 9 deletions
| diff --git a/bot/cogs/tags.py b/bot/cogs/tags.py index cd70e783a..1aea97b37 100644 --- a/bot/cogs/tags.py +++ b/bot/cogs/tags.py @@ -9,7 +9,6 @@ from bot.converters import TagContentConverter, TagNameConverter  from bot.decorators import with_role  from bot.pagination import LinePaginator -  log = logging.getLogger(__name__)  TEST_CHANNELS = ( @@ -26,6 +25,44 @@ class Tags(Cog):          self.bot = bot          self.tag_cooldowns = {} +        self._cache = {} +        self._last_fetch = None + +    async def _get_tags(self, is_forced: bool = False) -> None: +        """Getting all tags.""" +        # Refresh only when there's a more than 5m gap from last call. +        if is_forced or not self._last_fetch or time.time() - self._last_fetch > 5 * 60: +            tags = await self.bot.api_client.get('bot/tags') +            self._cache = {tag['title'].lower(): tag for tag in tags} + +    @staticmethod +    def _fuzzy_search(search: str, target: str) -> bool: +        found = 0 +        index = 0 +        _search = search.lower().replace(' ', '') +        _target = target.lower().replace(' ', '') +        for letter in _search: +            index = _target.find(letter, index) +            if index == -1: +                break +            found += index > 0 +        return found / len(_search) * 100 + +    def _get_suggestions(self, tag_name: str, score: int) -> list: +        return sorted( +            (tag for tag in self._cache.values() if Tags._fuzzy_search(tag_name, tag['title']) >= score), +            key=lambda tag: Tags._fuzzy_search(tag_name, tag['title']), +            reverse=True +        ) + +    async def _get_tag(self, tag_name: str) -> list: +        """Get a specific tag.""" +        await self._get_tags() +        found = [self._cache.get(tag_name.lower(), None)] +        if not found[0]: +            return self._get_suggestions(tag_name, 100) or self._get_suggestions(tag_name, 80) +        return found +      @group(name='tags', 
aliases=('tag', 't'), invoke_without_command=True)      async def tags_group(self, ctx: Context, *, tag_name: TagNameConverter = None) -> None:          """Show all known tags, a single tag, or run a subcommand.""" @@ -59,17 +96,29 @@ class Tags(Cog):                          f"Cooldown ends in {time_left:.1f} seconds.")              return +        await self._get_tags() +          if tag_name is not None: -            tag = await self.bot.api_client.get(f'bot/tags/{tag_name}') -            if ctx.channel.id not in TEST_CHANNELS: -                self.tag_cooldowns[tag_name] = { -                    "time": time.time(), -                    "channel": ctx.channel.id -                } -            await ctx.send(embed=Embed.from_dict(tag['embed'])) +            # tag = await self.bot.api_client.get(f'bot/tags/{tag_name}') +            founds = await self._get_tag(tag_name) + +            if len(founds) == 1: +                tag = founds[0] +                if ctx.channel.id not in TEST_CHANNELS: +                    self.tag_cooldowns[tag_name] = { +                        "time": time.time(), +                        "channel": ctx.channel.id +                    } +                await ctx.send(embed=Embed.from_dict(tag['embed'])) +            elif founds and len(tag_name) >= 3: +                await ctx.send(embed=Embed( +                    title='Did you mean ...', +                    description='\n'.join(tag['title'] for tag in founds[:10]) +                ))          else: -            tags = await self.bot.api_client.get('bot/tags') +            # tags = await self.bot.api_client.get('bot/tags') +            tags = self._cache.values()              if not tags:                  await ctx.send(embed=Embed(                      description="**There are no tags in the database!**", @@ -105,6 +154,7 @@ class Tags(Cog):          }          await self.bot.api_client.post('bot/tags', json=body) +        self._cache[tag_name.lower()] = await 
self.bot.api_client.get(f'bot/tags/{tag_name}')          log.debug(f"{ctx.author} successfully added the following tag to our database: \n"                    f"tag_name: {tag_name}\n" @@ -134,6 +184,7 @@ class Tags(Cog):          }          await self.bot.api_client.patch(f'bot/tags/{tag_name}', json=body) +        self._cache[tag_name.lower()] = await self.bot.api_client.get(f'bot/tags/{tag_name}')          log.debug(f"{ctx.author} successfully edited the following tag in our database: \n"                    f"tag_name: {tag_name}\n" @@ -150,6 +201,7 @@ class Tags(Cog):      async def delete_command(self, ctx: Context, *, tag_name: TagNameConverter) -> None:          """Remove a tag from the database."""          await self.bot.api_client.delete(f'bot/tags/{tag_name}') +        self._cache.pop(tag_name.lower(), None)          log.debug(f"{ctx.author} successfully deleted the tag called '{tag_name}'")          await ctx.send(embed=Embed( | 
