aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Shirayuki Nekomata <[email protected]>2019-10-18 10:39:27 +0700
committerGravatar Shirayuki Nekomata <[email protected]>2019-10-18 10:39:27 +0700
commit8c2871f89846f2c34f52061323dc7503855266ea (patch)
tree34ef3457570340c1dcc3d0aad7855ae029bcba79
parentFix rule alias. (#537) (diff)
Make it easier for user to search for tags
#### Closes #231 Applying the algorithm for `Needles and Haystack` to find and match a tag among the tags, for example: ![Example](https://cdn.discordapp.com/attachments/634243438459486219/634592981915140107/unknown.png) This only applies when searching for a tag_name that is at least 3 characters long, and at least 80% of its letters are found, from left to right. There are 3 levels of checking, stopping at the first one that finds something: - Check for an exact name ( case insensitive ), an O(1) lookup in a dictionary Dict[str, Tag] - Check for all tags that have a 100% match via the algorithm - Check for all tags that have a >= 80% match If there is more than one hit, they will be shown as suggestions: ![Suggestions](https://cdn.discordapp.com/attachments/634243438459486219/634595369531211778/unknown.png) In order to avoid the API being called multiple times, I've implemented a cache that only refreshes itself when there is a gap of more than 5 minutes since the last API call to get all tags. Editing / Adding / Deleting tags will also modify the cache directly. ##### What about other solutions like fuzzywuzzy? fuzzywuzzy was considered, but from testing, it was giving much lower scores than expected: Code used to test: ```py from fuzzywuzzy import fuzz def _fuzzy_search(search: str, target: str) -> bool: found = 0 index = 0 _search = search.lower().replace(' ', '') _target = target.lower().replace(' ', '') for letter in _search: index = _target.find(letter, index) if index == -1: break found += index > 0 # return found / len(_search) * 100 return ( found / len(_search) * 100, fuzz.ratio(search, target), fuzz.partial_ratio(search, target) ) tests = ( 'this-is-gonna-be-fun', 'this-too-will-be-fun' ) for test in tests: print(test, '->', _fuzzy_search('this too fun', test)) ``` Result from test: ```py this-is-gonna-be-fun -> (30.0, 50, 50) this-too-will-be-fun -> (90.0, 62, 58) ```
-rw-r--r--bot/cogs/tags.py70
1 files changed, 61 insertions, 9 deletions
diff --git a/bot/cogs/tags.py b/bot/cogs/tags.py
index cd70e783a..1aea97b37 100644
--- a/bot/cogs/tags.py
+++ b/bot/cogs/tags.py
@@ -9,7 +9,6 @@ from bot.converters import TagContentConverter, TagNameConverter
from bot.decorators import with_role
from bot.pagination import LinePaginator
-
log = logging.getLogger(__name__)
TEST_CHANNELS = (
@@ -26,6 +25,44 @@ class Tags(Cog):
self.bot = bot
self.tag_cooldowns = {}
+ self._cache = {}
+ self._last_fetch = None
+
+    async def _get_tags(self, is_forced: bool = False) -> None:
+        """
+        Refresh the local tag cache from the API when it is stale.
+
+        The cache is re-fetched when `is_forced` is true, when nothing has been fetched yet,
+        or when more than five minutes have passed since the last fetch. Fetched tags are
+        stored in `self._cache`, keyed by their lower-cased title.
+        """
+        is_stale = self._last_fetch is None or time.time() - self._last_fetch > 5 * 60
+        if is_forced or is_stale:
+            tags = await self.bot.api_client.get('bot/tags')
+            self._cache = {tag['title'].lower(): tag for tag in tags}
+            # Record when the fetch happened; without this the staleness check above
+            # is always true and every call goes back to the API.
+            self._last_fetch = time.time()
+
+    @staticmethod
+    def _fuzzy_search(search: str, target: str) -> float:
+        """
+        Score how well `search` matches `target`, as a percentage from 0 to 100.
+
+        Both strings are lower-cased and stripped of spaces. The letters of `search` are then
+        looked up in `target` one at a time, left to right, each lookup starting just after
+        the previous match. Scanning stops at the first letter that cannot be found. The
+        score is the number of matched letters divided by the number of letters in `search`.
+        """
+        _search = search.lower().replace(' ', '')
+        _target = target.lower().replace(' ', '')
+        if not _search:
+            # Nothing to look for; this also avoids dividing by zero below.
+            return 0.0
+
+        found = 0
+        position = 0
+        for letter in _search:
+            index = _target.find(letter, position)
+            if index == -1:
+                break
+            # A hit at index 0 is still a hit, so count every successful lookup.
+            found += 1
+            # Resume after the matched character so it cannot be matched twice.
+            position = index + 1
+        return found / len(_search) * 100
+
+    def _get_suggestions(self, tag_name: str, score: int) -> list:
+        """
+        Return the cached tags whose fuzzy-match score against `tag_name` is at least `score`.
+
+        The result is ordered from the highest score to the lowest.
+        """
+        # Score each tag once and reuse the value for both filtering and sorting.
+        scored = [(Tags._fuzzy_search(tag_name, tag['title']), tag) for tag in self._cache.values()]
+        matches = [pair for pair in scored if pair[0] >= score]
+        # Sort on the score alone; the tag dicts themselves cannot be compared.
+        matches.sort(key=lambda pair: pair[0], reverse=True)
+        return [tag for _, tag in matches]
+
+    async def _get_tag(self, tag_name: str) -> list:
+        """
+        Look up `tag_name` in the tag cache and return the matching tags as a list.
+
+        An exact, case-insensitive title match is returned on its own. Otherwise the result
+        is the tags with a fuzzy score of 100 or, if there are none, those scoring at least 80.
+        """
+        await self._get_tags()
+
+        exact = self._cache.get(tag_name.lower())
+        if exact:
+            return [exact]
+
+        perfect = self._get_suggestions(tag_name, 100)
+        if perfect:
+            return perfect
+        return self._get_suggestions(tag_name, 80)
+
@group(name='tags', aliases=('tag', 't'), invoke_without_command=True)
async def tags_group(self, ctx: Context, *, tag_name: TagNameConverter = None) -> None:
"""Show all known tags, a single tag, or run a subcommand."""
@@ -59,17 +96,29 @@ class Tags(Cog):
f"Cooldown ends in {time_left:.1f} seconds.")
return
+ await self._get_tags()
+
if tag_name is not None:
- tag = await self.bot.api_client.get(f'bot/tags/{tag_name}')
- if ctx.channel.id not in TEST_CHANNELS:
- self.tag_cooldowns[tag_name] = {
- "time": time.time(),
- "channel": ctx.channel.id
- }
- await ctx.send(embed=Embed.from_dict(tag['embed']))
+ # tag = await self.bot.api_client.get(f'bot/tags/{tag_name}')
+ founds = await self._get_tag(tag_name)
+
+ if len(founds) == 1:
+ tag = founds[0]
+ if ctx.channel.id not in TEST_CHANNELS:
+ self.tag_cooldowns[tag_name] = {
+ "time": time.time(),
+ "channel": ctx.channel.id
+ }
+ await ctx.send(embed=Embed.from_dict(tag['embed']))
+ elif founds and len(tag_name) >= 3:
+ await ctx.send(embed=Embed(
+ title='Did you mean ...',
+ description='\n'.join(tag['title'] for tag in founds[:10])
+ ))
else:
- tags = await self.bot.api_client.get('bot/tags')
+ # tags = await self.bot.api_client.get('bot/tags')
+ tags = self._cache.values()
if not tags:
await ctx.send(embed=Embed(
description="**There are no tags in the database!**",
@@ -105,6 +154,7 @@ class Tags(Cog):
}
await self.bot.api_client.post('bot/tags', json=body)
+ self._cache[tag_name.lower()] = await self.bot.api_client.get(f'bot/tags/{tag_name}')
log.debug(f"{ctx.author} successfully added the following tag to our database: \n"
f"tag_name: {tag_name}\n"
@@ -134,6 +184,7 @@ class Tags(Cog):
}
await self.bot.api_client.patch(f'bot/tags/{tag_name}', json=body)
+ self._cache[tag_name.lower()] = await self.bot.api_client.get(f'bot/tags/{tag_name}')
log.debug(f"{ctx.author} successfully edited the following tag in our database: \n"
f"tag_name: {tag_name}\n"
@@ -150,6 +201,7 @@ class Tags(Cog):
async def delete_command(self, ctx: Context, *, tag_name: TagNameConverter) -> None:
"""Remove a tag from the database."""
await self.bot.api_client.delete(f'bot/tags/{tag_name}')
+ self._cache.pop(tag_name.lower(), None)
log.debug(f"{ctx.author} successfully deleted the tag called '{tag_name}'")
await ctx.send(embed=Embed(