From 857468ce4efbe26220d4c36a8840a13f89b30c44 Mon Sep 17 00:00:00 2001 From: Numerlor <25886452+Numerlor@users.noreply.github.com> Date: Mon, 30 Aug 2021 19:11:11 +0200 Subject: create a helper function to get the redis key of a doc item --- bot/exts/info/doc/_redis_cache.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/bot/exts/info/doc/_redis_cache.py b/bot/exts/info/doc/_redis_cache.py index ad764816f..0c635bf6e 100644 --- a/bot/exts/info/doc/_redis_cache.py +++ b/bot/exts/info/doc/_redis_cache.py @@ -24,8 +24,7 @@ class DocRedisCache(RedisObject): All keys from a single page are stored together, expiring a week after the first set. """ - url_key = remove_suffix(item.relative_url_path, ".html") - redis_key = f"{self.namespace}:{item.package}:{url_key}" + redis_key = f"{self.namespace}:{item_key(item)}" needs_expire = False with await self._get_pool_connection() as connection: @@ -43,10 +42,8 @@ class DocRedisCache(RedisObject): @namespace_lock async def get(self, item: DocItem) -> Optional[str]: """Return the Markdown content of the symbol `item` if it exists.""" - url_key = remove_suffix(item.relative_url_path, ".html") - with await self._get_pool_connection() as connection: - return await connection.hget(f"{self.namespace}:{item.package}:{url_key}", item.symbol_id, encoding="utf8") + return await connection.hget(f"{self.namespace}:{item_key(item)}", item.symbol_id, encoding="utf8") @namespace_lock async def delete(self, package: str) -> bool: @@ -61,10 +58,6 @@ class DocRedisCache(RedisObject): return False -def remove_suffix(string: str, suffix: str) -> str: - """Remove `suffix` from end of `string`.""" - # TODO replace usages with str.removesuffix on 3.9 - if string.endswith(suffix): - return string[:-len(suffix)] - else: - return string +def item_key(item: DocItem) -> str: + """Get the redis redis key string from `item`.""" + return f"{item.package}:{item.relative_url_path.removesuffix('.html')}" -- cgit v1.2.3 From 
48b1a7b042ec23488243ae471842bdfcce8ee9a4 Mon Sep 17 00:00:00 2001 From: Numerlor <25886452+Numerlor@users.noreply.github.com> Date: Mon, 30 Aug 2021 20:23:23 +0200 Subject: Prevent erroneous symbols from always raising stale warnings Some doc symbols are improperly generated and never exist on the doc page the inventory file defines them in, causing the stale warning to get raised every time the page is parsed (at a maximum every week because of the redis expire). This can be prevented by keeping a counter in redis for the items which were stale: every time a warning is attempted for an item, its counter is incremented and set to expire in 3 weeks. A warning is then only raised while the counter is below 3, so the unpreventable warning is raised at most twice until the maintainers fix the symbol or the counter expires, 3 weeks after the last increment. --- bot/exts/info/doc/_batch_parser.py | 20 +++++++++++++------- bot/exts/info/doc/_redis_cache.py | 16 ++++++++++++++++ 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/bot/exts/info/doc/_batch_parser.py b/bot/exts/info/doc/_batch_parser.py index 369bb462c..cadf1e121 100644 --- a/bot/exts/info/doc/_batch_parser.py +++ b/bot/exts/info/doc/_batch_parser.py @@ -16,6 +16,7 @@ from bot.constants import Channels from bot.utils import scheduling from . 
import _cog, doc_cache from ._parsing import get_symbol_markdown +from ._redis_cache import StaleItemCounter log = logging.getLogger(__name__) @@ -23,6 +24,8 @@ log = logging.getLogger(__name__) class StaleInventoryNotifier: """Handle sending notifications about stale inventories through `DocItem`s to dev log.""" + symbol_counter = StaleItemCounter() + def __init__(self): self._init_task = bot.instance.loop.create_task( self._init_channel(), @@ -38,13 +41,16 @@ class StaleInventoryNotifier: async def send_warning(self, doc_item: _cog.DocItem) -> None: """Send a warning to dev log if one wasn't already sent for `item`'s url.""" if doc_item.url not in self._warned_urls: - self._warned_urls.add(doc_item.url) - await self._init_task - embed = discord.Embed( - description=f"Doc item `{doc_item.symbol_id=}` present in loaded documentation inventories " - f"not found on [site]({doc_item.url}), inventories may need to be refreshed." - ) - await self._dev_log.send(embed=embed) + # Only warn if the item got less than 3 warnings + # or if it has been more than 3 weeks since the last warning + if await self.symbol_counter.increment_for(doc_item) < 3: + self._warned_urls.add(doc_item.url) + await self._init_task + embed = discord.Embed( + description=f"Doc item `{doc_item.symbol_id=}` present in loaded documentation inventories " + f"not found on [site]({doc_item.url}), inventories may need to be refreshed." 
+ ) + await self._dev_log.send(embed=embed) class QueueItem(NamedTuple): diff --git a/bot/exts/info/doc/_redis_cache.py b/bot/exts/info/doc/_redis_cache.py index 0c635bf6e..3fa3460ca 100644 --- a/bot/exts/info/doc/_redis_cache.py +++ b/bot/exts/info/doc/_redis_cache.py @@ -58,6 +58,22 @@ class DocRedisCache(RedisObject): return False +class StaleItemCounter(RedisObject): + """Manage increment counters for stale `DocItem`s.""" + + @namespace_lock + async def increment_for(self, item: DocItem) -> int: + """ + Increment the counter for `item` by 1, set it to expire in 3 weeks and return the new value. + + If the counter didn't exist, initialize it with 1. + """ + key = f"{self.namespace}:{item_key(item)}:{item.symbol_id}" + with await self._get_pool_connection() as connection: + await connection.expire(key, WEEK_SECONDS * 3) + return int(await connection.incr(key)) + + def item_key(item: DocItem) -> str: """Get the redis redis key string from `item`.""" return f"{item.package}:{item.relative_url_path.removesuffix('.html')}" -- cgit v1.2.3 From 727ef751ec2bb308d4a2d8bb0e348e438620494c Mon Sep 17 00:00:00 2001 From: Numerlor <25886452+Numerlor@users.noreply.github.com> Date: Mon, 30 Aug 2021 20:44:49 +0200 Subject: Delete stale item counters when clearing doc cache --- bot/exts/info/doc/_cog.py | 1 + bot/exts/info/doc/_redis_cache.py | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py index fb9b2584a..6c3110306 100644 --- a/bot/exts/info/doc/_cog.py +++ b/bot/exts/info/doc/_cog.py @@ -439,6 +439,7 @@ class DocCog(commands.Cog): ) -> None: """Clear the persistent redis cache for `package`.""" if await doc_cache.delete(package_name): + await self.item_fetcher.stale_inventory_notifier.symbol_counter.delete() await ctx.send(f"Successfully cleared the cache for `{package_name}`.") else: await ctx.send("No keys matching the package found.") diff --git a/bot/exts/info/doc/_redis_cache.py 
b/bot/exts/info/doc/_redis_cache.py index 3fa3460ca..05871eef7 100644 --- a/bot/exts/info/doc/_redis_cache.py +++ b/bot/exts/info/doc/_redis_cache.py @@ -73,6 +73,18 @@ class StaleItemCounter(RedisObject): await connection.expire(key, WEEK_SECONDS * 3) return int(await connection.incr(key)) + @namespace_lock + async def delete(self, package: str) -> bool: + """Remove all values for `package`; return True if at least one key was deleted, False otherwise.""" + with await self._get_pool_connection() as connection: + package_keys = [ + package_key async for package_key in connection.iscan(match=f"{self.namespace}:{package}:*") + ] + if package_keys: + await connection.delete(*package_keys) + return True + return False + def item_key(item: DocItem) -> str: """Get the redis redis key string from `item`.""" -- cgit v1.2.3 From 23a3e5e53e1c9229433439de90e423499a9742b7 Mon Sep 17 00:00:00 2001 From: Numerlor <25886452+Numerlor@users.noreply.github.com> Date: Tue, 31 Aug 2021 03:59:19 +0200 Subject: Raise for status to prevent parsing of invalid pages --- bot/exts/info/doc/_batch_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/exts/info/doc/_batch_parser.py b/bot/exts/info/doc/_batch_parser.py index cadf1e121..62b04b649 100644 --- a/bot/exts/info/doc/_batch_parser.py +++ b/bot/exts/info/doc/_batch_parser.py @@ -107,7 +107,7 @@ class BatchParser: if doc_item not in self._item_futures and doc_item not in self._queue: self._item_futures[doc_item].user_requested = True - async with bot.instance.http_session.get(doc_item.url) as response: + async with bot.instance.http_session.get(doc_item.url, raise_for_status=True) as response: soup = await bot.instance.loop.run_in_executor( None, BeautifulSoup, -- cgit v1.2.3