author     2021-01-23 04:36:26 +0100
committer  2021-01-23 04:36:26 +0100
commit     59ca1cbed6bcf234b9eb277da291bdaeb259e939
tree       fb8e9e49bcd1585a500d1943e6682561a424fc75
parent     Merge remote-tracking branch 'upstream/master' into doc-imp
Properly handle cache being cleared
Previously the code deleted the page's entry, which held all of the
DocItems of the page, after the page's contents were requested once.
This caused problems when the cache expired and was cleared, because
the page could no longer be queued for parsing again.
Instead of deleting the entry and using its absence to decide whether
the page should be queued on the next item request, we now keep it,
recreate an entry in the _item_futures dict for all of the page's items,
and check that dict for containment instead.
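
As a rough sketch of the new request path (simplified stand-in types;
the real classes, locking, and queueing logic are in _batch_parser.py in
the diff below, and `request` here is a hypothetical, stripped-down
version of `get_markdown`):

    import asyncio
    from collections import defaultdict
    from typing import Dict, List

    class ParseResultFuture(asyncio.Future):
        # Stand-ins for the real future's extra attributes: whether a user
        # explicitly requested the item, and when its result was set.
        user_requested: bool = False
        result_set_time: float = float("inf")

    class BatchParserSketch:
        def __init__(self) -> None:
            self._queue: List[str] = []
            # url -> symbols on that page; kept around instead of being
            # deleted after the first request, so the page can be queued
            # again after the cache is cleared.
            self._page_symbols: Dict[str, List[str]] = defaultdict(list)
            self._item_futures: Dict[str, ParseResultFuture] = {}

        async def request(self, url: str, item: str) -> None:
            # Assumes `item` was registered under `url` in _page_symbols.
            if item not in self._item_futures:
                # No pending future for this item: (re)create futures for
                # every symbol on the page, then queue the page for parsing.
                self._item_futures.update(
                    (symbol, ParseResultFuture())
                    for symbol in self._page_symbols[url]
                )
                self._queue.extend(self._page_symbols[url])
            self._item_futures[item].user_requested = True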
To avoid populating the queue multiple times with the same item in some
cases, the futures cleanup task now only runs while the queue is empty.
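
Continuing the sketch above, the cleanup task's new guard looks roughly
like this (the 5-second values mirror the diff below):

    import asyncio
    import time

    # A further method on the BatchParserSketch class above.
    async def _cleanup_futures(self) -> None:
        while True:
            # Only prune while nothing is queued; deleting the future of a
            # still-queued item would let get_markdown queue its page again.
            if not self._queue:
                current_time = time.time()
                for key, future in self._item_futures.copy().items():
                    if current_time - future.result_set_time > 5:
                        del self._item_futures[key]
            await asyncio.sleep(5)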
-rw-r--r--  bot/exts/info/doc/_batch_parser.py | 25 +++++++++++++++----------
-rw-r--r--  bot/exts/info/doc/_cog.py          |  4 +---
2 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/bot/exts/info/doc/_batch_parser.py b/bot/exts/info/doc/_batch_parser.py
index 606c5d803..42d81e98c 100644
--- a/bot/exts/info/doc/_batch_parser.py
+++ b/bot/exts/info/doc/_batch_parser.py
@@ -14,6 +14,7 @@ from bs4 import BeautifulSoup
 import bot
 from bot.constants import Channels
+from bot.utils.lock import lock_arg
 from . import doc_cache
 from ._parsing import get_symbol_markdown
 
 if TYPE_CHECKING:
@@ -92,13 +93,14 @@ class BatchParser:
     def __init__(self):
         self._queue: List[QueueItem] = []
         self._page_symbols: Dict[str, List[DocItem]] = defaultdict(list)
-        self._item_futures: Dict[DocItem, ParseResultFuture] = defaultdict(ParseResultFuture)
+        self._item_futures: Dict[DocItem, ParseResultFuture] = {}
         self._parse_task = None
 
         self.cleanup_futures_task = bot.instance.loop.create_task(self._cleanup_futures())
 
         self.stale_inventory_notifier = StaleInventoryNotifier()
 
+    @lock_arg("doc.get_markdown", "doc_item", attrgetter("url"), wait=True)
     async def get_markdown(self, doc_item: DocItem) -> str:
         """
         Get the result Markdown of `doc_item`.
@@ -108,18 +110,20 @@ class BatchParser:
 
         Not safe to run while `self.clear` is running.
         """
-        self._item_futures[doc_item].user_requested = True
-        if (symbols_to_queue := self._page_symbols.get(doc_item.url)) is not None:
+        if doc_item not in self._item_futures:
+            self._item_futures.update((symbol, ParseResultFuture()) for symbol in self._page_symbols[doc_item.url])
+            self._item_futures[doc_item].user_requested = True
+
             async with bot.instance.http_session.get(doc_item.url) as response:
                 soup = BeautifulSoup(await response.text(encoding="utf8"), "lxml")
 
-            self._queue.extend(QueueItem(symbol, soup) for symbol in symbols_to_queue)
-            del self._page_symbols[doc_item.url]
+            self._queue.extend(QueueItem(symbol, soup) for symbol in self._page_symbols[doc_item.url])
             log.debug(f"Added symbols from {doc_item.url} to parse queue.")
 
             if self._parse_task is None:
                 self._parse_task = asyncio.create_task(self._parse_queue())
-
+        else:
+            self._item_futures[doc_item].user_requested = True
         with suppress(ValueError):
             # If the item is not in the list then the item is already parsed or is being parsed
             self._move_to_front(doc_item)
@@ -196,8 +200,9 @@ class BatchParser:
         Keeping them around for longer than a second is unnecessary and keeps the parsed Markdown strings alive.
         """
         while True:
-            current_time = time.time()
-            for key, future in self._item_futures.copy().items():
-                if current_time - future.result_set_time > 5:
-                    del self._item_futures[key]
+            if not self._queue:
+                current_time = time.time()
+                for key, future in self._item_futures.copy().items():
+                    if current_time - future.result_set_time > 5:
+                        del self._item_futures[key]
             await asyncio.sleep(5)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 26694ae55..c3458d776 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -408,12 +408,10 @@ class DocCog(commands.Cog):
 
     @docs_group.command(name="cleardoccache")
     @commands.has_any_role(*MODERATION_ROLES)
-    @lock("doc", COMMAND_LOCK_SINGLETON, raise_error=True)
     async def clear_cache_command(self, ctx: commands.Context, package_name: PackageName) -> None:
         """Clear the persistent redis cache for `package`."""
         if await doc_cache.delete(package_name):
-            await self.refresh_inventory()
-            await ctx.send(f"Successfully cleared the cache for `{package_name}` and refreshed the inventories.")
+            await ctx.send(f"Successfully cleared the cache for `{package_name}`.")
         else:
             await ctx.send("No keys matching the package found.")