author     Numerlor <[email protected]>  2021-01-23 04:36:26 +0100
committer  Numerlor <[email protected]>  2021-01-23 04:36:26 +0100
commit     59ca1cbed6bcf234b9eb277da291bdaeb259e939
tree       fb8e9e49bcd1585a500d1943e6682561a424fc75
parent     Merge remote-tracking branch 'upstream/master' into doc-imp
Properly handle cache being cleared
Previously the code deleted a page's entry, with all of its DocItems, after the page's contents were requested once, but this caused problems when the cache expired and was cleared. Instead of deleting the entry and using its absence to decide whether the page should be queued on the next item request, we now keep it, recreate an entry in the _item_futures dict for all of the page's items, and check for containment there.

To avoid populating the queue multiple times with the same item in some cases, the futures cleanup task now only runs while the queue is empty.
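A minimal sketch of the pattern this commit moves to, for illustration only: the names mirror the real module (_item_futures, _page_symbols, ParseResultFuture), but the bodies are simplified assumptions, not the actual implementation.

import asyncio
import time
from collections import defaultdict


class ParseResultFuture(asyncio.Future):
    """Future that records when its result was set (simplified stand-in)."""

    def __init__(self):
        super().__init__()
        self.user_requested = False          # True once a user waits on this item
        self.result_set_time = float("inf")  # updated when a result is set

    def set_result(self, result) -> None:
        self.result_set_time = time.time()
        super().set_result(result)


class BatchParserSketch:
    """Keeps futures around and checks containment instead of deleting page entries."""

    def __init__(self):
        self._queue = []                        # DocItems waiting to be parsed
        self._page_symbols = defaultdict(list)  # url -> all DocItems on that page,
                                                # populated during inventory loading
        self._item_futures = {}                 # DocItem -> ParseResultFuture

    async def get_markdown(self, doc_item) -> str:
        if doc_item not in self._item_futures:
            # First request for this page, or the futures were cleaned up after
            # the cache expired: recreate a future for every item on the page
            # and queue the whole page for parsing again.
            self._item_futures.update(
                (symbol, ParseResultFuture()) for symbol in self._page_symbols[doc_item.url]
            )
            self._queue.extend(self._page_symbols[doc_item.url])
        self._item_futures[doc_item].user_requested = True
        # A separate parse task (not shown) consumes self._queue and calls
        # set_result() on the matching futures.
        return await self._item_futures[doc_item]

    async def _cleanup_futures(self) -> None:
        while True:
            # Only clean up while nothing is queued; deleting a future whose
            # page is still in the queue would make the next get_markdown call
            # re-queue the whole page, duplicating its items.
            if not self._queue:
                current_time = time.time()
                for key, future in self._item_futures.copy().items():
                    if current_time - future.result_set_time > 5:
                        del self._item_futures[key]
            await asyncio.sleep(5)

With this layout, _page_symbols permanently maps each page to its items, and a missing _item_futures entry is the signal that the page needs to be fetched and queued again.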
-rw-r--r--  bot/exts/info/doc/_batch_parser.py  | 25
-rw-r--r--  bot/exts/info/doc/_cog.py           |  4
2 files changed, 16 insertions, 13 deletions
diff --git a/bot/exts/info/doc/_batch_parser.py b/bot/exts/info/doc/_batch_parser.py
index 606c5d803..42d81e98c 100644
--- a/bot/exts/info/doc/_batch_parser.py
+++ b/bot/exts/info/doc/_batch_parser.py
@@ -14,6 +14,7 @@ from bs4 import BeautifulSoup
 import bot
 from bot.constants import Channels
+from bot.utils.lock import lock_arg
 from . import doc_cache
 from ._parsing import get_symbol_markdown

 if TYPE_CHECKING:
@@ -92,13 +93,14 @@ class BatchParser:
     def __init__(self):
         self._queue: List[QueueItem] = []
         self._page_symbols: Dict[str, List[DocItem]] = defaultdict(list)
-        self._item_futures: Dict[DocItem, ParseResultFuture] = defaultdict(ParseResultFuture)
+        self._item_futures: Dict[DocItem, ParseResultFuture] = {}
         self._parse_task = None

         self.cleanup_futures_task = bot.instance.loop.create_task(self._cleanup_futures())

         self.stale_inventory_notifier = StaleInventoryNotifier()

+    @lock_arg("doc.get_markdown", "doc_item", attrgetter("url"), wait=True)
     async def get_markdown(self, doc_item: DocItem) -> str:
         """
         Get the result Markdown of `doc_item`.
@@ -108,18 +110,20 @@ class BatchParser:

         Not safe to run while `self.clear` is running.
         """
-        self._item_futures[doc_item].user_requested = True
-        if (symbols_to_queue := self._page_symbols.get(doc_item.url)) is not None:
+        if doc_item not in self._item_futures:
+            self._item_futures.update((symbol, ParseResultFuture()) for symbol in self._page_symbols[doc_item.url])
+            self._item_futures[doc_item].user_requested = True
+
             async with bot.instance.http_session.get(doc_item.url) as response:
                 soup = BeautifulSoup(await response.text(encoding="utf8"), "lxml")

-            self._queue.extend(QueueItem(symbol, soup) for symbol in symbols_to_queue)
-            del self._page_symbols[doc_item.url]
+            self._queue.extend(QueueItem(symbol, soup) for symbol in self._page_symbols[doc_item.url])
             log.debug(f"Added symbols from {doc_item.url} to parse queue.")

             if self._parse_task is None:
                 self._parse_task = asyncio.create_task(self._parse_queue())
-
+        else:
+            self._item_futures[doc_item].user_requested = True
         with suppress(ValueError):
             # If the item is not in the list then the item is already parsed or is being parsed
             self._move_to_front(doc_item)
@@ -196,8 +200,9 @@
         Keeping them around for longer than a second is unnecessary and keeps the parsed Markdown strings alive.
         """
         while True:
-            current_time = time.time()
-            for key, future in self._item_futures.copy().items():
-                if current_time - future.result_set_time > 5:
-                    del self._item_futures[key]
+            if not self._queue:
+                current_time = time.time()
+                for key, future in self._item_futures.copy().items():
+                    if current_time - future.result_set_time > 5:
+                        del self._item_futures[key]
             await asyncio.sleep(5)
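The lock_arg decorator added above serializes concurrent get_markdown calls keyed on doc_item.url, so simultaneous requests for symbols on the same page wait for the first fetch instead of each queueing the page. A rough sketch of such a per-key lock decorator, written here from scratch and assuming nothing about the real implementation in bot.utils.lock:

import asyncio
import functools
import inspect
from collections import defaultdict


def lock_on_arg(arg_name, key=lambda value: value):
    """Serialize calls to the decorated coroutine per key(<arg_name> argument).

    Hypothetical stand-in for bot.utils.lock.lock_arg(..., wait=True); the
    real decorator also takes a lock namespace and supports raise_error.
    """
    locks = defaultdict(asyncio.Lock)

    def decorator(func):
        signature = inspect.signature(func)

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Resolve the keyed argument whether it was passed positionally
            # or by name, then hold that key's lock for the whole call.
            bound = signature.bind(*args, **kwargs)
            value = bound.arguments[arg_name]
            async with locks[key(value)]:
                return await func(*args, **kwargs)

        return wrapper

    return decorator


# Usage mirroring the diff, with attrgetter("url") choosing the lock key:
#
#     @lock_on_arg("doc_item", key=attrgetter("url"))
#     async def get_markdown(self, doc_item): ...

Under such a lock, a second caller for the same page only runs after the first has finished, so it finds its item already in _item_futures and takes the else branch instead of fetching the page a second time.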
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 26694ae55..c3458d776 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -408,12 +408,10 @@ class DocCog(commands.Cog):

     @docs_group.command(name="cleardoccache")
     @commands.has_any_role(*MODERATION_ROLES)
-    @lock("doc", COMMAND_LOCK_SINGLETON, raise_error=True)
     async def clear_cache_command(self, ctx: commands.Context, package_name: PackageName) -> None:
         """Clear the persistent redis cache for `package`."""
         if await doc_cache.delete(package_name):
-            await self.refresh_inventory()
-            await ctx.send(f"Successfully cleared the cache for `{package_name}` and refreshed the inventories.")
+            await ctx.send(f"Successfully cleared the cache for `{package_name}`.")
         else:
             await ctx.send("No keys matching the package found.")