aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Numerlor <[email protected]>2020-12-15 00:30:17 +0100
committerGravatar Numerlor <[email protected]>2021-01-09 20:10:54 +0100
commitcf00aff24d20a57c2c9178d6d9e30f5d33d9a426 (patch)
treef532b97fd25ec3a4e985b73f2e1afcf80136dd5f
parentClear up docstring so it doesn't rely on private attribute (diff)
Create futures for all items in the queue
Creating futures for everything up front and awaiting them at the end takes care of the potential race conditions that may pop up from items being parsed and sent to Redis while the get_markdown method is in the middle of fetching a page. If such a race does occur with this implementation, we only need to move the affected item to the front of the queue, and its future will get a result set soon afterwards.
-rw-r--r--bot/exts/info/doc/_cog.py23
1 file changed, 18 insertions, 5 deletions
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 6c51ab738..0d344c363 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -7,6 +7,7 @@ import sys
from collections import defaultdict
from contextlib import suppress
from functools import partial
+from operator import attrgetter
from types import SimpleNamespace
from typing import Dict, List, NamedTuple, Optional, Union
@@ -78,6 +79,14 @@ class QueueItem(NamedTuple):
return NamedTuple.__eq__(self, other)
+class ParseResultFuture(asyncio.Future):
+ """Future with the user_requested attribute to know which futures need to be waited for before clearing."""
+
+ def __init__(self):
+ super().__init__()
+ self.user_requested = False
+
+
class CachedParser:
"""
Get the symbol Markdown from pages with smarter caching.
@@ -90,7 +99,7 @@ class CachedParser:
def __init__(self):
self._queue: List[QueueItem] = []
self._page_symbols: Dict[str, List[DocItem]] = defaultdict(list)
- self._item_futures: Dict[DocItem, asyncio.Future] = {}
+ self._item_futures: Dict[DocItem, ParseResultFuture] = {}
self._parse_task = None
async def get_markdown(self, doc_item: DocItem) -> str:
@@ -99,21 +108,25 @@ class CachedParser:
If no symbols were fetched from `doc_item`s page before,
the HTML has to be fetched before parsing can be queued.
+
+ Not safe to run while `self.clear` is running.
"""
if (symbols_to_queue := self._page_symbols.get(doc_item.url)) is not None:
async with bot_instance.http_session.get(doc_item.url) as response:
soup = BeautifulSoup(await response.text(encoding="utf8"), "lxml")
self._queue.extend(QueueItem(symbol, soup) for symbol in symbols_to_queue)
+ self._item_futures.update((symbol, ParseResultFuture()) for symbol in symbols_to_queue)
del self._page_symbols[doc_item.url]
log.debug(f"Added symbols from {doc_item.url} to parse queue.")
if self._parse_task is None:
self._parse_task = asyncio.create_task(self._parse_queue())
- self._move_to_front(doc_item)
- if doc_item not in self._item_futures:
- self._item_futures[doc_item] = bot_instance.loop.create_future()
+ with suppress(ValueError):
+ # If the item is not in the list then the item is already parsed or is being parsed
+ self._move_to_front(doc_item)
+ self._item_futures[doc_item].user_requested = True
return await self._item_futures[doc_item]
async def _parse_queue(self) -> None:
@@ -161,7 +174,7 @@ class CachedParser:
All currently requested items are waited to be parsed before clearing.
"""
- for future in self._item_futures.values():
+ for future in filter(attrgetter("user_requested"), self._item_futures.values()):
await future
if self._parse_task is not None:
self._parse_task.cancel()