author     Numerlor <[email protected]>  2020-12-11 09:44:11 +0100
committer  Numerlor <[email protected]>  2020-12-11 09:46:16 +0100
commit     ea9b3e0e9ac74ea541f436f8021178f76f19af39 (patch)
tree       81860b0da9924c0ea9da2fad36f579128e6c3020
parent     Create a generator instead of returning a list (diff)
Restructure doc cache to handle caches of whole pages
Previously we used packages as the top-level keys, with each field containing both the url and the symbol id. However, if we want to store all symbols from fetched pages instead of only the ones requested by users, that layout comes off worse than putting the page url into the key and setting EXPIREs on the keys, instead of checking expiry manually in python. The new implementation uses package:url as the redis key and only the symbol id as the field name, with the expire set to a week on the key. This means we have to pattern match the keys when deleting the cache for a package, but that happens far less often than the expire checking done previously.
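
In redis terms the layout described above boils down to one hash per documentation page, expired by redis itself. A minimal sketch of the three operations, assuming an aioredis-style connection; the "doc" namespace, "python" package, page url and symbol name below are made up for the example:

    # One hash per page, keyed as namespace:package:page-url.
    await connection.hset("doc:python:library/functions", "len", markdown)
    # EXPIRE is set once, when the key is first created; redis then drops
    # the whole page after a week instead of python checking timestamps.
    await connection.expire("doc:python:library/functions", 7 * 24 * 60 * 60)

    # A symbol lookup is a single HGET on the page's hash.
    markdown = await connection.hget("doc:python:library/functions", "len", encoding="utf8")

    # Deleting a package's cache now has to pattern match its keys first.
    keys = [key async for key in connection.iscan(match="doc:python:*")]
    if keys:
        await connection.delete(*keys)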
-rw-r--r--  bot/exts/info/doc/_cog.py           3
-rw-r--r--  bot/exts/info/doc/_redis_cache.py  95
2 files changed, 37 insertions, 61 deletions
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 7d57f65ad..d1518f69d 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -122,7 +122,7 @@ class CachedParser:
             item, soup = self._queue.pop()
             try:
                 markdown = get_symbol_markdown(soup, item)
-                await doc_cache.set_if_exists(item, markdown)
+                await doc_cache.set(item, markdown)
                 self._results[item] = markdown
             except Exception:
                 log.exception(f"Unexpected error when handling {item}")
@@ -178,7 +178,6 @@ class DocCog(commands.Cog):
         self.scheduled_inventories = set()
 
         self.bot.loop.create_task(self.init_refresh_inventory())
-        self.bot.loop.create_task(doc_cache.delete_expired())
 
     async def init_refresh_inventory(self) -> None:
         """Refresh documentation inventory on cog initialization."""
diff --git a/bot/exts/info/doc/_redis_cache.py b/bot/exts/info/doc/_redis_cache.py
index e8577aa64..52cb2bc94 100644
--- a/bot/exts/info/doc/_redis_cache.py
+++ b/bot/exts/info/doc/_redis_cache.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import datetime
-import pickle
 from typing import Optional, TYPE_CHECKING
 
 from async_rediscache.types.base import RedisObject, namespace_lock
@@ -12,77 +11,55 @@ if TYPE_CHECKING:
 class DocRedisCache(RedisObject):
     """Interface for redis functionality needed by the Doc cog."""
 
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._set_expires = set()
+
     @namespace_lock
     async def set(self, item: DocItem, value: str) -> None:
         """
-        Set markdown `value` for `key`.
+        Set the Markdown `value` for the symbol `item`.
 
-        Keys expire after a week to keep data up to date.
+        All keys from a single page are stored together, expiring a week after the first set.
         """
-        expiry_timestamp = (datetime.datetime.now() + datetime.timedelta(weeks=1)).timestamp()
+        url_key = remove_suffix(item.relative_url_path, ".html")
+        redis_key = f"{self.namespace}:{item.package}:{url_key}"
+        needs_expire = False
 
         with await self._get_pool_connection() as connection:
-            await connection.hset(
-                f"{self.namespace}:{item.package}",
-                self.get_item_key(item),
-                pickle.dumps((value, expiry_timestamp))
-            )
-
-    @namespace_lock
-    async def set_if_exists(self, item: DocItem, value: str) -> None:
-        """
-        Set markdown `value` for `key` if `key` exists.
+            if item.package+url_key not in self._set_expires:
+                self._set_expires.add(item.package+url_key)
+                needs_expire = not await connection.exists(redis_key)
 
-        Keys expire after a week to keep data up to date.
-        """
-        expiry_timestamp = (datetime.datetime.now() + datetime.timedelta(weeks=1)).timestamp()
-
-        with await self._get_pool_connection() as connection:
-            if await connection.hexists(f"{self.namespace}:{item.package}", self.get_item_key(item)):
-                await connection.hset(
-                    f"{self.namespace}:{item.package}",
-                    self.get_item_key(item),
-                    pickle.dumps((value, expiry_timestamp))
-                )
+            await connection.hset(redis_key, item.symbol_id, value)
+            if needs_expire:
+                await connection.expire(redis_key, datetime.timedelta(weeks=1).total_seconds())
 
     @namespace_lock
     async def get(self, item: DocItem) -> Optional[str]:
-        """Get markdown contents for `key`."""
-        with await self._get_pool_connection() as connection:
-            cached_value = await connection.hget(f"{self.namespace}:{item.package}", self.get_item_key(item))
-            if cached_value is None:
-                return None
-
-            value, expire = pickle.loads(cached_value)
-            if expire <= datetime.datetime.now().timestamp():
-                await connection.hdel(f"{self.namespace}:{item.package}", self.get_item_key(item))
-                return None
+        """Return the Markdown content of the symbol `item` if it exists."""
+        url_key = remove_suffix(item.relative_url_path, ".html")
 
-            return value
-
-    @namespace_lock
-    async def delete(self, package: str) -> None:
-        """Remove all values for `package`."""
         with await self._get_pool_connection() as connection:
-            await connection.delete(f"{self.namespace}:{package}")
+            return await connection.hget(f"{self.namespace}:{item.package}:{url_key}", item.symbol_id, encoding="utf8")
 
     @namespace_lock
-    async def delete_expired(self) -> None:
-        """Delete all expired keys."""
-        current_timestamp = datetime.datetime.now().timestamp()
+    async def delete(self, package: str) -> bool:
+        """Remove all values for `package`; return True if at least one key was deleted, False otherwise."""
         with await self._get_pool_connection() as connection:
-            async for package_key in connection.iscan(match=f"{self.namespace}*"):
-                expired_fields = []
-
-                for field, cached_value in (await connection.hgetall(package_key)).items():
-                    _, expire = pickle.loads(cached_value)
-                    if expire <= current_timestamp:
-                        expired_fields.append(field)
-
-                if expired_fields:
-                    await connection.hdel(package_key, *expired_fields)
-
-    @staticmethod
-    def get_item_key(item: DocItem) -> str:
-        """Create redis key for `item`."""
-        return item.relative_url_path + item.symbol_id
+            package_keys = [
+                package_key async for package_key in connection.iscan(match=f"{self.namespace}:{package}:*")
+            ]
+            if package_keys:
+                await connection.delete(*package_keys)
+                return True
+            return False
+
+
+def remove_suffix(string: str, suffix: str) -> str:
+    """Remove `suffix` from end of `string`."""
+    # TODO replace usages with str.removesuffix on 3.9
+    if string.endswith(suffix):
+        return string[:-len(suffix)]
+    else:
+        return string
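
The remove_suffix helper added above mirrors the str.removesuffix builtin its TODO refers to; a quick sketch of the expected behaviour:

    remove_suffix("library/functions.html", ".html")  # "library/functions"
    remove_suffix("library/functions", ".html")       # unchanged, suffix absent
    # equivalent on python 3.9+: "library/functions.html".removesuffix(".html")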