From b5af23252fe9186a6b1412cf67a935380f616555 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 17 Jun 2020 19:42:25 +0200
Subject: Resolve relative href URLs in `a` HTML elements.
Most docs use relative URLs to link across their pages. Without
resolving them ourselves, the links remain unusable in Discord's
markdown and break out of codeblocks on mobile.
---
bot/cogs/doc.py | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 204cffb37..51fb2cb82 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -7,6 +7,7 @@ from collections import OrderedDict
from contextlib import suppress
from types import SimpleNamespace
from typing import Any, Callable, Optional, Tuple
+from urllib.parse import urljoin
import discord
from bs4 import BeautifulSoup
@@ -98,6 +99,10 @@ def async_cache(max_size: int = 128, arg_offset: int = 0) -> Callable:
class DocMarkdownConverter(MarkdownConverter):
"""Subclass markdownify's MarkdownCoverter to provide custom conversion methods."""
+ def __init__(self, *, page_url: str, **options):
+ super().__init__(**options)
+ self.page_url = page_url
+
def convert_code(self, el: PageElement, text: str) -> str:
"""Undo `markdownify`s underscore escaping."""
return f"`{text}`".replace('\\', '')
@@ -107,10 +112,15 @@ class DocMarkdownConverter(MarkdownConverter):
code = ''.join(el.strings)
return f"```py\n{code}```"
+ def convert_a(self, el: PageElement, text: str) -> str:
+ """Resolve relative URLs to `self.page_url`."""
+ el["href"] = urljoin(self.page_url, el["href"])
+ return super().convert_a(el, text)
+
-def markdownify(html: str) -> DocMarkdownConverter:
+def markdownify(html: str, *, url: str = "") -> DocMarkdownConverter:
"""Create a DocMarkdownConverter object from the input html."""
- return DocMarkdownConverter(bullets='•').convert(html)
+ return DocMarkdownConverter(bullets='•', page_url=url).convert(html)
class InventoryURL(commands.Converter):
@@ -293,7 +303,7 @@ class Doc(commands.Cog):
signatures = scraped_html[0]
permalink = self.inventories[symbol]
- description = markdownify(scraped_html[1])
+ description = markdownify(scraped_html[1], url=permalink)
# Truncate the description of the embed to the last occurrence
# of a double newline (interpreted as a paragraph) before index 1000.
--
cgit v1.2.3
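
For reference, a minimal sketch of the resolution behaviour this patch relies on; the page URL below is made up:

    from urllib.parse import urljoin

    page_url = "https://docs.aiohttp.org/en/stable/client_reference.html"

    # Fragment-only and relative hrefs resolve against the page they appear on.
    print(urljoin(page_url, "#aiohttp.ClientSession"))
    # https://docs.aiohttp.org/en/stable/client_reference.html#aiohttp.ClientSession
    print(urljoin(page_url, "../glossary.html"))
    # https://docs.aiohttp.org/en/glossary.html

    # Absolute hrefs pass through unchanged, so every link can be resolved safely.
    print(urljoin(page_url, "https://example.com/page"))
    # https://example.com/page
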
From 5dfbec9d589f62bb1270b162d734749d5b7b069d Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 17 Jun 2020 21:41:04 +0200
Subject: Make doc get greedy.
This allows us to find docs for symbols with spaces in them.
---
bot/cogs/doc.py | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 51fb2cb82..010cb9f4c 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -353,12 +353,12 @@ class Doc(commands.Cog):
return embed
@commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
- async def docs_group(self, ctx: commands.Context, symbol: commands.clean_content = None) -> None:
+ async def docs_group(self, ctx: commands.Context, *, symbol: str) -> None:
"""Lookup documentation for Python symbols."""
- await ctx.invoke(self.get_command, symbol)
+ await ctx.invoke(self.get_command, symbol=symbol)
@docs_group.command(name='get', aliases=('g',))
- async def get_command(self, ctx: commands.Context, symbol: commands.clean_content = None) -> None:
+ async def get_command(self, ctx: commands.Context, *, symbol: str) -> None:
"""
Return a documentation embed for a given symbol.
@@ -370,7 +370,7 @@ class Doc(commands.Cog):
!docs aiohttp.ClientSession
!docs get aiohttp.ClientSession
"""
- if symbol is None:
+ if not symbol:
inventory_embed = discord.Embed(
title=f"All inventories (`{len(self.base_urls)}` total)",
colour=discord.Colour.blue()
@@ -392,8 +392,9 @@ class Doc(commands.Cog):
doc_embed = await self.get_symbol_embed(symbol)
if doc_embed is None:
+ symbol = await discord.ext.commands.clean_content().convert(ctx, symbol)
error_embed = discord.Embed(
- description=f"Sorry, I could not find any documentation for `{symbol}`.",
+                description=f"Sorry, I could not find any documentation for `{symbol}`.",
colour=discord.Colour.red()
)
error_message = await ctx.send(embed=error_embed)
--
cgit v1.2.3
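
A sketch of why the keyword-only parameter makes the command greedy; this is a stand-alone discord.py 1.x-style command for illustration, not the cog itself:

    from discord.ext import commands

    bot = commands.Bot(command_prefix="!")

    @bot.command(name="docs")
    async def docs(ctx: commands.Context, *, symbol: str = "") -> None:
        # After the bare `*`, discord.py stops splitting on whitespace and
        # passes the rest of the message as a single string:
        #   "!docs One Honking Great Idea" -> symbol == "One Honking Great Idea"
        await ctx.send(f"Looking up `{symbol}`")
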
From 39aa2fbe0d19edcb61080e49d591a370820bce47 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 17 Jun 2020 21:48:55 +0200
Subject: Skip symbols with slashes in them.
These symbols mostly point to autogenerated pages and do not link
to specific symbols on those pages, making them unreachable with
the current implementation.
---
bot/cogs/doc.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 010cb9f4c..59c3cc729 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -191,6 +191,8 @@ class Doc(commands.Cog):
for group, value in package.items():
for symbol, (package_name, _version, relative_doc_url, _) in value.items():
+ if "/" in symbol:
+ continue # skip unreachable symbols with slashes
absolute_doc_url = base_url + relative_doc_url
if symbol in self.inventories:
--
cgit v1.2.3
From 41e906d6b978f0745f0aff5e7065ce142282a44f Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Thu, 18 Jun 2020 00:20:25 +0200
Subject: Move symbol parsing into separate methods.
---
bot/cogs/doc.py | 66 +++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 43 insertions(+), 23 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 59c3cc729..a1364dd8b 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -6,7 +6,7 @@ import textwrap
from collections import OrderedDict
from contextlib import suppress
from types import SimpleNamespace
-from typing import Any, Callable, Optional, Tuple
+from typing import Any, Callable, List, Optional, Tuple
from urllib.parse import urljoin
import discord
@@ -265,30 +265,14 @@ class Doc(commands.Cog):
return None
if symbol_id == f"module-{symbol}":
- # Get page content from the module headerlink to the
- # first tag that has its class in `SEARCH_END_TAG_ATTRS`
- start_tag = symbol_heading.find("a", attrs={"class": "headerlink"})
- if start_tag is None:
- return [], ""
-
- end_tag = start_tag.find_next(self._match_end_tag)
- if end_tag is None:
- return [], ""
-
- description_start_index = search_html.find(str(start_tag.parent)) + len(str(start_tag.parent))
- description_end_index = search_html.find(str(end_tag))
- description = search_html[description_start_index:description_end_index]
- signatures = None
+ parsed_module = self.parse_module_symbol(symbol_heading, search_html)
+ if parsed_module is None:
+ return None
+ else:
+ signatures, description = parsed_module
else:
- signatures = []
- description = str(symbol_heading.find_next_sibling("dd"))
- description_pos = search_html.find(description)
- # Get text of up to 3 signatures, remove unwanted symbols
- for element in [symbol_heading] + symbol_heading.find_next_siblings("dt", limit=2):
- signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
- if signature and search_html.find(str(element)) < description_pos:
- signatures.append(signature)
+ signatures, description = self.parse_symbol(symbol_heading, search_html)
return signatures, description.replace('¶', '')
@@ -354,6 +338,42 @@ class Doc(commands.Cog):
)
return embed
+ @classmethod
+ def parse_module_symbol(cls, heading: PageElement, html: str) -> Optional[Tuple[None, str]]:
+ """Get page content from the headerlink up to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`."""
+ start_tag = heading.find("a", attrs={"class": "headerlink"})
+ if start_tag is None:
+ return None
+
+ end_tag = start_tag.find_next(cls._match_end_tag)
+ if end_tag is None:
+ return None
+
+ description_start_index = html.find(str(start_tag.parent)) + len(str(start_tag.parent))
+ description_end_index = html.find(str(end_tag))
+ description = html[description_start_index:description_end_index]
+
+ return None, description
+
+ @staticmethod
+ def parse_symbol(heading: PageElement, html: str) -> Tuple[List[str], str]:
+ """
+ Parse the signatures and description of a symbol.
+
+ Collects up to 3 signatures from dt tags and a description from their sibling dd tag.
+ """
+ signatures = []
+ description = str(heading.find_next_sibling("dd"))
+ description_pos = html.find(description)
+
+ for element in [heading] + heading.find_next_siblings("dt", limit=2):
+ signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
+
+ if signature and html.find(str(element)) < description_pos:
+ signatures.append(signature)
+
+ return signatures, description
+
@commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
async def docs_group(self, ctx: commands.Context, *, symbol: str) -> None:
"""Lookup documentation for Python symbols."""
--
cgit v1.2.3
From b0f46ace7b2d4997d5002eb75199490f7828d829 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Thu, 18 Jun 2020 03:58:27 +0200
Subject: Make sure only class contents are included, without methods.
When parsing classes, methods would sometimes get included,
causing bad-looking markdown in the description. This is solved
by collecting all text *up to* the next dt tag.
fixes: #990
---
bot/cogs/doc.py | 55 ++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 42 insertions(+), 13 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index a1364dd8b..51323e64f 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -6,7 +6,7 @@ import textwrap
from collections import OrderedDict
from contextlib import suppress
from types import SimpleNamespace
-from typing import Any, Callable, List, Optional, Tuple
+from typing import Any, Callable, List, Optional, Tuple, Union
from urllib.parse import urljoin
import discord
@@ -265,7 +265,7 @@ class Doc(commands.Cog):
return None
if symbol_id == f"module-{symbol}":
- parsed_module = self.parse_module_symbol(symbol_heading, search_html)
+ parsed_module = self.parse_module_symbol(symbol_heading)
if parsed_module is None:
return None
else:
@@ -339,32 +339,29 @@ class Doc(commands.Cog):
return embed
@classmethod
- def parse_module_symbol(cls, heading: PageElement, html: str) -> Optional[Tuple[None, str]]:
+ def parse_module_symbol(cls, heading: PageElement) -> Optional[Tuple[None, str]]:
"""Get page content from the headerlink up to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`."""
start_tag = heading.find("a", attrs={"class": "headerlink"})
if start_tag is None:
return None
- end_tag = start_tag.find_next(cls._match_end_tag)
- if end_tag is None:
+ description = cls.find_all_text_until_tag(start_tag, cls._match_end_tag)
+ if description is None:
return None
- description_start_index = html.find(str(start_tag.parent)) + len(str(start_tag.parent))
- description_end_index = html.find(str(end_tag))
- description = html[description_start_index:description_end_index]
-
return None, description
- @staticmethod
- def parse_symbol(heading: PageElement, html: str) -> Tuple[List[str], str]:
+ @classmethod
+ def parse_symbol(cls, heading: PageElement, html: str) -> Tuple[List[str], str]:
"""
Parse the signatures and description of a symbol.
Collects up to 3 signatures from dt tags and a description from their sibling dd tag.
"""
signatures = []
- description = str(heading.find_next_sibling("dd"))
- description_pos = html.find(description)
+ description_element = heading.find_next_sibling("dd")
+ description_pos = html.find(str(description_element))
+ description = "".join(cls.find_all_text_until_tag(description_element, ("dt",)))
for element in [heading] + heading.find_next_siblings("dt", limit=2):
signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
@@ -374,6 +371,38 @@ class Doc(commands.Cog):
return signatures, description
+ @staticmethod
+ def find_all_text_until_tag(
+ start_element: PageElement,
+ tag_filter: Union[Tuple[str], Callable[[Tag], bool]]
+ ) -> Optional[str]:
+ """
+        Get all text from <p> elements until a tag matching `tag_filter` is found, max 1000 elements searched.
+
+ `tag_filter` can be either a tuple of string names to check against,
+ or a filtering callable that's applied to the tags.
+ If no matching end tag is found, None is returned.
+ """
+ text = ""
+ element = start_element
+ for _ in range(1000):
+ if element is None:
+ break
+
+ element = element.find_next()
+ if element.name == "p":
+ text += str(element)
+
+ elif isinstance(tag_filter, tuple):
+ if element.name in tag_filter:
+ break
+ else:
+ if tag_filter(element):
+ break
+ else:
+ return None
+ return text
+
@commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
async def docs_group(self, ctx: commands.Context, *, symbol: str) -> None:
"""Lookup documentation for Python symbols."""
--
cgit v1.2.3
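
A rough illustration of the helper's bounded walk with the tuple form of `tag_filter`; the HTML fragment is invented, and this sketch skips the None return on a missing end tag:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(
        "<dd><p>One.</p><p>Two.</p></dd><dt>next symbol</dt><p>ignored</p>",
        "html.parser",
    )

    text = ""
    element = soup.find("dd")
    for _ in range(1000):  # mirrors the max-1000-elements guard
        element = element.find_next()
        if element is None or element.name in ("dt",):  # tuple-style filter
            break
        if element.name == "p":
            text += str(element)

    print(text)  # <p>One.</p><p>Two.</p>
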
From 8756c741035d007a5d3f3309b877f56b9ccd0ef1 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 21 Jun 2020 00:59:32 +0200
Subject: Account for `NavigableString`s when gathering text.
`find_next()` only goes to tags, leaving out text outside of them when parsing.
---
bot/cogs/doc.py | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 51323e64f..d64e6692f 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -11,7 +11,7 @@ from urllib.parse import urljoin
import discord
from bs4 import BeautifulSoup
-from bs4.element import PageElement, Tag
+from bs4.element import NavigableString, PageElement, Tag
from discord.errors import NotFound
from discord.ext import commands
from markdownify import MarkdownConverter
@@ -377,7 +377,9 @@ class Doc(commands.Cog):
tag_filter: Union[Tuple[str], Callable[[Tag], bool]]
) -> Optional[str]:
"""
-        Get all text from <p> elements until a tag matching `tag_filter` is found, max 1000 elements searched.
+        Get all text from <p> elements and strings until a tag matching `tag_filter` is found.
+
+ Max 1000 elements are searched to avoid going through whole pages when no matching tag is found.
`tag_filter` can be either a tuple of string names to check against,
or a filtering callable that's applied to the tags.
@@ -389,7 +391,11 @@ class Doc(commands.Cog):
if element is None:
break
- element = element.find_next()
+ element = element.next
+ while isinstance(element, NavigableString):
+ text += element
+ element = element.next
+
if element.name == "p":
text += str(element)
--
cgit v1.2.3
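
The difference this patch addresses, sketched on an invented fragment: `find_next()` only visits tags, while `.next` visits every node in document order, strings included:

    from bs4 import BeautifulSoup
    from bs4.element import NavigableString

    soup = BeautifulSoup("<dd>bare text<p>tagged</p>tail</dd>", "html.parser")

    node = soup.find("dd")
    while node is not None:
        node = node.next
        if isinstance(node, NavigableString):
            print(repr(str(node)))
    # 'bare text' - invisible to find_next(), which would jump straight to <p>
    # 'tagged'    - reachable either way, via the <p> tag
    # 'tail'      - invisible to find_next()
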
From e11c5a35f8f494f13323d53c0c514524902b2ae7 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 21 Jun 2020 01:45:54 +0200
Subject: Also check signatures before the selected symbol when collecting 3
 signatures.
---
bot/cogs/doc.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index d64e6692f..b0adc52ba 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -363,7 +363,11 @@ class Doc(commands.Cog):
description_pos = html.find(str(description_element))
description = "".join(cls.find_all_text_until_tag(description_element, ("dt",)))
- for element in [heading] + heading.find_next_siblings("dt", limit=2):
+ for element in (
+ *reversed(heading.find_previous_siblings("dt", limit=2)),
+ heading,
+ *heading.find_next_siblings("dt", limit=2),
+ )[-3:]:
signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
if signature and html.find(str(element)) < description_pos:
--
cgit v1.2.3
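
The windowing can be sketched with plain lists; `find_previous_siblings` yields the nearest sibling first, hence the `reversed()`, and `pick_signatures` is a hypothetical stand-in for the inline expression:

    def pick_signatures(previous, heading, following):
        # Keep at most 3 signatures, preferring the heading and what follows it.
        return (*reversed(previous), heading, *following)[-3:]

    print(pick_signatures(["dt1", "dt0"], "dt2", ["dt3", "dt4"]))
    # ('dt2', 'dt3', 'dt4') - following overloads fill the window first
    print(pick_signatures(["dt1", "dt0"], "dt2", []))
    # ('dt0', 'dt1', 'dt2') - earlier overloads are used when nothing follows
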
From bdccd72747829560eddecc2ae247e5da3a936237 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 21 Jun 2020 01:46:46 +0200
Subject: Remove unnecessary join.
`find_all_text_until_tag` already returns a string, so a join is not needed.
---
bot/cogs/doc.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index b0adc52ba..35139a050 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -361,7 +361,7 @@ class Doc(commands.Cog):
signatures = []
description_element = heading.find_next_sibling("dd")
description_pos = html.find(str(description_element))
- description = "".join(cls.find_all_text_until_tag(description_element, ("dt",)))
+ description = cls.find_all_text_until_tag(description_element, ("dt",))
for element in (
*reversed(heading.find_previous_siblings("dt", limit=2)),
--
cgit v1.2.3
From d1900d537086b5d195da320cdc949e64afb99cd0 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 21 Jun 2020 01:52:02 +0200
Subject: Add symbol group name to symbol inventory entries.
---
bot/cogs/doc.py | 27 +++++++++++++++++----------
1 file changed, 17 insertions(+), 10 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 35139a050..741fd0ddd 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -6,7 +6,7 @@ import textwrap
from collections import OrderedDict
from contextlib import suppress
from types import SimpleNamespace
-from typing import Any, Callable, List, Optional, Tuple, Union
+from typing import Any, Callable, List, NamedTuple, Optional, Tuple, Union
from urllib.parse import urljoin
import discord
@@ -67,6 +67,13 @@ FAILED_REQUEST_RETRY_AMOUNT = 3
NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
+class DocItem(NamedTuple):
+ """Holds inventory symbol information."""
+
+ url: str
+ group: str
+
+
def async_cache(max_size: int = 128, arg_offset: int = 0) -> Callable:
"""
LRU cache implementation for coroutines.
@@ -194,10 +201,10 @@ class Doc(commands.Cog):
if "/" in symbol:
continue # skip unreachable symbols with slashes
absolute_doc_url = base_url + relative_doc_url
+ group_name = group.split(":")[1]
if symbol in self.inventories:
- group_name = group.split(":")[1]
- symbol_base_url = self.inventories[symbol].split("/", 3)[2]
+ symbol_base_url = self.inventories[symbol].url.split("/", 3)[2]
if (
group_name in NO_OVERRIDE_GROUPS
or any(package in symbol_base_url for package in NO_OVERRIDE_PACKAGES)
@@ -209,11 +216,11 @@ class Doc(commands.Cog):
# Split `package_name` because of packages like Pillow that have spaces in them.
symbol = f"{package_name.split()[0]}.{symbol}"
- self.inventories[symbol] = absolute_doc_url
+ self.inventories[symbol] = DocItem(absolute_doc_url, group_name)
self.renamed_symbols.add(symbol)
continue
- self.inventories[symbol] = absolute_doc_url
+ self.inventories[symbol] = DocItem(absolute_doc_url, group_name)
log.trace(f"Fetched inventory for {package_name}.")
@@ -248,15 +255,15 @@ class Doc(commands.Cog):
If the given symbol is a module, returns a tuple `(None, str)`
else if the symbol could not be found, returns `None`.
"""
- url = self.inventories.get(symbol)
- if url is None:
+ symbol_info = self.inventories.get(symbol)
+ if symbol_info is None:
return None
- async with self.bot.http_session.get(url) as response:
+ async with self.bot.http_session.get(symbol_info.url) as response:
html = await response.text(encoding='utf-8')
# Find the signature header and parse the relevant parts.
- symbol_id = url.split('#')[-1]
+ symbol_id = symbol_info.url.split('#')[-1]
soup = BeautifulSoup(html, 'lxml')
symbol_heading = soup.find(id=symbol_id)
search_html = str(soup)
@@ -288,7 +295,7 @@ class Doc(commands.Cog):
return None
signatures = scraped_html[0]
- permalink = self.inventories[symbol]
+ permalink = self.inventories[symbol].url
description = markdownify(scraped_html[1], url=permalink)
# Truncate the description of the embed to the last occurrence
--
cgit v1.2.3
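
A sketch of the new inventory shape; the entry shown is illustrative:

    from typing import Dict, NamedTuple

    class DocItem(NamedTuple):
        """Holds inventory symbol information."""

        url: str
        group: str

    inventories: Dict[str, DocItem] = {
        "aiohttp.ClientSession": DocItem(
            "https://docs.aiohttp.org/en/stable/client_reference.html#aiohttp.ClientSession",
            "class",  # the intersphinx group, e.g. "py:class" split on ":"
        ),
    }

    # Call sites move from a bare URL string to named fields.
    item = inventories["aiohttp.ClientSession"]
    print(item.url, item.group)
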
From d790c404ca3dba3843f351d6f42e766956aa73a1 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 21 Jun 2020 02:37:32 +0200
Subject: Rename existing symbols from `NO_OVERRIDE_GROUPS` instead of
 replacing them.
Before, when a symbol from one of these groups shared its name with a symbol
outside of them, the old symbol was simply replaced and lost. The new
implementation renames the old symbols to the group_name.symbol format before
the new symbol takes their place.
---
bot/cogs/doc.py | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 741fd0ddd..4eea06386 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -209,16 +209,21 @@ class Doc(commands.Cog):
group_name in NO_OVERRIDE_GROUPS
or any(package in symbol_base_url for package in NO_OVERRIDE_PACKAGES)
):
-
symbol = f"{group_name}.{symbol}"
- # If renamed `symbol` already exists, add library name in front to differentiate between them.
- if symbol in self.renamed_symbols:
- # Split `package_name` because of packages like Pillow that have spaces in them.
- symbol = f"{package_name.split()[0]}.{symbol}"
- self.inventories[symbol] = DocItem(absolute_doc_url, group_name)
+ elif (overridden_symbol_group := self.inventories[symbol].group) in NO_OVERRIDE_GROUPS:
+ overridden_symbol = f"{overridden_symbol_group}.{symbol}"
+ if overridden_symbol in self.renamed_symbols:
+ overridden_symbol = f"{package_name.split()[0]}.{overridden_symbol}"
+
+ self.inventories[overridden_symbol] = self.inventories[symbol]
+ self.renamed_symbols.add(overridden_symbol)
+
+ # If renamed `symbol` already exists, add library name in front to differentiate between them.
+ if symbol in self.renamed_symbols:
+ # Split `package_name` because of packages like Pillow that have spaces in them.
+ symbol = f"{package_name.split()[0]}.{symbol}"
self.renamed_symbols.add(symbol)
- continue
self.inventories[symbol] = DocItem(absolute_doc_url, group_name)
--
cgit v1.2.3
From bca55c25ffb3631ba05889a88908a02ccb2beb2a Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 21 Jun 2020 02:42:26 +0200
Subject: Fix typehint.
---
bot/cogs/doc.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 4eea06386..a01f6d64d 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -125,7 +125,7 @@ class DocMarkdownConverter(MarkdownConverter):
return super().convert_a(el, text)
-def markdownify(html: str, *, url: str = "") -> DocMarkdownConverter:
+def markdownify(html: str, *, url: str = "") -> str:
"""Create a DocMarkdownConverter object from the input html."""
return DocMarkdownConverter(bullets='•', page_url=url).convert(html)
--
cgit v1.2.3
From 38991027a38b1adc4be3c99d126dae76a3a62036 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 21 Jun 2020 03:09:23 +0200
Subject: Correct return when a module symbol could not be parsed.
---
bot/cogs/doc.py | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index a01f6d64d..1c9d80e47 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -279,7 +279,7 @@ class Doc(commands.Cog):
if symbol_id == f"module-{symbol}":
parsed_module = self.parse_module_symbol(symbol_heading)
if parsed_module is None:
- return None
+ return [], ""
else:
signatures, description = parsed_module
@@ -538,14 +538,13 @@ class Doc(commands.Cog):
old_inventories = set(self.base_urls)
with ctx.typing():
await self.refresh_inventory()
- # Get differences of added and removed inventories
- added = ', '.join(inv for inv in self.base_urls if inv not in old_inventories)
- if added:
- added = f"+ {added}"
-
- removed = ', '.join(inv for inv in old_inventories if inv not in self.base_urls)
- if removed:
- removed = f"- {removed}"
+ new_inventories = set(self.base_urls)
+
+ if added := ", ".join(new_inventories - old_inventories):
+ added = "+ " + added
+
+ if removed := ", ".join(old_inventories - new_inventories):
+ removed = "- " + removed
embed = discord.Embed(
title="Inventories refreshed",
--
cgit v1.2.3
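
The set-difference rewrite in miniature; the package names are invented, and the walrus operator requires Python 3.8+:

    old_inventories = {"python", "aiohttp"}
    new_inventories = {"python", "discord"}

    # The joined string is bound and tested in one step; an empty join stays falsy.
    if added := ", ".join(new_inventories - old_inventories):
        added = "+ " + added
    if removed := ", ".join(old_inventories - new_inventories):
        removed = "- " + removed

    print(added)    # + discord
    print(removed)  # - aiohttp
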
From a28ae5dfb610151060eab9856c44b2d192131f0d Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 21 Jun 2020 15:58:55 +0200
Subject: Strip backticks from symbol input.
This allows the user to wrap symbols in codeblocks to avoid markdown.
---
bot/cogs/doc.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 1c9d80e47..0dc1713a3 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -458,6 +458,7 @@ class Doc(commands.Cog):
await ctx.send(embed=inventory_embed)
else:
+ symbol = symbol.strip("`")
# Fetching documentation for a symbol (at least for the first time, since
# caching is used) takes quite some time, so let's send typing to indicate
# that we got the command, but are still working on it.
--
cgit v1.2.3
From c461bef250cd3d44fac2c0e64da21072f963909d Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 27 Jun 2020 15:46:47 +0200
Subject: Redesign `find_all_text_until_tag` to search through all direct
children.
The previous approach didn't work for arbitrary tags with text.
---
bot/cogs/doc.py | 39 ++++++++++++---------------------------
1 file changed, 12 insertions(+), 27 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 0dc1713a3..e4b54f0a5 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -11,7 +11,7 @@ from urllib.parse import urljoin
import discord
from bs4 import BeautifulSoup
-from bs4.element import NavigableString, PageElement, Tag
+from bs4.element import PageElement, Tag
from discord.errors import NotFound
from discord.ext import commands
from markdownify import MarkdownConverter
@@ -357,7 +357,7 @@ class Doc(commands.Cog):
if start_tag is None:
return None
- description = cls.find_all_text_until_tag(start_tag, cls._match_end_tag)
+ description = cls.find_all_children_until_tag(start_tag, cls._match_end_tag)
if description is None:
return None
@@ -373,7 +373,7 @@ class Doc(commands.Cog):
signatures = []
description_element = heading.find_next_sibling("dd")
description_pos = html.find(str(description_element))
- description = cls.find_all_text_until_tag(description_element, ("dt",))
+ description = cls.find_all_children_until_tag(description_element, tag_filter=("dt", "dl"))
for element in (
*reversed(heading.find_previous_siblings("dt", limit=2)),
@@ -388,41 +388,26 @@ class Doc(commands.Cog):
return signatures, description
@staticmethod
- def find_all_text_until_tag(
+ def find_all_children_until_tag(
start_element: PageElement,
- tag_filter: Union[Tuple[str], Callable[[Tag], bool]]
+ tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]]
) -> Optional[str]:
"""
-        Get all text from <p> elements and strings until a tag matching `tag_filter` is found.
-
- Max 1000 elements are searched to avoid going through whole pages when no matching tag is found.
+ Get all direct children until a child matching `tag_filter` is found.
`tag_filter` can be either a tuple of string names to check against,
or a filtering callable that's applied to the tags.
- If no matching end tag is found, None is returned.
"""
text = ""
- element = start_element
- for _ in range(1000):
- if element is None:
- break
-
- element = element.next
- while isinstance(element, NavigableString):
- text += element
- element = element.next
- if element.name == "p":
- text += str(element)
-
- elif isinstance(tag_filter, tuple):
+ for element in start_element.find_next().find_next_siblings():
+ if isinstance(tag_filter, tuple):
if element.name in tag_filter:
break
- else:
- if tag_filter(element):
- break
- else:
- return None
+ elif tag_filter(element):
+ break
+ text += str(element)
+
return text
@commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
--
cgit v1.2.3
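
How the direct-children walk behaves, shown on an invented fragment: `find_next()` steps into the start element's first child, and that child's `find_next_siblings()` are exactly the remaining direct children:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(
        "<dd><p>first child</p><p>second child</p>"
        "<dl>nested members stop the walk</dl><p>never reached</p></dd>",
        "html.parser",
    )

    text = ""
    for element in soup.find("dd").find_next().find_next_siblings():
        if element.name in ("dt", "dl"):  # tuple-style tag_filter
            break
        text += str(element)

    print(text)  # <p>second child</p> - the first child only anchors the walk
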
From ff3afe58548a8f1ed675c1933545e481e99bfc78 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 27 Jun 2020 15:48:28 +0200
Subject: Only include one newline for `p` tags in `li` elements.
---
bot/cogs/doc.py | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index e4b54f0a5..c1e8cebcf 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -124,6 +124,13 @@ class DocMarkdownConverter(MarkdownConverter):
el["href"] = urljoin(self.page_url, el["href"])
return super().convert_a(el, text)
+ def convert_p(self, el: PageElement, text: str) -> str:
+ """Include only one newline instead of two when the parent is a li tag."""
+ parent = el.parent
+ if parent is not None and parent.name == "li":
+ return f"{text}\n"
+ return super().convert_p(el, text)
+
def markdownify(html: str, *, url: str = "") -> str:
"""Create a DocMarkdownConverter object from the input html."""
--
cgit v1.2.3
From 6532618a503a55653499089a2d6a4ca43be7e2bf Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 28 Jun 2020 01:45:17 +0200
Subject: Only update the added inventory instead of all.
---
bot/cogs/doc.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index c1e8cebcf..7c4beb075 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -504,7 +504,7 @@ class Doc(commands.Cog):
# Rebuilding the inventory can take some time, so lets send out a
# typing event to show that the Bot is still working.
async with ctx.typing():
- await self.refresh_inventory()
+ await self.update_single(package_name, base_url, inventory_url)
await ctx.send(f"Added package `{package_name}` to database and refreshed inventory.")
@docs_group.command(name='delete', aliases=('remove', 'rm', 'd'))
--
cgit v1.2.3
From fd839ef3f193586c204f52ca76a84c18a8f3ba1e Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 29 Jun 2020 02:39:00 +0200
Subject: Add stat for packages of fetched symbols.
An additional field is added to the DocItem named tuple to accommodate this.
The loop's `_package_name` no longer shadows the `api_package_name` parameter
it previously overwrote; the API name is now used for the stats and renamed
symbols because its names are in a friendlier format.
---
bot/cogs/doc.py | 23 +++++++++++++----------
1 file changed, 13 insertions(+), 10 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 7c4beb075..e1c25d173 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -6,7 +6,7 @@ import textwrap
from collections import OrderedDict
from contextlib import suppress
from types import SimpleNamespace
-from typing import Any, Callable, List, NamedTuple, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, NamedTuple, Optional, Tuple, Union
from urllib.parse import urljoin
import discord
@@ -70,6 +70,7 @@ NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
class DocItem(NamedTuple):
"""Holds inventory symbol information."""
+ package: str
url: str
group: str
@@ -174,7 +175,7 @@ class Doc(commands.Cog):
def __init__(self, bot: Bot):
self.base_urls = {}
self.bot = bot
- self.inventories = {}
+ self.inventories: Dict[str, DocItem] = {}
self.renamed_symbols = set()
self.bot.loop.create_task(self.init_refresh_inventory())
@@ -185,7 +186,7 @@ class Doc(commands.Cog):
await self.refresh_inventory()
async def update_single(
- self, package_name: str, base_url: str, inventory_url: str
+ self, api_package_name: str, base_url: str, inventory_url: str
) -> None:
"""
Rebuild the inventory for a single package.
@@ -197,14 +198,14 @@ class Doc(commands.Cog):
* `inventory_url` is the absolute URL to the intersphinx inventory, fetched by running
`intersphinx.fetch_inventory` in an executor on the bot's event loop
"""
- self.base_urls[package_name] = base_url
+ self.base_urls[api_package_name] = base_url
package = await self._fetch_inventory(inventory_url)
if not package:
return None
for group, value in package.items():
- for symbol, (package_name, _version, relative_doc_url, _) in value.items():
+ for symbol, (_package_name, _version, relative_doc_url, _) in value.items():
if "/" in symbol:
continue # skip unreachable symbols with slashes
absolute_doc_url = base_url + relative_doc_url
@@ -221,7 +222,7 @@ class Doc(commands.Cog):
elif (overridden_symbol_group := self.inventories[symbol].group) in NO_OVERRIDE_GROUPS:
overridden_symbol = f"{overridden_symbol_group}.{symbol}"
if overridden_symbol in self.renamed_symbols:
- overridden_symbol = f"{package_name.split()[0]}.{overridden_symbol}"
+ overridden_symbol = f"{api_package_name}.{overridden_symbol}"
self.inventories[overridden_symbol] = self.inventories[symbol]
self.renamed_symbols.add(overridden_symbol)
@@ -229,12 +230,12 @@ class Doc(commands.Cog):
# If renamed `symbol` already exists, add library name in front to differentiate between them.
if symbol in self.renamed_symbols:
# Split `package_name` because of packages like Pillow that have spaces in them.
- symbol = f"{package_name.split()[0]}.{symbol}"
+ symbol = f"{api_package_name}.{symbol}"
self.renamed_symbols.add(symbol)
- self.inventories[symbol] = DocItem(absolute_doc_url, group_name)
+ self.inventories[symbol] = DocItem(api_package_name, absolute_doc_url, group_name)
- log.trace(f"Fetched inventory for {package_name}.")
+ log.trace(f"Fetched inventory for {api_package_name}.")
async def refresh_inventory(self) -> None:
"""Refresh internal documentation inventory."""
@@ -306,8 +307,10 @@ class Doc(commands.Cog):
if scraped_html is None:
return None
+ symbol_obj = self.inventories[symbol]
+ self.bot.stats.incr(f"doc_fetches.{symbol_obj.package.lower()}")
signatures = scraped_html[0]
- permalink = self.inventories[symbol].url
+ permalink = symbol_obj.url
description = markdownify(scraped_html[1], url=permalink)
# Truncate the description of the embed to the last occurrence
--
cgit v1.2.3
From b6dc7536fd90e27f5dfdf3204dc2f17917d78ee2 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 29 Jun 2020 02:42:27 +0200
Subject: Trigger typing in converter instead of command.
The converter does a web request, so triggering typing in the command itself
left a period where the bot seemed inactive.
---
bot/cogs/doc.py | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index e1c25d173..50aa9bbad 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -151,6 +151,7 @@ class InventoryURL(commands.Converter):
@staticmethod
async def convert(ctx: commands.Context, url: str) -> str:
"""Convert url to Intersphinx inventory URL."""
+ await ctx.trigger_typing()
try:
intersphinx.fetch_inventory(SPHINX_MOCK_APP, '', url)
except AttributeError:
@@ -504,10 +505,7 @@ class Doc(commands.Cog):
f"Inventory URL: {inventory_url}"
)
- # Rebuilding the inventory can take some time, so lets send out a
- # typing event to show that the Bot is still working.
- async with ctx.typing():
- await self.update_single(package_name, base_url, inventory_url)
+ await self.update_single(package_name, base_url, inventory_url)
await ctx.send(f"Added package `{package_name}` to database and refreshed inventory.")
@docs_group.command(name='delete', aliases=('remove', 'rm', 'd'))
--
cgit v1.2.3
From 782cd1771ce9254761a70bbfbfa8e883c1330c6c Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 29 Jun 2020 16:27:24 +0200
Subject: Add option for the user to delete the not-found message before it's
 auto-deleted.
---
bot/cogs/doc.py | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 50aa9bbad..b288a92b1 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -12,7 +12,6 @@ from urllib.parse import urljoin
import discord
from bs4 import BeautifulSoup
from bs4.element import PageElement, Tag
-from discord.errors import NotFound
from discord.ext import commands
from markdownify import MarkdownConverter
from requests import ConnectTimeout, ConnectionError, HTTPError
@@ -24,6 +23,7 @@ from bot.constants import MODERATION_ROLES, RedirectOutput
from bot.converters import ValidPythonIdentifier, ValidURL
from bot.decorators import with_role
from bot.pagination import LinePaginator
+from bot.utils.messages import wait_for_deletion
log = logging.getLogger(__name__)
@@ -468,9 +468,16 @@ class Doc(commands.Cog):
colour=discord.Colour.red()
)
error_message = await ctx.send(embed=error_embed)
- with suppress(NotFound):
- await error_message.delete(delay=NOT_FOUND_DELETE_DELAY)
- await ctx.message.delete(delay=NOT_FOUND_DELETE_DELAY)
+ await wait_for_deletion(
+ error_message,
+ (ctx.author.id,),
+ timeout=NOT_FOUND_DELETE_DELAY,
+ client=self.bot
+ )
+ with suppress(discord.NotFound):
+ await ctx.message.delete()
+ with suppress(discord.NotFound):
+ await error_message.delete()
else:
await ctx.send(embed=doc_embed)
--
cgit v1.2.3
From 09820f5b4a55d6240a05f848ea446bd46062f444 Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Sun, 5 Jul 2020 20:33:03 +0200
Subject: Added better support for GitHub/GitLab
---
bot/__main__.py | 2 +
bot/cogs/print_snippets.py | 200 +++++++++++++++++++++++++++++++++++++++++++++
bot/cogs/repo_widgets.py | 123 ++++++++++++++++++++++++++++
3 files changed, 325 insertions(+)
create mode 100644 bot/cogs/print_snippets.py
create mode 100644 bot/cogs/repo_widgets.py
diff --git a/bot/__main__.py b/bot/__main__.py
index 4e0d4a111..1d415eb20 100644
--- a/bot/__main__.py
+++ b/bot/__main__.py
@@ -71,6 +71,8 @@ bot.load_extension("bot.cogs.utils")
bot.load_extension("bot.cogs.watchchannels")
bot.load_extension("bot.cogs.webhook_remover")
bot.load_extension("bot.cogs.wolfram")
+bot.load_extension("bot.cogs.print_snippets")
+bot.load_extension("bot.cogs.repo_widgets")
if constants.HelpChannels.enable:
bot.load_extension("bot.cogs.help_channels")
diff --git a/bot/cogs/print_snippets.py b/bot/cogs/print_snippets.py
new file mode 100644
index 000000000..06c9d6cc1
--- /dev/null
+++ b/bot/cogs/print_snippets.py
@@ -0,0 +1,200 @@
+"""
+Cog that prints out snippets to Discord
+
+Matches each message against a regex and prints the contents
+of the first matched snippet url
+"""
+
+import os
+import re
+import textwrap
+
+from discord import Message
+from discord.ext.commands import Cog
+import aiohttp
+
+from bot.bot import Bot
+
+
+async def fetch_http(session: aiohttp.ClientSession, url: str, response_format='text', **kwargs) -> str:
+ """Uses aiohttp to make http GET requests"""
+
+ async with session.get(url, **kwargs) as response:
+ if response_format == 'text':
+ return await response.text()
+ elif response_format == 'json':
+ return await response.json()
+
+
+async def revert_to_orig(d: dict) -> dict:
+ """Replace URL Encoded values back to their original"""
+
+ for obj in d:
+ if d[obj] is not None:
+ d[obj] = d[obj].replace('%2F', '/').replace('%2E', '.')
+
+
+async def orig_to_encode(d: dict) -> dict:
+ """Encode URL Parameters"""
+
+ for obj in d:
+ if d[obj] is not None:
+ d[obj] = d[obj].replace('/', '%2F').replace('.', '%2E')
+
+
+async def snippet_to_embed(d: dict, file_contents: str) -> str:
+ """
+ Given a regex groupdict and file contents, creates a code block
+ """
+
+ if d['end_line']:
+ start_line = int(d['start_line'])
+ end_line = int(d['end_line'])
+ else:
+ start_line = end_line = int(d['start_line'])
+
+ split_file_contents = file_contents.split('\n')
+
+ if start_line > end_line:
+ start_line, end_line = end_line, start_line
+ if start_line > len(split_file_contents) or end_line < 1:
+ return ''
+ start_line = max(1, start_line)
+ end_line = min(len(split_file_contents), end_line)
+
+ required = '\n'.join(split_file_contents[start_line - 1:end_line])
+ required = textwrap.dedent(required).rstrip().replace('`', '`\u200b')
+
+ language = d['file_path'].split('/')[-1].split('.')[-1]
+ if not language.replace('-', '').replace('+', '').replace('_', '').isalnum():
+ language = ''
+
+ if len(required) != 0:
+ return f'```{language}\n{required}```\n'
+ return '``` ```\n'
+
+
+GITHUB_RE = re.compile(
+    r'https://github\.com/(?P<repo>.+?)/blob/(?P<branch>.+?)/'
+    + r'(?P<file_path>.+?)#L(?P<start_line>\d+)([-~]L(?P<end_line>\d+))?\b'
+)
+
+GITHUB_GIST_RE = re.compile(
+    r'https://gist\.github\.com/([^/]*)/(?P<gist_id>[0-9a-zA-Z]+)/*'
+    + r'(?P<revision>[0-9a-zA-Z]*)/*#file-(?P<file_path>.+?)'
+    + r'-L(?P<start_line>\d+)([-~]L(?P<end_line>\d+))?\b'
+)
+
+GITLAB_RE = re.compile(
+    r'https://gitlab\.com/(?P<repo>.+?)/\-/blob/(?P<branch>.+?)/'
+    + r'(?P<file_path>.+?)#L(?P<start_line>\d+)([-~](?P<end_line>\d+))?\b'
+)
+
+BITBUCKET_RE = re.compile(
+    r'https://bitbucket\.org/(?P<repo>.+?)/src/(?P<branch>.+?)/'
+    + r'(?P<file_path>.+?)#lines-(?P<start_line>\d+)(:(?P<end_line>\d+))?\b'
+)
+
+
+class PrintSnippets(Cog):
+ def __init__(self, bot):
+ """Initializes the cog's bot"""
+
+ self.bot = bot
+ self.session = aiohttp.ClientSession()
+
+ @Cog.listener()
+ async def on_message(self, message: Message) -> None:
+ """
+        Checks if the message starts with a GitHub snippet, then removes the embed,
+ then sends the snippet in Discord
+ """
+
+ gh_match = GITHUB_RE.search(message.content)
+ gh_gist_match = GITHUB_GIST_RE.search(message.content)
+ gl_match = GITLAB_RE.search(message.content)
+ bb_match = BITBUCKET_RE.search(message.content)
+
+ if (gh_match or gh_gist_match or gl_match or bb_match) and not message.author.bot:
+ message_to_send = ''
+
+ for gh in GITHUB_RE.finditer(message.content):
+ d = gh.groupdict()
+ headers = {'Accept': 'application/vnd.github.v3.raw'}
+ if 'GITHUB_TOKEN' in os.environ:
+ headers['Authorization'] = f'token {os.environ["GITHUB_TOKEN"]}'
+ file_contents = await fetch_http(
+ self.session,
+ f'https://api.github.com/repos/{d["repo"]}/contents/{d["file_path"]}?ref={d["branch"]}',
+ 'text',
+ headers=headers,
+ )
+ message_to_send += await snippet_to_embed(d, file_contents)
+
+ for gh_gist in GITHUB_GIST_RE.finditer(message.content):
+ d = gh_gist.groupdict()
+ gist_json = await fetch_http(
+ self.session,
+ f'https://api.github.com/gists/{d["gist_id"]}{"/" + d["revision"] if len(d["revision"]) > 0 else ""}',
+ 'json',
+ )
+ for f in gist_json['files']:
+ if d['file_path'] == f.lower().replace('.', '-'):
+ d['file_path'] = f
+ file_contents = await fetch_http(
+ self.session,
+ gist_json['files'][f]['raw_url'],
+ 'text',
+ )
+ message_to_send += await snippet_to_embed(d, file_contents)
+ break
+
+ for gl in GITLAB_RE.finditer(message.content):
+ d = gl.groupdict()
+ await orig_to_encode(d)
+ headers = {}
+ if 'GITLAB_TOKEN' in os.environ:
+ headers['PRIVATE-TOKEN'] = os.environ["GITLAB_TOKEN"]
+ file_contents = await fetch_http(
+ self.session,
+ f'https://gitlab.com/api/v4/projects/{d["repo"]}/repository/files/{d["file_path"]}/raw?ref={d["branch"]}',
+ 'text',
+ headers=headers,
+ )
+ await revert_to_orig(d)
+ message_to_send += await snippet_to_embed(d, file_contents)
+
+ for bb in BITBUCKET_RE.finditer(message.content):
+ d = bb.groupdict()
+ await orig_to_encode(d)
+ file_contents = await fetch_http(
+ self.session,
+ f'https://bitbucket.org/{d["repo"]}/raw/{d["branch"]}/{d["file_path"]}',
+ 'text',
+ )
+ await revert_to_orig(d)
+ message_to_send += await snippet_to_embed(d, file_contents)
+
+ message_to_send = message_to_send[:-1]
+
+ if len(message_to_send) > 2000:
+ await message.channel.send(
+ 'Sorry, Discord has a 2000 character limit. Please send a shorter '
+ + 'snippet or split the big snippet up into several smaller ones :slight_smile:'
+ )
+ elif len(message_to_send) == 0:
+ await message.channel.send(
+ 'Please send valid snippet links to prevent spam :slight_smile:'
+ )
+ elif message_to_send.count('\n') > 50:
+ await message.channel.send(
+ 'Please limit the total number of lines to at most 50 to prevent spam :slight_smile:'
+ )
+ else:
+ await message.channel.send(message_to_send)
+ await message.edit(suppress=True)
+
+
+def setup(bot: Bot) -> None:
+ """Load the Utils cog."""
+ bot.add_cog(PrintSnippets(bot))
diff --git a/bot/cogs/repo_widgets.py b/bot/cogs/repo_widgets.py
new file mode 100644
index 000000000..70ca387ec
--- /dev/null
+++ b/bot/cogs/repo_widgets.py
@@ -0,0 +1,123 @@
+"""
+Cog that sends pretty embeds of repos
+
+Matches each message against a regex and prints the contents
+of the first matched snippet url
+"""
+
+import os
+import re
+
+from discord import Embed, Message
+from discord.ext.commands import Cog
+import aiohttp
+
+from bot.bot import Bot
+
+
+async def fetch_http(session: aiohttp.ClientSession, url: str, response_format='text', **kwargs) -> str:
+ """Uses aiohttp to make http GET requests"""
+
+ async with session.get(url, **kwargs) as response:
+ if response_format == 'text':
+ return await response.text()
+ elif response_format == 'json':
+ return await response.json()
+
+
+async def orig_to_encode(d: dict) -> dict:
+ """Encode URL Parameters"""
+
+ for obj in d:
+ if d[obj] is not None:
+ d[obj] = d[obj].replace('/', '%2F').replace('.', '%2E')
+
+
+GITHUB_RE = re.compile(
+    r'https://github\.com/(?P<owner>[^/]+?)/(?P<repo>[^/]+?)(?:\s|$)')
+
+GITLAB_RE = re.compile(
+    r'https://gitlab\.com/(?P<owner>[^/]+?)/(?P<repo>[^/]+?)(?:\s|$)')
+
+
+class RepoWidgets(Cog):
+ def __init__(self, bot: Bot):
+ """Initializes the cog's bot"""
+
+ self.bot = bot
+ self.session = aiohttp.ClientSession()
+
+ @Cog.listener()
+ async def on_message(self, message: Message) -> None:
+ """
+        Checks if the message starts with a GitHub repo link, then removes the embed,
+ then sends a rich embed to Discord
+ """
+
+ gh_match = GITHUB_RE.search(message.content)
+ gl_match = GITLAB_RE.search(message.content)
+
+ if (gh_match or gl_match) and not message.author.bot:
+ for gh in GITHUB_RE.finditer(message.content):
+ d = gh.groupdict()
+ headers = {}
+ if 'GITHUB_TOKEN' in os.environ:
+ headers['Authorization'] = f'token {os.environ["GITHUB_TOKEN"]}'
+ repo = await fetch_http(
+ self.session,
+ f'https://api.github.com/repos/{d["owner"]}/{d["repo"]}',
+ 'json',
+ headers=headers,
+ )
+
+ embed = Embed(
+ title=repo['full_name'],
+                description='No description provided' if repo['description'] is None
+                else repo['description'],
+ url=repo['html_url'],
+ color=0x111111
+ ).set_footer(
+ text=f'Language: {repo["language"]} | ' +
+ f'Stars: {repo["stargazers_count"]} | ' +
+ f'Forks: {repo["forks_count"]} | ' +
+ f'Size: {repo["size"]}kb'
+ ).set_thumbnail(url=repo['owner']['avatar_url'])
+ if repo['homepage']:
+ embed.add_field(name='Website', value=repo['homepage'])
+ await message.channel.send(embed=embed)
+
+ for gl in GITLAB_RE.finditer(message.content):
+ d = gl.groupdict()
+ await orig_to_encode(d)
+ headers = {}
+ if 'GITLAB_TOKEN' in os.environ:
+ headers['PRIVATE-TOKEN'] = os.environ["GITLAB_TOKEN"]
+ repo = await fetch_http(
+ self.session,
+ f'https://gitlab.com/api/v4/projects/{d["owner"]}%2F{d["repo"]}',
+ 'json',
+ headers=headers,
+ )
+
+ embed = Embed(
+ title=repo['path_with_namespace'],
+                description='No description provided' if repo['description'] == ""
+                else repo['description'],
+ url=repo['web_url'],
+ color=0x111111
+ ).set_footer(
+ text=f'Stars: {repo["star_count"]} | ' +
+ f'Forks: {repo["forks_count"]}'
+ )
+
+ if repo['avatar_url'] is not None:
+ embed.set_thumbnail(url=repo['avatar_url'])
+
+ await message.channel.send(embed=embed)
+
+ await message.edit(suppress=True)
+
+
+def setup(bot: Bot) -> None:
+ """Load the Utils cog."""
+ bot.add_cog(RepoWidgets(bot))
--
cgit v1.2.3
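
What the snippet regexes extract, shown on a made-up GitHub URL:

    import re

    GITHUB_RE = re.compile(
        r'https://github\.com/(?P<repo>.+?)/blob/(?P<branch>.+?)/'
        r'(?P<file_path>.+?)#L(?P<start_line>\d+)([-~]L(?P<end_line>\d+))?\b'
    )

    url = "https://github.com/python-discord/bot/blob/master/bot/cogs/doc.py#L1-L10"
    print(GITHUB_RE.search(url).groupdict())
    # {'repo': 'python-discord/bot', 'branch': 'master',
    #  'file_path': 'bot/cogs/doc.py', 'start_line': '1', 'end_line': '10'}
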
From 668d96e12acd76c5021ede07401cdb6062b89add Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Sun, 5 Jul 2020 20:49:46 +0200
Subject: Tried to fix some of the flake8 style errors
---
bot/cogs/print_snippets.py | 43 +++++++++++++++++--------------------------
bot/cogs/repo_widgets.py | 26 +++++++++-----------------
2 files changed, 26 insertions(+), 43 deletions(-)
diff --git a/bot/cogs/print_snippets.py b/bot/cogs/print_snippets.py
index 06c9d6cc1..4be3653d5 100644
--- a/bot/cogs/print_snippets.py
+++ b/bot/cogs/print_snippets.py
@@ -1,24 +1,16 @@
-"""
-Cog that prints out snippets to Discord
-
-Matches each message against a regex and prints the contents
-of the first matched snippet url
-"""
-
import os
import re
import textwrap
+import aiohttp
from discord import Message
from discord.ext.commands import Cog
-import aiohttp
from bot.bot import Bot
-async def fetch_http(session: aiohttp.ClientSession, url: str, response_format='text', **kwargs) -> str:
+async def fetch_http(session: aiohttp.ClientSession, url: str, response_format: str, **kwargs) -> str:
"""Uses aiohttp to make http GET requests"""
-
async with session.get(url, **kwargs) as response:
if response_format == 'text':
return await response.text()
@@ -28,7 +20,6 @@ async def fetch_http(session: aiohttp.ClientSession, url: str, response_format='
async def revert_to_orig(d: dict) -> dict:
"""Replace URL Encoded values back to their original"""
-
for obj in d:
if d[obj] is not None:
d[obj] = d[obj].replace('%2F', '/').replace('%2E', '.')
@@ -36,17 +27,13 @@ async def revert_to_orig(d: dict) -> dict:
async def orig_to_encode(d: dict) -> dict:
"""Encode URL Parameters"""
-
for obj in d:
if d[obj] is not None:
d[obj] = d[obj].replace('/', '%2F').replace('.', '%2E')
async def snippet_to_embed(d: dict, file_contents: str) -> str:
- """
- Given a regex groupdict and file contents, creates a code block
- """
-
+ """Given a regex groupdict and file contents, creates a code block"""
if d['end_line']:
start_line = int(d['start_line'])
end_line = int(d['end_line'])
@@ -97,19 +84,20 @@ BITBUCKET_RE = re.compile(
class PrintSnippets(Cog):
- def __init__(self, bot):
- """Initializes the cog's bot"""
+ """
+ Cog that prints out snippets to Discord
+ Matches each message against a regex and prints the contents of all matched snippets
+ """
+
+ def __init__(self, bot: Bot):
+ """Initializes the cog's bot"""
self.bot = bot
self.session = aiohttp.ClientSession()
@Cog.listener()
async def on_message(self, message: Message) -> None:
- """
-        Checks if the message starts with a GitHub snippet, then removes the embed,
- then sends the snippet in Discord
- """
-
+        """Checks if the message starts with a GitHub snippet, then removes the embed, then sends the snippet in Discord"""
gh_match = GITHUB_RE.search(message.content)
gh_gist_match = GITHUB_GIST_RE.search(message.content)
gl_match = GITLAB_RE.search(message.content)
@@ -125,7 +113,8 @@ class PrintSnippets(Cog):
headers['Authorization'] = f'token {os.environ["GITHUB_TOKEN"]}'
file_contents = await fetch_http(
self.session,
- f'https://api.github.com/repos/{d["repo"]}/contents/{d["file_path"]}?ref={d["branch"]}',
+                f'https://api.github.com/repos/{d["repo"]}'
+                f'/contents/{d["file_path"]}?ref={d["branch"]}',
'text',
headers=headers,
)
@@ -135,7 +124,8 @@ class PrintSnippets(Cog):
d = gh_gist.groupdict()
gist_json = await fetch_http(
self.session,
- f'https://api.github.com/gists/{d["gist_id"]}{"/" + d["revision"] if len(d["revision"]) > 0 else ""}',
+                f'https://api.github.com/gists/{d["gist_id"]}'
+                f'{"/" + d["revision"] if len(d["revision"]) > 0 else ""}',
'json',
)
for f in gist_json['files']:
@@ -157,7 +147,8 @@ class PrintSnippets(Cog):
headers['PRIVATE-TOKEN'] = os.environ["GITLAB_TOKEN"]
file_contents = await fetch_http(
self.session,
- f'https://gitlab.com/api/v4/projects/{d["repo"]}/repository/files/{d["file_path"]}/raw?ref={d["branch"]}',
+                f'https://gitlab.com/api/v4/projects/{d["repo"]}/'
+                f'repository/files/{d["file_path"]}/raw?ref={d["branch"]}',
'text',
headers=headers,
)
diff --git a/bot/cogs/repo_widgets.py b/bot/cogs/repo_widgets.py
index 70ca387ec..feb931e72 100644
--- a/bot/cogs/repo_widgets.py
+++ b/bot/cogs/repo_widgets.py
@@ -1,23 +1,15 @@
-"""
-Cog that sends pretty embeds of repos
-
-Matches each message against a regex and prints the contents
-of the first matched snippet url
-"""
-
import os
import re
+import aiohttp
from discord import Embed, Message
from discord.ext.commands import Cog
-import aiohttp
from bot.bot import Bot
-async def fetch_http(session: aiohttp.ClientSession, url: str, response_format='text', **kwargs) -> str:
+async def fetch_http(session: aiohttp.ClientSession, url: str, response_format: str, **kwargs) -> str:
"""Uses aiohttp to make http GET requests"""
-
async with session.get(url, **kwargs) as response:
if response_format == 'text':
return await response.text()
@@ -27,7 +19,6 @@ async def fetch_http(session: aiohttp.ClientSession, url: str, response_format='
async def orig_to_encode(d: dict) -> dict:
"""Encode URL Parameters"""
-
for obj in d:
if d[obj] is not None:
d[obj] = d[obj].replace('/', '%2F').replace('.', '%2E')
@@ -41,19 +32,20 @@ GITLAB_RE = re.compile(
class RepoWidgets(Cog):
+ """
+ Cog that sends pretty embeds of repos
+
+ Matches each message against a regex and sends an embed with the details of all referenced repos
+ """
+
def __init__(self, bot: Bot):
"""Initializes the cog's bot"""
-
self.bot = bot
self.session = aiohttp.ClientSession()
@Cog.listener()
async def on_message(self, message: Message) -> None:
- """
-        Checks if the message starts with a GitHub repo link, then removes the embed,
- then sends a rich embed to Discord
- """
-
+        """Checks if the message starts with a GitHub repo link, then removes the embed, then sends a rich embed to Discord"""
gh_match = GITHUB_RE.search(message.content)
gl_match = GITLAB_RE.search(message.content)
--
cgit v1.2.3
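
A note on the long-URL splits above: a backslash continuation inside an f-string keeps the next line's indentation as part of the string, so the idiomatic split is adjacent literals, which concatenate at compile time:

    repo = "python-discord/bot"  # illustrative value

    broken = f'https://api.github.com/repos/{repo}\
        /contents/README.md'
    print(broken)  # ...repos/python-discord/bot        /contents/README.md

    fixed = (
        f'https://api.github.com/repos/{repo}'
        f'/contents/README.md'
    )
    print(fixed)   # ...repos/python-discord/bot/contents/README.md
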
From 2fe46fd372a5c8a69437e3f29c0137cb11d156d9 Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Sun, 5 Jul 2020 20:54:55 +0200
Subject: Fixed all docstrings
---
bot/cogs/print_snippets.py | 14 +++++++-------
bot/cogs/repo_widgets.py | 20 ++++++++++----------
2 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/bot/cogs/print_snippets.py b/bot/cogs/print_snippets.py
index 4be3653d5..5c83cd62b 100644
--- a/bot/cogs/print_snippets.py
+++ b/bot/cogs/print_snippets.py
@@ -10,7 +10,7 @@ from bot.bot import Bot
async def fetch_http(session: aiohttp.ClientSession, url: str, response_format: str, **kwargs) -> str:
- """Uses aiohttp to make http GET requests"""
+ """Uses aiohttp to make http GET requests."""
async with session.get(url, **kwargs) as response:
if response_format == 'text':
return await response.text()
@@ -19,21 +19,21 @@ async def fetch_http(session: aiohttp.ClientSession, url: str, response_format:
async def revert_to_orig(d: dict) -> dict:
- """Replace URL Encoded values back to their original"""
+ """Replace URL Encoded values back to their original."""
for obj in d:
if d[obj] is not None:
d[obj] = d[obj].replace('%2F', '/').replace('%2E', '.')
async def orig_to_encode(d: dict) -> dict:
- """Encode URL Parameters"""
+ """Encode URL Parameters."""
for obj in d:
if d[obj] is not None:
d[obj] = d[obj].replace('/', '%2F').replace('.', '%2E')
async def snippet_to_embed(d: dict, file_contents: str) -> str:
- """Given a regex groupdict and file contents, creates a code block"""
+ """Given a regex groupdict and file contents, creates a code block."""
if d['end_line']:
start_line = int(d['start_line'])
end_line = int(d['end_line'])
@@ -85,9 +85,9 @@ BITBUCKET_RE = re.compile(
class PrintSnippets(Cog):
"""
- Cog that prints out snippets to Discord
+ Cog that prints out snippets to Discord.
- Matches each message against a regex and prints the contents of all matched snippets
+ Matches each message against a regex and prints the contents of all matched snippets.
"""
def __init__(self, bot: Bot):
@@ -97,7 +97,7 @@ class PrintSnippets(Cog):
@Cog.listener()
async def on_message(self, message: Message) -> None:
-        """Checks if the message starts with a GitHub snippet, then removes the embed, then sends the snippet in Discord"""
+ """Checks if the message has a snippet link, removes the embed, then sends the snippet contents."""
gh_match = GITHUB_RE.search(message.content)
gh_gist_match = GITHUB_GIST_RE.search(message.content)
gl_match = GITLAB_RE.search(message.content)
diff --git a/bot/cogs/repo_widgets.py b/bot/cogs/repo_widgets.py
index feb931e72..c8fde7c8e 100644
--- a/bot/cogs/repo_widgets.py
+++ b/bot/cogs/repo_widgets.py
@@ -9,7 +9,7 @@ from bot.bot import Bot
async def fetch_http(session: aiohttp.ClientSession, url: str, response_format: str, **kwargs) -> str:
- """Uses aiohttp to make http GET requests"""
+ """Uses aiohttp to make http GET requests."""
async with session.get(url, **kwargs) as response:
if response_format == 'text':
return await response.text()
@@ -18,7 +18,7 @@ async def fetch_http(session: aiohttp.ClientSession, url: str, response_format:
async def orig_to_encode(d: dict) -> dict:
- """Encode URL Parameters"""
+ """Encode URL Parameters."""
for obj in d:
if d[obj] is not None:
d[obj] = d[obj].replace('/', '%2F').replace('.', '%2E')
@@ -33,19 +33,19 @@ GITLAB_RE = re.compile(
class RepoWidgets(Cog):
"""
- Cog that sends pretty embeds of repos
+ Cog that sends pretty embeds of repos.
- Matches each message against a regex and sends an embed with the details of all referenced repos
+ Matches each message against a regex and sends an embed with the details of all referenced repos.
"""
def __init__(self, bot: Bot):
- """Initializes the cog's bot"""
+ """Initializes the cog's bot."""
self.bot = bot
self.session = aiohttp.ClientSession()
@Cog.listener()
async def on_message(self, message: Message) -> None:
-        """Checks if the message starts with a GitHub repo link, then removes the embed, then sends a rich embed to Discord"""
+ """Checks if the message has a repo link, removes the embed, then sends a rich embed."""
gh_match = GITHUB_RE.search(message.content)
gl_match = GITLAB_RE.search(message.content)
@@ -69,10 +69,10 @@ class RepoWidgets(Cog):
url=repo['html_url'],
color=0x111111
).set_footer(
- text=f'Language: {repo["language"]} | ' +
- f'Stars: {repo["stargazers_count"]} | ' +
- f'Forks: {repo["forks_count"]} | ' +
- f'Size: {repo["size"]}kb'
+ text=f'Language: {repo["language"]} | '
+ + f'Stars: {repo["stargazers_count"]} | '
+ + f'Forks: {repo["forks_count"]} | '
+ + f'Size: {repo["size"]}kb'
).set_thumbnail(url=repo['owner']['avatar_url'])
if repo['homepage']:
embed.add_field(name='Website', value=repo['homepage'])
--
cgit v1.2.3
From ec3cc1704c7678f6389ac5c0688be90697410bed Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Sun, 5 Jul 2020 20:59:18 +0200
Subject: Minor style fixes
---
bot/cogs/print_snippets.py | 2 +-
bot/cogs/repo_widgets.py | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/bot/cogs/print_snippets.py b/bot/cogs/print_snippets.py
index 5c83cd62b..67d411a63 100644
--- a/bot/cogs/print_snippets.py
+++ b/bot/cogs/print_snippets.py
@@ -91,7 +91,7 @@ class PrintSnippets(Cog):
"""
def __init__(self, bot: Bot):
- """Initializes the cog's bot"""
+ """Initializes the cog's bot."""
self.bot = bot
self.session = aiohttp.ClientSession()
diff --git a/bot/cogs/repo_widgets.py b/bot/cogs/repo_widgets.py
index c8fde7c8e..32c2451df 100644
--- a/bot/cogs/repo_widgets.py
+++ b/bot/cogs/repo_widgets.py
@@ -98,8 +98,8 @@ class RepoWidgets(Cog):
url=repo['web_url'],
color=0x111111
).set_footer(
- text=f'Stars: {repo["star_count"]} | ' +
- f'Forks: {repo["forks_count"]}'
+ text=f'Stars: {repo["star_count"]} | '
+ + f'Forks: {repo["forks_count"]}'
)
if repo['avatar_url'] is not None:
--
cgit v1.2.3
From fa60e51243c56e6658a91ea63be67a42e22f1512 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 6 Jul 2020 21:23:41 +0200
Subject: Intern `group_names`
---
bot/cogs/doc.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index b288a92b1..0975285e8 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -2,6 +2,7 @@ import asyncio
import functools
import logging
import re
+import sys
import textwrap
from collections import OrderedDict
from contextlib import suppress
@@ -210,7 +211,9 @@ class Doc(commands.Cog):
if "/" in symbol:
continue # skip unreachable symbols with slashes
absolute_doc_url = base_url + relative_doc_url
- group_name = group.split(":")[1]
+ # Intern the group names since they're reused in all the DocItems
+ # to remove unnecessary memory consumption from them being unique objects
+ group_name = sys.intern(group.split(":")[1])
if symbol in self.inventories:
symbol_base_url = self.inventories[symbol].url.split("/", 3)[2]
--
cgit v1.2.3
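[Editorial note, not part of the patch series: the interning above works because `sys.intern` returns one canonical object per distinct string value, so the handful of group names is stored once instead of once per symbol. A minimal, self-contained sketch of the effect, with made-up inventory entries:]

```python
import sys

# Simulate parsing many inventory entries that reuse a handful of group
# names. str.split always builds fresh string objects, just like the
# inventory parsing in the patch does for every symbol.
raw_entries = ["py:class", "py:function", "py:exception"] * 10_000

plain = [entry.split(":")[1] for entry in raw_entries]
interned = [sys.intern(entry.split(":")[1]) for entry in raw_entries]

# Equal strings from split are distinct objects unless interned;
# sys.intern returns one canonical object per distinct value.
print(len({id(s) for s in plain}))     # 30000 -- one object per entry
print(len({id(s) for s in interned}))  # 3 -- one object per group name
```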
From 09987afb9b1e39fc5618b4217e1f33860cdd4bb4 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 7 Jul 2020 01:25:14 +0200
Subject: Create a method to fetch and create a BeautifulSoup object from a URL.
Moving this part of the logic into a separate method allows us to put a cache on it,
which caches the whole HTML document from the given URL,
removing the need to request the same URL again for every symbol behind it.
---
bot/cogs/doc.py | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 0975285e8..71bfcfd4a 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -275,13 +275,9 @@ class Doc(commands.Cog):
symbol_info = self.inventories.get(symbol)
if symbol_info is None:
return None
+ request_url, symbol_id = symbol_info.url.rsplit('#')
- async with self.bot.http_session.get(symbol_info.url) as response:
- html = await response.text(encoding='utf-8')
-
- # Find the signature header and parse the relevant parts.
- symbol_id = symbol_info.url.split('#')[-1]
- soup = BeautifulSoup(html, 'lxml')
+ soup = await self._get_soup_from_url(request_url)
symbol_heading = soup.find(id=symbol_id)
search_html = str(soup)
@@ -424,6 +420,15 @@ class Doc(commands.Cog):
return text
+ @async_cache(arg_offset=1)
+ async def _get_soup_from_url(self, url: str) -> BeautifulSoup:
+ """Create a BeautifulSoup object from the HTML data in `url` with the head tag removed."""
+ log.trace(f"Sending a request to {url}.")
+ async with self.bot.http_session.get(url) as response:
+ soup = BeautifulSoup(await response.text(encoding="utf8"), 'lxml')
+ soup.find("head").decompose() # the head contains no useful data so we can remove it
+ return soup
+
@commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
async def docs_group(self, ctx: commands.Context, *, symbol: str) -> None:
"""Lookup documentation for Python symbols."""
--
cgit v1.2.3
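[Editorial note: the payoff of extracting `_get_soup_from_url` is that the cache key becomes the page URL, so every symbol anchored on the same page shares one fetch and one parse. A simplified sketch of the idea, using a bare dict instead of the cog's `async_cache` decorator; `fetch_html` is a hypothetical stand-in for the aiohttp request:]

```python
import asyncio
from typing import Dict

from bs4 import BeautifulSoup

# Hypothetical module-level cache keyed by URL; the cog itself uses its
# `async_cache` decorator (with FIFO eviction) rather than a bare dict.
_soup_cache: Dict[str, BeautifulSoup] = {}


async def fetch_html(url: str) -> str:
    """Stand-in for the aiohttp request; assumed for this sketch."""
    await asyncio.sleep(0)  # pretend to do network I/O
    return f"<html><head></head><body><p id='anchor'>{url}</p></body></html>"


async def get_soup(url: str) -> BeautifulSoup:
    """Fetch and parse a page once; later symbols on the same page hit the cache."""
    if url not in _soup_cache:
        _soup_cache[url] = BeautifulSoup(await fetch_html(url), "html.parser")
    return _soup_cache[url]


async def main() -> None:
    page = "https://docs.python.org/3/library/functions.html"
    first = await get_soup(page)   # fetches and parses
    second = await get_soup(page)  # served from the cache
    print(first is second)         # True: one parse per page, not per symbol

asyncio.run(main())
```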
From 8462abaa15e0f9eb7b4f861d0485686ec7470ed0 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 7 Jul 2020 01:26:34 +0200
Subject: Use the group attribute instead of checking the symbol name.
---
bot/cogs/doc.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 71bfcfd4a..5ebfb6c25 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -284,7 +284,7 @@ class Doc(commands.Cog):
if symbol_heading is None:
return None
- if symbol_id == f"module-{symbol}":
+ if symbol_info.group == "module":
parsed_module = self.parse_module_symbol(symbol_heading)
if parsed_module is None:
return [], ""
--
cgit v1.2.3
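[Editorial note: one plausible failure mode of the old anchor-based check is a symbol that was renamed during inventory merging, whose name no longer matches the page anchor. A minimal sketch (the `DocItem` shape matches the cog's; the values are hypothetical):]

```python
from typing import NamedTuple


class DocItem(NamedTuple):
    package: str
    url: str
    group: str


# A symbol renamed to "python.re" while merging inventories no longer
# matches the "module-<name>" anchor, but its stored group is still "module".
item = DocItem("python", "https://docs.python.org/3/library/re.html#module-re", "module")
symbol = "python.re"

symbol_id = item.url.rsplit("#", maxsplit=1)[1]
print(symbol_id == f"module-{symbol}")  # False: the name-based check misses it
print(item.group == "module")           # True: the group attribute still works
```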
From 5fb1203883a975d752d9c8b803bb8420ef0f7c60 Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Tue, 7 Jul 2020 19:42:53 +0200
Subject: Remove repo widget prettification and add a reaction to delete sent snippets
---
bot/__main__.py | 1 -
bot/cogs/print_snippets.py | 45 +++++++++---------
bot/cogs/repo_widgets.py | 115 ---------------------------------------------
3 files changed, 22 insertions(+), 139 deletions(-)
delete mode 100644 bot/cogs/repo_widgets.py
diff --git a/bot/__main__.py b/bot/__main__.py
index 1d415eb20..3191faf85 100644
--- a/bot/__main__.py
+++ b/bot/__main__.py
@@ -72,7 +72,6 @@ bot.load_extension("bot.cogs.watchchannels")
bot.load_extension("bot.cogs.webhook_remover")
bot.load_extension("bot.cogs.wolfram")
bot.load_extension("bot.cogs.print_snippets")
-bot.load_extension("bot.cogs.repo_widgets")
if constants.HelpChannels.enable:
bot.load_extension("bot.cogs.help_channels")
diff --git a/bot/cogs/print_snippets.py b/bot/cogs/print_snippets.py
index 67d411a63..3f784d2c6 100644
--- a/bot/cogs/print_snippets.py
+++ b/bot/cogs/print_snippets.py
@@ -1,9 +1,10 @@
+import asyncio
import os
import re
import textwrap
import aiohttp
-from discord import Message
+from discord import Message, Reaction, User
from discord.ext.commands import Cog
from bot.bot import Bot
@@ -113,8 +114,8 @@ class PrintSnippets(Cog):
headers['Authorization'] = f'token {os.environ["GITHUB_TOKEN"]}'
file_contents = await fetch_http(
self.session,
- f'https://api.github.com/repos/{d["repo"]}\
- /contents/{d["file_path"]}?ref={d["branch"]}',
+ f'https://api.github.com/repos/{d["repo"]}'
+ + f'/contents/{d["file_path"]}?ref={d["branch"]}',
'text',
headers=headers,
)
@@ -124,8 +125,8 @@ class PrintSnippets(Cog):
d = gh_gist.groupdict()
gist_json = await fetch_http(
self.session,
- f'https://api.github.com/gists/{d["gist_id"]}\
- {"/" + d["revision"] if len(d["revision"]) > 0 else ""}',
+ f'https://api.github.com/gists/{d["gist_id"]}'
+ + f'{"/" + d["revision"] if len(d["revision"]) > 0 else ""}',
'json',
)
for f in gist_json['files']:
@@ -147,8 +148,8 @@ class PrintSnippets(Cog):
headers['PRIVATE-TOKEN'] = os.environ["GITLAB_TOKEN"]
file_contents = await fetch_http(
self.session,
- f'https://gitlab.com/api/v4/projects/{d["repo"]}/\
- repository/files/{d["file_path"]}/raw?ref={d["branch"]}',
+ f'https://gitlab.com/api/v4/projects/{d["repo"]}/'
+ + f'repository/files/{d["file_path"]}/raw?ref={d["branch"]}',
'text',
headers=headers,
)
@@ -168,22 +169,20 @@ class PrintSnippets(Cog):
message_to_send = message_to_send[:-1]
- if len(message_to_send) > 2000:
- await message.channel.send(
- 'Sorry, Discord has a 2000 character limit. Please send a shorter '
- + 'snippet or split the big snippet up into several smaller ones :slight_smile:'
- )
- elif len(message_to_send) == 0:
- await message.channel.send(
- 'Please send valid snippet links to prevent spam :slight_smile:'
- )
- elif message_to_send.count('\n') > 50:
- await message.channel.send(
- 'Please limit the total number of lines to at most 50 to prevent spam :slight_smile:'
- )
- else:
- await message.channel.send(message_to_send)
- await message.edit(suppress=True)
+ if 0 < len(message_to_send) <= 2000 and message_to_send.count('\n') <= 50:
+ sent_message = await message.channel.send(message_to_send)
+ await message.edit(suppress=True)
+ await sent_message.add_reaction('❌')
+
+ def check(reaction: Reaction, user: User) -> bool:
+ return user == message.author and str(reaction.emoji) == '❌'
+
+ try:
+ reaction, user = await self.bot.wait_for('reaction_add', timeout=10.0, check=check)
+ except asyncio.TimeoutError:
+ await sent_message.remove_reaction('❌', self.bot.user)
+ else:
+ await sent_message.delete()
def setup(bot: Bot) -> None:
diff --git a/bot/cogs/repo_widgets.py b/bot/cogs/repo_widgets.py
deleted file mode 100644
index 32c2451df..000000000
--- a/bot/cogs/repo_widgets.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import os
-import re
-
-import aiohttp
-from discord import Embed, Message
-from discord.ext.commands import Cog
-
-from bot.bot import Bot
-
-
-async def fetch_http(session: aiohttp.ClientSession, url: str, response_format: str, **kwargs) -> str:
- """Uses aiohttp to make http GET requests."""
- async with session.get(url, **kwargs) as response:
- if response_format == 'text':
- return await response.text()
- elif response_format == 'json':
- return await response.json()
-
-
-async def orig_to_encode(d: dict) -> dict:
- """Encode URL Parameters."""
- for obj in d:
- if d[obj] is not None:
- d[obj] = d[obj].replace('/', '%2F').replace('.', '%2E')
-
-
-GITHUB_RE = re.compile(
- r'https://github\.com/(?P<owner>[^/]+?)/(?P<repo>[^/]+?)(?:\s|$)')
-
-GITLAB_RE = re.compile(
- r'https://gitlab\.com/(?P<owner>[^/]+?)/(?P<repo>[^/]+?)(?:\s|$)')
-
-
-class RepoWidgets(Cog):
- """
- Cog that sends pretty embeds of repos.
-
- Matches each message against a regex and sends an embed with the details of all referenced repos.
- """
-
- def __init__(self, bot: Bot):
- """Initializes the cog's bot."""
- self.bot = bot
- self.session = aiohttp.ClientSession()
-
- @Cog.listener()
- async def on_message(self, message: Message) -> None:
- """Checks if the message has a repo link, removes the embed, then sends a rich embed."""
- gh_match = GITHUB_RE.search(message.content)
- gl_match = GITLAB_RE.search(message.content)
-
- if (gh_match or gl_match) and not message.author.bot:
- for gh in GITHUB_RE.finditer(message.content):
- d = gh.groupdict()
- headers = {}
- if 'GITHUB_TOKEN' in os.environ:
- headers['Authorization'] = f'token {os.environ["GITHUB_TOKEN"]}'
- repo = await fetch_http(
- self.session,
- f'https://api.github.com/repos/{d["owner"]}/{d["repo"]}',
- 'json',
- headers=headers,
- )
-
- embed = Embed(
- title=repo['full_name'],
- description='No description provided' if repo[
- 'description'] is None else repo['description'],
- url=repo['html_url'],
- color=0x111111
- ).set_footer(
- text=f'Language: {repo["language"]} | '
- + f'Stars: {repo["stargazers_count"]} | '
- + f'Forks: {repo["forks_count"]} | '
- + f'Size: {repo["size"]}kb'
- ).set_thumbnail(url=repo['owner']['avatar_url'])
- if repo['homepage']:
- embed.add_field(name='Website', value=repo['homepage'])
- await message.channel.send(embed=embed)
-
- for gl in GITLAB_RE.finditer(message.content):
- d = gl.groupdict()
- await orig_to_encode(d)
- headers = {}
- if 'GITLAB_TOKEN' in os.environ:
- headers['PRIVATE-TOKEN'] = os.environ["GITLAB_TOKEN"]
- repo = await fetch_http(
- self.session,
- f'https://gitlab.com/api/v4/projects/{d["owner"]}%2F{d["repo"]}',
- 'json',
- headers=headers,
- )
-
- embed = Embed(
- title=repo['path_with_namespace'],
- description='No description provided' if repo[
- 'description'] == "" else repo['description'],
- url=repo['web_url'],
- color=0x111111
- ).set_footer(
- text=f'Stars: {repo["star_count"]} | '
- + f'Forks: {repo["forks_count"]}'
- )
-
- if repo['avatar_url'] is not None:
- embed.set_thumbnail(url=repo['avatar_url'])
-
- await message.channel.send(embed=embed)
-
- await message.edit(suppress=True)
-
-
-def setup(bot: Bot) -> None:
- """Load the Utils cog."""
- bot.add_cog(RepoWidgets(bot))
--
cgit v1.2.3
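[Editorial note: the new snippet flow sends the message, adds an ❌ reaction, and deletes the message if the triggering author reacts within the timeout. A condensed sketch of that pattern; `send_self_destructing` is an illustrative name, and the sketch adds a message-id check that the patch's `check` omits:]

```python
import asyncio

from discord import Message, Reaction, User
from discord.ext.commands import Bot


async def send_self_destructing(bot: Bot, trigger: Message, content: str) -> None:
    """Illustrative helper: let the triggering author delete the reply via ❌."""
    sent = await trigger.channel.send(content)
    await sent.add_reaction('❌')

    def check(reaction: Reaction, user: User) -> bool:
        # Only the original author's ❌ on this exact message counts.
        return (
            user == trigger.author
            and reaction.message.id == sent.id
            and str(reaction.emoji) == '❌'
        )

    try:
        await bot.wait_for('reaction_add', timeout=10.0, check=check)
    except asyncio.TimeoutError:
        # No deletion requested; clean up the bot's own reaction instead.
        await sent.remove_reaction('❌', bot.user)
    else:
        await sent.delete()
```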
From 03dbddfcae35e47d57222343817ea779d6b67ab2 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Fri, 10 Jul 2020 22:36:19 +0200
Subject: Remove codeblock from symbol embed title.
The code block prevented the URL from highlighting the title text on mobile.
---
bot/cogs/doc.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 5ebfb6c25..e2e3adb4e 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -350,7 +350,7 @@ class Doc(commands.Cog):
embed_description += f"\n{description}"
embed = discord.Embed(
- title=f'`{symbol}`',
+ title=discord.utils.escape_markdown(symbol),
url=permalink,
description=embed_description
)
--
cgit v1.2.3
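[Editorial note: the one-line change swaps code styling for markdown escaping, so the embed title stays a plain, clickable hyperlink while markdown characters in the symbol still render literally. A tiny sketch with illustrative values:]

```python
import discord

symbol = "object.__init__"

# Old approach: a code-styled title, which suppressed the link
# highlighting on mobile clients.
code_title = f'`{symbol}`'

# New approach: escape markdown characters instead; the escaped string
# ("object.\_\_init\_\_") renders as the raw name, and the title remains
# an ordinary hyperlink.
plain_title = discord.utils.escape_markdown(symbol)

embed = discord.Embed(
    title=plain_title,
    url="https://docs.python.org/3/reference/datamodel.html#object.__init__",
)
```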
From b59e39557ae97ac6bbc4e294651d1fe654bb2d21 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 14 Jul 2020 00:13:42 +0200
Subject: Add doc suffix to doc commands.
The `set` command shadowed the `set` symbol, making the command
appear broken. A suffix was added to all commands to keep them
consistent and future-proof; the shorthands were kept unchanged.
---
bot/cogs/doc.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index e2e3adb4e..7f1fb6135 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -434,7 +434,7 @@ class Doc(commands.Cog):
"""Lookup documentation for Python symbols."""
await ctx.invoke(self.get_command, symbol=symbol)
- @docs_group.command(name='get', aliases=('g',))
+ @docs_group.command(name='getdoc', aliases=('g',))
async def get_command(self, ctx: commands.Context, *, symbol: str) -> None:
"""
Return a documentation embed for a given symbol.
@@ -489,7 +489,7 @@ class Doc(commands.Cog):
else:
await ctx.send(embed=doc_embed)
- @docs_group.command(name='set', aliases=('s',))
+ @docs_group.command(name='setdoc', aliases=('s',))
@with_role(*MODERATION_ROLES)
async def set_command(
self, ctx: commands.Context, package_name: ValidPythonIdentifier,
@@ -523,7 +523,7 @@ class Doc(commands.Cog):
await self.update_single(package_name, base_url, inventory_url)
await ctx.send(f"Added package `{package_name}` to database and refreshed inventory.")
- @docs_group.command(name='delete', aliases=('remove', 'rm', 'd'))
+ @docs_group.command(name='deletedoc', aliases=('removedoc', 'rm', 'd'))
@with_role(*MODERATION_ROLES)
async def delete_command(self, ctx: commands.Context, package_name: ValidPythonIdentifier) -> None:
"""
@@ -540,7 +540,7 @@ class Doc(commands.Cog):
await self.refresh_inventory()
await ctx.send(f"Successfully deleted `{package_name}` and refreshed inventory.")
- @docs_group.command(name="refresh", aliases=("rfsh", "r"))
+ @docs_group.command(name="refreshdoc", aliases=("rfsh", "r"))
@with_role(*MODERATION_ROLES)
async def refresh_command(self, ctx: commands.Context) -> None:
"""Refresh inventories and send differences to channel."""
--
cgit v1.2.3
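[Editorial note: the shadowing happens because a group dispatches to a subcommand whenever the first word matches a subcommand name; with `invoke_without_command=True` the group callback only runs when no subcommand matched. A minimal repro, assuming a discord.py 1.x `Bot`:]

```python
from discord.ext import commands

bot = commands.Bot(command_prefix='!')


@bot.group(name='docs', invoke_without_command=True)
async def docs_group(ctx: commands.Context, *, symbol: str) -> None:
    await ctx.send(f'Looking up `{symbol}`')


# Because this subcommand is literally named "set", the message "!docs set"
# dispatches here -- the group callback above never sees the symbol "set".
@docs_group.command(name='set')
async def set_command(ctx: commands.Context) -> None:
    await ctx.send('Expected a package configuration, not a symbol lookup.')
```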
From ea0dcabbca10c5fe2afcee2b9451e1494bc069a2 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 14 Jul 2020 00:18:58 +0200
Subject: Make the symbol parameter optional.
The commands were changed to be greedy; however, this made the
symbol a required argument, breaking access to the default listing
of the available inventories.
---
bot/cogs/doc.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 7f1fb6135..66c4b4ea8 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -430,12 +430,12 @@ class Doc(commands.Cog):
return soup
@commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
- async def docs_group(self, ctx: commands.Context, *, symbol: str) -> None:
+ async def docs_group(self, ctx: commands.Context, *, symbol: Optional[str]) -> None:
"""Lookup documentation for Python symbols."""
await ctx.invoke(self.get_command, symbol=symbol)
@docs_group.command(name='getdoc', aliases=('g',))
- async def get_command(self, ctx: commands.Context, *, symbol: str) -> None:
+ async def get_command(self, ctx: commands.Context, *, symbol: Optional[str]) -> None:
"""
Return a documentation embed for a given symbol.
--
cgit v1.2.3
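[Editorial note: a sketch of why the annotation alone restores the old behaviour. A keyword-only parameter without a default is normally required, but discord.py substitutes None for a missing argument whose annotation is `typing.Optional`, so the command body runs even for a bare invocation:]

```python
from typing import Optional

from discord.ext import commands

bot = commands.Bot(command_prefix='!')


@bot.command(name='getdoc')
async def get_command(ctx: commands.Context, *, symbol: Optional[str]) -> None:
    # With symbol: str this would raise MissingRequiredArgument on a bare
    # "!getdoc"; with Optional[str], discord.py passes None instead.
    if symbol is None:
        await ctx.send('No symbol given; listing all inventories instead.')
    else:
        await ctx.send(f'Looking up {symbol}')
```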
From 40d831fb7b5ca7192fb1bdca8be9157f206eb2bc Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 14 Jul 2020 03:40:52 +0200
Subject: Change package name converter to only accept _a-z.
Package names are now used directly for stats, which only
accept the lowercase characters a-z and _.
---
bot/cogs/doc.py | 6 +++---
bot/converters.py | 22 ++++++++++------------
2 files changed, 13 insertions(+), 15 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 66c4b4ea8..09bddb02c 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -21,7 +21,7 @@ from urllib3.exceptions import ProtocolError
from bot.bot import Bot
from bot.constants import MODERATION_ROLES, RedirectOutput
-from bot.converters import ValidPythonIdentifier, ValidURL
+from bot.converters import PackageName, ValidURL
from bot.decorators import with_role
from bot.pagination import LinePaginator
from bot.utils.messages import wait_for_deletion
@@ -492,7 +492,7 @@ class Doc(commands.Cog):
@docs_group.command(name='setdoc', aliases=('s',))
@with_role(*MODERATION_ROLES)
async def set_command(
- self, ctx: commands.Context, package_name: ValidPythonIdentifier,
+ self, ctx: commands.Context, package_name: PackageName,
base_url: ValidURL, inventory_url: InventoryURL
) -> None:
"""
@@ -525,7 +525,7 @@ class Doc(commands.Cog):
@docs_group.command(name='deletedoc', aliases=('removedoc', 'rm', 'd'))
@with_role(*MODERATION_ROLES)
- async def delete_command(self, ctx: commands.Context, package_name: ValidPythonIdentifier) -> None:
+ async def delete_command(self, ctx: commands.Context, package_name: PackageName) -> None:
"""
Removes the specified package from the database.
diff --git a/bot/converters.py b/bot/converters.py
index 72c46fdf0..fac94e9d0 100644
--- a/bot/converters.py
+++ b/bot/converters.py
@@ -34,22 +34,20 @@ def allowed_strings(*values, preserve_case: bool = False) -> t.Callable[[str], s
return converter
-class ValidPythonIdentifier(Converter):
+class PackageName(Converter):
"""
- A converter that checks whether the given string is a valid Python identifier.
+ A converter that checks whether the given string is a valid package name.
- This is used to have package names that correspond to how you would use the package in your
- code, e.g. `import package`.
-
- Raises `BadArgument` if the argument is not a valid Python identifier, and simply passes through
- the given argument otherwise.
+ Package names are used for stats and are restricted to the a-z and _ characters.
"""
- @staticmethod
- async def convert(ctx: Context, argument: str) -> str:
- """Checks whether the given string is a valid Python identifier."""
- if not argument.isidentifier():
- raise BadArgument(f"`{argument}` is not a valid Python identifier")
+ PACKAGE_NAME_RE = re.compile(r"[^a-z_]")
+
+ @classmethod
+ async def convert(cls, ctx: Context, argument: str) -> str:
+ """Checks whether the given string is a valid package name."""
+ if cls.PACKAGE_NAME_RE.search(argument):
+ raise BadArgument("The provided package name is not valid, please only use the _ and a-z characters.")
return argument
--
cgit v1.2.3
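[Editorial note: a standalone sketch of the converter's rule, outside the command framework; `validate_package_name` is an illustrative name, not from the patch:]

```python
import re

# The same rule the converter enforces: any character outside lowercase
# a-z and underscore makes the name invalid.
PACKAGE_NAME_RE = re.compile(r"[^a-z_]")


def validate_package_name(argument: str) -> str:
    """Return `argument` unchanged if it is a valid package name."""
    if PACKAGE_NAME_RE.search(argument):
        raise ValueError("Package names may only contain the characters a-z and _.")
    return argument


for name in ("aiohttp", "sqlalchemy_utils", "Pillow", "discord.py"):
    try:
        validate_package_name(name)
        print(f"{name}: accepted")
    except ValueError:
        print(f"{name}: rejected")  # "Pillow" (uppercase) and "discord.py" (dot)
```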
From 68805bb77d56f22854508f7912d00bdaab5daf5c Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 14 Jul 2020 03:49:18 +0200
Subject: Change docstrings to use suffixed command names.
---
bot/cogs/doc.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 09bddb02c..673a1156f 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -445,7 +445,7 @@ class Doc(commands.Cog):
!docs
!docs aiohttp
!docs aiohttp.ClientSession
- !docs get aiohttp.ClientSession
+ !docs getdoc aiohttp.ClientSession
"""
if not symbol:
inventory_embed = discord.Embed(
@@ -501,7 +501,7 @@ class Doc(commands.Cog):
The database will update the object, should an existing item with the specified `package_name` already exist.
Example:
- !docs set \
+ !docs setdoc \
python \
https://docs.python.org/3/ \
https://docs.python.org/3/objects.inv
@@ -530,7 +530,7 @@ class Doc(commands.Cog):
Removes the specified package from the database.
Examples:
- !docs delete aiohttp
+ !docs deletedoc aiohttp
"""
await self.bot.api_client.delete(f'bot/documentation-links/{package_name}')
--
cgit v1.2.3
From d1413409f3cbfaaec94060df5c0fea7827fe874b Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 14 Jul 2020 23:54:03 +0200
Subject: Rename inventories to doc_symbols.
---
bot/cogs/doc.py | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 673a1156f..526747bf4 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -177,7 +177,7 @@ class Doc(commands.Cog):
def __init__(self, bot: Bot):
self.base_urls = {}
self.bot = bot
- self.inventories: Dict[str, DocItem] = {}
+ self.doc_symbols: Dict[str, DocItem] = {}
self.renamed_symbols = set()
self.bot.loop.create_task(self.init_refresh_inventory())
@@ -215,20 +215,20 @@ class Doc(commands.Cog):
# to remove unnecessary memory consumption from them being unique objects
group_name = sys.intern(group.split(":")[1])
- if symbol in self.inventories:
- symbol_base_url = self.inventories[symbol].url.split("/", 3)[2]
+ if symbol in self.doc_symbols:
+ symbol_base_url = self.doc_symbols[symbol].url.split("/", 3)[2]
if (
group_name in NO_OVERRIDE_GROUPS
or any(package in symbol_base_url for package in NO_OVERRIDE_PACKAGES)
):
symbol = f"{group_name}.{symbol}"
- elif (overridden_symbol_group := self.inventories[symbol].group) in NO_OVERRIDE_GROUPS:
+ elif (overridden_symbol_group := self.doc_symbols[symbol].group) in NO_OVERRIDE_GROUPS:
overridden_symbol = f"{overridden_symbol_group}.{symbol}"
if overridden_symbol in self.renamed_symbols:
overridden_symbol = f"{api_package_name}.{overridden_symbol}"
- self.inventories[overridden_symbol] = self.inventories[symbol]
+ self.doc_symbols[overridden_symbol] = self.doc_symbols[symbol]
self.renamed_symbols.add(overridden_symbol)
# If renamed `symbol` already exists, add library name in front to differentiate between them.
@@ -237,7 +237,7 @@ class Doc(commands.Cog):
symbol = f"{api_package_name}.{symbol}"
self.renamed_symbols.add(symbol)
- self.inventories[symbol] = DocItem(api_package_name, absolute_doc_url, group_name)
+ self.doc_symbols[symbol] = DocItem(api_package_name, absolute_doc_url, group_name)
log.trace(f"Fetched inventory for {api_package_name}.")
@@ -245,11 +245,11 @@ class Doc(commands.Cog):
"""Refresh internal documentation inventory."""
log.debug("Refreshing documentation inventory...")
- # Clear the old base URLS and inventories to ensure
+ # Clear the old base URLS and doc symbols to ensure
# that we start from a fresh local dataset.
# Also, reset the cache used for fetching documentation.
self.base_urls.clear()
- self.inventories.clear()
+ self.doc_symbols.clear()
self.renamed_symbols.clear()
async_cache.cache = OrderedDict()
@@ -272,7 +272,7 @@ class Doc(commands.Cog):
If the given symbol is a module, returns a tuple `(None, str)`
else if the symbol could not be found, returns `None`.
"""
- symbol_info = self.inventories.get(symbol)
+ symbol_info = self.doc_symbols.get(symbol)
if symbol_info is None:
return None
request_url, symbol_id = symbol_info.url.rsplit('#')
@@ -307,7 +307,7 @@ class Doc(commands.Cog):
if scraped_html is None:
return None
- symbol_obj = self.inventories[symbol]
+ symbol_obj = self.doc_symbols[symbol]
self.bot.stats.incr(f"doc_fetches.{symbol_obj.package.lower()}")
signatures = scraped_html[0]
permalink = symbol_obj.url
--
cgit v1.2.3
From daa46eccc6518e567777240d7b94f121c5eacf57 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 18 Jul 2020 15:52:25 +0200
Subject: Create a package for the Doc cog.
---
bot/cogs/doc.py | 603 -----------------------------------------------
bot/cogs/doc/__init__.py | 7 +
bot/cogs/doc/cog.py | 598 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 605 insertions(+), 603 deletions(-)
delete mode 100644 bot/cogs/doc.py
create mode 100644 bot/cogs/doc/__init__.py
create mode 100644 bot/cogs/doc/cog.py
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
deleted file mode 100644
index 526747bf4..000000000
--- a/bot/cogs/doc.py
+++ /dev/null
@@ -1,603 +0,0 @@
-import asyncio
-import functools
-import logging
-import re
-import sys
-import textwrap
-from collections import OrderedDict
-from contextlib import suppress
-from types import SimpleNamespace
-from typing import Any, Callable, Dict, List, NamedTuple, Optional, Tuple, Union
-from urllib.parse import urljoin
-
-import discord
-from bs4 import BeautifulSoup
-from bs4.element import PageElement, Tag
-from discord.ext import commands
-from markdownify import MarkdownConverter
-from requests import ConnectTimeout, ConnectionError, HTTPError
-from sphinx.ext import intersphinx
-from urllib3.exceptions import ProtocolError
-
-from bot.bot import Bot
-from bot.constants import MODERATION_ROLES, RedirectOutput
-from bot.converters import PackageName, ValidURL
-from bot.decorators import with_role
-from bot.pagination import LinePaginator
-from bot.utils.messages import wait_for_deletion
-
-
-log = logging.getLogger(__name__)
-logging.getLogger('urllib3').setLevel(logging.WARNING)
-
-# Since Intersphinx is intended to be used with Sphinx,
-# we need to mock its configuration.
-SPHINX_MOCK_APP = SimpleNamespace(
- config=SimpleNamespace(
- intersphinx_timeout=3,
- tls_verify=True,
- user_agent="python3:python-discord/bot:1.0.0"
- )
-)
-
-NO_OVERRIDE_GROUPS = (
- "2to3fixer",
- "token",
- "label",
- "pdbcommand",
- "term",
-)
-NO_OVERRIDE_PACKAGES = (
- "python",
-)
-
-SEARCH_END_TAG_ATTRS = (
- "data",
- "function",
- "class",
- "exception",
- "seealso",
- "section",
- "rubric",
- "sphinxsidebar",
-)
-UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
-WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
-
-FAILED_REQUEST_RETRY_AMOUNT = 3
-NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
-
-
-class DocItem(NamedTuple):
- """Holds inventory symbol information."""
-
- package: str
- url: str
- group: str
-
-
-def async_cache(max_size: int = 128, arg_offset: int = 0) -> Callable:
- """
- LRU cache implementation for coroutines.
-
- Once the cache exceeds the maximum size, keys are deleted in FIFO order.
-
- An offset may be optionally provided to be applied to the coroutine's arguments when creating the cache key.
- """
- # Assign the cache to the function itself so we can clear it from outside.
- async_cache.cache = OrderedDict()
-
- def decorator(function: Callable) -> Callable:
- """Define the async_cache decorator."""
- @functools.wraps(function)
- async def wrapper(*args) -> Any:
- """Decorator wrapper for the caching logic."""
- key = ':'.join(args[arg_offset:])
-
- value = async_cache.cache.get(key)
- if value is None:
- if len(async_cache.cache) > max_size:
- async_cache.cache.popitem(last=False)
-
- async_cache.cache[key] = await function(*args)
- return async_cache.cache[key]
- return wrapper
- return decorator
-
-
-class DocMarkdownConverter(MarkdownConverter):
- """Subclass markdownify's MarkdownCoverter to provide custom conversion methods."""
-
- def __init__(self, *, page_url: str, **options):
- super().__init__(**options)
- self.page_url = page_url
-
- def convert_code(self, el: PageElement, text: str) -> str:
- """Undo `markdownify`s underscore escaping."""
- return f"`{text}`".replace('\\', '')
-
- def convert_pre(self, el: PageElement, text: str) -> str:
- """Wrap any codeblocks in `py` for syntax highlighting."""
- code = ''.join(el.strings)
- return f"```py\n{code}```"
-
- def convert_a(self, el: PageElement, text: str) -> str:
- """Resolve relative URLs to `self.page_url`."""
- el["href"] = urljoin(self.page_url, el["href"])
- return super().convert_a(el, text)
-
- def convert_p(self, el: PageElement, text: str) -> str:
- """Include only one newline instead of two when the parent is a li tag."""
- parent = el.parent
- if parent is not None and parent.name == "li":
- return f"{text}\n"
- return super().convert_p(el, text)
-
-
-def markdownify(html: str, *, url: str = "") -> str:
- """Create a DocMarkdownConverter object from the input html."""
- return DocMarkdownConverter(bullets='•', page_url=url).convert(html)
-
-
-class InventoryURL(commands.Converter):
- """
- Represents an Intersphinx inventory URL.
-
- This converter checks whether intersphinx accepts the given inventory URL, and raises
- `BadArgument` if that is not the case.
-
- Otherwise, it simply passes through the given URL.
- """
-
- @staticmethod
- async def convert(ctx: commands.Context, url: str) -> str:
- """Convert url to Intersphinx inventory URL."""
- await ctx.trigger_typing()
- try:
- intersphinx.fetch_inventory(SPHINX_MOCK_APP, '', url)
- except AttributeError:
- raise commands.BadArgument(f"Failed to fetch Intersphinx inventory from URL `{url}`.")
- except ConnectionError:
- if url.startswith('https'):
- raise commands.BadArgument(
- f"Cannot establish a connection to `{url}`. Does it support HTTPS?"
- )
- raise commands.BadArgument(f"Cannot connect to host with URL `{url}`.")
- except ValueError:
- raise commands.BadArgument(
- f"Failed to read Intersphinx inventory from URL `{url}`. "
- "Are you sure that it's a valid inventory file?"
- )
- return url
-
-
-class Doc(commands.Cog):
- """A set of commands for querying & displaying documentation."""
-
- def __init__(self, bot: Bot):
- self.base_urls = {}
- self.bot = bot
- self.doc_symbols: Dict[str, DocItem] = {}
- self.renamed_symbols = set()
-
- self.bot.loop.create_task(self.init_refresh_inventory())
-
- async def init_refresh_inventory(self) -> None:
- """Refresh documentation inventory on cog initialization."""
- await self.bot.wait_until_guild_available()
- await self.refresh_inventory()
-
- async def update_single(
- self, api_package_name: str, base_url: str, inventory_url: str
- ) -> None:
- """
- Rebuild the inventory for a single package.
-
- Where:
- * `package_name` is the package name to use, appears in the log
- * `base_url` is the root documentation URL for the specified package, used to build
- absolute paths that link to specific symbols
- * `inventory_url` is the absolute URL to the intersphinx inventory, fetched by running
- `intersphinx.fetch_inventory` in an executor on the bot's event loop
- """
- self.base_urls[api_package_name] = base_url
-
- package = await self._fetch_inventory(inventory_url)
- if not package:
- return None
-
- for group, value in package.items():
- for symbol, (_package_name, _version, relative_doc_url, _) in value.items():
- if "/" in symbol:
- continue # skip unreachable symbols with slashes
- absolute_doc_url = base_url + relative_doc_url
- # Intern the group names since they're reused in all the DocItems
- # to remove unnecessary memory consumption from them being unique objects
- group_name = sys.intern(group.split(":")[1])
-
- if symbol in self.doc_symbols:
- symbol_base_url = self.doc_symbols[symbol].url.split("/", 3)[2]
- if (
- group_name in NO_OVERRIDE_GROUPS
- or any(package in symbol_base_url for package in NO_OVERRIDE_PACKAGES)
- ):
- symbol = f"{group_name}.{symbol}"
-
- elif (overridden_symbol_group := self.doc_symbols[symbol].group) in NO_OVERRIDE_GROUPS:
- overridden_symbol = f"{overridden_symbol_group}.{symbol}"
- if overridden_symbol in self.renamed_symbols:
- overridden_symbol = f"{api_package_name}.{overridden_symbol}"
-
- self.doc_symbols[overridden_symbol] = self.doc_symbols[symbol]
- self.renamed_symbols.add(overridden_symbol)
-
- # If renamed `symbol` already exists, add library name in front to differentiate between them.
- if symbol in self.renamed_symbols:
- # Split `package_name` because of packages like Pillow that have spaces in them.
- symbol = f"{api_package_name}.{symbol}"
- self.renamed_symbols.add(symbol)
-
- self.doc_symbols[symbol] = DocItem(api_package_name, absolute_doc_url, group_name)
-
- log.trace(f"Fetched inventory for {api_package_name}.")
-
- async def refresh_inventory(self) -> None:
- """Refresh internal documentation inventory."""
- log.debug("Refreshing documentation inventory...")
-
- # Clear the old base URLS and doc symbols to ensure
- # that we start from a fresh local dataset.
- # Also, reset the cache used for fetching documentation.
- self.base_urls.clear()
- self.doc_symbols.clear()
- self.renamed_symbols.clear()
- async_cache.cache = OrderedDict()
-
- # Run all coroutines concurrently - since each of them performs a HTTP
- # request, this speeds up fetching the inventory data heavily.
- coros = [
- self.update_single(
- package["package"], package["base_url"], package["inventory_url"]
- ) for package in await self.bot.api_client.get('bot/documentation-links')
- ]
- await asyncio.gather(*coros)
-
- async def get_symbol_html(self, symbol: str) -> Optional[Tuple[list, str]]:
- """
- Given a Python symbol, return its signature and description.
-
- The first tuple element is the signature of the given symbol as a markup-free string, and
- the second tuple element is the description of the given symbol with HTML markup included.
-
- If the given symbol is a module, returns a tuple `(None, str)`
- else if the symbol could not be found, returns `None`.
- """
- symbol_info = self.doc_symbols.get(symbol)
- if symbol_info is None:
- return None
- request_url, symbol_id = symbol_info.url.rsplit('#')
-
- soup = await self._get_soup_from_url(request_url)
- symbol_heading = soup.find(id=symbol_id)
- search_html = str(soup)
-
- if symbol_heading is None:
- return None
-
- if symbol_info.group == "module":
- parsed_module = self.parse_module_symbol(symbol_heading)
- if parsed_module is None:
- return [], ""
- else:
- signatures, description = parsed_module
-
- else:
- signatures, description = self.parse_symbol(symbol_heading, search_html)
-
- return signatures, description.replace('¶', '')
-
- @async_cache(arg_offset=1)
- async def get_symbol_embed(self, symbol: str) -> Optional[discord.Embed]:
- """
- Attempt to scrape and fetch the data for the given `symbol`, and build an embed from its contents.
-
- If the symbol is known, an Embed with documentation about it is returned.
- """
- scraped_html = await self.get_symbol_html(symbol)
- if scraped_html is None:
- return None
-
- symbol_obj = self.doc_symbols[symbol]
- self.bot.stats.incr(f"doc_fetches.{symbol_obj.package.lower()}")
- signatures = scraped_html[0]
- permalink = symbol_obj.url
- description = markdownify(scraped_html[1], url=permalink)
-
- # Truncate the description of the embed to the last occurrence
- # of a double newline (interpreted as a paragraph) before index 1000.
- if len(description) > 1000:
- shortened = description[:1000]
- description_cutoff = shortened.rfind('\n\n', 100)
- if description_cutoff == -1:
- # Search the shortened version for cutoff points in decreasing desirability,
- # cutoff at 1000 if none are found.
- for string in (". ", ", ", ",", " "):
- description_cutoff = shortened.rfind(string)
- if description_cutoff != -1:
- break
- else:
- description_cutoff = 1000
- description = description[:description_cutoff]
-
- # If there is an incomplete code block, cut it out
- if description.count("```") % 2:
- codeblock_start = description.rfind('```py')
- description = description[:codeblock_start].rstrip()
- description += f"... [read more]({permalink})"
-
- description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
- if signatures is None:
- # If symbol is a module, don't show signature.
- embed_description = description
-
- elif not signatures:
- # It's some "meta-page", for example:
- # https://docs.djangoproject.com/en/dev/ref/views/#module-django.views
- embed_description = "This appears to be a generic page not tied to a specific symbol."
-
- else:
- embed_description = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures)
- embed_description += f"\n{description}"
-
- embed = discord.Embed(
- title=discord.utils.escape_markdown(symbol),
- url=permalink,
- description=embed_description
- )
- # Show all symbols with the same name that were renamed in the footer.
- embed.set_footer(
- text=", ".join(renamed for renamed in self.renamed_symbols - {symbol} if renamed.endswith(f".{symbol}"))
- )
- return embed
-
- @classmethod
- def parse_module_symbol(cls, heading: PageElement) -> Optional[Tuple[None, str]]:
- """Get page content from the headerlink up to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`."""
- start_tag = heading.find("a", attrs={"class": "headerlink"})
- if start_tag is None:
- return None
-
- description = cls.find_all_children_until_tag(start_tag, cls._match_end_tag)
- if description is None:
- return None
-
- return None, description
-
- @classmethod
- def parse_symbol(cls, heading: PageElement, html: str) -> Tuple[List[str], str]:
- """
- Parse the signatures and description of a symbol.
-
- Collects up to 3 signatures from dt tags and a description from their sibling dd tag.
- """
- signatures = []
- description_element = heading.find_next_sibling("dd")
- description_pos = html.find(str(description_element))
- description = cls.find_all_children_until_tag(description_element, tag_filter=("dt", "dl"))
-
- for element in (
- *reversed(heading.find_previous_siblings("dt", limit=2)),
- heading,
- *heading.find_next_siblings("dt", limit=2),
- )[-3:]:
- signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
-
- if signature and html.find(str(element)) < description_pos:
- signatures.append(signature)
-
- return signatures, description
-
- @staticmethod
- def find_all_children_until_tag(
- start_element: PageElement,
- tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]]
- ) -> Optional[str]:
- """
- Get all direct children until a child matching `tag_filter` is found.
-
- `tag_filter` can be either a tuple of string names to check against,
- or a filtering callable that's applied to the tags.
- """
- text = ""
-
- for element in start_element.find_next().find_next_siblings():
- if isinstance(tag_filter, tuple):
- if element.name in tag_filter:
- break
- elif tag_filter(element):
- break
- text += str(element)
-
- return text
-
- @async_cache(arg_offset=1)
- async def _get_soup_from_url(self, url: str) -> BeautifulSoup:
- """Create a BeautifulSoup object from the HTML data in `url` with the head tag removed."""
- log.trace(f"Sending a request to {url}.")
- async with self.bot.http_session.get(url) as response:
- soup = BeautifulSoup(await response.text(encoding="utf8"), 'lxml')
- soup.find("head").decompose() # the head contains no useful data so we can remove it
- return soup
-
- @commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
- async def docs_group(self, ctx: commands.Context, *, symbol: Optional[str]) -> None:
- """Lookup documentation for Python symbols."""
- await ctx.invoke(self.get_command, symbol=symbol)
-
- @docs_group.command(name='getdoc', aliases=('g',))
- async def get_command(self, ctx: commands.Context, *, symbol: Optional[str]) -> None:
- """
- Return a documentation embed for a given symbol.
-
- If no symbol is given, return a list of all available inventories.
-
- Examples:
- !docs
- !docs aiohttp
- !docs aiohttp.ClientSession
- !docs getdoc aiohttp.ClientSession
- """
- if not symbol:
- inventory_embed = discord.Embed(
- title=f"All inventories (`{len(self.base_urls)}` total)",
- colour=discord.Colour.blue()
- )
-
- lines = sorted(f"• [`{name}`]({url})" for name, url in self.base_urls.items())
- if self.base_urls:
- await LinePaginator.paginate(lines, ctx, inventory_embed, max_size=400, empty=False)
-
- else:
- inventory_embed.description = "Hmmm, seems like there's nothing here yet."
- await ctx.send(embed=inventory_embed)
-
- else:
- symbol = symbol.strip("`")
- # Fetching documentation for a symbol (at least for the first time, since
- # caching is used) takes quite some time, so let's send typing to indicate
- # that we got the command, but are still working on it.
- async with ctx.typing():
- doc_embed = await self.get_symbol_embed(symbol)
-
- if doc_embed is None:
- symbol = await discord.ext.commands.clean_content().convert(ctx, symbol)
- error_embed = discord.Embed(
- description=f"Sorry, I could not find any documentation for `{(symbol)}`.",
- colour=discord.Colour.red()
- )
- error_message = await ctx.send(embed=error_embed)
- await wait_for_deletion(
- error_message,
- (ctx.author.id,),
- timeout=NOT_FOUND_DELETE_DELAY,
- client=self.bot
- )
- with suppress(discord.NotFound):
- await ctx.message.delete()
- with suppress(discord.NotFound):
- await error_message.delete()
- else:
- await ctx.send(embed=doc_embed)
-
- @docs_group.command(name='setdoc', aliases=('s',))
- @with_role(*MODERATION_ROLES)
- async def set_command(
- self, ctx: commands.Context, package_name: PackageName,
- base_url: ValidURL, inventory_url: InventoryURL
- ) -> None:
- """
- Adds a new documentation metadata object to the site's database.
-
- The database will update the object, should an existing item with the specified `package_name` already exist.
-
- Example:
- !docs setdoc \
- python \
- https://docs.python.org/3/ \
- https://docs.python.org/3/objects.inv
- """
- body = {
- 'package': package_name,
- 'base_url': base_url,
- 'inventory_url': inventory_url
- }
- await self.bot.api_client.post('bot/documentation-links', json=body)
-
- log.info(
- f"User @{ctx.author} ({ctx.author.id}) added a new documentation package:\n"
- f"Package name: {package_name}\n"
- f"Base url: {base_url}\n"
- f"Inventory URL: {inventory_url}"
- )
-
- await self.update_single(package_name, base_url, inventory_url)
- await ctx.send(f"Added package `{package_name}` to database and refreshed inventory.")
-
- @docs_group.command(name='deletedoc', aliases=('removedoc', 'rm', 'd'))
- @with_role(*MODERATION_ROLES)
- async def delete_command(self, ctx: commands.Context, package_name: PackageName) -> None:
- """
- Removes the specified package from the database.
-
- Examples:
- !docs deletedoc aiohttp
- """
- await self.bot.api_client.delete(f'bot/documentation-links/{package_name}')
-
- async with ctx.typing():
- # Rebuild the inventory to ensure that everything
- # that was from this package is properly deleted.
- await self.refresh_inventory()
- await ctx.send(f"Successfully deleted `{package_name}` and refreshed inventory.")
-
- @docs_group.command(name="refreshdoc", aliases=("rfsh", "r"))
- @with_role(*MODERATION_ROLES)
- async def refresh_command(self, ctx: commands.Context) -> None:
- """Refresh inventories and send differences to channel."""
- old_inventories = set(self.base_urls)
- with ctx.typing():
- await self.refresh_inventory()
- new_inventories = set(self.base_urls)
-
- if added := ", ".join(new_inventories - old_inventories):
- added = "+ " + added
-
- if removed := ", ".join(old_inventories - new_inventories):
- removed = "- " + removed
-
- embed = discord.Embed(
- title="Inventories refreshed",
- description=f"```diff\n{added}\n{removed}```" if added or removed else ""
- )
- await ctx.send(embed=embed)
-
- async def _fetch_inventory(self, inventory_url: str) -> Optional[dict]:
- """Get and return inventory from `inventory_url`. If fetching fails, return None."""
- fetch_func = functools.partial(intersphinx.fetch_inventory, SPHINX_MOCK_APP, '', inventory_url)
- for retry in range(1, FAILED_REQUEST_RETRY_AMOUNT+1):
- try:
- package = await self.bot.loop.run_in_executor(None, fetch_func)
- except ConnectTimeout:
- log.error(
- f"Fetching of inventory {inventory_url} timed out,"
- f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})"
- )
- except ProtocolError:
- log.error(
- f"Connection lost while fetching inventory {inventory_url},"
- f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})"
- )
- except HTTPError as e:
- log.error(f"Fetching of inventory {inventory_url} failed with status code {e.response.status_code}.")
- return None
- except ConnectionError:
- log.error(f"Couldn't establish connection to inventory {inventory_url}.")
- return None
- else:
- return package
- log.error(f"Fetching of inventory {inventory_url} failed.")
- return None
-
- @staticmethod
- def _match_end_tag(tag: Tag) -> bool:
- """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table."""
- for attr in SEARCH_END_TAG_ATTRS:
- if attr in tag.get("class", ()):
- return True
-
- return tag.name == "table"
-
-
-def setup(bot: Bot) -> None:
- """Load the Doc cog."""
- bot.add_cog(Doc(bot))
diff --git a/bot/cogs/doc/__init__.py b/bot/cogs/doc/__init__.py
new file mode 100644
index 000000000..19a71ee66
--- /dev/null
+++ b/bot/cogs/doc/__init__.py
@@ -0,0 +1,7 @@
+from bot.bot import Bot
+from .cog import DocCog
+
+
+def setup(bot: Bot) -> None:
+ """Load the Doc cog."""
+ bot.add_cog(DocCog(bot))
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
new file mode 100644
index 000000000..463e4ebc6
--- /dev/null
+++ b/bot/cogs/doc/cog.py
@@ -0,0 +1,598 @@
+import asyncio
+import functools
+import logging
+import re
+import sys
+import textwrap
+from collections import OrderedDict
+from contextlib import suppress
+from types import SimpleNamespace
+from typing import Any, Callable, Dict, List, NamedTuple, Optional, Tuple, Union
+from urllib.parse import urljoin
+
+import discord
+from bs4 import BeautifulSoup
+from bs4.element import PageElement, Tag
+from discord.ext import commands
+from markdownify import MarkdownConverter
+from requests import ConnectTimeout, ConnectionError, HTTPError
+from sphinx.ext import intersphinx
+from urllib3.exceptions import ProtocolError
+
+from bot.bot import Bot
+from bot.constants import MODERATION_ROLES, RedirectOutput
+from bot.converters import PackageName, ValidURL
+from bot.decorators import with_role
+from bot.pagination import LinePaginator
+from bot.utils.messages import wait_for_deletion
+
+
+log = logging.getLogger(__name__)
+logging.getLogger('urllib3').setLevel(logging.WARNING)
+
+# Since Intersphinx is intended to be used with Sphinx,
+# we need to mock its configuration.
+SPHINX_MOCK_APP = SimpleNamespace(
+ config=SimpleNamespace(
+ intersphinx_timeout=3,
+ tls_verify=True,
+ user_agent="python3:python-discord/bot:1.0.0"
+ )
+)
+
+NO_OVERRIDE_GROUPS = (
+ "2to3fixer",
+ "token",
+ "label",
+ "pdbcommand",
+ "term",
+)
+NO_OVERRIDE_PACKAGES = (
+ "python",
+)
+
+SEARCH_END_TAG_ATTRS = (
+ "data",
+ "function",
+ "class",
+ "exception",
+ "seealso",
+ "section",
+ "rubric",
+ "sphinxsidebar",
+)
+UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
+WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
+
+FAILED_REQUEST_RETRY_AMOUNT = 3
+NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
+
+
+class DocItem(NamedTuple):
+ """Holds inventory symbol information."""
+
+ package: str
+ url: str
+ group: str
+
+
+def async_cache(max_size: int = 128, arg_offset: int = 0) -> Callable:
+ """
+ LRU cache implementation for coroutines.
+
+ Once the cache exceeds the maximum size, keys are deleted in FIFO order.
+
+ An offset may be optionally provided to be applied to the coroutine's arguments when creating the cache key.
+ """
+ # Assign the cache to the function itself so we can clear it from outside.
+ async_cache.cache = OrderedDict()
+
+ def decorator(function: Callable) -> Callable:
+ """Define the async_cache decorator."""
+ @functools.wraps(function)
+ async def wrapper(*args) -> Any:
+ """Decorator wrapper for the caching logic."""
+ key = ':'.join(args[arg_offset:])
+
+ value = async_cache.cache.get(key)
+ if value is None:
+ if len(async_cache.cache) > max_size:
+ async_cache.cache.popitem(last=False)
+
+ async_cache.cache[key] = await function(*args)
+ return async_cache.cache[key]
+ return wrapper
+ return decorator
+
+
+class DocMarkdownConverter(MarkdownConverter):
+ """Subclass markdownify's MarkdownCoverter to provide custom conversion methods."""
+
+ def __init__(self, *, page_url: str, **options):
+ super().__init__(**options)
+ self.page_url = page_url
+
+ def convert_code(self, el: PageElement, text: str) -> str:
+ """Undo `markdownify`s underscore escaping."""
+ return f"`{text}`".replace('\\', '')
+
+ def convert_pre(self, el: PageElement, text: str) -> str:
+ """Wrap any codeblocks in `py` for syntax highlighting."""
+ code = ''.join(el.strings)
+ return f"```py\n{code}```"
+
+ def convert_a(self, el: PageElement, text: str) -> str:
+ """Resolve relative URLs to `self.page_url`."""
+ el["href"] = urljoin(self.page_url, el["href"])
+ return super().convert_a(el, text)
+
+ def convert_p(self, el: PageElement, text: str) -> str:
+ """Include only one newline instead of two when the parent is a li tag."""
+ parent = el.parent
+ if parent is not None and parent.name == "li":
+ return f"{text}\n"
+ return super().convert_p(el, text)
+
+
+def markdownify(html: str, *, url: str = "") -> str:
+ """Create a DocMarkdownConverter object from the input html."""
+ return DocMarkdownConverter(bullets='•', page_url=url).convert(html)
+
+
+class InventoryURL(commands.Converter):
+ """
+ Represents an Intersphinx inventory URL.
+
+ This converter checks whether intersphinx accepts the given inventory URL, and raises
+ `BadArgument` if that is not the case.
+
+ Otherwise, it simply passes through the given URL.
+ """
+
+ @staticmethod
+ async def convert(ctx: commands.Context, url: str) -> str:
+ """Convert url to Intersphinx inventory URL."""
+ await ctx.trigger_typing()
+ try:
+ intersphinx.fetch_inventory(SPHINX_MOCK_APP, '', url)
+ except AttributeError:
+ raise commands.BadArgument(f"Failed to fetch Intersphinx inventory from URL `{url}`.")
+ except ConnectionError:
+ if url.startswith('https'):
+ raise commands.BadArgument(
+ f"Cannot establish a connection to `{url}`. Does it support HTTPS?"
+ )
+ raise commands.BadArgument(f"Cannot connect to host with URL `{url}`.")
+ except ValueError:
+ raise commands.BadArgument(
+ f"Failed to read Intersphinx inventory from URL `{url}`. "
+ "Are you sure that it's a valid inventory file?"
+ )
+ return url
+
+
+class DocCog(commands.Cog):
+ """A set of commands for querying & displaying documentation."""
+
+ def __init__(self, bot: Bot):
+ self.base_urls = {}
+ self.bot = bot
+ self.doc_symbols: Dict[str, DocItem] = {}
+ self.renamed_symbols = set()
+
+ self.bot.loop.create_task(self.init_refresh_inventory())
+
+ async def init_refresh_inventory(self) -> None:
+ """Refresh documentation inventory on cog initialization."""
+ await self.bot.wait_until_guild_available()
+ await self.refresh_inventory()
+
+ async def update_single(
+ self, api_package_name: str, base_url: str, inventory_url: str
+ ) -> None:
+ """
+ Rebuild the inventory for a single package.
+
+ Where:
+ * `package_name` is the package name to use, appears in the log
+ * `base_url` is the root documentation URL for the specified package, used to build
+ absolute paths that link to specific symbols
+ * `inventory_url` is the absolute URL to the intersphinx inventory, fetched by running
+ `intersphinx.fetch_inventory` in an executor on the bot's event loop
+ """
+ self.base_urls[api_package_name] = base_url
+
+ package = await self._fetch_inventory(inventory_url)
+ if not package:
+ return None
+
+ for group, value in package.items():
+ for symbol, (_package_name, _version, relative_doc_url, _) in value.items():
+ if "/" in symbol:
+ continue # skip unreachable symbols with slashes
+ absolute_doc_url = base_url + relative_doc_url
+ # Intern the group names since they're reused in all the DocItems
+ # to remove unnecessary memory consumption from them being unique objects
+ group_name = sys.intern(group.split(":")[1])
+
+ if symbol in self.doc_symbols:
+ symbol_base_url = self.doc_symbols[symbol].url.split("/", 3)[2]
+ if (
+ group_name in NO_OVERRIDE_GROUPS
+ or any(package in symbol_base_url for package in NO_OVERRIDE_PACKAGES)
+ ):
+ symbol = f"{group_name}.{symbol}"
+
+ elif (overridden_symbol_group := self.doc_symbols[symbol].group) in NO_OVERRIDE_GROUPS:
+ overridden_symbol = f"{overridden_symbol_group}.{symbol}"
+ if overridden_symbol in self.renamed_symbols:
+ overridden_symbol = f"{api_package_name}.{overridden_symbol}"
+
+ self.doc_symbols[overridden_symbol] = self.doc_symbols[symbol]
+ self.renamed_symbols.add(overridden_symbol)
+
+ # If renamed `symbol` already exists, add library name in front to differentiate between them.
+ if symbol in self.renamed_symbols:
+ # Split `package_name` because of packages like Pillow that have spaces in them.
+ symbol = f"{api_package_name}.{symbol}"
+ self.renamed_symbols.add(symbol)
+
+ self.doc_symbols[symbol] = DocItem(api_package_name, absolute_doc_url, group_name)
+
+ log.trace(f"Fetched inventory for {api_package_name}.")
+
+ async def refresh_inventory(self) -> None:
+ """Refresh internal documentation inventory."""
+ log.debug("Refreshing documentation inventory...")
+
+ # Clear the old base URLS and doc symbols to ensure
+ # that we start from a fresh local dataset.
+ # Also, reset the cache used for fetching documentation.
+ self.base_urls.clear()
+ self.doc_symbols.clear()
+ self.renamed_symbols.clear()
+ async_cache.cache = OrderedDict()
+
+ # Run all coroutines concurrently - since each of them performs a HTTP
+ # request, this speeds up fetching the inventory data heavily.
+ coros = [
+ self.update_single(
+ package["package"], package["base_url"], package["inventory_url"]
+ ) for package in await self.bot.api_client.get('bot/documentation-links')
+ ]
+ await asyncio.gather(*coros)
+
+ async def get_symbol_html(self, symbol: str) -> Optional[Tuple[list, str]]:
+ """
+ Given a Python symbol, return its signature and description.
+
+ The first tuple element is the signature of the given symbol as a markup-free string, and
+ the second tuple element is the description of the given symbol with HTML markup included.
+
+ If the given symbol is a module, returns a tuple `(None, str)`
+ else if the symbol could not be found, returns `None`.
+ """
+ symbol_info = self.doc_symbols.get(symbol)
+ if symbol_info is None:
+ return None
+ request_url, symbol_id = symbol_info.url.rsplit('#')
+
+ soup = await self._get_soup_from_url(request_url)
+ symbol_heading = soup.find(id=symbol_id)
+ search_html = str(soup)
+
+ if symbol_heading is None:
+ return None
+
+ if symbol_info.group == "module":
+ parsed_module = self.parse_module_symbol(symbol_heading)
+ if parsed_module is None:
+ return [], ""
+ else:
+ signatures, description = parsed_module
+
+ else:
+ signatures, description = self.parse_symbol(symbol_heading, search_html)
+
+ return signatures, description.replace('¶', '')
+
+ @async_cache(arg_offset=1)
+ async def get_symbol_embed(self, symbol: str) -> Optional[discord.Embed]:
+ """
+ Attempt to scrape and fetch the data for the given `symbol`, and build an embed from its contents.
+
+ If the symbol is known, an Embed with documentation about it is returned.
+ """
+ scraped_html = await self.get_symbol_html(symbol)
+ if scraped_html is None:
+ return None
+
+ symbol_obj = self.doc_symbols[symbol]
+ self.bot.stats.incr(f"doc_fetches.{symbol_obj.package.lower()}")
+ signatures = scraped_html[0]
+ permalink = symbol_obj.url
+ description = markdownify(scraped_html[1], url=permalink)
+
+ # Truncate the description of the embed to the last occurrence
+ # of a double newline (interpreted as a paragraph) before index 1000.
+ if len(description) > 1000:
+ shortened = description[:1000]
+ description_cutoff = shortened.rfind('\n\n', 100)
+ if description_cutoff == -1:
+ # Search the shortened version for cutoff points in decreasing desirability,
+ # cutoff at 1000 if none are found.
+ for string in (". ", ", ", ",", " "):
+ description_cutoff = shortened.rfind(string)
+ if description_cutoff != -1:
+ break
+ else:
+ description_cutoff = 1000
+ description = description[:description_cutoff]
+
+ # If there is an incomplete code block, cut it out
+ if description.count("```") % 2:
+ codeblock_start = description.rfind('```py')
+ description = description[:codeblock_start].rstrip()
+ description += f"... [read more]({permalink})"
+
+ description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
+ if signatures is None:
+ # If symbol is a module, don't show signature.
+ embed_description = description
+
+ elif not signatures:
+ # It's some "meta-page", for example:
+ # https://docs.djangoproject.com/en/dev/ref/views/#module-django.views
+ embed_description = "This appears to be a generic page not tied to a specific symbol."
+
+ else:
+ embed_description = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures)
+ embed_description += f"\n{description}"
+
+ embed = discord.Embed(
+ title=discord.utils.escape_markdown(symbol),
+ url=permalink,
+ description=embed_description
+ )
+ # Show all symbols with the same name that were renamed in the footer.
+ embed.set_footer(
+ text=", ".join(renamed for renamed in self.renamed_symbols - {symbol} if renamed.endswith(f".{symbol}"))
+ )
+ return embed
+
+ @classmethod
+ def parse_module_symbol(cls, heading: PageElement) -> Optional[Tuple[None, str]]:
+ """Get page content from the headerlink up to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`."""
+ start_tag = heading.find("a", attrs={"class": "headerlink"})
+ if start_tag is None:
+ return None
+
+ description = cls.find_all_children_until_tag(start_tag, cls._match_end_tag)
+ if description is None:
+ return None
+
+ return None, description
+
+ @classmethod
+ def parse_symbol(cls, heading: PageElement, html: str) -> Tuple[List[str], str]:
+ """
+ Parse the signatures and description of a symbol.
+
+ Collects up to 3 signatures from dt tags and a description from their sibling dd tag.
+ """
+ signatures = []
+ description_element = heading.find_next_sibling("dd")
+ description_pos = html.find(str(description_element))
+ description = cls.find_all_children_until_tag(description_element, tag_filter=("dt", "dl"))
+
+ for element in (
+ *reversed(heading.find_previous_siblings("dt", limit=2)),
+ heading,
+ *heading.find_next_siblings("dt", limit=2),
+ )[-3:]:
+ signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
+
+ if signature and html.find(str(element)) < description_pos:
+ signatures.append(signature)
+
+ return signatures, description
+
+ @staticmethod
+ def find_all_children_until_tag(
+ start_element: PageElement,
+ tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]]
+ ) -> Optional[str]:
+ """
+ Get all direct children until a child matching `tag_filter` is found.
+
+ `tag_filter` can be either a tuple of string names to check against,
+ or a filtering callable that's applied to the tags.
+ """
+ text = ""
+
+ for element in start_element.find_next().find_next_siblings():
+ if isinstance(tag_filter, tuple):
+ if element.name in tag_filter:
+ break
+ elif tag_filter(element):
+ break
+ text += str(element)
+
+ return text
+
+ @async_cache(arg_offset=1)
+ async def _get_soup_from_url(self, url: str) -> BeautifulSoup:
+ """Create a BeautifulSoup object from the HTML data in `url` with the head tag removed."""
+ log.trace(f"Sending a request to {url}.")
+ async with self.bot.http_session.get(url) as response:
+ soup = BeautifulSoup(await response.text(encoding="utf8"), 'lxml')
+ soup.find("head").decompose() # the head contains no useful data so we can remove it
+ return soup
+
+ @commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
+ async def docs_group(self, ctx: commands.Context, *, symbol: Optional[str]) -> None:
+ """Lookup documentation for Python symbols."""
+ await ctx.invoke(self.get_command, symbol=symbol)
+
+ @docs_group.command(name='getdoc', aliases=('g',))
+ async def get_command(self, ctx: commands.Context, *, symbol: Optional[str]) -> None:
+ """
+ Return a documentation embed for a given symbol.
+
+ If no symbol is given, return a list of all available inventories.
+
+ Examples:
+ !docs
+ !docs aiohttp
+ !docs aiohttp.ClientSession
+ !docs getdoc aiohttp.ClientSession
+ """
+ if not symbol:
+ inventory_embed = discord.Embed(
+ title=f"All inventories (`{len(self.base_urls)}` total)",
+ colour=discord.Colour.blue()
+ )
+
+ lines = sorted(f"• [`{name}`]({url})" for name, url in self.base_urls.items())
+ if self.base_urls:
+ await LinePaginator.paginate(lines, ctx, inventory_embed, max_size=400, empty=False)
+
+ else:
+ inventory_embed.description = "Hmmm, seems like there's nothing here yet."
+ await ctx.send(embed=inventory_embed)
+
+ else:
+ symbol = symbol.strip("`")
+ # Fetching documentation for a symbol (at least for the first time, since
+ # caching is used) takes quite some time, so let's send typing to indicate
+ # that we got the command, but are still working on it.
+ async with ctx.typing():
+ doc_embed = await self.get_symbol_embed(symbol)
+
+ if doc_embed is None:
+ symbol = await discord.ext.commands.clean_content().convert(ctx, symbol)
+ error_embed = discord.Embed(
+ description=f"Sorry, I could not find any documentation for `{(symbol)}`.",
+ colour=discord.Colour.red()
+ )
+ error_message = await ctx.send(embed=error_embed)
+ await wait_for_deletion(
+ error_message,
+ (ctx.author.id,),
+ timeout=NOT_FOUND_DELETE_DELAY,
+ client=self.bot
+ )
+ with suppress(discord.NotFound):
+ await ctx.message.delete()
+ with suppress(discord.NotFound):
+ await error_message.delete()
+ else:
+ await ctx.send(embed=doc_embed)
+
+ @docs_group.command(name='setdoc', aliases=('s',))
+ @with_role(*MODERATION_ROLES)
+ async def set_command(
+ self, ctx: commands.Context, package_name: PackageName,
+ base_url: ValidURL, inventory_url: InventoryURL
+ ) -> None:
+ """
+ Adds a new documentation metadata object to the site's database.
+
+ The database will update the object, should an existing item with the specified `package_name` already exist.
+
+ Example:
+ !docs setdoc \
+ python \
+ https://docs.python.org/3/ \
+ https://docs.python.org/3/objects.inv
+ """
+ body = {
+ 'package': package_name,
+ 'base_url': base_url,
+ 'inventory_url': inventory_url
+ }
+ await self.bot.api_client.post('bot/documentation-links', json=body)
+
+ log.info(
+ f"User @{ctx.author} ({ctx.author.id}) added a new documentation package:\n"
+ f"Package name: {package_name}\n"
+ f"Base url: {base_url}\n"
+ f"Inventory URL: {inventory_url}"
+ )
+
+ await self.update_single(package_name, base_url, inventory_url)
+ await ctx.send(f"Added package `{package_name}` to database and refreshed inventory.")
+
+ @docs_group.command(name='deletedoc', aliases=('removedoc', 'rm', 'd'))
+ @with_role(*MODERATION_ROLES)
+ async def delete_command(self, ctx: commands.Context, package_name: PackageName) -> None:
+ """
+ Removes the specified package from the database.
+
+ Examples:
+ !docs deletedoc aiohttp
+ """
+ await self.bot.api_client.delete(f'bot/documentation-links/{package_name}')
+
+ async with ctx.typing():
+ # Rebuild the inventory to ensure that everything
+ # that was from this package is properly deleted.
+ await self.refresh_inventory()
+ await ctx.send(f"Successfully deleted `{package_name}` and refreshed inventory.")
+
+ @docs_group.command(name="refreshdoc", aliases=("rfsh", "r"))
+ @with_role(*MODERATION_ROLES)
+ async def refresh_command(self, ctx: commands.Context) -> None:
+ """Refresh inventories and send differences to channel."""
+ old_inventories = set(self.base_urls)
+ async with ctx.typing():
+ await self.refresh_inventory()
+ new_inventories = set(self.base_urls)
+
+ if added := ", ".join(new_inventories - old_inventories):
+ added = "+ " + added
+
+ if removed := ", ".join(old_inventories - new_inventories):
+ removed = "- " + removed
+
+ embed = discord.Embed(
+ title="Inventories refreshed",
+ description=f"```diff\n{added}\n{removed}```" if added or removed else ""
+ )
+ await ctx.send(embed=embed)
+
+ async def _fetch_inventory(self, inventory_url: str) -> Optional[dict]:
+ """Get and return inventory from `inventory_url`. If fetching fails, return None."""
+ fetch_func = functools.partial(intersphinx.fetch_inventory, SPHINX_MOCK_APP, '', inventory_url)
+ for retry in range(1, FAILED_REQUEST_RETRY_AMOUNT+1):
+ try:
+ package = await self.bot.loop.run_in_executor(None, fetch_func)
+ except ConnectTimeout:
+ log.error(
+ f"Fetching of inventory {inventory_url} timed out,"
+ f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})"
+ )
+ except ProtocolError:
+ log.error(
+ f"Connection lost while fetching inventory {inventory_url},"
+ f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})"
+ )
+ except HTTPError as e:
+ log.error(f"Fetching of inventory {inventory_url} failed with status code {e.response.status_code}.")
+ return None
+ except ConnectionError:
+ log.error(f"Couldn't establish connection to inventory {inventory_url}.")
+ return None
+ else:
+ return package
+ log.error(f"Fetching of inventory {inventory_url} failed.")
+ return None
+
+ @staticmethod
+ def _match_end_tag(tag: Tag) -> bool:
+ """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table."""
+ for attr in SEARCH_END_TAG_ATTRS:
+ if attr in tag.get("class", ()):
+ return True
+
+ return tag.name == "table"
--
cgit v1.2.3
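
The `_fetch_inventory` coroutine above leans on try/except/else inside the retry
loop: the else branch returns the result only when no exception was raised, fatal
errors return None immediately, and the code after the loop runs only once every
retry has failed. A minimal standalone sketch of the same pattern, outside the
patch series (`fetch_once` and `TransientError` are hypothetical stand-ins for
the real fetch call and exception types):

    import logging
    from typing import Optional

    log = logging.getLogger(__name__)
    RETRY_AMOUNT = 3

    class TransientError(Exception):
        """Stand-in for errors worth retrying, like ConnectTimeout/ProtocolError."""

    def fetch_once() -> dict:
        """Hypothetical fetch that may raise TransientError."""
        raise TransientError

    def fetch_with_retries() -> Optional[dict]:
        for retry in range(1, RETRY_AMOUNT + 1):
            try:
                result = fetch_once()
            except TransientError:
                # Transient failure: log and fall through to the next attempt.
                log.error(f"Fetch failed, trying again. ({retry}/{RETRY_AMOUNT})")
            else:
                return result
        # Reached only when every attempt raised, mirroring the final log + return None.
        log.error("Fetching failed after all retries.")
        return None
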
From c3bda11a10e3706d7e457f727e57e6a92f604d1e Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 18 Jul 2020 16:16:49 +0200
Subject: Move async_cache into a separate module
---
bot/cogs/doc/cache.py | 32 ++++++++++++++++++++++++++++++++
bot/cogs/doc/cog.py | 33 ++-------------------------------
2 files changed, 34 insertions(+), 31 deletions(-)
create mode 100644 bot/cogs/doc/cache.py
diff --git a/bot/cogs/doc/cache.py b/bot/cogs/doc/cache.py
new file mode 100644
index 000000000..9da2a1dab
--- /dev/null
+++ b/bot/cogs/doc/cache.py
@@ -0,0 +1,32 @@
+import functools
+from collections import OrderedDict
+from typing import Any, Callable
+
+
+def async_cache(max_size: int = 128, arg_offset: int = 0) -> Callable:
+ """
+ LRU cache implementation for coroutines.
+
+ Once the cache exceeds the maximum size, keys are deleted in FIFO order.
+
+ An offset may be optionally provided to be applied to the coroutine's arguments when creating the cache key.
+ """
+ # Assign the cache to the function itself so we can clear it from outside.
+ async_cache.cache = OrderedDict()
+
+ def decorator(function: Callable) -> Callable:
+ """Define the async_cache decorator."""
+ @functools.wraps(function)
+ async def wrapper(*args) -> Any:
+ """Decorator wrapper for the caching logic."""
+ key = ':'.join(args[arg_offset:])
+
+ value = async_cache.cache.get(key)
+ if value is None:
+ if len(async_cache.cache) > max_size:
+ async_cache.cache.popitem(last=False)
+
+ async_cache.cache[key] = await function(*args)
+ return async_cache.cache[key]
+ return wrapper
+ return decorator
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index 463e4ebc6..2627951e8 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -7,7 +7,7 @@ import textwrap
from collections import OrderedDict
from contextlib import suppress
from types import SimpleNamespace
-from typing import Any, Callable, Dict, List, NamedTuple, Optional, Tuple, Union
+from typing import Callable, Dict, List, NamedTuple, Optional, Tuple, Union
from urllib.parse import urljoin
import discord
@@ -25,7 +25,7 @@ from bot.converters import PackageName, ValidURL
from bot.decorators import with_role
from bot.pagination import LinePaginator
from bot.utils.messages import wait_for_deletion
-
+from .cache import async_cache
log = logging.getLogger(__name__)
logging.getLogger('urllib3').setLevel(logging.WARNING)
@@ -76,35 +76,6 @@ class DocItem(NamedTuple):
group: str
-def async_cache(max_size: int = 128, arg_offset: int = 0) -> Callable:
- """
- LRU cache implementation for coroutines.
-
- Once the cache exceeds the maximum size, keys are deleted in FIFO order.
-
- An offset may be optionally provided to be applied to the coroutine's arguments when creating the cache key.
- """
- # Assign the cache to the function itself so we can clear it from outside.
- async_cache.cache = OrderedDict()
-
- def decorator(function: Callable) -> Callable:
- """Define the async_cache decorator."""
- @functools.wraps(function)
- async def wrapper(*args) -> Any:
- """Decorator wrapper for the caching logic."""
- key = ':'.join(args[arg_offset:])
-
- value = async_cache.cache.get(key)
- if value is None:
- if len(async_cache.cache) > max_size:
- async_cache.cache.popitem(last=False)
-
- async_cache.cache[key] = await function(*args)
- return async_cache.cache[key]
- return wrapper
- return decorator
-
-
class DocMarkdownConverter(MarkdownConverter):
"""Subclass markdownify's MarkdownCoverter to provide custom conversion methods."""
--
cgit v1.2.3
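
A usage sketch of the extracted decorator, assuming the `bot.cogs.doc.cache` path
added above is importable (this is not part of the patch series). The cache key is
built by joining `args[arg_offset:]` with ':', so the cached arguments must be
strings, and eviction is FIFO once the cache grows past `max_size`:

    import asyncio

    from bot.cogs.doc.cache import async_cache

    @async_cache(max_size=2)
    async def fetch_page(url: str) -> str:
        print(f"cache miss, fetching {url}")
        return f"<contents of {url}>"

    async def main() -> None:
        await fetch_page("https://docs.python.org/3/")   # miss: prints, then caches
        await fetch_page("https://docs.python.org/3/")   # hit: served from the cache
        await fetch_page("https://docs.aiohttp.org/")    # miss: second key
        await fetch_page("https://example.com/")         # miss: third key
        await fetch_page("https://www.python.org/")      # miss: the oldest key is evicted

    asyncio.run(main())
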
From 53213ec69208370342498cdc417f3c90d35b8f3e Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 18 Jul 2020 16:37:19 +0200
Subject: Move main parsing methods into a new module
---
bot/cogs/doc/cog.py | 102 +++----------------------------------------------
bot/cogs/doc/parser.py | 102 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 108 insertions(+), 96 deletions(-)
create mode 100644 bot/cogs/doc/parser.py
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index 2627951e8..4a275c7c6 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -7,12 +7,11 @@ import textwrap
from collections import OrderedDict
from contextlib import suppress
from types import SimpleNamespace
-from typing import Callable, Dict, List, NamedTuple, Optional, Tuple, Union
+from typing import Dict, NamedTuple, Optional, Tuple
from urllib.parse import urljoin
import discord
-from bs4 import BeautifulSoup
-from bs4.element import PageElement, Tag
+from bs4.element import PageElement
from discord.ext import commands
from markdownify import MarkdownConverter
from requests import ConnectTimeout, ConnectionError, HTTPError
@@ -26,6 +25,7 @@ from bot.decorators import with_role
from bot.pagination import LinePaginator
from bot.utils.messages import wait_for_deletion
from .cache import async_cache
+from .parser import get_soup_from_url, parse_module_symbol, parse_symbol
log = logging.getLogger(__name__)
logging.getLogger('urllib3').setLevel(logging.WARNING)
@@ -51,19 +51,7 @@ NO_OVERRIDE_PACKAGES = (
"python",
)
-SEARCH_END_TAG_ATTRS = (
- "data",
- "function",
- "class",
- "exception",
- "seealso",
- "section",
- "rubric",
- "sphinxsidebar",
-)
-UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
-
FAILED_REQUEST_RETRY_AMOUNT = 3
NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
@@ -248,7 +236,7 @@ class DocCog(commands.Cog):
return None
request_url, symbol_id = symbol_info.url.rsplit('#')
- soup = await self._get_soup_from_url(request_url)
+ soup = await get_soup_from_url(self.bot.http_session, request_url)
symbol_heading = soup.find(id=symbol_id)
search_html = str(soup)
@@ -256,14 +244,14 @@ class DocCog(commands.Cog):
return None
if symbol_info.group == "module":
- parsed_module = self.parse_module_symbol(symbol_heading)
+ parsed_module = parse_module_symbol(symbol_heading)
if parsed_module is None:
return [], ""
else:
signatures, description = parsed_module
else:
- signatures, description = self.parse_symbol(symbol_heading, search_html)
+ signatures, description = parse_symbol(symbol_heading, search_html)
return signatures, description.replace('¶', '')
@@ -331,75 +319,6 @@ class DocCog(commands.Cog):
)
return embed
- @classmethod
- def parse_module_symbol(cls, heading: PageElement) -> Optional[Tuple[None, str]]:
- """Get page content from the headerlink up to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`."""
- start_tag = heading.find("a", attrs={"class": "headerlink"})
- if start_tag is None:
- return None
-
- description = cls.find_all_children_until_tag(start_tag, cls._match_end_tag)
- if description is None:
- return None
-
- return None, description
-
- @classmethod
- def parse_symbol(cls, heading: PageElement, html: str) -> Tuple[List[str], str]:
- """
- Parse the signatures and description of a symbol.
-
- Collects up to 3 signatures from dt tags and a description from their sibling dd tag.
- """
- signatures = []
- description_element = heading.find_next_sibling("dd")
- description_pos = html.find(str(description_element))
- description = cls.find_all_children_until_tag(description_element, tag_filter=("dt", "dl"))
-
- for element in (
- *reversed(heading.find_previous_siblings("dt", limit=2)),
- heading,
- *heading.find_next_siblings("dt", limit=2),
- )[-3:]:
- signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
-
- if signature and html.find(str(element)) < description_pos:
- signatures.append(signature)
-
- return signatures, description
-
- @staticmethod
- def find_all_children_until_tag(
- start_element: PageElement,
- tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]]
- ) -> Optional[str]:
- """
- Get all direct children until a child matching `tag_filter` is found.
-
- `tag_filter` can be either a tuple of string names to check against,
- or a filtering callable that's applied to the tags.
- """
- text = ""
-
- for element in start_element.find_next().find_next_siblings():
- if isinstance(tag_filter, tuple):
- if element.name in tag_filter:
- break
- elif tag_filter(element):
- break
- text += str(element)
-
- return text
-
- @async_cache(arg_offset=1)
- async def _get_soup_from_url(self, url: str) -> BeautifulSoup:
- """Create a BeautifulSoup object from the HTML data in `url` with the head tag removed."""
- log.trace(f"Sending a request to {url}.")
- async with self.bot.http_session.get(url) as response:
- soup = BeautifulSoup(await response.text(encoding="utf8"), 'lxml')
- soup.find("head").decompose() # the head contains no useful data so we can remove it
- return soup
-
@commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
async def docs_group(self, ctx: commands.Context, *, symbol: Optional[str]) -> None:
"""Lookup documentation for Python symbols."""
@@ -558,12 +477,3 @@ class DocCog(commands.Cog):
return package
log.error(f"Fetching of inventory {inventory_url} failed.")
return None
-
- @staticmethod
- def _match_end_tag(tag: Tag) -> bool:
- """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table."""
- for attr in SEARCH_END_TAG_ATTRS:
- if attr in tag.get("class", ()):
- return True
-
- return tag.name == "table"
diff --git a/bot/cogs/doc/parser.py b/bot/cogs/doc/parser.py
new file mode 100644
index 000000000..67621591b
--- /dev/null
+++ b/bot/cogs/doc/parser.py
@@ -0,0 +1,102 @@
+import logging
+import re
+from typing import Callable, List, Optional, Tuple, Union
+
+from aiohttp import ClientSession
+from bs4 import BeautifulSoup
+from bs4.element import PageElement, Tag
+
+from .cache import async_cache
+
+log = logging.getLogger(__name__)
+
+UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
+SEARCH_END_TAG_ATTRS = (
+ "data",
+ "function",
+ "class",
+ "exception",
+ "seealso",
+ "section",
+ "rubric",
+ "sphinxsidebar",
+)
+
+
+def parse_module_symbol(heading: PageElement) -> Optional[Tuple[None, str]]:
+ """Get page content from the headerlink up to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`."""
+ start_tag = heading.find("a", attrs={"class": "headerlink"})
+ if start_tag is None:
+ return None
+
+ description = find_all_children_until_tag(start_tag, _match_end_tag)
+ if description is None:
+ return None
+
+ return None, description
+
+
+def parse_symbol(heading: PageElement, html: str) -> Tuple[List[str], str]:
+ """
+ Parse the signatures and description of a symbol.
+
+ Collects up to 3 signatures from dt tags and a description from their sibling dd tag.
+ """
+ signatures = []
+ description_element = heading.find_next_sibling("dd")
+ description_pos = html.find(str(description_element))
+ description = find_all_children_until_tag(description_element, tag_filter=("dt", "dl"))
+
+ for element in (
+ *reversed(heading.find_previous_siblings("dt", limit=2)),
+ heading,
+ *heading.find_next_siblings("dt", limit=2),
+ )[-3:]:
+ signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
+
+ if signature and html.find(str(element)) < description_pos:
+ signatures.append(signature)
+
+ return signatures, description
+
+
+def find_all_children_until_tag(
+ start_element: PageElement,
+ tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]]
+) -> Optional[str]:
+ """
+ Get all direct children until a child matching `tag_filter` is found.
+
+ `tag_filter` can be either a tuple of string names to check against,
+ or a filtering callable that's applied to the tags.
+ """
+ text = ""
+
+ for element in start_element.find_next().find_next_siblings():
+ if isinstance(tag_filter, tuple):
+ if element.name in tag_filter:
+ break
+ elif tag_filter(element):
+ break
+ text += str(element)
+
+ return text
+
+
+@async_cache(arg_offset=1)
+async def get_soup_from_url(http_session: ClientSession, url: str) -> BeautifulSoup:
+ """Create a BeautifulSoup object from the HTML data in `url` with the head tag removed."""
+ log.trace(f"Sending a request to {url}.")
+ async with http_session.get(url) as response:
+ soup = BeautifulSoup(await response.text(encoding="utf8"), 'lxml')
+ soup.find("head").decompose() # the head contains no useful data so we can remove it
+ return soup
+
+
+def _match_end_tag(tag: Tag) -> bool:
+ """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table."""
+ for attr in SEARCH_END_TAG_ATTRS:
+ if attr in tag.get("class", ()):
+ return True
+
+ return tag.name == "table"
--
cgit v1.2.3
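
A sketch of the now free-standing soup fetcher, assuming the `bot.cogs.doc.parser`
layout from this patch and a reachable docs page (not part of the patch series).
With `arg_offset=1` the session object is excluded from the cache key, so only the
URL identifies the cached soup:

    import asyncio

    import aiohttp

    from bot.cogs.doc.parser import get_soup_from_url

    async def main() -> None:
        async with aiohttp.ClientSession() as session:
            url = "https://docs.python.org/3/library/functools.html"
            soup = await get_soup_from_url(session, url)
            # The head tag was decomposed; only the body is left to search.
            print(soup.find(id="functools.reduce") is not None)

    asyncio.run(main())
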
From eb8361d7fa9d0eb0dd5982c6df0fd35b80d40ba6 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 19 Jul 2020 03:13:02 +0200
Subject: Move markdown truncation into parser module
---
bot/cogs/doc/cog.py | 27 ++-------------------------
bot/cogs/doc/parser.py | 29 +++++++++++++++++++++++++++++
2 files changed, 31 insertions(+), 25 deletions(-)
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index 4a275c7c6..bd4e9d4d1 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -25,7 +25,7 @@ from bot.decorators import with_role
from bot.pagination import LinePaginator
from bot.utils.messages import wait_for_deletion
from .cache import async_cache
-from .parser import get_soup_from_url, parse_module_symbol, parse_symbol
+from .parser import get_soup_from_url, parse_module_symbol, parse_symbol, truncate_markdown
log = logging.getLogger(__name__)
logging.getLogger('urllib3').setLevel(logging.WARNING)
@@ -270,30 +270,7 @@ class DocCog(commands.Cog):
self.bot.stats.incr(f"doc_fetches.{symbol_obj.package.lower()}")
signatures = scraped_html[0]
permalink = symbol_obj.url
- description = markdownify(scraped_html[1], url=permalink)
-
- # Truncate the description of the embed to the last occurrence
- # of a double newline (interpreted as a paragraph) before index 1000.
- if len(description) > 1000:
- shortened = description[:1000]
- description_cutoff = shortened.rfind('\n\n', 100)
- if description_cutoff == -1:
- # Search the shortened version for cutoff points in decreasing desirability,
- # cutting off at 1000 if none are found.
- for string in (". ", ", ", ",", " "):
- description_cutoff = shortened.rfind(string)
- if description_cutoff != -1:
- break
- else:
- description_cutoff = 1000
- description = description[:description_cutoff]
-
- # If there is an incomplete code block, cut it out
- if description.count("```") % 2:
- codeblock_start = description.rfind('```py')
- description = description[:codeblock_start].rstrip()
- description += f"... [read more]({permalink})"
-
+ description = truncate_markdown(markdownify(scraped_html[1], url=permalink), permalink, 1000)
description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
if signatures is None:
# If symbol is a module, don't show signature.
diff --git a/bot/cogs/doc/parser.py b/bot/cogs/doc/parser.py
index 67621591b..010826a96 100644
--- a/bot/cogs/doc/parser.py
+++ b/bot/cogs/doc/parser.py
@@ -83,6 +83,35 @@ def find_all_children_until_tag(
return text
+def truncate_markdown(markdown: str, permalink: str, max_length: int) -> str:
+ """
+ Truncate `markdown` to be at most `max_length` characters.
+
+ The markdown string is searched for substrings to cut at, to keep its structure,
+ but if none are found the string is simply sliced.
+ """
+ if len(markdown) > max_length:
+ shortened = markdown[:max_length]
+ description_cutoff = shortened.rfind('\n\n', 100)
+ if description_cutoff == -1:
+ # Search the shortened version for cutoff points in decreasing desirability,
+ # cutting off at `max_length` if none are found.
+ for string in (". ", ", ", ",", " "):
+ description_cutoff = shortened.rfind(string)
+ if description_cutoff != -1:
+ break
+ else:
+ description_cutoff = max_length
+ markdown = markdown[:description_cutoff]
+
+ # If there is an incomplete code block, cut it out
+ if markdown.count("```") % 2:
+ codeblock_start = markdown.rfind('```py')
+ markdown = markdown[:codeblock_start].rstrip()
+ markdown += f"... [read more]({permalink})"
+ return markdown
+
+
@async_cache(arg_offset=1)
async def get_soup_from_url(http_session: ClientSession, url: str) -> BeautifulSoup:
"""Create a BeautifulSoup object from the HTML data in `url` with the head tag removed."""
--
cgit v1.2.3
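
A sketch of the two truncation paths, assuming `truncate_markdown` is importable
from the parser module above (not part of the patch series): the first string is
cut at the last paragraph break before the limit, the second has its unclosed
codeblock removed and the permalink appended:

    from bot.cogs.doc.parser import truncate_markdown

    permalink = "https://docs.python.org/3/library/functools.html#functools.reduce"

    # Cut at the last double newline found past index 100.
    long_md = "A" * 150 + "\n\n" + "word " * 300
    print(truncate_markdown(long_md, permalink, 1000) == "A" * 150)  # True

    # An odd number of ``` markers means an unclosed codeblock; it is cut out.
    md = "Intro paragraph.\n\n" * 10 + "```py\n" + "def f():\n    pass\n\n" * 60
    out = truncate_markdown(md, permalink, 1000)
    print("```" not in out and out.endswith(f"... [read more]({permalink})"))  # True
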
From 0f8b991fffce8b808bf25f1ad9ed710bb1ff4919 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 20 Jul 2020 02:24:19 +0200
Subject: Rename parser.py to parsing.py.
`parser` is a stdlib module name; renaming the file avoids shadowing it.
---
bot/cogs/doc/cog.py | 2 +-
bot/cogs/doc/parser.py | 131 ------------------------------------------------
bot/cogs/doc/parsing.py | 131 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 132 insertions(+), 132 deletions(-)
delete mode 100644 bot/cogs/doc/parser.py
create mode 100644 bot/cogs/doc/parsing.py
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index bd4e9d4d1..4e4f3b737 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -25,7 +25,7 @@ from bot.decorators import with_role
from bot.pagination import LinePaginator
from bot.utils.messages import wait_for_deletion
from .cache import async_cache
-from .parser import get_soup_from_url, parse_module_symbol, parse_symbol, truncate_markdown
+from .parsing import get_soup_from_url, parse_module_symbol, parse_symbol, truncate_markdown
log = logging.getLogger(__name__)
logging.getLogger('urllib3').setLevel(logging.WARNING)
diff --git a/bot/cogs/doc/parser.py b/bot/cogs/doc/parser.py
deleted file mode 100644
index 010826a96..000000000
--- a/bot/cogs/doc/parser.py
+++ /dev/null
@@ -1,131 +0,0 @@
-import logging
-import re
-from typing import Callable, List, Optional, Tuple, Union
-
-from aiohttp import ClientSession
-from bs4 import BeautifulSoup
-from bs4.element import PageElement, Tag
-
-from .cache import async_cache
-
-log = logging.getLogger(__name__)
-
-UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
-SEARCH_END_TAG_ATTRS = (
- "data",
- "function",
- "class",
- "exception",
- "seealso",
- "section",
- "rubric",
- "sphinxsidebar",
-)
-
-
-def parse_module_symbol(heading: PageElement) -> Optional[Tuple[None, str]]:
- """Get page content from the headerlink up to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`."""
- start_tag = heading.find("a", attrs={"class": "headerlink"})
- if start_tag is None:
- return None
-
- description = find_all_children_until_tag(start_tag, _match_end_tag)
- if description is None:
- return None
-
- return None, description
-
-
-def parse_symbol(heading: PageElement, html: str) -> Tuple[List[str], str]:
- """
- Parse the signatures and description of a symbol.
-
- Collects up to 3 signatures from dt tags and a description from their sibling dd tag.
- """
- signatures = []
- description_element = heading.find_next_sibling("dd")
- description_pos = html.find(str(description_element))
- description = find_all_children_until_tag(description_element, tag_filter=("dt", "dl"))
-
- for element in (
- *reversed(heading.find_previous_siblings("dt", limit=2)),
- heading,
- *heading.find_next_siblings("dt", limit=2),
- )[-3:]:
- signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
-
- if signature and html.find(str(element)) < description_pos:
- signatures.append(signature)
-
- return signatures, description
-
-
-def find_all_children_until_tag(
- start_element: PageElement,
- tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]]
-) -> Optional[str]:
- """
- Get all direct children until a child matching `tag_filter` is found.
-
- `tag_filter` can be either a tuple of string names to check against,
- or a filtering callable that's applied to the tags.
- """
- text = ""
-
- for element in start_element.find_next().find_next_siblings():
- if isinstance(tag_filter, tuple):
- if element.name in tag_filter:
- break
- elif tag_filter(element):
- break
- text += str(element)
-
- return text
-
-
-def truncate_markdown(markdown: str, permalink: str, max_length: int) -> str:
- """
- Truncate `markdown` to be at most `max_length` characters.
-
- The markdown string is searched for substrings to cut at, to keep its structure,
- but if none are found the string is simply sliced.
- """
- if len(markdown) > max_length:
- shortened = markdown[:max_length]
- description_cutoff = shortened.rfind('\n\n', 100)
- if description_cutoff == -1:
- # Search the shortened version for cutoff points in decreasing desirability,
- # cutting off at `max_length` if none are found.
- for string in (". ", ", ", ",", " "):
- description_cutoff = shortened.rfind(string)
- if description_cutoff != -1:
- break
- else:
- description_cutoff = max_length
- markdown = markdown[:description_cutoff]
-
- # If there is an incomplete code block, cut it out
- if markdown.count("```") % 2:
- codeblock_start = markdown.rfind('```py')
- markdown = markdown[:codeblock_start].rstrip()
- markdown += f"... [read more]({permalink})"
- return markdown
-
-
-@async_cache(arg_offset=1)
-async def get_soup_from_url(http_session: ClientSession, url: str) -> BeautifulSoup:
- """Create a BeautifulSoup object from the HTML data in `url` with the head tag removed."""
- log.trace(f"Sending a request to {url}.")
- async with http_session.get(url) as response:
- soup = BeautifulSoup(await response.text(encoding="utf8"), 'lxml')
- soup.find("head").decompose() # the head contains no useful data so we can remove it
- return soup
-
-
-def _match_end_tag(tag: Tag) -> bool:
- """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table."""
- for attr in SEARCH_END_TAG_ATTRS:
- if attr in tag.get("class", ()):
- return True
-
- return tag.name == "table"
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
new file mode 100644
index 000000000..010826a96
--- /dev/null
+++ b/bot/cogs/doc/parsing.py
@@ -0,0 +1,131 @@
+import logging
+import re
+from typing import Callable, List, Optional, Tuple, Union
+
+from aiohttp import ClientSession
+from bs4 import BeautifulSoup
+from bs4.element import PageElement, Tag
+
+from .cache import async_cache
+
+log = logging.getLogger(__name__)
+
+UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
+SEARCH_END_TAG_ATTRS = (
+ "data",
+ "function",
+ "class",
+ "exception",
+ "seealso",
+ "section",
+ "rubric",
+ "sphinxsidebar",
+)
+
+
+def parse_module_symbol(heading: PageElement) -> Optional[Tuple[None, str]]:
+ """Get page content from the headerlink up to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`."""
+ start_tag = heading.find("a", attrs={"class": "headerlink"})
+ if start_tag is None:
+ return None
+
+ description = find_all_children_until_tag(start_tag, _match_end_tag)
+ if description is None:
+ return None
+
+ return None, description
+
+
+def parse_symbol(heading: PageElement, html: str) -> Tuple[List[str], str]:
+ """
+ Parse the signatures and description of a symbol.
+
+ Collects up to 3 signatures from dt tags and a description from their sibling dd tag.
+ """
+ signatures = []
+ description_element = heading.find_next_sibling("dd")
+ description_pos = html.find(str(description_element))
+ description = find_all_children_until_tag(description_element, tag_filter=("dt", "dl"))
+
+ for element in (
+ *reversed(heading.find_previous_siblings("dt", limit=2)),
+ heading,
+ *heading.find_next_siblings("dt", limit=2),
+ )[-3:]:
+ signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
+
+ if signature and html.find(str(element)) < description_pos:
+ signatures.append(signature)
+
+ return signatures, description
+
+
+def find_all_children_until_tag(
+ start_element: PageElement,
+ tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]]
+) -> Optional[str]:
+ """
+ Get all direct children until a child matching `tag_filter` is found.
+
+ `tag_filter` can be either a tuple of string names to check against,
+ or a filtering callable that's applied to the tags.
+ """
+ text = ""
+
+ for element in start_element.find_next().find_next_siblings():
+ if isinstance(tag_filter, tuple):
+ if element.name in tag_filter:
+ break
+ elif tag_filter(element):
+ break
+ text += str(element)
+
+ return text
+
+
+def truncate_markdown(markdown: str, permalink: str, max_length: int) -> str:
+ """
+ Truncate `markdown` to be at most `max_length` characters.
+
+ The markdown string is searched for substrings to cut at, to keep its structure,
+ but if none are found the string is simply sliced.
+ """
+ if len(markdown) > max_length:
+ shortened = markdown[:max_length]
+ description_cutoff = shortened.rfind('\n\n', 100)
+ if description_cutoff == -1:
+ # Search the shortened version for cutoff points in decreasing desirability,
+ # cutting off at `max_length` if none are found.
+ for string in (". ", ", ", ",", " "):
+ description_cutoff = shortened.rfind(string)
+ if description_cutoff != -1:
+ break
+ else:
+ description_cutoff = max_length
+ markdown = markdown[:description_cutoff]
+
+ # If there is an incomplete code block, cut it out
+ if markdown.count("```") % 2:
+ codeblock_start = markdown.rfind('```py')
+ markdown = markdown[:codeblock_start].rstrip()
+ markdown += f"... [read more]({permalink})"
+ return markdown
+
+
+@async_cache(arg_offset=1)
+async def get_soup_from_url(http_session: ClientSession, url: str) -> BeautifulSoup:
+ """Create a BeautifulSoup object from the HTML data in `url` with the head tag removed."""
+ log.trace(f"Sending a request to {url}.")
+ async with http_session.get(url) as response:
+ soup = BeautifulSoup(await response.text(encoding="utf8"), 'lxml')
+ soup.find("head").decompose() # the head contains no useful data so we can remove it
+ return soup
+
+
+def _match_end_tag(tag: Tag) -> bool:
+ """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table."""
+ for attr in SEARCH_END_TAG_ATTRS:
+ if attr in tag.get("class", ()):
+ return True
+
+ return tag.name == "table"
--
cgit v1.2.3
From 4560f0f89b52cfcb8b18abeb1efa707c334a86d4 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 20 Jul 2020 02:28:25 +0200
Subject: Remove permalink from truncated markdown.
The permalink serves no functional purpose in the embed,
as it is already included in the title, but it does
add the complexity of passing the URL into the parser.
---
bot/cogs/doc/cog.py | 2 +-
bot/cogs/doc/parsing.py | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index 4e4f3b737..36fbe9010 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -270,7 +270,7 @@ class DocCog(commands.Cog):
self.bot.stats.incr(f"doc_fetches.{symbol_obj.package.lower()}")
signatures = scraped_html[0]
permalink = symbol_obj.url
- description = truncate_markdown(markdownify(scraped_html[1], url=permalink), permalink, 1000)
+ description = truncate_markdown(markdownify(scraped_html[1], url=permalink), 1000)
description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
if signatures is None:
# If symbol is a module, don't show signature.
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 010826a96..3b79e0a93 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -83,7 +83,7 @@ def find_all_children_until_tag(
return text
-def truncate_markdown(markdown: str, permalink: str, max_length: int) -> str:
+def truncate_markdown(markdown: str, max_length: int) -> str:
"""
Truncate `markdown` to be at most `max_length` characters.
@@ -108,7 +108,7 @@ def truncate_markdown(markdown: str, permalink: str, max_length: int) -> str:
if markdown.count("```") % 2:
codeblock_start = markdown.rfind('```py')
markdown = markdown[:codeblock_start].rstrip()
- markdown += f"... [read more]({permalink})"
+ markdown += "... read more"
return markdown
--
cgit v1.2.3
From cecd2c8e320a2a0ff0095cd1fa197552d43c6684 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 20 Jul 2020 02:31:56 +0200
Subject: Simplify cutoff text.
"read more" seemed out of place with no permalink over it.
---
bot/cogs/doc/parsing.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 3b79e0a93..994124e92 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -1,5 +1,6 @@
import logging
import re
+import string
from typing import Callable, List, Optional, Tuple, Union
from aiohttp import ClientSession
@@ -96,8 +97,8 @@ def truncate_markdown(markdown: str, max_length: int) -> str:
if description_cutoff == -1:
# Search the shortened version for cutoff points in decreasing desirability,
# cutting off at `max_length` if none are found.
- for string in (". ", ", ", ",", " "):
- description_cutoff = shortened.rfind(string)
+ for cutoff_string in (". ", ", ", ",", " "):
+ description_cutoff = shortened.rfind(cutoff_string)
if description_cutoff != -1:
break
else:
@@ -108,7 +109,7 @@ def truncate_markdown(markdown: str, max_length: int) -> str:
if markdown.count("```") % 2:
codeblock_start = markdown.rfind('```py')
markdown = markdown[:codeblock_start].rstrip()
- markdown += "... read more"
+ markdown = markdown.rstrip(string.punctuation) + "..."
return markdown
--
cgit v1.2.3
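
The effect of the new suffix handling, as a one-off sketch outside the patch
series: trailing punctuation from a mid-sentence cut is stripped before the
ellipsis is appended, so the result never reads ",...":

    import string

    for cut in ("ends mid sentence,", "ends a sentence."):
        print(cut.rstrip(string.punctuation) + "...")
    # ends mid sentence...
    # ends a sentence...
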
From 2b24579b49ced873e05e375051bbbb4ec2855b12 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 20 Jul 2020 03:55:31 +0200
Subject: Add function for finding tags until a matching tag
This will allow more flexibility in the future when collecting tags
for the descriptions and signatures of symbols.
The base is a function that accepts a callable, which is called and
iterated over; three names with the callable already supplied through
partial functions are provided to keep the outside interface neater.
---
bot/cogs/doc/parsing.py | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 994124e92..5e5a5be66 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -1,6 +1,7 @@
import logging
import re
import string
+from functools import partial
from typing import Callable, List, Optional, Tuple, Union
from aiohttp import ClientSession
@@ -24,6 +25,40 @@ SEARCH_END_TAG_ATTRS = (
)
+def find_elements_until_tag(
+ start_element: PageElement,
+ tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]],
+ *,
+ func: Callable,
+ limit: int = None,
+) -> List[str]:
+ """
+ Get all tags until a tag matching `tag_filter` is found.
+
+ `tag_filter` can be either a tuple of string names to check against,
+ or a filtering callable that's applied to the tags.
+
+ `func` takes in a BeautifulSoup unbound method for finding multiple elements, such as `BeautifulSoup.find_all`.
+ That method is then iterated over and all tags until the matching tag are added to the return list as strings.
+ """
+ elements = []
+
+ for element in func(start_element, limit=limit):
+ if isinstance(tag_filter, tuple):
+ if element.name in tag_filter:
+ break
+ elif tag_filter(element):
+ break
+ elements.append(str(element))
+
+ return elements
+
+
+find_next_children_until_tag = partial(find_elements_until_tag, func=partial(BeautifulSoup.find_all, recursive=False))
+find_next_siblings_until_tag = partial(find_elements_until_tag, func=BeautifulSoup.find_next_siblings)
+find_previous_siblings_until_tag = partial(find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
+
+
def parse_module_symbol(heading: PageElement) -> Optional[Tuple[None, str]]:
"""Get page content from the headerlink up to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`."""
start_tag = heading.find("a", attrs={"class": "headerlink"})
--
cgit v1.2.3
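
A sketch of the sibling-walking helpers, assuming they are importable from
`bot.cogs.doc.parsing` as added above (not part of the patch series). At this
point in the series the matched tags are still returned as strings:

    from bs4 import BeautifulSoup

    from bot.cogs.doc.parsing import find_next_siblings_until_tag

    soup = BeautifulSoup(
        "<dl><dt>first</dt><dt>second</dt><dd>stop here</dd><dt>after</dt></dl>",
        "lxml",
    )
    start = soup.find("dt")
    # Walk the following siblings, stopping at the first dd tag.
    print(find_next_siblings_until_tag(start, ("dd",)))  # ['<dt>second</dt>']
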
From 9f78dbafc3bc532bbfb5ffa0ef110fdeb0c3e8a5 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 20 Jul 2020 03:57:27 +0200
Subject: Simplify module parsing method.
Instead of returning None and multiple values, the method now
returns only the description string.
Previously the parsing returned None and quit
when the tags appropriate for shortening the description
were not found; the new implementation simply defaults to the
provided start tag if a better alternative is not found.
---
bot/cogs/doc/parsing.py | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 5e5a5be66..368feeb68 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -59,17 +59,18 @@ find_next_siblings_until_tag = partial(find_elements_until_tag, func=BeautifulSo
find_previous_siblings_until_tag = partial(find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
-def parse_module_symbol(heading: PageElement) -> Optional[Tuple[None, str]]:
- """Get page content from the headerlink up to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`."""
- start_tag = heading.find("a", attrs={"class": "headerlink"})
- if start_tag is None:
- return None
+def get_module_description(start_element: PageElement) -> Optional[str]:
+ """
+ Get page content to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`.
- description = find_all_children_until_tag(start_tag, _match_end_tag)
- if description is None:
- return None
+ An attempt is made to find a headerlink a tag, to avoid repeating the module name in the description;
+ if it's found, it's used as the tag to search from instead of the `start_element`.
+ """
+ header = start_element.find("a", attrs={"class": "headerlink"})
+ start_tag = header.parent if header is not None else start_element
+ description = "".join(str(tag) for tag in find_next_siblings_until_tag(start_tag, _match_end_tag))
- return None, description
+ return description
def parse_symbol(heading: PageElement, html: str) -> Tuple[List[str], str]:
--
cgit v1.2.3
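
A sketch of the simplified module parsing, assuming `get_module_description` from
the module above (not part of the patch series): the headerlink anchor is found
inside the h1, so the sibling walk starts from the heading rather than the section
element itself and stops at the table:

    from bs4 import BeautifulSoup

    from bot.cogs.doc.parsing import get_module_description

    html = (
        '<section id="module-demo">'
        '<h1>demo<a class="headerlink" href="#module-demo">¶</a></h1>'
        '<p>Utilities for demonstrations.</p>'
        '<table><tr><td>search stops here</td></tr></table>'
        '</section>'
    )
    start = BeautifulSoup(html, "lxml").find(id="module-demo")
    print(get_module_description(start))  # <p>Utilities for demonstrations.</p>
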
From 082867253cd19c70516102a3d4972da6d501ff6f Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 20 Jul 2020 17:35:07 +0200
Subject: Create a function for collecting signatures.
By getting the signatures without the description, we gain more
flexibility in parsing different symbol groups and decouple the logic
from the description, which can be parsed directly with the new
`find_elements_until_tag` based functions.
---
bot/cogs/doc/parsing.py | 46 ++++++++++------------------------------------
1 file changed, 10 insertions(+), 36 deletions(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 368feeb68..5b60f1609 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -73,51 +73,25 @@ def get_module_description(start_element: PageElement) -> Optional[str]:
return description
-def parse_symbol(heading: PageElement, html: str) -> Tuple[List[str], str]:
+def get_signatures(start_signature: PageElement) -> List[str]:
"""
- Parse the signatures and description of a symbol.
+ Collect up to 3 signatures from dt tags around the `start_signature` dt tag.
- Collects up to 3 signatures from dt tags and a description from their sibling dd tag.
+ First the signatures under the `start_signature` are included;
+ if fewer than 2 are found, tags above the start signature are added to the result if any are present.
"""
signatures = []
- description_element = heading.find_next_sibling("dd")
- description_pos = html.find(str(description_element))
- description = find_all_children_until_tag(description_element, tag_filter=("dt", "dl"))
-
for element in (
- *reversed(heading.find_previous_siblings("dt", limit=2)),
- heading,
- *heading.find_next_siblings("dt", limit=2),
+ *reversed(find_previous_siblings_until_tag(start_signature, ("dd",), limit=2)),
+ start_signature,
+ *find_next_siblings_until_tag(start_signature, ("dd",), limit=2),
)[-3:]:
- signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
+ signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element)
- if signature and html.find(str(element)) < description_pos:
+ if signature:
signatures.append(signature)
- return signatures, description
-
-
-def find_all_children_until_tag(
- start_element: PageElement,
- tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]]
-) -> Optional[str]:
- """
- Get all direct children until a child matching `tag_filter` is found.
-
- `tag_filter` can be either a tuple of string names to check against,
- or a filtering callable that's applied to the tags.
- """
- text = ""
-
- for element in start_element.find_next().find_next_siblings():
- if isinstance(tag_filter, tuple):
- if element.name in tag_filter:
- break
- elif tag_filter(element):
- break
- text += str(element)
-
- return text
+ return signatures
def truncate_markdown(markdown: str, max_length: int) -> str:
--
cgit v1.2.3
From caedfb0c16bc98eb94d723caff42dfe0799f8f17 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 22 Jul 2020 01:38:00 +0200
Subject: Remove conversion to str when finding elements.
The tags need to be processed down the line,
which is not viable on strings.
---
bot/cogs/doc/parsing.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 5b60f1609..acf3a0804 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -31,7 +31,7 @@ def find_elements_until_tag(
*,
func: Callable,
limit: int = None,
-) -> List[str]:
+) -> List[Tag]:
"""
Get all tags until a tag matching `tag_filter` is found.
@@ -49,7 +49,7 @@ def find_elements_until_tag(
break
elif tag_filter(element):
break
- elements.append(str(element))
+ elements.append(element)
return elements
--
cgit v1.2.3
From 1c997846f282f76d17700f0f16c0a0abb5c49a30 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 22 Jul 2020 01:39:43 +0200
Subject: Fix handling of elements when fetching signatures.
After the change to `find_elements_until_tag`,
the text contents need to be extracted from the tags
instead of passing them directly to re.
---
bot/cogs/doc/parsing.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index acf3a0804..725fe47cd 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -86,7 +86,7 @@ def get_signatures(start_signature: PageElement) -> List[str]:
start_signature,
*find_next_siblings_until_tag(start_signature, ("dd",), limit=2),
)[-3:]:
- signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element)
+ signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
if signature:
signatures.append(signature)
--
cgit v1.2.3
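
With the tags and their `.text` handled correctly again, a sketch of the
signature window, assuming `get_signatures` from the module above (not part of
the patch series): up to two dt tags above the start are pulled in, the dd below
stops the forward walk, and only the last three entries are kept:

    from bs4 import BeautifulSoup

    from bot.cogs.doc.parsing import get_signatures

    soup = BeautifulSoup(
        "<dl>"
        "<dt>overload_one(a)</dt><dt>overload_two(a, b)</dt>"
        "<dt>overload_three(a, b, c)</dt><dt>overload_four(*args)</dt>"
        "<dd>Shared description.</dd>"
        "</dl>",
        "lxml",
    )
    start = soup.find_all("dt")[-1]  # the dt the symbol resolved to
    print(get_signatures(start))
    # ['overload_two(a, b)', 'overload_three(a, b, c)', 'overload_four(*args)']
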
From e10def8a3d79dffd8cc53acd6b30fa43741d140c Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 22 Jul 2020 02:03:31 +0200
Subject: Move DocMarkdownConverter to parsing.
---
bot/cogs/doc/cog.py | 34 ----------------------------------
bot/cogs/doc/parsing.py | 34 ++++++++++++++++++++++++++++++++++
2 files changed, 34 insertions(+), 34 deletions(-)
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index 36fbe9010..a7dcd9020 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -64,40 +64,6 @@ class DocItem(NamedTuple):
group: str
-class DocMarkdownConverter(MarkdownConverter):
- """Subclass markdownify's MarkdownCoverter to provide custom conversion methods."""
-
- def __init__(self, *, page_url: str, **options):
- super().__init__(**options)
- self.page_url = page_url
-
- def convert_code(self, el: PageElement, text: str) -> str:
- """Undo `markdownify`s underscore escaping."""
- return f"`{text}`".replace('\\', '')
-
- def convert_pre(self, el: PageElement, text: str) -> str:
- """Wrap any codeblocks in `py` for syntax highlighting."""
- code = ''.join(el.strings)
- return f"```py\n{code}```"
-
- def convert_a(self, el: PageElement, text: str) -> str:
- """Resolve relative URLs to `self.page_url`."""
- el["href"] = urljoin(self.page_url, el["href"])
- return super().convert_a(el, text)
-
- def convert_p(self, el: PageElement, text: str) -> str:
- """Include only one newline instead of two when the parent is a li tag."""
- parent = el.parent
- if parent is not None and parent.name == "li":
- return f"{text}\n"
- return super().convert_p(el, text)
-
-
-def markdownify(html: str, *, url: str = "") -> str:
- """Create a DocMarkdownConverter object from the input html."""
- return DocMarkdownConverter(bullets='•', page_url=url).convert(html)
-
-
class InventoryURL(commands.Converter):
"""
Represents an Intersphinx inventory URL.
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 725fe47cd..8f6688bd2 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -25,6 +25,40 @@ SEARCH_END_TAG_ATTRS = (
)
+class DocMarkdownConverter(MarkdownConverter):
+ """Subclass markdownify's MarkdownCoverter to provide custom conversion methods."""
+
+ def __init__(self, *, page_url: str, **options):
+ super().__init__(**options)
+ self.page_url = page_url
+
+ def convert_code(self, el: PageElement, text: str) -> str:
+ """Undo `markdownify`s underscore escaping."""
+ return f"`{text}`".replace('\\', '')
+
+ def convert_pre(self, el: PageElement, text: str) -> str:
+ """Wrap any codeblocks in `py` for syntax highlighting."""
+ code = ''.join(el.strings)
+ return f"```py\n{code}```"
+
+ def convert_a(self, el: PageElement, text: str) -> str:
+ """Resolve relative URLs to `self.page_url`."""
+ el["href"] = urljoin(self.page_url, el["href"])
+ return super().convert_a(el, text)
+
+ def convert_p(self, el: PageElement, text: str) -> str:
+ """Include only one newline instead of two when the parent is a li tag."""
+ parent = el.parent
+ if parent is not None and parent.name == "li":
+ return f"{text}\n"
+ return super().convert_p(el, text)
+
+
+def markdownify(html: str, *, url: str = "") -> str:
+ """Create a DocMarkdownConverter object from the input html."""
+ return DocMarkdownConverter(bullets='•', page_url=url).convert(html)
+
+
def find_elements_until_tag(
start_element: PageElement,
tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]],
--
cgit v1.2.3
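
A sketch of the converter in its new home, assuming `markdownify` is importable
from `bot.cogs.doc.parsing` (not part of the patch series): the relative href is
resolved against the page URL before the standard link conversion runs:

    from bot.cogs.doc.parsing import markdownify

    html = '<p>See <a href="constants.html#math-pi">the constants</a>.</p>'
    page = "https://docs.python.org/3/library/math.html"
    print(markdownify(html, url=page).strip())
    # See [the constants](https://docs.python.org/3/library/constants.html#math-pi).
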
From 6795a7f05e3720f375a9195182b996a14d754ea0 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 22 Jul 2020 02:06:50 +0200
Subject: Fix ordered list indices in markdown converter.
markdownify relies on the parent tag's index method,
which goes through all of its contents; if there is anything else
in the contents apart from the li tags, those indices are shifted.
---
bot/cogs/doc/parsing.py | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 8f6688bd2..25001b83d 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -3,10 +3,12 @@ import re
import string
from functools import partial
from typing import Callable, List, Optional, Tuple, Union
+from urllib.parse import urljoin
from aiohttp import ClientSession
from bs4 import BeautifulSoup
from bs4.element import PageElement, Tag
+from markdownify import MarkdownConverter
from .cache import async_cache
@@ -32,6 +34,22 @@ class DocMarkdownConverter(MarkdownConverter):
super().__init__(**options)
self.page_url = page_url
+ def convert_li(self, el: PageElement, text: str) -> str:
+ """Fix markdownify's erroneous indexing in ol tags."""
+ parent = el.parent
+ if parent is not None and parent.name == 'ol':
+ li_tags = parent.find_all("li")
+ bullet = '%s.' % (li_tags.index(el)+1)
+ else:
+ depth = -1
+ while el:
+ if el.name == 'ul':
+ depth += 1
+ el = el.parent
+ bullets = self.options['bullets']
+ bullet = bullets[depth % len(bullets)]
+ return '%s %s\n' % (bullet, text or '')
+
def convert_code(self, el: PageElement, text: str) -> str:
"""Undo `markdownify`s underscore escaping."""
return f"`{text}`".replace('\\', '')
--
cgit v1.2.3
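
The shifted indices are easy to reproduce with a standalone sketch (not part of
the patch): with newlines between li tags, `parent.index(el)` counts the
whitespace text nodes too, while the patched `convert_li` numbers only the li
tags:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<ol>\n<li>first</li>\n<li>second</li>\n</ol>", "lxml")
    for li in soup.find_all("li"):
        naive = li.parent.index(li) + 1                  # roughly what stock markdownify computed
        fixed = li.parent.find_all("li").index(li) + 1   # what convert_li now does
        print(naive, fixed)
    # 2 1
    # 4 2
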
From 4e9ffb210f6a8f0184ac97cb16703777cc1e0ca0 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 22 Jul 2020 02:34:11 +0200
Subject: Create a function for getting the result markdown.
---
bot/cogs/doc/parsing.py | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 25001b83d..8756e0694 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -1,6 +1,7 @@
import logging
import re
import string
+import textwrap
from functools import partial
from typing import Callable, List, Optional, Tuple, Union
from urllib.parse import urljoin
@@ -15,6 +16,8 @@ from .cache import async_cache
log = logging.getLogger(__name__)
UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
+WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
+
SEARCH_END_TAG_ATTRS = (
"data",
"function",
@@ -175,6 +178,24 @@ def truncate_markdown(markdown: str, max_length: int) -> str:
return markdown
+def _parse_into_markdown(signatures: Optional[List[str]], description: str, url: str) -> str:
+ """
+ Create a markdown string with the signatures at the top, and the converted html description below them.
+
+ The signatures are wrapped in python codeblocks, separated from the description by a newline.
+ The result string is truncated to be max 1000 symbols long.
+ """
+ description = truncate_markdown(markdownify(description, url=url), 1000)
+ description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
+ if signatures is not None:
+ formatted_markdown = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures)
+ else:
+ formatted_markdown = ""
+ formatted_markdown += f"\n{description}"
+
+ return formatted_markdown
+
+
@async_cache(arg_offset=1)
async def get_soup_from_url(http_session: ClientSession, url: str) -> BeautifulSoup:
"""Create a BeautifulSoup object from the HTML data in `url` with the head tag removed."""
--
cgit v1.2.3
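
A rough sketch of the assembled output, assuming `_parse_into_markdown` is
importable as of this patch (not part of the patch series; the exact trailing
whitespace may differ): the signature lands in a py codeblock and the converted
description follows on the next line:

    from bot.cogs.doc.parsing import _parse_into_markdown

    signatures = ["reduce(function, iterable[, initializer])"]
    description = "<p>Apply <em>function</em> cumulatively to the items.</p>"
    url = "https://docs.python.org/3/library/functools.html"
    print(_parse_into_markdown(signatures, description, url))
    # ```py
    # reduce(function, iterable[, initializer])```
    # Apply *function* cumulatively to the items.
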
From f562c4b4551caa8ed3710ac5e9841150cb8a2492 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 22 Jul 2020 02:35:13 +0200
Subject: Create the parsing interface function.
Other functions from the module are not intended to be used directly;
its interface is the added function, which accepts the
symbol and calls the internals.
All other names except the imports and log had an underscore prefix added
to accommodate this.
---
bot/cogs/doc/parsing.py | 92 ++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 71 insertions(+), 21 deletions(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 8756e0694..a2c6564b3 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -3,7 +3,7 @@ import re
import string
import textwrap
from functools import partial
-from typing import Callable, List, Optional, Tuple, Union
+from typing import Callable, List, Optional, TYPE_CHECKING, Tuple, Union
from urllib.parse import urljoin
from aiohttp import ClientSession
@@ -12,13 +12,15 @@ from bs4.element import PageElement, Tag
from markdownify import MarkdownConverter
from .cache import async_cache
+if TYPE_CHECKING:
+ from .cog import DocItem
log = logging.getLogger(__name__)
-UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
-WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
+_UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
+_WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
-SEARCH_END_TAG_ATTRS = (
+_SEARCH_END_TAG_ATTRS = (
"data",
"function",
"class",
@@ -29,8 +31,17 @@ SEARCH_END_TAG_ATTRS = (
"sphinxsidebar",
)
+_NO_SIGNATURE_GROUPS = {
+ "attribute",
+ "envvar",
+ "setting",
+ "tempaltefilter",
+ "templatetag",
+ "term",
+}
-class DocMarkdownConverter(MarkdownConverter):
+
+class _DocMarkdownConverter(MarkdownConverter):
"""Subclass markdownify's MarkdownCoverter to provide custom conversion methods."""
def __init__(self, *, page_url: str, **options):
@@ -75,12 +86,12 @@ class DocMarkdownConverter(MarkdownConverter):
return super().convert_p(el, text)
-def markdownify(html: str, *, url: str = "") -> str:
+def _markdownify(html: str, *, url: str = "") -> str:
"""Create a DocMarkdownConverter object from the input html."""
- return DocMarkdownConverter(bullets='•', page_url=url).convert(html)
+ return _DocMarkdownConverter(bullets='•', page_url=url).convert(html)
-def find_elements_until_tag(
+def _find_elements_until_tag(
start_element: PageElement,
tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]],
*,
@@ -109,9 +120,9 @@ def find_elements_until_tag(
return elements
-find_next_children_until_tag = partial(find_elements_until_tag, func=partial(BeautifulSoup.find_all, recursive=False))
-find_next_siblings_until_tag = partial(find_elements_until_tag, func=BeautifulSoup.find_next_siblings)
-find_previous_siblings_until_tag = partial(find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
+_find_next_children_until_tag = partial(_find_elements_until_tag, func=partial(BeautifulSoup.find_all, recursive=False))
+_find_next_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_next_siblings)
+_find_previous_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
def get_module_description(start_element: PageElement) -> Optional[str]:
@@ -123,12 +134,19 @@ def get_module_description(start_element: PageElement) -> Optional[str]:
"""
header = start_element.find("a", attrs={"class": "headerlink"})
start_tag = header.parent if header is not None else start_element
- description = "".join(str(tag) for tag in find_next_siblings_until_tag(start_tag, _match_end_tag))
+ description = "".join(str(tag) for tag in _find_next_siblings_until_tag(start_tag, _match_end_tag))
return description
-def get_signatures(start_signature: PageElement) -> List[str]:
+def _get_symbol_description(symbol: PageElement) -> str:
+ """Get the string contents of the next dd tag, up to a dt or a dl tag."""
+ description_tag = symbol.find_next("dd")
+ description_contents = _find_next_children_until_tag(description_tag, ("dt", "dl"))
+ return "".join(str(tag) for tag in description_contents)
+
+
+def _get_signatures(start_signature: PageElement) -> List[str]:
"""
Collect up to 3 signatures from dt tags around the `start_signature` dt tag.
@@ -137,11 +155,11 @@ def get_signatures(start_signature: PageElement) -> List[str]:
"""
signatures = []
for element in (
- *reversed(find_previous_siblings_until_tag(start_signature, ("dd",), limit=2)),
+ *reversed(_find_previous_siblings_until_tag(start_signature, ("dd",), limit=2)),
start_signature,
- *find_next_siblings_until_tag(start_signature, ("dd",), limit=2),
+ *_find_next_siblings_until_tag(start_signature, ("dd",), limit=2),
)[-3:]:
- signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
+ signature = _UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
if signature:
signatures.append(signature)
@@ -149,7 +167,7 @@ def get_signatures(start_signature: PageElement) -> List[str]:
return signatures
-def truncate_markdown(markdown: str, max_length: int) -> str:
+def _truncate_markdown(markdown: str, max_length: int) -> str:
"""
Truncate `markdown` to be at most `max_length` characters.
@@ -185,8 +203,8 @@ def _parse_into_markdown(signatures: Optional[List[str]], description: str, url:
The signatures are wrapped in python codeblocks, separated from the description by a newline.
The result string is truncated to be at most 1000 characters long.
"""
- description = truncate_markdown(markdownify(description, url=url), 1000)
- description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
+ description = _truncate_markdown(_markdownify(description, url=url), 1000)
+ description = _WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
if signatures is not None:
formatted_markdown = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures)
else:
@@ -197,7 +215,7 @@ def _parse_into_markdown(signatures: Optional[List[str]], description: str, url:
@async_cache(arg_offset=1)
-async def get_soup_from_url(http_session: ClientSession, url: str) -> BeautifulSoup:
+async def _get_soup_from_url(http_session: ClientSession, url: str) -> BeautifulSoup:
"""Create a BeautifulSoup object from the HTML data in `url` with the head tag removed."""
log.trace(f"Sending a request to {url}.")
async with http_session.get(url) as response:
@@ -208,8 +226,40 @@ async def get_soup_from_url(http_session: ClientSession, url: str) -> BeautifulS
def _match_end_tag(tag: Tag) -> bool:
"""Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table."""
- for attr in SEARCH_END_TAG_ATTRS:
+ for attr in _SEARCH_END_TAG_ATTRS:
if attr in tag.get("class", ()):
return True
return tag.name == "table"
+
+
+async def get_symbol_markdown(http_session: ClientSession, symbol_data: "DocItem") -> str:
+ """
+ Return parsed markdown of the passed symbol, truncated to 1000 characters.
+
+ A request through `http_session` is made to the url associated with `symbol_data` for the html contents;
+ the contents are then parsed depending on what group the symbol belongs to.
+ """
+ if "#" in symbol_data.url:
+ request_url, symbol_id = symbol_data.url.rsplit('#')
+ else:
+ request_url = symbol_data.url
+ symbol_id = None
+
+ soup = await _get_soup_from_url(http_session, request_url)
+ symbol_heading = soup.find(id=symbol_id)
+
+ # Handle doc symbols as modules, because they either link to the page of a module,
+ # or don't contain any useful info to be parsed.
+ signature = None
+ if symbol_data.group in {"module", "doc"}:
+ description = get_module_description(symbol_heading)
+
+ elif symbol_data.group in _NO_SIGNATURE_GROUPS:
+ description = _get_symbol_description(symbol_heading)
+
+ else:
+ signature = _get_signatures(symbol_heading)
+ description = _get_symbol_description(symbol_heading)
+
+ return _parse_into_markdown(signature, description, symbol_data.url)
--
cgit v1.2.3
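The commit above narrows the module's public surface to the single
get_symbol_markdown coroutine. A usage sketch; the symbol data below is a
made-up stand-in for a DocItem, of which only the url and group attributes
are accessed at this stage:

    import asyncio
    from types import SimpleNamespace

    from aiohttp import ClientSession

    from bot.cogs.doc.parsing import get_symbol_markdown


    async def demo() -> None:
        # Hypothetical symbol data; real DocItems are built from Sphinx inventories.
        symbol = SimpleNamespace(
            url="https://docs.python.org/3/library/functools.html#functools.reduce",
            group="function",
        )
        async with ClientSession() as session:
            print(await get_symbol_markdown(session, symbol))


    asyncio.run(demo())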
From 6f4731714aa9df086ec287f768556a4c4443b635 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 22 Jul 2020 02:50:49 +0200
Subject: Change DocCog to use the new parsing module fully.
The parsing module provides an interface for fetching the markdown
from the symbol data provided to it. Because parsing is now fully done
in another module, we can remove the now-unneeded parts from the cog.
---
bot/cogs/doc/cog.py | 69 ++++++-----------------------------------------------
1 file changed, 7 insertions(+), 62 deletions(-)
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index a7dcd9020..6cd066f1b 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -3,17 +3,13 @@ import functools
import logging
import re
import sys
-import textwrap
from collections import OrderedDict
from contextlib import suppress
from types import SimpleNamespace
-from typing import Dict, NamedTuple, Optional, Tuple
-from urllib.parse import urljoin
+from typing import Dict, NamedTuple, Optional
import discord
-from bs4.element import PageElement
from discord.ext import commands
-from markdownify import MarkdownConverter
from requests import ConnectTimeout, ConnectionError, HTTPError
from sphinx.ext import intersphinx
from urllib3.exceptions import ProtocolError
@@ -25,7 +21,7 @@ from bot.decorators import with_role
from bot.pagination import LinePaginator
from bot.utils.messages import wait_for_deletion
from .cache import async_cache
-from .parsing import get_soup_from_url, parse_module_symbol, parse_symbol, truncate_markdown
+from .parsing import get_symbol_markdown
log = logging.getLogger(__name__)
logging.getLogger('urllib3').setLevel(logging.WARNING)
@@ -187,40 +183,6 @@ class DocCog(commands.Cog):
]
await asyncio.gather(*coros)
- async def get_symbol_html(self, symbol: str) -> Optional[Tuple[list, str]]:
- """
- Given a Python symbol, return its signature and description.
-
- The first tuple element is the signature of the given symbol as a markup-free string, and
- the second tuple element is the description of the given symbol with HTML markup included.
-
- If the given symbol is a module, returns a tuple `(None, str)`
- else if the symbol could not be found, returns `None`.
- """
- symbol_info = self.doc_symbols.get(symbol)
- if symbol_info is None:
- return None
- request_url, symbol_id = symbol_info.url.rsplit('#')
-
- soup = await get_soup_from_url(self.bot.http_session, request_url)
- symbol_heading = soup.find(id=symbol_id)
- search_html = str(soup)
-
- if symbol_heading is None:
- return None
-
- if symbol_info.group == "module":
- parsed_module = parse_module_symbol(symbol_heading)
- if parsed_module is None:
- return [], ""
- else:
- signatures, description = parsed_module
-
- else:
- signatures, description = parse_symbol(symbol_heading, search_html)
-
- return signatures, description.replace('¶', '')
-
@async_cache(arg_offset=1)
async def get_symbol_embed(self, symbol: str) -> Optional[discord.Embed]:
"""
@@ -228,32 +190,15 @@ class DocCog(commands.Cog):
If the symbol is known, an Embed with documentation about it is returned.
"""
- scraped_html = await self.get_symbol_html(symbol)
- if scraped_html is None:
+ symbol_info = self.doc_symbols.get(symbol)
+ if symbol_info is None:
return None
-
- symbol_obj = self.doc_symbols[symbol]
- self.bot.stats.incr(f"doc_fetches.{symbol_obj.package.lower()}")
- signatures = scraped_html[0]
- permalink = symbol_obj.url
- description = truncate_markdown(markdownify(scraped_html[1], url=permalink), 1000)
- description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
- if signatures is None:
- # If symbol is a module, don't show signature.
- embed_description = description
-
- elif not signatures:
- # It's some "meta-page", for example:
- # https://docs.djangoproject.com/en/dev/ref/views/#module-django.views
- embed_description = "This appears to be a generic page not tied to a specific symbol."
-
- else:
- embed_description = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures)
- embed_description += f"\n{description}"
+ self.bot.stats.incr(f"doc_fetches.{symbol_info.package.lower()}")
+ embed_description = await get_symbol_markdown(self.bot.http_session, symbol_info)
embed = discord.Embed(
title=discord.utils.escape_markdown(symbol),
- url=permalink,
+ url=symbol_info.url,
description=embed_description
)
# Show all symbols with the same name that were renamed in the footer.
--
cgit v1.2.3
From e875142a0f937ab190208523ef17068e5988dca3 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 22 Jul 2020 14:25:47 +0200
Subject: Remove caching from get_symbol_embed.
The web request is already cached, and parsing doesn't take much more
time, but without moving the logic around, the cache prevents the stat
increase when a symbol is requested.
---
bot/cogs/doc/cog.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index 6cd066f1b..05cedcaaf 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -183,7 +183,6 @@ class DocCog(commands.Cog):
]
await asyncio.gather(*coros)
- @async_cache(arg_offset=1)
async def get_symbol_embed(self, symbol: str) -> Optional[discord.Embed]:
"""
Attempt to scrape and fetch the data for the given `symbol`, and build an embed from its contents.
--
cgit v1.2.3
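The problem in miniature: once a coroutine's result is memoized, its body,
and with it side effects like the stats increment, no longer runs. A
simplified sketch, using a stand-in for the cog's async_cache decorator:

    import asyncio
    from functools import wraps

    calls = 0


    def async_cache(func):
        # Simplified stand-in for the cog's async_cache decorator.
        cache = {}

        @wraps(func)
        async def wrapper(arg):
            if arg not in cache:
                cache[arg] = await func(arg)
            return cache[arg]
        return wrapper


    @async_cache
    async def get_symbol_embed(symbol):
        global calls
        calls += 1  # stands in for bot.stats.incr(...)
        return f"embed for {symbol}"


    async def main() -> None:
        await get_symbol_embed("int")
        await get_symbol_embed("int")
        print(calls)  # 1, not 2 - the cached second call skips the increment


    asyncio.run(main())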
From 6731de62e3a3f5d188e73538a718d2b30cc2f442 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 22 Jul 2020 14:28:07 +0200
Subject: Hold url parts in DocItem separately.
This allows us to save some memory by not creating unique strings
that all repeat the base url.
---
bot/cogs/doc/cog.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index 05cedcaaf..bd27dde01 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -55,10 +55,16 @@ NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
class DocItem(NamedTuple):
"""Holds inventory symbol information."""
+ base_url: str
+ relative_url: str
package: str
- url: str
group: str
+ @property
+ def url(self) -> str:
+ """Return the absolute url to the symbol."""
+ return self.base_url + self.relative_url
+
class InventoryURL(commands.Converter):
"""
@@ -131,7 +137,6 @@ class DocCog(commands.Cog):
for symbol, (_package_name, _version, relative_doc_url, _) in value.items():
if "/" in symbol:
continue # skip unreachable symbols with slashes
- absolute_doc_url = base_url + relative_doc_url
# Intern the group names since they're reused in all the DocItems
# to remove unnecessary memory consumption from them being unique objects
group_name = sys.intern(group.split(":")[1])
@@ -158,7 +163,7 @@ class DocCog(commands.Cog):
symbol = f"{api_package_name}.{symbol}"
self.renamed_symbols.add(symbol)
- self.doc_symbols[symbol] = DocItem(api_package_name, absolute_doc_url, group_name)
+ self.doc_symbols[symbol] = DocItem(base_url, relative_doc_url, api_package_name, group_name)
log.trace(f"Fetched inventory for {api_package_name}.")
--
cgit v1.2.3
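A rough sketch of the memory argument: one shared base url plus short
relative paths costs less than a unique absolute url per symbol (the urls
below are illustrative):

    import sys

    base_url = "https://docs.python.org/3/"
    relative_paths = [f"library/module{i}.html" for i in range(1000)]

    # Previous approach: 1000 unique absolute urls, each repeating the base.
    absolute_urls = [base_url + path for path in relative_paths]
    absolute_cost = sum(sys.getsizeof(url) for url in absolute_urls)

    # New approach: the base is stored once and urls are rebuilt on demand.
    shared_cost = sys.getsizeof(base_url) + sum(sys.getsizeof(path) for path in relative_paths)

    print(shared_cost < absolute_cost)  # True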
From 6ca72a68a75a1e5f56cb6a6ebec5a5b533c77eff Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 22 Jul 2020 14:52:04 +0200
Subject: Remove paragraph chars from descriptions
---
bot/cogs/doc/parsing.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index a2c6564b3..79f3bbf69 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -262,4 +262,4 @@ async def get_symbol_markdown(http_session: ClientSession, symbol_data: "DocItem
signature = _get_signatures(symbol_heading)
description = _get_symbol_description(symbol_heading)
- return _parse_into_markdown(signature, description, symbol_data.url)
+ return _parse_into_markdown(signature, description.replace('¶', ''), symbol_data.url)
--
cgit v1.2.3
From 9f4d602bfa02fce088aaed28ee598c116b655683 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 22 Jul 2020 16:20:48 +0200
Subject: Change ValidPythonIdentifier tests to PackageName.
---
tests/bot/test_converters.py | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/tests/bot/test_converters.py b/tests/bot/test_converters.py
index ca8cb6825..a3c071168 100644
--- a/tests/bot/test_converters.py
+++ b/tests/bot/test_converters.py
@@ -10,9 +10,9 @@ from bot.converters import (
Duration,
HushDurationConverter,
ISODateTime,
+ PackageName,
TagContentConverter,
TagNameConverter,
- ValidPythonIdentifier,
)
@@ -78,24 +78,23 @@ class ConverterTests(unittest.TestCase):
with self.assertRaises(BadArgument, msg=exception_message):
asyncio.run(TagNameConverter.convert(self.context, invalid_name))
- def test_valid_python_identifier_for_valid(self):
- """ValidPythonIdentifier returns valid identifiers unchanged."""
- test_values = ('foo', 'lemon')
+ def test_package_name_for_valid(self):
+ """PackageName returns valid package names unchanged."""
+ test_values = ('foo', 'le_mon')
for name in test_values:
with self.subTest(identifier=name):
- conversion = asyncio.run(ValidPythonIdentifier.convert(self.context, name))
+ conversion = asyncio.run(PackageName.convert(self.context, name))
self.assertEqual(name, conversion)
- def test_valid_python_identifier_for_invalid(self):
- """ValidPythonIdentifier raises the proper exception for invalid identifiers."""
- test_values = ('nested.stuff', '#####')
+ def test_package_name_for_invalid(self):
+ """PackageName raises the proper exception for invalid package names."""
+ test_values = ('text_with_a_dot.', 'UpperCaseName', "num83r")
for name in test_values:
with self.subTest(identifier=name):
- exception_message = f'`{name}` is not a valid Python identifier'
- with self.assertRaises(BadArgument, msg=exception_message):
- asyncio.run(ValidPythonIdentifier.convert(self.context, name))
+ with self.assertRaises(BadArgument):
+ asyncio.run(PackageName.convert(self.context, name))
def test_duration_converter_for_valid(self):
"""Duration returns the correct `datetime` for valid duration strings."""
--
cgit v1.2.3
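For context, a converter sketch consistent with the new test expectations,
accepting only lowercase letters and underscores; the actual PackageName
implementation in bot/converters.py may differ:

    import string

    from discord.ext.commands import BadArgument, Context, Converter


    class PackageName(Converter):
        """Sketch: package names may only contain lowercase letters and underscores."""

        ALLOWED_CHARACTERS = frozenset(string.ascii_lowercase + "_")

        async def convert(self, ctx: Context, argument: str) -> str:
            if set(argument) - self.ALLOWED_CHARACTERS:
                raise BadArgument("Package names can only consist of lowercase letters and underscores.")
            return argument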
From 7e367ce4a5df3fbd768c6dce1acc39e786a376ea Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 25 Jul 2020 03:13:20 +0200
Subject: Ensure all renamed symbols are kept
After the restructuring behaviour change in
d790c404ca3dba3843f351d6f42e766956aa73a1, the addition to renamed_symbols
was not re-added, so symbols that only passed the first check were
being missed.
---
bot/cogs/doc/cog.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index bd27dde01..e52ee95c1 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -148,6 +148,7 @@ class DocCog(commands.Cog):
or any(package in symbol_base_url for package in NO_OVERRIDE_PACKAGES)
):
symbol = f"{group_name}.{symbol}"
+ self.renamed_symbols.add(symbol)
elif (overridden_symbol_group := self.doc_symbols[symbol].group) in NO_OVERRIDE_GROUPS:
overridden_symbol = f"{overridden_symbol_group}.{symbol}"
@@ -158,7 +159,7 @@ class DocCog(commands.Cog):
self.renamed_symbols.add(overridden_symbol)
# If renamed `symbol` already exists, add library name in front to differentiate between them.
- if symbol in self.renamed_symbols:
+ elif symbol in self.renamed_symbols:
# Split `package_name` because of packages like Pillow that have spaces in them.
symbol = f"{api_package_name}.{symbol}"
self.renamed_symbols.add(symbol)
--
cgit v1.2.3
From 2cc7ec9e26b013b2967841372898f1f8954d8f8f Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 26 Jul 2020 15:06:35 +0200
Subject: Parse NavigableStrings in symbol descriptions.
When a symbol, such as [term.numpy](https://matplotlib.org/3.1.1/glossary/index.html#term-numpy), had NavigableStrings as direct
children, they were not included, as bs4's SoupStrainer won't include
both strings and tags in its filters.
The implementation works around the limitation by introducing a new
optional flag, bypassing the default check which skips matching tags
when the `text` argument is present.
---
bot/cogs/doc/html.py | 33 +++++++++++++++++++++++++++++++++
bot/cogs/doc/parsing.py | 36 ++++++++++++++++++++++--------------
2 files changed, 55 insertions(+), 14 deletions(-)
create mode 100644 bot/cogs/doc/html.py
diff --git a/bot/cogs/doc/html.py b/bot/cogs/doc/html.py
new file mode 100644
index 000000000..bc705130d
--- /dev/null
+++ b/bot/cogs/doc/html.py
@@ -0,0 +1,33 @@
+from collections.abc import Iterable
+from typing import List, Union
+
+from bs4.element import NavigableString, PageElement, SoupStrainer, Tag
+
+
+class Strainer(SoupStrainer):
+ """Subclass of SoupStrainer to allow matching of both `Tag`s and `NavigableString`s."""
+
+ def __init__(self, *, include_strings: bool, **kwargs):
+ self.include_strings = include_strings
+ super().__init__(**kwargs)
+
+ markup_hint = Union[PageElement, List["markup_hint"]]
+
+ def search(self, markup: markup_hint) -> Union[PageElement, str]:
+ """Extend default SoupStrainer behaviour to allow matching both `Tag`s` and `NavigableString`s."""
+ if isinstance(markup, Iterable) and not isinstance(markup, (Tag, str)):
+ for element in markup:
+ if isinstance(element, NavigableString) and self.search(element):
+ return element
+ elif isinstance(markup, Tag):
+ # Also include tags while we're searching for strings and tags.
+ if self.include_strings or (not self.text or self.name or self.attrs):
+ return self.search_tag(markup)
+
+ elif isinstance(markup, str):
+ # Let everything through the text filter if we're including strings and tags.
+ text_filter = None if not self.include_strings else True
+ if not self.name and not self.attrs and self._matches(markup, text_filter):
+ return markup
+ else:
+ raise Exception(f"I don't know how to match against a {markup.__class__}")
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 79f3bbf69..050c49447 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -8,10 +8,11 @@ from urllib.parse import urljoin
from aiohttp import ClientSession
from bs4 import BeautifulSoup
-from bs4.element import PageElement, Tag
+from bs4.element import NavigableString, PageElement, Tag
from markdownify import MarkdownConverter
from .cache import async_cache
+from .html import Strainer
if TYPE_CHECKING:
from .cog import DocItem
@@ -96,25 +97,30 @@ def _find_elements_until_tag(
tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]],
*,
func: Callable,
+ include_strings: bool = False,
limit: int = None,
-) -> List[Tag]:
+) -> List[Union[Tag, NavigableString]]:
"""
- Get all tags until a tag matching `tag_filter` is found.
+ Get all elements up to `limit` or until a tag matching `tag_filter` is found.
`tag_filter` can be either a tuple of string names to check against,
- or a filtering t.Callable that's applied to the tags.
+ or a filtering callable that's applied to tags.
+
+ When `include_strings` is True, `NavigableString`s from the document will be included in the result along `Tag`s.
`func` takes in a BeautifulSoup unbound method for finding multiple elements, such as `BeautifulSoup.find_all`.
- That method is then iterated over and all tags until the matching tag are added to the return list as strings.
+ The method is then iterated over and all elements until the matching tag or the limit are added to the return list.
"""
+ use_tuple_filter = isinstance(tag_filter, tuple)
elements = []
- for element in func(start_element, limit=limit):
- if isinstance(tag_filter, tuple):
- if element.name in tag_filter:
+ for element in func(start_element, name=Strainer(include_strings=include_strings), limit=limit):
+ if isinstance(element, Tag):
+ if use_tuple_filter:
+ if element.name in tag_filter:
+ break
+ elif tag_filter(element):
break
- elif tag_filter(element):
- break
elements.append(element)
return elements
@@ -125,7 +131,7 @@ _find_next_siblings_until_tag = partial(_find_elements_until_tag, func=Beautiful
_find_previous_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
-def get_module_description(start_element: PageElement) -> Optional[str]:
+def _get_module_description(start_element: PageElement) -> Optional[str]:
"""
Get page content to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`.
@@ -134,7 +140,9 @@ def get_module_description(start_element: PageElement) -> Optional[str]:
"""
header = start_element.find("a", attrs={"class": "headerlink"})
start_tag = header.parent if header is not None else start_element
- description = "".join(str(tag) for tag in _find_next_siblings_until_tag(start_tag, _match_end_tag))
+ description = "".join(
+ str(tag) for tag in _find_next_siblings_until_tag(start_tag, _match_end_tag, include_strings=True)
+ )
return description
@@ -142,7 +150,7 @@ def get_module_description(start_element: PageElement) -> Optional[str]:
def _get_symbol_description(symbol: PageElement) -> str:
"""Get the string contents of the next dd tag, up to a dt or a dl tag."""
description_tag = symbol.find_next("dd")
- description_contents = _find_next_children_until_tag(description_tag, ("dt", "dl"))
+ description_contents = _find_next_children_until_tag(description_tag, ("dt", "dl"), include_strings=True)
return "".join(str(tag) for tag in description_contents)
@@ -253,7 +261,7 @@ async def get_symbol_markdown(http_session: ClientSession, symbol_data: "DocItem
# or don't contain any useful info to be parsed.
signature = None
if symbol_data.group in {"module", "doc"}:
- description = get_module_description(symbol_heading)
+ description = _get_module_description(symbol_heading)
elif symbol_data.group in _NO_SIGNATURE_GROUPS:
description = _get_symbol_description(symbol_heading)
--
cgit v1.2.3
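A quick sketch of what the flag changes; with include_strings=True the
direct NavigableString children are returned alongside tags (the HTML is
made up):

    from bs4 import BeautifulSoup

    from bot.cogs.doc.html import Strainer

    soup = BeautifulSoup("<dd>leading text<p>tag child</p>trailing text</dd>", "lxml")
    dd_tag = soup.find("dd")

    # Mirrors _find_next_children_until_tag: the Strainer is passed as find_all's name argument.
    elements = dd_tag.find_all(Strainer(include_strings=True), recursive=False)
    print([str(element) for element in elements])
    # ['leading text', '<p>tag child</p>', 'trailing text']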
From 6ea6f732e719f93f88588f1d6c435262261e2650 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 26 Jul 2020 15:09:53 +0200
Subject: Fix markdownify's handling of h tags.
Discord only allows `**` for bolding, while the markdown from the
default MarkdownConverter repeats `#` n times for h*n* tags to get
different font weights.
---
bot/cogs/doc/parsing.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 050c49447..ac8a94e3f 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -65,6 +65,10 @@ class _DocMarkdownConverter(MarkdownConverter):
bullet = bullets[depth % len(bullets)]
return '%s %s\n' % (bullet, text or '')
+ def convert_hn(self, _n: int, el: PageElement, text: str) -> str:
+ """Convert h tags to bold text with ** instead of adding #."""
+ return f"**{text}**\n\n"
+
def convert_code(self, el: PageElement, text: str) -> str:
"""Undo `markdownify`s underscore escaping."""
return f"`{text}`".replace('\\', '')
--
cgit v1.2.3
From 13030b8c54dd2ed37047349c5b09e4ded2c83391 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 26 Jul 2020 15:11:45 +0200
Subject: Move MarkdownConverter subclass to separate module
---
bot/cogs/doc/markdown.py | 58 +++++++++++++++++++++++++++++++++++++++++++++++
bot/cogs/doc/parsing.py | 59 ++----------------------------------------------
2 files changed, 60 insertions(+), 57 deletions(-)
create mode 100644 bot/cogs/doc/markdown.py
diff --git a/bot/cogs/doc/markdown.py b/bot/cogs/doc/markdown.py
new file mode 100644
index 000000000..dca477d35
--- /dev/null
+++ b/bot/cogs/doc/markdown.py
@@ -0,0 +1,58 @@
+from urllib.parse import urljoin
+
+from bs4.element import PageElement
+from markdownify import MarkdownConverter
+
+
+class _DocMarkdownConverter(MarkdownConverter):
+ """Subclass markdownify's MarkdownCoverter to provide custom conversion methods."""
+
+ def __init__(self, *, page_url: str, **options):
+ super().__init__(**options)
+ self.page_url = page_url
+
+ def convert_li(self, el: PageElement, text: str) -> str:
+ """Fix markdownify's erroneous indexing in ol tags."""
+ parent = el.parent
+ if parent is not None and parent.name == 'ol':
+ li_tags = parent.find_all("li")
+ bullet = '%s.' % (li_tags.index(el)+1)
+ else:
+ depth = -1
+ while el:
+ if el.name == 'ul':
+ depth += 1
+ el = el.parent
+ bullets = self.options['bullets']
+ bullet = bullets[depth % len(bullets)]
+ return '%s %s\n' % (bullet, text or '')
+
+ def convert_hn(self, _n: int, el: PageElement, text: str) -> str:
+ """Convert h tags to bold text with ** instead of adding #."""
+ return f"**{text}**\n\n"
+
+ def convert_code(self, el: PageElement, text: str) -> str:
+ """Undo `markdownify`s underscore escaping."""
+ return f"`{text}`".replace('\\', '')
+
+ def convert_pre(self, el: PageElement, text: str) -> str:
+ """Wrap any codeblocks in `py` for syntax highlighting."""
+ code = ''.join(el.strings)
+ return f"```py\n{code}```"
+
+ def convert_a(self, el: PageElement, text: str) -> str:
+ """Resolve relative URLs to `self.page_url`."""
+ el["href"] = urljoin(self.page_url, el["href"])
+ return super().convert_a(el, text)
+
+ def convert_p(self, el: PageElement, text: str) -> str:
+ """Include only one newline instead of two when the parent is a li tag."""
+ parent = el.parent
+ if parent is not None and parent.name == "li":
+ return f"{text}\n"
+ return super().convert_p(el, text)
+
+
+def markdownify(html: str, *, url: str = "") -> str:
+ """Create a DocMarkdownConverter object from the input html."""
+ return _DocMarkdownConverter(bullets='•', page_url=url).convert(html)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index ac8a94e3f..93daf3faf 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -4,15 +4,14 @@ import string
import textwrap
from functools import partial
from typing import Callable, List, Optional, TYPE_CHECKING, Tuple, Union
-from urllib.parse import urljoin
from aiohttp import ClientSession
from bs4 import BeautifulSoup
from bs4.element import NavigableString, PageElement, Tag
-from markdownify import MarkdownConverter
from .cache import async_cache
from .html import Strainer
+from .markdown import markdownify
if TYPE_CHECKING:
from .cog import DocItem
@@ -42,60 +41,6 @@ _NO_SIGNATURE_GROUPS = {
}
-class _DocMarkdownConverter(MarkdownConverter):
- """Subclass markdownify's MarkdownCoverter to provide custom conversion methods."""
-
- def __init__(self, *, page_url: str, **options):
- super().__init__(**options)
- self.page_url = page_url
-
- def convert_li(self, el: PageElement, text: str) -> str:
- """Fix markdownify's erroneous indexing in ol tags."""
- parent = el.parent
- if parent is not None and parent.name == 'ol':
- li_tags = parent.find_all("li")
- bullet = '%s.' % (li_tags.index(el)+1)
- else:
- depth = -1
- while el:
- if el.name == 'ul':
- depth += 1
- el = el.parent
- bullets = self.options['bullets']
- bullet = bullets[depth % len(bullets)]
- return '%s %s\n' % (bullet, text or '')
-
- def convert_hn(self, _n: int, el: PageElement, text: str) -> str:
- """Convert h tags to bold text with ** instead of adding #."""
- return f"**{text}**\n\n"
-
- def convert_code(self, el: PageElement, text: str) -> str:
- """Undo `markdownify`s underscore escaping."""
- return f"`{text}`".replace('\\', '')
-
- def convert_pre(self, el: PageElement, text: str) -> str:
- """Wrap any codeblocks in `py` for syntax highlighting."""
- code = ''.join(el.strings)
- return f"```py\n{code}```"
-
- def convert_a(self, el: PageElement, text: str) -> str:
- """Resolve relative URLs to `self.page_url`."""
- el["href"] = urljoin(self.page_url, el["href"])
- return super().convert_a(el, text)
-
- def convert_p(self, el: PageElement, text: str) -> str:
- """Include only one newline instead of two when the parent is a li tag."""
- parent = el.parent
- if parent is not None and parent.name == "li":
- return f"{text}\n"
- return super().convert_p(el, text)
-
-
-def _markdownify(html: str, *, url: str = "") -> str:
- """Create a DocMarkdownConverter object from the input html."""
- return _DocMarkdownConverter(bullets='•', page_url=url).convert(html)
-
-
def _find_elements_until_tag(
start_element: PageElement,
tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]],
@@ -215,7 +160,7 @@ def _parse_into_markdown(signatures: Optional[List[str]], description: str, url:
The signatures are wrapped in python codeblocks, separated from the description by a newline.
The result string is truncated to be at most 1000 characters long.
"""
- description = _truncate_markdown(_markdownify(description, url=url), 1000)
+ description = _truncate_markdown(markdownify(description, url=url), 1000)
description = _WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
if signatures is not None:
formatted_markdown = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures)
--
cgit v1.2.3
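With the converter isolated in its own module it's easy to exercise
directly. A sketch of the custom conversions, heading bolding and relative
link resolution (the HTML and url are made up, output shown approximately):

    from bot.cogs.doc.markdown import markdownify

    html = '<h2>Example</h2><p>See <a href="other.html#anchor">the docs</a>.</p>'
    print(markdownify(html, url="https://example.com/docs/page.html"))
    # **Example**
    #
    # See [the docs](https://example.com/docs/other.html#anchor).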
From 994b828254cc8e40a52cf604910d5aa3eba2293d Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 26 Jul 2020 15:21:40 +0200
Subject: Add more logging
---
bot/cogs/doc/parsing.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 93daf3faf..2ea21ed98 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -197,6 +197,7 @@ async def get_symbol_markdown(http_session: ClientSession, symbol_data: "DocItem
A request through `http_session` is made to the url associated with `symbol_data` for the html contents;
the contents are then parsed depending on what group the symbol belongs to.
"""
+ log.trace(f"Parsing symbol from url {symbol_data.url}.")
if "#" in symbol_data.url:
request_url, symbol_id = symbol_data.url.rsplit('#')
else:
@@ -210,12 +211,15 @@ async def get_symbol_markdown(http_session: ClientSession, symbol_data: "DocItem
# or don't contain any useful info to be parsed.
signature = None
if symbol_data.group in {"module", "doc"}:
+ log.trace("Symbol is a module or doc, parsing as module.")
description = _get_module_description(symbol_heading)
elif symbol_data.group in _NO_SIGNATURE_GROUPS:
+ log.trace("Symbol's group is in the group signature blacklist, skipping parsing of signature.")
description = _get_symbol_description(symbol_heading)
else:
+ log.trace("Parsing both signature and description of symbol.")
signature = _get_signatures(symbol_heading)
description = _get_symbol_description(symbol_heading)
--
cgit v1.2.3
From 83989d28fb83801acdea4b6f51cf48e974e21891 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 26 Jul 2020 15:29:09 +0200
Subject: Rename description functions to be more general
---
bot/cogs/doc/parsing.py | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 2ea21ed98..96bb1dfb4 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -80,14 +80,14 @@ _find_next_siblings_until_tag = partial(_find_elements_until_tag, func=Beautiful
_find_previous_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
-def _get_module_description(start_element: PageElement) -> Optional[str]:
+def _get_general_description(start_element: PageElement) -> Optional[str]:
"""
Get page content to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`.
- A headerlink a tag is attempted to be found to skip repeating the module name in the description,
- if it's found it's used as the tag to search from instead of the `start_element`.
+    An attempt is made to find a headerlink a tag, to skip repeating the symbol information in the description;
+    if one is found, it's used as the tag to start the search from instead of the `start_element`.
"""
- header = start_element.find("a", attrs={"class": "headerlink"})
+ header = start_element.find_next("a", attrs={"class": "headerlink"})
start_tag = header.parent if header is not None else start_element
description = "".join(
str(tag) for tag in _find_next_siblings_until_tag(start_tag, _match_end_tag, include_strings=True)
@@ -96,7 +96,7 @@ def _get_module_description(start_element: PageElement) -> Optional[str]:
return description
-def _get_symbol_description(symbol: PageElement) -> str:
+def _get_dd_description(symbol: PageElement) -> str:
"""Get the string contents of the next dd tag, up to a dt or a dl tag."""
description_tag = symbol.find_next("dd")
description_contents = _find_next_children_until_tag(description_tag, ("dt", "dl"), include_strings=True)
@@ -212,15 +212,15 @@ async def get_symbol_markdown(http_session: ClientSession, symbol_data: "DocItem
signature = None
if symbol_data.group in {"module", "doc"}:
log.trace("Symbol is a module or doc, parsing as module.")
- description = _get_module_description(symbol_heading)
+ description = _get_general_description(symbol_heading)
elif symbol_data.group in _NO_SIGNATURE_GROUPS:
log.trace("Symbol's group is in the group signature blacklist, skipping parsing of signature.")
- description = _get_symbol_description(symbol_heading)
+ description = _get_dd_description(symbol_heading)
else:
log.trace("Parsing both signature and description of symbol.")
signature = _get_signatures(symbol_heading)
- description = _get_symbol_description(symbol_heading)
+ description = _get_dd_description(symbol_heading)
return _parse_into_markdown(signature, description.replace('¶', ''), symbol_data.url)
--
cgit v1.2.3
From 5290fcf0fff23e4979746c51b77be9a51fe82ae7 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 26 Jul 2020 15:51:34 +0200
Subject: Properly parse labels, add fallback for non-dt tags
Labels point to tags that aren't in description lists, like the modules
or doc symbols we already handle.
If by chance we get a symbol whose group isn't handled by general
parsing and which isn't a dt tag, log it, skip the signature, and use
general description parsing instead of parsing a dd tag.
---
bot/cogs/doc/parsing.py | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 96bb1dfb4..1271953d4 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -206,12 +206,20 @@ async def get_symbol_markdown(http_session: ClientSession, symbol_data: "DocItem
soup = await _get_soup_from_url(http_session, request_url)
symbol_heading = soup.find(id=symbol_id)
-
- # Handle doc symbols as modules, because they either link to the page of a module,
- # or don't contain any useful info to be parsed.
signature = None
- if symbol_data.group in {"module", "doc"}:
- log.trace("Symbol is a module or doc, parsing as module.")
+    # Modules, doc pages and labels don't point to description list tags but to tags like divs;
+    # no special parsing can be done, so we only try to include what's under them.
+ if symbol_data.group in {"module", "doc", "label"}:
+ log.trace("Symbol is a module, doc or a label; using general description parsing.")
+ description = _get_general_description(symbol_heading)
+
+ elif symbol_heading.name != "dt":
+        # Use the general parsing for symbols that aren't modules, docs or labels and aren't dt tags,
+        # and log info so the tag can be looked at.
+        log.info(
+            f"Symbol heading at url {symbol_data.url} was not a dt tag or from known groups that lack it, "
+            f"handling as general description."
+ )
description = _get_general_description(symbol_heading)
elif symbol_data.group in _NO_SIGNATURE_GROUPS:
--
cgit v1.2.3
From b759a940a097effd16b761e0c62231ae0ca9562b Mon Sep 17 00:00:00 2001
From: dolphingarlic
Date: Thu, 30 Jul 2020 20:13:15 +0200
Subject: Cleaned the code for CodeSnippets
---
bot/__main__.py | 2 +-
bot/cogs/code_snippets.py | 216 +++++++++++++++++++++++++++++++++++++++++++++
bot/cogs/print_snippets.py | 190 ---------------------------------------
3 files changed, 217 insertions(+), 191 deletions(-)
create mode 100644 bot/cogs/code_snippets.py
delete mode 100644 bot/cogs/print_snippets.py
diff --git a/bot/__main__.py b/bot/__main__.py
index 3191faf85..3d414c4b8 100644
--- a/bot/__main__.py
+++ b/bot/__main__.py
@@ -71,7 +71,7 @@ bot.load_extension("bot.cogs.utils")
bot.load_extension("bot.cogs.watchchannels")
bot.load_extension("bot.cogs.webhook_remover")
bot.load_extension("bot.cogs.wolfram")
-bot.load_extension("bot.cogs.print_snippets")
+bot.load_extension("bot.cogs.code_snippets")
if constants.HelpChannels.enable:
bot.load_extension("bot.cogs.help_channels")
diff --git a/bot/cogs/code_snippets.py b/bot/cogs/code_snippets.py
new file mode 100644
index 000000000..9bd06f6ff
--- /dev/null
+++ b/bot/cogs/code_snippets.py
@@ -0,0 +1,216 @@
+import re
+import textwrap
+from urllib.parse import quote_plus
+
+from aiohttp import ClientSession
+from discord import Message
+from discord.ext.commands import Cog
+
+from bot.bot import Bot
+from bot.utils.messages import wait_for_deletion
+
+
+async def fetch_http(session: ClientSession, url: str, response_format: str, **kwargs) -> str:
+ """Uses aiohttp to make http GET requests."""
+ async with session.get(url, **kwargs) as response:
+ if response_format == 'text':
+ return await response.text()
+ elif response_format == 'json':
+ return await response.json()
+
+
+async def fetch_github_snippet(session: ClientSession, repo: str,
+ path: str, start_line: str, end_line: str) -> str:
+ """Fetches a snippet from a GitHub repo."""
+ headers = {'Accept': 'application/vnd.github.v3.raw'}
+
+ # Search the GitHub API for the specified branch
+ refs = (await fetch_http(session, f'https://api.github.com/repos/{repo}/branches', 'json', headers=headers)
+ + await fetch_http(session, f'https://api.github.com/repos/{repo}/tags', 'json', headers=headers))
+
+ ref = path.split('/')[0]
+ file_path = '/'.join(path.split('/')[1:])
+ for possible_ref in refs:
+ if path.startswith(possible_ref['name'] + '/'):
+ ref = possible_ref['name']
+ file_path = path[len(ref) + 1:]
+ break
+
+ file_contents = await fetch_http(
+ session,
+ f'https://api.github.com/repos/{repo}/contents/{file_path}?ref={ref}',
+ 'text',
+ headers=headers,
+ )
+
+ return await snippet_to_md(file_contents, file_path, start_line, end_line)
+
+
+async def fetch_github_gist_snippet(session: ClientSession, gist_id: str, revision: str,
+ file_path: str, start_line: str, end_line: str) -> str:
+ """Fetches a snippet from a GitHub gist."""
+ headers = {'Accept': 'application/vnd.github.v3.raw'}
+
+ gist_json = await fetch_http(
+ session,
+ f'https://api.github.com/gists/{gist_id}{f"/{revision}" if len(revision) > 0 else ""}',
+ 'json',
+ headers=headers,
+ )
+
+ # Check each file in the gist for the specified file
+ for gist_file in gist_json['files']:
+ if file_path == gist_file.lower().replace('.', '-'):
+ file_contents = await fetch_http(
+ session,
+ gist_json['files'][gist_file]['raw_url'],
+ 'text',
+ )
+
+ return await snippet_to_md(file_contents, gist_file, start_line, end_line)
+
+ return ''
+
+
+async def fetch_gitlab_snippet(session: ClientSession, repo: str,
+ path: str, start_line: str, end_line: str) -> str:
+ """Fetches a snippet from a GitLab repo."""
+ enc_repo = quote_plus(repo)
+
+ # Searches the GitLab API for the specified branch
+ refs = (await fetch_http(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/branches', 'json')
+ + await fetch_http(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/tags', 'json'))
+
+ ref = path.split('/')[0]
+ file_path = '/'.join(path.split('/')[1:])
+ for possible_ref in refs:
+ if path.startswith(possible_ref['name'] + '/'):
+ ref = possible_ref['name']
+ file_path = path[len(ref) + 1:]
+ break
+
+ enc_ref = quote_plus(ref)
+ enc_file_path = quote_plus(file_path)
+
+ file_contents = await fetch_http(
+ session,
+ f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/files/{enc_file_path}/raw?ref={enc_ref}',
+ 'text',
+ )
+
+ return await snippet_to_md(file_contents, file_path, start_line, end_line)
+
+
+async def fetch_bitbucket_snippet(session: ClientSession, repo: str, ref: str,
+ file_path: str, start_line: int, end_line: int) -> str:
+ """Fetches a snippet from a BitBucket repo."""
+ file_contents = await fetch_http(
+ session,
+ f'https://bitbucket.org/{quote_plus(repo)}/raw/{quote_plus(ref)}/{quote_plus(file_path)}',
+ 'text',
+ )
+
+ return await snippet_to_md(file_contents, file_path, start_line, end_line)
+
+
+async def snippet_to_md(file_contents: str, file_path: str, start_line: str, end_line: str) -> str:
+ """Given file contents, file path, start line and end line creates a code block."""
+ # Parse start_line and end_line into integers
+ if end_line is None:
+ start_line = end_line = int(start_line)
+ else:
+ start_line = int(start_line)
+ end_line = int(end_line)
+
+ split_file_contents = file_contents.splitlines()
+
+ # Make sure that the specified lines are in range
+ if start_line > end_line:
+ start_line, end_line = end_line, start_line
+ if start_line > len(split_file_contents) or end_line < 1:
+ return ''
+ start_line = max(1, start_line)
+ end_line = min(len(split_file_contents), end_line)
+
+ # Gets the code lines, dedents them, and inserts zero-width spaces to prevent Markdown injection
+ required = '\n'.join(split_file_contents[start_line - 1:end_line])
+ required = textwrap.dedent(required).rstrip().replace('`', '`\u200b')
+
+ # Extracts the code language and checks whether it's a "valid" language
+ language = file_path.split('/')[-1].split('.')[-1]
+ if not language.replace('-', '').replace('+', '').replace('_', '').isalnum():
+ language = ''
+
+ if len(required) != 0:
+ return f'```{language}\n{required}```\n'
+ return ''
+
+
+GITHUB_RE = re.compile(
+    r'https://github\.com/(?P<repo>.+?)/blob/(?P<path>.+/.+)'
+    r'#L(?P<start_line>\d+)([-~]L(?P<end_line>\d+))?\b'
+)
+
+GITHUB_GIST_RE = re.compile(
+    r'https://gist\.github\.com/([^/]+)/(?P<gist_id>[^\W_]+)/*'
+    r'(?P<revision>[^\W_]*)/*#file-(?P<file_path>.+?)'
+    r'-L(?P<start_line>\d+)([-~]L(?P<end_line>\d+))?\b'
+)
+
+GITLAB_RE = re.compile(
+    r'https://gitlab\.com/(?P<repo>.+?)/\-/blob/(?P<path>.+/.+)'
+    r'#L(?P<start_line>\d+)([-](?P<end_line>\d+))?\b'
+)
+
+BITBUCKET_RE = re.compile(
+    r'https://bitbucket\.org/(?P<repo>.+?)/src/(?P<ref>.+?)/'
+    r'(?P<file_path>.+?)#lines-(?P<start_line>\d+)(:(?P<end_line>\d+))?\b'
+)
+
+
+class CodeSnippets(Cog):
+ """
+ Cog that prints out snippets to Discord.
+
+ Matches each message against a regex and prints the contents of all matched snippets.
+ """
+
+ def __init__(self, bot: Bot):
+ """Initializes the cog's bot."""
+ self.bot = bot
+
+ @Cog.listener()
+ async def on_message(self, message: Message) -> None:
+ """Checks if the message has a snippet link, removes the embed, then sends the snippet contents."""
+ gh_match = GITHUB_RE.search(message.content)
+ gh_gist_match = GITHUB_GIST_RE.search(message.content)
+ gl_match = GITLAB_RE.search(message.content)
+ bb_match = BITBUCKET_RE.search(message.content)
+
+ if (gh_match or gh_gist_match or gl_match or bb_match) and not message.author.bot:
+ message_to_send = ''
+
+ for gh in GITHUB_RE.finditer(message.content):
+ message_to_send += await fetch_github_snippet(self.bot.http_session, **gh.groupdict())
+
+ for gh_gist in GITHUB_GIST_RE.finditer(message.content):
+ message_to_send += await fetch_github_gist_snippet(self.bot.http_session, **gh_gist.groupdict())
+
+ for gl in GITLAB_RE.finditer(message.content):
+ message_to_send += await fetch_gitlab_snippet(self.bot.http_session, **gl.groupdict())
+
+ for bb in BITBUCKET_RE.finditer(message.content):
+ message_to_send += await fetch_bitbucket_snippet(self.bot.http_session, **bb.groupdict())
+
+ if 0 < len(message_to_send) <= 2000 and message_to_send.count('\n') <= 15:
+ await message.edit(suppress=True)
+ await wait_for_deletion(
+ await message.channel.send(message_to_send),
+ (message.author.id,),
+ client=self.bot
+ )
+
+
+def setup(bot: Bot) -> None:
+ """Load the CodeSnippets cog."""
+ bot.add_cog(CodeSnippets(bot))
diff --git a/bot/cogs/print_snippets.py b/bot/cogs/print_snippets.py
deleted file mode 100644
index 3f784d2c6..000000000
--- a/bot/cogs/print_snippets.py
+++ /dev/null
@@ -1,190 +0,0 @@
-import asyncio
-import os
-import re
-import textwrap
-
-import aiohttp
-from discord import Message, Reaction, User
-from discord.ext.commands import Cog
-
-from bot.bot import Bot
-
-
-async def fetch_http(session: aiohttp.ClientSession, url: str, response_format: str, **kwargs) -> str:
- """Uses aiohttp to make http GET requests."""
- async with session.get(url, **kwargs) as response:
- if response_format == 'text':
- return await response.text()
- elif response_format == 'json':
- return await response.json()
-
-
-async def revert_to_orig(d: dict) -> dict:
- """Replace URL Encoded values back to their original."""
- for obj in d:
- if d[obj] is not None:
- d[obj] = d[obj].replace('%2F', '/').replace('%2E', '.')
-
-
-async def orig_to_encode(d: dict) -> dict:
- """Encode URL Parameters."""
- for obj in d:
- if d[obj] is not None:
- d[obj] = d[obj].replace('/', '%2F').replace('.', '%2E')
-
-
-async def snippet_to_embed(d: dict, file_contents: str) -> str:
- """Given a regex groupdict and file contents, creates a code block."""
- if d['end_line']:
- start_line = int(d['start_line'])
- end_line = int(d['end_line'])
- else:
- start_line = end_line = int(d['start_line'])
-
- split_file_contents = file_contents.split('\n')
-
- if start_line > end_line:
- start_line, end_line = end_line, start_line
- if start_line > len(split_file_contents) or end_line < 1:
- return ''
- start_line = max(1, start_line)
- end_line = min(len(split_file_contents), end_line)
-
- required = '\n'.join(split_file_contents[start_line - 1:end_line])
- required = textwrap.dedent(required).rstrip().replace('`', '`\u200b')
-
- language = d['file_path'].split('/')[-1].split('.')[-1]
- if not language.replace('-', '').replace('+', '').replace('_', '').isalnum():
- language = ''
-
- if len(required) != 0:
- return f'```{language}\n{required}```\n'
- return '``` ```\n'
-
-
-GITHUB_RE = re.compile(
-    r'https://github\.com/(?P<repo>.+?)/blob/(?P<branch>.+?)/'
-    + r'(?P<file_path>.+?)#L(?P<start_line>\d+)([-~]L(?P<end_line>\d+))?\b'
-)
-
-GITHUB_GIST_RE = re.compile(
-    r'https://gist\.github\.com/([^/]*)/(?P<gist_id>[0-9a-zA-Z]+)/*'
-    + r'(?P<revision>[0-9a-zA-Z]*)/*#file-(?P<file_path>.+?)'
-    + r'-L(?P<start_line>\d+)([-~]L(?P<end_line>\d+))?\b'
-)
-
-GITLAB_RE = re.compile(
-    r'https://gitlab\.com/(?P<repo>.+?)/\-/blob/(?P<branch>.+?)/'
-    + r'(?P<file_path>.+?)#L(?P<start_line>\d+)([-~](?P<end_line>\d+))?\b'
-)
-
-BITBUCKET_RE = re.compile(
-    r'https://bitbucket\.org/(?P<repo>.+?)/src/(?P<branch>.+?)/'
-    + r'(?P<file_path>.+?)#lines-(?P<start_line>\d+)(:(?P<end_line>\d+))?\b'
-)
-
-
-class PrintSnippets(Cog):
- """
- Cog that prints out snippets to Discord.
-
- Matches each message against a regex and prints the contents of all matched snippets.
- """
-
- def __init__(self, bot: Bot):
- """Initializes the cog's bot."""
- self.bot = bot
- self.session = aiohttp.ClientSession()
-
- @Cog.listener()
- async def on_message(self, message: Message) -> None:
- """Checks if the message has a snippet link, removes the embed, then sends the snippet contents."""
- gh_match = GITHUB_RE.search(message.content)
- gh_gist_match = GITHUB_GIST_RE.search(message.content)
- gl_match = GITLAB_RE.search(message.content)
- bb_match = BITBUCKET_RE.search(message.content)
-
- if (gh_match or gh_gist_match or gl_match or bb_match) and not message.author.bot:
- message_to_send = ''
-
- for gh in GITHUB_RE.finditer(message.content):
- d = gh.groupdict()
- headers = {'Accept': 'application/vnd.github.v3.raw'}
- if 'GITHUB_TOKEN' in os.environ:
- headers['Authorization'] = f'token {os.environ["GITHUB_TOKEN"]}'
- file_contents = await fetch_http(
- self.session,
- f'https://api.github.com/repos/{d["repo"]}'
- + f'/contents/{d["file_path"]}?ref={d["branch"]}',
- 'text',
- headers=headers,
- )
- message_to_send += await snippet_to_embed(d, file_contents)
-
- for gh_gist in GITHUB_GIST_RE.finditer(message.content):
- d = gh_gist.groupdict()
- gist_json = await fetch_http(
- self.session,
- f'https://api.github.com/gists/{d["gist_id"]}'
- + f'{"/" + d["revision"] if len(d["revision"]) > 0 else ""}',
- 'json',
- )
- for f in gist_json['files']:
- if d['file_path'] == f.lower().replace('.', '-'):
- d['file_path'] = f
- file_contents = await fetch_http(
- self.session,
- gist_json['files'][f]['raw_url'],
- 'text',
- )
- message_to_send += await snippet_to_embed(d, file_contents)
- break
-
- for gl in GITLAB_RE.finditer(message.content):
- d = gl.groupdict()
- await orig_to_encode(d)
- headers = {}
- if 'GITLAB_TOKEN' in os.environ:
- headers['PRIVATE-TOKEN'] = os.environ["GITLAB_TOKEN"]
- file_contents = await fetch_http(
- self.session,
- f'https://gitlab.com/api/v4/projects/{d["repo"]}/'
- + f'repository/files/{d["file_path"]}/raw?ref={d["branch"]}',
- 'text',
- headers=headers,
- )
- await revert_to_orig(d)
- message_to_send += await snippet_to_embed(d, file_contents)
-
- for bb in BITBUCKET_RE.finditer(message.content):
- d = bb.groupdict()
- await orig_to_encode(d)
- file_contents = await fetch_http(
- self.session,
- f'https://bitbucket.org/{d["repo"]}/raw/{d["branch"]}/{d["file_path"]}',
- 'text',
- )
- await revert_to_orig(d)
- message_to_send += await snippet_to_embed(d, file_contents)
-
- message_to_send = message_to_send[:-1]
-
- if 0 < len(message_to_send) <= 2000 and message_to_send.count('\n') <= 50:
- sent_message = await message.channel.send(message_to_send)
- await message.edit(suppress=True)
- await sent_message.add_reaction('❌')
-
- def check(reaction: Reaction, user: User) -> bool:
- return user == message.author and str(reaction.emoji) == '❌'
-
- try:
- reaction, user = await self.bot.wait_for('reaction_add', timeout=10.0, check=check)
- except asyncio.TimeoutError:
- await sent_message.remove_reaction('❌', self.bot.user)
- else:
- await sent_message.delete()
-
-
-def setup(bot: Bot) -> None:
- """Load the Utils cog."""
- bot.add_cog(PrintSnippets(bot))
--
cgit v1.2.3
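The named groups in the snippet regexes map straight onto the fetcher
parameters through groupdict. A sketch with the cog's GITHUB_RE pattern
(the URL is illustrative):

    import re

    GITHUB_RE = re.compile(
        r'https://github\.com/(?P<repo>.+?)/blob/(?P<path>.+/.+)'
        r'#L(?P<start_line>\d+)([-~]L(?P<end_line>\d+))?\b'
    )

    match = GITHUB_RE.search(
        "https://github.com/python-discord/bot/blob/master/bot/bot.py#L10-L20"
    )
    print(match.groupdict())
    # {'repo': 'python-discord/bot', 'path': 'master/bot/bot.py',
    #  'start_line': '10', 'end_line': '20'}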
From ddb3c230cc7e1b38dbb57be10b1684c4ecb2ac7b Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 16 Sep 2020 00:14:58 +0200
Subject: Remove old comment
---
bot/cogs/doc/cog.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index e52ee95c1..2f4c99252 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -160,7 +160,6 @@ class DocCog(commands.Cog):
# If renamed `symbol` already exists, add library name in front to differentiate between them.
elif symbol in self.renamed_symbols:
- # Split `package_name` because of packages like Pillow that have spaces in them.
symbol = f"{api_package_name}.{symbol}"
self.renamed_symbols.add(symbol)
--
cgit v1.2.3
From cb89cbaa36102c111c0204eb7c8bc27cecc1d4cd Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 16 Sep 2020 00:18:51 +0200
Subject: Don't return fragment in DocItem url
The fragment is only needed for the user, and even then only sparingly;
returning just the url while keeping the fragment behind symbol_id
simplifies the uses of the url that don't need it.
---
bot/cogs/doc/cog.py | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index 2f4c99252..2e49fcd38 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -55,15 +55,16 @@ NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
class DocItem(NamedTuple):
"""Holds inventory symbol information."""
- base_url: str
- relative_url: str
package: str
group: str
+ base_url: str
+ relative_url_path: str
+ symbol_id: str
@property
def url(self) -> str:
"""Return the absolute url to the symbol."""
- return self.base_url + self.relative_url
+ return "".join((self.base_url, self.relative_url_path))
class InventoryURL(commands.Converter):
@@ -141,21 +142,20 @@ class DocCog(commands.Cog):
# to remove unnecessary memory consumption from them being unique objects
group_name = sys.intern(group.split(":")[1])
- if symbol in self.doc_symbols:
- symbol_base_url = self.doc_symbols[symbol].url.split("/", 3)[2]
+ if (original_symbol := self.doc_symbols.get(symbol)) is not None:
if (
group_name in NO_OVERRIDE_GROUPS
- or any(package in symbol_base_url for package in NO_OVERRIDE_PACKAGES)
+ or any(package == original_symbol.package for package in NO_OVERRIDE_PACKAGES)
):
symbol = f"{group_name}.{symbol}"
self.renamed_symbols.add(symbol)
- elif (overridden_symbol_group := self.doc_symbols[symbol].group) in NO_OVERRIDE_GROUPS:
+ elif (overridden_symbol_group := original_symbol.group) in NO_OVERRIDE_GROUPS:
overridden_symbol = f"{overridden_symbol_group}.{symbol}"
if overridden_symbol in self.renamed_symbols:
overridden_symbol = f"{api_package_name}.{overridden_symbol}"
- self.doc_symbols[overridden_symbol] = self.doc_symbols[symbol]
+ self.doc_symbols[overridden_symbol] = original_symbol
self.renamed_symbols.add(overridden_symbol)
# If renamed `symbol` already exists, add library name in front to differentiate between them.
@@ -202,7 +202,7 @@ class DocCog(commands.Cog):
embed = discord.Embed(
title=discord.utils.escape_markdown(symbol),
- url=symbol_info.url,
+ url=f"{symbol_info.url}#{symbol_info.symbol_id}",
description=embed_description
)
# Show all symbols with the same name that were renamed in the footer.
--
cgit v1.2.3
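A small sketch of the split, assuming the DocItem NamedTuple from the cog;
the field values are made up:

    from bot.cogs.doc.cog import DocItem

    item = DocItem(
        package="python",
        group="function",
        base_url="https://docs.python.org/3/",
        relative_url_path="library/functools.html",
        symbol_id="functools.reduce",
    )
    print(item.url)                        # https://docs.python.org/3/library/functools.html
    print(f"{item.url}#{item.symbol_id}")  # the full link shown to the user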
From 75f95a110ce96734cb64f89321f9a6eeb0d79463 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 20 Sep 2020 03:06:59 +0200
Subject: Replace caching of soups with new class.
Storing BeautifulSoup objects could lead to memory problems because
of their large footprint; the new class replaces the long-term storage
by parsing all items on the first fetch of a page and storing only
their markdown strings.
---
bot/cogs/doc/cog.py | 122 +++++++++++++++++++++++++++++++++++++++++++++---
bot/cogs/doc/parsing.py | 36 ++------------
2 files changed, 119 insertions(+), 39 deletions(-)
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index 2e49fcd38..d57e76ebd 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -1,14 +1,18 @@
+from __future__ import annotations
+
import asyncio
import functools
import logging
import re
import sys
-from collections import OrderedDict
+from collections import defaultdict
from contextlib import suppress
from types import SimpleNamespace
-from typing import Dict, NamedTuple, Optional
+from typing import Dict, List, NamedTuple, Optional, Union
import discord
+from aiohttp import ClientSession
+from bs4 import BeautifulSoup
from discord.ext import commands
from requests import ConnectTimeout, ConnectionError, HTTPError
from sphinx.ext import intersphinx
@@ -20,7 +24,6 @@ from bot.converters import PackageName, ValidURL
from bot.decorators import with_role
from bot.pagination import LinePaginator
from bot.utils.messages import wait_for_deletion
-from .cache import async_cache
from .parsing import get_symbol_markdown
log = logging.getLogger(__name__)
@@ -67,6 +70,108 @@ class DocItem(NamedTuple):
return "".join((self.base_url, self.relative_url_path))
+class QueueItem(NamedTuple):
+ """Contains a symbol and the BeautifulSoup object needed to parse it."""
+
+ symbol: DocItem
+ soup: BeautifulSoup
+
+ def __eq__(self, other: Union[QueueItem, DocItem]):
+ if isinstance(other, DocItem):
+ return self.symbol == other
+ return NamedTuple.__eq__(self, other)
+
+
+class CachedParser:
+ """
+ Get symbol markdown from pages with smarter caching.
+
+ DocItems are added through the `add_item` method which adds them to the `_page_symbols` dict.
+ `get_markdown` is used to fetch the markdown; when this is used for the first time on a page,
+ all of the symbols are queued to be parsed to avoid multiple web requests to the same page.
+ """
+
+ def __init__(self):
+ self._queue: List[QueueItem] = []
+ self._results = {}
+ self._page_symbols: Dict[str, List[DocItem]] = defaultdict(list)
+ self._item_events: Dict[DocItem, asyncio.Event] = {}
+ self._parse_task = None
+
+ async def get_markdown(self, client_session: ClientSession, doc_item: DocItem) -> str:
+ """
+ Get result markdown of `doc_item`.
+
+        If no symbols were fetched from `doc_item`'s page before,
+ the HTML has to be fetched before parsing can be queued.
+ """
+ if (symbol := self._results.get(doc_item)) is not None:
+ return symbol
+
+ if (symbols_to_queue := self._page_symbols.get(doc_item.url)) is not None:
+ async with client_session.get(doc_item.url) as response:
+ soup = BeautifulSoup(await response.text(encoding="utf8"), "lxml")
+
+ self._queue.extend(QueueItem(symbol, soup) for symbol in symbols_to_queue)
+ del self._page_symbols[doc_item.url]
+ log.debug(f"Added symbols from {doc_item.url} to parse queue.")
+
+ if self._parse_task is None:
+ self._parse_task = asyncio.create_task(self._parse_queue())
+
+ self._move_to_front(doc_item)
+ self._item_events[doc_item] = item_event = asyncio.Event()
+ await item_event.wait()
+ return self._results[doc_item]
+
+ async def _parse_queue(self) -> None:
+ """
+        Parse all items from the queue, setting associated events for symbols if present.
+
+ The coroutine will run as long as the queue is not empty, resetting `self._parse_task` to None when finished.
+ """
+ log.trace("Starting queue parsing.")
+ while self._queue:
+ item, soup = self._queue.pop()
+ self._results[item] = get_symbol_markdown(soup, item)
+ if (event := self._item_events.get(item)) is not None:
+ event.set()
+ await asyncio.sleep(0.1)
+
+ self._parse_task = None
+ log.trace("Finished parsing queue.")
+
+ def _move_to_front(self, item: Union[QueueItem, DocItem]) -> None:
+ """Move `item` to the front of the parse queue."""
+ # The parse queue stores soups along with the doc symbols in QueueItem objects,
+ # in case we're moving a DocItem we have to get the associated QueueItem first and then move it.
+ item_index = self._queue.index(item)
+ queue_item = self._queue[item_index]
+
+ del self._queue[item_index]
+ self._queue.append(queue_item)
+
+ def add_item(self, doc_item: DocItem) -> None:
+ """Add a DocItem to `_page_symbols`."""
+ self._page_symbols[doc_item.url].append(doc_item)
+
+ async def clear(self) -> None:
+ """
+ Clear all internal symbol data.
+
+        Wait for all currently requested items to be parsed before clearing.
+ """
+ for event in self._item_events.values():
+ await event.wait()
+ if self._parse_task is not None:
+ self._parse_task.cancel()
+ self._parse_task = None
+ self._queue.clear()
+ self._results.clear()
+ self._page_symbols.clear()
+ self._item_events.clear()
+
+
class InventoryURL(commands.Converter):
"""
Represents an Intersphinx inventory URL.
@@ -106,6 +211,7 @@ class DocCog(commands.Cog):
self.base_urls = {}
self.bot = bot
self.doc_symbols: Dict[str, DocItem] = {}
+ self.item_fetcher = CachedParser()
self.renamed_symbols = set()
self.bot.loop.create_task(self.init_refresh_inventory())
@@ -163,7 +269,10 @@ class DocCog(commands.Cog):
symbol = f"{api_package_name}.{symbol}"
self.renamed_symbols.add(symbol)
- self.doc_symbols[symbol] = DocItem(base_url, relative_doc_url, api_package_name, group_name)
+ relative_url_path, _, symbol_id = relative_doc_url.partition("#")
+ symbol_item = DocItem(api_package_name, group_name, base_url, relative_url_path, symbol_id)
+ self.doc_symbols[symbol] = symbol_item
+ self.item_fetcher.add_item(symbol_item)
log.trace(f"Fetched inventory for {api_package_name}.")
@@ -177,7 +286,7 @@ class DocCog(commands.Cog):
self.base_urls.clear()
self.doc_symbols.clear()
self.renamed_symbols.clear()
- async_cache.cache = OrderedDict()
+ await self.item_fetcher.clear()
# Run all coroutines concurrently - since each of them performs a HTTP
# request, this speeds up fetching the inventory data heavily.
@@ -198,12 +307,11 @@ class DocCog(commands.Cog):
if symbol_info is None:
return None
self.bot.stats.incr(f"doc_fetches.{symbol_info.package.lower()}")
- embed_description = await get_symbol_markdown(self.bot.http_session, symbol_info)
embed = discord.Embed(
title=discord.utils.escape_markdown(symbol),
url=f"{symbol_info.url}#{symbol_info.symbol_id}",
- description=embed_description
+ description=await self.item_fetcher.get_markdown(self.bot.http_session, symbol_info)
)
# Show all symbols with the same name that were renamed in the footer.
embed.set_footer(
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 1271953d4..9fbce7bed 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -5,11 +5,9 @@ import textwrap
from functools import partial
from typing import Callable, List, Optional, TYPE_CHECKING, Tuple, Union
-from aiohttp import ClientSession
from bs4 import BeautifulSoup
from bs4.element import NavigableString, PageElement, Tag
-from .cache import async_cache
from .html import Strainer
from .markdown import markdownify
if TYPE_CHECKING:
@@ -171,16 +169,6 @@ def _parse_into_markdown(signatures: Optional[List[str]], description: str, url:
return formatted_markdown
-@async_cache(arg_offset=1)
-async def _get_soup_from_url(http_session: ClientSession, url: str) -> BeautifulSoup:
- """Create a BeautifulSoup object from the HTML data in `url` with the head tag removed."""
- log.trace(f"Sending a request to {url}.")
- async with http_session.get(url) as response:
- soup = BeautifulSoup(await response.text(encoding="utf8"), 'lxml')
- soup.find("head").decompose() # the head contains no useful data so we can remove it
- return soup
-
-
def _match_end_tag(tag: Tag) -> bool:
"""Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table."""
for attr in _SEARCH_END_TAG_ATTRS:
@@ -190,44 +178,28 @@ def _match_end_tag(tag: Tag) -> bool:
return tag.name == "table"
-async def get_symbol_markdown(http_session: ClientSession, symbol_data: "DocItem") -> str:
+def get_symbol_markdown(soup: BeautifulSoup, symbol_data: "DocItem") -> str:
"""
- Return parsed markdown of the passed symbol, truncated to 1000 characters.
+    Return parsed markdown of the passed symbol using the passed-in soup, truncated to 1000 characters.
- A request through `http_session` is made to the url associated with `symbol_data` for the html contents;
- the contents are then parsed depending on what group the symbol belongs to.
+ The method of parsing and what information gets included depends on the symbol's group.
"""
- log.trace(f"Parsing symbol from url {symbol_data.url}.")
- if "#" in symbol_data.url:
- request_url, symbol_id = symbol_data.url.rsplit('#')
- else:
- request_url = symbol_data.url
- symbol_id = None
-
- soup = await _get_soup_from_url(http_session, request_url)
- symbol_heading = soup.find(id=symbol_id)
+ symbol_heading = soup.find(id=symbol_data.symbol_id)
signature = None
# Modules, doc pages and labels don't point to description list tags but to tags like divs,
    # so no special parsing can be done and we only try to include what's under them.
if symbol_data.group in {"module", "doc", "label"}:
- log.trace("Symbol is a module, doc or a label; using general description parsing.")
description = _get_general_description(symbol_heading)
elif symbol_heading.name != "dt":
# Use the general parsing for symbols that aren't modules, docs or labels and aren't dt tags,
# log info the tag can be looked at.
- log.info(
- f"Symbol heading at url {symbol_data.url} was not a dt tag or from known groups that lack it,"
- f"handling as general description."
- )
description = _get_general_description(symbol_heading)
elif symbol_data.group in _NO_SIGNATURE_GROUPS:
- log.trace("Symbol's group is in the group signature blacklist, skipping parsing of signature.")
description = _get_dd_description(symbol_heading)
else:
- log.trace("Parsing both signature and description of symbol.")
signature = _get_signatures(symbol_heading)
description = _get_dd_description(symbol_heading)
--
cgit v1.2.3
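
The pattern CachedParser introduces — queue items on the first request for a
page, parse them in a background task, and wake waiters through per-item
events — can be reduced to a short self-contained sketch (names here are
illustrative, not the cog's API):

    import asyncio

    class LazyParser:
        """Parse queued items in a background task; cache results and wake waiters."""

        def __init__(self):
            self._queue = []
            self._results = {}
            self._events = {}
            self._task = None

        async def get(self, item: str) -> str:
            if item in self._results:
                return self._results[item]
            self._queue.append(item)
            if self._task is None:
                self._task = asyncio.create_task(self._drain())
            self._events[item] = event = asyncio.Event()
            await event.wait()
            return self._results[item]

        async def _drain(self) -> None:
            while self._queue:
                item = self._queue.pop()
                self._results[item] = item.upper()  # stand-in for the real parsing
                if (event := self._events.get(item)) is not None:
                    event.set()
                await asyncio.sleep(0)  # yield to the event loop between items
            self._task = None

    async def main():
        parser = LazyParser()
        print(await parser.get("symbol"))  # SYMBOL

    asyncio.run(main())
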
From 38753114c0d056ba330296c9fea7a8f2312459f9 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 20 Sep 2020 03:08:36 +0200
Subject: Replace forward ref with future annotations import
---
bot/cogs/doc/parsing.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 9fbce7bed..21a3065f4 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
import logging
import re
import string
@@ -178,7 +180,7 @@ def _match_end_tag(tag: Tag) -> bool:
return tag.name == "table"
-def get_symbol_markdown(soup: BeautifulSoup, symbol_data: "DocItem") -> str:
+def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> str:
"""
    Return parsed markdown of the passed symbol using the passed-in soup, truncated to 1000 characters.
--
cgit v1.2.3
From de440ce8c4539972ea0f0538042e6cb41a4395dc Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 20 Sep 2020 03:09:24 +0200
Subject: Remove unused cache
---
bot/cogs/doc/cache.py | 32 --------------------------------
1 file changed, 32 deletions(-)
delete mode 100644 bot/cogs/doc/cache.py
diff --git a/bot/cogs/doc/cache.py b/bot/cogs/doc/cache.py
deleted file mode 100644
index 9da2a1dab..000000000
--- a/bot/cogs/doc/cache.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import functools
-from collections import OrderedDict
-from typing import Any, Callable
-
-
-def async_cache(max_size: int = 128, arg_offset: int = 0) -> Callable:
- """
- LRU cache implementation for coroutines.
-
- Once the cache exceeds the maximum size, keys are deleted in FIFO order.
-
- An offset may be optionally provided to be applied to the coroutine's arguments when creating the cache key.
- """
- # Assign the cache to the function itself so we can clear it from outside.
- async_cache.cache = OrderedDict()
-
- def decorator(function: Callable) -> Callable:
- """Define the async_cache decorator."""
- @functools.wraps(function)
- async def wrapper(*args) -> Any:
- """Decorator wrapper for the caching logic."""
- key = ':'.join(args[arg_offset:])
-
- value = async_cache.cache.get(key)
- if value is None:
- if len(async_cache.cache) > max_size:
- async_cache.cache.popitem(last=False)
-
- async_cache.cache[key] = await function(*args)
- return async_cache.cache[key]
- return wrapper
- return decorator
--
cgit v1.2.3
From 758dd3ef6ca5c1cd7615f0eb6688d7d2f19578ea Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 20 Sep 2020 23:46:54 +0200
Subject: Log exceptions from parsing task
---
bot/cogs/doc/cog.py | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index fc01dfb20..7c1bf2a5f 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -133,9 +133,13 @@ class CachedParser:
log.trace("Starting queue parsing.")
while self._queue:
item, soup = self._queue.pop()
- self._results[item] = get_symbol_markdown(soup, item)
- if (event := self._item_events.get(item)) is not None:
- event.set()
+ try:
+ self._results[item] = get_symbol_markdown(soup, item)
+ except Exception:
+ log.exception(f"Unexpected error when handling {item}")
+ else:
+ if (event := self._item_events.get(item)) is not None:
+ event.set()
await asyncio.sleep(0.1)
self._parse_task = None
--
cgit v1.2.3
From 7ab949e09a22d7547f74caa447d81299f7b52e47 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 21 Sep 2020 00:30:08 +0200
Subject: Properly truncate description markdown
The previous truncating implementation used a naive method that
disregarded the actual markdown formatting, possibly cutting it
apart. With the introduction of proper href tags this became
impossible to manage without writing an actual parser, so the
truncation was moved to happen while the gathered bs4 elements are
converted into markdown.
---
bot/cogs/doc/markdown.py | 7 +---
bot/cogs/doc/parsing.py | 86 +++++++++++++++++++++++++++---------------------
2 files changed, 49 insertions(+), 44 deletions(-)
diff --git a/bot/cogs/doc/markdown.py b/bot/cogs/doc/markdown.py
index dca477d35..a95e94991 100644
--- a/bot/cogs/doc/markdown.py
+++ b/bot/cogs/doc/markdown.py
@@ -4,7 +4,7 @@ from bs4.element import PageElement
from markdownify import MarkdownConverter
-class _DocMarkdownConverter(MarkdownConverter):
+class DocMarkdownConverter(MarkdownConverter):
"""Subclass markdownify's MarkdownCoverter to provide custom conversion methods."""
def __init__(self, *, page_url: str, **options):
@@ -51,8 +51,3 @@ class _DocMarkdownConverter(MarkdownConverter):
if parent is not None and parent.name == "li":
return f"{text}\n"
return super().convert_p(el, text)
-
-
-def markdownify(html: str, *, url: str = "") -> str:
- """Create a DocMarkdownConverter object from the input html."""
- return _DocMarkdownConverter(bullets='•', page_url=url).convert(html)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 21a3065f4..ed6343cd8 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -5,13 +5,13 @@ import re
import string
import textwrap
from functools import partial
-from typing import Callable, List, Optional, TYPE_CHECKING, Tuple, Union
+from typing import Callable, Iterable, List, Optional, TYPE_CHECKING, Tuple, Union
from bs4 import BeautifulSoup
from bs4.element import NavigableString, PageElement, Tag
from .html import Strainer
-from .markdown import markdownify
+from .markdown import DocMarkdownConverter
if TYPE_CHECKING:
from .cog import DocItem
@@ -39,6 +39,8 @@ _NO_SIGNATURE_GROUPS = {
"templatetag",
"term",
}
+_MAX_DESCRIPTION_LENGTH = 1800
+_TRUNCATE_STRIP_CHARACTERS = "!?:;." + string.whitespace
def _find_elements_until_tag(
@@ -80,7 +82,7 @@ _find_next_siblings_until_tag = partial(_find_elements_until_tag, func=Beautiful
_find_previous_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
-def _get_general_description(start_element: PageElement) -> Optional[str]:
+def _get_general_description(start_element: PageElement) -> Iterable[Union[Tag, NavigableString]]:
"""
    Get page content up to a table or a tag whose class is in `SEARCH_END_TAG_ATTRS`.
@@ -89,18 +91,13 @@ def _get_general_description(start_element: PageElement) -> Optional[str]:
"""
header = start_element.find_next("a", attrs={"class": "headerlink"})
start_tag = header.parent if header is not None else start_element
- description = "".join(
- str(tag) for tag in _find_next_siblings_until_tag(start_tag, _match_end_tag, include_strings=True)
- )
+ return _find_next_siblings_until_tag(start_tag, _match_end_tag, include_strings=True)
- return description
-
-def _get_dd_description(symbol: PageElement) -> str:
- """Get the string contents of the next dd tag, up to a dt or a dl tag."""
+def _get_dd_description(symbol: PageElement) -> List[Union[Tag, NavigableString]]:
+ """Get the contents of the next dd tag, up to a dt or a dl tag."""
description_tag = symbol.find_next("dd")
- description_contents = _find_next_children_until_tag(description_tag, ("dt", "dl"), include_strings=True)
- return "".join(str(tag) for tag in description_contents)
+ return _find_next_children_until_tag(description_tag, ("dt", "dl"), include_strings=True)
def _get_signatures(start_signature: PageElement) -> List[str]:
@@ -124,43 +121,57 @@ def _get_signatures(start_signature: PageElement) -> List[str]:
return signatures
-def _truncate_markdown(markdown: str, max_length: int) -> str:
+def _get_truncated_description(
+ elements: Iterable[Union[Tag, NavigableString]],
+ markdown_converter: DocMarkdownConverter,
+ max_length: int,
+) -> str:
"""
- Truncate `markdown` to be at most `max_length` characters.
+ Truncate markdown from `elements` to be at most `max_length` characters visually.
- The markdown string is searched for substrings to cut at, to keep its structure,
- but if none are found the string is simply sliced.
+ `max_length` limits the length of the rendered characters in the string,
+    with the real string length limited to `_MAX_DESCRIPTION_LENGTH` to accommodate discord length limits.
"""
- if len(markdown) > max_length:
- shortened = markdown[:max_length]
- description_cutoff = shortened.rfind('\n\n', 100)
- if description_cutoff == -1:
- # Search the shortened version for cutoff points in decreasing desirability,
- # cutoff at 1000 if none are found.
- for cutoff_string in (". ", ", ", ",", " "):
- description_cutoff = shortened.rfind(cutoff_string)
- if description_cutoff != -1:
- break
+ visual_length = 0
+ real_length = 0
+ result = []
+ shortened = False
+
+ for element in elements:
+ is_tag = isinstance(element, Tag)
+ element_length = len(element.text) if is_tag else len(element)
+ if visual_length + element_length < max_length:
+ if is_tag:
+ element_markdown = markdown_converter.process_tag(element)
+ else:
+ element_markdown = markdown_converter.process_text(element)
+
+ element_markdown_length = len(element_markdown)
+ if real_length + element_markdown_length < _MAX_DESCRIPTION_LENGTH:
+ result.append(element_markdown)
else:
- description_cutoff = max_length
- markdown = markdown[:description_cutoff]
+ shortened = True
+ break
+ real_length += element_markdown_length
+ visual_length += element_length
+ else:
+ shortened = True
+ break
- # If there is an incomplete code block, cut it out
- if markdown.count("```") % 2:
- codeblock_start = markdown.rfind('```py')
- markdown = markdown[:codeblock_start].rstrip()
- markdown = markdown.rstrip(string.punctuation) + "..."
- return markdown
+ markdown_string = "".join(result)
+ if shortened:
+ markdown_string = markdown_string.rstrip(_TRUNCATE_STRIP_CHARACTERS) + "..."
+ return markdown_string
-def _parse_into_markdown(signatures: Optional[List[str]], description: str, url: str) -> str:
+def _parse_into_markdown(signatures: Optional[List[str]], description: Iterable[Tag], url: str) -> str:
"""
Create a markdown string with the signatures at the top, and the converted html description below them.
The signatures are wrapped in python codeblocks, separated from the description by a newline.
The result string is truncated to be max 1000 symbols long.
"""
- description = _truncate_markdown(markdownify(description, url=url), 1000)
+ description = _get_truncated_description(description, DocMarkdownConverter(bullets="•", page_url=url), 750)
description = _WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
if signatures is not None:
formatted_markdown = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures)
@@ -204,5 +215,4 @@ def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> str:
else:
signature = _get_signatures(symbol_heading)
description = _get_dd_description(symbol_heading)
-
- return _parse_into_markdown(signature, description.replace('¶', ''), symbol_data.url)
+ return _parse_into_markdown(signature, description, symbol_data.url).replace('¶', '')
--
cgit v1.2.3
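
The dual limit above — a visual cap on the rendered text and a harder cap on
the raw markdown string — can be illustrated with a toy version (the function
name and the limits below are made up for the example):

    def truncate(parts, max_visual: int, max_real: int) -> str:
        """Join (rendered_text, markdown) pairs until either limit is hit."""
        visual = real = 0
        result = []
        shortened = False
        for text, markdown in parts:
            if visual + len(text) >= max_visual or real + len(markdown) >= max_real:
                shortened = True
                break
            result.append(markdown)
            visual += len(text)
            real += len(markdown)
        out = "".join(result)
        return (out.rstrip("!?:;. \n") + "...") if shortened else out

    # A link renders as 7 characters but costs 28 in its markdown form.
    print(truncate([("example", "[example](https://e.x/ample)")] * 10, 30, 60))
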
From 3eed4af70fa24e5daef6c5e6d2d145094b9e672f Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 21 Sep 2020 00:39:15 +0200
Subject: Use f-strings instead of C-style formatting on copied code
The code copied over from MarkdownConverter's implementation used
C-style string formatting; there is no reason to keep that string
style in our code.
---
bot/cogs/doc/markdown.py | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/bot/cogs/doc/markdown.py b/bot/cogs/doc/markdown.py
index a95e94991..ba35a84c4 100644
--- a/bot/cogs/doc/markdown.py
+++ b/bot/cogs/doc/markdown.py
@@ -14,18 +14,18 @@ class DocMarkdownConverter(MarkdownConverter):
def convert_li(self, el: PageElement, text: str) -> str:
"""Fix markdownify's erroneous indexing in ol tags."""
parent = el.parent
- if parent is not None and parent.name == 'ol':
+ if parent is not None and parent.name == "ol":
li_tags = parent.find_all("li")
- bullet = '%s.' % (li_tags.index(el)+1)
+ bullet = f"{li_tags.index(el)+1}."
else:
depth = -1
while el:
- if el.name == 'ul':
+ if el.name == "ul":
depth += 1
el = el.parent
- bullets = self.options['bullets']
+ bullets = self.options["bullets"]
bullet = bullets[depth % len(bullets)]
- return '%s %s\n' % (bullet, text or '')
+ return f"{bullet} {text}\n"
def convert_hn(self, _n: int, el: PageElement, text: str) -> str:
"""Convert h tags to bold text with ** instead of adding #."""
@@ -33,11 +33,11 @@ class DocMarkdownConverter(MarkdownConverter):
def convert_code(self, el: PageElement, text: str) -> str:
"""Undo `markdownify`s underscore escaping."""
- return f"`{text}`".replace('\\', '')
+ return f"`{text}`".replace("\\", "")
def convert_pre(self, el: PageElement, text: str) -> str:
"""Wrap any codeblocks in `py` for syntax highlighting."""
- code = ''.join(el.strings)
+ code = "".join(el.strings)
return f"```py\n{code}```"
def convert_a(self, el: PageElement, text: str) -> str:
--
cgit v1.2.3
From b6ef6b6bc30b02e0a6797dd9feae167da2cb6e5b Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 21 Sep 2020 00:52:40 +0200
Subject: Handle cases with outdated bot inventories.
---
bot/cogs/doc/parsing.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index ed6343cd8..939f963f1 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -198,6 +198,9 @@ def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> str:
The method of parsing and what information gets included depends on the symbol's group.
"""
symbol_heading = soup.find(id=symbol_data.symbol_id)
+ if symbol_heading is None:
+        log.warning("Symbol present in loaded inventories not found on site; consider refreshing inventories.")
+ return "Unable to parse the requested symbol."
signature = None
# Modules, doc pages and labels don't point to description list tags but to tags like divs,
    # so no special parsing can be done and we only try to include what's under them.
--
cgit v1.2.3
From ba73313adaff363bef9e3a505bf66373ea915997 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 21 Sep 2020 22:36:18 +0200
Subject: Use List typehint that has a narrower scope
---
bot/cogs/doc/parsing.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 939f963f1..9c82a1c13 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -82,7 +82,7 @@ _find_next_siblings_until_tag = partial(_find_elements_until_tag, func=Beautiful
_find_previous_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
-def _get_general_description(start_element: PageElement) -> Iterable[Union[Tag, NavigableString]]:
+def _get_general_description(start_element: PageElement) -> List[Union[Tag, NavigableString]]:
"""
Get page content to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`.
--
cgit v1.2.3
From 730f30197c43cc170aaecde664712f6f4aaea246 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 26 Sep 2020 17:49:43 +0200
Subject: Collapse signatures between args instead of spaces
Signature truncation needed more logic and shorter limits
to ensure the output fits nicely into a discord message.
---
bot/cogs/doc/parsing.py | 95 +++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 92 insertions(+), 3 deletions(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 9c82a1c13..7dddadf43 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -5,7 +5,7 @@ import re
import string
import textwrap
from functools import partial
-from typing import Callable, Iterable, List, Optional, TYPE_CHECKING, Tuple, Union
+from typing import Callable, Collection, Iterable, List, Optional, TYPE_CHECKING, Tuple, Union
from bs4 import BeautifulSoup
from bs4.element import NavigableString, PageElement, Tag
@@ -19,6 +19,7 @@ log = logging.getLogger(__name__)
_UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
_WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
+_PARAMETERS_RE = re.compile(r"\((.+)\)")
_SEARCH_END_TAG_ATTRS = (
"data",
@@ -39,8 +40,59 @@ _NO_SIGNATURE_GROUPS = {
"templatetag",
"term",
}
-_MAX_DESCRIPTION_LENGTH = 1800
+_EMBED_CODE_BLOCK_LENGTH = 61
+# Three code block wrapped lines with py syntax highlight
+_MAX_SIGNATURES_LENGTH = (_EMBED_CODE_BLOCK_LENGTH + 8) * 3
+# Maximum discord message length - signatures on top
+_MAX_DESCRIPTION_LENGTH = 2000 - _MAX_SIGNATURES_LENGTH
_TRUNCATE_STRIP_CHARACTERS = "!?:;." + string.whitespace
+_BRACKET_PAIRS = {
+ "{": "}",
+ "(": ")",
+ "[": "]",
+}
+
+
+def _split_parameters(parameters_string: str) -> List[str]:
+ """
+ Split parameters of a signature into individual parameter strings on commas.
+
+ Long string literals are not accounted for.
+ """
+ parameters_list = []
+ last_split = 0
+ depth = 0
+ expected_end = None
+ current_search = None
+ previous_character = ""
+
+ for index, character in enumerate(parameters_string):
+ if character in _BRACKET_PAIRS:
+ if current_search is None:
+ current_search = character
+ expected_end = _BRACKET_PAIRS[character]
+ if character == current_search:
+ depth += 1
+
+ elif character in {"'", '"'}:
+ if depth == 0:
+ depth += 1
+ elif not previous_character == "\\":
+ depth -= 1
+
+ elif character == expected_end:
+ depth -= 1
+ if depth == 0:
+ current_search = None
+ expected_end = None
+
+ elif depth == 0 and character == ",":
+ parameters_list.append(parameters_string[last_split:index])
+ last_split = index + 1
+ previous_character = character
+
+ parameters_list.append(parameters_string[last_split:])
+ return parameters_list
def _find_elements_until_tag(
@@ -121,6 +173,43 @@ def _get_signatures(start_signature: PageElement) -> List[str]:
return signatures
+def _truncate_signatures(signatures: Collection[str]) -> Union[List[str], Collection[str]]:
+ """
+    Truncate passed signatures to not exceed `_MAX_SIGNATURES_LENGTH`.
+
+    If the signatures need to be truncated, parameters are collapsed until they fit within the limit.
+ Individual signatures can consist of max 1, 2 or 3 lines of text, inversely proportional to the amount of them.
+ A maximum of 3 signatures is assumed to be passed.
+ """
+ if not sum(len(signature) for signature in signatures) > _MAX_SIGNATURES_LENGTH:
+ return signatures
+
+ max_signature_length = _EMBED_CODE_BLOCK_LENGTH * (4 - len(signatures))
+ formatted_signatures = []
+ for signature in signatures:
+ signature = signature.strip()
+ if len(signature) > max_signature_length:
+ if (parameters_match := _PARAMETERS_RE.search(signature)) is None:
+ formatted_signatures.append(textwrap.shorten(signature, max_signature_length))
+ continue
+
+ truncated_signature = []
+ parameters_string = parameters_match[1]
+ running_length = len(signature) - len(parameters_string)
+ for parameter in _split_parameters(parameters_string):
+ if (len(parameter) + running_length) <= max_signature_length - 4: # account for comma and placeholder
+ truncated_signature.append(parameter)
+ running_length += len(parameter) + 1
+ else:
+ truncated_signature.append(" ...")
+ formatted_signatures.append(signature.replace(parameters_string, ",".join(truncated_signature)))
+ break
+ else:
+ formatted_signatures.append(signature)
+
+ return formatted_signatures
+
+
def _get_truncated_description(
elements: Iterable[Union[Tag, NavigableString]],
markdown_converter: DocMarkdownConverter,
@@ -174,7 +263,7 @@ def _parse_into_markdown(signatures: Optional[List[str]], description: Iterable[
description = _get_truncated_description(description, DocMarkdownConverter(bullets="•", page_url=url), 750)
description = _WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
if signatures is not None:
- formatted_markdown = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures)
+ formatted_markdown = "".join(f"```py\n{signature}```" for signature in _truncate_signatures(signatures))
else:
formatted_markdown = ""
formatted_markdown += f"\n{description}"
--
cgit v1.2.3
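
The bracket-depth tracking in _split_parameters can be exercised in isolation;
a simplified standalone version (ignoring the string-literal handling) splits
only on top-level commas:

    PAIRS = {"{": "}", "(": ")", "[": "]"}

    def split_top_level(params: str) -> list:
        """Split on commas that are not nested inside brackets."""
        out, last, stack = [], 0, []
        for i, ch in enumerate(params):
            if ch in PAIRS:
                stack.append(PAIRS[ch])
            elif stack and ch == stack[-1]:
                stack.pop()
            elif not stack and ch == ",":
                out.append(params[last:i].strip())
                last = i + 1
        out.append(params[last:].strip())
        return out

    print(split_top_level("a, b=(1, 2), c={'k': [3, 4]}"))
    # ['a', 'b=(1, 2)', "c={'k': [3, 4]}"]
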
From e10f91fce08f26f92776c3641ddd26f961a0c8b8 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 26 Sep 2020 17:51:52 +0200
Subject: Make amount of included signatures configurable
---
bot/cogs/doc/parsing.py | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index 7dddadf43..cf1124936 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -17,6 +17,8 @@ if TYPE_CHECKING:
log = logging.getLogger(__name__)
+_MAX_SIGNATURE_AMOUNT = 3
+
_UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
_WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
_PARAMETERS_RE = re.compile(r"\((.+)\)")
@@ -41,8 +43,8 @@ _NO_SIGNATURE_GROUPS = {
"term",
}
_EMBED_CODE_BLOCK_LENGTH = 61
-# Three code block wrapped lines with py syntax highlight
-_MAX_SIGNATURES_LENGTH = (_EMBED_CODE_BLOCK_LENGTH + 8) * 3
+# _MAX_SIGNATURE_AMOUNT code block wrapped lines with py syntax highlight
+_MAX_SIGNATURES_LENGTH = (_EMBED_CODE_BLOCK_LENGTH + 8) * _MAX_SIGNATURE_AMOUNT
# Maximum discord message length - signatures on top
_MAX_DESCRIPTION_LENGTH = 2000 - _MAX_SIGNATURES_LENGTH
_TRUNCATE_STRIP_CHARACTERS = "!?:;." + string.whitespace
@@ -154,7 +156,7 @@ def _get_dd_description(symbol: PageElement) -> List[Union[Tag, NavigableString]
def _get_signatures(start_signature: PageElement) -> List[str]:
"""
- Collect up to 3 signatures from dt tags around the `start_signature` dt tag.
+ Collect up to `_MAX_SIGNATURE_AMOUNT` signatures from dt tags around the `start_signature` dt tag.
First the signatures under the `start_signature` are included;
if less than 2 are found, tags above the start signature are added to the result if any are present.
@@ -164,7 +166,7 @@ def _get_signatures(start_signature: PageElement) -> List[str]:
*reversed(_find_previous_siblings_until_tag(start_signature, ("dd",), limit=2)),
start_signature,
*_find_next_siblings_until_tag(start_signature, ("dd",), limit=2),
- )[-3:]:
+ )[-_MAX_SIGNATURE_AMOUNT:]:
signature = _UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
if signature:
@@ -178,13 +180,14 @@ def _truncate_signatures(signatures: Collection[str]) -> Union[List[str], Collec
    Truncate passed signatures to not exceed `_MAX_SIGNATURES_LENGTH`.
    If the signatures need to be truncated, parameters are collapsed until they fit within the limit.
- Individual signatures can consist of max 1, 2 or 3 lines of text, inversely proportional to the amount of them.
- A maximum of 3 signatures is assumed to be passed.
+ Individual signatures can consist of max 1, 2, ..., `_MAX_SIGNATURE_AMOUNT` lines of text,
+ inversely proportional to the amount of signatures.
+ A maximum of `_MAX_SIGNATURE_AMOUNT` signatures is assumed to be passed.
"""
if not sum(len(signature) for signature in signatures) > _MAX_SIGNATURES_LENGTH:
return signatures
- max_signature_length = _EMBED_CODE_BLOCK_LENGTH * (4 - len(signatures))
+ max_signature_length = _EMBED_CODE_BLOCK_LENGTH * (_MAX_SIGNATURE_AMOUNT + 1 - len(signatures))
formatted_signatures = []
for signature in signatures:
signature = signature.strip()
--
cgit v1.2.3
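
Worked through with the constants above, the per-signature budget grows as the
number of signatures shrinks (a quick arithmetic illustration):

    _EMBED_CODE_BLOCK_LENGTH = 61
    _MAX_SIGNATURE_AMOUNT = 3

    # Total budget: (61 + 8) * 3 == 207 characters of signatures.
    print((_EMBED_CODE_BLOCK_LENGTH + 8) * _MAX_SIGNATURE_AMOUNT)

    # Per-signature budget when truncating: 1 signature -> 183, 2 -> 122, 3 -> 61.
    for count in (1, 2, 3):
        print(count, _EMBED_CODE_BLOCK_LENGTH * (_MAX_SIGNATURE_AMOUNT + 1 - count))
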
From a2e7db718fbeb6fabb5e261ef4414038477abfb2 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 28 Sep 2020 23:43:58 +0200
Subject: Add parentheses for clarity
---
bot/cogs/doc/parsing.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/cogs/doc/parsing.py b/bot/cogs/doc/parsing.py
index cf1124936..7cf4ec7ba 100644
--- a/bot/cogs/doc/parsing.py
+++ b/bot/cogs/doc/parsing.py
@@ -166,7 +166,7 @@ def _get_signatures(start_signature: PageElement) -> List[str]:
*reversed(_find_previous_siblings_until_tag(start_signature, ("dd",), limit=2)),
start_signature,
*_find_next_siblings_until_tag(start_signature, ("dd",), limit=2),
- )[-_MAX_SIGNATURE_AMOUNT:]:
+ )[-(_MAX_SIGNATURE_AMOUNT):]:
signature = _UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
if signature:
--
cgit v1.2.3
From 2b97cfad08f7dac0ea1ce6119bab004b4c2452e7 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 29 Sep 2020 23:03:36 +0200
Subject: Add async implementation of sphinx fetch_inventory
The sphinx version of the function does a lot of checks that are
unnecessary for the bot, which uses nothing from sphinx besides the
inventories. The custom implementation means we can throw some of
that code out and get rid of sphinx as a dependency.
---
LICENSE-THIRD-PARTY | 30 ++++++++++++++
bot/cogs/doc/inventory_parser.py | 87 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 117 insertions(+)
create mode 100644 LICENSE-THIRD-PARTY
create mode 100644 bot/cogs/doc/inventory_parser.py
diff --git a/LICENSE-THIRD-PARTY b/LICENSE-THIRD-PARTY
new file mode 100644
index 000000000..f78491fc1
--- /dev/null
+++ b/LICENSE-THIRD-PARTY
@@ -0,0 +1,30 @@
+License for Sphinx
+Applies to:
+ - bot/cogs/doc/inventory_parser.py: _load_v1, _load_v2 and ZlibStreamReader.__aiter__.
+==================
+
+Copyright (c) 2007-2020 by the Sphinx team (see AUTHORS file).
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/bot/cogs/doc/inventory_parser.py b/bot/cogs/doc/inventory_parser.py
new file mode 100644
index 000000000..6c2b63d5e
--- /dev/null
+++ b/bot/cogs/doc/inventory_parser.py
@@ -0,0 +1,87 @@
+import re
+import zlib
+from collections import defaultdict
+from typing import AsyncIterator, DefaultDict, List, Tuple
+
+import aiohttp
+
+_V2_LINE_RE = re.compile(r'(?x)(.+?)\s+(\S*:\S*)\s+(-?\d+)\s+?(\S*)\s+(.*)')
+
+
+class ZlibStreamReader:
+ """Class used for decoding zlib data of a stream line by line."""
+
+ READ_CHUNK_SIZE = 16 * 1024
+
+ def __init__(self, stream: aiohttp.StreamReader) -> None:
+ self.stream = stream
+
+ async def _read_compressed_chunks(self) -> AsyncIterator[bytes]:
+ """Read zlib data in `READ_CHUNK_SIZE` sized chunks and decompress."""
+ decompressor = zlib.decompressobj()
+ async for chunk in self.stream.iter_chunked(self.READ_CHUNK_SIZE):
+ yield decompressor.decompress(chunk)
+
+ yield decompressor.flush()
+
+ async def __aiter__(self) -> AsyncIterator[str]:
+ """Yield lines of decompressed text."""
+ buf = b''
+ async for chunk in self._read_compressed_chunks():
+ buf += chunk
+ pos = buf.find(b'\n')
+ while pos != -1:
+ yield buf[:pos].decode()
+ buf = buf[pos + 1:]
+ pos = buf.find(b'\n')
+
+
+async def _load_v1(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[str, str]]]:
+ invdata = defaultdict(list)
+
+ async for line in stream:
+ name, type_, location = line.decode().rstrip().split(maxsplit=2)
+ # version 1 did not add anchors to the location
+ if type_ == 'mod':
+ type_ = 'py:module'
+ location += '#module-' + name
+ else:
+ type_ = 'py:' + type_
+ location += '#' + name
+ invdata[type_].append((name, location))
+ return invdata
+
+
+async def _load_v2(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[str, str]]]:
+ invdata = defaultdict(list)
+
+ async for line in ZlibStreamReader(stream):
+ m = _V2_LINE_RE.match(line.rstrip())
+ name, type_, _prio, location, _dispname = m.groups() # ignore the parsed items we don't need
+ if location.endswith('$'):
+ location = location[:-1] + name
+
+ invdata[type_].append((name, location))
+ return invdata
+
+
+async def fetch_inventory(client_session: aiohttp.ClientSession, url: str) -> DefaultDict[str, List[Tuple[str, str]]]:
+    """Fetch, parse and return an intersphinx inventory file from a url."""
+ timeout = aiohttp.ClientTimeout(sock_connect=5, sock_read=5)
+ async with client_session.get(url, timeout=timeout, raise_for_status=True) as response:
+ stream = response.content
+
+ inventory_header = (await stream.readline()).decode().rstrip()
+ inventory_version = int(inventory_header[-1:])
+ await stream.readline() # skip project name
+ await stream.readline() # skip project version
+
+ if inventory_version == 1:
+ return await _load_v1(stream)
+
+ elif inventory_version == 2:
+ if b"zlib" not in await stream.readline():
+ raise ValueError(f"Invalid inventory file at url {url}.")
+ return await _load_v2(stream)
+
+ raise ValueError(f"Invalid inventory file at url {url}.")
--
cgit v1.2.3
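
For reference, a version 2 inventory starts with four plain-text header lines;
everything after them is a single zlib stream of entry lines. A small
round-trip sketch using the same line regex as the new module (the entry
below is made up):

    import re
    import zlib

    # A v2 inventory begins with four plain-text lines:
    #   # Sphinx inventory version 2
    #   # Project: example
    #   # Version: 1.0
    #   # The remainder of this file is compressed using zlib.
    # Everything after them is one zlib stream of entry lines.
    compressed = zlib.compress(b"json.dumps py:function 1 library/json.html#$ -\n")

    _V2_LINE_RE = re.compile(r'(?x)(.+?)\s+(\S*:\S*)\s+(-?\d+)\s+?(\S*)\s+(.*)')

    for line in zlib.decompress(compressed).decode().splitlines():
        name, type_, _prio, location, _dispname = _V2_LINE_RE.match(line).groups()
        if location.endswith("$"):  # "$" is shorthand for the entry's own name
            location = location[:-1] + name
        print(name, type_, location)
    # json.dumps py:function library/json.html#json.dumps
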
From d8c36ac9f189ba9638ef91df7628f95845161f8e Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 30 Sep 2020 00:19:39 +0200
Subject: Handle errors on inventory fetching
---
bot/cogs/doc/inventory_parser.py | 37 +++++++++++++++++++++++++++++++++++--
1 file changed, 35 insertions(+), 2 deletions(-)
diff --git a/bot/cogs/doc/inventory_parser.py b/bot/cogs/doc/inventory_parser.py
index 6c2b63d5e..23931869b 100644
--- a/bot/cogs/doc/inventory_parser.py
+++ b/bot/cogs/doc/inventory_parser.py
@@ -1,10 +1,14 @@
+import logging
import re
import zlib
from collections import defaultdict
-from typing import AsyncIterator, DefaultDict, List, Tuple
+from typing import AsyncIterator, DefaultDict, List, Optional, Tuple
import aiohttp
+log = logging.getLogger(__name__)
+
+FAILED_REQUEST_ATTEMPTS = 3
_V2_LINE_RE = re.compile(r'(?x)(.+?)\s+(\S*:\S*)\s+(-?\d+)\s+?(\S*)\s+(.*)')
@@ -65,7 +69,7 @@ async def _load_v2(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[
return invdata
-async def fetch_inventory(client_session: aiohttp.ClientSession, url: str) -> DefaultDict[str, List[Tuple[str, str]]]:
+async def _fetch_inventory(client_session: aiohttp.ClientSession, url: str) -> DefaultDict[str, List[Tuple[str, str]]]:
"""Fetch, parse and return an intersphinx inventory file from an url."""
timeout = aiohttp.ClientTimeout(sock_connect=5, sock_read=5)
async with client_session.get(url, timeout=timeout, raise_for_status=True) as response:
@@ -85,3 +89,32 @@ async def fetch_inventory(client_session: aiohttp.ClientSession, url: str) -> De
return await _load_v2(stream)
raise ValueError(f"Invalid inventory file at url {url}.")
+
+
+async def fetch_inventory(
+ client_session: aiohttp.ClientSession,
+ url: str
+) -> Optional[DefaultDict[str, List[Tuple[str, str]]]]:
+ """Get inventory from `url`, retrying `FAILED_REQUEST_ATTEMPTS` times on errors."""
+ for attempt in range(1, FAILED_REQUEST_ATTEMPTS+1):
+ try:
+ inventory = await _fetch_inventory(client_session, url)
+ except aiohttp.ClientConnectorError:
+ log.warning(
+ f"Failed to connect to inventory url at {url}, "
+ f"trying again ({attempt}/{FAILED_REQUEST_ATTEMPTS})."
+ )
+ except aiohttp.ClientError:
+ log.error(
+ f"Failed to get inventory from {url}, "
+ f"trying again ({attempt}/{FAILED_REQUEST_ATTEMPTS})."
+ )
+ except Exception:
+ log.exception(
+ f"An unexpected error has occurred during fetching of {url}, "
+ f"trying again ({attempt}/{FAILED_REQUEST_ATTEMPTS})."
+ )
+ else:
+ return inventory
+
+ return None
--
cgit v1.2.3
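
The retry wrapper boils down to a generic try-up-to-N-times pattern; a sketch
under the assumption that any exception should be logged and retried (the real
function distinguishes the error types):

    import logging
    from typing import Awaitable, Callable, Optional, TypeVar

    log = logging.getLogger(__name__)
    T = TypeVar("T")

    async def with_retries(make_attempt: Callable[[], Awaitable[T]], attempts: int = 3) -> Optional[T]:
        """Run `make_attempt` up to `attempts` times, returning its result or None."""
        for attempt in range(1, attempts + 1):
            try:
                return await make_attempt()
            except Exception:
                log.exception(f"Attempt {attempt}/{attempts} failed.")
        return None
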
From 3bf04d8a353056944ac335b1d387d71464a81aa1 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 30 Sep 2020 00:38:24 +0200
Subject: Use new async inventory fetching
---
bot/cogs/doc/cog.py | 71 ++++++-----------------------------------------------
1 file changed, 7 insertions(+), 64 deletions(-)
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index 7c1bf2a5f..2cb296d53 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -1,22 +1,17 @@
from __future__ import annotations
import asyncio
-import functools
import logging
import re
import sys
from collections import defaultdict
from contextlib import suppress
-from types import SimpleNamespace
from typing import Dict, List, NamedTuple, Optional, Union
import discord
from aiohttp import ClientSession
from bs4 import BeautifulSoup
from discord.ext import commands
-from requests import ConnectTimeout, ConnectionError, HTTPError
-from sphinx.ext import intersphinx
-from urllib3.exceptions import ProtocolError
from bot.bot import Bot
from bot.constants import MODERATION_ROLES, RedirectOutput
@@ -24,20 +19,10 @@ from bot.converters import PackageName, ValidURL
from bot.decorators import with_role
from bot.pagination import LinePaginator
from bot.utils.messages import wait_for_deletion
+from .inventory_parser import FAILED_REQUEST_ATTEMPTS, fetch_inventory
from .parsing import get_symbol_markdown
log = logging.getLogger(__name__)
-logging.getLogger('urllib3').setLevel(logging.WARNING)
-
-# Since Intersphinx is intended to be used with Sphinx,
-# we need to mock its configuration.
-SPHINX_MOCK_APP = SimpleNamespace(
- config=SimpleNamespace(
- intersphinx_timeout=3,
- tls_verify=True,
- user_agent="python3:python-discord/bot:1.0.0"
- )
-)
NO_OVERRIDE_GROUPS = (
"2to3fixer",
@@ -51,7 +36,6 @@ NO_OVERRIDE_PACKAGES = (
)
WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
-FAILED_REQUEST_RETRY_AMOUNT = 3
NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
@@ -190,21 +174,8 @@ class InventoryURL(commands.Converter):
async def convert(ctx: commands.Context, url: str) -> str:
"""Convert url to Intersphinx inventory URL."""
await ctx.trigger_typing()
- try:
- intersphinx.fetch_inventory(SPHINX_MOCK_APP, '', url)
- except AttributeError:
- raise commands.BadArgument(f"Failed to fetch Intersphinx inventory from URL `{url}`.")
- except ConnectionError:
- if url.startswith('https'):
- raise commands.BadArgument(
- f"Cannot establish a connection to `{url}`. Does it support HTTPS?"
- )
- raise commands.BadArgument(f"Cannot connect to host with URL `{url}`.")
- except ValueError:
- raise commands.BadArgument(
- f"Failed to read Intersphinx inventory from URL `{url}`. "
- "Are you sure that it's a valid inventory file?"
- )
+ if await fetch_inventory(ctx.bot.http_session, url) is None:
+            raise commands.BadArgument(f"Failed to fetch inventory file after {FAILED_REQUEST_ATTEMPTS} attempts.")
return url
@@ -235,17 +206,16 @@ class DocCog(commands.Cog):
* `package_name` is the package name to use, appears in the log
* `base_url` is the root documentation URL for the specified package, used to build
absolute paths that link to specific symbols
- * `inventory_url` is the absolute URL to the intersphinx inventory, fetched by running
- `intersphinx.fetch_inventory` in an executor on the bot's event loop
+ * `inventory_url` is the absolute URL to the intersphinx inventory.
"""
self.base_urls[api_package_name] = base_url
- package = await self._fetch_inventory(inventory_url)
+ package = await fetch_inventory(self.bot.http_session, inventory_url)
if not package:
return None
- for group, value in package.items():
- for symbol, (_package_name, _version, relative_doc_url, _) in value.items():
+ for group, items in package.items():
+ for symbol, relative_doc_url in items:
if "/" in symbol:
continue # skip unreachable symbols with slashes
# Intern the group names since they're reused in all the DocItems
@@ -455,30 +425,3 @@ class DocCog(commands.Cog):
description=f"```diff\n{added}\n{removed}```" if added or removed else ""
)
await ctx.send(embed=embed)
-
- async def _fetch_inventory(self, inventory_url: str) -> Optional[dict]:
- """Get and return inventory from `inventory_url`. If fetching fails, return None."""
- fetch_func = functools.partial(intersphinx.fetch_inventory, SPHINX_MOCK_APP, '', inventory_url)
- for retry in range(1, FAILED_REQUEST_RETRY_AMOUNT+1):
- try:
- package = await self.bot.loop.run_in_executor(None, fetch_func)
- except ConnectTimeout:
- log.error(
- f"Fetching of inventory {inventory_url} timed out,"
- f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})"
- )
- except ProtocolError:
- log.error(
- f"Connection lost while fetching inventory {inventory_url},"
- f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})"
- )
- except HTTPError as e:
- log.error(f"Fetching of inventory {inventory_url} failed with status code {e.response.status_code}.")
- return None
- except ConnectionError:
- log.error(f"Couldn't establish connection to inventory {inventory_url}.")
- return None
- else:
- return package
- log.error(f"Fetching of inventory {inventory_url} failed.")
- return None
--
cgit v1.2.3
From 46ee70533328eed3790ebb93d1257b5d4e598802 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 30 Sep 2020 00:42:55 +0200
Subject: Remove sphinx and requests from Pipfile
With our own implementation of sphinx's inventory fetching we no longer
need the sphinx package, nor requests, which was used inside of it.
---
Pipfile | 2 --
1 file changed, 2 deletions(-)
diff --git a/Pipfile b/Pipfile
index 6fff2223e..1e54c9212 100644
--- a/Pipfile
+++ b/Pipfile
@@ -21,9 +21,7 @@ markdownify = "~=0.4"
more_itertools = "~=8.2"
python-dateutil = "~=2.8"
pyyaml = "~=5.1"
-requests = "~=2.22"
sentry-sdk = "~=0.14"
-sphinx = "~=2.2"
statsd = "~=3.3"
[dev-packages]
--
cgit v1.2.3
From c5aa0c0bd7e8933648fbedc92a7cd1f5ae199772 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Thu, 1 Oct 2020 00:04:53 +0200
Subject: Reschedule failed inventory updates
---
bot/cogs/doc/cog.py | 39 +++++++++++++++++++++++++++++++++++----
1 file changed, 35 insertions(+), 4 deletions(-)
diff --git a/bot/cogs/doc/cog.py b/bot/cogs/doc/cog.py
index 2cb296d53..41fca4584 100644
--- a/bot/cogs/doc/cog.py
+++ b/bot/cogs/doc/cog.py
@@ -19,6 +19,7 @@ from bot.converters import PackageName, ValidURL
from bot.decorators import with_role
from bot.pagination import LinePaginator
from bot.utils.messages import wait_for_deletion
+from bot.utils.scheduling import Scheduler
from .inventory_parser import FAILED_REQUEST_ATTEMPTS, fetch_inventory
from .parsing import get_symbol_markdown
@@ -189,6 +190,9 @@ class DocCog(commands.Cog):
self.item_fetcher = CachedParser()
self.renamed_symbols = set()
+ self.inventory_scheduler = Scheduler(self.__class__.__name__)
+ self.scheduled_inventories = set()
+
self.bot.loop.create_task(self.init_refresh_inventory())
async def init_refresh_inventory(self) -> None:
@@ -198,7 +202,7 @@ class DocCog(commands.Cog):
async def update_single(
self, api_package_name: str, base_url: str, inventory_url: str
- ) -> None:
+ ) -> bool:
"""
Rebuild the inventory for a single package.
@@ -207,12 +211,27 @@ class DocCog(commands.Cog):
* `base_url` is the root documentation URL for the specified package, used to build
absolute paths that link to specific symbols
* `inventory_url` is the absolute URL to the intersphinx inventory.
+
+ If the inventory file is currently unreachable,
+ the update is rescheduled to execute in 2 minutes on the first attempt, and 5 minutes on subsequent attempts.
+
+ Return True on success; False if fetching failed and was rescheduled.
"""
self.base_urls[api_package_name] = base_url
-
package = await fetch_inventory(self.bot.http_session, inventory_url)
+
if not package:
- return None
+            delay = 2*60 if api_package_name not in self.scheduled_inventories else 5*60
+ log.info(f"Failed to fetch inventory, attempting again in {delay//60} minutes.")
+ self.inventory_scheduler.schedule_later(
+ delay,
+ api_package_name,
+                self.update_single(api_package_name, base_url, inventory_url)
+ )
+ self.scheduled_inventories.add(api_package_name)
+ return False
+        self.scheduled_inventories.discard(api_package_name)
for group, items in package.items():
for symbol, relative_doc_url in items:
@@ -249,6 +268,7 @@ class DocCog(commands.Cog):
self.item_fetcher.add_item(symbol_item)
log.trace(f"Fetched inventory for {api_package_name}.")
+ return True
async def refresh_inventory(self) -> None:
"""Refresh internal documentation inventory."""
@@ -260,6 +280,7 @@ class DocCog(commands.Cog):
self.base_urls.clear()
self.doc_symbols.clear()
self.renamed_symbols.clear()
+ self.scheduled_inventories.clear()
await self.item_fetcher.clear()
# Run all coroutines concurrently - since each of them performs a HTTP
@@ -385,7 +406,11 @@ class DocCog(commands.Cog):
f"Inventory URL: {inventory_url}"
)
- await self.update_single(package_name, base_url, inventory_url)
+        if not await self.update_single(package_name, base_url, inventory_url):
+ await ctx.send(
+ f"Added package `{package_name}` to database but failed to fetch inventory; rescheduled in 2 minutes."
+ )
+ return
await ctx.send(f"Added package `{package_name}` to database and refreshed inventory.")
@docs_group.command(name='deletedoc', aliases=('removedoc', 'rm', 'd'))
@@ -399,6 +424,9 @@ class DocCog(commands.Cog):
"""
await self.bot.api_client.delete(f'bot/documentation-links/{package_name}')
+ if package_name in self.scheduled_inventories:
+ self.inventory_scheduler.cancel(package_name)
+
async with ctx.typing():
# Rebuild the inventory to ensure that everything
# that was from this package is properly deleted.
@@ -409,6 +437,9 @@ class DocCog(commands.Cog):
@with_role(*MODERATION_ROLES)
async def refresh_command(self, ctx: commands.Context) -> None:
"""Refresh inventories and send differences to channel."""
+ for inventory in self.scheduled_inventories:
+ self.inventory_scheduler.cancel(inventory)
+
old_inventories = set(self.base_urls)
with ctx.typing():
await self.refresh_inventory()
--
cgit v1.2.3
From f4924f0e8c26e373ddae8cb29f1f3935aaf00f4a Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 10 Oct 2020 21:47:34 +0200
Subject: Handle non-dt fallback together with modules
---
bot/exts/info/doc/_parsing.py | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 83e35e2b1..a79332716 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -296,12 +296,7 @@ def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> str:
signature = None
# Modules, doc pages and labels don't point to description list tags but to tags like divs,
    # so no special parsing can be done and we only try to include what's under them.
- if symbol_data.group in {"module", "doc", "label"}:
- description = _get_general_description(symbol_heading)
-
- elif symbol_heading.name != "dt":
- # Use the general parsing for symbols that aren't modules, docs or labels and aren't dt tags,
- # log info the tag can be looked at.
+ if symbol_data.group in {"module", "doc", "label"} or symbol_heading.name != "dt":
description = _get_general_description(symbol_heading)
elif symbol_data.group in _NO_SIGNATURE_GROUPS:
--
cgit v1.2.3
From 2744b10fae0f3b1d4ac198ba819c024e037e5660 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 10 Oct 2020 21:48:10 +0200
Subject: Use more descriptive name for end_tag_filter
---
bot/exts/info/doc/_parsing.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index a79332716..5f6c23c8d 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -99,7 +99,7 @@ def _split_parameters(parameters_string: str) -> List[str]:
def _find_elements_until_tag(
start_element: PageElement,
- tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]],
+ end_tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]],
*,
func: Callable,
include_strings: bool = False,
@@ -108,7 +108,7 @@ def _find_elements_until_tag(
"""
    Get all elements up to `limit` or until a tag matching `end_tag_filter` is found.
- `tag_filter` can be either a tuple of string names to check against,
+ `end_tag_filter` can be either a tuple of string names to check against,
or a filtering callable that's applied to tags.
When `include_strings` is True, `NavigableString`s from the document will be included in the result along `Tag`s.
@@ -116,15 +116,15 @@ def _find_elements_until_tag(
`func` takes in a BeautifulSoup unbound method for finding multiple elements, such as `BeautifulSoup.find_all`.
The method is then iterated over and all elements until the matching tag or the limit are added to the return list.
"""
- use_tuple_filter = isinstance(tag_filter, tuple)
+ use_tuple_filter = isinstance(end_tag_filter, tuple)
elements = []
for element in func(start_element, name=Strainer(include_strings=include_strings), limit=limit):
if isinstance(element, Tag):
if use_tuple_filter:
- if element.name in tag_filter:
+ if element.name in end_tag_filter:
break
- elif tag_filter(element):
+ elif end_tag_filter(element):
break
elements.append(element)
--
cgit v1.2.3
From 9e4832965957eec291a3ccde198252ab28ce13e2 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 10 Oct 2020 21:50:37 +0200
Subject: Exclude headerlinks outside of current section
---
bot/exts/info/doc/_parsing.py | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 5f6c23c8d..d31f26060 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -132,20 +132,22 @@ def _find_elements_until_tag(
_find_next_children_until_tag = partial(_find_elements_until_tag, func=partial(BeautifulSoup.find_all, recursive=False))
+_find_recursive_children_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_all)
_find_next_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_next_siblings)
_find_previous_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
-def _get_general_description(start_element: PageElement) -> List[Union[Tag, NavigableString]]:
+def _get_general_description(start_element: Tag) -> List[Union[Tag, NavigableString]]:
"""
    Get page content up to a table or a tag whose class is in `SEARCH_END_TAG_ATTRS`.
    An attempt is made to find a headerlink a tag, to skip repeating the symbol information in the description;
    if it's found, it's used as the tag to start the search from instead of `start_element`.
"""
- header = start_element.find_next("a", attrs={"class": "headerlink"})
+ child_tags = _find_recursive_children_until_tag(start_element, _class_filter_factory(["section"]), limit=100)
+ header = next(filter(_class_filter_factory(["headerlink"]), child_tags), None)
start_tag = header.parent if header is not None else start_element
- return _find_next_siblings_until_tag(start_tag, _match_end_tag, include_strings=True)
+ return _find_next_siblings_until_tag(start_tag, _class_filter_factory(_SEARCH_END_TAG_ATTRS), include_strings=True)
def _get_dd_description(symbol: PageElement) -> List[Union[Tag, NavigableString]]:
@@ -274,13 +276,15 @@ def _parse_into_markdown(signatures: Optional[List[str]], description: Iterable[
return formatted_markdown
-def _match_end_tag(tag: Tag) -> bool:
- """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table."""
- for attr in _SEARCH_END_TAG_ATTRS:
- if attr in tag.get("class", ()):
- return True
+def _class_filter_factory(class_names: Iterable[str]) -> Callable[[Tag], bool]:
+    """Create a callable that returns True when the passed-in tag's class is in `class_names` or when it is a table."""
+ def match_tag(tag: Tag) -> bool:
+ for attr in class_names:
+ if attr in tag.get("class", ()):
+ return True
+ return tag.name == "table"
- return tag.name == "table"
+ return match_tag
def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> str:
--
cgit v1.2.3
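Because `_class_filter_factory` returns a closure, its result can be handed straight to BeautifulSoup wherever a tag filter is expected. A runnable standalone copy showing that interplay:

```python
from typing import Callable, Iterable

from bs4 import BeautifulSoup
from bs4.element import Tag


def class_filter_factory(class_names: Iterable[str]) -> Callable[[Tag], bool]:
    """Standalone copy of _class_filter_factory for illustration."""
    def match_tag(tag: Tag) -> bool:
        for attr in class_names:
            if attr in tag.get("class", ()):
                return True
        return tag.name == "table"
    return match_tag


soup = BeautifulSoup(
    '<a class="headerlink">¶</a><p class="other">text</p><table></table>',
    "html.parser",
)
# BeautifulSoup accepts the returned callable as the `name` argument of
# find_all, so it matches the headerlink anchor and the table, but not the
# plain paragraph.
print(soup.find_all(class_filter_factory(["headerlink"])))
```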
From 59f1fffb656447668f6e5a34fcc52697b152780a Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 18 Oct 2020 03:04:29 +0200
Subject: Handle escaped backslashes in strings
---
bot/exts/info/doc/_parsing.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index d31f26060..0883b9f42 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -66,7 +66,6 @@ def _split_parameters(parameters_string: str) -> List[str]:
depth = 0
expected_end = None
current_search = None
- previous_character = ""
for index, character in enumerate(parameters_string):
if character in _BRACKET_PAIRS:
@@ -79,7 +78,9 @@ def _split_parameters(parameters_string: str) -> List[str]:
elif character in {"'", '"'}:
if depth == 0:
depth += 1
- elif not previous_character == "\\":
+ elif parameters_string[index-1] != "\\":
+ depth -= 1
+ elif parameters_string[index-2] == "\\":
depth -= 1
elif character == expected_end:
@@ -91,7 +92,6 @@ def _split_parameters(parameters_string: str) -> List[str]:
elif depth == 0 and character == ",":
parameters_list.append(parameters_string[last_split:index])
last_split = index + 1
- previous_character = character
parameters_list.append(parameters_string[last_split:])
return parameters_list
--
cgit v1.2.3
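The two-character lookback encodes the rule being fixed here: a quote preceded by a backslash is escaped, unless that backslash is itself escaped by another backslash. A simplified, hypothetical helper isolating just that decision:

```python
def is_closing_quote(s: str, index: int) -> bool:
    """Simplified form of the patch's lookback checks for a quote at s[index]."""
    if s[index - 1] != "\\":
        return True              # no backslash before the quote
    return s[index - 2] == "\\"  # the backslash is itself escaped


assert is_closing_quote(r'"abc"', 4)       # plain closing quote
assert not is_closing_quote(r'"ab\"', 4)   # escaped quote, still inside the string
assert is_closing_quote('"ab\\\\"', 5)     # escaped backslash, the quote closes
```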
From c966853e92b696b9132c6f5316e6920e3cb70733 Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Tue, 27 Oct 2020 10:58:49 +0200
Subject: Moved code for finding the right ref to a function
---
bot/cogs/code_snippets.py | 34 ++++++++++++++--------------------
1 file changed, 14 insertions(+), 20 deletions(-)
diff --git a/bot/cogs/code_snippets.py b/bot/cogs/code_snippets.py
index 9bd06f6ff..b10c68789 100644
--- a/bot/cogs/code_snippets.py
+++ b/bot/cogs/code_snippets.py
@@ -19,6 +19,18 @@ async def fetch_http(session: ClientSession, url: str, response_format: str, **k
return await response.json()
+def find_ref(path: str, refs: tuple) -> tuple:
+ """Loops through all branches and tags to find the required ref."""
+ ref = path.split('/')[0]
+ file_path = '/'.join(path.split('/')[1:])
+ for possible_ref in refs:
+ if path.startswith(possible_ref['name'] + '/'):
+ ref = possible_ref['name']
+ file_path = path[len(ref) + 1:]
+ break
+ return (ref, file_path)
+
+
async def fetch_github_snippet(session: ClientSession, repo: str,
path: str, start_line: str, end_line: str) -> str:
"""Fetches a snippet from a GitHub repo."""
@@ -28,13 +40,7 @@ async def fetch_github_snippet(session: ClientSession, repo: str,
refs = (await fetch_http(session, f'https://api.github.com/repos/{repo}/branches', 'json', headers=headers)
+ await fetch_http(session, f'https://api.github.com/repos/{repo}/tags', 'json', headers=headers))
- ref = path.split('/')[0]
- file_path = '/'.join(path.split('/')[1:])
- for possible_ref in refs:
- if path.startswith(possible_ref['name'] + '/'):
- ref = possible_ref['name']
- file_path = path[len(ref) + 1:]
- break
+ ref, file_path = find_ref(path, refs)
file_contents = await fetch_http(
session,
@@ -42,7 +48,6 @@ async def fetch_github_snippet(session: ClientSession, repo: str,
'text',
headers=headers,
)
-
return await snippet_to_md(file_contents, file_path, start_line, end_line)
@@ -66,9 +71,7 @@ async def fetch_github_gist_snippet(session: ClientSession, gist_id: str, revisi
gist_json['files'][gist_file]['raw_url'],
'text',
)
-
return await snippet_to_md(file_contents, gist_file, start_line, end_line)
-
return ''
@@ -81,14 +84,7 @@ async def fetch_gitlab_snippet(session: ClientSession, repo: str,
refs = (await fetch_http(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/branches', 'json')
+ await fetch_http(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/tags', 'json'))
- ref = path.split('/')[0]
- file_path = '/'.join(path.split('/')[1:])
- for possible_ref in refs:
- if path.startswith(possible_ref['name'] + '/'):
- ref = possible_ref['name']
- file_path = path[len(ref) + 1:]
- break
-
+ ref, file_path = find_ref(path, refs)
enc_ref = quote_plus(ref)
enc_file_path = quote_plus(file_path)
@@ -97,7 +93,6 @@ async def fetch_gitlab_snippet(session: ClientSession, repo: str,
f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/files/{enc_file_path}/raw?ref={enc_ref}',
'text',
)
-
return await snippet_to_md(file_contents, file_path, start_line, end_line)
@@ -109,7 +104,6 @@ async def fetch_bitbucket_snippet(session: ClientSession, repo: str, ref: str,
f'https://bitbucket.org/{quote_plus(repo)}/raw/{quote_plus(ref)}/{quote_plus(file_path)}',
'text',
)
-
return await snippet_to_md(file_contents, file_path, start_line, end_line)
--
cgit v1.2.3
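Pulling the loop into `find_ref` also makes the slash-in-branch-name edge case easy to exercise in isolation; below is the same function with a hypothetical `refs` tuple shaped like the JSON objects the branch and tag endpoints return:

```python
def find_ref(path: str, refs: tuple) -> tuple:
    """Standalone copy of find_ref from the patch above."""
    ref = path.split('/')[0]
    file_path = '/'.join(path.split('/')[1:])
    for possible_ref in refs:
        if path.startswith(possible_ref['name'] + '/'):
            ref = possible_ref['name']
            file_path = path[len(ref) + 1:]
            break
    return (ref, file_path)


refs = ({'name': 'main'}, {'name': 'feature/new-parser'})
# Plain branch: the first path segment is the ref.
assert find_ref('main/bot/cogs/doc.py', refs) == ('main', 'bot/cogs/doc.py')
# Branch containing a slash: only the refs listing can disambiguate it.
assert find_ref('feature/new-parser/bot/bot.py', refs) == ('feature/new-parser', 'bot/bot.py')
```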
From 372cfb9c1dcfb761ad468ac38955473db57f18b6 Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Tue, 27 Oct 2020 11:02:03 +0200
Subject: Renamed fetch_http to fetch_response
---
bot/cogs/code_snippets.py | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/bot/cogs/code_snippets.py b/bot/cogs/code_snippets.py
index b10c68789..27faf70ec 100644
--- a/bot/cogs/code_snippets.py
+++ b/bot/cogs/code_snippets.py
@@ -10,8 +10,8 @@ from bot.bot import Bot
from bot.utils.messages import wait_for_deletion
-async def fetch_http(session: ClientSession, url: str, response_format: str, **kwargs) -> str:
- """Uses aiohttp to make http GET requests."""
+async def fetch_response(session: ClientSession, url: str, response_format: str, **kwargs) -> str:
+ """Makes http requests using aiohttp."""
async with session.get(url, **kwargs) as response:
if response_format == 'text':
return await response.text()
@@ -37,12 +37,12 @@ async def fetch_github_snippet(session: ClientSession, repo: str,
headers = {'Accept': 'application/vnd.github.v3.raw'}
# Search the GitHub API for the specified branch
- refs = (await fetch_http(session, f'https://api.github.com/repos/{repo}/branches', 'json', headers=headers)
- + await fetch_http(session, f'https://api.github.com/repos/{repo}/tags', 'json', headers=headers))
+ refs = (await fetch_response(session, f'https://api.github.com/repos/{repo}/branches', 'json', headers=headers)
+ + await fetch_response(session, f'https://api.github.com/repos/{repo}/tags', 'json', headers=headers))
ref, file_path = find_ref(path, refs)
- file_contents = await fetch_http(
+ file_contents = await fetch_response(
session,
f'https://api.github.com/repos/{repo}/contents/{file_path}?ref={ref}',
'text',
@@ -56,7 +56,7 @@ async def fetch_github_gist_snippet(session: ClientSession, gist_id: str, revisi
"""Fetches a snippet from a GitHub gist."""
headers = {'Accept': 'application/vnd.github.v3.raw'}
- gist_json = await fetch_http(
+ gist_json = await fetch_response(
session,
f'https://api.github.com/gists/{gist_id}{f"/{revision}" if len(revision) > 0 else ""}',
'json',
@@ -66,7 +66,7 @@ async def fetch_github_gist_snippet(session: ClientSession, gist_id: str, revisi
# Check each file in the gist for the specified file
for gist_file in gist_json['files']:
if file_path == gist_file.lower().replace('.', '-'):
- file_contents = await fetch_http(
+ file_contents = await fetch_response(
session,
gist_json['files'][gist_file]['raw_url'],
'text',
@@ -81,14 +81,14 @@ async def fetch_gitlab_snippet(session: ClientSession, repo: str,
enc_repo = quote_plus(repo)
# Searches the GitLab API for the specified branch
- refs = (await fetch_http(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/branches', 'json')
- + await fetch_http(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/tags', 'json'))
+ refs = (await fetch_response(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/branches', 'json')
+ + await fetch_response(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/tags', 'json'))
ref, file_path = find_ref(path, refs)
enc_ref = quote_plus(ref)
enc_file_path = quote_plus(file_path)
- file_contents = await fetch_http(
+ file_contents = await fetch_response(
session,
f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/files/{enc_file_path}/raw?ref={enc_ref}',
'text',
@@ -99,7 +99,7 @@ async def fetch_gitlab_snippet(session: ClientSession, repo: str,
async def fetch_bitbucket_snippet(session: ClientSession, repo: str, ref: str,
file_path: str, start_line: int, end_line: int) -> str:
"""Fetches a snippet from a BitBucket repo."""
- file_contents = await fetch_http(
+ file_contents = await fetch_response(
session,
f'https://bitbucket.org/{quote_plus(repo)}/raw/{quote_plus(ref)}/{quote_plus(file_path)}',
'text',
--
cgit v1.2.3
From c3ce61937211cbd8c7e3df1c501cda70d97623cb Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Tue, 27 Oct 2020 11:16:14 +0200
Subject: Renamed snippet_to_md and wrote a better docstring
---
bot/cogs/code_snippets.py | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/bot/cogs/code_snippets.py b/bot/cogs/code_snippets.py
index 27faf70ec..dda4d185f 100644
--- a/bot/cogs/code_snippets.py
+++ b/bot/cogs/code_snippets.py
@@ -21,8 +21,10 @@ async def fetch_response(session: ClientSession, url: str, response_format: str,
def find_ref(path: str, refs: tuple) -> tuple:
"""Loops through all branches and tags to find the required ref."""
+ # Base case: there is no slash in the branch name
ref = path.split('/')[0]
file_path = '/'.join(path.split('/')[1:])
+ # In case there are slashes in the branch name, we loop through all branches and tags
for possible_ref in refs:
if path.startswith(possible_ref['name'] + '/'):
ref = possible_ref['name']
@@ -48,7 +50,7 @@ async def fetch_github_snippet(session: ClientSession, repo: str,
'text',
headers=headers,
)
- return await snippet_to_md(file_contents, file_path, start_line, end_line)
+ return snippet_to_codeblock(file_contents, file_path, start_line, end_line)
async def fetch_github_gist_snippet(session: ClientSession, gist_id: str, revision: str,
@@ -71,7 +73,7 @@ async def fetch_github_gist_snippet(session: ClientSession, gist_id: str, revisi
gist_json['files'][gist_file]['raw_url'],
'text',
)
- return await snippet_to_md(file_contents, gist_file, start_line, end_line)
+ return snippet_to_codeblock(file_contents, gist_file, start_line, end_line)
return ''
@@ -93,7 +95,7 @@ async def fetch_gitlab_snippet(session: ClientSession, repo: str,
f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/files/{enc_file_path}/raw?ref={enc_ref}',
'text',
)
- return await snippet_to_md(file_contents, file_path, start_line, end_line)
+ return snippet_to_codeblock(file_contents, file_path, start_line, end_line)
async def fetch_bitbucket_snippet(session: ClientSession, repo: str, ref: str,
@@ -104,11 +106,21 @@ async def fetch_bitbucket_snippet(session: ClientSession, repo: str, ref: str,
f'https://bitbucket.org/{quote_plus(repo)}/raw/{quote_plus(ref)}/{quote_plus(file_path)}',
'text',
)
- return await snippet_to_md(file_contents, file_path, start_line, end_line)
+ return snippet_to_codeblock(file_contents, file_path, start_line, end_line)
-async def snippet_to_md(file_contents: str, file_path: str, start_line: str, end_line: str) -> str:
- """Given file contents, file path, start line and end line creates a code block."""
+def snippet_to_codeblock(file_contents: str, file_path: str, start_line: str, end_line: str) -> str:
+ """
+ Given the entire file contents and target lines, creates a code block.
+
+ First, we split the file contents into a list of lines and then keep and join only the required
+ ones together.
+
+ We then dedent the lines to look nice, and replace all ` characters with `\u200b to prevent
+ markdown injection.
+
+ Finally, we surround the code with ``` characters.
+ """
# Parse start_line and end_line into integers
if end_line is None:
start_line = end_line = int(start_line)
--
cgit v1.2.3
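The markdown-injection step mentioned in the new docstring is the subtle part; condensed to its essence, it looks like this (sketch with made-up input, not the full function):

```python
import textwrap

# A zero-width space (U+200B) is appended after every backtick so that the
# embedded file contents cannot terminate the surrounding Discord code block.
lines = ['    name = "`quoted`"', '    print(name)']
required = textwrap.dedent('\n'.join(lines)).rstrip().replace('`', '`\u200b')
print(required)  # every backtick is now followed by an invisible U+200B
```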
From 28dfd8278a8ee24fb26bc5359729ca0ed0307632 Mon Sep 17 00:00:00 2001
From: Andi Qu <31325319+dolphingarlic@users.noreply.github.com>
Date: Tue, 27 Oct 2020 11:17:26 +0200
Subject: Update bot/cogs/code_snippets.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Leon Sandøy
---
bot/cogs/code_snippets.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/cogs/code_snippets.py b/bot/cogs/code_snippets.py
index dda4d185f..d5424ea15 100644
--- a/bot/cogs/code_snippets.py
+++ b/bot/cogs/code_snippets.py
@@ -176,7 +176,7 @@ BITBUCKET_RE = re.compile(
class CodeSnippets(Cog):
"""
- Cog that prints out snippets to Discord.
+ Cog that parses and sends code snippets to Discord.
Matches each message against a regex and prints the contents of all matched snippets.
"""
--
cgit v1.2.3
From fd0bbdcd80156a443e5b91ad4b7f74e2c0285242 Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Tue, 27 Oct 2020 11:19:56 +0200
Subject: Split up refs into branches and tags
---
bot/cogs/code_snippets.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/bot/cogs/code_snippets.py b/bot/cogs/code_snippets.py
index dda4d185f..77c0ede42 100644
--- a/bot/cogs/code_snippets.py
+++ b/bot/cogs/code_snippets.py
@@ -39,9 +39,9 @@ async def fetch_github_snippet(session: ClientSession, repo: str,
headers = {'Accept': 'application/vnd.github.v3.raw'}
# Search the GitHub API for the specified branch
- refs = (await fetch_response(session, f'https://api.github.com/repos/{repo}/branches', 'json', headers=headers)
- + await fetch_response(session, f'https://api.github.com/repos/{repo}/tags', 'json', headers=headers))
-
+ branches = await fetch_response(session, f'https://api.github.com/repos/{repo}/branches', 'json', headers=headers)
+ tags = await fetch_response(session, f'https://api.github.com/repos/{repo}/tags', 'json', headers=headers)
+ refs = branches + tags
ref, file_path = find_ref(path, refs)
file_contents = await fetch_response(
@@ -83,9 +83,9 @@ async def fetch_gitlab_snippet(session: ClientSession, repo: str,
enc_repo = quote_plus(repo)
# Searches the GitLab API for the specified branch
- refs = (await fetch_response(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/branches', 'json')
- + await fetch_response(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/tags', 'json'))
-
+    branches = await fetch_response(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/branches', 'json')
+    tags = await fetch_response(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/tags', 'json')
+ refs = branches + tags
ref, file_path = find_ref(path, refs)
enc_ref = quote_plus(ref)
enc_file_path = quote_plus(file_path)
--
cgit v1.2.3
From 7807939084f01fed327ff2d1772fb81efc0edbba Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Tue, 27 Oct 2020 15:34:52 +0200
Subject: Made check for valid language easier to read
---
bot/exts/info/code_snippets.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py
index 3d38ef1c3..c53c28e8b 100644
--- a/bot/exts/info/code_snippets.py
+++ b/bot/exts/info/code_snippets.py
@@ -144,7 +144,9 @@ def snippet_to_codeblock(file_contents: str, file_path: str, start_line: str, en
# Extracts the code language and checks whether it's a "valid" language
language = file_path.split('/')[-1].split('.')[-1]
- if not language.replace('-', '').replace('+', '').replace('_', '').isalnum():
+ trimmed_language = language.replace('-', '').replace('+', '').replace('_', '')
+ is_valid_language = trimmed_language.isalnum()
+ if not is_valid_language:
language = ''
if len(required) != 0:
--
cgit v1.2.3
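Extracted on its own, the reworked check is a small predicate: strip the separator characters that legitimately appear in lexer names, then require what remains to be alphanumeric. A hypothetical standalone form:

```python
def is_valid_language(file_path: str) -> bool:
    """Approximation of the language check from snippet_to_codeblock."""
    language = file_path.split('/')[-1].split('.')[-1]
    trimmed_language = language.replace('-', '').replace('+', '').replace('_', '')
    return trimmed_language.isalnum()


assert is_valid_language('bot/exts/info/doc.py')  # 'py'
assert is_valid_language('main.c++')              # 'c++' trims to 'c'
assert not is_valid_language('notes.sh!')         # '!' survives trimming
```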
From 76afc563ac73f6b8d40194c15e28f42a9fe6be0f Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Tue, 27 Oct 2020 15:45:09 +0200
Subject: Moved global functions into the cog and got rid of unnecessary
aiohttp sessions
---
bot/exts/info/code_snippets.py | 307 +++++++++++++++++++++--------------------
1 file changed, 158 insertions(+), 149 deletions(-)
diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py
index c53c28e8b..12eb692d4 100644
--- a/bot/exts/info/code_snippets.py
+++ b/bot/exts/info/code_snippets.py
@@ -2,7 +2,6 @@ import re
import textwrap
from urllib.parse import quote_plus
-from aiohttp import ClientSession
from discord import Message
from discord.ext.commands import Cog
@@ -10,150 +9,6 @@ from bot.bot import Bot
from bot.utils.messages import wait_for_deletion
-async def fetch_response(session: ClientSession, url: str, response_format: str, **kwargs) -> str:
- """Makes http requests using aiohttp."""
- async with session.get(url, **kwargs) as response:
- if response_format == 'text':
- return await response.text()
- elif response_format == 'json':
- return await response.json()
-
-
-def find_ref(path: str, refs: tuple) -> tuple:
- """Loops through all branches and tags to find the required ref."""
- # Base case: there is no slash in the branch name
- ref = path.split('/')[0]
- file_path = '/'.join(path.split('/')[1:])
- # In case there are slashes in the branch name, we loop through all branches and tags
- for possible_ref in refs:
- if path.startswith(possible_ref['name'] + '/'):
- ref = possible_ref['name']
- file_path = path[len(ref) + 1:]
- break
- return (ref, file_path)
-
-
-async def fetch_github_snippet(session: ClientSession, repo: str,
- path: str, start_line: str, end_line: str) -> str:
- """Fetches a snippet from a GitHub repo."""
- headers = {'Accept': 'application/vnd.github.v3.raw'}
-
- # Search the GitHub API for the specified branch
- branches = await fetch_response(session, f'https://api.github.com/repos/{repo}/branches', 'json', headers=headers)
- tags = await fetch_response(session, f'https://api.github.com/repos/{repo}/tags', 'json', headers=headers)
- refs = branches + tags
- ref, file_path = find_ref(path, refs)
-
- file_contents = await fetch_response(
- session,
- f'https://api.github.com/repos/{repo}/contents/{file_path}?ref={ref}',
- 'text',
- headers=headers,
- )
- return snippet_to_codeblock(file_contents, file_path, start_line, end_line)
-
-
-async def fetch_github_gist_snippet(session: ClientSession, gist_id: str, revision: str,
- file_path: str, start_line: str, end_line: str) -> str:
- """Fetches a snippet from a GitHub gist."""
- headers = {'Accept': 'application/vnd.github.v3.raw'}
-
- gist_json = await fetch_response(
- session,
- f'https://api.github.com/gists/{gist_id}{f"/{revision}" if len(revision) > 0 else ""}',
- 'json',
- headers=headers,
- )
-
- # Check each file in the gist for the specified file
- for gist_file in gist_json['files']:
- if file_path == gist_file.lower().replace('.', '-'):
- file_contents = await fetch_response(
- session,
- gist_json['files'][gist_file]['raw_url'],
- 'text',
- )
- return snippet_to_codeblock(file_contents, gist_file, start_line, end_line)
- return ''
-
-
-async def fetch_gitlab_snippet(session: ClientSession, repo: str,
- path: str, start_line: str, end_line: str) -> str:
- """Fetches a snippet from a GitLab repo."""
- enc_repo = quote_plus(repo)
-
- # Searches the GitLab API for the specified branch
-    branches = await fetch_response(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/branches', 'json')
-    tags = await fetch_response(session, f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/tags', 'json')
- refs = branches + tags
- ref, file_path = find_ref(path, refs)
- enc_ref = quote_plus(ref)
- enc_file_path = quote_plus(file_path)
-
- file_contents = await fetch_response(
- session,
- f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/files/{enc_file_path}/raw?ref={enc_ref}',
- 'text',
- )
- return snippet_to_codeblock(file_contents, file_path, start_line, end_line)
-
-
-async def fetch_bitbucket_snippet(session: ClientSession, repo: str, ref: str,
- file_path: str, start_line: int, end_line: int) -> str:
- """Fetches a snippet from a BitBucket repo."""
- file_contents = await fetch_response(
- session,
- f'https://bitbucket.org/{quote_plus(repo)}/raw/{quote_plus(ref)}/{quote_plus(file_path)}',
- 'text',
- )
- return snippet_to_codeblock(file_contents, file_path, start_line, end_line)
-
-
-def snippet_to_codeblock(file_contents: str, file_path: str, start_line: str, end_line: str) -> str:
- """
- Given the entire file contents and target lines, creates a code block.
-
- First, we split the file contents into a list of lines and then keep and join only the required
- ones together.
-
- We then dedent the lines to look nice, and replace all ` characters with `\u200b to prevent
- markdown injection.
-
- Finally, we surround the code with ``` characters.
- """
- # Parse start_line and end_line into integers
- if end_line is None:
- start_line = end_line = int(start_line)
- else:
- start_line = int(start_line)
- end_line = int(end_line)
-
- split_file_contents = file_contents.splitlines()
-
- # Make sure that the specified lines are in range
- if start_line > end_line:
- start_line, end_line = end_line, start_line
- if start_line > len(split_file_contents) or end_line < 1:
- return ''
- start_line = max(1, start_line)
- end_line = min(len(split_file_contents), end_line)
-
- # Gets the code lines, dedents them, and inserts zero-width spaces to prevent Markdown injection
- required = '\n'.join(split_file_contents[start_line - 1:end_line])
- required = textwrap.dedent(required).rstrip().replace('`', '`\u200b')
-
- # Extracts the code language and checks whether it's a "valid" language
- language = file_path.split('/')[-1].split('.')[-1]
- trimmed_language = language.replace('-', '').replace('+', '').replace('_', '')
- is_valid_language = trimmed_language.isalnum()
- if not is_valid_language:
- language = ''
-
- if len(required) != 0:
- return f'```{language}\n{required}```\n'
- return ''
-
-
GITHUB_RE = re.compile(
    r'https://github\.com/(?P<repo>.+?)/blob/(?P<path>.+/.+)'
    r'#L(?P<start_line>\d+)([-~]L(?P<end_line>\d+))?\b'
@@ -183,6 +38,160 @@ class CodeSnippets(Cog):
Matches each message against a regex and prints the contents of all matched snippets.
"""
+ async def _fetch_response(self, url: str, response_format: str, **kwargs) -> str:
+ """Makes http requests using aiohttp."""
+ async with self.bot.http_session.get(url, **kwargs) as response:
+ if response_format == 'text':
+ return await response.text()
+ elif response_format == 'json':
+ return await response.json()
+
+ def _find_ref(self, path: str, refs: tuple) -> tuple:
+ """Loops through all branches and tags to find the required ref."""
+ # Base case: there is no slash in the branch name
+ ref = path.split('/')[0]
+ file_path = '/'.join(path.split('/')[1:])
+ # In case there are slashes in the branch name, we loop through all branches and tags
+ for possible_ref in refs:
+ if path.startswith(possible_ref['name'] + '/'):
+ ref = possible_ref['name']
+ file_path = path[len(ref) + 1:]
+ break
+ return (ref, file_path)
+
+ async def _fetch_github_snippet(
+ self,
+ repo: str,
+ path: str,
+ start_line: str,
+ end_line: str
+ ) -> str:
+ """Fetches a snippet from a GitHub repo."""
+ headers = {'Accept': 'application/vnd.github.v3.raw'}
+
+ # Search the GitHub API for the specified branch
+ branches = await self._fetch_response(f'https://api.github.com/repos/{repo}/branches', 'json', headers=headers)
+ tags = await self._fetch_response(f'https://api.github.com/repos/{repo}/tags', 'json', headers=headers)
+ refs = branches + tags
+ ref, file_path = self._find_ref(path, refs)
+
+ file_contents = await self._fetch_response(
+ f'https://api.github.com/repos/{repo}/contents/{file_path}?ref={ref}',
+ 'text',
+ headers=headers,
+ )
+ return self._snippet_to_codeblock(file_contents, file_path, start_line, end_line)
+
+ async def _fetch_github_gist_snippet(
+ self,
+ gist_id: str,
+ revision: str,
+ file_path: str,
+ start_line: str,
+ end_line: str
+ ) -> str:
+ """Fetches a snippet from a GitHub gist."""
+ headers = {'Accept': 'application/vnd.github.v3.raw'}
+
+ gist_json = await self._fetch_response(
+ f'https://api.github.com/gists/{gist_id}{f"/{revision}" if len(revision) > 0 else ""}',
+ 'json',
+ headers=headers,
+ )
+
+ # Check each file in the gist for the specified file
+ for gist_file in gist_json['files']:
+ if file_path == gist_file.lower().replace('.', '-'):
+ file_contents = await self._fetch_response(
+ gist_json['files'][gist_file]['raw_url'],
+ 'text',
+ )
+ return self._snippet_to_codeblock(file_contents, gist_file, start_line, end_line)
+ return ''
+
+ async def _fetch_gitlab_snippet(
+ self,
+ repo: str,
+ path: str,
+ start_line: str,
+ end_line: str
+ ) -> str:
+ """Fetches a snippet from a GitLab repo."""
+ enc_repo = quote_plus(repo)
+
+ # Searches the GitLab API for the specified branch
+        branches = await self._fetch_response(f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/branches', 'json')
+        tags = await self._fetch_response(f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/tags', 'json')
+ refs = branches + tags
+ ref, file_path = self._find_ref(path, refs)
+ enc_ref = quote_plus(ref)
+ enc_file_path = quote_plus(file_path)
+
+ file_contents = await self._fetch_response(
+ f'https://gitlab.com/api/v4/projects/{enc_repo}/repository/files/{enc_file_path}/raw?ref={enc_ref}',
+ 'text',
+ )
+ return self._snippet_to_codeblock(file_contents, file_path, start_line, end_line)
+
+ async def _fetch_bitbucket_snippet(
+ self,
+ repo: str,
+ ref: str,
+ file_path: str,
+ start_line: int,
+ end_line: int
+ ) -> str:
+ """Fetches a snippet from a BitBucket repo."""
+ file_contents = await self._fetch_response(
+ f'https://bitbucket.org/{quote_plus(repo)}/raw/{quote_plus(ref)}/{quote_plus(file_path)}',
+ 'text',
+ )
+ return self._snippet_to_codeblock(file_contents, file_path, start_line, end_line)
+
+ def _snippet_to_codeblock(self, file_contents: str, file_path: str, start_line: str, end_line: str) -> str:
+ """
+ Given the entire file contents and target lines, creates a code block.
+
+ First, we split the file contents into a list of lines and then keep and join only the required
+ ones together.
+
+ We then dedent the lines to look nice, and replace all ` characters with `\u200b to prevent
+ markdown injection.
+
+ Finally, we surround the code with ``` characters.
+ """
+ # Parse start_line and end_line into integers
+ if end_line is None:
+ start_line = end_line = int(start_line)
+ else:
+ start_line = int(start_line)
+ end_line = int(end_line)
+
+ split_file_contents = file_contents.splitlines()
+
+ # Make sure that the specified lines are in range
+ if start_line > end_line:
+ start_line, end_line = end_line, start_line
+ if start_line > len(split_file_contents) or end_line < 1:
+ return ''
+ start_line = max(1, start_line)
+ end_line = min(len(split_file_contents), end_line)
+
+ # Gets the code lines, dedents them, and inserts zero-width spaces to prevent Markdown injection
+ required = '\n'.join(split_file_contents[start_line - 1:end_line])
+ required = textwrap.dedent(required).rstrip().replace('`', '`\u200b')
+
+ # Extracts the code language and checks whether it's a "valid" language
+ language = file_path.split('/')[-1].split('.')[-1]
+ trimmed_language = language.replace('-', '').replace('+', '').replace('_', '')
+ is_valid_language = trimmed_language.isalnum()
+ if not is_valid_language:
+ language = ''
+
+ if len(required) != 0:
+ return f'```{language}\n{required}```\n'
+ return ''
+
def __init__(self, bot: Bot):
"""Initializes the cog's bot."""
self.bot = bot
@@ -199,16 +208,16 @@ class CodeSnippets(Cog):
message_to_send = ''
for gh in GITHUB_RE.finditer(message.content):
- message_to_send += await fetch_github_snippet(self.bot.http_session, **gh.groupdict())
+ message_to_send += await self._fetch_github_snippet(**gh.groupdict())
for gh_gist in GITHUB_GIST_RE.finditer(message.content):
- message_to_send += await fetch_github_gist_snippet(self.bot.http_session, **gh_gist.groupdict())
+ message_to_send += await self._fetch_github_gist_snippet(**gh_gist.groupdict())
for gl in GITLAB_RE.finditer(message.content):
- message_to_send += await fetch_gitlab_snippet(self.bot.http_session, **gl.groupdict())
+ message_to_send += await self._fetch_gitlab_snippet(**gl.groupdict())
for bb in BITBUCKET_RE.finditer(message.content):
- message_to_send += await fetch_bitbucket_snippet(self.bot.http_session, **bb.groupdict())
+ message_to_send += await self._fetch_bitbucket_snippet(**bb.groupdict())
if 0 < len(message_to_send) <= 2000 and message_to_send.count('\n') <= 15:
await message.edit(suppress=True)
--
cgit v1.2.3
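One detail worth noting about this refactor: each regex's named groups line up exactly with the keyword parameters of the matching private method, which is what lets the listener call the handlers with `**match.groupdict()`. For instance, with `GITHUB_RE` as shown above:

```python
import re

GITHUB_RE = re.compile(
    r'https://github\.com/(?P<repo>.+?)/blob/(?P<path>.+/.+)'
    r'#L(?P<start_line>\d+)([-~]L(?P<end_line>\d+))?\b'
)

match = GITHUB_RE.search(
    'see https://github.com/python-discord/bot/blob/master/bot/bot.py#L10-L20'
)
# The keys mirror _fetch_github_snippet's parameters, so the cog can invoke
# await self._fetch_github_snippet(**match.groupdict()).
print(match.groupdict())
# {'repo': 'python-discord/bot', 'path': 'master/bot/bot.py',
#  'start_line': '10', 'end_line': '20'}
```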
From 3102c698e8892d5a3b1b0fcc2183bf2c480d60fd Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Tue, 27 Oct 2020 15:55:34 +0200
Subject: Used a list of tuples for on_message instead
---
bot/exts/info/code_snippets.py | 29 +++++++++++------------------
1 file changed, 11 insertions(+), 18 deletions(-)
diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py
index 12eb692d4..1bb00b677 100644
--- a/bot/exts/info/code_snippets.py
+++ b/bot/exts/info/code_snippets.py
@@ -199,25 +199,18 @@ class CodeSnippets(Cog):
@Cog.listener()
async def on_message(self, message: Message) -> None:
"""Checks if the message has a snippet link, removes the embed, then sends the snippet contents."""
- gh_match = GITHUB_RE.search(message.content)
- gh_gist_match = GITHUB_GIST_RE.search(message.content)
- gl_match = GITLAB_RE.search(message.content)
- bb_match = BITBUCKET_RE.search(message.content)
-
- if (gh_match or gh_gist_match or gl_match or bb_match) and not message.author.bot:
+ if not message.author.bot:
message_to_send = ''
-
- for gh in GITHUB_RE.finditer(message.content):
- message_to_send += await self._fetch_github_snippet(**gh.groupdict())
-
- for gh_gist in GITHUB_GIST_RE.finditer(message.content):
- message_to_send += await self._fetch_github_gist_snippet(**gh_gist.groupdict())
-
- for gl in GITLAB_RE.finditer(message.content):
- message_to_send += await self._fetch_gitlab_snippet(**gl.groupdict())
-
- for bb in BITBUCKET_RE.finditer(message.content):
- message_to_send += await self._fetch_bitbucket_snippet(**bb.groupdict())
+ pattern_handlers = [
+ (GITHUB_RE, self._fetch_github_snippet),
+ (GITHUB_GIST_RE, self._fetch_github_gist_snippet),
+ (GITLAB_RE, self._fetch_gitlab_snippet),
+ (BITBUCKET_RE, self._fetch_bitbucket_snippet)
+ ]
+
+ for pattern, handler in pattern_handlers:
+ for match in pattern.finditer(message.content):
+ message_to_send += await handler(**match.groupdict())
if 0 < len(message_to_send) <= 2000 and message_to_send.count('\n') <= 15:
await message.edit(suppress=True)
--
cgit v1.2.3
From bbf7a600ca4b657258b46074c00cab1982791613 Mon Sep 17 00:00:00 2001
From: Andi Qu <31325319+dolphingarlic@users.noreply.github.com>
Date: Wed, 28 Oct 2020 09:26:09 +0200
Subject: Update bot/exts/info/code_snippets.py
Co-authored-by: Mark
---
bot/exts/info/code_snippets.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py
index 1bb00b677..4594c36f2 100644
--- a/bot/exts/info/code_snippets.py
+++ b/bot/exts/info/code_snippets.py
@@ -49,8 +49,7 @@ class CodeSnippets(Cog):
def _find_ref(self, path: str, refs: tuple) -> tuple:
"""Loops through all branches and tags to find the required ref."""
# Base case: there is no slash in the branch name
- ref = path.split('/')[0]
- file_path = '/'.join(path.split('/')[1:])
+ ref, file_path = path.split('/', 1)
# In case there are slashes in the branch name, we loop through all branches and tags
for possible_ref in refs:
if path.startswith(possible_ref['name'] + '/'):
--
cgit v1.2.3
From 1b8610c83dacfe1b19f3efa5d3a2b66c4c6e1e5d Mon Sep 17 00:00:00 2001
From: Andi Qu <31325319+dolphingarlic@users.noreply.github.com>
Date: Wed, 28 Oct 2020 09:31:01 +0200
Subject: Removed unnecessary space before equals sign
---
bot/exts/info/code_snippets.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py
index 4594c36f2..d854ebb4c 100644
--- a/bot/exts/info/code_snippets.py
+++ b/bot/exts/info/code_snippets.py
@@ -49,7 +49,7 @@ class CodeSnippets(Cog):
def _find_ref(self, path: str, refs: tuple) -> tuple:
"""Loops through all branches and tags to find the required ref."""
# Base case: there is no slash in the branch name
-    ref, file_path  = path.split('/', 1)
+ ref, file_path = path.split('/', 1)
# In case there are slashes in the branch name, we loop through all branches and tags
for possible_ref in refs:
if path.startswith(possible_ref['name'] + '/'):
--
cgit v1.2.3
From aae80011f5cb7e1ec5b9d6fd648ba255ad30e0df Mon Sep 17 00:00:00 2001
From: mbaruh
Date: Fri, 30 Oct 2020 05:31:09 +0200
Subject: Added defcon status notifier
---
bot/exts/moderation/defcon.py | 23 ++++++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)
diff --git a/bot/exts/moderation/defcon.py b/bot/exts/moderation/defcon.py
index caa6fb917..4b25c36df 100644
--- a/bot/exts/moderation/defcon.py
+++ b/bot/exts/moderation/defcon.py
@@ -4,8 +4,10 @@ import logging
from collections import namedtuple
from datetime import datetime, timedelta
from enum import Enum
+from gettext import ngettext
from discord import Colour, Embed, Member
+from discord.ext import tasks
from discord.ext.commands import Cog, Context, group, has_any_role
from bot.bot import Bot
@@ -83,6 +85,7 @@ class Defcon(Cog):
self.days = timedelta(days=0)
log.info("DEFCON disabled")
+ self.update_notifier()
await self.update_channel_topic()
@Cog.listener()
@@ -153,6 +156,10 @@ class Defcon(Cog):
}
}
)
+
+ self.days = timedelta(days=days)
+ self.update_notifier()
+
except Exception as err:
log.exception("Unable to update DEFCON settings.")
error = err
@@ -199,7 +206,6 @@ class Defcon(Cog):
@has_any_role(*MODERATION_ROLES)
async def days_command(self, ctx: Context, days: int) -> None:
"""Set how old an account must be to join the server, in days, with DEFCON mode enabled."""
- self.days = timedelta(days=days)
self.enabled = True
await self._defcon_action(ctx, days=days, action=Action.UPDATED)
await self.update_channel_topic()
@@ -252,6 +258,21 @@ class Defcon(Cog):
await self.mod_log.send_log_message(info.icon, info.color, status_msg, log_msg)
+ def update_notifier(self) -> None:
+ """Start or stop the notifier according to the DEFCON status."""
+ if self.days.days != 0 and not self.defcon_notifier.is_running():
+ log.info("DEFCON notifier started.")
+ self.defcon_notifier.start()
+
+ elif self.days.days == 0 and self.defcon_notifier.is_running():
+ log.info("DEFCON notifier stopped.")
+ self.defcon_notifier.cancel()
+
+ @tasks.loop(hours=1)
+ async def defcon_notifier(self) -> None:
+ """Routinely notify moderators that DEFCON is active."""
+ await self.channel.send(f"Defcon is on and is set to {self.days.days} day{ngettext('', 's', self.days.days)}.")
+
def setup(bot: Bot) -> None:
"""Load the Defcon cog."""
--
cgit v1.2.3
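The notifier uses discord.py's `tasks` extension, where the decorated loop is started and cancelled explicitly. A minimal sketch of the same start/stop pattern outside the cog (class and names are illustrative):

```python
from discord.ext import tasks


class Notifier:
    """Toy stand-in for the DEFCON cog's notifier wiring."""

    def __init__(self) -> None:
        self.active_days = 0

    def update_notifier(self) -> None:
        """Start or stop the loop so it only runs while the feature is active."""
        if self.active_days != 0 and not self.notify.is_running():
            self.notify.start()
        elif self.active_days == 0 and self.notify.is_running():
            self.notify.cancel()

    @tasks.loop(hours=1)
    async def notify(self) -> None:
        print(f"Feature is on and set to {self.active_days} day(s).")
```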
From c9fe7b1d6b98334c29f516b682b93b4c1c3946a1 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 10 Nov 2020 01:14:31 +0100
Subject: Cache user fetched symbols through redis.
---
bot/exts/info/doc/_cog.py | 22 ++++++++++++++++++++--
bot/exts/info/doc/_redis_cache.py | 23 +++++++++++++++++++++++
2 files changed, 43 insertions(+), 2 deletions(-)
create mode 100644 bot/exts/info/doc/_redis_cache.py
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 257435e95..ab3ad159a 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -4,6 +4,7 @@ import asyncio
import logging
import re
import sys
+import urllib.parse
from collections import defaultdict
from contextlib import suppress
from typing import Dict, List, NamedTuple, Optional, Union
@@ -21,6 +22,7 @@ from bot.utils.messages import wait_for_deletion
from bot.utils.scheduling import Scheduler
from ._inventory_parser import FAILED_REQUEST_ATTEMPTS, fetch_inventory
from ._parsing import get_symbol_markdown
+from ._redis_cache import DocRedisCache
log = logging.getLogger(__name__)
@@ -182,6 +184,8 @@ class InventoryURL(commands.Converter):
class DocCog(commands.Cog):
"""A set of commands for querying & displaying documentation."""
+ doc_cache = DocRedisCache()
+
def __init__(self, bot: Bot):
self.base_urls = {}
self.bot = bot
@@ -296,16 +300,30 @@ class DocCog(commands.Cog):
Attempt to scrape and fetch the data for the given `symbol`, and build an embed from its contents.
If the symbol is known, an Embed with documentation about it is returned.
+
+    First check the DocRedisCache before querying the cog's `CachedParser`;
+    if the symbol's markdown is not cached yet, a redis entry is created for it.
"""
+ log.trace(f"Building embed for symbol `{symbol}`")
symbol_info = self.doc_symbols.get(symbol)
if symbol_info is None:
+ log.debug("Symbol does not exist.")
return None
self.bot.stats.incr(f"doc_fetches.{symbol_info.package.lower()}")
+ item_url = f"{symbol_info.url}#{symbol_info.symbol_id}"
+ redis_key = "".join(urllib.parse.urlparse(item_url)[1:]) # url without scheme
+
+ markdown = await self.doc_cache.get(redis_key)
+ if markdown is None:
+ log.debug(f"Redis cache miss for symbol `{symbol}`.")
+ markdown = await self.item_fetcher.get_markdown(self.bot.http_session, symbol_info)
+ await self.doc_cache.set(redis_key, markdown)
+
embed = discord.Embed(
title=discord.utils.escape_markdown(symbol),
- url=f"{symbol_info.url}#{symbol_info.symbol_id}",
- description=await self.item_fetcher.get_markdown(self.bot.http_session, symbol_info)
+ url=item_url,
+ description=markdown
)
# Show all symbols with the same name that were renamed in the footer.
embed.set_footer(
diff --git a/bot/exts/info/doc/_redis_cache.py b/bot/exts/info/doc/_redis_cache.py
new file mode 100644
index 000000000..147394ba6
--- /dev/null
+++ b/bot/exts/info/doc/_redis_cache.py
@@ -0,0 +1,23 @@
+from typing import Optional
+
+from async_rediscache.types.base import RedisObject, namespace_lock
+
+
+class DocRedisCache(RedisObject):
+ """Interface for redis functionality needed by the Doc cog."""
+
+ @namespace_lock
+ async def set(self, key: str, value: str) -> None:
+ """
+ Set markdown `value` for `key`.
+
+ Keys expire after a week to keep data up to date.
+ """
+ with await self._get_pool_connection() as connection:
+ await connection.setex(f"{self.namespace}:{key}", 7*24*60*60, value)
+
+ @namespace_lock
+ async def get(self, key: str) -> Optional[str]:
+ """Get markdown contents for `key`."""
+ with await self._get_pool_connection() as connection:
+ return await connection.get(f"{self.namespace}:{key}", encoding="utf8")
--
cgit v1.2.3
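The redis key derivation drops the URL scheme, so the `http` and `https` forms of the same page share one cache entry; concretely:

```python
import urllib.parse

item_url = "https://docs.python.org/3/library/os.path.html#os.path.join"
# urlparse(...)[1:] is (netloc, path, params, query, fragment), i.e. everything
# except the scheme, joined into a single string.
redis_key = "".join(urllib.parse.urlparse(item_url)[1:])
print(redis_key)  # docs.python.org/3/library/os.path.htmlos.path.join
```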
From b8c12d08c9b8dc4e0bf39fcc242d67a3532d0fd0 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 10 Nov 2020 03:16:35 +0100
Subject: Add package in front of symbol as default fallback
Previously we only added the package name for symbols
that shared a name with another symbol, but
in some edge cases we can get to this point with symbols
that weren't renamed but have name conflicts, causing some
to get overwritten completely without the capturing condition.
---
bot/exts/info/doc/_cog.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index ab3ad159a..264d6e31e 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -260,8 +260,7 @@ class DocCog(commands.Cog):
self.doc_symbols[overridden_symbol] = original_symbol
self.renamed_symbols.add(overridden_symbol)
- # If renamed `symbol` already exists, add library name in front to differentiate between them.
- elif symbol in self.renamed_symbols:
+ else:
symbol = f"{api_package_name}.{symbol}"
self.renamed_symbols.add(symbol)
--
cgit v1.2.3
From 89169f5c0b203be1963cfe569c216e0094674c4f Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 10 Nov 2020 03:56:29 +0100
Subject: Simplify duplicate symbol name handling code
With the catchall else condition and symbols from FORCE_PREFIX_GROUPS
getting renamed even when being overwritten, we can ignore the package
handling and let it go to the else which adds the package prefix
instead of a group
---
bot/exts/info/doc/_cog.py | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 264d6e31e..ee89f5384 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -26,17 +26,14 @@ from ._redis_cache import DocRedisCache
log = logging.getLogger(__name__)
-NO_OVERRIDE_GROUPS = (
+# symbols with a group contained here will get the group prefixed on duplicates
+FORCE_PREFIX_GROUPS = (
"2to3fixer",
"token",
"label",
"pdbcommand",
"term",
)
-NO_OVERRIDE_PACKAGES = (
- "python",
-)
-
WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
@@ -245,14 +242,11 @@ class DocCog(commands.Cog):
group_name = sys.intern(group.split(":")[1])
if (original_symbol := self.doc_symbols.get(symbol)) is not None:
- if (
- group_name in NO_OVERRIDE_GROUPS
- or any(package == original_symbol.package for package in NO_OVERRIDE_PACKAGES)
- ):
+ if group_name in FORCE_PREFIX_GROUPS:
symbol = f"{group_name}.{symbol}"
self.renamed_symbols.add(symbol)
- elif (overridden_symbol_group := original_symbol.group) in NO_OVERRIDE_GROUPS:
+ elif (overridden_symbol_group := original_symbol.group) in FORCE_PREFIX_GROUPS:
overridden_symbol = f"{overridden_symbol_group}.{symbol}"
if overridden_symbol in self.renamed_symbols:
overridden_symbol = f"{api_package_name}.{overridden_symbol}"
--
cgit v1.2.3
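A condensed, dict-based sketch of the resolution order after this simplification (the real cog stores `DocItem` objects and also renames the overridden symbol in the elif branch, which is omitted here):

```python
FORCE_PREFIX_GROUPS = ("2to3fixer", "token", "label", "pdbcommand", "term")

doc_symbols = {}
renamed_symbols = set()


def add_symbol(symbol: str, group: str, package: str) -> None:
    if symbol in doc_symbols:
        if group in FORCE_PREFIX_GROUPS:
            symbol = f"{group}.{symbol}"
        else:
            # Catch-all: fall back to prefixing the package name.
            symbol = f"{package}.{symbol}"
        renamed_symbols.add(symbol)
    doc_symbols[symbol] = (group, package)


add_symbol("term", "label", "python")
add_symbol("term", "term", "python")   # duplicate in a forced group -> "term.term"
add_symbol("get", "method", "aiohttp")
add_symbol("get", "method", "redis")   # plain duplicate -> "redis.get"
print(sorted(renamed_symbols))         # ['redis.get', 'term.term']
```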
From faaa85d2d00a2bc7496965fad3f5f53f56718e9c Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 10 Nov 2020 04:03:23 +0100
Subject: Move InventoryURL converter to the converters file
---
bot/converters.py | 20 ++++++++++++++++++++
bot/exts/info/doc/_cog.py | 23 ++---------------------
2 files changed, 22 insertions(+), 21 deletions(-)
diff --git a/bot/converters.py b/bot/converters.py
index 6c87a50fe..3066eaabb 100644
--- a/bot/converters.py
+++ b/bot/converters.py
@@ -15,6 +15,7 @@ from discord.utils import DISCORD_EPOCH, snowflake_time
from bot.api import ResponseCodeError
from bot.constants import URLs
+from bot.exts.info.doc import _inventory_parser
from bot.utils.regex import INVITE_RE
log = logging.getLogger(__name__)
@@ -175,6 +176,25 @@ class ValidURL(Converter):
return url
+class InventoryURL(Converter):
+ """
+ Represents an Intersphinx inventory URL.
+
+ This converter checks whether intersphinx accepts the given inventory URL, and raises
+ `BadArgument` if that is not the case.
+
+ Otherwise, it simply passes through the given URL.
+ """
+
+ @staticmethod
+ async def convert(ctx: Context, url: str) -> str:
+ """Convert url to Intersphinx inventory URL."""
+ await ctx.trigger_typing()
+ if await _inventory_parser.fetch_inventory(ctx.bot.http_session, url) is None:
+ raise BadArgument(f"Failed to fetch inventory file after {_inventory_parser.FAILED_REQUEST_ATTEMPTS}.")
+ return url
+
+
class Snowflake(IDConverter):
"""
Converts to an int if the argument is a valid Discord snowflake.
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index ee89f5384..25477fe07 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -16,11 +16,11 @@ from discord.ext import commands
from bot.bot import Bot
from bot.constants import MODERATION_ROLES, RedirectOutput
-from bot.converters import PackageName, ValidURL
+from bot.converters import InventoryURL, PackageName, ValidURL
from bot.pagination import LinePaginator
from bot.utils.messages import wait_for_deletion
from bot.utils.scheduling import Scheduler
-from ._inventory_parser import FAILED_REQUEST_ATTEMPTS, fetch_inventory
+from ._inventory_parser import fetch_inventory
from ._parsing import get_symbol_markdown
from ._redis_cache import DocRedisCache
@@ -159,25 +159,6 @@ class CachedParser:
self._item_events.clear()
-class InventoryURL(commands.Converter):
- """
- Represents an Intersphinx inventory URL.
-
- This converter checks whether intersphinx accepts the given inventory URL, and raises
- `BadArgument` if that is not the case.
-
- Otherwise, it simply passes through the given URL.
- """
-
- @staticmethod
- async def convert(ctx: commands.Context, url: str) -> str:
- """Convert url to Intersphinx inventory URL."""
- await ctx.trigger_typing()
- if await fetch_inventory(ctx.bot.http_session, url) is None:
- raise commands.BadArgument(f"Failed to fetch inventory file after {FAILED_REQUEST_ATTEMPTS}.")
- return url
-
-
class DocCog(commands.Cog):
"""A set of commands for querying & displaying documentation."""
--
cgit v1.2.3
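After the move, the converter is consumed through command annotations like any other converter in `bot/converters.py`; a hypothetical command sketch (modeled on, but not copied from, the doc cog's set command):

```python
from discord.ext import commands

from bot.converters import InventoryURL, PackageName


@commands.command(name="setdoc")
async def set_doc(ctx: commands.Context, package_name: PackageName, inventory_url: InventoryURL) -> None:
    # By the time the body runs, InventoryURL.convert has already validated
    # the URL (or raised BadArgument, which discord.py reports to the user).
    await ctx.send(f"Added {package_name} at {inventory_url}.")
```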
From 2836ce6f24d66949376a1defbf3813ffae8b7f47 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 10 Nov 2020 13:45:43 +0100
Subject: Relock Pipfile.lock
---
Pipfile.lock | 434 +++++++++++++++++++----------------------------------------
1 file changed, 136 insertions(+), 298 deletions(-)
diff --git a/Pipfile.lock b/Pipfile.lock
index becd85c55..f622d9e01 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
- "sha256": "073fd0c51749aafa188fdbe96c5b90dd157cb1d23bdd144801fb0d0a369ffa88"
+ "sha256": "35130d225126e341941fe36e4193fe53aa253e193a50505054a87f48ab7f7c8c"
},
"pipfile-spec": 6,
"requires": {
@@ -34,21 +34,22 @@
},
"aiohttp": {
"hashes": [
- "sha256:1e984191d1ec186881ffaed4581092ba04f7c61582a177b187d3a2f07ed9719e",
- "sha256:259ab809ff0727d0e834ac5e8a283dc5e3e0ecc30c4d80b3cd17a4139ce1f326",
- "sha256:2f4d1a4fdce595c947162333353d4a44952a724fba9ca3205a3df99a33d1307a",
- "sha256:32e5f3b7e511aa850829fbe5aa32eb455e5534eaa4b1ce93231d00e2f76e5654",
- "sha256:344c780466b73095a72c616fac5ea9c4665add7fc129f285fbdbca3cccf4612a",
- "sha256:460bd4237d2dbecc3b5ed57e122992f60188afe46e7319116da5eb8a9dfedba4",
- "sha256:4c6efd824d44ae697814a2a85604d8e992b875462c6655da161ff18fd4f29f17",
- "sha256:50aaad128e6ac62e7bf7bd1f0c0a24bc968a0c0590a726d5a955af193544bcec",
- "sha256:6206a135d072f88da3e71cc501c59d5abffa9d0bb43269a6dcd28d66bfafdbdd",
- "sha256:65f31b622af739a802ca6fd1a3076fd0ae523f8485c52924a89561ba10c49b48",
- "sha256:ae55bac364c405caa23a4f2d6cfecc6a0daada500274ffca4a9230e7129eac59",
- "sha256:b778ce0c909a2653741cb4b1ac7015b5c130ab9c897611df43ae6a58523cb965"
+ "sha256:1a4160579ffbc1b69e88cb6ca8bb0fbd4947dfcbf9fb1e2a4fc4c7a4a986c1fe",
+ "sha256:206c0ccfcea46e1bddc91162449c20c72f308aebdcef4977420ef329c8fcc599",
+ "sha256:2ad493de47a8f926386fa6d256832de3095ba285f325db917c7deae0b54a9fc8",
+ "sha256:319b490a5e2beaf06891f6711856ea10591cfe84fe9f3e71a721aa8f20a0872a",
+ "sha256:470e4c90da36b601676fe50c49a60d34eb8c6593780930b1aa4eea6f508dfa37",
+ "sha256:60f4caa3b7f7a477f66ccdd158e06901e1d235d572283906276e3803f6b098f5",
+ "sha256:66d64486172b032db19ea8522328b19cfb78a3e1e5b62ab6a0567f93f073dea0",
+ "sha256:687461cd974722110d1763b45c5db4d2cdee8d50f57b00c43c7590d1dd77fc5c",
+ "sha256:698cd7bc3c7d1b82bb728bae835724a486a8c376647aec336aa21a60113c3645",
+ "sha256:797456399ffeef73172945708810f3277f794965eb6ec9bd3a0c007c0476be98",
+ "sha256:a885432d3cabc1287bcf88ea94e1826d3aec57fd5da4a586afae4591b061d40d",
+ "sha256:c506853ba52e516b264b106321c424d03f3ddef2813246432fa9d1cefd361c81",
+ "sha256:fb83326d8295e8840e4ba774edf346e87eca78ba8a89c55d2690352842c15ba5"
],
"index": "pypi",
- "version": "==3.6.2"
+ "version": "==3.6.3"
},
"aioping": {
"hashes": [
@@ -68,18 +69,11 @@
},
"aiormq": {
"hashes": [
- "sha256:106695a836f19c1af6c46b58e8aac80e00f86c5b3287a3c6483a1ee369cc95c9",
- "sha256:9f6dbf6155fe2b7a3d24bf68de97fb812db0fac0a54e96bc1af14ea95078ba7f"
+ "sha256:8218dd9f7198d6e7935855468326bbacf0089f926c70baa8dd92944cb2496573",
+ "sha256:e584dac13a242589aaf42470fd3006cb0dc5aed6506cbd20357c7ec8bbe4a89e"
],
"markers": "python_version >= '3.6'",
- "version": "==3.2.3"
- },
- "alabaster": {
- "hashes": [
- "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359",
- "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"
- ],
- "version": "==0.7.12"
+ "version": "==3.3.1"
},
"async-rediscache": {
"extras": [
@@ -103,35 +97,27 @@
},
"attrs": {
"hashes": [
- "sha256:26b54ddbbb9ee1d34d5d3668dd37d6cf74990ab23c828c2888dccdceee395594",
- "sha256:fce7fc47dfc976152e82d53ff92fa0407700c21acd20886a13777a0d20e655dc"
+ "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6",
+ "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==20.2.0"
- },
- "babel": {
- "hashes": [
- "sha256:1aac2ae2d0d8ea368fa90906567f5c08463d98ade155c0c4bfedd6a0f7160e38",
- "sha256:d670ea0b10f8b723672d3a6abeb87b565b244da220d76b4dba1b66269ec152d4"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==2.8.0"
+ "version": "==20.3.0"
},
"beautifulsoup4": {
"hashes": [
- "sha256:1edf5e39f3a5bc6e38b235b369128416c7239b34f692acccececb040233032a1",
- "sha256:5dfe44f8fddc89ac5453f02659d3ab1668f2c0d9684839f0785037e8c6d9ac8d",
- "sha256:645d833a828722357038299b7f6879940c11dddd95b900fe5387c258b72bb883"
+ "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35",
+ "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25",
+ "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666"
],
"index": "pypi",
- "version": "==4.9.2"
+ "version": "==4.9.3"
},
"certifi": {
"hashes": [
- "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3",
- "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"
+ "sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd",
+ "sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4"
],
- "version": "==2020.6.20"
+ "version": "==2020.11.8"
},
"cffi": {
"hashes": [
@@ -183,11 +169,12 @@
},
"colorama": {
"hashes": [
- "sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff",
- "sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1"
+ "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b",
+ "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"
],
+ "index": "pypi",
"markers": "sys_platform == 'win32'",
- "version": "==0.4.3"
+ "version": "==0.4.4"
},
"coloredlogs": {
"hashes": [
@@ -207,26 +194,18 @@
},
"discord.py": {
"hashes": [
- "sha256:3acb61fde0d862ed346a191d69c46021e6063673f63963bc984ae09a685ab211",
- "sha256:e71089886aa157341644bdecad63a72ff56b44406b1a6467b66db31c8e5a5a15"
+ "sha256:2367359e31f6527f8a936751fc20b09d7495dd6a76b28c8fb13d4ca6c55b7563",
+ "sha256:def00dc50cf36d21346d71bc89f0cad8f18f9a3522978dc18c7796287d47de8b"
],
"index": "pypi",
- "version": "==1.5.0"
- },
- "docutils": {
- "hashes": [
- "sha256:0c5b78adfbf7762415433f5515cd5c9e762339e23369dbe8000d84a4bf4ab3af",
- "sha256:c2de3a60e9e7d07be26b7f2b00ca0309c207e06c100f9cc2a94931fc75a478fc"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
- "version": "==0.16"
+ "version": "==1.5.1"
},
"fakeredis": {
"hashes": [
- "sha256:7ea0866ba5edb40fe2e9b1722535df0c7e6b91d518aa5f50d96c2fff3ea7f4c2",
- "sha256:aad8836ffe0319ffbba66dcf872ac6e7e32d1f19790e31296ba58445efb0a5c7"
+ "sha256:8070b7fce16f828beaef2c757a4354af91698685d5232404f1aeeb233529c7a5",
+ "sha256:f8c8ea764d7b6fd801e7f5486e3edd32ca991d506186f1923a01fc072e33c271"
],
- "version": "==1.4.3"
+ "version": "==1.4.4"
},
"feedparser": {
"hashes": [
@@ -313,58 +292,48 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.10"
},
- "imagesize": {
- "hashes": [
- "sha256:6965f19a6a2039c7d48bca7dba2473069ff854c36ae6f19d2cde309d998228a1",
- "sha256:b1f6b5a4eab1f73479a50fb79fcf729514a900c341d8503d62a62dbc4127a2b1"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==1.2.0"
- },
- "jinja2": {
- "hashes": [
- "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0",
- "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
- "version": "==2.11.2"
- },
"lxml": {
"hashes": [
- "sha256:05a444b207901a68a6526948c7cc8f9fe6d6f24c70781488e32fd74ff5996e3f",
- "sha256:08fc93257dcfe9542c0a6883a25ba4971d78297f63d7a5a26ffa34861ca78730",
- "sha256:107781b213cf7201ec3806555657ccda67b1fccc4261fb889ef7fc56976db81f",
- "sha256:121b665b04083a1e85ff1f5243d4a93aa1aaba281bc12ea334d5a187278ceaf1",
- "sha256:1fa21263c3aba2b76fd7c45713d4428dbcc7644d73dcf0650e9d344e433741b3",
- "sha256:2b30aa2bcff8e958cd85d907d5109820b01ac511eae5b460803430a7404e34d7",
- "sha256:4b4a111bcf4b9c948e020fd207f915c24a6de3f1adc7682a2d92660eb4e84f1a",
- "sha256:5591c4164755778e29e69b86e425880f852464a21c7bb53c7ea453bbe2633bbe",
- "sha256:59daa84aef650b11bccd18f99f64bfe44b9f14a08a28259959d33676554065a1",
- "sha256:5a9c8d11aa2c8f8b6043d845927a51eb9102eb558e3f936df494e96393f5fd3e",
- "sha256:5dd20538a60c4cc9a077d3b715bb42307239fcd25ef1ca7286775f95e9e9a46d",
- "sha256:74f48ec98430e06c1fa8949b49ebdd8d27ceb9df8d3d1c92e1fdc2773f003f20",
- "sha256:786aad2aa20de3dbff21aab86b2fb6a7be68064cbbc0219bde414d3a30aa47ae",
- "sha256:7ad7906e098ccd30d8f7068030a0b16668ab8aa5cda6fcd5146d8d20cbaa71b5",
- "sha256:80a38b188d20c0524fe8959c8ce770a8fdf0e617c6912d23fc97c68301bb9aba",
- "sha256:8f0ec6b9b3832e0bd1d57af41f9238ea7709bbd7271f639024f2fc9d3bb01293",
- "sha256:92282c83547a9add85ad658143c76a64a8d339028926d7dc1998ca029c88ea6a",
- "sha256:94150231f1e90c9595ccc80d7d2006c61f90a5995db82bccbca7944fd457f0f6",
- "sha256:9dc9006dcc47e00a8a6a029eb035c8f696ad38e40a27d073a003d7d1443f5d88",
- "sha256:a76979f728dd845655026ab991df25d26379a1a8fc1e9e68e25c7eda43004bed",
- "sha256:aa8eba3db3d8761db161003e2d0586608092e217151d7458206e243be5a43843",
- "sha256:bea760a63ce9bba566c23f726d72b3c0250e2fa2569909e2d83cda1534c79443",
- "sha256:c3f511a3c58676147c277eff0224c061dd5a6a8e1373572ac817ac6324f1b1e0",
- "sha256:c9d317efde4bafbc1561509bfa8a23c5cab66c44d49ab5b63ff690f5159b2304",
- "sha256:cc411ad324a4486b142c41d9b2b6a722c534096963688d879ea6fa8a35028258",
- "sha256:cdc13a1682b2a6241080745b1953719e7fe0850b40a5c71ca574f090a1391df6",
- "sha256:cfd7c5dd3c35c19cec59c63df9571c67c6d6e5c92e0fe63517920e97f61106d1",
- "sha256:e1cacf4796b20865789083252186ce9dc6cc59eca0c2e79cca332bdff24ac481",
- "sha256:e70d4e467e243455492f5de463b72151cc400710ac03a0678206a5f27e79ddef",
- "sha256:ecc930ae559ea8a43377e8b60ca6f8d61ac532fc57efb915d899de4a67928efd",
- "sha256:f161af26f596131b63b236372e4ce40f3167c1b5b5d459b29d2514bd8c9dc9ee"
- ],
- "index": "pypi",
- "version": "==4.5.2"
+ "sha256:098fb713b31050463751dcc694878e1d39f316b86366fb9fe3fbbe5396ac9fab",
+ "sha256:0e89f5d422988c65e6936e4ec0fe54d6f73f3128c80eb7ecc3b87f595523607b",
+ "sha256:189ad47203e846a7a4951c17694d845b6ade7917c47c64b29b86526eefc3adf5",
+ "sha256:1d87936cb5801c557f3e981c9c193861264c01209cb3ad0964a16310ca1b3301",
+ "sha256:211b3bcf5da70c2d4b84d09232534ad1d78320762e2c59dedc73bf01cb1fc45b",
+ "sha256:2358809cc64394617f2719147a58ae26dac9e21bae772b45cfb80baa26bfca5d",
+ "sha256:23c83112b4dada0b75789d73f949dbb4e8f29a0a3511647024a398ebd023347b",
+ "sha256:24e811118aab6abe3ce23ff0d7d38932329c513f9cef849d3ee88b0f848f2aa9",
+ "sha256:2d5896ddf5389560257bbe89317ca7bcb4e54a02b53a3e572e1ce4226512b51b",
+ "sha256:2d6571c48328be4304aee031d2d5046cbc8aed5740c654575613c5a4f5a11311",
+ "sha256:2e311a10f3e85250910a615fe194839a04a0f6bc4e8e5bb5cac221344e3a7891",
+ "sha256:302160eb6e9764168e01d8c9ec6becddeb87776e81d3fcb0d97954dd51d48e0a",
+ "sha256:3a7a380bfecc551cfd67d6e8ad9faa91289173bdf12e9cfafbd2bdec0d7b1ec1",
+ "sha256:3d9b2b72eb0dbbdb0e276403873ecfae870599c83ba22cadff2db58541e72856",
+ "sha256:475325e037fdf068e0c2140b818518cf6bc4aa72435c407a798b2db9f8e90810",
+ "sha256:4b7572145054330c8e324a72d808c8c8fbe12be33368db28c39a255ad5f7fb51",
+ "sha256:4fff34721b628cce9eb4538cf9a73d02e0f3da4f35a515773cce6f5fe413b360",
+ "sha256:56eff8c6fb7bc4bcca395fdff494c52712b7a57486e4fbde34c31bb9da4c6cc4",
+ "sha256:573b2f5496c7e9f4985de70b9bbb4719ffd293d5565513e04ac20e42e6e5583f",
+ "sha256:7ecaef52fd9b9535ae5f01a1dd2651f6608e4ec9dc136fc4dfe7ebe3c3ddb230",
+ "sha256:803a80d72d1f693aa448566be46ffd70882d1ad8fc689a2e22afe63035eb998a",
+ "sha256:8862d1c2c020cb7a03b421a9a7b4fe046a208db30994fc8ff68c627a7915987f",
+ "sha256:9b06690224258db5cd39a84e993882a6874676f5de582da57f3df3a82ead9174",
+ "sha256:a71400b90b3599eb7bf241f947932e18a066907bf84617d80817998cee81e4bf",
+ "sha256:bb252f802f91f59767dcc559744e91efa9df532240a502befd874b54571417bd",
+ "sha256:be1ebf9cc25ab5399501c9046a7dcdaa9e911802ed0e12b7d620cd4bbf0518b3",
+ "sha256:be7c65e34d1b50ab7093b90427cbc488260e4b3a38ef2435d65b62e9fa3d798a",
+ "sha256:c0dac835c1a22621ffa5e5f999d57359c790c52bbd1c687fe514ae6924f65ef5",
+ "sha256:c152b2e93b639d1f36ec5a8ca24cde4a8eefb2b6b83668fcd8e83a67badcb367",
+ "sha256:d182eada8ea0de61a45a526aa0ae4bcd222f9673424e65315c35820291ff299c",
+ "sha256:d18331ea905a41ae71596502bd4c9a2998902328bbabd29e3d0f5f8569fabad1",
+ "sha256:d20d32cbb31d731def4b1502294ca2ee99f9249b63bc80e03e67e8f8e126dea8",
+ "sha256:d4ad7fd3269281cb471ad6c7bafca372e69789540d16e3755dd717e9e5c9d82f",
+ "sha256:d6f8c23f65a4bfe4300b85f1f40f6c32569822d08901db3b6454ab785d9117cc",
+ "sha256:d84d741c6e35c9f3e7406cb7c4c2e08474c2a6441d59322a00dcae65aac6315d",
+ "sha256:e65c221b2115a91035b55a593b6eb94aa1206fa3ab374f47c6dc10d364583ff9",
+ "sha256:f98b6f256be6cec8dd308a8563976ddaff0bdc18b730720f6f4bee927ffe926f"
+ ],
+ "index": "pypi",
+ "version": "==4.6.1"
},
"markdownify": {
"hashes": [
@@ -374,52 +343,13 @@
"index": "pypi",
"version": "==0.5.3"
},
- "markupsafe": {
- "hashes": [
- "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473",
- "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161",
- "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235",
- "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5",
- "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42",
- "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff",
- "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b",
- "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1",
- "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e",
- "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183",
- "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66",
- "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b",
- "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1",
- "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15",
- "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1",
- "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e",
- "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b",
- "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905",
- "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735",
- "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d",
- "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e",
- "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d",
- "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c",
- "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21",
- "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2",
- "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5",
- "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b",
- "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6",
- "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f",
- "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f",
- "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2",
- "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7",
- "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==1.1.1"
- },
"more-itertools": {
"hashes": [
- "sha256:6f83822ae94818eae2612063a5101a7311e68ae8002005b5e05f03fd74a86a20",
- "sha256:9b30f12df9393f0d28af9210ff8efe48d10c94f73e5daf886f10c4b0b0b4f03c"
+ "sha256:8e1a2a43b2f2727425f2b5839587ae37093f19153dc26c0927d1048ff6557330",
+ "sha256:b3a9005928e5bed54076e6e549c792b306fddfe72b2d1d22dd63d42d5d3899cf"
],
"index": "pypi",
- "version": "==8.5.0"
+ "version": "==8.6.0"
},
"multidict": {
"hashes": [
@@ -451,14 +381,6 @@
"markers": "python_version >= '3.5'",
"version": "==4.0.2"
},
- "packaging": {
- "hashes": [
- "sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8",
- "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==20.4"
- },
"pamqp": {
"hashes": [
"sha256:2f81b5c186f668a67f165193925b6bfd83db4363a6222f599517f29ecee60b02",
@@ -508,21 +430,14 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.20"
},
- "pygments": {
- "hashes": [
- "sha256:307543fe65c0947b126e83dd5a61bd8acbd84abec11f43caebaf5534cbc17998",
- "sha256:926c3f319eda178d1bd90851e4317e6d8cdb5e292a3386aac9bd75eca29cf9c7"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==2.7.1"
- },
- "pyparsing": {
+ "pyreadline": {
"hashes": [
- "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1",
- "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"
+ "sha256:4530592fc2e85b25b1a9f79664433da09237c1a270e4d78ea5aa3a2c7229e2d1",
+ "sha256:65540c21bfe14405a3a77e4c085ecfce88724743a4ead47c66b84defcf82c32e",
+ "sha256:9ce5fa65b8992dfa373bddc5b6e0864ead8f291c94fbfec05fbd5c836162e67b"
],
- "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==2.4.7"
+ "markers": "sys_platform == 'win32'",
+ "version": "==2.1"
},
"python-dateutil": {
"hashes": [
@@ -532,13 +447,6 @@
"index": "pypi",
"version": "==2.8.1"
},
- "pytz": {
- "hashes": [
- "sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed",
- "sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048"
- ],
- "version": "==2020.1"
- },
"pyyaml": {
"hashes": [
"sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97",
@@ -564,21 +472,13 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==3.5.3"
},
- "requests": {
- "hashes": [
- "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b",
- "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898"
- ],
- "index": "pypi",
- "version": "==2.24.0"
- },
"sentry-sdk": {
"hashes": [
- "sha256:c9c0fa1412bad87104c4eee8dd36c7bbf60b0d92ae917ab519094779b22e6d9a",
- "sha256:e159f7c919d19ae86e5a4ff370fccc45149fab461fbeb93fb5a735a0b33a9cb1"
+ "sha256:17b725df2258354ccb39618ae4ead29651aa92c01a92acf72f98efe06ee2e45a",
+ "sha256:9040539485226708b5cad0401d76628fba4eed9154bf301c50579767afe344fd"
],
"index": "pypi",
- "version": "==0.17.8"
+ "version": "==0.19.2"
},
"six": {
"hashes": [
@@ -588,19 +488,12 @@
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.15.0"
},
- "snowballstemmer": {
- "hashes": [
- "sha256:209f257d7533fdb3cb73bdbd24f436239ca3b2fa67d56f6ff88e86be08cc5ef0",
- "sha256:df3bac3df4c2c01363f3dd2cfa78cce2840a79b9f1c2d2de9ce8d31683992f52"
- ],
- "version": "==2.0.0"
- },
"sortedcontainers": {
"hashes": [
- "sha256:4e73a757831fc3ca4de2859c422564239a31d8213d09a2a666e375807034d2ba",
- "sha256:c633ebde8580f241f274c1f8994a665c0e54a17724fecd0cae2f079e09c36d3f"
+ "sha256:37257a32add0a3ee490bb170b599e93095eed89a55da91fa9f48753ea12fd73f",
+ "sha256:59cc937650cf60d677c16775597c89a960658a09cf7c1a668f86e1e4464b10a1"
],
- "version": "==2.2.2"
+ "version": "==2.3.0"
},
"soupsieve": {
"hashes": [
@@ -610,62 +503,6 @@
"markers": "python_version >= '3.0'",
"version": "==2.0.1"
},
- "sphinx": {
- "hashes": [
- "sha256:b4c750d546ab6d7e05bdff6ac24db8ae3e8b8253a3569b754e445110a0a12b66",
- "sha256:fc312670b56cb54920d6cc2ced455a22a547910de10b3142276495ced49231cb"
- ],
- "index": "pypi",
- "version": "==2.4.4"
- },
- "sphinxcontrib-applehelp": {
- "hashes": [
- "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a",
- "sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==1.0.2"
- },
- "sphinxcontrib-devhelp": {
- "hashes": [
- "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e",
- "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==1.0.2"
- },
- "sphinxcontrib-htmlhelp": {
- "hashes": [
- "sha256:3c0bc24a2c41e340ac37c85ced6dafc879ab485c095b1d65d2461ac2f7cca86f",
- "sha256:e8f5bb7e31b2dbb25b9cc435c8ab7a79787ebf7f906155729338f3156d93659b"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==1.0.3"
- },
- "sphinxcontrib-jsmath": {
- "hashes": [
- "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178",
- "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==1.0.1"
- },
- "sphinxcontrib-qthelp": {
- "hashes": [
- "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72",
- "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==1.0.3"
- },
- "sphinxcontrib-serializinghtml": {
- "hashes": [
- "sha256:eaa0eccc86e982a9b939b2b82d12cc5d013385ba5eadcc7e4fed23f4405f77bc",
- "sha256:f242a81d423f59617a8e5cf16f5d4d74e28ee9a66f9e5b637a18082991db5a9a"
- ],
- "markers": "python_version >= '3.5'",
- "version": "==1.1.4"
- },
"statsd": {
"hashes": [
"sha256:c610fb80347fca0ef62666d241bce64184bd7cc1efe582f9690e045c25535eaa",
@@ -676,34 +513,34 @@
},
"urllib3": {
"hashes": [
- "sha256:91056c15fa70756691db97756772bb1eb9678fa585d9184f24534b100dc60f4a",
- "sha256:e7983572181f5e1522d9c98453462384ee92a0be7fac5f1413a1e35c56cc0461"
+ "sha256:8d7eaa5a82a1cac232164990f04874c594c9453ec55eef02eab885aa02fc17a2",
+ "sha256:f5321fbe4bf3fefa0efd0bfe7fb14e90909eb62a48ccda331726b4319897dd5e"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
- "version": "==1.25.10"
+ "version": "==1.25.11"
},
"yarl": {
"hashes": [
- "sha256:04a54f126a0732af75e5edc9addeaa2113e2ca7c6fce8974a63549a70a25e50e",
- "sha256:3cc860d72ed989f3b1f3abbd6ecf38e412de722fb38b8f1b1a086315cf0d69c5",
- "sha256:5d84cc36981eb5a8533be79d6c43454c8e6a39ee3118ceaadbd3c029ab2ee580",
- "sha256:5e447e7f3780f44f890360ea973418025e8c0cdcd7d6a1b221d952600fd945dc",
- "sha256:61d3ea3c175fe45f1498af868879c6ffeb989d4143ac542163c45538ba5ec21b",
- "sha256:67c5ea0970da882eaf9efcf65b66792557c526f8e55f752194eff8ec722c75c2",
- "sha256:6f6898429ec3c4cfbef12907047136fd7b9e81a6ee9f105b45505e633427330a",
- "sha256:7ce35944e8e61927a8f4eb78f5bc5d1e6da6d40eadd77e3f79d4e9399e263921",
- "sha256:b7c199d2cbaf892ba0f91ed36d12ff41ecd0dde46cbf64ff4bfe997a3ebc925e",
- "sha256:c15d71a640fb1f8e98a1423f9c64d7f1f6a3a168f803042eaf3a5b5022fde0c1",
- "sha256:c22607421f49c0cb6ff3ed593a49b6a99c6ffdeaaa6c944cdda83c2393c8864d",
- "sha256:c604998ab8115db802cc55cb1b91619b2831a6128a62ca7eea577fc8ea4d3131",
- "sha256:d088ea9319e49273f25b1c96a3763bf19a882cff774d1792ae6fba34bd40550a",
- "sha256:db9eb8307219d7e09b33bcb43287222ef35cbcf1586ba9472b0a4b833666ada1",
- "sha256:e31fef4e7b68184545c3d68baec7074532e077bd1906b040ecfba659737df188",
- "sha256:e32f0fb443afcfe7f01f95172b66f279938fbc6bdaebe294b0ff6747fb6db020",
- "sha256:fcbe419805c9b20db9a51d33b942feddbf6e7fb468cb20686fd7089d4164c12a"
+ "sha256:040b237f58ff7d800e6e0fd89c8439b841f777dd99b4a9cca04d6935564b9409",
+ "sha256:17668ec6722b1b7a3a05cc0167659f6c95b436d25a36c2d52db0eca7d3f72593",
+ "sha256:3a584b28086bc93c888a6c2aa5c92ed1ae20932f078c46509a66dce9ea5533f2",
+ "sha256:4439be27e4eee76c7632c2427ca5e73703151b22cae23e64adb243a9c2f565d8",
+ "sha256:48e918b05850fffb070a496d2b5f97fc31d15d94ca33d3d08a4f86e26d4e7c5d",
+ "sha256:9102b59e8337f9874638fcfc9ac3734a0cfadb100e47d55c20d0dc6087fb4692",
+ "sha256:9b930776c0ae0c691776f4d2891ebc5362af86f152dd0da463a6614074cb1b02",
+ "sha256:b3b9ad80f8b68519cc3372a6ca85ae02cc5a8807723ac366b53c0f089db19e4a",
+ "sha256:bc2f976c0e918659f723401c4f834deb8a8e7798a71be4382e024bcc3f7e23a8",
+ "sha256:c22c75b5f394f3d47105045ea551e08a3e804dc7e01b37800ca35b58f856c3d6",
+ "sha256:c52ce2883dc193824989a9b97a76ca86ecd1fa7955b14f87bf367a61b6232511",
+ "sha256:ce584af5de8830d8701b8979b18fcf450cef9a382b1a3c8ef189bedc408faf1e",
+ "sha256:da456eeec17fa8aa4594d9a9f27c0b1060b6a75f2419fe0c00609587b2695f4a",
+ "sha256:db6db0f45d2c63ddb1a9d18d1b9b22f308e52c83638c26b422d520a815c4b3fb",
+ "sha256:df89642981b94e7db5596818499c4b2219028f2a528c9c37cc1de45bf2fd3a3f",
+ "sha256:f18d68f2be6bf0e89f1521af2b1bb46e66ab0018faafa81d70f358153170a317",
+ "sha256:f379b7f83f23fe12823085cd6b906edc49df969eb99757f58ff382349a3303c6"
],
"markers": "python_version >= '3.5'",
- "version": "==1.6.0"
+ "version": "==1.5.1"
}
},
"develop": {
@@ -716,11 +553,11 @@
},
"attrs": {
"hashes": [
- "sha256:26b54ddbbb9ee1d34d5d3668dd37d6cf74990ab23c828c2888dccdceee395594",
- "sha256:fce7fc47dfc976152e82d53ff92fa0407700c21acd20886a13777a0d20e655dc"
+ "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6",
+ "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==20.2.0"
+ "version": "==20.3.0"
},
"cfgv": {
"hashes": [
@@ -786,19 +623,19 @@
},
"flake8": {
"hashes": [
- "sha256:15e351d19611c887e482fb960eae4d44845013cc142d42896e9862f775d8cf5c",
- "sha256:f04b9fcbac03b0a3e58c0ab3a0ecc462e023a9faf046d57794184028123aa208"
+ "sha256:749dbbd6bfd0cf1318af27bf97a14e28e5ff548ef8e5b1566ccfb25a11e7c839",
+ "sha256:aadae8761ec651813c24be05c6f7b4680857ef6afaae4651a4eccaef97ce6c3b"
],
"index": "pypi",
- "version": "==3.8.3"
+ "version": "==3.8.4"
},
"flake8-annotations": {
"hashes": [
- "sha256:09fe1aa3f40cb8fef632a0ab3614050a7584bb884b6134e70cf1fc9eeee642fa",
- "sha256:5bda552f074fd6e34276c7761756fa07d824ffac91ce9c0a8555eb2bc5b92d7a"
+ "sha256:0bcebb0792f1f96d617ded674dca7bf64181870bfe5dace353a1483551f8e5f1",
+ "sha256:bebd11a850f6987a943ce8cdff4159767e0f5f89b3c88aca64680c2175ee02df"
],
"index": "pypi",
- "version": "==2.4.0"
+ "version": "==2.4.1"
},
"flake8-bugbear": {
"hashes": [
@@ -856,11 +693,11 @@
},
"identify": {
"hashes": [
- "sha256:7c22c384a2c9b32c5cc891d13f923f6b2653aa83e2d75d8f79be240d6c86c4f4",
- "sha256:da683bfb7669fa749fc7731f378229e2dbf29a1d1337cbde04106f02236eb29d"
+ "sha256:5dd84ac64a9a115b8e0b27d1756b244b882ad264c3c423f42af8235a6e71ca12",
+ "sha256:c9504ba6a043ee2db0a9d69e43246bc138034895f6338d5aed1b41e4a73b1513"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==1.5.5"
+ "version": "==1.5.9"
},
"mccabe": {
"hashes": [
@@ -886,11 +723,11 @@
},
"pre-commit": {
"hashes": [
- "sha256:810aef2a2ba4f31eed1941fc270e72696a1ad5590b9751839c90807d0fff6b9a",
- "sha256:c54fd3e574565fe128ecc5e7d2f91279772ddb03f8729645fa812fe809084a70"
+ "sha256:22e6aa3bd571debb01eb7d34483f11c01b65237be4eebbf30c3d4fb65762d315",
+ "sha256:905ebc9b534b991baec87e934431f2d0606ba27f2b90f7f652985f5a5b8b6ae6"
],
"index": "pypi",
- "version": "==2.7.1"
+ "version": "==2.8.2"
},
"pycodestyle": {
"hashes": [
@@ -950,10 +787,11 @@
},
"toml": {
"hashes": [
- "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f",
- "sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88"
+ "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b",
+ "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"
],
- "version": "==0.10.1"
+ "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "version": "==0.10.2"
},
"unittest-xml-reporting": {
"hashes": [
@@ -965,11 +803,11 @@
},
"virtualenv": {
"hashes": [
- "sha256:43add625c53c596d38f971a465553f6318decc39d98512bc100fa1b1e839c8dc",
- "sha256:e0305af10299a7fb0d69393d8f04cb2965dda9351140d11ac8db4e5e3970451b"
+ "sha256:b0011228208944ce71052987437d3843e05690b2f23d1c7da4263fde104c97a2",
+ "sha256:b8d6110f493af256a40d65e29846c69340a947669eec8ce784fcf3dd3af28380"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==20.0.31"
+ "version": "==20.1.0"
}
}
}
--
cgit v1.2.3
From 70ee01b8726921e8389abd4f69ffb0e2ceee0773 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 10 Nov 2020 18:22:11 +0100
Subject: Generalise tag filter hint to accept all containers
---
bot/exts/info/doc/_parsing.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 0883b9f42..93b6f0def 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -5,7 +5,7 @@ import re
import string
import textwrap
from functools import partial
-from typing import Callable, Collection, Iterable, List, Optional, TYPE_CHECKING, Tuple, Union
+from typing import Callable, Collection, Container, Iterable, List, Optional, TYPE_CHECKING, Union
from bs4 import BeautifulSoup
from bs4.element import NavigableString, PageElement, Tag
@@ -99,7 +99,7 @@ def _split_parameters(parameters_string: str) -> List[str]:
def _find_elements_until_tag(
start_element: PageElement,
- end_tag_filter: Union[Tuple[str, ...], Callable[[Tag], bool]],
+ end_tag_filter: Union[Container[str], Callable[[Tag], bool]],
*,
func: Callable,
include_strings: bool = False,
@@ -108,7 +108,7 @@ def _find_elements_until_tag(
"""
Get all elements up to `limit` or until a tag matching `end_tag_filter` is found.
- `end_tag_filter` can be either a tuple of string names to check against,
+ `end_tag_filter` can be either a container of string names to check against,
or a filtering callable that's applied to tags.
When `include_strings` is True, `NavigableString`s from the document will be included in the result alongside `Tag`s.
@@ -116,12 +116,12 @@ def _find_elements_until_tag(
`func` takes in a BeautifulSoup unbound method for finding multiple elements, such as `BeautifulSoup.find_all`.
The method is then iterated over and all elements until the matching tag or the limit are added to the return list.
"""
- use_tuple_filter = isinstance(end_tag_filter, tuple)
+ use_container_filter = not callable(end_tag_filter)
elements = []
for element in func(start_element, name=Strainer(include_strings=include_strings), limit=limit):
if isinstance(element, Tag):
- if use_tuple_filter:
+ if use_container_filter:
if element.name in end_tag_filter:
break
elif end_tag_filter(element):
--
cgit v1.2.3
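A minimal standalone sketch (not part of the patch) of why `Container[str]` is the right generalisation here: the membership check in `_find_elements_until_tag` only needs `__contains__`, so tuples, sets, and frozensets all work as end tag filters, while callables keep working through the `callable()` branch.

    from typing import Callable, Container, Union

    def matches_end_tag(end_tag_filter: Union[Container[str], Callable[[str], bool]], name: str) -> bool:
        if not callable(end_tag_filter):
            return name in end_tag_filter  # any Container[str]: tuple, set, frozenset, ...
        return end_tag_filter(name)        # the filtering-callable branch

    assert matches_end_tag(("table", "ul"), "ul")
    assert matches_end_tag({"table", "ul"}, "table")
    assert matches_end_tag(lambda tag_name: tag_name.startswith("h"), "h2")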
From beebeac45cf487e59ca4d76a84472c898bc23b06 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 10 Nov 2020 19:20:44 +0100
Subject: Rename variables for clarity
---
bot/exts/info/doc/_cog.py | 4 ++--
bot/exts/info/doc/_parsing.py | 18 +++++++++---------
2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 25477fe07..4e48e81e5 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -227,8 +227,8 @@ class DocCog(commands.Cog):
symbol = f"{group_name}.{symbol}"
self.renamed_symbols.add(symbol)
- elif (overridden_symbol_group := original_symbol.group) in FORCE_PREFIX_GROUPS:
- overridden_symbol = f"{overridden_symbol_group}.{symbol}"
+ elif (original_symbol_group := original_symbol.group) in FORCE_PREFIX_GROUPS:
+ overridden_symbol = f"{original_symbol_group}.{symbol}"
if overridden_symbol in self.renamed_symbols:
overridden_symbol = f"{api_package_name}.{overridden_symbol}"
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 93b6f0def..9140f635a 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -42,9 +42,9 @@ _NO_SIGNATURE_GROUPS = {
"templatetag",
"term",
}
-_EMBED_CODE_BLOCK_LENGTH = 61
+_EMBED_CODE_BLOCK_LINE_LENGTH = 61
# _MAX_SIGNATURE_AMOUNT code block wrapped lines with py syntax highlight
-_MAX_SIGNATURES_LENGTH = (_EMBED_CODE_BLOCK_LENGTH + 8) * _MAX_SIGNATURE_AMOUNT
+_MAX_SIGNATURES_LENGTH = (_EMBED_CODE_BLOCK_LINE_LENGTH + 8) * _MAX_SIGNATURE_AMOUNT
# Maximum discord message length - signatures on top
_MAX_DESCRIPTION_LENGTH = 2000 - _MAX_SIGNATURES_LENGTH
_TRUNCATE_STRIP_CHARACTERS = "!?:;." + string.whitespace
@@ -189,7 +189,7 @@ def _truncate_signatures(signatures: Collection[str]) -> Union[List[str], Collec
if not sum(len(signature) for signature in signatures) > _MAX_SIGNATURES_LENGTH:
return signatures
- max_signature_length = _EMBED_CODE_BLOCK_LENGTH * (_MAX_SIGNATURE_AMOUNT + 1 - len(signatures))
+ max_signature_length = _EMBED_CODE_BLOCK_LINE_LENGTH * (_MAX_SIGNATURE_AMOUNT + 1 - len(signatures))
formatted_signatures = []
for signature in signatures:
signature = signature.strip()
@@ -221,12 +221,12 @@ def _get_truncated_description(
max_length: int,
) -> str:
"""
- Truncate markdown from `elements` to be at most `max_length` characters visually.
+ Truncate markdown from `elements` to be at most `max_length` characters when rendered.
`max_length` limits the length of the rendered characters in the string,
with the real string length limited to `_MAX_DESCRIPTION_LENGTH` to accommodate discord length limits
"""
- visual_length = 0
+ rendered_length = 0
real_length = 0
result = []
shortened = False
@@ -234,7 +234,7 @@ def _get_truncated_description(
for element in elements:
is_tag = isinstance(element, Tag)
element_length = len(element.text) if is_tag else len(element)
- if visual_length + element_length < max_length:
+ if rendered_length + element_length < max_length:
if is_tag:
element_markdown = markdown_converter.process_tag(element)
else:
@@ -247,7 +247,7 @@ def _get_truncated_description(
shortened = True
break
real_length += element_markdown_length
- visual_length += element_length
+ rendered_length += element_length
else:
shortened = True
break
@@ -258,7 +258,7 @@ def _get_truncated_description(
return markdown_string
-def _parse_into_markdown(signatures: Optional[List[str]], description: Iterable[Tag], url: str) -> str:
+def _create_markdown(signatures: Optional[List[str]], description: Iterable[Tag], url: str) -> str:
"""
Create a markdown string with the signatures at the top, and the converted html description below them.
@@ -309,4 +309,4 @@ def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> str:
else:
signature = _get_signatures(symbol_heading)
description = _get_dd_description(symbol_heading)
- return _parse_into_markdown(signature, description, symbol_data.url).replace('¶', '')
+ return _create_markdown(signature, description, symbol_data.url).replace('¶', '')
--
cgit v1.2.3
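The rename makes the two running totals easier to tell apart. As a small illustration (the markdown is made up for the example), the rendered length counts what the reader sees, while the real length counts the markdown characters that count against Discord's limits:

    element_text = "very important"          # what the reader sees when rendered
    element_markdown = "**very important**"  # what is actually sent to discord

    rendered_length = len(element_text)      # 14, checked against max_length
    real_length = len(element_markdown)      # 18, checked against _MAX_DESCRIPTION_LENGTH
    assert (rendered_length, real_length) == (14, 18)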
From 7348b86bfedfc24c67d97a08d839a18956a6bff6 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 10 Nov 2020 22:17:15 +0100
Subject: Update outdated docstring
---
bot/exts/info/doc/_parsing.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 9140f635a..82b2ca808 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -263,7 +263,7 @@ def _create_markdown(signatures: Optional[List[str]], description: Iterable[Tag]
Create a markdown string with the signatures at the top, and the converted html description below them.
The signatures are wrapped in python codeblocks, separated from the description by a newline.
- The result string is truncated to be max 1000 symbols long.
+ The result markdown string is at most 750 rendered characters for the description, with signatures at the start.
"""
description = _get_truncated_description(description, DocMarkdownConverter(bullets="•", page_url=url), 750)
description = _WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
--
cgit v1.2.3
From ddb6b11575c05c8417f5607aec98fb1c09e351af Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 10 Nov 2020 22:22:27 +0100
Subject: Adjust unparseable symbol behaviour
With redis we need to make sure we don't store the "error"
string in the cache. Returning None instead of the string
and setting the message manually in the caller is nicer
than checking against a string.
---
bot/exts/info/doc/_cog.py | 5 ++++-
bot/exts/info/doc/_parsing.py | 4 ++--
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 4e48e81e5..fa59bcc42 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -292,7 +292,10 @@ class DocCog(commands.Cog):
if markdown is None:
log.debug(f"Redis cache miss for symbol `{symbol}`.")
markdown = await self.item_fetcher.get_markdown(self.bot.http_session, symbol_info)
- await self.doc_cache.set(redis_key, markdown)
+ if markdown is not None:
+ await self.doc_cache.set(redis_key, markdown)
+ else:
+ markdown = "Unable to parse the requested symbol."
embed = discord.Embed(
title=discord.utils.escape_markdown(symbol),
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 82b2ca808..72e81982a 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -287,7 +287,7 @@ def _class_filter_factory(class_names: Iterable[str]) -> Callable[[Tag], bool]:
return match_tag
-def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> str:
+def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> Optional[str]:
"""
Return parsed markdown of the passed symbol using the passed in soup, truncated to 1000 characters.
@@ -296,7 +296,7 @@ def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> str:
symbol_heading = soup.find(id=symbol_data.symbol_id)
if symbol_heading is None:
log.warning("Symbol present in loaded inventories not found on site, consider refreshing inventories.")
- return "Unable to parse the requested symbol."
+ return None
signature = None
# Modules, doc pages and labels don't point to description list tags but to tags like divs,
# no special parsing can be done so we only try to include what's under them.
--
cgit v1.2.3
From d936e5bc049e2e93beca3c62430d048d9f9cf47b Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 11 Nov 2020 18:23:01 +0100
Subject: Cancel scheduled inventory updates on all refreshes
---
bot/exts/info/doc/_cog.py | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index fa59bcc42..822f682bf 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -250,6 +250,8 @@ class DocCog(commands.Cog):
async def refresh_inventory(self) -> None:
"""Refresh internal documentation inventory."""
log.debug("Refreshing documentation inventory...")
+ for inventory in self.scheduled_inventories:
+ self.inventory_scheduler.cancel(inventory)
# Clear the old base URLs and doc symbols to ensure
# that we start from a fresh local dataset.
@@ -418,9 +420,6 @@ class DocCog(commands.Cog):
"""
await self.bot.api_client.delete(f'bot/documentation-links/{package_name}')
- if package_name in self.scheduled_inventories:
- self.inventory_scheduler.cancel(package_name)
-
async with ctx.typing():
# Rebuild the inventory to ensure that everything
# that was from this package is properly deleted.
@@ -431,9 +430,6 @@ class DocCog(commands.Cog):
@commands.has_any_role(*MODERATION_ROLES)
async def refresh_command(self, ctx: commands.Context) -> None:
"""Refresh inventories and send differences to channel."""
- for inventory in self.scheduled_inventories:
- self.inventory_scheduler.cancel(inventory)
-
old_inventories = set(self.base_urls)
with ctx.typing():
await self.refresh_inventory()
--
cgit v1.2.3
From 2bae8eeed0eae75d782da097e78826650e1ac498 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Thu, 12 Nov 2020 19:44:26 +0100
Subject: Intern relative url paths
Group name interning was also moved to DocItem creation
to keep the interning behaviour in one place.
---
bot/exts/info/doc/_cog.py | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 822f682bf..ecc648d89 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -218,10 +218,8 @@ class DocCog(commands.Cog):
for symbol, relative_doc_url in items:
if "/" in symbol:
continue # skip unreachable symbols with slashes
- # Intern the group names since they're reused in all the DocItems
- # to remove unnecessary memory consumption from them being unique objects
- group_name = sys.intern(group.split(":")[1])
+ group_name = group.split(":")[1]
if (original_symbol := self.doc_symbols.get(symbol)) is not None:
if group_name in FORCE_PREFIX_GROUPS:
symbol = f"{group_name}.{symbol}"
@@ -240,7 +238,14 @@ class DocCog(commands.Cog):
self.renamed_symbols.add(symbol)
relative_url_path, _, symbol_id = relative_doc_url.partition("#")
- symbol_item = DocItem(api_package_name, group_name, base_url, relative_url_path, symbol_id)
+ # Intern fields that have shared content so we're not storing unique strings for every object
+ symbol_item = DocItem(
+ api_package_name,
+ sys.intern(group_name),
+ base_url,
+ sys.intern(relative_url_path),
+ symbol_id
+ )
self.doc_symbols[symbol] = symbol_item
self.item_fetcher.add_item(symbol_item)
--
cgit v1.2.3
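A minimal sketch of what the interning buys: `sys.intern` returns a canonical object for equal strings, so the many `DocItem`s that share a group name or a relative url path reference one string object instead of each carrying its own copy.

    import sys

    a = sys.intern("library/functions")
    b = sys.intern("library/functions")
    assert a is b  # one shared object, not merely two equal strings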
From aeac77a08cdafadcc180a400c32ce21732d7d20d Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 14 Nov 2020 02:39:07 +0100
Subject: Limit newlines in doc descriptions
---
bot/exts/info/doc/_parsing.py | 48 ++++++++++++++++++++++++++++---------------
1 file changed, 32 insertions(+), 16 deletions(-)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 72e81982a..418405ca9 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -10,6 +10,7 @@ from typing import Callable, Collection, Container, Iterable, List, Optional, TY
from bs4 import BeautifulSoup
from bs4.element import NavigableString, PageElement, Tag
+from bot.utils.helpers import find_nth_occurrence
from ._html import Strainer
from ._markdown import DocMarkdownConverter
if TYPE_CHECKING:
@@ -219,21 +220,23 @@ def _get_truncated_description(
elements: Iterable[Union[Tag, NavigableString]],
markdown_converter: DocMarkdownConverter,
max_length: int,
+ max_lines: int,
) -> str:
"""
- Truncate markdown from `elements` to be at most `max_length` characters when rendered.
+ Truncate markdown from `elements` to be at most `max_length` characters when rendered or `max_lines` newlines.
`max_length` limits the length of the rendered characters in the string,
with the real string length limited to `_MAX_DESCRIPTION_LENGTH` to accommodate discord length limits
"""
+ result = ""
+ markdown_element_ends = []
rendered_length = 0
- real_length = 0
- result = []
- shortened = False
+ tag_end_index = 0
for element in elements:
is_tag = isinstance(element, Tag)
element_length = len(element.text) if is_tag else len(element)
+
if rendered_length + element_length < max_length:
if is_tag:
element_markdown = markdown_converter.process_tag(element)
@@ -241,21 +244,29 @@ def _get_truncated_description(
element_markdown = markdown_converter.process_text(element)
element_markdown_length = len(element_markdown)
- if real_length + element_markdown_length < _MAX_DESCRIPTION_LENGTH:
- result.append(element_markdown)
- else:
- shortened = True
- break
- real_length += element_markdown_length
rendered_length += element_length
+ tag_end_index += element_markdown_length
+
+ if not element_markdown.isspace():
+ markdown_element_ends.append(tag_end_index)
+ result += element_markdown
else:
- shortened = True
break
- markdown_string = "".join(result)
- if shortened:
- markdown_string = markdown_string.rstrip(_TRUNCATE_STRIP_CHARACTERS) + "..."
- return markdown_string
+ if not markdown_element_ends:
+ return ""
+
+ newline_truncate_index = find_nth_occurrence(result, "\n", max_lines)
+ if newline_truncate_index is not None and newline_truncate_index < _MAX_DESCRIPTION_LENGTH:
+ truncate_index = newline_truncate_index
+ else:
+ truncate_index = _MAX_DESCRIPTION_LENGTH
+
+ if truncate_index >= markdown_element_ends[-1]:
+ return result
+
+ markdown_truncate_index = max(cut for cut in markdown_element_ends if cut < truncate_index)
+ return result[:markdown_truncate_index].strip(_TRUNCATE_STRIP_CHARACTERS) + "..."
def _create_markdown(signatures: Optional[List[str]], description: Iterable[Tag], url: str) -> str:
@@ -265,7 +276,12 @@ def _create_markdown(signatures: Optional[List[str]], description: Iterable[Tag]
The signatures are wrapped in python codeblocks, separated from the description by a newline.
The result markdown string is at most 750 rendered characters for the description, with signatures at the start.
"""
- description = _get_truncated_description(description, DocMarkdownConverter(bullets="•", page_url=url), 750)
+ description = _get_truncated_description(
+ description,
+ markdown_converter=DocMarkdownConverter(bullets="•", page_url=url),
+ max_length=750,
+ max_lines=13
+ )
description = _WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
if signatures is not None:
formatted_markdown = "".join(f"```py\n{signature}```" for signature in _truncate_signatures(signatures))
--
cgit v1.2.3
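A standalone sketch of the truncation strategy, with a stand-in for the `find_nth_occurrence` helper the patch imports (its real implementation lives in `bot.utils.helpers` and is assumed here): element end offsets are recorded while the markdown is built, so the cut always lands on a whole element before the newline or length limit.

    from typing import List, Optional

    def find_nth_occurrence(string: str, substring: str, n: int) -> Optional[int]:
        # Assumed helper behaviour: index of the nth occurrence, or None.
        index = -1
        for _ in range(n):
            index = string.find(substring, index + 1)
            if index == -1:
                return None
        return index

    def truncate_markdown(parts: List[str], max_lines: int, max_length: int) -> str:
        result = ""
        element_ends = []
        for part in parts:
            element_ends.append(len(result) + len(part))
            result += part
        if not element_ends:
            return ""

        newline_index = find_nth_occurrence(result, "\n", max_lines)
        truncate_index = newline_index if newline_index is not None else max_length
        if truncate_index >= element_ends[-1]:
            return result  # everything fits, nothing to cut
        cut = max(end for end in element_ends if end < truncate_index)
        return result[:cut].rstrip("!?:;. \n") + "..."

    print(truncate_markdown(["one\n", "two\n", "three\n"], max_lines=2, max_length=100))
    # -> "one..."  (cut at the last whole element before the second newline)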
From b118f4cf38bdf99cf66e822c5b2280aff879123d Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 14 Nov 2020 22:59:50 +0100
Subject: Rework the doc redis cache to work with hashes
This rework lets us delete a package's cache easily by
deleting the package hash, instead of having to pattern match all
keys and delete them.
The interface was also updated to accept DocItems instead of requiring
callers to construct the keys.
---
bot/exts/info/doc/_cog.py | 11 +++-----
bot/exts/info/doc/_redis_cache.py | 57 +++++++++++++++++++++++++++++++++++----
2 files changed, 56 insertions(+), 12 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index ecc648d89..67a21ed72 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -4,7 +4,6 @@ import asyncio
import logging
import re
import sys
-import urllib.parse
from collections import defaultdict
from contextlib import suppress
from typing import Dict, List, NamedTuple, Optional, Union
@@ -175,6 +174,7 @@ class DocCog(commands.Cog):
self.scheduled_inventories = set()
self.bot.loop.create_task(self.init_refresh_inventory())
+ self.bot.loop.create_task(self.doc_cache.delete_expired())
async def init_refresh_inventory(self) -> None:
"""Refresh documentation inventory on cog initialization."""
@@ -292,21 +292,18 @@ class DocCog(commands.Cog):
return None
self.bot.stats.incr(f"doc_fetches.{symbol_info.package.lower()}")
- item_url = f"{symbol_info.url}#{symbol_info.symbol_id}"
- redis_key = "".join(urllib.parse.urlparse(item_url)[1:]) # url without scheme
-
- markdown = await self.doc_cache.get(redis_key)
+ markdown = await self.doc_cache.get(symbol_info)
if markdown is None:
log.debug(f"Redis cache miss for symbol `{symbol}`.")
markdown = await self.item_fetcher.get_markdown(self.bot.http_session, symbol_info)
if markdown is not None:
- await self.doc_cache.set(redis_key, markdown)
+ await self.doc_cache.set(symbol_info, markdown)
else:
markdown = "Unable to parse the requested symbol."
embed = discord.Embed(
title=discord.utils.escape_markdown(symbol),
- url=item_url,
+ url=f"{symbol_info.url}#{symbol_info.symbol_id}",
description=markdown
)
# Show all symbols with the same name that were renamed in the footer.
diff --git a/bot/exts/info/doc/_redis_cache.py b/bot/exts/info/doc/_redis_cache.py
index 147394ba6..c617eba49 100644
--- a/bot/exts/info/doc/_redis_cache.py
+++ b/bot/exts/info/doc/_redis_cache.py
@@ -1,23 +1,70 @@
-from typing import Optional
+from __future__ import annotations
+
+import datetime
+import pickle
+from typing import Optional, TYPE_CHECKING
from async_rediscache.types.base import RedisObject, namespace_lock
+if TYPE_CHECKING:
+ from ._cog import DocItem
class DocRedisCache(RedisObject):
"""Interface for redis functionality needed by the Doc cog."""
@namespace_lock
- async def set(self, key: str, value: str) -> None:
+ async def set(self, item: DocItem, value: str) -> None:
"""
Set markdown `value` for `key`.
Keys expire after a week to keep data up to date.
"""
+ expiry_timestamp = datetime.datetime.now().timestamp() + 7 * 24 * 60 * 60
with await self._get_pool_connection() as connection:
- await connection.setex(f"{self.namespace}:{key}", 7*24*60*60, value)
+ await connection.hset(
+ f"{self.namespace}:{item.package}",
+ self.get_item_key(item),
+ pickle.dumps((value, expiry_timestamp))
+ )
@namespace_lock
- async def get(self, key: str) -> Optional[str]:
+ async def get(self, item: DocItem) -> Optional[str]:
"""Get markdown contents for `key`."""
with await self._get_pool_connection() as connection:
- return await connection.get(f"{self.namespace}:{key}", encoding="utf8")
+ cached_value = await connection.hget(f"{self.namespace}:{item.package}", self.get_item_key(item))
+ if cached_value is None:
+ return None
+
+ value, expire = pickle.loads(cached_value)
+ if expire <= datetime.datetime.now().timestamp():
+ await connection.hdel(f"{self.namespace}:{item.package}", self.get_item_key(item))
+ return None
+
+ return value
+
+ @namespace_lock
+ async def delete(self, package: str) -> None:
+ """Remove all values for `package`."""
+ with await self._get_pool_connection() as connection:
+ await connection.delete(f"{self.namespace}:{package}")
+
+ @namespace_lock
+ async def delete_expired(self) -> None:
+ """Delete all expired keys."""
+ current_timestamp = datetime.datetime.now().timestamp()
+ with await self._get_pool_connection() as connection:
+ async for package_key in connection.iscan(match=f"{self.namespace}*"):
+ expired_fields = []
+
+ for field, cached_value in (await connection.hgetall(package_key)).items():
+ _, expire = pickle.loads(cached_value)
+ if expire <= current_timestamp:
+ expired_fields.append(field)
+
+ if expired_fields:
+ await connection.hdel(package_key, *expired_fields)
+
+ @staticmethod
+ def get_item_key(item: DocItem) -> str:
+ """Create redis key for `item`."""
+ return item.relative_url_path + item.symbol_id
--
cgit v1.2.3
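A sketch of the key layout this rework lands on (package and symbol names made up): one redis hash per package, so a whole package cache disappears with a single DEL, where the old flat `setex` keys needed a pattern scan plus per-key deletes.

    import datetime
    import pickle

    namespace, package = "Docs", "python"
    relative_url_path, symbol_id = "library/functions.html", "len"

    redis_key = f"{namespace}:{package}"   # one hash per package
    field = relative_url_path + symbol_id  # what get_item_key(item) returns
    value = pickle.dumps(("`len(s)` ...", datetime.datetime.now().timestamp() + 7 * 24 * 60 * 60))
    # HSET Docs:python <field> <value>  caches one symbol with its expiry;
    # DEL Docs:python                   drops the whole package cache at once.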
From 07a5d5fc58a402f930505c7b29a7a275e743a84d Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 14 Nov 2020 23:07:13 +0100
Subject: Update existing redis values when parsing pages
If we're parsing a page for a symbol that's out of the cache
and encounter a symbol that was already cached, we can update that symbol
to keep it up to date without additional requests.
---
bot/exts/info/doc/_cog.py | 14 ++++++++------
bot/exts/info/doc/_redis_cache.py | 17 +++++++++++++++++
2 files changed, 25 insertions(+), 6 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 67a21ed72..678134f3c 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -36,6 +36,8 @@ FORCE_PREFIX_GROUPS = (
WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
+doc_cache = DocRedisCache(namespace="Docs")
+
class DocItem(NamedTuple):
"""Holds inventory symbol information."""
@@ -116,7 +118,9 @@ class CachedParser:
while self._queue:
item, soup = self._queue.pop()
try:
- self._results[item] = get_symbol_markdown(soup, item)
+ markdown = get_symbol_markdown(soup, item)
+ await doc_cache.set_if_exists(item, markdown)
+ self._results[item] = markdown
except Exception:
log.exception(f"Unexpected error when handling {item}")
else:
@@ -161,8 +165,6 @@ class CachedParser:
class DocCog(commands.Cog):
"""A set of commands for querying & displaying documentation."""
- doc_cache = DocRedisCache()
-
def __init__(self, bot: Bot):
self.base_urls = {}
self.bot = bot
@@ -174,7 +176,7 @@ class DocCog(commands.Cog):
self.scheduled_inventories = set()
self.bot.loop.create_task(self.init_refresh_inventory())
- self.bot.loop.create_task(self.doc_cache.delete_expired())
+ self.bot.loop.create_task(doc_cache.delete_expired())
async def init_refresh_inventory(self) -> None:
"""Refresh documentation inventory on cog initialization."""
@@ -292,12 +294,12 @@ class DocCog(commands.Cog):
return None
self.bot.stats.incr(f"doc_fetches.{symbol_info.package.lower()}")
- markdown = await self.doc_cache.get(symbol_info)
+ markdown = await doc_cache.get(symbol_info)
if markdown is None:
log.debug(f"Redis cache miss for symbol `{symbol}`.")
markdown = await self.item_fetcher.get_markdown(self.bot.http_session, symbol_info)
if markdown is not None:
- await self.doc_cache.set(symbol_info, markdown)
+ await doc_cache.set(symbol_info, markdown)
else:
markdown = "Unable to parse the requested symbol."
diff --git a/bot/exts/info/doc/_redis_cache.py b/bot/exts/info/doc/_redis_cache.py
index c617eba49..2230884c9 100644
--- a/bot/exts/info/doc/_redis_cache.py
+++ b/bot/exts/info/doc/_redis_cache.py
@@ -27,6 +27,23 @@ class DocRedisCache(RedisObject):
pickle.dumps((value, expiry_timestamp))
)
+ @namespace_lock
+ async def set_if_exists(self, item: DocItem, value: str) -> None:
+ """
+ Set markdown `value` for `item` if it is already cached.
+
+ Keys expire after a week to keep data up to date.
+ """
+ expiry_timestamp = datetime.datetime.now().timestamp() + 7 * 24 * 60 * 60
+
+ with await self._get_pool_connection() as connection:
+ if await connection.hexists(f"{self.namespace}:{item.package}", self.get_item_key(item)):
+ await connection.hset(
+ f"{self.namespace}:{item.package}",
+ self.get_item_key(item),
+ pickle.dumps((value, expiry_timestamp))
+ )
+
@namespace_lock
async def get(self, item: DocItem) -> Optional[str]:
"""Get markdown contents for `key`."""
--
cgit v1.2.3
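The refresh-if-present pattern distilled into a sketch (the connection calls are the ones the patch uses): parsing a page yields markdown for every symbol on it, so symbols already present in the cache get a fresh value and expiry for free, while absent symbols stay out to keep the cache demand-driven.

    async def set_if_exists(connection, key: str, field: str, value: bytes) -> None:
        # `connection` is assumed to be an aioredis-style pool connection.
        if await connection.hexists(key, field):  # refresh existing entries only
            await connection.hset(key, field, value)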
From 15e73b7d4148ff16d2d408eaf201ebd5a6fd1251 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 14 Nov 2020 23:34:39 +0100
Subject: Add command for clearing the cache of packages
We also clear the cache when removing a package
---
bot/exts/info/doc/_cog.py | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 678134f3c..b2d015b89 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -428,6 +428,7 @@ class DocCog(commands.Cog):
# Rebuild the inventory to ensure that everything
# that was from this package is properly deleted.
await self.refresh_inventory()
+ await doc_cache.delete(package_name)
await ctx.send(f"Successfully deleted `{package_name}` and refreshed inventory.")
@docs_group.command(name="refreshdoc", aliases=("rfsh", "r"))
@@ -450,3 +451,10 @@ class DocCog(commands.Cog):
description=f"```diff\n{added}\n{removed}```" if added or removed else ""
)
await ctx.send(embed=embed)
+
+ @docs_group.command(name="cleardoccache")
+ @commands.has_any_role(*MODERATION_ROLES)
+ async def clear_cache_command(self, ctx: commands.Context, package_name: PackageName) -> None:
+ """Clear persistent redis cache for `package`."""
+ await doc_cache.delete(package_name)
+ await ctx.send(f"Succesfully cleared cache for {package_name}")
--
cgit v1.2.3
From 531ee4aad5432860afa784d0c067019662b3a0fe Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 15 Nov 2020 02:35:37 +0100
Subject: Ensure packages from PRIORITY_PACKAGES are directly accessible
Some packages (currently only python) should be prioritised over others.
The previous cleanup didn't account for other packages loading before them,
which resulted in duplicate symbols getting the python prefix and the
original symbols linking to most probably undesired pages.
---
bot/exts/info/doc/_cog.py | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index b2d015b89..9e4bb54ea 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -33,6 +33,9 @@ FORCE_PREFIX_GROUPS = (
"pdbcommand",
"term",
)
+PRIORITY_PACKAGES = (
+ "python",
+)
WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
@@ -235,6 +238,10 @@ class DocCog(commands.Cog):
self.doc_symbols[overridden_symbol] = original_symbol
self.renamed_symbols.add(overridden_symbol)
+ elif api_package_name in PRIORITY_PACKAGES:
+ self.doc_symbols[f"{original_symbol.package}.{symbol}"] = original_symbol
+ self.renamed_symbols.add(symbol)
+
else:
symbol = f"{api_package_name}.{symbol}"
self.renamed_symbols.add(symbol)
--
cgit v1.2.3
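A toy walk-through of the new branch (package names made up): when a priority package loads after another package has already claimed a bare symbol name, the earlier owner is re-homed under a prefixed key and the priority package takes the bare name.

    doc_symbols = {"Path": "zipp's DocItem"}        # zipp's inventory loaded first
    # "python", a PRIORITY_PACKAGES member, now loads its own "Path":
    doc_symbols["zipp.Path"] = doc_symbols["Path"]  # re-home the previous owner
    doc_symbols["Path"] = "python's DocItem"        # the priority package wins the bare name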
From 977cc0552bd71018d874246137f812df14bb4d31 Mon Sep 17 00:00:00 2001
From: Harbys
Date: Tue, 24 Nov 2020 14:22:55 +0100
Subject: Added Stream and revokestream commands
---
bot/constants.py | 28 +++++++++
bot/exts/moderation/stream.py | 138 ++++++++++++++++++++++++++++++++++++++++++
config-default.yml | 3 +
3 files changed, 169 insertions(+)
create mode 100644 bot/exts/moderation/stream.py
diff --git a/bot/constants.py b/bot/constants.py
index 2126b2b37..744fbd512 100644
--- a/bot/constants.py
+++ b/bot/constants.py
@@ -466,6 +466,7 @@ class Roles(metaclass=YAMLGetter):
unverified: int
verified: int # This is the Developers role on PyDis, here named verified for readability reasons.
voice_verified: int
+ video: int
class Guild(metaclass=YAMLGetter):
@@ -701,3 +702,30 @@ ERROR_REPLIES = [
"Noooooo!!",
"I can't believe you've done this",
]
+
+# TIME_FORMATS defines aliases and multipliers for time formats
+# key is a standard time unit name like second, year, decade etc.
+# mul is a multiplier where duration of said time unit * multiplier = time in seconds
+# e.g. 1 day = 1 * multiplier seconds, so mul = 86400
+TIME_FORMATS = {
+ "second": {
+ "aliases": ("s", "sec", "seconds", "secs"),
+ "mul": 1
+ },
+ "minute": {
+ "aliases": ("m", "min", "mins", "minutes"),
+ "mul": 60
+ },
+ "hour": {
+ "aliases": ("h", "hr", "hrs", "hours"),
+ "mul": 3600
+ },
+ "day": {
+ "aliases": ("d", "days"),
+ "mul": 86400
+ },
+ "year": {
+ "aliases": ("yr", "yrs", "years"),
+ "mul": 31536000
+ }
+}
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
new file mode 100644
index 000000000..673a21b1b
--- /dev/null
+++ b/bot/exts/moderation/stream.py
@@ -0,0 +1,138 @@
+from discord.ext import commands, tasks
+import discord
+
+from bot.constants import Roles, STAFF_ROLES, Guild, TIME_FORMATS
+from bot import Bot
+import time
+from async_rediscache import RedisCache
+
+# Constant error messages
+NO_USER_SPECIFIED = "Please specify a user"
+TIME_FORMAT_NOT_VALID = "Please specify a valid time format, e.g. 10h or 1day"
+TIME_LESS_EQ_0 = "Duration cannot be 0 or lower"
+USER_ALREADY_ALLOWED_TO_STREAM = "This user can already stream"
+USER_ALREADY_NOT_ALLOWED_TO_STREAM = "This user already can't stream"
+
+
+# FORMATS holds a combined list of all allowed time units
+# made from TIME_FORMATS constant
+FORMATS = []
+for key, entry in TIME_FORMATS.items():
+ FORMATS.extend(entry["aliases"])
+ FORMATS.append(key)
+
+
+class Stream(commands.Cog):
+ """Stream class handles giving screen sharing permission with commands"""
+
+ # Data cache storing a userid to unix_time relation
+ # user id is used to get the member whose streaming permission needs to be revoked after some time
+ # unix_time is the time when the user's streaming permission needs to be revoked, in unix time notation
+ user_cache = RedisCache()
+
+ def __init__(self, bot: Bot):
+ self.bot = bot
+ self.remove_permissions.start()
+ self.guild_static = None
+
+ @staticmethod
+ def _link_from_alias(time_format) -> (dict, str):
+ """Get TIME_FORMATS key and entry by time format or any of its aliases"""
+ for format_key, val in TIME_FORMATS.items():
+ if format_key == time_format or time_format in val["aliases"]:
+ return TIME_FORMATS[format_key], format_key
+
+ def _parse_time_to_seconds(self, duration, time_format) -> int:
+ """Get time in seconds from duration and time format"""
+ return duration * self._link_from_alias(time_format)[0]["mul"]
+
+ @commands.command(aliases=("streaming", "share"))
+ @commands.has_any_role(*STAFF_ROLES)
+ async def stream(
+ self,
+ ctx: commands.Context,
+ user: discord.Member = None,
+ duration: int = 1,
+ time_format: str = "h",
+ *_
+ ):
+ """
+ stream handles stream command
+ argument user - required user mention, any errors should be handled by an upper level handler
+ duration - int must be higher than 0 - defaults to 1
+ time_format - str defining what time unit you want to use, must be any of FORMATS - defaults to h
+
+ Command gives the user permission to stream and takes it away after the provided duration
+ """
+ # Check for required user argument
+ # if not provided send NO_USER_SPECIFIED message
+ if not user:
+ await ctx.send(NO_USER_SPECIFIED)
+ return
+
+ # Time can't be negative lol
+ if duration <= 0:
+ await ctx.send(TIME_LESS_EQ_0)
+ return
+
+ # Check if time_format argument is a valid time format
+ # e.g. d, day etc. are aliases for the day time format
+ if time_format not in FORMATS:
+ await ctx.send(TIME_FORMAT_NOT_VALID)
+ return
+
+ # Check if user already has streaming permission
+ already_allowed = any(Roles.video == role.id for role in user.roles)
+ if already_allowed:
+ await ctx.send(USER_ALREADY_ALLOWED_TO_STREAM)
+ return
+
+ # Set user id - time in redis cache and add streaming permission role
+ await self.user_cache.set(user.id, time.time() + self._parse_time_to_seconds(duration, time_format))
+ await user.add_roles(discord.Object(Roles.video), reason="Temporary streaming access granted")
+ await ctx.send(f"{user.mention} can now stream for {duration} {self._link_from_alias(time_format)[1]}/s")
+
+ @tasks.loop(seconds=30)
+ async def remove_permissions(self):
+ """
+ background loop for removing streaming permission
+ """
+ all_entries = await self.user_cache.items()
+ for user_id, delete_time in all_entries:
+ if time.time() > delete_time:
+ member = await self.guild_static.fetch_member(user_id)
+ if member:
+ await member.remove_roles(discord.Object(Roles.video), reason="Temporary streaming access revoked")
+ await self.user_cache.pop(user_id)
+
+ @remove_permissions.before_loop
+ async def await_ready(self):
+ """Wait for bot to be ready before starting remove_permissions loop
+ and get guild by id
+ """
+ await self.bot.wait_until_ready()
+ self.guild_static = self.bot.get_guild(Guild.id)
+
+ @commands.command(aliases=("unstream", ))
+ @commands.has_any_role(*STAFF_ROLES)
+ async def revokestream(
+ self,
+ ctx: commands.Context,
+ user: discord.Member = None
+ ):
+ """
+ revokestream handles the revokestream command
+ argument user - required user mention, any errors should be handled by an upper level handler
+
+ Command removes streaming permission from a user
+ """
+ allowed = any(Roles.video == role.id for role in user.roles)
+ if allowed:
+ await user.remove_roles(discord.Object(Roles.video))
+ else:
+ await ctx.send(USER_ALREADY_NOT_ALLOWED_TO_STREAM)
+
+
+def setup(bot: Bot) -> None:
+ """Loads the Stream cog."""
+ bot.add_cog(Stream(bot))
diff --git a/config-default.yml b/config-default.yml
index 89493c4de..700406f4e 100644
--- a/config-default.yml
+++ b/config-default.yml
@@ -251,6 +251,9 @@ guild:
jammers: 737249140966162473
team_leaders: 737250302834638889
+ # Streaming
+ video: 764245844798079016
+
moderation_roles:
- *OWNERS_ROLE
- *ADMINS_ROLE
--
cgit v1.2.3
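A self-contained sketch of the alias lookup the cog builds on, using a trimmed-down TIME_FORMATS of the same shape as the one added to constants.py: an invocation such as `stream @user 10 h` (command prefix assumed) arrives as duration=10 and time_format="h", and the alias resolves to the "hour" entry with mul=3600.

    TIME_FORMATS = {
        "hour": {"aliases": ("h", "hr", "hrs", "hours"), "mul": 3600},
        "day": {"aliases": ("d", "days"), "mul": 86400},
    }

    def parse_time_to_seconds(duration: int, time_format: str) -> int:
        for key, entry in TIME_FORMATS.items():
            if time_format == key or time_format in entry["aliases"]:
                return duration * entry["mul"]
        raise ValueError(f"unknown time format: {time_format}")

    assert parse_time_to_seconds(10, "h") == 36000
    assert parse_time_to_seconds(2, "day") == 172800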
From a3aec34b444d75292f17dadc308457490c395620 Mon Sep 17 00:00:00 2001
From: Harbys
Date: Tue, 24 Nov 2020 15:03:39 +0100
Subject: import fix
---
bot/exts/moderation/stream.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
index 673a21b1b..ceb291027 100644
--- a/bot/exts/moderation/stream.py
+++ b/bot/exts/moderation/stream.py
@@ -2,7 +2,7 @@ from discord.ext import commands, tasks
import discord
from bot.constants import Roles, STAFF_ROLES, Guild, TIME_FORMATS
-from bot import Bot
+from bot.bot import Bot
import time
from async_rediscache import RedisCache
--
cgit v1.2.3
From 91f1962703902fffabbbd7b710373850763e3ed7 Mon Sep 17 00:00:00 2001
From: Harbys
Date: Tue, 24 Nov 2020 15:05:50 +0100
Subject: Add additional year alias
---
bot/constants.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/constants.py b/bot/constants.py
index 744fbd512..6c0ef913b 100644
--- a/bot/constants.py
+++ b/bot/constants.py
@@ -725,7 +725,7 @@ TIME_FORMATS = {
"mul": 86400
},
"year": {
- "aliases": ("yr", "yrs", "years"),
+ "aliases": ("yr", "yrs", "years", "y"),
"mul": 31536000
}
}
--
cgit v1.2.3
From b18127c3df9b46a33648a0db376a3587c9fbe6be Mon Sep 17 00:00:00 2001
From: Harbys
Date: Tue, 24 Nov 2020 15:22:01 +0100
Subject: Add first 2 unit tests for Stream cog
---
tests/bot/exts/moderation/test_stream.py | 45 ++++++++++++++++++++++++++++++++
1 file changed, 45 insertions(+)
create mode 100644 tests/bot/exts/moderation/test_stream.py
diff --git a/tests/bot/exts/moderation/test_stream.py b/tests/bot/exts/moderation/test_stream.py
new file mode 100644
index 000000000..872627fc1
--- /dev/null
+++ b/tests/bot/exts/moderation/test_stream.py
@@ -0,0 +1,45 @@
+import unittest
+from bot.constants import TIME_FORMATS
+from bot.exts.moderation.stream import Stream
+from tests.helpers import MockContext, MockBot
+
+
+class StreamCommandTest(unittest.IsolatedAsyncioTestCase):
+
+ def setUp(self) -> None:
+ self.bot = MockBot()
+ self.cog = Stream(self.bot)
+ self.ctx = MockContext()
+
+ def test_linking_time_format_from_alias_or_key(self):
+ FORMATS = []
+ for key, entry in TIME_FORMATS.items():
+ FORMATS.extend(entry["aliases"])
+ FORMATS.append(key)
+
+ test_cases = (("sec", "second"),
+ ("s", "second"),
+ ("seconds", "second"),
+ ("second", "second"),
+ ("secs", "second"),
+ ("min", "minute"),
+ ("m", "minute"),
+ ("minutes", "minute"),
+ ("hr", "hour"),
+ ("hrs", "hour"),
+ ("hours", "hour"),
+ ("d", "day"),
+ ("days", "day"),
+ ("yr", "year"),
+ ("yrs", "year"),
+ ("y", "year"))
+
+ for case in test_cases:
+ linked = self.cog._link_from_alias(case[0])[1]
+ self.assertEqual(linked, case[1])
+
+ def test_parsing_duration_and_time_format_to_seconds(self):
+ test_cases = ((1, "minute", 60), (5, "second", 5), (2, "day", 172800))
+ for case in test_cases:
+ time_in_seconds = self.cog._parse_time_to_seconds(case[0], case[1])
+ self.assertEqual(time_in_seconds, case[2])
--
cgit v1.2.3
From 42c862b49923e30f66632902c86cfd168021b1e8 Mon Sep 17 00:00:00 2001
From: Harbys
Date: Tue, 24 Nov 2020 17:55:25 +0100
Subject: Add more tests and some comments
---
tests/bot/exts/moderation/test_stream.py | 46 +++++++++++++++++++++++++++++---
1 file changed, 43 insertions(+), 3 deletions(-)
diff --git a/tests/bot/exts/moderation/test_stream.py b/tests/bot/exts/moderation/test_stream.py
index 872627fc1..7aa2fae26 100644
--- a/tests/bot/exts/moderation/test_stream.py
+++ b/tests/bot/exts/moderation/test_stream.py
@@ -1,7 +1,27 @@
+import asyncio
import unittest
-from bot.constants import TIME_FORMATS
+
+from async_rediscache import RedisSession
+
+from bot.constants import TIME_FORMATS, Roles
from bot.exts.moderation.stream import Stream
-from tests.helpers import MockContext, MockBot
+from tests.helpers import MockBot, MockRole, MockMember
+
+redis_session = None
+redis_loop = asyncio.get_event_loop()
+
+
+def setUpModule(): # noqa: N802
+ """Create and connect to the fakeredis session."""
+ global redis_session
+ redis_session = RedisSession(use_fakeredis=True)
+ redis_loop.run_until_complete(redis_session.connect())
+
+
+def tearDownModule(): # noqa: N802
+ """Close the fakeredis session."""
+ if redis_session:
+ redis_loop.run_until_complete(redis_session.close())
class StreamCommandTest(unittest.IsolatedAsyncioTestCase):
@@ -9,9 +29,13 @@ class StreamCommandTest(unittest.IsolatedAsyncioTestCase):
def setUp(self) -> None:
self.bot = MockBot()
self.cog = Stream(self.bot)
- self.ctx = MockContext()
def test_linking_time_format_from_alias_or_key(self):
+ """
+ User-provided time format needs to be linked to a proper entry in TIME_FORMATS
+ This test checks the _link_from_alias method
+ Checking whether the alias or key exists in TIME_FORMATS is done before calling this function
+ """
FORMATS = []
for key, entry in TIME_FORMATS.items():
FORMATS.extend(entry["aliases"])
@@ -39,7 +63,23 @@ class StreamCommandTest(unittest.IsolatedAsyncioTestCase):
self.assertEqual(linked, case[1])
def test_parsing_duration_and_time_format_to_seconds(self):
+ """
+ Test calculating time in seconds from duration and time unit
+ This test is technically dependent on the _link_from_alias function; not the best practice, but necessary
+ """
test_cases = ((1, "minute", 60), (5, "second", 5), (2, "day", 172800))
for case in test_cases:
time_in_seconds = self.cog._parse_time_to_seconds(case[0], case[1])
self.assertEqual(time_in_seconds, case[2])
+
+ def test_checking_if_user_has_streaming_permission(self):
+ """
+ Test searching for video role in Member.roles
+ """
+ user1 = MockMember(roles=[MockRole(id=Roles.video)])
+ user2 = MockMember()
+ already_allowed_user1 = any(Roles.video == role.id for role in user1.roles)
+ self.assertEqual(already_allowed_user1, True)
+
+ already_allowed_user2 = any(Roles.video == role.id for role in user2.roles)
+ self.assertEqual(already_allowed_user2, False)
--
cgit v1.2.3
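The role check exercised in the test above is the same any() scan over Member.roles that the cog performs. As a minimal standalone sketch, with hypothetical stand-in classes rather than discord.py objects or the bot's test helpers:

    # Stand-ins for discord.py's Role/Member; the id value is a placeholder.
    from dataclasses import dataclass, field
    from typing import List

    VIDEO_ROLE_ID = 123456789  # hypothetical role id

    @dataclass
    class Role:
        id: int

    @dataclass
    class Member:
        roles: List[Role] = field(default_factory=list)

    def has_streaming_permission(member: Member) -> bool:
        # Same membership test as the cog: any role id matching the video role.
        return any(VIDEO_ROLE_ID == role.id for role in member.roles)

    assert has_streaming_permission(Member(roles=[Role(id=VIDEO_ROLE_ID)]))
    assert not has_streaming_permission(Member())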
From 25fe0c919edfffbca5a73554853d076455e2d997 Mon Sep 17 00:00:00 2001
From: Harbys
Date: Tue, 24 Nov 2020 19:41:52 +0100
Subject: fixing code to be flake8 compliant
---
bot/exts/moderation/stream.py | 43 +++++++++++++++-----------------
tests/bot/exts/moderation/test_stream.py | 8 ++----
2 files changed, 22 insertions(+), 29 deletions(-)
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
index ceb291027..a44095273 100644
--- a/bot/exts/moderation/stream.py
+++ b/bot/exts/moderation/stream.py
@@ -1,10 +1,11 @@
-from discord.ext import commands, tasks
+import time
+
import discord
+from async_rediscache import RedisCache
+from discord.ext import commands, tasks
-from bot.constants import Roles, STAFF_ROLES, Guild, TIME_FORMATS
from bot.bot import Bot
-import time
-from async_rediscache import RedisCache
+from bot.constants import Guild, Roles, STAFF_ROLES, TIME_FORMATS
# Constant error messages
NO_USER_SPECIFIED = "Please specify a user"
@@ -23,7 +24,7 @@ for key, entry in TIME_FORMATS.items():
class Stream(commands.Cog):
- """Stream class handles giving screen sharing permission with commands"""
+ """Stream class handles giving screen sharing permission with commands."""
# Data cache storing userid to unix_time relation
# user id is used to get the member whose streaming permission needs to be revoked after some time
@@ -36,14 +37,14 @@ class Stream(commands.Cog):
self.guild_static = None
@staticmethod
- def _link_from_alias(time_format) -> (dict, str):
- """Get TIME_FORMATS key and entry by time format or any of its aliases"""
+ def _link_from_alias(time_format: str) -> (dict, str):
+ """Get TIME_FORMATS key and entry by time format or any of its aliases."""
for format_key, val in TIME_FORMATS.items():
if format_key == time_format or time_format in val["aliases"]:
return TIME_FORMATS[format_key], format_key
- def _parse_time_to_seconds(self, duration, time_format) -> int:
- """Get time in seconds from duration and time format"""
+ def _parse_time_to_seconds(self, duration: int, time_format: str) -> int:
+ """Get time in seconds from duration and time format."""
return duration * self._link_from_alias(time_format)[0]["mul"]
@commands.command(aliases=("streaming", "share"))
@@ -55,13 +56,13 @@ class Stream(commands.Cog):
duration: int = 1,
time_format: str = "h",
*_
- ):
+ ) -> None:
"""
- stream handles stream command
+ Stream handles stream command.
+
argument user - required user mention; any errors should be handled by an upper-level handler
duration - int must be higher than 0 - defaults to 1
time_format - str defining what time unit you want to use, must be any of FORMATS - defaults to h
-
Command gives the user permission to stream and takes it away after the provided duration
"""
# Check for required user argument
@@ -93,10 +94,8 @@ class Stream(commands.Cog):
await ctx.send(f"{user.mention} can now stream for {duration} {self._link_from_alias(time_format)[1]}/s")
@tasks.loop(seconds=30)
- async def remove_permissions(self):
- """
- background loop for removing streaming permission
- """
+ async def remove_permissions(self) -> None:
+ """Background loop for removing streaming permission."""
all_entries = await self.user_cache.items()
for user_id, delete_time in all_entries:
if time.time() > delete_time:
@@ -106,10 +105,8 @@ class Stream(commands.Cog):
await self.user_cache.pop(user_id)
@remove_permissions.before_loop
- async def await_ready(self):
- """Wait for bot to be ready before starting remove_permissions loop
- and get guild by id
- """
+ async def await_ready(self) -> None:
+ """Wait for bot to be ready before starting remove_permissions loop and get guild by id."""
await self.bot.wait_until_ready()
self.guild_static = self.bot.get_guild(Guild.id)
@@ -119,11 +116,11 @@ class Stream(commands.Cog):
self,
ctx: commands.Context,
user: discord.Member = None
- ):
+ ) -> None:
"""
- stream handles revokestream command
- argument user - required user mention, any errors should be handled by upper level handler
+ Revokestream handles revokestream command.
+ argument user - required user mention; any errors should be handled by an upper-level handler
command removes streaming permission from a user
"""
not_allowed = not any(Roles.video == role.id for role in user.roles)
diff --git a/tests/bot/exts/moderation/test_stream.py b/tests/bot/exts/moderation/test_stream.py
index 7aa2fae26..467c373aa 100644
--- a/tests/bot/exts/moderation/test_stream.py
+++ b/tests/bot/exts/moderation/test_stream.py
@@ -3,9 +3,9 @@ import unittest
from async_rediscache import RedisSession
-from bot.constants import TIME_FORMATS, Roles
+from bot.constants import Roles
from bot.exts.moderation.stream import Stream
-from tests.helpers import MockBot, MockRole, MockMember
+from tests.helpers import MockBot, MockMember, MockRole
redis_session = None
redis_loop = asyncio.get_event_loop()
@@ -36,10 +36,6 @@ class StreamCommandTest(unittest.IsolatedAsyncioTestCase):
This test checks the _link_from_alias method.
Checking whether an alias or key exists in TIME_FORMATS is done before calling this function.
"""
- FORMATS = []
- for key, entry in TIME_FORMATS.items():
- FORMATS.extend(entry["aliases"])
- FORMATS.append(key)
test_cases = (("sec", "second"),
("s", "second"),
--
cgit v1.2.3
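The two helpers touched above resolve a user-supplied unit through TIME_FORMATS and multiply the duration by the unit's multiplier. A self-contained sketch of that lookup, assuming a trimmed copy of the TIME_FORMATS constant (the full mapping also defines day and year entries):

    # Trimmed copy of TIME_FORMATS for illustration.
    TIME_FORMATS = {
        "second": {"aliases": ("s", "sec", "seconds", "secs"), "mul": 1},
        "minute": {"aliases": ("m", "min", "mins", "minutes"), "mul": 60},
        "hour": {"aliases": ("h", "hr", "hrs", "hours"), "mul": 3600},
    }

    def link_from_alias(time_format: str):
        # Mirror of Stream._link_from_alias: match the key itself or any alias.
        for format_key, val in TIME_FORMATS.items():
            if format_key == time_format or time_format in val["aliases"]:
                return TIME_FORMATS[format_key], format_key

    def parse_time_to_seconds(duration: int, time_format: str) -> int:
        # Mirror of Stream._parse_time_to_seconds: duration times the multiplier.
        return duration * link_from_alias(time_format)[0]["mul"]

    assert link_from_alias("hrs")[1] == "hour"
    assert parse_time_to_seconds(2, "min") == 120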
From 39401cd99b4dffc423a88f18a6e08c7cf1bd26e9 Mon Sep 17 00:00:00 2001
From: Harbys <44087388+Harbys@users.noreply.github.com>
Date: Wed, 25 Nov 2020 17:15:53 +0100
Subject: removing redundant class names
Co-authored-by: Mark
---
bot/exts/moderation/stream.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
index a44095273..92fd9955f 100644
--- a/bot/exts/moderation/stream.py
+++ b/bot/exts/moderation/stream.py
@@ -24,7 +24,7 @@ for key, entry in TIME_FORMATS.items():
class Stream(commands.Cog):
- """Stream class handles giving screen sharing permission with commands."""
+ """Grant and revoke streaming permissions for users."""
# Data cache storing userid to unix_time relation
# user id is used to get the member whose streaming permission needs to be revoked after some time
--
cgit v1.2.3
From 189f8c31bcf9f58cf72abf7f86061746613dfd7f Mon Sep 17 00:00:00 2001
From: Harbys <44087388+Harbys@users.noreply.github.com>
Date: Wed, 25 Nov 2020 17:19:17 +0100
Subject: removing redundant descriptions
Co-authored-by: Mark
---
bot/exts/moderation/stream.py | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
index 92fd9955f..ef52cd107 100644
--- a/bot/exts/moderation/stream.py
+++ b/bot/exts/moderation/stream.py
@@ -117,12 +117,7 @@ class Stream(commands.Cog):
ctx: commands.Context,
user: discord.Member = None
) -> None:
- """
- Revokestream handles revokestream command.
-
- argument user - required user mention; any errors should be handled by an upper-level handler
- command removes streaming permission from a user
- """
+ """Revoke streaming permissions from a user."""
not_allowed = not any(Roles.video == role.id for role in user.roles)
if not_allowed:
await user.remove_roles(discord.Object(Roles.video))
--
cgit v1.2.3
From 2f97f5705cbc073c9460e3a60cde4d53d7f3d5e0 Mon Sep 17 00:00:00 2001
From: Harbys
Date: Wed, 25 Nov 2020 17:11:53 +0100
Subject: spelling fix from fetch_memebr to fetch_member
---
bot/exts/moderation/stream.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
index ef52cd107..458559b18 100644
--- a/bot/exts/moderation/stream.py
+++ b/bot/exts/moderation/stream.py
@@ -99,7 +99,7 @@ class Stream(commands.Cog):
all_entries = await self.user_cache.items()
for user_id, delete_time in all_entries:
if time.time() > delete_time:
- member = self.guild_static.fetch_memebr(user_id)
+ member = self.guild_static.fetch_member(user_id)
if member:
await member.remove_roles(discord.Object(Roles.video), reason="Temporary streaming access revoked")
await self.user_cache.pop(user_id)
--
cgit v1.2.3
From f823f0e9a48f346a8a2ead7ded03da29104f064e Mon Sep 17 00:00:00 2001
From: Harbys
Date: Wed, 25 Nov 2020 17:14:16 +0100
Subject: removed share alias
---
bot/exts/moderation/stream.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
index 458559b18..2ce248f03 100644
--- a/bot/exts/moderation/stream.py
+++ b/bot/exts/moderation/stream.py
@@ -47,7 +47,7 @@ class Stream(commands.Cog):
"""Get time in seconds from duration and time format."""
return duration * self._link_from_alias(time_format)[0]["mul"]
- @commands.command(aliases=("streaming", "share"))
+ @commands.command(aliases=("streaming",))
@commands.has_any_role(*STAFF_ROLES)
async def stream(
self,
--
cgit v1.2.3
From e4907d6e06f5ac9d94d7dcfe13dc7bb2c33cd65a Mon Sep 17 00:00:00 2001
From: Harbys
Date: Wed, 25 Nov 2020 17:18:05 +0100
Subject: fixing required arguments
---
bot/exts/moderation/stream.py | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
index 2ce248f03..7678c3184 100644
--- a/bot/exts/moderation/stream.py
+++ b/bot/exts/moderation/stream.py
@@ -8,7 +8,6 @@ from bot.bot import Bot
from bot.constants import Guild, Roles, STAFF_ROLES, TIME_FORMATS
# Constant error messages
-NO_USER_SPECIFIED = "Please specify a user"
TIME_FORMAT_NOT_VALID = "Please specify a valid time format ex. 10h or 1day"
TIME_LESS_EQ_0 = "Duration can not be a 0 or lower"
USER_ALREADY_ALLOWED_TO_STREAM = "This user can already stream"
@@ -52,7 +51,7 @@ class Stream(commands.Cog):
async def stream(
self,
ctx: commands.Context,
- user: discord.Member = None,
+ user: discord.Member,
duration: int = 1,
time_format: str = "h",
*_
@@ -65,12 +64,6 @@ class Stream(commands.Cog):
time_format - str defining what time unit you want to use, must be any of FORMATS - defaults to h
Command gives the user permission to stream and takes it away after the provided duration
"""
- # Check for required user argument
- # if not provided send NO_USER_SPECIFIED message
- if not user:
- await ctx.send(NO_USER_SPECIFIED)
- return
-
# Time can't be negative
if duration <= 0:
await ctx.send(TIME_LESS_EQ_0)
--
cgit v1.2.3
From 9eb729d89e2969d284a546b539ab720e36007fab Mon Sep 17 00:00:00 2001
From: Harbys
Date: Wed, 25 Nov 2020 17:25:49 +0100
Subject: fixing punctuation and adding Emojis to messages
---
bot/exts/moderation/stream.py | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
index 7678c3184..ca4284e77 100644
--- a/bot/exts/moderation/stream.py
+++ b/bot/exts/moderation/stream.py
@@ -5,13 +5,13 @@ from async_rediscache import RedisCache
from discord.ext import commands, tasks
from bot.bot import Bot
-from bot.constants import Guild, Roles, STAFF_ROLES, TIME_FORMATS
+from bot.constants import Guild, Roles, STAFF_ROLES, TIME_FORMATS, Emojis
# Constant error messages
-TIME_FORMAT_NOT_VALID = "Please specify a valid time format, e.g. 10h or 1day"
-TIME_LESS_EQ_0 = "Duration cannot be 0 or lower"
-USER_ALREADY_ALLOWED_TO_STREAM = "This user can already stream"
-USER_ALREADY_NOT_ALLOWED_TO_STREAM = "This user already can't stream"
+TIME_FORMAT_NOT_VALID = f"{Emojis.cross_mark}Please specify a valid time format, e.g. 10h or 1day."
+TIME_LESS_EQ_0 = f"{Emojis.cross_mark}Duration cannot be 0 or lower."
+USER_ALREADY_ALLOWED_TO_STREAM = f"{Emojis.cross_mark}This user can already stream."
+USER_ALREADY_NOT_ALLOWED_TO_STREAM = f"{Emojis.cross_mark}This user already can't stream."
# FORMATS holds a combined list of all allowed time units
@@ -84,7 +84,8 @@ class Stream(commands.Cog):
# Set user id - time in redis cache and add streaming permission role
await self.user_cache.set(user.id, time.time() + self._parse_time_to_seconds(duration, time_format))
await user.add_roles(discord.Object(Roles.video), reason="Temporary streaming access granted")
- await ctx.send(f"{user.mention} can now stream for {duration} {self._link_from_alias(time_format)[1]}/s")
+ await ctx.send(f"{Emojis.check_mark}{user.mention} can now stream for "
+ f"{duration} {self._link_from_alias(time_format)[1]}/s.")
@tasks.loop(seconds=30)
async def remove_permissions(self) -> None:
--
cgit v1.2.3
From bcbcd3e8b6bc95c96d7c316d032b9f774773e961 Mon Sep 17 00:00:00 2001
From: Harbys
Date: Wed, 25 Nov 2020 17:34:55 +0100
Subject: add success message after revokestream command
---
bot/exts/moderation/stream.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
index ca4284e77..7dd72a95b 100644
--- a/bot/exts/moderation/stream.py
+++ b/bot/exts/moderation/stream.py
@@ -115,6 +115,7 @@ class Stream(commands.Cog):
not_allowed = not any(Roles.video == role.id for role in user.roles)
if not_allowed:
await user.remove_roles(discord.Object(Roles.video))
+ await ctx.send(f"{Emojis.check_mark}Streaming permission taken from {user.display_name}.")
else:
await ctx.send(USER_ALREADY_NOT_ALLOWED_TO_STREAM)
--
cgit v1.2.3
From 16936aad19978078a872ce8ebec82f30a3e7442f Mon Sep 17 00:00:00 2001
From: Harbys
Date: Fri, 27 Nov 2020 08:44:20 +0100
Subject: move to Scheduler
---
Pipfile.lock | 68 +++++++++++---------
bot/constants.py | 27 --------
bot/exts/moderation/stream.py | 106 ++++++++++---------------------
tests/bot/exts/moderation/test_stream.py | 56 ----------------
4 files changed, 72 insertions(+), 185 deletions(-)
diff --git a/Pipfile.lock b/Pipfile.lock
index 541db1627..25fcab4b1 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -187,6 +187,7 @@
"sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b",
"sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"
],
+ "index": "pypi",
"markers": "sys_platform == 'win32'",
"version": "==0.4.4"
},
@@ -231,10 +232,10 @@
},
"fakeredis": {
"hashes": [
- "sha256:8070b7fce16f828beaef2c757a4354af91698685d5232404f1aeeb233529c7a5",
- "sha256:f8c8ea764d7b6fd801e7f5486e3edd32ca991d506186f1923a01fc072e33c271"
+ "sha256:01cb47d2286825a171fb49c0e445b1fa9307087e07cbb3d027ea10dbff108b6a",
+ "sha256:2c6041cf0225889bc403f3949838b2c53470a95a9e2d4272422937786f5f8f73"
],
- "version": "==1.4.4"
+ "version": "==1.4.5"
},
"feedparser": {
"hashes": [
@@ -538,6 +539,15 @@
"markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.4.7"
},
+ "pyreadline": {
+ "hashes": [
+ "sha256:4530592fc2e85b25b1a9f79664433da09237c1a270e4d78ea5aa3a2c7229e2d1",
+ "sha256:65540c21bfe14405a3a77e4c085ecfce88724743a4ead47c66b84defcf82c32e",
+ "sha256:9ce5fa65b8992dfa373bddc5b6e0864ead8f291c94fbfec05fbd5c836162e67b"
+ ],
+ "markers": "sys_platform == 'win32'",
+ "version": "==2.1"
+ },
"python-dateutil": {
"hashes": [
"sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
@@ -555,18 +565,18 @@
},
"pyyaml": {
"hashes": [
- "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf",
- "sha256:ad9c67312c84def58f3c04504727ca879cb0013b2517c85a9a253f0cb6380c0a",
- "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c",
+ "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97",
"sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76",
+ "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2",
"sha256:6034f55dab5fea9e53f436aa68fa3ace2634918e8b5994d82f3621c04ff5ed2e",
"sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648",
- "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d",
- "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97",
+ "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf",
"sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f",
- "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2",
- "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee",
"sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2",
+ "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee",
+ "sha256:ad9c67312c84def58f3c04504727ca879cb0013b2517c85a9a253f0cb6380c0a",
+ "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d",
+ "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c",
"sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"
],
"index": "pypi",
@@ -846,11 +856,11 @@
},
"flake8-bugbear": {
"hashes": [
- "sha256:a3ddc03ec28ba2296fc6f89444d1c946a6b76460f859795b35b77d4920a51b63",
- "sha256:bd02e4b009fb153fe6072c31c52aeab5b133d508095befb2ffcf3b41c4823162"
+ "sha256:528020129fea2dea33a466b9d64ab650aa3e5f9ffc788b70ea4bc6cf18283538",
+ "sha256:f35b8135ece7a014bc0aee5b5d485334ac30a6da48494998cc1fabf7ec70d703"
],
"index": "pypi",
- "version": "==20.1.4"
+ "version": "==20.11.1"
},
"flake8-docstrings": {
"hashes": [
@@ -900,11 +910,11 @@
},
"identify": {
"hashes": [
- "sha256:5dd84ac64a9a115b8e0b27d1756b244b882ad264c3c423f42af8235a6e71ca12",
- "sha256:c9504ba6a043ee2db0a9d69e43246bc138034895f6338d5aed1b41e4a73b1513"
+ "sha256:943cd299ac7f5715fcb3f684e2fc1594c1e0f22a90d15398e5888143bd4144b5",
+ "sha256:cc86e6a9a390879dcc2976cef169dd9cc48843ed70b7380f321d1b118163c60e"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==1.5.9"
+ "version": "==1.5.10"
},
"idna": {
"hashes": [
@@ -938,11 +948,11 @@
},
"pre-commit": {
"hashes": [
- "sha256:22e6aa3bd571debb01eb7d34483f11c01b65237be4eebbf30c3d4fb65762d315",
- "sha256:905ebc9b534b991baec87e934431f2d0606ba27f2b90f7f652985f5a5b8b6ae6"
+ "sha256:4aee0db4808fa48d2458cedd5b9a084ef24dda1a0fa504432a11977a4d1cfd0a",
+ "sha256:b2d106d51c6ba6217e859d81774aae33fd825fe7de0dcf0c46e2586333d7a92e"
],
"index": "pypi",
- "version": "==2.8.2"
+ "version": "==2.9.0"
},
"pycodestyle": {
"hashes": [
@@ -970,18 +980,18 @@
},
"pyyaml": {
"hashes": [
- "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf",
- "sha256:ad9c67312c84def58f3c04504727ca879cb0013b2517c85a9a253f0cb6380c0a",
- "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c",
+ "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97",
"sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76",
+ "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2",
"sha256:6034f55dab5fea9e53f436aa68fa3ace2634918e8b5994d82f3621c04ff5ed2e",
"sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648",
- "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d",
- "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97",
+ "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf",
"sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f",
- "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2",
- "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee",
"sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2",
+ "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee",
+ "sha256:ad9c67312c84def58f3c04504727ca879cb0013b2517c85a9a253f0cb6380c0a",
+ "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d",
+ "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c",
"sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"
],
"index": "pypi",
@@ -1028,11 +1038,11 @@
},
"virtualenv": {
"hashes": [
- "sha256:b0011228208944ce71052987437d3843e05690b2f23d1c7da4263fde104c97a2",
- "sha256:b8d6110f493af256a40d65e29846c69340a947669eec8ce784fcf3dd3af28380"
+ "sha256:07cff122e9d343140366055f31be4dcd61fd598c69d11cd33a9d9c8df4546dd7",
+ "sha256:e0aac7525e880a429764cefd3aaaff54afb5d9f25c82627563603f5d7de5a6e5"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==20.1.0"
+ "version": "==20.2.1"
}
}
}
diff --git a/bot/constants.py b/bot/constants.py
index 33ed29c39..dca83e7ab 100644
--- a/bot/constants.py
+++ b/bot/constants.py
@@ -705,30 +705,3 @@ ERROR_REPLIES = [
"Noooooo!!",
"I can't believe you've done this",
]
-
-# TIME_FORMATS defines aliases and multipliers for time formats
-# key is a standard time unit name like second ,year, decade etc.
-# mul is a multiplier where duration of said time unit * multiplier = time in seconds
-# eg. 1 day = 1 * multiplier seconds, so mul = 86400
-TIME_FORMATS = {
- "second": {
- "aliases": ("s", "sec", "seconds", "secs"),
- "mul": 1
- },
- "minute": {
- "aliases": ("m", "min", "mins", "minutes"),
- "mul": 60
- },
- "hour": {
- "aliases": ("h", "hr", "hrs", "hours"),
- "mul": 3600
- },
- "day": {
- "aliases": ("d", "days"),
- "mul": 86400
- },
- "year": {
- "aliases": ("yr", "yrs", "years", "y"),
- "mul": 31536000
- }
-}
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
index 7dd72a95b..0fc004d75 100644
--- a/bot/exts/moderation/stream.py
+++ b/bot/exts/moderation/stream.py
@@ -1,11 +1,11 @@
-import time
-
import discord
-from async_rediscache import RedisCache
-from discord.ext import commands, tasks
+from discord.ext import commands
from bot.bot import Bot
-from bot.constants import Guild, Roles, STAFF_ROLES, TIME_FORMATS, Emojis
+from bot.constants import Emojis, Roles, STAFF_ROLES
+from bot.converters import Expiry
+from bot.utils.scheduling import Scheduler
+from bot.utils.time import format_infraction_with_duration
# Constant error messages
TIME_FORMAT_NOT_VALID = f"{Emojis.cross_mark}Please specify a valid time format, e.g. 10h or 1day."
@@ -14,37 +14,17 @@ USER_ALREADY_ALLOWED_TO_STREAM = f"{Emojis.cross_mark}This user can already stre
USER_ALREADY_NOT_ALLOWED_TO_STREAM = f"{Emojis.cross_mark}This user already can't stream."
-# FORMATS holds a combined list of all allowed time units
-# made from TIME_FORMATS constant
-FORMATS = []
-for key, entry in TIME_FORMATS.items():
- FORMATS.extend(entry["aliases"])
- FORMATS.append(key)
-
-
class Stream(commands.Cog):
"""Grant and revoke streaming permissions from users."""
- # Data cache storing userid to unix_time relation
- # user id is used to get the member whose streaming permission needs to be revoked after some time
- # unix_time is the time when the user's streaming permission needs to be revoked, in unix time notation
- user_cache = RedisCache()
-
def __init__(self, bot: Bot):
self.bot = bot
- self.remove_permissions.start()
- self.guild_static = None
+ self.scheduler = Scheduler(self.__class__.__name__)
@staticmethod
- def _link_from_alias(time_format: str) -> (dict, str):
- """Get TIME_FORMATS key and entry by time format or any of its aliases."""
- for format_key, val in TIME_FORMATS.items():
- if format_key == time_format or time_format in val["aliases"]:
- return TIME_FORMATS[format_key], format_key
-
- def _parse_time_to_seconds(self, duration: int, time_format: str) -> int:
- """Get time in seconds from duration and time format."""
- return duration * self._link_from_alias(time_format)[0]["mul"]
+ async def _remove_streaming_permission(schedule_user: discord.Member) -> None:
+ """Remove streaming permission from Member"""
+ await schedule_user.remove_roles(discord.Object(Roles.video), reason="Temporary streaming access revoked")
@commands.command(aliases=("streaming",))
@commands.has_any_role(*STAFF_ROLES)
@@ -52,68 +32,48 @@ class Stream(commands.Cog):
self,
ctx: commands.Context,
user: discord.Member,
- duration: int = 1,
- time_format: str = "h",
+ duration: Expiry,
*_
) -> None:
"""
- Stream handles stream command.
-
- argument user - required user mention; any errors should be handled by an upper-level handler
- duration - int must be higher than 0 - defaults to 1
- time_format - str defining what time unit you want to use, must be any of FORMATS - defaults to h
- Command gives the user permission to stream and takes it away after the provided duration
+ Temporarily grant streaming permissions to a user for a given duration.
+ A unit of time should be appended to the duration.
+ Units (∗case-sensitive):
+ \u2003`y` - years
+ \u2003`m` - months∗
+ \u2003`w` - weeks
+ \u2003`d` - days
+ \u2003`h` - hours
+ \u2003`M` - minutes∗
+ \u2003`s` - seconds
+ Alternatively, an ISO 8601 timestamp can be provided for the duration.
"""
- # Time can't be negative
- if duration <= 0:
- await ctx.send(TIME_LESS_EQ_0)
- return
-
- # Check if time_format argument is a valid time format
- # eg. d, day etc are aliases for day time format
- if time_format not in FORMATS:
- await ctx.send(TIME_FORMAT_NOT_VALID)
- return
-
# Check if user already has streaming permission
already_allowed = any(Roles.video == role.id for role in user.roles)
if already_allowed:
await ctx.send(USER_ALREADY_ALLOWED_TO_STREAM)
return
- # Set user id - time in redis cache and add streaming permission role
- await self.user_cache.set(user.id, time.time() + self._parse_time_to_seconds(duration, time_format))
+ # Schedule task to remove streaming permission from Member
+ self.scheduler.schedule_at(duration, user.id, self._remove_streaming_permission(user))
await user.add_roles(discord.Object(Roles.video), reason="Temporary streaming access granted")
- await ctx.send(f"{Emojis.check_mark}{user.mention} can now stream for "
- f"{duration} {self._link_from_alias(time_format)[1]}/s.")
-
- @tasks.loop(seconds=30)
- async def remove_permissions(self) -> None:
- """Background loop for removing streaming permission."""
- all_entries = await self.user_cache.items()
- for user_id, delete_time in all_entries:
- if time.time() > delete_time:
- member = self.guild_static.fetch_member(user_id)
- if member:
- await member.remove_roles(discord.Object(Roles.video), reason="Temporary streaming access revoked")
- await self.user_cache.pop(user_id)
-
- @remove_permissions.before_loop
- async def await_ready(self) -> None:
- """Wait for bot to be ready before starting remove_permissions loop and get guild by id."""
- await self.bot.wait_until_ready()
- self.guild_static = self.bot.get_guild(Guild.id)
+ await ctx.send(f"{Emojis.check_mark}{user.mention} can now stream until "
+ f"{format_infraction_with_duration(str(duration))}.")
@commands.command(aliases=("unstream", ))
@commands.has_any_role(*STAFF_ROLES)
async def revokestream(
self,
ctx: commands.Context,
- user: discord.Member = None
+ user: discord.Member
) -> None:
- """Revoke streaming permissions from a user."""
- not_allowed = not any(Roles.video == role.id for role in user.roles)
- if not_allowed:
+ """Take away streaming permission from a user"""
+ # Check if user has the streaming permission to begin with
+ allowed = any(Roles.video == role.id for role in user.roles)
+ if allowed:
+ # Cancel the scheduled permission-removal task to avoid errors
+ if user.id in self.scheduler:
+ self.scheduler.cancel(user.id)
await user.remove_roles(discord.Object(Roles.video))
await ctx.send(f"{Emojis.check_mark}Streaming permission taken from {user.display_name}.")
else:
diff --git a/tests/bot/exts/moderation/test_stream.py b/tests/bot/exts/moderation/test_stream.py
index 467c373aa..15956a9de 100644
--- a/tests/bot/exts/moderation/test_stream.py
+++ b/tests/bot/exts/moderation/test_stream.py
@@ -1,28 +1,10 @@
-import asyncio
import unittest
-from async_rediscache import RedisSession
from bot.constants import Roles
from bot.exts.moderation.stream import Stream
from tests.helpers import MockBot, MockMember, MockRole
-redis_session = None
-redis_loop = asyncio.get_event_loop()
-
-
-def setUpModule(): # noqa: N802
- """Create and connect to the fakeredis session."""
- global redis_session
- redis_session = RedisSession(use_fakeredis=True)
- redis_loop.run_until_complete(redis_session.connect())
-
-
-def tearDownModule(): # noqa: N802
- """Close the fakeredis session."""
- if redis_session:
- redis_loop.run_until_complete(redis_session.close())
-
class StreamCommandTest(unittest.IsolatedAsyncioTestCase):
@@ -30,44 +12,6 @@ class StreamCommandTest(unittest.IsolatedAsyncioTestCase):
self.bot = MockBot()
self.cog = Stream(self.bot)
- def test_linking_time_format_from_alias_or_key(self):
- """
- User provided time format needs to be linked to a proper entry in TIME_FORMATS.
- This test checks the _link_from_alias method.
- Checking whether an alias or key exists in TIME_FORMATS is done before calling this function.
- """
-
- test_cases = (("sec", "second"),
- ("s", "second"),
- ("seconds", "second"),
- ("second", "second"),
- ("secs", "second"),
- ("min", "minute"),
- ("m", "minute"),
- ("minutes", "minute"),
- ("hr", "hour"),
- ("hrs", "hour"),
- ("hours", "hour"),
- ("d", "day"),
- ("days", "day"),
- ("yr", "year"),
- ("yrs", "year"),
- ("y", "year"))
-
- for case in test_cases:
- linked = self.cog._link_from_alias(case[0])[1]
- self.assertEqual(linked, case[1])
-
- def test_parsing_duration_and_time_format_to_seconds(self):
- """
- Test calculating time in seconds from a duration and a time unit.
- This test is technically dependent on the _link_from_alias function; not the best practice, but necessary here.
- """
- test_cases = ((1, "minute", 60), (5, "second", 5), (2, "day", 172800))
- for case in test_cases:
- time_in_seconds = self.cog._parse_time_to_seconds(case[0], case[1])
- self.assertEqual(time_in_seconds, case[2])
-
def test_checking_if_user_has_streaming_permission(self):
"""
Test searching for the video role in Member.roles.
--
cgit v1.2.3
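The move to Scheduler replaces the 30-second polling loop with one scheduled callback per user. Roughly, schedule_at sleeps until the expiry and then runs the given coroutine; a plain-asyncio sketch of that idea (the bot's Scheduler utility does more bookkeeping, and the names below are illustrative):

    import asyncio
    import datetime

    async def remove_streaming_permission(user_id: int) -> None:
        # Stand-in for removing the video role from the member.
        print(f"revoking streaming permission for {user_id}")

    async def schedule_at(expiry: datetime.datetime, coroutine) -> None:
        # Sleep until the expiry, then run the scheduled coroutine.
        delay = (expiry - datetime.datetime.utcnow()).total_seconds()
        await asyncio.sleep(max(delay, 0))
        await coroutine

    async def main() -> None:
        expiry = datetime.datetime.utcnow() + datetime.timedelta(seconds=1)
        await schedule_at(expiry, remove_streaming_permission(1234))

    asyncio.run(main())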
From fae4ad4a614c56f011f64c64b3318f511ccb17eb Mon Sep 17 00:00:00 2001
From: Harbys
Date: Fri, 27 Nov 2020 09:10:48 +0100
Subject: fix flake8 and line endings
---
bot/exts/moderation/stream.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
index 0fc004d75..d8c2a8628 100644
--- a/bot/exts/moderation/stream.py
+++ b/bot/exts/moderation/stream.py
@@ -23,7 +23,7 @@ class Stream(commands.Cog):
@staticmethod
async def _remove_streaming_permission(schedule_user: discord.Member) -> None:
- """Remove streaming permission from Member"""
+ """Remove streaming permission from Member."""
await schedule_user.remove_roles(discord.Object(Roles.video), reason="Temporary streaming access revoked")
@commands.command(aliases=("streaming",))
@@ -37,6 +37,7 @@ class Stream(commands.Cog):
) -> None:
"""
Temporarily grant streaming permissions to a user for a given duration.
+
A unit of time should be appended to the duration.
Units (∗case-sensitive):
\u2003`y` - years
@@ -67,7 +68,7 @@ class Stream(commands.Cog):
ctx: commands.Context,
user: discord.Member
) -> None:
- """Take away streaming permission from a user"""
+ """Take away streaming permission from a user."""
# Check if user has the streaming permission to begin with
allowed = any(Roles.video == role.id for role in user.roles)
if allowed:
--
cgit v1.2.3
From 0d3d2bd632e2ed2e14eaacb7db9b49de4cd4baa5 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 29 Nov 2020 04:12:04 +0100
Subject: Use timedelta instead of constructing duration manually
A newline was also added in the set method to keep it consistent with
set_if_exists.
---
bot/exts/info/doc/_redis_cache.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_redis_cache.py b/bot/exts/info/doc/_redis_cache.py
index 2230884c9..e8577aa64 100644
--- a/bot/exts/info/doc/_redis_cache.py
+++ b/bot/exts/info/doc/_redis_cache.py
@@ -19,7 +19,8 @@ class DocRedisCache(RedisObject):
Keys expire after a week to keep data up to date.
"""
- expiry_timestamp = datetime.datetime.now().timestamp() + 7 * 24 * 60 * 60
+ expiry_timestamp = (datetime.datetime.now() + datetime.timedelta(weeks=1)).timestamp()
+
with await self._get_pool_connection() as connection:
await connection.hset(
f"{self.namespace}:{item.package}",
@@ -34,7 +35,7 @@ class DocRedisCache(RedisObject):
Keys expire after a week to keep data up to date.
"""
- expiry_timestamp = datetime.datetime.now().timestamp() + 7 * 24 * 60 * 60
+ expiry_timestamp = (datetime.datetime.now() + datetime.timedelta(weeks=1)).timestamp()
with await self._get_pool_connection() as connection:
if await connection.hexists(f"{self.namespace}:{item.package}", self.get_item_key(item)):
--
cgit v1.2.3
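Both expressions compute a timestamp one week ahead; the timedelta form just states that intent directly. A quick comparison, noting one subtlety with naive local datetimes:

    import datetime

    now = datetime.datetime.now()
    manual = now.timestamp() + 7 * 24 * 60 * 60
    via_timedelta = (now + datetime.timedelta(weeks=1)).timestamp()
    # For naive local datetimes the two can differ by an hour across a DST
    # change, since timedelta addition moves the wall clock by seven days
    # while the manual sum adds exactly 604800 seconds.
    print(manual, via_timedelta)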
From e22deb55de286c4186da2f0d2f2d562b9e333630 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 29 Nov 2020 04:34:41 +0100
Subject: Use pop instead of getitem and del
Co-authored-by: MarkKoz
---
bot/exts/info/doc/_cog.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 9e4bb54ea..e29e3b717 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -139,9 +139,8 @@ class CachedParser:
# The parse queue stores soups along with the doc symbols in QueueItem objects,
# in case we're moving a DocItem we have to get the associated QueueItem first and then move it.
item_index = self._queue.index(item)
- queue_item = self._queue[item_index]
+ queue_item = self._queue.pop(item_index)
- del self._queue[item_index]
self._queue.append(queue_item)
def add_item(self, doc_item: DocItem) -> None:
--
cgit v1.2.3
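list.pop(index) removes and returns the element in one call, which is what lets the read-then-delete pair collapse. The parse queue here is consumed from its end, so appending effectively moves an item to the front of the parse order:

    # The same move as _move_to_front, on a plain list.
    queue = ["a", "b", "c"]
    item = queue.pop(1)   # removes and returns "b"
    queue.append(item)
    assert queue == ["a", "c", "b"]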
From ad90978fd7c038429b715f30519c01d546441afc Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 29 Nov 2020 04:35:43 +0100
Subject: Clear up docstring so it doesn't rely on private attribute
Co-authored-by: MarkKoz
---
bot/exts/info/doc/_cog.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index e29e3b717..bd9b589ce 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -135,7 +135,7 @@ class CachedParser:
log.trace("Finished parsing queue.")
def _move_to_front(self, item: Union[QueueItem, DocItem]) -> None:
- """Move `item` to the front of the parse queue."""
+ """Map a DocItem to its page so that the symbol will be parsed once the page is requested."""
# The parse queue stores soups along with the doc symbols in QueueItem objects,
# in case we're moving a DocItem we have to get the associated QueueItem first and then move it.
item_index = self._queue.index(item)
--
cgit v1.2.3
From b094a6fa0dc9d9c2fde75cd79c95c87582f5e23d Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 29 Nov 2020 04:44:17 +0100
Subject: Various grammar and sentence structure changes
Co-authored-by: MarkKoz
---
bot/converters.py | 2 +-
bot/exts/info/doc/_cog.py | 19 ++++++++++---------
bot/exts/info/doc/_inventory_parser.py | 6 +++---
3 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/bot/converters.py b/bot/converters.py
index 3066eaabb..901ba1cca 100644
--- a/bot/converters.py
+++ b/bot/converters.py
@@ -140,7 +140,7 @@ class PackageName(Converter):
async def convert(cls, ctx: Context, argument: str) -> str:
"""Checks whether the given string is a valid package name."""
if cls.PACKAGE_NAME_RE.search(argument):
- raise BadArgument("The provided package name is not valid, please only use the _ and a-z characters.")
+ raise BadArgument("The provided package name is not valid; please only use the _ and a-z characters.")
return argument
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index bd9b589ce..ea91b2353 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -207,7 +207,7 @@ class DocCog(commands.Cog):
if not package:
delay = 2*60 if inventory_url not in self.scheduled_inventories else 5*60
- log.info(f"Failed to fetch inventory, attempting again in {delay//60} minutes.")
+ log.info(f"Failed to fetch inventory; attempting again in {delay//60} minutes.")
self.inventory_scheduler.schedule_later(
delay,
api_package_name,
@@ -275,7 +275,7 @@ class DocCog(commands.Cog):
self.scheduled_inventories.clear()
await self.item_fetcher.clear()
- # Run all coroutines concurrently - since each of them performs a HTTP
+ # Run all coroutines concurrently - since each of them performs an HTTP
# request, this speeds up fetching the inventory data heavily.
coros = [
self.update_single(
@@ -322,7 +322,7 @@ class DocCog(commands.Cog):
@commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
async def docs_group(self, ctx: commands.Context, *, symbol: Optional[str]) -> None:
- """Lookup documentation for Python symbols."""
+ """Look up documentation for Python symbols."""
await ctx.invoke(self.get_command, symbol=symbol)
@docs_group.command(name='getdoc', aliases=('g',))
@@ -414,7 +414,8 @@ class DocCog(commands.Cog):
if await self.update_single(package_name, base_url, inventory_url) is None:
await ctx.send(
- f"Added package `{package_name}` to database but failed to fetch inventory; rescheduled in 2 minutes."
+ f"Added the package `{package_name}` to the database but failed to fetch inventory; "
+ f"trying again in 2 minutes."
)
return
await ctx.send(f"Added package `{package_name}` to database and refreshed inventory.")
@@ -425,7 +426,7 @@ class DocCog(commands.Cog):
"""
Removes the specified package from the database.
- Examples:
+ Example:
!docs deletedoc aiohttp
"""
await self.bot.api_client.delete(f'bot/documentation-links/{package_name}')
@@ -435,12 +436,12 @@ class DocCog(commands.Cog):
# that was from this package is properly deleted.
await self.refresh_inventory()
await doc_cache.delete(package_name)
- await ctx.send(f"Successfully deleted `{package_name}` and refreshed inventory.")
+ await ctx.send(f"Successfully deleted `{package_name}` and refreshed the inventory.")
@docs_group.command(name="refreshdoc", aliases=("rfsh", "r"))
@commands.has_any_role(*MODERATION_ROLES)
async def refresh_command(self, ctx: commands.Context) -> None:
- """Refresh inventories and send differences to channel."""
+ """Refresh inventories and show the difference."""
old_inventories = set(self.base_urls)
with ctx.typing():
await self.refresh_inventory()
@@ -461,6 +462,6 @@ class DocCog(commands.Cog):
@docs_group.command(name="cleardoccache")
@commands.has_any_role(*MODERATION_ROLES)
async def clear_cache_command(self, ctx: commands.Context, package_name: PackageName) -> None:
- """Clear persistent redis cache for `package`."""
+ """Clear the persistent redis cache for `package`."""
await doc_cache.delete(package_name)
- await ctx.send(f"Succesfully cleared cache for {package_name}")
+ await ctx.send(f"Successfully cleared the cache for `{package_name}`.")
diff --git a/bot/exts/info/doc/_inventory_parser.py b/bot/exts/info/doc/_inventory_parser.py
index 23931869b..96df08786 100644
--- a/bot/exts/info/doc/_inventory_parser.py
+++ b/bot/exts/info/doc/_inventory_parser.py
@@ -101,17 +101,17 @@ async def fetch_inventory(
inventory = await _fetch_inventory(client_session, url)
except aiohttp.ClientConnectorError:
log.warning(
- f"Failed to connect to inventory url at {url}, "
+ f"Failed to connect to inventory url at {url}; "
f"trying again ({attempt}/{FAILED_REQUEST_ATTEMPTS})."
)
except aiohttp.ClientError:
log.error(
- f"Failed to get inventory from {url}, "
+ f"Failed to get inventory from {url}; "
f"trying again ({attempt}/{FAILED_REQUEST_ATTEMPTS})."
)
except Exception:
log.exception(
- f"An unexpected error has occurred during fetching of {url}, "
+ f"An unexpected error has occurred during fetching of {url}; "
f"trying again ({attempt}/{FAILED_REQUEST_ATTEMPTS})."
)
else:
--
cgit v1.2.3
From 210f7d9b096b373935ab2a3f5f41989f4a081e35 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 29 Nov 2020 23:42:26 +0100
Subject: Remove redundant suppress
---
bot/exts/info/doc/_cog.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index ea91b2353..7d57f65ad 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -215,8 +215,8 @@ class DocCog(commands.Cog):
)
self.scheduled_inventories.add(api_package_name)
return False
- with suppress(KeyError):
- self.scheduled_inventories.discard(api_package_name)
+
+ self.scheduled_inventories.discard(api_package_name)
for group, items in package.items():
for symbol, relative_doc_url in items:
--
cgit v1.2.3
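The suppress was redundant because set.discard, unlike set.remove, is already a no-op for missing elements:

    scheduled = {"aiohttp"}
    scheduled.discard("aiohttp")   # removes the element
    scheduled.discard("aiohttp")   # second call silently does nothing
    try:
        scheduled.remove("aiohttp")
    except KeyError:
        print("remove raises for a missing element; discard does not")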
From 8b41a7678d175de69ae6bf72e6a9f6e7036e1968 Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Tue, 8 Dec 2020 10:21:41 +0200
Subject: Add file path to codeblock
---
bot/exts/info/code_snippets.py | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py
index 1bb00b677..f807fa9a7 100644
--- a/bot/exts/info/code_snippets.py
+++ b/bot/exts/info/code_snippets.py
@@ -188,9 +188,16 @@ class CodeSnippets(Cog):
if not is_valid_language:
language = ''
+ # Adds a label showing the file path to the snippet
+ if start_line == end_line:
+ ret = f'`{file_path}` line {start_line}\n'
+ else:
+ ret = f'`{file_path}` lines {start_line} to {end_line}\n'
+
if len(required) != 0:
- return f'```{language}\n{required}```\n'
- return ''
+ return f'{ret}```{language}\n{required}```\n'
+ # Returns an empty codeblock if the snippet is empty
+ return f'{ret}``` ```\n'
def __init__(self, bot: Bot):
"""Initializes the cog's bot."""
--
cgit v1.2.3
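With illustrative values for file_path and the line range, the label logic added above renders like this:

    file_path = "bot/exts/info/code_snippets.py"
    start_line, end_line = 188, 196

    if start_line == end_line:
        ret = f'`{file_path}` line {start_line}\n'
    else:
        ret = f'`{file_path}` lines {start_line} to {end_line}\n'

    print(ret)  # `bot/exts/info/code_snippets.py` lines 188 to 196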
From e8d2448c771aef262b294a583661092c9e90baef Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Tue, 8 Dec 2020 10:36:56 +0200
Subject: Add logging for HTTP requests
---
bot/exts/info/code_snippets.py | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py
index f807fa9a7..e1025e568 100644
--- a/bot/exts/info/code_snippets.py
+++ b/bot/exts/info/code_snippets.py
@@ -1,3 +1,4 @@
+import logging
import re
import textwrap
from urllib.parse import quote_plus
@@ -8,6 +9,7 @@ from discord.ext.commands import Cog
from bot.bot import Bot
from bot.utils.messages import wait_for_deletion
+log = logging.getLogger(__name__)
GITHUB_RE = re.compile(
r'https://github\.com/(?P<repo>.+?)/blob/(?P<path>.+/.+)'
@@ -40,11 +42,14 @@ class CodeSnippets(Cog):
async def _fetch_response(self, url: str, response_format: str, **kwargs) -> str:
"""Makes http requests using aiohttp."""
- async with self.bot.http_session.get(url, **kwargs) as response:
- if response_format == 'text':
- return await response.text()
- elif response_format == 'json':
- return await response.json()
+ try:
+ async with self.bot.http_session.get(url, **kwargs) as response:
+ if response_format == 'text':
+ return await response.text()
+ elif response_format == 'json':
+ return await response.json()
+ except Exception:
+ log.exception(f'Failed to fetch code snippet from {url}.')
def _find_ref(self, path: str, refs: tuple) -> tuple:
"""Loops through all branches and tags to find the required ref."""
--
cgit v1.2.3
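log.exception logs at ERROR level and attaches the active traceback, so it belongs inside an except block as used here; note that _fetch_response now implicitly returns None when the request fails. A minimal demonstration of the logging call:

    import logging

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger("snippets")

    try:
        raise ValueError("boom")
    except Exception:
        # Logs the message plus the full traceback of the ValueError.
        log.exception('Failed to fetch code snippet from https://example.com.')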
From 0e48ae679abc0937b4aad583b1b29ee0b3e3eb15 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 9 Dec 2020 13:40:01 +0100
Subject: Improve handling of strings
Previously the code assumed ' and " could be used interchangeably,
and strings inside brackets were ignored for depth, but their
contents weren't, causing strings like "ab[cd" to increase the
depth.
---
bot/exts/info/doc/_parsing.py | 35 ++++++++++++++++++++++++++++-------
1 file changed, 28 insertions(+), 7 deletions(-)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 418405ca9..e6103dde2 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -56,6 +56,15 @@ _BRACKET_PAIRS = {
}
+def _is_closing_quote(search_string: str, index: int) -> bool:
+ """Check whether the quote at `index` inside `search_string` can be a closing quote."""
+ if search_string[index - 1] != "\\":
+ return True
+ elif search_string[index - 2] == "\\":
+ return True
+ return False
+
+
def _split_parameters(parameters_string: str) -> List[str]:
"""
Split parameters of a signature into individual parameter strings on commas.
@@ -67,9 +76,11 @@ def _split_parameters(parameters_string: str) -> List[str]:
depth = 0
expected_end = None
current_search = None
+ quote_character = None
- for index, character in enumerate(parameters_string):
- if character in _BRACKET_PAIRS:
+ enumerated_string = enumerate(parameters_string)
+ for index, character in enumerated_string:
+ if quote_character is None and character in _BRACKET_PAIRS:
if current_search is None:
current_search = character
expected_end = _BRACKET_PAIRS[character]
@@ -77,12 +88,22 @@ def _split_parameters(parameters_string: str) -> List[str]:
depth += 1
elif character in {"'", '"'}:
- if depth == 0:
+ if current_search is not None:
+ # We're currently searching for a bracket, skip all characters that belong to the string
+ # to avoid false positives of closing brackets
+ quote_character = character
+ for index, character in enumerated_string:
+ if character == quote_character and _is_closing_quote(parameters_string, index):
+ break
+
+ elif depth == 0:
depth += 1
- elif parameters_string[index-1] != "\\":
- depth -= 1
- elif parameters_string[index-2] == "\\":
- depth -= 1
+ quote_character = character
+ elif character == quote_character:
+ if _is_closing_quote(parameters_string, index):
+ depth -= 1
+ if depth == 0:
+ quote_character = None
elif character == expected_end:
depth -= 1
--
cgit v1.2.3
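The new helper treats a quote as closing unless it is escaped by a single backslash; a doubled backslash is itself escaped, so the quote still closes. The function copied from the patch, with two worked cases:

    def _is_closing_quote(search_string: str, index: int) -> bool:
        if search_string[index - 1] != "\\":
            return True
        elif search_string[index - 2] == "\\":
            return True
        return False

    s = r'"ab\" cd"'
    assert not _is_closing_quote(s, 4)  # the \" at index 4 is escaped
    assert _is_closing_quote(s, 8)      # the final quote closes the string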
From 04aa50bc3ac3baca788392fb6a56a4ba43e678d4 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 9 Dec 2020 15:25:53 +0100
Subject: Merge current_search and expected_end in
The two variables were initialized and cleared together and contained
related information
---
bot/exts/info/doc/_parsing.py | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index e6103dde2..a8b38f400 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -4,6 +4,7 @@ import logging
import re
import string
import textwrap
+from collections import namedtuple
from functools import partial
from typing import Callable, Collection, Container, Iterable, List, Optional, TYPE_CHECKING, Union
@@ -49,10 +50,12 @@ _MAX_SIGNATURES_LENGTH = (_EMBED_CODE_BLOCK_LINE_LENGTH + 8) * _MAX_SIGNATURE_AM
# Maximum discord message length - signatures on top
_MAX_DESCRIPTION_LENGTH = 2000 - _MAX_SIGNATURES_LENGTH
_TRUNCATE_STRIP_CHARACTERS = "!?:;." + string.whitespace
+
+BracketPair = namedtuple("BracketPair", ["opening_bracket", "closing_bracket"])
_BRACKET_PAIRS = {
- "{": "}",
- "(": ")",
- "[": "]",
+ "{": BracketPair("{", "}"),
+ "(": BracketPair("(", ")"),
+ "[": BracketPair("[", "]"),
}
@@ -74,17 +77,16 @@ def _split_parameters(parameters_string: str) -> List[str]:
parameters_list = []
last_split = 0
depth = 0
- expected_end = None
- current_search = None
+ current_search: Optional[BracketPair] = None
quote_character = None
enumerated_string = enumerate(parameters_string)
for index, character in enumerated_string:
if quote_character is None and character in _BRACKET_PAIRS:
if current_search is None:
- current_search = character
- expected_end = _BRACKET_PAIRS[character]
- if character == current_search:
+ current_search = _BRACKET_PAIRS[character]
+ depth = 1
+ elif character == current_search.opening_bracket:
depth += 1
elif character in {"'", '"'}:
@@ -105,11 +107,10 @@ def _split_parameters(parameters_string: str) -> List[str]:
if depth == 0:
quote_character = None
- elif character == expected_end:
+ elif current_search is not None and character == current_search.closing_bracket:
depth -= 1
if depth == 0:
current_search = None
- expected_end = None
elif depth == 0 and character == ",":
parameters_list.append(parameters_string[last_split:index])
--
cgit v1.2.3
From 50cbfbda930aab5492411863aaaf8f8cd5ef57fd Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 9 Dec 2020 15:26:53 +0100
Subject: Create a generator instead of returning a list
The result of _split_parameters is only iterated over, so a list is not
needed. Making it lazy may also save some time in cases where we don't
use all parameters.
---
bot/exts/info/doc/_parsing.py | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index a8b38f400..567786204 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -6,7 +6,7 @@ import string
import textwrap
from collections import namedtuple
from functools import partial
-from typing import Callable, Collection, Container, Iterable, List, Optional, TYPE_CHECKING, Union
+from typing import Callable, Collection, Container, Iterable, Iterator, List, Optional, TYPE_CHECKING, Union
from bs4 import BeautifulSoup
from bs4.element import NavigableString, PageElement, Tag
@@ -68,13 +68,12 @@ def _is_closing_quote(search_string: str, index: int) -> bool:
return False
-def _split_parameters(parameters_string: str) -> List[str]:
+def _split_parameters(parameters_string: str) -> Iterator[str]:
"""
Split parameters of a signature into individual parameter strings on commas.
Long string literals are not accounted for.
"""
- parameters_list = []
last_split = 0
depth = 0
current_search: Optional[BracketPair] = None
@@ -113,11 +112,10 @@ def _split_parameters(parameters_string: str) -> List[str]:
current_search = None
elif depth == 0 and character == ",":
- parameters_list.append(parameters_string[last_split:index])
+ yield parameters_string[last_split:index]
last_split = index + 1
- parameters_list.append(parameters_string[last_split:])
- return parameters_list
+ yield parameters_string[last_split:]
def _find_elements_until_tag(
--
cgit v1.2.3
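Callers can now pull parameters one at a time. A simplified top-level comma splitter in the same generator style (the real _split_parameters also tracks bracket kinds and quoted strings, per the logic above):

    from typing import Iterator

    def split_parameters(parameters_string: str) -> Iterator[str]:
        # Yield substrings between commas that sit at bracket depth zero.
        last_split = 0
        depth = 0
        for index, character in enumerate(parameters_string):
            if character in "([{":
                depth += 1
            elif character in ")]}":
                depth -= 1
            elif depth == 0 and character == ",":
                yield parameters_string[last_split:index]
                last_split = index + 1
        yield parameters_string[last_split:]

    assert list(split_parameters("a, b=[1, 2], c")) == ["a", " b=[1, 2]", " c"]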
From ea9b3e0e9ac74ea541f436f8021178f76f19af39 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Fri, 11 Dec 2020 09:44:11 +0100
Subject: Restructure doc cache to handle caches of whole pages
Previously we used packages as the top level keys, and the fields
contained the url and the symbol id. However, if we want to store
all symbols from fetched pages instead of only the ones that were
fetched by users, this comes off worse than using the page url in
the key and setting EXPIREs on the keys instead of expiring entries
manually in Python.
The new implementation uses package:url as the redis key and only
the symbol id for field names, with the expire set to a week on the
key. This means we have to pattern match the keys when deleting the
cache for a package, but that is done far less often than the expire
checking done previously.
---
bot/exts/info/doc/_cog.py | 3 +-
bot/exts/info/doc/_redis_cache.py | 95 +++++++++++++++------------------------
2 files changed, 37 insertions(+), 61 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 7d57f65ad..d1518f69d 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -122,7 +122,7 @@ class CachedParser:
item, soup = self._queue.pop()
try:
markdown = get_symbol_markdown(soup, item)
- await doc_cache.set_if_exists(item, markdown)
+ await doc_cache.set(item, markdown)
self._results[item] = markdown
except Exception:
log.exception(f"Unexpected error when handling {item}")
@@ -178,7 +178,6 @@ class DocCog(commands.Cog):
self.scheduled_inventories = set()
self.bot.loop.create_task(self.init_refresh_inventory())
- self.bot.loop.create_task(doc_cache.delete_expired())
async def init_refresh_inventory(self) -> None:
"""Refresh documentation inventory on cog initialization."""
diff --git a/bot/exts/info/doc/_redis_cache.py b/bot/exts/info/doc/_redis_cache.py
index e8577aa64..52cb2bc94 100644
--- a/bot/exts/info/doc/_redis_cache.py
+++ b/bot/exts/info/doc/_redis_cache.py
@@ -1,7 +1,6 @@
from __future__ import annotations
import datetime
-import pickle
from typing import Optional, TYPE_CHECKING
from async_rediscache.types.base import RedisObject, namespace_lock
@@ -12,77 +11,55 @@ if TYPE_CHECKING:
class DocRedisCache(RedisObject):
"""Interface for redis functionality needed by the Doc cog."""
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._set_expires = set()
+
@namespace_lock
async def set(self, item: DocItem, value: str) -> None:
"""
- Set markdown `value` for `key`.
+ Set the Markdown `value` for the symbol `item`.
- Keys expire after a week to keep data up to date.
+ All keys from a single page are stored together, expiring a week after the first set.
"""
- expiry_timestamp = (datetime.datetime.now() + datetime.timedelta(weeks=1)).timestamp()
+ url_key = remove_suffix(item.relative_url_path, ".html")
+ redis_key = f"{self.namespace}:{item.package}:{url_key}"
+ needs_expire = False
with await self._get_pool_connection() as connection:
- await connection.hset(
- f"{self.namespace}:{item.package}",
- self.get_item_key(item),
- pickle.dumps((value, expiry_timestamp))
- )
-
- @namespace_lock
- async def set_if_exists(self, item: DocItem, value: str) -> None:
- """
- Set markdown `value` for `key` if `key` exists.
+ if item.package+url_key not in self._set_expires:
+ self._set_expires.add(item.package+url_key)
+ needs_expire = not await connection.exists(redis_key)
- Keys expire after a week to keep data up to date.
- """
- expiry_timestamp = (datetime.datetime.now() + datetime.timedelta(weeks=1)).timestamp()
-
- with await self._get_pool_connection() as connection:
- if await connection.hexists(f"{self.namespace}:{item.package}", self.get_item_key(item)):
- await connection.hset(
- f"{self.namespace}:{item.package}",
- self.get_item_key(item),
- pickle.dumps((value, expiry_timestamp))
- )
+ await connection.hset(redis_key, item.symbol_id, value)
+ if needs_expire:
+ await connection.expire(redis_key, datetime.timedelta(weeks=1).total_seconds())
@namespace_lock
async def get(self, item: DocItem) -> Optional[str]:
- """Get markdown contents for `key`."""
- with await self._get_pool_connection() as connection:
- cached_value = await connection.hget(f"{self.namespace}:{item.package}", self.get_item_key(item))
- if cached_value is None:
- return None
-
- value, expire = pickle.loads(cached_value)
- if expire <= datetime.datetime.now().timestamp():
- await connection.hdel(f"{self.namespace}:{item.package}", self.get_item_key(item))
- return None
+ """Return the Markdown content of the symbol `item` if it exists."""
+ url_key = remove_suffix(item.relative_url_path, ".html")
- return value
-
- @namespace_lock
- async def delete(self, package: str) -> None:
- """Remove all values for `package`."""
with await self._get_pool_connection() as connection:
- await connection.delete(f"{self.namespace}:{package}")
+ return await connection.hget(f"{self.namespace}:{item.package}:{url_key}", item.symbol_id, encoding="utf8")
@namespace_lock
- async def delete_expired(self) -> None:
- """Delete all expired keys."""
- current_timestamp = datetime.datetime.now().timestamp()
+ async def delete(self, package: str) -> bool:
+ """Remove all values for `package`; return True if at least one key was deleted, False otherwise."""
with await self._get_pool_connection() as connection:
- async for package_key in connection.iscan(match=f"{self.namespace}*"):
- expired_fields = []
-
- for field, cached_value in (await connection.hgetall(package_key)).items():
- _, expire = pickle.loads(cached_value)
- if expire <= current_timestamp:
- expired_fields.append(field)
-
- if expired_fields:
- await connection.hdel(package_key, *expired_fields)
-
- @staticmethod
- def get_item_key(item: DocItem) -> str:
- """Create redis key for `item`."""
- return item.relative_url_path + item.symbol_id
+ package_keys = [
+ package_key async for package_key in connection.iscan(match=f"{self.namespace}:{package}:*")
+ ]
+ if package_keys:
+ await connection.delete(*package_keys)
+ return True
+ return False
+
+
+def remove_suffix(string: str, suffix: str) -> str:
+ """Remove `suffix` from end of `string`."""
+ # TODO replace usages with str.removesuffix on 3.9
+ if string.endswith(suffix):
+ return string[:-len(suffix)]
+ else:
+ return string
--
cgit v1.2.3
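Under the new layout each documentation page becomes one redis hash named namespace:package:url, symbol ids are the hash fields, and a single EXPIRE covers the whole page. A sketch of the write path under those assumptions, with aioredis-style calls as used through async-rediscache and illustrative names:

    import datetime

    namespace, package, url_key = "doc", "aiohttp", "client_reference"
    redis_key = f"{namespace}:{package}:{url_key}"

    async def cache_symbol(connection, symbol_id: str, markdown: str) -> None:
        # Only the first write to a page needs to start its one-week TTL.
        needs_expire = not await connection.exists(redis_key)
        await connection.hset(redis_key, symbol_id, markdown)
        if needs_expire:
            await connection.expire(redis_key, datetime.timedelta(weeks=1).total_seconds())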
From c42bf69a8b170772710c2184a3d0d3d57f597c30 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Fri, 11 Dec 2020 11:05:42 +0100
Subject: Use global bot http_session instead of parameter
---
bot/converters.py | 2 +-
bot/exts/info/doc/_cog.py | 12 ++++++------
bot/exts/info/doc/_inventory_parser.py | 13 ++++++-------
3 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/bot/converters.py b/bot/converters.py
index d44b675a7..d558fa3df 100644
--- a/bot/converters.py
+++ b/bot/converters.py
@@ -190,7 +190,7 @@ class InventoryURL(Converter):
async def convert(ctx: Context, url: str) -> str:
"""Convert url to Intersphinx inventory URL."""
await ctx.trigger_typing()
- if await _inventory_parser.fetch_inventory(ctx.bot.http_session, url) is None:
+ if await _inventory_parser.fetch_inventory(url) is None:
raise BadArgument(f"Failed to fetch inventory file after {_inventory_parser.FAILED_REQUEST_ATTEMPTS}.")
return url
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 524dcc829..e1be956cd 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -9,10 +9,10 @@ from contextlib import suppress
from typing import Dict, List, NamedTuple, Optional, Union
import discord
-from aiohttp import ClientSession
from bs4 import BeautifulSoup
from discord.ext import commands
+from bot import instance as bot_instance
from bot.bot import Bot
from bot.constants import MODERATION_ROLES, RedirectOutput
from bot.converters import InventoryURL, PackageName, ValidURL
@@ -85,7 +85,7 @@ class CachedParser:
self._item_events: Dict[DocItem, asyncio.Event] = {}
self._parse_task = None
- async def get_markdown(self, client_session: ClientSession, doc_item: DocItem) -> str:
+ async def get_markdown(self, doc_item: DocItem) -> str:
"""
Get result markdown of `doc_item`.
@@ -96,7 +96,7 @@ class CachedParser:
return symbol
if (symbols_to_queue := self._page_symbols.get(doc_item.url)) is not None:
- async with client_session.get(doc_item.url) as response:
+ async with bot_instance.http_session.get(doc_item.url) as response:
soup = BeautifulSoup(await response.text(encoding="utf8"), "lxml")
self._queue.extend(QueueItem(symbol, soup) for symbol in symbols_to_queue)
@@ -202,7 +202,7 @@ class DocCog(commands.Cog):
Return True on success; False if fetching failed and was rescheduled.
"""
self.base_urls[api_package_name] = base_url
- package = await fetch_inventory(self.bot.http_session, inventory_url)
+ package = await fetch_inventory(inventory_url)
if not package:
delay = 2*60 if inventory_url not in self.scheduled_inventories else 5*60
@@ -210,7 +210,7 @@ class DocCog(commands.Cog):
self.inventory_scheduler.schedule_later(
delay,
api_package_name,
- fetch_inventory(self.bot.http_session, inventory_url)
+ fetch_inventory(inventory_url)
)
self.scheduled_inventories.add(api_package_name)
return False
@@ -302,7 +302,7 @@ class DocCog(commands.Cog):
markdown = await doc_cache.get(symbol_info)
if markdown is None:
log.debug(f"Redis cache miss for symbol `{symbol}`.")
- markdown = await self.item_fetcher.get_markdown(self.bot.http_session, symbol_info)
+ markdown = await self.item_fetcher.get_markdown(symbol_info)
if markdown is not None:
await doc_cache.set(symbol_info, markdown)
else:
diff --git a/bot/exts/info/doc/_inventory_parser.py b/bot/exts/info/doc/_inventory_parser.py
index 96df08786..0d9bd726a 100644
--- a/bot/exts/info/doc/_inventory_parser.py
+++ b/bot/exts/info/doc/_inventory_parser.py
@@ -6,6 +6,8 @@ from typing import AsyncIterator, DefaultDict, List, Optional, Tuple
import aiohttp
+import bot
+
log = logging.getLogger(__name__)
FAILED_REQUEST_ATTEMPTS = 3
@@ -69,10 +71,10 @@ async def _load_v2(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[
return invdata
-async def _fetch_inventory(client_session: aiohttp.ClientSession, url: str) -> DefaultDict[str, List[Tuple[str, str]]]:
+async def _fetch_inventory(url: str) -> DefaultDict[str, List[Tuple[str, str]]]:
"""Fetch, parse and return an intersphinx inventory file from an url."""
timeout = aiohttp.ClientTimeout(sock_connect=5, sock_read=5)
- async with client_session.get(url, timeout=timeout, raise_for_status=True) as response:
+ async with bot.instance.http_session.get(url, timeout=timeout, raise_for_status=True) as response:
stream = response.content
inventory_header = (await stream.readline()).decode().rstrip()
@@ -91,14 +93,11 @@ async def _fetch_inventory(client_session: aiohttp.ClientSession, url: str) -> D
raise ValueError(f"Invalid inventory file at url {url}.")
-async def fetch_inventory(
- client_session: aiohttp.ClientSession,
- url: str
-) -> Optional[DefaultDict[str, List[Tuple[str, str]]]]:
+async def fetch_inventory(url: str) -> Optional[DefaultDict[str, List[Tuple[str, str]]]]:
"""Get inventory from `url`, retrying `FAILED_REQUEST_ATTEMPTS` times on errors."""
for attempt in range(1, FAILED_REQUEST_ATTEMPTS+1):
try:
- inventory = await _fetch_inventory(client_session, url)
+ inventory = await _fetch_inventory(url)
except aiohttp.ClientConnectorError:
log.warning(
f"Failed to connect to inventory url at {url}; "
--
cgit v1.2.3
From fdff2491fc48bac0c55e0a506e7f7c395be13c0d Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Fri, 11 Dec 2020 23:41:38 +0100
Subject: Remove internal CachedParser result cache
We no longer need to keep the items around since everything is in redis,
and the cost of always going through redis is fairly small
---
bot/exts/info/doc/_cog.py | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index e1be956cd..d2bbf8c57 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -80,9 +80,8 @@ class CachedParser:
def __init__(self):
self._queue: List[QueueItem] = []
- self._results = {}
self._page_symbols: Dict[str, List[DocItem]] = defaultdict(list)
- self._item_events: Dict[DocItem, asyncio.Event] = {}
+ self._item_futures: Dict[DocItem, asyncio.Future] = {}
self._parse_task = None
async def get_markdown(self, doc_item: DocItem) -> str:
@@ -107,9 +106,8 @@ class CachedParser:
self._parse_task = asyncio.create_task(self._parse_queue())
self._move_to_front(doc_item)
- self._item_events[doc_item] = item_event = asyncio.Event()
- await item_event.wait()
- return self._results[doc_item]
+ self._item_futures[doc_item] = item_future = asyncio.Future()
+ return await item_future
async def _parse_queue(self) -> None:
"""
@@ -123,12 +121,11 @@ class CachedParser:
try:
markdown = get_symbol_markdown(soup, item)
await doc_cache.set(item, markdown)
- self._results[item] = markdown
except Exception:
log.exception(f"Unexpected error when handling {item}")
else:
- if (event := self._item_events.get(item)) is not None:
- event.set()
+ if (future := self._item_futures.get(item)) is not None:
+ future.set_result(markdown)
await asyncio.sleep(0.1)
self._parse_task = None
@@ -153,15 +150,14 @@ class CachedParser:
All currently requested items are waited to be parsed before clearing.
"""
- for event in self._item_events.values():
- await event.wait()
+ for future in self._item_futures.values():
+ await future
if self._parse_task is not None:
self._parse_task.cancel()
self._parse_task = None
self._queue.clear()
- self._results.clear()
self._page_symbols.clear()
- self._item_events.clear()
+ self._item_futures.clear()
class DocCog(commands.Cog):
--
cgit v1.2.3
From f6805c397c47d7dbfc2f38998c7de3556de69b42 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Fri, 11 Dec 2020 23:42:36 +0100
Subject: Ensure only one future is created for each doc_item
Previously, if get_markdown ran twice for the same item, the call that
ran second would overwrite the future created by the first one,
potentially causing the first coroutine to wait on its future forever,
as _parse_queue would only be able to set a result on the last future
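
A minimal sketch of the guarded pattern this moves to, assuming a plain dict of futures (names illustrative, not the cog's real API):

    import asyncio
    from typing import Dict

    futures: Dict[str, asyncio.Future] = {}

    async def wait_for_result(key: str) -> str:
        # Create the future only on the first request; later callers await
        # the same object, so a single set_result wakes every waiter.
        if key not in futures:
            futures[key] = asyncio.get_running_loop().create_future()
        return await futures[key]

    def deliver(key: str, value: str) -> None:
        if (future := futures.get(key)) is not None and not future.done():
            future.set_result(value)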
---
bot/exts/info/doc/_cog.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index d2bbf8c57..78d9c6b9b 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -106,8 +106,9 @@ class CachedParser:
self._parse_task = asyncio.create_task(self._parse_queue())
self._move_to_front(doc_item)
- self._item_futures[doc_item] = item_future = asyncio.Future()
- return await item_future
+ if doc_item not in self._item_futures:
+ self._item_futures[doc_item] = bot_instance.loop.create_future()
+ return await self._item_futures[doc_item]
async def _parse_queue(self) -> None:
"""
--
cgit v1.2.3
From 121bdd16e8ee53d83822e9320232a65ea2ab540a Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Fri, 11 Dec 2020 23:44:59 +0100
Subject: Move parse_queue cleanup into finally block
The finally will make sure we reset the task and log it no matter
what happens. Additionally, the clearing of the variable is now only
done in one place, as the finally also executes when the coro is
cancelled
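
A small self-contained sketch of why the finally is enough, assuming a task shaped like _parse_queue; CancelledError propagates through the finally, so the reset runs on every exit path:

    import asyncio

    async def worker() -> None:
        try:
            while True:
                await asyncio.sleep(1)  # stand-in for parsing work
        finally:
            print("cleanup ran")  # normal exit, error, or cancellation

    async def main() -> None:
        task = asyncio.create_task(worker())
        await asyncio.sleep(0)  # let the worker start
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass  # cleanup has already run by this point

    asyncio.run(main())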
---
bot/exts/info/doc/_cog.py | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 78d9c6b9b..603d7df97 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -117,20 +117,21 @@ class CachedParser:
The coroutine will run as long as the queue is not empty, resetting `self._parse_task` to None when finished.
"""
log.trace("Starting queue parsing.")
- while self._queue:
- item, soup = self._queue.pop()
- try:
- markdown = get_symbol_markdown(soup, item)
- await doc_cache.set(item, markdown)
- except Exception:
- log.exception(f"Unexpected error when handling {item}")
- else:
- if (future := self._item_futures.get(item)) is not None:
- future.set_result(markdown)
- await asyncio.sleep(0.1)
-
- self._parse_task = None
- log.trace("Finished parsing queue.")
+ try:
+ while self._queue:
+ item, soup = self._queue.pop()
+ try:
+ markdown = get_symbol_markdown(soup, item)
+ await doc_cache.set(item, markdown)
+ except Exception:
+ log.exception(f"Unexpected error when handling {item}")
+ else:
+ if (future := self._item_futures.get(item)) is not None:
+ future.set_result(markdown)
+ await asyncio.sleep(0.1)
+ finally:
+ self._parse_task = None
+ log.trace("Finished parsing queue.")
def _move_to_front(self, item: Union[QueueItem, DocItem]) -> None:
"""Map a DocItem to its page so that the symbol will be parsed once the page is requested."""
@@ -155,7 +156,6 @@ class CachedParser:
await future
if self._parse_task is not None:
self._parse_task.cancel()
- self._parse_task = None
self._queue.clear()
self._page_symbols.clear()
self._item_futures.clear()
--
cgit v1.2.3
From 97d0625823171a873393c8baf14212104b1ee955 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Fri, 11 Dec 2020 23:46:24 +0100
Subject: Provide feedback to user when no cache to clear was found
While technically correct, always sending a success message could be
misleading in case of a typo in the package name
---
bot/exts/info/doc/_cog.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 603d7df97..933f4500e 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -454,5 +454,7 @@ class DocCog(commands.Cog):
@commands.has_any_role(*MODERATION_ROLES)
async def clear_cache_command(self, ctx: commands.Context, package_name: PackageName) -> None:
"""Clear the persistent redis cache for `package`."""
- await doc_cache.delete(package_name)
- await ctx.send(f"Successfully cleared the cache for `{package_name}`.")
+ if await doc_cache.delete(package_name):
+ await ctx.send(f"Successfully cleared the cache for `{package_name}`.")
+ else:
+ await ctx.send("No keys matching the package found.")
--
cgit v1.2.3
From 30a3ce49fd346e4a2f4b3c9c12806a2aba8e9e16 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 02:37:03 +0100
Subject: Create function for merging function and decorator wrapper globals
discord.py uses the globals of functions to resolve forward refs
in commands. Previously, decorators applied before commands broke the
bot with forward refs to names that weren't in the namespace of the
module where they were defined; the new function takes care of merging
the globals into a new function to mitigate this issue.
closes: #1323
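
A single-file approximation of the failure and the fix; in the real bug the wrapper is defined in another module, so its __globals__ lack the command module's names (all names below are illustrative):

    import types

    Member = object  # stands in for a name only known to the command's module

    async def cmd(member: "Member") -> None:
        """Command whose string annotation must resolve in its own module."""

    async def wrapper(*args, **kwargs):
        return await cmd(*args, **kwargs)

    # Merge cmd's globals into a copy of the wrapper's, letting the
    # wrapper's own names win, then rebuild the wrapper on the merged dict.
    merged = wrapper.__globals__.copy()
    merged.update(
        (k, v) for k, v in cmd.__globals__.items()
        if k not in wrapper.__code__.co_names
    )
    patched = types.FunctionType(
        wrapper.__code__, merged, wrapper.__name__,
        wrapper.__defaults__, wrapper.__closure__,
    )
    # discord.py-style resolution of the forward ref now succeeds:
    assert eval(cmd.__annotations__["member"], patched.__globals__) is Member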
---
bot/decorators.py | 6 ++----
bot/utils/function.py | 27 +++++++++++++++++++++++++++
bot/utils/lock.py | 3 +--
3 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/bot/decorators.py b/bot/decorators.py
index 063c8f878..3892e350f 100644
--- a/bot/decorators.py
+++ b/bot/decorators.py
@@ -71,7 +71,6 @@ def redirect_output(destination_channel: int, bypass_roles: t.Container[int] = N
This decorator must go before (below) the `command` decorator.
"""
def wrap(func: t.Callable) -> t.Callable:
- @wraps(func)
async def inner(self: Cog, ctx: Context, *args, **kwargs) -> None:
if ctx.channel.id == destination_channel:
log.trace(f"Command {ctx.command.name} was invoked in destination_channel, not redirecting")
@@ -106,7 +105,7 @@ def redirect_output(destination_channel: int, bypass_roles: t.Container[int] = N
await ctx.message.delete()
log.trace("Redirect output: Deleted invocation message")
- return inner
+ return wraps(func)(function.update_wrapper_globals(inner, func))
return wrap
@@ -123,7 +122,6 @@ def respect_role_hierarchy(member_arg: function.Argument) -> t.Callable:
This decorator must go before (below) the `command` decorator.
"""
def decorator(func: t.Callable) -> t.Callable:
- @wraps(func)
async def wrapper(*args, **kwargs) -> None:
log.trace(f"{func.__name__}: respect role hierarchy decorator called")
@@ -151,5 +149,5 @@ def respect_role_hierarchy(member_arg: function.Argument) -> t.Callable:
else:
log.trace(f"{func.__name__}: {target.top_role=} < {actor.top_role=}; calling func")
await func(*args, **kwargs)
- return wrapper
+ return wraps(func)(function.update_wrapper_globals(wrapper, func))
return decorator
diff --git a/bot/utils/function.py b/bot/utils/function.py
index 3ab32fe3c..8b8c7ba5c 100644
--- a/bot/utils/function.py
+++ b/bot/utils/function.py
@@ -1,6 +1,7 @@
"""Utilities for interaction with functions."""
import inspect
+import types
import typing as t
Argument = t.Union[int, str]
@@ -73,3 +74,29 @@ def get_bound_args(func: t.Callable, args: t.Tuple, kwargs: t.Dict[str, t.Any])
bound_args.apply_defaults()
return bound_args.arguments
+
+
+def update_wrapper_globals(wrapper: types.FunctionType, func: types.FunctionType) -> types.FunctionType:
+ """
+ Update globals of `wrapper` with the globals from `func`.
+
+    For forward refs in command annotations discord.py uses the __globals__ attribute of the function
+    to resolve their values; with decorators that replace the function this breaks, because they have
+    their own globals.
+
+    This function creates a new function functionally identical to `wrapper`, which has the globals replaced with
+    a merge of `func`'s globals and the `wrapper`'s globals.
+
+ In case a global name from `func` conflicts with a name from `wrapper`'s globals, `wrapper` will win
+ to keep it functional, but this may cause problems if the name is used as an annotation and
+ discord.py uses it as a converter on a parameter from `func`.
+ """
+ new_globals = wrapper.__globals__.copy()
+ new_globals.update((k, v) for k, v in func.__globals__.items() if k not in wrapper.__code__.co_names)
+ return types.FunctionType(
+ code=wrapper.__code__,
+ globals=new_globals,
+ name=wrapper.__name__,
+ argdefs=wrapper.__defaults__,
+ closure=wrapper.__closure__,
+ )
diff --git a/bot/utils/lock.py b/bot/utils/lock.py
index 7aaafbc88..cf87321c5 100644
--- a/bot/utils/lock.py
+++ b/bot/utils/lock.py
@@ -61,7 +61,6 @@ def lock(namespace: Hashable, resource_id: ResourceId, *, raise_error: bool = Fa
def decorator(func: Callable) -> Callable:
name = func.__name__
- @wraps(func)
async def wrapper(*args, **kwargs) -> Any:
log.trace(f"{name}: mutually exclusive decorator called")
@@ -93,7 +92,7 @@ def lock(namespace: Hashable, resource_id: ResourceId, *, raise_error: bool = Fa
if raise_error:
raise LockedResourceError(str(namespace), id_)
- return wrapper
+ return wraps(func)(function.update_wrapper_globals(wrapper, func))
return decorator
--
cgit v1.2.3
From 3cc32ae30a671a31a3f05c2c8a4af44e09095cc8 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 02:37:37 +0100
Subject: Lock inventory refreshes
All commands that refresh the inventories in some way are now locked to
prevent various race conditions that may have occurred in the unlikely
scenario that they got triggered together. The fetching part of the get
command now also has to wait for any running inventory refresh to
finish before proceeding to fetch and parse the html
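
A condensed sketch of the gating half of this patch (the event here is set in a finally for brevity; the patch itself sets it at the end of refresh_inventory):

    import asyncio

    REFRESH_EVENT = asyncio.Event()
    REFRESH_EVENT.set()  # set whenever no refresh is running

    async def refresh_inventory() -> None:
        REFRESH_EVENT.clear()
        try:
            await asyncio.sleep(1)  # stand-in for re-fetching all inventories
        finally:
            REFRESH_EVENT.set()

    async def fetch_symbol(symbol: str) -> None:
        if not REFRESH_EVENT.is_set():
            # Block only while a refresh is in flight, then use fresh data.
            await REFRESH_EVENT.wait()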
---
bot/exts/info/doc/_cog.py | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 933f4500e..11d17222d 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -17,6 +17,7 @@ from bot.bot import Bot
from bot.constants import MODERATION_ROLES, RedirectOutput
from bot.converters import InventoryURL, PackageName, ValidURL
from bot.pagination import LinePaginator
+from bot.utils.lock import lock
from bot.utils.messages import wait_for_deletion
from bot.utils.scheduling import Scheduler
from ._inventory_parser import fetch_inventory
@@ -39,6 +40,10 @@ PRIORITY_PACKAGES = (
WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
+REFRESH_EVENT = asyncio.Event()
+REFRESH_EVENT.set()
+COMMAND_LOCK_SINGLETON = "inventory refresh"
+
doc_cache = DocRedisCache(namespace="Docs")
@@ -91,9 +96,6 @@ class CachedParser:
If no symbols were fetched from `doc_item`s page before,
the HTML has to be fetched before parsing can be queued.
"""
- if (symbol := self._results.get(doc_item)) is not None:
- return symbol
-
if (symbols_to_queue := self._page_symbols.get(doc_item.url)) is not None:
async with bot_instance.http_session.get(doc_item.url) as response:
soup = BeautifulSoup(await response.text(encoding="utf8"), "lxml")
@@ -176,6 +178,7 @@ class DocCog(commands.Cog):
self.bot.loop.create_task(self.init_refresh_inventory())
+ @lock("doc", COMMAND_LOCK_SINGLETON, raise_error=True)
async def init_refresh_inventory(self) -> None:
"""Refresh documentation inventory on cog initialization."""
await self.bot.wait_until_guild_available()
@@ -258,6 +261,7 @@ class DocCog(commands.Cog):
async def refresh_inventory(self) -> None:
"""Refresh internal documentation inventory."""
+ REFRESH_EVENT.clear()
log.debug("Refreshing documentation inventory...")
for inventory in self.scheduled_inventories:
self.inventory_scheduler.cancel(inventory)
@@ -279,6 +283,7 @@ class DocCog(commands.Cog):
) for package in await self.bot.api_client.get('bot/documentation-links')
]
await asyncio.gather(*coros)
+ REFRESH_EVENT.set()
async def get_symbol_embed(self, symbol: str) -> Optional[discord.Embed]:
"""
@@ -299,6 +304,9 @@ class DocCog(commands.Cog):
markdown = await doc_cache.get(symbol_info)
if markdown is None:
log.debug(f"Redis cache miss for symbol `{symbol}`.")
+ if not REFRESH_EVENT.is_set():
+ log.debug("Waiting for inventories to be refreshed before processing item.")
+ await REFRESH_EVENT.wait()
markdown = await self.item_fetcher.get_markdown(symbol_info)
if markdown is not None:
await doc_cache.set(symbol_info, markdown)
@@ -374,6 +382,7 @@ class DocCog(commands.Cog):
@docs_group.command(name='setdoc', aliases=('s',))
@commands.has_any_role(*MODERATION_ROLES)
+ @lock("doc", COMMAND_LOCK_SINGLETON, raise_error=True)
async def set_command(
self, ctx: commands.Context, package_name: PackageName,
base_url: ValidURL, inventory_url: InventoryURL
@@ -413,6 +422,7 @@ class DocCog(commands.Cog):
@docs_group.command(name='deletedoc', aliases=('removedoc', 'rm', 'd'))
@commands.has_any_role(*MODERATION_ROLES)
+ @lock("doc", COMMAND_LOCK_SINGLETON, raise_error=True)
async def delete_command(self, ctx: commands.Context, package_name: PackageName) -> None:
"""
Removes the specified package from the database.
@@ -431,6 +441,7 @@ class DocCog(commands.Cog):
@docs_group.command(name="refreshdoc", aliases=("rfsh", "r"))
@commands.has_any_role(*MODERATION_ROLES)
+ @lock("doc", COMMAND_LOCK_SINGLETON, raise_error=True)
async def refresh_command(self, ctx: commands.Context) -> None:
"""Refresh inventories and show the difference."""
old_inventories = set(self.base_urls)
--
cgit v1.2.3
From 9f11b453930b5abbab0b891e8b1ca0a2f9d013d0 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 03:59:40 +0100
Subject: Simplify flow
The else is a bit clearer than the early return
---
bot/exts/info/doc/_cog.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 11d17222d..5e7399afb 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -417,8 +417,8 @@ class DocCog(commands.Cog):
f"Added the package `{package_name}` to the database but failed to fetch inventory; "
f"trying again in 2 minutes."
)
- return
- await ctx.send(f"Added package `{package_name}` to database and refreshed inventory.")
+ else:
+ await ctx.send(f"Added package `{package_name}` to database and refreshed inventory.")
@docs_group.command(name='deletedoc', aliases=('removedoc', 'rm', 'd'))
@commands.has_any_role(*MODERATION_ROLES)
--
cgit v1.2.3
From d21540d56853bc33625b0e1b8e2227294706eedb Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 04:02:49 +0100
Subject: Clear up grammar
Co-authored-by: MarkKoz
---
bot/exts/info/doc/_cog.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 5e7399afb..d828e6b4a 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -76,10 +76,10 @@ class QueueItem(NamedTuple):
class CachedParser:
"""
- Get symbol markdown from pages with smarter caching.
+ Get the symbol Markdown from pages with smarter caching.
DocItems are added through the `add_item` method which adds them to the `_page_symbols` dict.
- `get_markdown` is used to fetch the markdown; when this is used for the first time on a page,
+ `get_markdown` is used to fetch the Markdown; when this is used for the first time on a page,
all of the symbols are queued to be parsed to avoid multiple web requests to the same page.
"""
@@ -91,7 +91,7 @@ class CachedParser:
async def get_markdown(self, doc_item: DocItem) -> str:
"""
- Get result markdown of `doc_item`.
+ Get the result Markdown of `doc_item`.
If no symbols were fetched from `doc_item`s page before,
the HTML has to be fetched before parsing can be queued.
@@ -418,7 +418,7 @@ class DocCog(commands.Cog):
f"trying again in 2 minutes."
)
else:
- await ctx.send(f"Added package `{package_name}` to database and refreshed inventory.")
+ await ctx.send(f"Added the package `{package_name}` to the database and refreshed the inventory.")
@docs_group.command(name='deletedoc', aliases=('removedoc', 'rm', 'd'))
@commands.has_any_role(*MODERATION_ROLES)
--
cgit v1.2.3
From b3f9cc10b7fe50575fee74424ba26636007cbcdc Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 04:16:19 +0100
Subject: Reuse form body to construct log message
Co-authored-by: MarkKoz
---
bot/exts/info/doc/_cog.py | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index d828e6b4a..61f770c0a 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -407,9 +407,7 @@ class DocCog(commands.Cog):
log.info(
f"User @{ctx.author} ({ctx.author.id}) added a new documentation package:\n"
- f"Package name: {package_name}\n"
- f"Base url: {base_url}\n"
- f"Inventory URL: {inventory_url}"
+ + "\n".join(f"{key}: {value}" for key, value in body.items())
)
if await self.update_single(package_name, base_url, inventory_url) is None:
--
cgit v1.2.3
From 7aea86dd22572e9685ed8353428f14e90a9db321 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 04:22:20 +0100
Subject: Make reschedule delays a module constant
---
bot/exts/info/doc/_cog.py | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 61f770c0a..30579894c 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -6,6 +6,7 @@ import re
import sys
from collections import defaultdict
from contextlib import suppress
+from types import SimpleNamespace
from typing import Dict, List, NamedTuple, Optional, Union
import discord
@@ -39,6 +40,8 @@ PRIORITY_PACKAGES = (
)
WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
+# Delay to wait before trying to reach a rescheduled inventory again, in minutes
+FETCH_RESCHEDULE_DELAY = SimpleNamespace(first=2, repeated=5)
REFRESH_EVENT = asyncio.Event()
REFRESH_EVENT.set()
@@ -197,7 +200,8 @@ class DocCog(commands.Cog):
* `inventory_url` is the absolute URL to the intersphinx inventory.
If the inventory file is currently unreachable,
- the update is rescheduled to execute in 2 minutes on the first attempt, and 5 minutes on subsequent attempts.
+ the update is rescheduled to execute in FETCH_RESCHEDULE_DELAY.first minutes on the first attempt,
+ and FETCH_RESCHEDULE_DELAY.repeated minutes on the subsequent attempts.
Return True on success; False if fetching failed and was rescheduled.
"""
@@ -205,7 +209,10 @@ class DocCog(commands.Cog):
package = await fetch_inventory(inventory_url)
if not package:
- delay = 2*60 if inventory_url not in self.scheduled_inventories else 5*60
+ if inventory_url not in self.scheduled_inventories:
+ delay = FETCH_RESCHEDULE_DELAY.first * 60
+ else:
+ delay = FETCH_RESCHEDULE_DELAY.repeated * 60
log.info(f"Failed to fetch inventory; attempting again in {delay//60} minutes.")
self.inventory_scheduler.schedule_later(
delay,
@@ -413,7 +420,7 @@ class DocCog(commands.Cog):
if await self.update_single(package_name, base_url, inventory_url) is None:
await ctx.send(
f"Added the package `{package_name}` to the database but failed to fetch inventory; "
- f"trying again in 2 minutes."
+ f"trying again in {FETCH_RESCHEDULE_DELAY.first} minutes."
)
else:
await ctx.send(f"Added the package `{package_name}` to the database and refreshed the inventory.")
--
cgit v1.2.3
From 73502611d1420a62f1e8c0a6ca51c02dc2c8f896 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 04:25:26 +0100
Subject: Call command method directly
Co-authored-by: MarkKoz
---
bot/exts/info/doc/_cog.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 30579894c..4cd28e29a 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -334,7 +334,7 @@ class DocCog(commands.Cog):
@commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
async def docs_group(self, ctx: commands.Context, *, symbol: Optional[str]) -> None:
"""Look up documentation for Python symbols."""
- await ctx.invoke(self.get_command, symbol=symbol)
+ await self.get_command(ctx, symbol=symbol)
@docs_group.command(name='getdoc', aliases=('g',))
async def get_command(self, ctx: commands.Context, *, symbol: Optional[str]) -> None:
--
cgit v1.2.3
From 677f2ad91dbc16ef3a33c102e4932d99a65437da Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 04:26:22 +0100
Subject: Change param styling to be consistent with the repo
---
bot/exts/info/doc/_cog.py | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 4cd28e29a..60e86353b 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -187,9 +187,7 @@ class DocCog(commands.Cog):
await self.bot.wait_until_guild_available()
await self.refresh_inventory()
- async def update_single(
- self, api_package_name: str, base_url: str, inventory_url: str
- ) -> bool:
+ async def update_single(self, api_package_name: str, base_url: str, inventory_url: str) -> bool:
"""
Rebuild the inventory for a single package.
@@ -391,8 +389,11 @@ class DocCog(commands.Cog):
@commands.has_any_role(*MODERATION_ROLES)
@lock("doc", COMMAND_LOCK_SINGLETON, raise_error=True)
async def set_command(
- self, ctx: commands.Context, package_name: PackageName,
- base_url: ValidURL, inventory_url: InventoryURL
+ self,
+ ctx: commands.Context,
+ package_name: PackageName,
+ base_url: ValidURL,
+ inventory_url: InventoryURL,
) -> None:
"""
Adds a new documentation metadata object to the site's database.
--
cgit v1.2.3
From f988d3ec07c4ca814fa5ddb47a6e064c4bb32461 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 04:27:29 +0100
Subject: Use string addition instead of join
With only two strings, the addition is a bit clearer than
constructing and joining a tuple
Co-authored-by: MarkKoz
---
bot/exts/info/doc/_cog.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 60e86353b..1b5eaa6d5 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -62,7 +62,7 @@ class DocItem(NamedTuple):
@property
def url(self) -> str:
"""Return the absolute url to the symbol."""
- return "".join((self.base_url, self.relative_url_path))
+ return self.base_url + self.relative_url_path
class QueueItem(NamedTuple):
--
cgit v1.2.3
From 9cfdeacb807442c27de08e2b66c49d998dfae5ce Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 04:29:36 +0100
Subject: Move copyright outside of license text
Co-authored-by: MarkKoz
---
LICENSE-THIRD-PARTY | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/LICENSE-THIRD-PARTY b/LICENSE-THIRD-PARTY
index d454070c2..ab715630d 100644
--- a/LICENSE-THIRD-PARTY
+++ b/LICENSE-THIRD-PARTY
@@ -37,12 +37,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---------------------------------------------------------------------------------------------------
BSD 2-Clause License
Applies to:
- - bot/cogs/doc/inventory_parser.py: _load_v1, _load_v2 and ZlibStreamReader.__aiter__.
+ - Copyright (c) 2007-2020 by the Sphinx team (see AUTHORS file). All rights reserved.
+ - bot/cogs/doc/inventory_parser.py: _load_v1, _load_v2 and ZlibStreamReader.__aiter__.
---------------------------------------------------------------------------------------------------
-Copyright (c) 2007-2020 by the Sphinx team (see AUTHORS file).
-All rights reserved.
-
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
--
cgit v1.2.3
From f416e42efce74082d155d9159114f698a97305cb Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 04:34:05 +0100
Subject: Return the sent message
This allows the caller to work with the message further
---
bot/utils/messages.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/bot/utils/messages.py b/bot/utils/messages.py
index 42bde358d..c42e4bacc 100644
--- a/bot/utils/messages.py
+++ b/bot/utils/messages.py
@@ -135,14 +135,14 @@ def sub_clyde(username: Optional[str]) -> Optional[str]:
return username # Empty string or None
-async def send_denial(ctx: Context, reason: str) -> None:
+async def send_denial(ctx: Context, reason: str) -> discord.Message:
"""Send an embed denying the user with the given reason."""
embed = discord.Embed()
embed.colour = discord.Colour.red()
embed.title = random.choice(NEGATIVE_REPLIES)
embed.description = reason
- await ctx.send(embed=embed)
+ return await ctx.send(embed=embed)
def format_user(user: discord.abc.User) -> str:
--
cgit v1.2.3
From 9c6f3acac1334e885cc6b9d176a4b816bb68710a Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 04:35:37 +0100
Subject: Use send_denial util instead of creating embed manually
The symbol is also no longer sent back to the user, as it is not
necessary, and we can skip the clean_content cleanup on it
---
bot/exts/info/doc/_cog.py | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 1b5eaa6d5..8c52b04cf 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -19,7 +19,7 @@ from bot.constants import MODERATION_ROLES, RedirectOutput
from bot.converters import InventoryURL, PackageName, ValidURL
from bot.pagination import LinePaginator
from bot.utils.lock import lock
-from bot.utils.messages import wait_for_deletion
+from bot.utils.messages import send_denial, wait_for_deletion
from bot.utils.scheduling import Scheduler
from ._inventory_parser import fetch_inventory
from ._parsing import get_symbol_markdown
@@ -370,12 +370,7 @@ class DocCog(commands.Cog):
doc_embed = await self.get_symbol_embed(symbol)
if doc_embed is None:
- symbol = await discord.ext.commands.clean_content().convert(ctx, symbol)
- error_embed = discord.Embed(
- description=f"Sorry, I could not find any documentation for `{(symbol)}`.",
- colour=discord.Colour.red()
- )
- error_message = await ctx.send(embed=error_embed)
+ error_message = await send_denial(ctx, "No documentation found for the requested symbol.")
await wait_for_deletion(error_message, (ctx.author.id,), timeout=NOT_FOUND_DELETE_DELAY)
with suppress(discord.NotFound):
await ctx.message.delete()
--
cgit v1.2.3
From 2a855de33c79bfebee4c85757d26b5463c1fccce Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 04:42:46 +0100
Subject: Use cancel_all instead of manually calling cancel repeatedly
---
bot/exts/info/doc/_cog.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 8c52b04cf..07a287572 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -268,8 +268,7 @@ class DocCog(commands.Cog):
"""Refresh internal documentation inventory."""
REFRESH_EVENT.clear()
log.debug("Refreshing documentation inventory...")
- for inventory in self.scheduled_inventories:
- self.inventory_scheduler.cancel(inventory)
+ self.inventory_scheduler.cancel_all()
# Clear the old base URLS and doc symbols to ensure
# that we start from a fresh local dataset.
--
cgit v1.2.3
From fdc24cf48fcd34b14098befc36bb3d4ce768dccd Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 12 Dec 2020 04:44:37 +0100
Subject: Strip whitespace from symbol Markdown before returning it
The html we parse frequently ends up with trailing, and sometimes
leading, newlines which get stripped out by Discord anyway; we have no
reason to keep those around when sending the Markdown over to redis
---
bot/exts/info/doc/_parsing.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 567786204..521034006 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -345,4 +345,4 @@ def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> Optional[s
else:
signature = _get_signatures(symbol_heading)
description = _get_dd_description(symbol_heading)
- return _create_markdown(signature, description, symbol_data.url).replace('¶', '')
+ return _create_markdown(signature, description, symbol_data.url).replace('¶', '').strip()
--
cgit v1.2.3
From b827d9bc8b66b2b7cc3702056b473ebbaf601031 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 13 Dec 2020 05:48:27 +0100
Subject: Simplify the implementation of the custom strainer
The strainer now forces the text attribute to be None, simplifying
the check on strings, and falls back to the superclass' method on
non-string elements
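
A behavioural sketch, given the Strainer class from this patch (no document needed; search can be called directly):

    # With no name/attrs filters, include_strings decides whether bare
    # strings make it through the strainer.
    assert Strainer(include_strings=True).search("some text") == "some text"
    assert Strainer(include_strings=False).search("some text") is None

    # Anything that is not a plain string is delegated to SoupStrainer.search,
    # i.e. the stock bs4 matching rules apply to tags.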
---
bot/exts/info/doc/_html.py | 25 ++++++++++---------------
1 file changed, 10 insertions(+), 15 deletions(-)
diff --git a/bot/exts/info/doc/_html.py b/bot/exts/info/doc/_html.py
index bc705130d..88fbc8825 100644
--- a/bot/exts/info/doc/_html.py
+++ b/bot/exts/info/doc/_html.py
@@ -1,7 +1,9 @@
-from collections.abc import Iterable
+import logging
from typing import List, Union
-from bs4.element import NavigableString, PageElement, SoupStrainer, Tag
+from bs4.element import PageElement, SoupStrainer
+
+log = logging.getLogger(__name__)
class Strainer(SoupStrainer):
@@ -9,25 +11,18 @@ class Strainer(SoupStrainer):
def __init__(self, *, include_strings: bool, **kwargs):
self.include_strings = include_strings
+ passed_text = kwargs.pop("text", None)
+ if passed_text is not None:
+ log.warning("`text` is not a supported kwarg in the custom strainer.")
super().__init__(**kwargs)
markup_hint = Union[PageElement, List["markup_hint"]]
def search(self, markup: markup_hint) -> Union[PageElement, str]:
"""Extend default SoupStrainer behaviour to allow matching both `Tag`s` and `NavigableString`s."""
- if isinstance(markup, Iterable) and not isinstance(markup, (Tag, str)):
- for element in markup:
- if isinstance(element, NavigableString) and self.search(element):
- return element
- elif isinstance(markup, Tag):
- # Also include tags while we're searching for strings and tags.
- if self.include_strings or (not self.text or self.name or self.attrs):
- return self.search_tag(markup)
-
- elif isinstance(markup, str):
+ if isinstance(markup, str):
# Let everything through the text filter if we're including strings and tags.
- text_filter = None if not self.include_strings else True
- if not self.name and not self.attrs and self._matches(markup, text_filter):
+ if not self.name and not self.attrs and self.include_strings:
return markup
else:
- raise Exception(f"I don't know how to match against a {markup.__class__}")
+ return super().search(markup)
--
cgit v1.2.3
From 73d7d748a550e644980d2604542d279472eb1b0c Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 14 Dec 2020 05:49:58 +0100
Subject: Run html parsing in an executor
The parsing may take up to a few hundred ms depending on the amount
of work it has to do
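
A minimal sketch of the executor hand-off; parse below stands in for the CPU-bound BeautifulSoup/Markdown work:

    import asyncio
    from functools import partial

    def parse(html: str) -> str:
        return html.upper()  # placeholder for the expensive parsing

    async def main() -> None:
        loop = asyncio.get_running_loop()
        # None selects the default ThreadPoolExecutor; the event loop stays
        # responsive while the blocking call runs in a worker thread.
        result = await loop.run_in_executor(None, partial(parse, "<p>hi</p>"))
        print(result)

    asyncio.run(main())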
---
bot/exts/info/doc/_cog.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 07a287572..093e5cdb7 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -6,6 +6,7 @@ import re
import sys
from collections import defaultdict
from contextlib import suppress
+from functools import partial
from types import SimpleNamespace
from typing import Dict, List, NamedTuple, Optional, Union
@@ -126,7 +127,10 @@ class CachedParser:
while self._queue:
item, soup = self._queue.pop()
try:
- markdown = get_symbol_markdown(soup, item)
+ markdown = await bot_instance.loop.run_in_executor(
+ None,
+ partial(get_symbol_markdown, soup, item),
+ )
await doc_cache.set(item, markdown)
except Exception:
log.exception(f"Unexpected error when handling {item}")
--
cgit v1.2.3
From a9dfeb195e53aba9b444959da8b16addea3574d2 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 14 Dec 2020 05:50:45 +0100
Subject: Revert "Clear up docstring so it doesn't rely on private attribute"
This reverts commit ad90978f
---
bot/exts/info/doc/_cog.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 093e5cdb7..92190bc55 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -143,7 +143,7 @@ class CachedParser:
log.trace("Finished parsing queue.")
def _move_to_front(self, item: Union[QueueItem, DocItem]) -> None:
- """Map a DocItem to its page so that the symbol will be parsed once the page is requested."""
+ """Move `item` to the front of the parse queue."""
# The parse queue stores soups along with the doc symbols in QueueItem objects,
# in case we're moving a DocItem we have to get the associated QueueItem first and then move it.
item_index = self._queue.index(item)
--
cgit v1.2.3
From 2da9d443598bcf91c9eb6ab22963806a201fce01 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 14 Dec 2020 05:51:13 +0100
Subject: Clear up docstring so it doesn't rely on private attribute
Co-authored-by: MarkKoz
---
bot/exts/info/doc/_cog.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 92190bc55..6c51ab738 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -152,7 +152,7 @@ class CachedParser:
self._queue.append(queue_item)
def add_item(self, doc_item: DocItem) -> None:
- """Add a DocItem to `_page_symbols`."""
+ """Map a DocItem to its page so that the symbol will be parsed once the page is requested."""
self._page_symbols[doc_item.url].append(doc_item)
async def clear(self) -> None:
--
cgit v1.2.3
From cf00aff24d20a57c2c9178d6d9e30f5d33d9a426 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 15 Dec 2020 00:30:17 +0100
Subject: Create futures for all items in the queue
Creating futures for everything up front and then awaiting them at the
end takes care of all the potential race conditions that may pop up
from items that are parsed and sent to redis while the get_markdown
method is in the middle of fetching a page. In case that happens with
this implementation, we just need to move the item to the front and the
future will get a result set soon afterwards.
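
Condensed, the scheme looks roughly like this (illustrative module-level state; the real code lives on CachedParser):

    import asyncio
    from operator import attrgetter
    from typing import Dict, List

    class ParseResultFuture(asyncio.Future):
        """Future flagged with whether a user coroutine awaits it."""

        def __init__(self):
            super().__init__()
            self.user_requested = False

    item_futures: Dict[str, ParseResultFuture] = {}

    def queue_page_symbols(symbols: List[str]) -> None:
        # Every symbol on the fetched page gets a future up front, so results
        # parsed while another coroutine is still fetching have a home.
        item_futures.update((symbol, ParseResultFuture()) for symbol in symbols)

    async def wait_before_clear() -> None:
        # Only futures a user actually awaited must resolve before clearing.
        for future in filter(attrgetter("user_requested"), item_futures.values()):
            await future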
---
bot/exts/info/doc/_cog.py | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 6c51ab738..0d344c363 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -7,6 +7,7 @@ import sys
from collections import defaultdict
from contextlib import suppress
from functools import partial
+from operator import attrgetter
from types import SimpleNamespace
from typing import Dict, List, NamedTuple, Optional, Union
@@ -78,6 +79,14 @@ class QueueItem(NamedTuple):
return NamedTuple.__eq__(self, other)
+class ParseResultFuture(asyncio.Future):
+ """Future with the user_requested attribute to know which futures need to be waited for before clearing."""
+
+ def __init__(self):
+ super().__init__()
+ self.user_requested = False
+
+
class CachedParser:
"""
Get the symbol Markdown from pages with smarter caching.
@@ -90,7 +99,7 @@ class CachedParser:
def __init__(self):
self._queue: List[QueueItem] = []
self._page_symbols: Dict[str, List[DocItem]] = defaultdict(list)
- self._item_futures: Dict[DocItem, asyncio.Future] = {}
+ self._item_futures: Dict[DocItem, ParseResultFuture] = {}
self._parse_task = None
async def get_markdown(self, doc_item: DocItem) -> str:
@@ -99,21 +108,25 @@ class CachedParser:
If no symbols were fetched from `doc_item`s page before,
the HTML has to be fetched before parsing can be queued.
+
+ Not safe to run while `self.clear` is running.
"""
if (symbols_to_queue := self._page_symbols.get(doc_item.url)) is not None:
async with bot_instance.http_session.get(doc_item.url) as response:
soup = BeautifulSoup(await response.text(encoding="utf8"), "lxml")
self._queue.extend(QueueItem(symbol, soup) for symbol in symbols_to_queue)
+ self._item_futures.update((symbol, ParseResultFuture()) for symbol in symbols_to_queue)
del self._page_symbols[doc_item.url]
log.debug(f"Added symbols from {doc_item.url} to parse queue.")
if self._parse_task is None:
self._parse_task = asyncio.create_task(self._parse_queue())
- self._move_to_front(doc_item)
- if doc_item not in self._item_futures:
- self._item_futures[doc_item] = bot_instance.loop.create_future()
+ with suppress(ValueError):
+ # If the item is not in the list then the item is already parsed or is being parsed
+ self._move_to_front(doc_item)
+ self._item_futures[doc_item].user_requested = True
return await self._item_futures[doc_item]
async def _parse_queue(self) -> None:
@@ -161,7 +174,7 @@ class CachedParser:
All currently requested items are waited to be parsed before clearing.
"""
- for future in self._item_futures.values():
+ for future in filter(attrgetter("user_requested"), self._item_futures.values()):
await future
if self._parse_task is not None:
self._parse_task.cancel()
--
cgit v1.2.3
From a430f1aefdb092bc7ca2fd41bff20aedaa949f5e Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 15 Dec 2020 00:35:12 +0100
Subject: Wait for the inventory to be refreshed before attempting any fetching
Previously the bot returned an error if a symbol was not found while
inventories were refreshing, but we can just wait for them to finish
refreshing, after which the symbol may be filled in.
A logging call to notify of the refresh being done was also added.
---
bot/exts/info/doc/_cog.py | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 0d344c363..a8642be3e 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -304,6 +304,7 @@ class DocCog(commands.Cog):
) for package in await self.bot.api_client.get('bot/documentation-links')
]
await asyncio.gather(*coros)
+ log.debug("Finished inventory refresh.")
REFRESH_EVENT.set()
async def get_symbol_embed(self, symbol: str) -> Optional[discord.Embed]:
@@ -316,6 +317,10 @@ class DocCog(commands.Cog):
if not present also create a redis entry for the symbol.
"""
log.trace(f"Building embed for symbol `{symbol}`")
+ if not REFRESH_EVENT.is_set():
+ log.debug("Waiting for inventories to be refreshed before processing item.")
+ await REFRESH_EVENT.wait()
+
symbol_info = self.doc_symbols.get(symbol)
if symbol_info is None:
log.debug("Symbol does not exist.")
@@ -325,9 +330,6 @@ class DocCog(commands.Cog):
markdown = await doc_cache.get(symbol_info)
if markdown is None:
log.debug(f"Redis cache miss for symbol `{symbol}`.")
- if not REFRESH_EVENT.is_set():
- log.debug("Waiting for inventories to be refreshed before processing item.")
- await REFRESH_EVENT.wait()
markdown = await self.item_fetcher.get_markdown(symbol_info)
if markdown is not None:
await doc_cache.set(symbol_info, markdown)
--
cgit v1.2.3
From 7e5fb88a9976570590a4e946722fd60ada1aad95 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Tue, 15 Dec 2020 05:00:23 +0100
Subject: Return the fetched inventory in the Inventory converter
Instead of fetching it again in the cog, the converter now returns
the inventory for later use. The set command no longer attempts
to reschedule the inventory, and a bug that caused the inventory
rescheduling to do nothing in `update_single` was fixed after moving
it to its own method
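
A hedged sketch of the converter shape this moves to; discord.py hands the command whatever convert returns, so a tuple lets the command unpack both values (fetch_inventory is the helper touched by this patch, the rest is illustrative):

    from typing import Tuple

    from discord.ext import commands

    class Inventory(commands.Converter):
        """Convert an url into a (url, fetched inventory dict) tuple."""

        async def convert(self, ctx: commands.Context, url: str) -> Tuple[str, dict]:
            if (inventory := await fetch_inventory(url)) is None:
                raise commands.BadArgument("Failed to fetch inventory file.")
            return url, inventory

    # In the command, the annotated parameter receives the converter's
    # return value rather than the raw argument:
    #     async def set_command(self, ctx, package_name: PackageName,
    #                           base_url: ValidURL, inventory: Inventory):
    #         inventory_url, inventory_dict = inventory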
---
bot/converters.py | 12 +++---
bot/exts/info/doc/_cog.py | 75 ++++++++++++++++++----------------
bot/exts/info/doc/_inventory_parser.py | 9 ++--
3 files changed, 50 insertions(+), 46 deletions(-)
diff --git a/bot/converters.py b/bot/converters.py
index d558fa3df..6bbc22c3a 100644
--- a/bot/converters.py
+++ b/bot/converters.py
@@ -176,23 +176,23 @@ class ValidURL(Converter):
return url
-class InventoryURL(Converter):
+class Inventory(Converter):
"""
Represents an Intersphinx inventory URL.
This converter checks whether intersphinx accepts the given inventory URL, and raises
- `BadArgument` if that is not the case.
+ `BadArgument` if that is not the case or if the url is unreachable.
- Otherwise, it simply passes through the given URL.
+ Otherwise, it returns the url and the fetched inventory dict in a tuple.
"""
@staticmethod
- async def convert(ctx: Context, url: str) -> str:
+ async def convert(ctx: Context, url: str) -> t.Tuple[str, _inventory_parser.INVENTORY_DICT]:
"""Convert url to Intersphinx inventory URL."""
await ctx.trigger_typing()
- if await _inventory_parser.fetch_inventory(url) is None:
+ if (inventory := await _inventory_parser.fetch_inventory(url)) is None:
raise BadArgument(f"Failed to fetch inventory file after {_inventory_parser.FAILED_REQUEST_ATTEMPTS}.")
- return url
+ return url, inventory
class Snowflake(IDConverter):
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index a8642be3e..11d1dc9ad 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -18,12 +18,12 @@ from discord.ext import commands
from bot import instance as bot_instance
from bot.bot import Bot
from bot.constants import MODERATION_ROLES, RedirectOutput
-from bot.converters import InventoryURL, PackageName, ValidURL
+from bot.converters import Inventory, PackageName, ValidURL
from bot.pagination import LinePaginator
from bot.utils.lock import lock
from bot.utils.messages import send_denial, wait_for_deletion
from bot.utils.scheduling import Scheduler
-from ._inventory_parser import fetch_inventory
+from ._inventory_parser import INVENTORY_DICT, fetch_inventory
from ._parsing import get_symbol_markdown
from ._redis_cache import DocRedisCache
@@ -204,7 +204,7 @@ class DocCog(commands.Cog):
await self.bot.wait_until_guild_available()
await self.refresh_inventory()
- async def update_single(self, api_package_name: str, base_url: str, inventory_url: str) -> bool:
+ async def update_single(self, api_package_name: str, base_url: str, package: INVENTORY_DICT) -> None:
"""
Rebuild the inventory for a single package.
@@ -213,31 +213,8 @@ class DocCog(commands.Cog):
* `base_url` is the root documentation URL for the specified package, used to build
absolute paths that link to specific symbols
* `inventory_url` is the absolute URL to the intersphinx inventory.
-
- If the inventory file is currently unreachable,
- the update is rescheduled to execute in FETCH_RESCHEDULE_DELAY.first minutes on the first attempt,
- and FETCH_RESCHEDULE_DELAY.repeated minutes on the subsequent attempts.
-
- Return True on success; False if fetching failed and was rescheduled.
"""
self.base_urls[api_package_name] = base_url
- package = await fetch_inventory(inventory_url)
-
- if not package:
- if inventory_url not in self.scheduled_inventories:
- delay = FETCH_RESCHEDULE_DELAY.first * 60
- else:
- delay = FETCH_RESCHEDULE_DELAY.repeated * 60
- log.info(f"Failed to fetch inventory; attempting again in {delay//60} minutes.")
- self.inventory_scheduler.schedule_later(
- delay,
- api_package_name,
- fetch_inventory(inventory_url)
- )
- self.scheduled_inventories.add(api_package_name)
- return False
-
- self.scheduled_inventories.discard(api_package_name)
for group, items in package.items():
for symbol, relative_doc_url in items:
@@ -279,7 +256,37 @@ class DocCog(commands.Cog):
self.item_fetcher.add_item(symbol_item)
log.trace(f"Fetched inventory for {api_package_name}.")
- return True
+
+ async def update_or_reschedule_inventory(
+ self,
+ api_package_name: str,
+ base_url: str,
+ inventory_url: str
+ ) -> Optional[INVENTORY_DICT]:
+ """
+    Update the cog's inventory, or reschedule this method to execute again if the remote inventory is unreachable.
+
+ The first attempt is rescheduled to execute in `FETCH_RESCHEDULE_DELAY.first` minutes, the subsequent attempts
+ in `FETCH_RESCHEDULE_DELAY.repeated` minutes.
+ """
+ package = await fetch_inventory(inventory_url)
+
+ if not package:
+ if inventory_url not in self.scheduled_inventories:
+ delay = FETCH_RESCHEDULE_DELAY.first
+ else:
+ delay = FETCH_RESCHEDULE_DELAY.repeated
+ log.info(f"Failed to fetch inventory; attempting again in {delay} minutes.")
+ self.inventory_scheduler.schedule_later(
+ delay*60,
+ api_package_name,
+ self.update_or_reschedule_inventory(api_package_name, base_url, inventory_url)
+ )
+ self.scheduled_inventories.add(api_package_name)
+ return
+
+ self.scheduled_inventories.discard(api_package_name)
+ await self.update_single(api_package_name, base_url, package)
async def refresh_inventory(self) -> None:
"""Refresh internal documentation inventory."""
@@ -299,7 +306,7 @@ class DocCog(commands.Cog):
# Run all coroutines concurrently - since each of them performs an HTTP
# request, this speeds up fetching the inventory data heavily.
coros = [
- self.update_single(
+ self.update_or_reschedule_inventory(
package["package"], package["base_url"], package["inventory_url"]
) for package in await self.bot.api_client.get('bot/documentation-links')
]
@@ -406,7 +413,7 @@ class DocCog(commands.Cog):
ctx: commands.Context,
package_name: PackageName,
base_url: ValidURL,
- inventory_url: InventoryURL,
+ inventory: Inventory,
) -> None:
"""
Adds a new documentation metadata object to the site's database.
@@ -419,6 +426,7 @@ class DocCog(commands.Cog):
https://docs.python.org/3/ \
https://docs.python.org/3/objects.inv
"""
+ inventory_url, inventory_dict = inventory
body = {
'package': package_name,
'base_url': base_url,
@@ -431,13 +439,8 @@ class DocCog(commands.Cog):
+ "\n".join(f"{key}: {value}" for key, value in body.items())
)
- if await self.update_single(package_name, base_url, inventory_url) is None:
- await ctx.send(
- f"Added the package `{package_name}` to the database but failed to fetch inventory; "
- f"trying again in {FETCH_RESCHEDULE_DELAY.first} minutes."
- )
- else:
- await ctx.send(f"Added the package `{package_name}` to the database and refreshed the inventory.")
+ await self.update_single(package_name, base_url, inventory_dict)
+ await ctx.send(f"Added the package `{package_name}` to the database and refreshed the inventory.")
@docs_group.command(name='deletedoc', aliases=('removedoc', 'rm', 'd'))
@commands.has_any_role(*MODERATION_ROLES)
diff --git a/bot/exts/info/doc/_inventory_parser.py b/bot/exts/info/doc/_inventory_parser.py
index 0d9bd726a..b38c3b2a8 100644
--- a/bot/exts/info/doc/_inventory_parser.py
+++ b/bot/exts/info/doc/_inventory_parser.py
@@ -11,6 +11,7 @@ import bot
log = logging.getLogger(__name__)
FAILED_REQUEST_ATTEMPTS = 3
+INVENTORY_DICT = DefaultDict[str, List[Tuple[str, str]]]
_V2_LINE_RE = re.compile(r'(?x)(.+?)\s+(\S*:\S*)\s+(-?\d+)\s+?(\S*)\s+(.*)')
@@ -42,7 +43,7 @@ class ZlibStreamReader:
pos = buf.find(b'\n')
-async def _load_v1(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[str, str]]]:
+async def _load_v1(stream: aiohttp.StreamReader) -> INVENTORY_DICT:
invdata = defaultdict(list)
async for line in stream:
@@ -58,7 +59,7 @@ async def _load_v1(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[
return invdata
-async def _load_v2(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[str, str]]]:
+async def _load_v2(stream: aiohttp.StreamReader) -> INVENTORY_DICT:
invdata = defaultdict(list)
async for line in ZlibStreamReader(stream):
@@ -71,7 +72,7 @@ async def _load_v2(stream: aiohttp.StreamReader) -> DefaultDict[str, List[Tuple[
return invdata
-async def _fetch_inventory(url: str) -> DefaultDict[str, List[Tuple[str, str]]]:
+async def _fetch_inventory(url: str) -> INVENTORY_DICT:
"""Fetch, parse and return an intersphinx inventory file from an url."""
timeout = aiohttp.ClientTimeout(sock_connect=5, sock_read=5)
async with bot.instance.http_session.get(url, timeout=timeout, raise_for_status=True) as response:
@@ -93,7 +94,7 @@ async def _fetch_inventory(url: str) -> DefaultDict[str, List[Tuple[str, str]]]:
raise ValueError(f"Invalid inventory file at url {url}.")
-async def fetch_inventory(url: str) -> Optional[DefaultDict[str, List[Tuple[str, str]]]]:
+async def fetch_inventory(url: str) -> Optional[INVENTORY_DICT]:
"""Get inventory from `url`, retrying `FAILED_REQUEST_ATTEMPTS` times on errors."""
for attempt in range(1, FAILED_REQUEST_ATTEMPTS+1):
try:
--
cgit v1.2.3
From 7134c10485d2b4215213c1ffb670fa9a06d5de1e Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Fri, 18 Dec 2020 21:41:30 +0100
Subject: Use update_wrapper instead of wraps
We're not using it as a decorator, so using wraps only complicates
the call syntax
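
The two spellings produce the same result; a tiny sketch:

    from functools import update_wrapper, wraps

    def func(): ...
    def new_func(): ...

    # update_wrapper avoids the extra call layer when not decorating:
    a = update_wrapper(new_func, func)
    b = wraps(func)(new_func)
    assert a is b and a.__name__ == "func"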
---
bot/decorators.py | 6 +++---
bot/utils/lock.py | 5 ++---
2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/bot/decorators.py b/bot/decorators.py
index 3892e350f..a37996e80 100644
--- a/bot/decorators.py
+++ b/bot/decorators.py
@@ -2,7 +2,7 @@ import asyncio
import logging
import typing as t
from contextlib import suppress
-from functools import wraps
+from functools import update_wrapper
from discord import Member, NotFound
from discord.ext import commands
@@ -105,7 +105,7 @@ def redirect_output(destination_channel: int, bypass_roles: t.Container[int] = N
await ctx.message.delete()
log.trace("Redirect output: Deleted invocation message")
- return wraps(func)(function.update_wrapper_globals(inner, func))
+ return update_wrapper(function.update_wrapper_globals(inner, func), func)
return wrap
@@ -149,5 +149,5 @@ def respect_role_hierarchy(member_arg: function.Argument) -> t.Callable:
else:
log.trace(f"{func.__name__}: {target.top_role=} < {actor.top_role=}; calling func")
await func(*args, **kwargs)
- return wraps(func)(function.update_wrapper_globals(wrapper, func))
+ return update_wrapper(function.update_wrapper_globals(wrapper, func), func)
return decorator
diff --git a/bot/utils/lock.py b/bot/utils/lock.py
index cf87321c5..02188c827 100644
--- a/bot/utils/lock.py
+++ b/bot/utils/lock.py
@@ -1,7 +1,7 @@
import inspect
import logging
from collections import defaultdict
-from functools import partial, wraps
+from functools import partial, update_wrapper
from typing import Any, Awaitable, Callable, Hashable, Union
from weakref import WeakValueDictionary
@@ -91,8 +91,7 @@ def lock(namespace: Hashable, resource_id: ResourceId, *, raise_error: bool = Fa
log.info(f"{name}: aborted because resource {namespace!r}:{id_!r} is locked")
if raise_error:
raise LockedResourceError(str(namespace), id_)
-
- return wraps(func)(function.update_wrapper_globals(wrapper, func))
+ return update_wrapper(function.update_wrapper_globals(wrapper, func), func)
return decorator
--
cgit v1.2.3
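For reference, the two spellings do the same work; update_wrapper simply avoids the extra call that wraps needs when it isn't used as a decorator. A minimal sketch:

    from functools import update_wrapper, wraps

    def wrapped():
        """Original docstring."""

    def wrapper():
        ...

    a = wraps(wrapped)(wrapper)           # decorator-style: two calls
    b = update_wrapper(wrapper, wrapped)  # plain call: one step
    assert a is b is wrapper and wrapper.__doc__ == "Original docstring."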
From 003613ff0f89871c8477e996c708873e1387e514 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 6 Jan 2021 06:56:17 +0100
Subject: Add comments to truncation handling code
Co-authored-by: MarkKoz
---
bot/exts/info/doc/_parsing.py | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 521034006..f51ab4ea1 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -276,15 +276,21 @@ def _get_truncated_description(
if not markdown_element_ends:
return ""
+ # Determine the "hard" truncation index.
newline_truncate_index = find_nth_occurrence(result, "\n", max_lines)
if newline_truncate_index is not None and newline_truncate_index < _MAX_DESCRIPTION_LENGTH:
+ # Truncate based on maximum lines if there are more than the maximum number of lines.
truncate_index = newline_truncate_index
else:
+ # There are fewer than the maximum number of lines; truncate based on the max char length.
truncate_index = _MAX_DESCRIPTION_LENGTH
+ # Nothing needs to be truncated if the last element ends before the truncation index.
if truncate_index >= markdown_element_ends[-1]:
return result
+ # Determine the actual truncation index.
+ # Truncate at the last Markdown element that comes before the truncation index.
markdown_truncate_index = max(cut for cut in markdown_element_ends if cut < truncate_index)
return result[:markdown_truncate_index].strip(_TRUNCATE_STRIP_CHARACTERS) + "..."
--
cgit v1.2.3
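The truncation code above leans on find_nth_occurrence from bot.utils.helpers; assuming it behaves as the name suggests, a compatible sketch would be:

    from typing import Optional

    def find_nth_occurrence(string: str, substring: str, n: int) -> Optional[int]:
        """Return the index of the nth occurrence of `substring`, or None if there are fewer."""
        index = -1
        for _ in range(n):
            index = string.find(substring, index + 1)
            if index == -1:
                return None
        return index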
From fef6c50f0c8a9c54e6e0519c0feae5c8c32152c1 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 6 Jan 2021 06:57:54 +0100
Subject: Remove redundant variable
Co-authored-by: MarkKoz
---
bot/exts/info/doc/_parsing.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index f51ab4ea1..032fe3404 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -263,9 +263,8 @@ def _get_truncated_description(
else:
element_markdown = markdown_converter.process_text(element)
- element_markdown_length = len(element_markdown)
rendered_length += element_length
- tag_end_index += element_markdown_length
+ tag_end_index += len(element_markdown)
if not element_markdown.isspace():
markdown_element_ends.append(tag_end_index)
--
cgit v1.2.3
From cbd84558ef4e5e89ce032c8b5d47f1bb94b89ba0 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 6 Jan 2021 18:27:10 +0100
Subject: Do not attempt to set cache values for symbols that were not found
---
bot/exts/info/doc/_cog.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 11d1dc9ad..df5d417d7 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -144,7 +144,8 @@ class CachedParser:
None,
partial(get_symbol_markdown, soup, item),
)
- await doc_cache.set(item, markdown)
+ if markdown is not None:
+ await doc_cache.set(item, markdown)
except Exception:
log.exception(f"Unexpected error when handling {item}")
else:
--
cgit v1.2.3
From 3439badedb65f7d37ba9733bc4e8268f2efe316e Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 9 Jan 2021 06:37:31 +0100
Subject: Ensure no symbols get overwritten while generating symbol mappings
The code handling this was moved to a function to achieve this cleanly.
Includes fixes for bugs where an incorrect package was added to the symbol
name in the second branch and an incorrect symbol was added in
the third branch
Co-authored-by: MarkKoz
---
bot/exts/info/doc/_cog.py | 74 +++++++++++++++++++++++++++++++++++------------
1 file changed, 55 insertions(+), 19 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index df5d417d7..ed9432ed2 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -222,27 +222,19 @@ class DocCog(commands.Cog):
if "/" in symbol:
continue # skip unreachable symbols with slashes
+ # e.g. get 'class' from 'py:class'
group_name = group.split(":")[1]
- if (original_symbol := self.doc_symbols.get(symbol)) is not None:
- if group_name in FORCE_PREFIX_GROUPS:
- symbol = f"{group_name}.{symbol}"
- self.renamed_symbols.add(symbol)
-
- elif (original_symbol_group := original_symbol.group) in FORCE_PREFIX_GROUPS:
- overridden_symbol = f"{original_symbol_group}.{symbol}"
- if overridden_symbol in self.renamed_symbols:
- overridden_symbol = f"{api_package_name}.{overridden_symbol}"
-
- self.doc_symbols[overridden_symbol] = original_symbol
- self.renamed_symbols.add(overridden_symbol)
-
- elif api_package_name in PRIORITY_PACKAGES:
- self.doc_symbols[f"{original_symbol.package}.{symbol}"] = original_symbol
- self.renamed_symbols.add(symbol)
-
+ while (original_symbol := self.doc_symbols.get(symbol)) is not None:
+ replaced_symbol_name = self.ensure_unique_symbol_name(
+ api_package_name,
+ group_name,
+ original_symbol,
+ symbol,
+ )
+ if replaced_symbol_name is None:
+ break
else:
- symbol = f"{api_package_name}.{symbol}"
- self.renamed_symbols.add(symbol)
+ symbol = replaced_symbol_name
relative_url_path, _, symbol_id = relative_doc_url.partition("#")
# Intern fields that have shared content so we're not storing unique strings for every object
@@ -289,6 +281,50 @@ class DocCog(commands.Cog):
self.scheduled_inventories.discard(api_package_name)
await self.update_single(api_package_name, base_url, package)
+ def ensure_unique_symbol_name(
+ self,
+ package_name: str,
+ group_name: str,
+ original_item: DocItem,
+ symbol_name: str
+ ) -> Optional[str]:
+ """
+ Ensure `symbol_name` doesn't overwrite another symbol in `doc_symbols`.
+
+ Should only be called with symbol names that already have a conflict in `doc_symbols`.
+
+ If None is returned, space was created for `symbol_name` in `doc_symbols` instead of
+ the symbol name being changed.
+ """
+ # Certain groups are added as prefixes to disambiguate the symbols.
+ if group_name in FORCE_PREFIX_GROUPS:
+ self.renamed_symbols.add(symbol_name)
+ return f"{group_name}.{symbol_name}"
+
+ # The existing symbol with which the current symbol conflicts should have a group prefix.
+ # It currently doesn't have the group prefix because it's only added once there's a conflict.
+ elif (original_symbol_group := original_item.group) in FORCE_PREFIX_GROUPS:
+ overridden_symbol = f"{original_symbol_group}.{symbol_name}"
+ if overridden_symbol in self.doc_symbols:
+ # If there's still a conflict, prefix with package name.
+ overridden_symbol = f"{original_item.package}.{overridden_symbol}"
+
+ self.doc_symbols[overridden_symbol] = original_item
+ self.renamed_symbols.add(overridden_symbol)
+
+ elif package_name in PRIORITY_PACKAGES:
+ overridden_symbol = f"{original_item.package}.{symbol_name}"
+ if overridden_symbol in self.doc_symbols:
+ # If there's still a conflict, add the symbol's group in the middle.
+ overridden_symbol = f"{original_item.package}.{original_item.group}.{symbol_name}"
+
+ self.doc_symbols[overridden_symbol] = original_item
+ self.renamed_symbols.add(overridden_symbol)
+
+ else:
+ self.renamed_symbols.add(symbol_name)
+ return f"{package_name}.{symbol_name}"
+
async def refresh_inventory(self) -> None:
"""Refresh internal documentation inventory."""
REFRESH_EVENT.clear()
--
cgit v1.2.3
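To make the retry loop concrete, a simplified sketch of the first branch (the symbol, group and placeholder values below are invented for illustration):

    FORCE_PREFIX_GROUPS = ("term", "label")
    doc_symbols = {"list": "<DocItem python py:class list>"}

    symbol, group_name = "list", "term"
    # "list" conflicts, and "term" is a forced-prefix group, so the method
    # returns "term.list"; the while loop then retries the lookup with it.
    if group_name in FORCE_PREFIX_GROUPS and symbol in doc_symbols:
        symbol = f"{group_name}.{symbol}"
    doc_symbols[symbol] = "<DocItem python std:term list>"
    # In the second and third branches the existing item is moved instead,
    # the method returns None, and the loop breaks with the original name kept.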
From fcfb604bc9123254622b763dba46d3f25ed4d93c Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 9 Jan 2021 06:38:43 +0100
Subject: Do not ignore symbols with slashes.
In some cases these are actual symbols that we can look up
---
bot/exts/info/doc/_cog.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index ed9432ed2..7aa6d0428 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -219,8 +219,6 @@ class DocCog(commands.Cog):
for group, items in package.items():
for symbol, relative_doc_url in items:
- if "/" in symbol:
- continue # skip unreachable symbols with slashes
# e.g. get 'class' from 'py:class'
group_name = group.split(":")[1]
--
cgit v1.2.3
From 33c861b4e1fb88c52585647a958ac27810399704 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 9 Jan 2021 19:27:21 +0100
Subject: Do not add package name to the front of the symbol if it's already
there
---
bot/exts/info/doc/_cog.py | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 7aa6d0428..feb08e1cb 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -319,9 +319,18 @@ class DocCog(commands.Cog):
self.doc_symbols[overridden_symbol] = original_item
self.renamed_symbols.add(overridden_symbol)
+ # If we can't specially handle the symbol through its group or package,
+ # fall back to prepending its package name.
else:
- self.renamed_symbols.add(symbol_name)
- return f"{package_name}.{symbol_name}"
+ if symbol_name.startswith(package_name):
+ # If the symbol already starts with the package name, insert the group name after it.
+ split_symbol_name = symbol_name.split(".", maxsplit=1)
+ split_symbol_name.insert(1, group_name)
+ overridden_symbol = ".".join(split_symbol_name)
+ else:
+ overridden_symbol = f"{package_name}.{symbol_name}"
+ self.renamed_symbols.add(overridden_symbol)
+ return overridden_symbol
async def refresh_inventory(self) -> None:
"""Refresh internal documentation inventory."""
--
cgit v1.2.3
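Concretely, the new fallback does the following (hypothetical names):

    package_name, group_name = "requests", "function"
    symbol_name = "requests.get"

    split_symbol_name = symbol_name.split(".", maxsplit=1)  # ["requests", "get"]
    split_symbol_name.insert(1, group_name)                 # ["requests", "function", "get"]
    overridden_symbol = ".".join(split_symbol_name)
    assert overridden_symbol == "requests.function.get"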
From 70609baca94dc7c7ad7598f707ac479efe348e88 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 9 Jan 2021 21:48:51 +0100
Subject: Periodically clear unnecessary futures from the _item_futures dict
The code has no way of reaching futures through new requests after
their result has been set, because setting the result also stores the
value in redis.
---
bot/exts/info/doc/_cog.py | 34 +++++++++++++++++++++++++++++++++-
1 file changed, 33 insertions(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index feb08e1cb..364d99182 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -4,6 +4,7 @@ import asyncio
import logging
import re
import sys
+import time
from collections import defaultdict
from contextlib import suppress
from functools import partial
@@ -80,11 +81,25 @@ class QueueItem(NamedTuple):
class ParseResultFuture(asyncio.Future):
- """Future with the user_requested attribute to know which futures need to be waited for before clearing."""
+ """
+ Future with metadata for the parser class.
+
+ `user_requested` is set by the parser when a Future is requested by a user and moved to the front,
+ allowing only the user-requested futures to be waited for when clearing.
+
+ `result_set_time` provides the time at which the future's result has been set,
+ or inf if the result hasn't been set yet.
+ """
def __init__(self):
super().__init__()
self.user_requested = False
+ self.result_set_time = float("inf")
+
+ def set_result(self, result: str, /) -> None:
+ """Set `self.result_set_time` to current time when the result is set."""
+ self.result_set_time = time.time()
+ super().set_result(result)
class CachedParser:
@@ -102,6 +117,8 @@ class CachedParser:
self._item_futures: Dict[DocItem, ParseResultFuture] = {}
self._parse_task = None
+ self.cleanup_futures_task = bot_instance.loop.create_task(self._cleanup_futures())
+
async def get_markdown(self, doc_item: DocItem) -> str:
"""
Get the result Markdown of `doc_item`.
@@ -183,6 +200,21 @@ class CachedParser:
self._page_symbols.clear()
self._item_futures.clear()
+ async def _cleanup_futures(self) -> None:
+ """
+ Clear old futures from internal results.
+
+ After a future is set, we only need to wait for old requests to its associated DocItem to finish
+ as all new requests will get the value from the redis cache in the cog first.
+ Keeping them around for longer than a second is unnecessary and keeps the parsed Markdown strings alive.
+ """
+ while True:
+ current_time = time.time()
+ for key, future in self._item_futures.copy().items():
+ if current_time - future.result_set_time > 5:
+ del self._item_futures[key]
+ await asyncio.sleep(5)
+
class DocCog(commands.Cog):
"""A set of commands for querying & displaying documentation."""
--
cgit v1.2.3
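A small sketch of the lifecycle the cleanup task relies on, using the ParseResultFuture defined above (the 5-second threshold comes from _cleanup_futures; the import path reflects where the class lives as of this patch):

    import asyncio
    import time

    from bot.exts.info.doc._cog import ParseResultFuture  # location as of this patch

    async def demo() -> None:
        future = ParseResultFuture()
        # While pending, result_set_time is +inf, so the age check
        # time.time() - future.result_set_time is -inf and never exceeds 5.
        future.set_result("parsed markdown")
        # From here the future ages normally; once it's older than 5 seconds,
        # the cleanup loop drops it from _item_futures.
        assert await future == "parsed markdown"
        assert time.time() - future.result_set_time < 5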
From 5ad2afbc0160a7d9b0ab9c50b73044e7169db7cb Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sat, 9 Jan 2021 21:59:03 +0100
Subject: Stop scheduled and long running tasks on cog unload
---
bot/exts/info/doc/_cog.py | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 364d99182..61ac35b6f 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -567,3 +567,9 @@ class DocCog(commands.Cog):
await ctx.send(f"Successfully cleared the cache for `{package_name}`.")
else:
await ctx.send("No keys matching the package found.")
+
+ def cog_unload(self) -> None:
+ """Clear scheduled inventories, queued symbols and cleanup task on cog unload."""
+ self.inventory_scheduler.cancel_all()
+ self.item_fetcher.cleanup_futures_task.cancel()
+ asyncio.create_task(self.item_fetcher.clear())
--
cgit v1.2.3
From 50bb3439824277991124b888d0b46c5936c2efce Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 10 Jan 2021 00:11:16 +0100
Subject: Handle equal DocItems in the queue
This could be handled by using sets to hold the items in _page_symbols,
but ultimately the check has a much smaller cost than having
thousands of sets for the urls.
Because we create futures for every item that ends up in the queue, we
can also skip the .get-is-None check and instead fetch the
future directly from the dict
---
bot/exts/info/doc/_cog.py | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 61ac35b6f..cee482c30 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -157,6 +157,11 @@ class CachedParser:
while self._queue:
item, soup = self._queue.pop()
try:
+ if (future := self._item_futures[item]).done():
+ # Some items are present in the inventories multiple times under different symbols,
+ # if we already parsed an equal item, we can just skip it.
+ continue
+
markdown = await bot_instance.loop.run_in_executor(
None,
partial(get_symbol_markdown, soup, item),
@@ -166,8 +171,7 @@ class CachedParser:
except Exception:
log.exception(f"Unexpected error when handling {item}")
else:
- if (future := self._item_futures.get(item)) is not None:
- future.set_result(markdown)
+ future.set_result(markdown)
await asyncio.sleep(0.1)
finally:
self._parse_task = None
--
cgit v1.2.3
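The direct dict access is safe because every queued item had a future created for it, and QueueItem's __eq__ (defined earlier in this series) is what lets _move_to_front find a wrapper by its bare DocItem. A sketch with stand-in objects (doc_item_a, doc_item_b, soup):

    # list.index compares with ==, so a DocItem locates its QueueItem wrapper:
    queue = [QueueItem(doc_item_a, soup), QueueItem(doc_item_b, soup)]
    index = queue.index(doc_item_b)     # matched via QueueItem.__eq__
    queue.append(queue.pop(index))      # "front" is the end, since pop() takes from there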
From 298ad2f8e8f31d9f06a9e01a91a4d08f5b5d6347 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 10 Jan 2021 01:48:26 +0100
Subject: Refresh inventories when the redis cache is cleared
Because the futures are cleaned up and the Markdown then only exists in
the redis cache, items that were requested previously would be missing
from the CachedParser after the cache is cleared
---
bot/exts/info/doc/_cog.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index cee482c30..a78916d4a 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -565,9 +565,11 @@ class DocCog(commands.Cog):
@docs_group.command(name="cleardoccache")
@commands.has_any_role(*MODERATION_ROLES)
+ @lock("doc", COMMAND_LOCK_SINGLETON, raise_error=True)
async def clear_cache_command(self, ctx: commands.Context, package_name: PackageName) -> None:
"""Clear the persistent redis cache for `package`."""
if await doc_cache.delete(package_name):
+ await self.refresh_inventory()
await ctx.send(f"Successfully cleared the cache for `{package_name}`.")
else:
await ctx.send("No keys matching the package found.")
--
cgit v1.2.3
From 383e4e993c1bc9d31562748cc55ab4c468bcdd8d Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 10 Jan 2021 03:25:50 +0100
Subject: Set exception on future
Without the exception set, the bot would fail silently from the user's
perspective if an exception was handled here
---
bot/exts/info/doc/_cog.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index a78916d4a..3f7604072 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -168,8 +168,9 @@ class CachedParser:
)
if markdown is not None:
await doc_cache.set(item, markdown)
- except Exception:
+ except Exception as e:
log.exception(f"Unexpected error when handling {item}")
+ future.set_exception(e)
else:
future.set_result(markdown)
await asyncio.sleep(0.1)
--
cgit v1.2.3
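For context, a future with an exception set re-raises it at the await site, so the requesting command surfaces the error instead of hanging. A standalone sketch:

    import asyncio

    async def demo() -> None:
        future: asyncio.Future = asyncio.Future()
        future.set_exception(ValueError("parse failed"))
        try:
            await future
        except ValueError as e:
            print(f"awaiter sees: {e}")

    asyncio.run(demo())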
From 5df60dd2ad10aec1c0368ed357562338e89a1250 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 10 Jan 2021 03:32:54 +0100
Subject: Bump markdownify to 0.6.1-0.6.*
The 0.6 release brought a new parameter that has to be included in all
tag handling methods
---
Pipfile | 2 +-
bot/exts/info/doc/_markdown.py | 21 +++++++++++++--------
bot/exts/info/doc/_parsing.py | 2 +-
3 files changed, 15 insertions(+), 10 deletions(-)
diff --git a/Pipfile b/Pipfile
index 4ca651c92..a92f64f59 100644
--- a/Pipfile
+++ b/Pipfile
@@ -18,7 +18,7 @@ deepdiff = "~=4.0"
feedparser = "~=5.2"
fuzzywuzzy = "~=0.17"
lxml = "~=4.4"
-markdownify = "~=0.4"
+markdownify = "~=0.6.1"
more_itertools = "~=8.2"
python-dateutil = "~=2.8"
pyyaml = "~=5.1"
diff --git a/bot/exts/info/doc/_markdown.py b/bot/exts/info/doc/_markdown.py
index ba35a84c4..1b7d8232b 100644
--- a/bot/exts/info/doc/_markdown.py
+++ b/bot/exts/info/doc/_markdown.py
@@ -11,7 +11,7 @@ class DocMarkdownConverter(MarkdownConverter):
super().__init__(**options)
self.page_url = page_url
- def convert_li(self, el: PageElement, text: str) -> str:
+ def convert_li(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
"""Fix markdownify's erroneous indexing in ol tags."""
parent = el.parent
if parent is not None and parent.name == "ol":
@@ -27,27 +27,32 @@ class DocMarkdownConverter(MarkdownConverter):
bullet = bullets[depth % len(bullets)]
return f"{bullet} {text}\n"
- def convert_hn(self, _n: int, el: PageElement, text: str) -> str:
+ def convert_hn(self, _n: int, el: PageElement, text: str, convert_as_inline: bool) -> str:
"""Convert h tags to bold text with ** instead of adding #."""
+ if convert_as_inline:
+ return text
return f"**{text}**\n\n"
- def convert_code(self, el: PageElement, text: str) -> str:
+ def convert_code(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
"""Undo `markdownify`s underscore escaping."""
return f"`{text}`".replace("\\", "")
- def convert_pre(self, el: PageElement, text: str) -> str:
+ def convert_pre(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
"""Wrap any codeblocks in `py` for syntax highlighting."""
code = "".join(el.strings)
return f"```py\n{code}```"
- def convert_a(self, el: PageElement, text: str) -> str:
+ def convert_a(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
"""Resolve relative URLs to `self.page_url`."""
el["href"] = urljoin(self.page_url, el["href"])
- return super().convert_a(el, text)
+ return super().convert_a(el, text, convert_as_inline)
- def convert_p(self, el: PageElement, text: str) -> str:
+ def convert_p(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
"""Include only one newline instead of two when the parent is a li tag."""
+ if convert_as_inline:
+ return text
+
parent = el.parent
if parent is not None and parent.name == "li":
return f"{text}\n"
- return super().convert_p(el, text)
+ return super().convert_p(el, text, convert_as_inline)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 032fe3404..46ae33b92 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -259,7 +259,7 @@ def _get_truncated_description(
if rendered_length + element_length < max_length:
if is_tag:
- element_markdown = markdown_converter.process_tag(element)
+ element_markdown = markdown_converter.process_tag(element, convert_as_inline=False)
else:
element_markdown = markdown_converter.process_text(element)
--
cgit v1.2.3
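Mirroring the change above, a custom handler under markdownify 0.6 now accepts the extra flag (a minimal sketch, assuming markdownify ~=0.6 is installed):

    from bs4.element import PageElement
    from markdownify import MarkdownConverter

    class InlineCodeConverter(MarkdownConverter):
        def convert_code(self, el: PageElement, text: str, convert_as_inline: bool) -> str:
            # 0.6 threads convert_as_inline through every convert_* hook.
            return f"`{text}`"

    print(InlineCodeConverter().convert("Use <code>print()</code> here."))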
From 58154398d0ed905e0418451cfa7d3e8b66508bc6 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 10 Jan 2021 03:39:06 +0100
Subject: Expand docstring
---
bot/exts/info/doc/_inventory_parser.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_inventory_parser.py b/bot/exts/info/doc/_inventory_parser.py
index b38c3b2a8..886708867 100644
--- a/bot/exts/info/doc/_inventory_parser.py
+++ b/bot/exts/info/doc/_inventory_parser.py
@@ -95,7 +95,12 @@ async def _fetch_inventory(url: str) -> INVENTORY_DICT:
async def fetch_inventory(url: str) -> Optional[INVENTORY_DICT]:
- """Get inventory from `url`, retrying `FAILED_REQUEST_ATTEMPTS` times on errors."""
+ """
+ Get an inventory dict from `url`, retrying `FAILED_REQUEST_ATTEMPTS` times on errors.
+
+ `url` should point at a valid sphinx objects.inv inventory file, which will be parsed into the
+ inventory dict in the format of {"domain:role": [("symbol_name", "relative_url_to_symbol"), ...], ...}
+ """
for attempt in range(1, FAILED_REQUEST_ATTEMPTS+1):
try:
inventory = await _fetch_inventory(url)
--
cgit v1.2.3
From 695044167756eb2b6b4d953ef17f0359ba688246 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 10 Jan 2021 03:58:43 +0100
Subject: Move functions strictly related to parsing html to the _html module
Some constants need to be shared between the html and parsing modules;
because the cog user may also want to edit them to change the behaviour,
they were moved into the package's init.
---
bot/exts/info/doc/__init__.py | 5 ++
bot/exts/info/doc/_cog.py | 4 +-
bot/exts/info/doc/_html.py | 112 ++++++++++++++++++++++++++++++++++++-
bot/exts/info/doc/_parsing.py | 125 ++++--------------------------------------
4 files changed, 126 insertions(+), 120 deletions(-)
diff --git a/bot/exts/info/doc/__init__.py b/bot/exts/info/doc/__init__.py
index e9eb9428c..af0bbff2d 100644
--- a/bot/exts/info/doc/__init__.py
+++ b/bot/exts/info/doc/__init__.py
@@ -1,6 +1,11 @@
from bot.bot import Bot
from ._cog import DocCog
+MAX_SIGNATURE_AMOUNT = 3
+PRIORITY_PACKAGES = (
+ "python",
+)
+
def setup(bot: Bot) -> None:
"""Load the Doc cog."""
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 3f7604072..fd211d9f1 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -24,6 +24,7 @@ from bot.pagination import LinePaginator
from bot.utils.lock import lock
from bot.utils.messages import send_denial, wait_for_deletion
from bot.utils.scheduling import Scheduler
+from . import PRIORITY_PACKAGES
from ._inventory_parser import INVENTORY_DICT, fetch_inventory
from ._parsing import get_symbol_markdown
from ._redis_cache import DocRedisCache
@@ -38,9 +39,6 @@ FORCE_PREFIX_GROUPS = (
"pdbcommand",
"term",
)
-PRIORITY_PACKAGES = (
- "python",
-)
WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
# Delay to wait before trying to reach a rescheduled inventory again, in minutes
diff --git a/bot/exts/info/doc/_html.py b/bot/exts/info/doc/_html.py
index 88fbc8825..f9fe542ce 100644
--- a/bot/exts/info/doc/_html.py
+++ b/bot/exts/info/doc/_html.py
@@ -1,10 +1,27 @@
import logging
-from typing import List, Union
+import re
+from functools import partial
+from typing import Callable, Container, Iterable, List, Union
-from bs4.element import PageElement, SoupStrainer
+from bs4 import BeautifulSoup
+from bs4.element import NavigableString, PageElement, SoupStrainer, Tag
+
+from . import MAX_SIGNATURE_AMOUNT
log = logging.getLogger(__name__)
+_UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
+_SEARCH_END_TAG_ATTRS = (
+ "data",
+ "function",
+ "class",
+ "exception",
+ "seealso",
+ "section",
+ "rubric",
+ "sphinxsidebar",
+)
+
class Strainer(SoupStrainer):
"""Subclass of SoupStrainer to allow matching of both `Tag`s and `NavigableString`s."""
@@ -26,3 +43,94 @@ class Strainer(SoupStrainer):
return markup
else:
return super().search(markup)
+
+
+def _find_elements_until_tag(
+ start_element: PageElement,
+ end_tag_filter: Union[Container[str], Callable[[Tag], bool]],
+ *,
+ func: Callable,
+ include_strings: bool = False,
+ limit: int = None,
+) -> List[Union[Tag, NavigableString]]:
+ """
+ Get all elements up to `limit` or until a tag matching `end_tag_filter` is found.
+
+ `end_tag_filter` can be either a container of string names to check against,
+ or a filtering callable that's applied to tags.
+
+ When `include_strings` is True, `NavigableString`s from the document will be included in the result along `Tag`s.
+
+ `func` takes in a BeautifulSoup unbound method for finding multiple elements, such as `BeautifulSoup.find_all`.
+ The method is then iterated over and all elements until the matching tag or the limit are added to the return list.
+ """
+ use_container_filter = not callable(end_tag_filter)
+ elements = []
+
+ for element in func(start_element, name=Strainer(include_strings=include_strings), limit=limit):
+ if isinstance(element, Tag):
+ if use_container_filter:
+ if element.name in end_tag_filter:
+ break
+ elif end_tag_filter(element):
+ break
+ elements.append(element)
+
+ return elements
+
+
+_find_next_children_until_tag = partial(_find_elements_until_tag, func=partial(BeautifulSoup.find_all, recursive=False))
+_find_recursive_children_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_all)
+_find_next_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_next_siblings)
+_find_previous_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
+
+
+def _class_filter_factory(class_names: Iterable[str]) -> Callable[[Tag], bool]:
+ """Create callable that returns True when the passed in tag's class is in `class_names` or when it's is a table."""
+ def match_tag(tag: Tag) -> bool:
+ for attr in class_names:
+ if attr in tag.get("class", ()):
+ return True
+ return tag.name == "table"
+
+ return match_tag
+
+
+def get_general_description(start_element: Tag) -> List[Union[Tag, NavigableString]]:
+ """
+ Get page content up to a table or a tag with its class in `_SEARCH_END_TAG_ATTRS`.
+
+ An attempt is made to find a headerlink a tag to skip repeating the symbol information in the description;
+ if it's found, it's used as the tag to start the search from instead of `start_element`.
+ """
+ child_tags = _find_recursive_children_until_tag(start_element, _class_filter_factory(["section"]), limit=100)
+ header = next(filter(_class_filter_factory(["headerlink"]), child_tags), None)
+ start_tag = header.parent if header is not None else start_element
+ return _find_next_siblings_until_tag(start_tag, _class_filter_factory(_SEARCH_END_TAG_ATTRS), include_strings=True)
+
+
+def get_dd_description(symbol: PageElement) -> List[Union[Tag, NavigableString]]:
+ """Get the contents of the next dd tag, up to a dt or a dl tag."""
+ description_tag = symbol.find_next("dd")
+ return _find_next_children_until_tag(description_tag, ("dt", "dl"), include_strings=True)
+
+
+def get_signatures(start_signature: PageElement) -> List[str]:
+ """
+ Collect up to `MAX_SIGNATURE_AMOUNT` signatures from dt tags around the `start_signature` dt tag.
+
+ First the signatures under the `start_signature` are included;
+ if fewer than 2 are found, tags above the start signature are added to the result if any are present.
+ """
+ signatures = []
+ for element in (
+ *reversed(_find_previous_siblings_until_tag(start_signature, ("dd",), limit=2)),
+ start_signature,
+ *_find_next_siblings_until_tag(start_signature, ("dd",), limit=2),
+ )[-MAX_SIGNATURE_AMOUNT:]:
+ signature = _UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
+
+ if signature:
+ signatures.append(signature)
+
+ return signatures
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 46ae33b92..d68f7c8d7 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -5,37 +5,23 @@ import re
import string
import textwrap
from collections import namedtuple
-from functools import partial
-from typing import Callable, Collection, Container, Iterable, Iterator, List, Optional, TYPE_CHECKING, Union
+from typing import Collection, Iterable, Iterator, List, Optional, TYPE_CHECKING, Union
from bs4 import BeautifulSoup
-from bs4.element import NavigableString, PageElement, Tag
+from bs4.element import NavigableString, Tag
from bot.utils.helpers import find_nth_occurrence
-from ._html import Strainer
+from . import MAX_SIGNATURE_AMOUNT
+from ._html import get_dd_description, get_general_description, get_signatures
from ._markdown import DocMarkdownConverter
if TYPE_CHECKING:
from ._cog import DocItem
log = logging.getLogger(__name__)
-_MAX_SIGNATURE_AMOUNT = 3
-
-_UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
_WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
_PARAMETERS_RE = re.compile(r"\((.+)\)")
-_SEARCH_END_TAG_ATTRS = (
- "data",
- "function",
- "class",
- "exception",
- "seealso",
- "section",
- "rubric",
- "sphinxsidebar",
-)
-
_NO_SIGNATURE_GROUPS = {
"attribute",
"envvar",
@@ -46,7 +32,7 @@ _NO_SIGNATURE_GROUPS = {
}
_EMBED_CODE_BLOCK_LINE_LENGTH = 61
# _MAX_SIGNATURE_AMOUNT code block wrapped lines with py syntax highlight
-_MAX_SIGNATURES_LENGTH = (_EMBED_CODE_BLOCK_LINE_LENGTH + 8) * _MAX_SIGNATURE_AMOUNT
+_MAX_SIGNATURES_LENGTH = (_EMBED_CODE_BLOCK_LINE_LENGTH + 8) * MAX_SIGNATURE_AMOUNT
# Maximum discord message length - signatures on top
_MAX_DESCRIPTION_LENGTH = 2000 - _MAX_SIGNATURES_LENGTH
_TRUNCATE_STRIP_CHARACTERS = "!?:;." + string.whitespace
@@ -118,86 +104,6 @@ def _split_parameters(parameters_string: str) -> Iterator[str]:
yield parameters_string[last_split:]
-def _find_elements_until_tag(
- start_element: PageElement,
- end_tag_filter: Union[Container[str], Callable[[Tag], bool]],
- *,
- func: Callable,
- include_strings: bool = False,
- limit: int = None,
-) -> List[Union[Tag, NavigableString]]:
- """
- Get all elements up to `limit` or until a tag matching `tag_filter` is found.
-
- `end_tag_filter` can be either a container of string names to check against,
- or a filtering callable that's applied to tags.
-
- When `include_strings` is True, `NavigableString`s from the document will be included in the result along `Tag`s.
-
- `func` takes in a BeautifulSoup unbound method for finding multiple elements, such as `BeautifulSoup.find_all`.
- The method is then iterated over and all elements until the matching tag or the limit are added to the return list.
- """
- use_container_filter = not callable(end_tag_filter)
- elements = []
-
- for element in func(start_element, name=Strainer(include_strings=include_strings), limit=limit):
- if isinstance(element, Tag):
- if use_container_filter:
- if element.name in end_tag_filter:
- break
- elif end_tag_filter(element):
- break
- elements.append(element)
-
- return elements
-
-
-_find_next_children_until_tag = partial(_find_elements_until_tag, func=partial(BeautifulSoup.find_all, recursive=False))
-_find_recursive_children_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_all)
-_find_next_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_next_siblings)
-_find_previous_siblings_until_tag = partial(_find_elements_until_tag, func=BeautifulSoup.find_previous_siblings)
-
-
-def _get_general_description(start_element: Tag) -> List[Union[Tag, NavigableString]]:
- """
- Get page content to a table or a tag with its class in `SEARCH_END_TAG_ATTRS`.
-
- A headerlink a tag is attempted to be found to skip repeating the symbol information in the description,
- if it's found it's used as the tag to start the search from instead of the `start_element`.
- """
- child_tags = _find_recursive_children_until_tag(start_element, _class_filter_factory(["section"]), limit=100)
- header = next(filter(_class_filter_factory(["headerlink"]), child_tags), None)
- start_tag = header.parent if header is not None else start_element
- return _find_next_siblings_until_tag(start_tag, _class_filter_factory(_SEARCH_END_TAG_ATTRS), include_strings=True)
-
-
-def _get_dd_description(symbol: PageElement) -> List[Union[Tag, NavigableString]]:
- """Get the contents of the next dd tag, up to a dt or a dl tag."""
- description_tag = symbol.find_next("dd")
- return _find_next_children_until_tag(description_tag, ("dt", "dl"), include_strings=True)
-
-
-def _get_signatures(start_signature: PageElement) -> List[str]:
- """
- Collect up to `_MAX_SIGNATURE_AMOUNT` signatures from dt tags around the `start_signature` dt tag.
-
- First the signatures under the `start_signature` are included;
- if less than 2 are found, tags above the start signature are added to the result if any are present.
- """
- signatures = []
- for element in (
- *reversed(_find_previous_siblings_until_tag(start_signature, ("dd",), limit=2)),
- start_signature,
- *_find_next_siblings_until_tag(start_signature, ("dd",), limit=2),
- )[-(_MAX_SIGNATURE_AMOUNT):]:
- signature = _UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
-
- if signature:
- signatures.append(signature)
-
- return signatures
-
-
def _truncate_signatures(signatures: Collection[str]) -> Union[List[str], Collection[str]]:
"""
Truncate passed signatures to not exceed `_MAX_SIGNATURES_LENGTH`.
@@ -210,7 +116,7 @@ def _truncate_signatures(signatures: Collection[str]) -> Union[List[str], Collec
if not sum(len(signature) for signature in signatures) > _MAX_SIGNATURES_LENGTH:
return signatures
- max_signature_length = _EMBED_CODE_BLOCK_LINE_LENGTH * (_MAX_SIGNATURE_AMOUNT + 1 - len(signatures))
+ max_signature_length = _EMBED_CODE_BLOCK_LINE_LENGTH * (MAX_SIGNATURE_AMOUNT + 1 - len(signatures))
formatted_signatures = []
for signature in signatures:
signature = signature.strip()
@@ -317,17 +223,6 @@ def _create_markdown(signatures: Optional[List[str]], description: Iterable[Tag]
return formatted_markdown
-def _class_filter_factory(class_names: Iterable[str]) -> Callable[[Tag], bool]:
- """Create callable that returns True when the passed in tag's class is in `class_names` or when it's is a table."""
- def match_tag(tag: Tag) -> bool:
- for attr in class_names:
- if attr in tag.get("class", ()):
- return True
- return tag.name == "table"
-
- return match_tag
-
-
def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> Optional[str]:
"""
Return parsed markdown of the passed symbol using the passed in soup, truncated to 1000 characters.
@@ -342,12 +237,12 @@ def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> Optional[s
# Modules, doc pages and labels don't point to description list tags but to tags like divs,
# no special parsing can be done so we only try to include what's under them.
if symbol_data.group in {"module", "doc", "label"} or symbol_heading.name != "dt":
- description = _get_general_description(symbol_heading)
+ description = get_general_description(symbol_heading)
elif symbol_data.group in _NO_SIGNATURE_GROUPS:
- description = _get_dd_description(symbol_heading)
+ description = get_dd_description(symbol_heading)
else:
- signature = _get_signatures(symbol_heading)
- description = _get_dd_description(symbol_heading)
+ signature = get_signatures(symbol_heading)
+ description = get_dd_description(symbol_heading)
return _create_markdown(signature, description, symbol_data.url).replace('¶', '').strip()
--
cgit v1.2.3
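A quick usage sketch of the now-public helpers on a toy fragment (assuming they are imported from the _html module as moved above):

    from bs4 import BeautifulSoup

    from bot.exts.info.doc._html import get_dd_description, get_signatures

    soup = BeautifulSoup(
        "<dl><dt>sig(a)</dt><dt>sig(a, b)</dt><dd><p>Description.</p></dd></dl>",
        "lxml",
    )
    start = soup.find("dt")
    print(get_signatures(start))      # dt texts around `start`, capped at MAX_SIGNATURE_AMOUNT
    print(get_dd_description(start))  # contents of the following dd tag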
From 22520b9b37e161437a376a6067955e0c9b91cc76 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 10 Jan 2021 04:01:34 +0100
Subject: Defer import to avoid circular imports
---
bot/exts/info/doc/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/__init__.py b/bot/exts/info/doc/__init__.py
index af0bbff2d..dff7a0269 100644
--- a/bot/exts/info/doc/__init__.py
+++ b/bot/exts/info/doc/__init__.py
@@ -1,5 +1,4 @@
from bot.bot import Bot
-from ._cog import DocCog
MAX_SIGNATURE_AMOUNT = 3
PRIORITY_PACKAGES = (
@@ -9,4 +8,5 @@ PRIORITY_PACKAGES = (
def setup(bot: Bot) -> None:
"""Load the Doc cog."""
+ from ._cog import DocCog
bot.add_cog(DocCog(bot))
--
cgit v1.2.3
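Sketch of the cycle being broken (pkg and mod are stand-ins for the doc package and _cog):

    # pkg/__init__.py, before the fix:
    from pkg.mod import Thing   # runs pkg/mod.py immediately...
    CONSTANT = 3                # ...before this line has executed

    # pkg/mod.py:
    from pkg import CONSTANT    # ImportError: partially initialised module

    # After the fix the import runs inside a function called later, once
    # pkg/__init__.py has finished executing, so CONSTANT is bound:
    def setup() -> None:
        from pkg.mod import Thing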
From 33b408d9e2cc805e2cfc6851225929c50725ea80 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 10 Jan 2021 06:15:27 +0100
Subject: Rename CachedParser to BatchParser and move it to its own module
---
bot/exts/info/doc/__init__.py | 3 +
bot/exts/info/doc/_batch_parser.py | 173 +++++++++++++++++++++++++++++++++++++
bot/exts/info/doc/_cog.py | 170 +-----------------------------------
3 files changed, 180 insertions(+), 166 deletions(-)
create mode 100644 bot/exts/info/doc/_batch_parser.py
diff --git a/bot/exts/info/doc/__init__.py b/bot/exts/info/doc/__init__.py
index dff7a0269..2bb43a950 100644
--- a/bot/exts/info/doc/__init__.py
+++ b/bot/exts/info/doc/__init__.py
@@ -1,10 +1,13 @@
from bot.bot import Bot
+from ._redis_cache import DocRedisCache
MAX_SIGNATURE_AMOUNT = 3
PRIORITY_PACKAGES = (
"python",
)
+doc_cache = DocRedisCache(namespace="Docs")
+
def setup(bot: Bot) -> None:
"""Load the Doc cog."""
diff --git a/bot/exts/info/doc/_batch_parser.py b/bot/exts/info/doc/_batch_parser.py
new file mode 100644
index 000000000..edd6bb090
--- /dev/null
+++ b/bot/exts/info/doc/_batch_parser.py
@@ -0,0 +1,173 @@
+from __future__ import annotations
+
+import asyncio
+import logging
+import time
+from collections import defaultdict
+from contextlib import suppress
+from functools import partial
+from operator import attrgetter
+from typing import Dict, List, NamedTuple, TYPE_CHECKING, Union
+
+from bs4 import BeautifulSoup
+
+import bot
+from . import doc_cache
+from ._parsing import get_symbol_markdown
+if TYPE_CHECKING:
+ from ._cog import DocItem
+
+log = logging.getLogger(__name__)
+
+
+class QueueItem(NamedTuple):
+ """Contains a symbol and the BeautifulSoup object needed to parse it."""
+
+ symbol: DocItem
+ soup: BeautifulSoup
+
+ def __eq__(self, other: Union[QueueItem, DocItem]):
+ if isinstance(other, type(self.symbol)):
+ return self.symbol == other
+ return NamedTuple.__eq__(self, other)
+
+
+class ParseResultFuture(asyncio.Future):
+ """
+ Future with metadata for the parser class.
+
+ `user_requested` is set by the parser when a Future is requested by a user and moved to the front,
+ allowing only the user-requested futures to be waited for when clearing.
+
+ `result_set_time` provides the time at which the future's result has been set,
+ or inf if the result hasn't been set yet.
+ """
+
+ def __init__(self):
+ super().__init__()
+ self.user_requested = False
+ self.result_set_time = float("inf")
+
+ def set_result(self, result: str, /) -> None:
+ """Set `self.result_set_time` to current time when the result is set."""
+ self.result_set_time = time.time()
+ super().set_result(result)
+
+
+class BatchParser:
+ """
+ Get the Markdown of all symbols on a page and send them to redis when a symbol is requested.
+
+ DocItems are added through the `add_item` method which adds them to the `_page_symbols` dict.
+ `get_markdown` is used to fetch the Markdown; when this is used for the first time on a page,
+ all of the symbols are queued to be parsed to avoid multiple web requests to the same page.
+ """
+
+ def __init__(self):
+ self._queue: List[QueueItem] = []
+ self._page_symbols: Dict[str, List[DocItem]] = defaultdict(list)
+ self._item_futures: Dict[DocItem, ParseResultFuture] = {}
+ self._parse_task = None
+
+ self.cleanup_futures_task = bot.instance.loop.create_task(self._cleanup_futures())
+
+ async def get_markdown(self, doc_item: DocItem) -> str:
+ """
+ Get the result Markdown of `doc_item`.
+
+ If no symbols were fetched from `doc_item`s page before,
+ the HTML has to be fetched and then all items from the page are put into the parse queue.
+
+ Not safe to run while `self.clear` is running.
+ """
+ if (symbols_to_queue := self._page_symbols.get(doc_item.url)) is not None:
+ async with bot.instance.http_session.get(doc_item.url) as response:
+ soup = BeautifulSoup(await response.text(encoding="utf8"), "lxml")
+
+ self._queue.extend(QueueItem(symbol, soup) for symbol in symbols_to_queue)
+ self._item_futures.update((symbol, ParseResultFuture()) for symbol in symbols_to_queue)
+ del self._page_symbols[doc_item.url]
+ log.debug(f"Added symbols from {doc_item.url} to parse queue.")
+
+ if self._parse_task is None:
+ self._parse_task = asyncio.create_task(self._parse_queue())
+
+ with suppress(ValueError):
+ # If the item is not in the list then the item is already parsed or is being parsed
+ self._move_to_front(doc_item)
+ self._item_futures[doc_item].user_requested = True
+ return await self._item_futures[doc_item]
+
+ async def _parse_queue(self) -> None:
+ """
+ Parse all items from the queue, setting their result markdown on the futures and sending them to redis.
+
+ The coroutine will run as long as the queue is not empty, resetting `self._parse_task` to None when finished.
+ """
+ log.trace("Starting queue parsing.")
+ try:
+ while self._queue:
+ item, soup = self._queue.pop()
+ try:
+ if (future := self._item_futures[item]).done():
+ # Some items are present in the inventories multiple times under different symbols,
+ # if we already parsed an equal item, we can just skip it.
+ continue
+
+ markdown = await bot.instance.loop.run_in_executor(
+ None,
+ partial(get_symbol_markdown, soup, item),
+ )
+ if markdown is not None:
+ await doc_cache.set(item, markdown)
+ except Exception as e:
+ log.exception(f"Unexpected error when handling {item}")
+ future.set_exception(e)
+ else:
+ future.set_result(markdown)
+ await asyncio.sleep(0.1)
+ finally:
+ self._parse_task = None
+ log.trace("Finished parsing queue.")
+
+ def _move_to_front(self, item: Union[QueueItem, DocItem]) -> None:
+ """Move `item` to the front of the parse queue."""
+ # The parse queue stores soups along with the doc symbols in QueueItem objects,
+ # in case we're moving a DocItem we have to get the associated QueueItem first and then move it.
+ item_index = self._queue.index(item)
+ queue_item = self._queue.pop(item_index)
+
+ self._queue.append(queue_item)
+
+ def add_item(self, doc_item: DocItem) -> None:
+ """Map a DocItem to its page so that the symbol will be parsed once the page is requested."""
+ self._page_symbols[doc_item.url].append(doc_item)
+
+ async def clear(self) -> None:
+ """
+ Clear all internal symbol data.
+
+ All currently requested items are waited to be parsed before clearing.
+ """
+ for future in filter(attrgetter("user_requested"), self._item_futures.values()):
+ await future
+ if self._parse_task is not None:
+ self._parse_task.cancel()
+ self._queue.clear()
+ self._page_symbols.clear()
+ self._item_futures.clear()
+
+ async def _cleanup_futures(self) -> None:
+ """
+ Clear old futures from internal results.
+
+ After a future is set, we only need to wait for old requests to its associated `DocItem` to finish
+ as all new requests will get the value from the redis cache in the cog first.
+ Keeping them around for longer than a second is unnecessary and keeps the parsed Markdown strings alive.
+ """
+ while True:
+ current_time = time.time()
+ for key, future in self._item_futures.copy().items():
+ if current_time - future.result_set_time > 5:
+ del self._item_futures[key]
+ await asyncio.sleep(5)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index fd211d9f1..7a943f1a4 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -4,19 +4,13 @@ import asyncio
import logging
import re
import sys
-import time
-from collections import defaultdict
from contextlib import suppress
-from functools import partial
-from operator import attrgetter
from types import SimpleNamespace
-from typing import Dict, List, NamedTuple, Optional, Union
+from typing import Dict, NamedTuple, Optional
import discord
-from bs4 import BeautifulSoup
from discord.ext import commands
-from bot import instance as bot_instance
from bot.bot import Bot
from bot.constants import MODERATION_ROLES, RedirectOutput
from bot.converters import Inventory, PackageName, ValidURL
@@ -24,10 +18,9 @@ from bot.pagination import LinePaginator
from bot.utils.lock import lock
from bot.utils.messages import send_denial, wait_for_deletion
from bot.utils.scheduling import Scheduler
-from . import PRIORITY_PACKAGES
+from . import PRIORITY_PACKAGES, doc_cache
+from ._batch_parser import BatchParser
from ._inventory_parser import INVENTORY_DICT, fetch_inventory
-from ._parsing import get_symbol_markdown
-from ._redis_cache import DocRedisCache
log = logging.getLogger(__name__)
@@ -48,8 +41,6 @@ REFRESH_EVENT = asyncio.Event()
REFRESH_EVENT.set()
COMMAND_LOCK_SINGLETON = "inventory refresh"
-doc_cache = DocRedisCache(namespace="Docs")
-
class DocItem(NamedTuple):
"""Holds inventory symbol information."""
@@ -66,159 +57,6 @@ class DocItem(NamedTuple):
return self.base_url + self.relative_url_path
-class QueueItem(NamedTuple):
- """Contains a symbol and the BeautifulSoup object needed to parse it."""
-
- symbol: DocItem
- soup: BeautifulSoup
-
- def __eq__(self, other: Union[QueueItem, DocItem]):
- if isinstance(other, DocItem):
- return self.symbol == other
- return NamedTuple.__eq__(self, other)
-
-
-class ParseResultFuture(asyncio.Future):
- """
- Future with metadata for the parser class.
-
- `user_requested` is set by the parser when a Future is requested by a user and moved to the front,
- allowing only the user-requested futures to be waited for when clearing.
-
- `result_set_time` provides the time at which the future's result has been set,
- or inf if the result hasn't been set yet.
- """
-
- def __init__(self):
- super().__init__()
- self.user_requested = False
- self.result_set_time = float("inf")
-
- def set_result(self, result: str, /) -> None:
- """Set `self.result_set_time` to current time when the result is set."""
- self.result_set_time = time.time()
- super().set_result(result)
-
-
-class CachedParser:
- """
- Get the symbol Markdown from pages with smarter caching.
-
- DocItems are added through the `add_item` method which adds them to the `_page_symbols` dict.
- `get_markdown` is used to fetch the Markdown; when this is used for the first time on a page,
- all of the symbols are queued to be parsed to avoid multiple web requests to the same page.
- """
-
- def __init__(self):
- self._queue: List[QueueItem] = []
- self._page_symbols: Dict[str, List[DocItem]] = defaultdict(list)
- self._item_futures: Dict[DocItem, ParseResultFuture] = {}
- self._parse_task = None
-
- self.cleanup_futures_task = bot_instance.loop.create_task(self._cleanup_futures())
-
- async def get_markdown(self, doc_item: DocItem) -> str:
- """
- Get the result Markdown of `doc_item`.
-
- If no symbols were fetched from `doc_item`s page before,
- the HTML has to be fetched before parsing can be queued.
-
- Not safe to run while `self.clear` is running.
- """
- if (symbols_to_queue := self._page_symbols.get(doc_item.url)) is not None:
- async with bot_instance.http_session.get(doc_item.url) as response:
- soup = BeautifulSoup(await response.text(encoding="utf8"), "lxml")
-
- self._queue.extend(QueueItem(symbol, soup) for symbol in symbols_to_queue)
- self._item_futures.update((symbol, ParseResultFuture()) for symbol in symbols_to_queue)
- del self._page_symbols[doc_item.url]
- log.debug(f"Added symbols from {doc_item.url} to parse queue.")
-
- if self._parse_task is None:
- self._parse_task = asyncio.create_task(self._parse_queue())
-
- with suppress(ValueError):
- # If the item is not in the list then the item is already parsed or is being parsed
- self._move_to_front(doc_item)
- self._item_futures[doc_item].user_requested = True
- return await self._item_futures[doc_item]
-
- async def _parse_queue(self) -> None:
- """
- Parse all items from the queue, setting associated events for symbols if present.
-
- The coroutine will run as long as the queue is not empty, resetting `self._parse_task` to None when finished.
- """
- log.trace("Starting queue parsing.")
- try:
- while self._queue:
- item, soup = self._queue.pop()
- try:
- if (future := self._item_futures[item]).done():
- # Some items are present in the inventories multiple times under different symbols,
- # if we already parsed an equal item, we can just skip it.
- continue
-
- markdown = await bot_instance.loop.run_in_executor(
- None,
- partial(get_symbol_markdown, soup, item),
- )
- if markdown is not None:
- await doc_cache.set(item, markdown)
- except Exception as e:
- log.exception(f"Unexpected error when handling {item}")
- future.set_exception(e)
- else:
- future.set_result(markdown)
- await asyncio.sleep(0.1)
- finally:
- self._parse_task = None
- log.trace("Finished parsing queue.")
-
- def _move_to_front(self, item: Union[QueueItem, DocItem]) -> None:
- """Move `item` to the front of the parse queue."""
- # The parse queue stores soups along with the doc symbols in QueueItem objects,
- # in case we're moving a DocItem we have to get the associated QueueItem first and then move it.
- item_index = self._queue.index(item)
- queue_item = self._queue.pop(item_index)
-
- self._queue.append(queue_item)
-
- def add_item(self, doc_item: DocItem) -> None:
- """Map a DocItem to its page so that the symbol will be parsed once the page is requested."""
- self._page_symbols[doc_item.url].append(doc_item)
-
- async def clear(self) -> None:
- """
- Clear all internal symbol data.
-
- All currently requested items are waited to be parsed before clearing.
- """
- for future in filter(attrgetter("user_requested"), self._item_futures.values()):
- await future
- if self._parse_task is not None:
- self._parse_task.cancel()
- self._queue.clear()
- self._page_symbols.clear()
- self._item_futures.clear()
-
- async def _cleanup_futures(self) -> None:
- """
- Clear old futures from internal results.
-
- After a future is set, we only need to wait for old requests to its associated DocItem to finish
- as all new requests will get the value from the redis cache in the cog first.
- Keeping them around for longer than a second is unnecessary and keeps the parsed Markdown strings alive.
- """
- while True:
- current_time = time.time()
- for key, future in self._item_futures.copy().items():
- if current_time - future.result_set_time > 5:
- del self._item_futures[key]
- await asyncio.sleep(5)
-
-
class DocCog(commands.Cog):
"""A set of commands for querying & displaying documentation."""
@@ -226,7 +64,7 @@ class DocCog(commands.Cog):
self.base_urls = {}
self.bot = bot
self.doc_symbols: Dict[str, DocItem] = {}
- self.item_fetcher = CachedParser()
+ self.item_fetcher = BatchParser()
self.renamed_symbols = set()
self.inventory_scheduler = Scheduler(self.__class__.__name__)
--
cgit v1.2.3
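Roughly, the intended call pattern for the renamed class (doc_item is a stand-in; this must run inside the bot's event loop):

    from bot.exts.info.doc._batch_parser import BatchParser

    async def lookup(parser: BatchParser, doc_item) -> str:
        # Normally done once per symbol during inventory refresh:
        parser.add_item(doc_item)
        # The first lookup on a page fetches its HTML once, queues every symbol
        # on it, and moves the requested one to the front of the parse queue:
        return await parser.get_markdown(doc_item)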
From a50239e6fe46e2da36f925967db21c95f940597a Mon Sep 17 00:00:00 2001
From: Harbys
Date: Sun, 10 Jan 2021 19:45:25 +0100
Subject: indent fix
---
config-default.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/config-default.yml b/config-default.yml
index 042d80408..03d568285 100644
--- a/config-default.yml
+++ b/config-default.yml
@@ -255,7 +255,7 @@ guild:
team_leaders: 737250302834638889
# Streaming
- video: 764245844798079016
+ video: 764245844798079016
moderation_roles:
- *OWNERS_ROLE
--
cgit v1.2.3
From 074ce91205539ca06c1c048c62b7e649c4ae78b5 Mon Sep 17 00:00:00 2001
From: Harbys
Date: Sun, 10 Jan 2021 20:08:01 +0100
Subject: add 30 minute default for stream command
---
bot/exts/moderation/stream.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
index d8c2a8628..d8ffe32ff 100644
--- a/bot/exts/moderation/stream.py
+++ b/bot/exts/moderation/stream.py
@@ -1,3 +1,5 @@
+import datetime
+
import discord
from discord.ext import commands
@@ -32,7 +34,7 @@ class Stream(commands.Cog):
self,
ctx: commands.Context,
user: discord.Member,
- duration: Expiry,
+ duration: Expiry = datetime.datetime.utcnow() + datetime.timedelta(minutes=30),
*_
) -> None:
"""
--
cgit v1.2.3
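One behaviour worth noting with this signature (shown below with stand-in names): a parameter default is evaluated once, at function definition time, so the expiry here is fixed when the cog is loaded rather than computed per invocation.

    import datetime

    def permit(duration=datetime.datetime.utcnow() + datetime.timedelta(minutes=30)):
        return duration

    first = permit()
    # ...any time later, the default is still the value computed at import:
    assert permit() == first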
From a0e2179da398a9164582e7debb7e139754434385 Mon Sep 17 00:00:00 2001
From: Harbys
Date: Sun, 10 Jan 2021 20:31:05 +0100
Subject: move default duration for stream command to config
---
bot/constants.py | 6 ++++++
bot/exts/moderation/stream.py | 5 +++--
config-default.yml | 4 ++++
3 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/bot/constants.py b/bot/constants.py
index dca83e7ab..d912a5a9a 100644
--- a/bot/constants.py
+++ b/bot/constants.py
@@ -635,6 +635,12 @@ class Event(Enum):
voice_state_update = "voice_state_update"
+class VideoPermission(metaclass=YAMLGetter):
+ section = "video_permission"
+
+ default_permission_duration: int
+
+
# Debug mode
DEBUG_MODE = 'local' in os.environ.get("SITE_URL", "local")
diff --git a/bot/exts/moderation/stream.py b/bot/exts/moderation/stream.py
index d8ffe32ff..b590956a3 100644
--- a/bot/exts/moderation/stream.py
+++ b/bot/exts/moderation/stream.py
@@ -4,7 +4,7 @@ import discord
from discord.ext import commands
from bot.bot import Bot
-from bot.constants import Emojis, Roles, STAFF_ROLES
+from bot.constants import Emojis, Roles, STAFF_ROLES, VideoPermission
from bot.converters import Expiry
from bot.utils.scheduling import Scheduler
from bot.utils.time import format_infraction_with_duration
@@ -34,7 +34,8 @@ class Stream(commands.Cog):
self,
ctx: commands.Context,
user: discord.Member,
- duration: Expiry = datetime.datetime.utcnow() + datetime.timedelta(minutes=30),
+ duration: Expiry =
+ datetime.datetime.utcnow() + datetime.timedelta(minutes=VideoPermission.default_permission_duration),
*_
) -> None:
"""
diff --git a/config-default.yml b/config-default.yml
index 03d568285..ec982b0d3 100644
--- a/config-default.yml
+++ b/config-default.yml
@@ -533,3 +533,7 @@ voice_gate:
config:
required_keys: ['bot.token']
+
+
+video_permission:
+ default_permission_duration: 30 # Default duration for stream command in minutes
--
cgit v1.2.3
From 9a4ad5f73cd2c42087643cb36b9e6076c24695fb Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 10 Jan 2021 22:00:59 +0100
Subject: Change the func name to wrapped for clarity
---
bot/utils/function.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/bot/utils/function.py b/bot/utils/function.py
index 8b8c7ba5c..037516ac4 100644
--- a/bot/utils/function.py
+++ b/bot/utils/function.py
@@ -76,23 +76,23 @@ def get_bound_args(func: t.Callable, args: t.Tuple, kwargs: t.Dict[str, t.Any])
return bound_args.arguments
-def update_wrapper_globals(wrapper: types.FunctionType, func: types.FunctionType) -> types.FunctionType:
+def update_wrapper_globals(wrapper: types.FunctionType, wrapped: types.FunctionType) -> types.FunctionType:
"""
- Update globals of `wrapper` with the globals from `func`.
+ Update globals of `wrapper` with the globals from `wrapped`.
For forwardrefs in command annotations discordpy uses the __global__ attribute of the function
to resolve their values, with decorators that replace the function this breaks because they have
their own globals.
This function creates a new function functionally identical to `wrapper`, which has the globals replaced with
- a merge of `func`s globals and the `wrapper`s globals.
+ a merge of `wrapped`s globals and the `wrapper`s globals.
- In case a global name from `func` conflicts with a name from `wrapper`'s globals, `wrapper` will win
+ In case a global name from `wrapped` conflicts with a name from `wrapper`'s globals, `wrapper` will win
to keep it functional, but this may cause problems if the name is used as an annotation and
- discord.py uses it as a converter on a parameter from `func`.
+ discord.py uses it as a converter on a parameter from `wrapped`.
"""
new_globals = wrapper.__globals__.copy()
- new_globals.update((k, v) for k, v in func.__globals__.items() if k not in wrapper.__code__.co_names)
+ new_globals.update((k, v) for k, v in wrapped.__globals__.items() if k not in wrapper.__code__.co_names)
return types.FunctionType(
code=wrapper.__code__,
globals=new_globals,
--
cgit v1.2.3
From 4788a9364ac84cf0ee210c8b026ea7f2d5dd31ee Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 10 Jan 2021 22:07:58 +0100
Subject: Create decorator for update_wrapper_globals mimicking functools.wraps
---
bot/decorators.py | 14 ++++++++------
bot/utils/function.py | 15 +++++++++++++++
bot/utils/lock.py | 10 +++++++---
3 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/bot/decorators.py b/bot/decorators.py
index a37996e80..02735d0dc 100644
--- a/bot/decorators.py
+++ b/bot/decorators.py
@@ -1,8 +1,8 @@
import asyncio
import logging
+import types
import typing as t
from contextlib import suppress
-from functools import update_wrapper
from discord import Member, NotFound
from discord.ext import commands
@@ -11,6 +11,7 @@ from discord.ext.commands import Cog, Context
from bot.constants import Channels, RedirectOutput
from bot.utils import function
from bot.utils.checks import in_whitelist_check
+from bot.utils.function import command_wraps
log = logging.getLogger(__name__)
@@ -70,7 +71,8 @@ def redirect_output(destination_channel: int, bypass_roles: t.Container[int] = N
This decorator must go before (below) the `command` decorator.
"""
- def wrap(func: t.Callable) -> t.Callable:
+ def wrap(func: types.FunctionType) -> types.FunctionType:
+ @command_wraps(func)
async def inner(self: Cog, ctx: Context, *args, **kwargs) -> None:
if ctx.channel.id == destination_channel:
log.trace(f"Command {ctx.command.name} was invoked in destination_channel, not redirecting")
@@ -104,8 +106,7 @@ def redirect_output(destination_channel: int, bypass_roles: t.Container[int] = N
with suppress(NotFound):
await ctx.message.delete()
log.trace("Redirect output: Deleted invocation message")
-
- return update_wrapper(function.update_wrapper_globals(inner, func), func)
+ return inner
return wrap
@@ -121,7 +122,8 @@ def respect_role_hierarchy(member_arg: function.Argument) -> t.Callable:
This decorator must go before (below) the `command` decorator.
"""
- def decorator(func: t.Callable) -> t.Callable:
+ def decorator(func: types.FunctionType) -> types.FunctionType:
+ @command_wraps(func)
async def wrapper(*args, **kwargs) -> None:
log.trace(f"{func.__name__}: respect role hierarchy decorator called")
@@ -149,5 +151,5 @@ def respect_role_hierarchy(member_arg: function.Argument) -> t.Callable:
else:
log.trace(f"{func.__name__}: {target.top_role=} < {actor.top_role=}; calling func")
await func(*args, **kwargs)
- return update_wrapper(function.update_wrapper_globals(wrapper, func), func)
+ return wrapper
return decorator
diff --git a/bot/utils/function.py b/bot/utils/function.py
index 037516ac4..5fd70e1e8 100644
--- a/bot/utils/function.py
+++ b/bot/utils/function.py
@@ -1,5 +1,6 @@
"""Utilities for interaction with functions."""
+import functools
import inspect
import types
import typing as t
@@ -100,3 +101,17 @@ def update_wrapper_globals(wrapper: types.FunctionType, wrapped: types.FunctionT
argdefs=wrapper.__defaults__,
closure=wrapper.__closure__,
)
+
+
+def command_wraps(
+ wrapped: types.FunctionType,
+ assigned: t.Sequence[str] = functools.WRAPPER_ASSIGNMENTS,
+ updated: t.Sequence[str] = functools.WRAPPER_UPDATES,
+) -> t.Callable[[types.FunctionType], types.FunctionType]:
+ """Update `wrapped` to look like the decorated function and update globals for discordpy forwardref evaluation."""
+ def decorator(wrapper: types.FunctionType) -> types.FunctionType:
+ return functools.update_wrapper(
+ update_wrapper_globals(wrapper, wrapped), wrapped, assigned, updated
+ )
+
+ return decorator
diff --git a/bot/utils/lock.py b/bot/utils/lock.py
index 02188c827..978e3ae94 100644
--- a/bot/utils/lock.py
+++ b/bot/utils/lock.py
@@ -1,12 +1,14 @@
import inspect
import logging
+import types
from collections import defaultdict
-from functools import partial, update_wrapper
+from functools import partial
from typing import Any, Awaitable, Callable, Hashable, Union
from weakref import WeakValueDictionary
from bot.errors import LockedResourceError
from bot.utils import function
+from bot.utils.function import command_wraps
log = logging.getLogger(__name__)
__lock_dicts = defaultdict(WeakValueDictionary)
@@ -58,9 +60,10 @@ def lock(namespace: Hashable, resource_id: ResourceId, *, raise_error: bool = Fa
If decorating a command, this decorator must go before (below) the `command` decorator.
"""
- def decorator(func: Callable) -> Callable:
+ def decorator(func: types.FunctionType) -> types.FunctionType:
name = func.__name__
+ @command_wraps(func)
async def wrapper(*args, **kwargs) -> Any:
log.trace(f"{name}: mutually exclusive decorator called")
@@ -91,7 +94,8 @@ def lock(namespace: Hashable, resource_id: ResourceId, *, raise_error: bool = Fa
log.info(f"{name}: aborted because resource {namespace!r}:{id_!r} is locked")
if raise_error:
raise LockedResourceError(str(namespace), id_)
- return update_wrapper(function.update_wrapper_globals(wrapper, func), func)
+ return wrapper
+
return decorator
--
cgit v1.2.3
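
Usage-wise, command_wraps drops in where functools.wraps would otherwise go. A minimal sketch, assuming the module layout from the diff (the wrapper body here is hypothetical):

    import typing as t

    from bot.utils.function import command_wraps  # added in this commit

    def log_calls(func: t.Callable) -> t.Callable:
        @command_wraps(func)
        async def wrapper(*args, **kwargs) -> t.Any:
            print(f"calling {func.__name__}")  # illustrative extra behaviour
            return await func(*args, **kwargs)
        return wrapper

The returned wrapper looks like func to discord.py (name, docstring, __wrapped__), while the merged globals keep func's string annotations resolvable.
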
From d50ae50681f552c9a0d3e2c797b0916a09da54da Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Sun, 10 Jan 2021 22:10:12 +0100
Subject: Resolve wrapped command callbacks in the source command
Without this the command will fetch the source of the wrapper
---
bot/exts/info/source.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/bot/exts/info/source.py b/bot/exts/info/source.py
index 7b41352d4..ae68ef7e8 100644
--- a/bot/exts/info/source.py
+++ b/bot/exts/info/source.py
@@ -68,7 +68,10 @@ class BotSource(commands.Cog):
Raise BadArgument if `source_item` is a dynamically-created object (e.g. via internal eval).
"""
if isinstance(source_item, commands.Command):
- src = source_item.callback.__code__
+ source_item = source_item.callback
+ while hasattr(source_item, "__wrapped__"):
+ source_item = source_item.__wrapped__
+ src = source_item.__code__
filename = src.co_filename
elif isinstance(source_item, str):
tags_cog = self.bot.get_cog("Tags")
--
cgit v1.2.3
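
Since functools.update_wrapper sets __wrapped__ on each layer, the while-loop above is equivalent to the stdlib helper; a sketch:

    import inspect

    def original_callback(callback):
        # inspect.unwrap follows the __wrapped__ chain to the innermost
        # function, just like the loop in the diff.
        return inspect.unwrap(callback)
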
From 760ca7e9a0996865ee4d9e127baef8f0246a9e25 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 11 Jan 2021 00:54:37 +0100
Subject: Send a message to devlog instead of logging a warning
---
bot/exts/info/doc/_batch_parser.py | 30 ++++++++++++++++++++++++++++++
bot/exts/info/doc/_parsing.py | 1 -
2 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_batch_parser.py b/bot/exts/info/doc/_batch_parser.py
index edd6bb090..ebae6efb8 100644
--- a/bot/exts/info/doc/_batch_parser.py
+++ b/bot/exts/info/doc/_batch_parser.py
@@ -9,9 +9,11 @@ from functools import partial
from operator import attrgetter
from typing import Dict, List, NamedTuple, TYPE_CHECKING, Union
+import discord
from bs4 import BeautifulSoup
import bot
+from bot.constants import Channels
from . import doc_cache
from ._parsing import get_symbol_markdown
if TYPE_CHECKING:
@@ -20,6 +22,30 @@ if TYPE_CHECKING:
log = logging.getLogger(__name__)
+class StaleInventoryNotifier:
+ """Handle sending notifications about stale inventories through `DocItem`s to dev log."""
+
+ def __init__(self):
+ self._init_task = bot.instance.loop.create_task(self._init_channel())
+ self._warned_urls = set()
+
+ async def _init_channel(self) -> None:
+ """Wait for guild and get channel."""
+ await bot.instance.wait_until_guild_available()
+ self._dev_log = bot.instance.get_channel(Channels.dev_log)
+
+ async def send_warning(self, item: DocItem) -> None:
+ """Send a warning to dev log is one wasn't already sent for `item`'s url."""
+ if item.url not in self._warned_urls:
+ self._warned_urls.add(item.url)
+ await self._init_task
+ embed = discord.Embed(
+ description=f"Doc item `{item.symbol_id=}` present in loaded documentation inventories "
+ f"not found on [site]({item.url}), inventories may need to be refreshed."
+ )
+ await self._dev_log.send(embed=embed)
+
+
class QueueItem(NamedTuple):
"""Contains a symbol and the BeautifulSoup object needed to parse it."""
@@ -71,6 +97,8 @@ class BatchParser:
self.cleanup_futures_task = bot.instance.loop.create_task(self._cleanup_futures())
+ self.stale_inventory_notifier = StaleInventoryNotifier()
+
async def get_markdown(self, doc_item: DocItem) -> str:
"""
Get the result Markdown of `doc_item`.
@@ -120,6 +148,8 @@ class BatchParser:
)
if markdown is not None:
await doc_cache.set(item, markdown)
+ else:
+ asyncio.create_task(self.stale_inventory_notifier.send_warning(item))
except Exception as e:
log.exception(f"Unexpected error when handling {item}")
future.set_exception(e)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index d68f7c8d7..257161dd5 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -231,7 +231,6 @@ def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> Optional[s
"""
symbol_heading = soup.find(id=symbol_data.symbol_id)
if symbol_heading is None:
- log.warning("Symbol present in loaded inventories not found on site, consider refreshing inventories.")
return None
signature = None
# Modules, doc pages and labels don't point to description list tags but to tags like divs,
--
cgit v1.2.3
From c2447e0f2a3f28f79ec73d82b3ba4923b377f3e9 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 11 Jan 2021 01:07:07 +0100
Subject: Update outdated docstring
---
bot/exts/info/doc/_parsing.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index 257161dd5..f07b530c1 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -225,7 +225,7 @@ def _create_markdown(signatures: Optional[List[str]], description: Iterable[Tag]
def get_symbol_markdown(soup: BeautifulSoup, symbol_data: DocItem) -> Optional[str]:
"""
- Return parsed markdown of the passed symbol using the passed in soup, truncated to 1000 characters.
+ Return parsed markdown of the passed symbol using the passed in soup, truncated to fit within a discord message.
The method of parsing and what information gets included depends on the symbol's group.
"""
--
cgit v1.2.3
From 3b735398ca88b022e2fd815d715f3965c87f32ce Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 11 Jan 2021 02:01:00 +0100
Subject: Handle renaming conflicting symbols in ensure_unique_symbol_name
Previously, update_single looped this function until there were no
duplicates, and when creating new symbols the function had to check
whether the symbol to create a new name from started with a group/package
to avoid redundancy.
The new approach ensures the returned symbol is always unique
by handling the containment check inside and falling back to a symbol name
in the format of package.group.symbol, which should always be unique.
---
bot/exts/info/doc/_cog.py | 29 ++++++++++++++---------------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 7a943f1a4..5b38af95b 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -95,16 +95,14 @@ class DocCog(commands.Cog):
# e.g. get 'class' from 'py:class'
group_name = group.split(":")[1]
- while (original_symbol := self.doc_symbols.get(symbol)) is not None:
+ if (original_symbol := self.doc_symbols.get(symbol)) is not None:
replaced_symbol_name = self.ensure_unique_symbol_name(
api_package_name,
group_name,
original_symbol,
symbol,
)
- if replaced_symbol_name is None:
- break
- else:
+ if replaced_symbol_name is not None:
symbol = replaced_symbol_name
relative_url_path, _, symbol_id = relative_doc_url.partition("#")
@@ -169,8 +167,12 @@ class DocCog(commands.Cog):
"""
# Certain groups are added as prefixes to disambiguate the symbols.
if group_name in FORCE_PREFIX_GROUPS:
- self.renamed_symbols.add(symbol_name)
- return f"{group_name}.{symbol_name}"
+ new_symbol = f"{group_name}.{symbol_name}"
+ if new_symbol in self.doc_symbols:
+ # If there's still a conflict, prefix with package name.
+ new_symbol = f"{package_name}.{new_symbol}"
+ self.renamed_symbols.add(new_symbol)
+ return new_symbol
# The existing symbol with which the current symbol conflicts should have a group prefix.
# It currently doesn't have the group prefix because it's only added once there's a conflict.
@@ -195,15 +197,12 @@ class DocCog(commands.Cog):
# If we can't specially handle the symbol through its group or package,
# fall back to prepending its package name to the front.
else:
- if symbol_name.startswith(package_name):
- # If the symbol already starts with the package name, insert the group name after it.
- split_symbol_name = symbol_name.split(".", maxsplit=1)
- split_symbol_name.insert(1, group_name)
- overridden_symbol = ".".join(split_symbol_name)
- else:
- overridden_symbol = f"{package_name}.{symbol_name}"
- self.renamed_symbols.add(overridden_symbol)
- return overridden_symbol
+ new_symbol = f"{package_name}.{symbol_name}"
+ if new_symbol in self.doc_symbols:
+ # If there's still a conflict, add the symbol's group in the middle.
+ new_symbol = f"{package_name}.{group_name}.{symbol_name}"
+ self.renamed_symbols.add(new_symbol)
+ return new_symbol
async def refresh_inventory(self) -> None:
"""Refresh internal documentation inventory."""
--
cgit v1.2.3
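
Stripped of the cog's bookkeeping, the renaming scheme reduces to a small pure function. A simplified sketch (the priority-package handling and the renaming of the existing conflicting symbol are omitted):

    def unique_name(symbol: str, group: str, package: str, existing: set, force_prefix_groups: tuple) -> str:
        if group in force_prefix_groups:
            new = f"{group}.{symbol}"
            if new in existing:
                # Still conflicting; prefix with the package name too.
                new = f"{package}.{new}"
            return new
        new = f"{package}.{symbol}"
        if new in existing:
            # Still conflicting; insert the group in the middle.
            new = f"{package}.{group}.{symbol}"
        return new

Both branches bottom out at package.group.symbol, which should always be unique.
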
From c92a9985a5a43dc26e7590d7581d47fbbc5e27a8 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 11 Jan 2021 02:02:56 +0100
Subject: Use a dictionary of lists instead of a set for renamed symbols
A dictionary allows us to grab the original symbol name and then
get all the renamed symbols from it. With the improvements to
`ensure_unique_symbol_name` we can also use lists instead of sets, as
each symbol we add should be unique.
---
bot/exts/info/doc/_cog.py | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 5b38af95b..deef37f8f 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -4,6 +4,7 @@ import asyncio
import logging
import re
import sys
+from collections import defaultdict
from contextlib import suppress
from types import SimpleNamespace
from typing import Dict, NamedTuple, Optional
@@ -65,7 +66,7 @@ class DocCog(commands.Cog):
self.bot = bot
self.doc_symbols: Dict[str, DocItem] = {}
self.item_fetcher = BatchParser()
- self.renamed_symbols = set()
+ self.renamed_symbols = defaultdict(list)
self.inventory_scheduler = Scheduler(self.__class__.__name__)
self.scheduled_inventories = set()
@@ -171,7 +172,7 @@ class DocCog(commands.Cog):
if new_symbol in self.doc_symbols:
# If there's still a conflict, prefix with package name.
new_symbol = f"{package_name}.{new_symbol}"
- self.renamed_symbols.add(new_symbol)
+ self.renamed_symbols[symbol_name].append(new_symbol)
return new_symbol
# The existing symbol with which the current symbol conflicts should have a group prefix.
@@ -183,7 +184,7 @@ class DocCog(commands.Cog):
overridden_symbol = f"{original_item.package}.{overridden_symbol}"
self.doc_symbols[overridden_symbol] = original_item
- self.renamed_symbols.add(overridden_symbol)
+ self.renamed_symbols[symbol_name].append(overridden_symbol)
elif package_name in PRIORITY_PACKAGES:
overridden_symbol = f"{original_item.package}.{symbol_name}"
@@ -192,7 +193,7 @@ class DocCog(commands.Cog):
overridden_symbol = f"{original_item.package}.{original_item.group}.{symbol_name}"
self.doc_symbols[overridden_symbol] = original_item
- self.renamed_symbols.add(overridden_symbol)
+ self.renamed_symbols[symbol_name].append(overridden_symbol)
# If we can't specially handle the symbol through its group or package,
# fall back to prepending its package name to the front.
@@ -201,7 +202,7 @@ class DocCog(commands.Cog):
if new_symbol in self.doc_symbols:
# If there's still a conflict, add the symbol's group in the middle.
new_symbol = f"{package_name}.{group_name}.{symbol_name}"
- self.renamed_symbols.add(new_symbol)
+ self.renamed_symbols[symbol_name].append(new_symbol)
return new_symbol
async def refresh_inventory(self) -> None:
@@ -265,9 +266,7 @@ class DocCog(commands.Cog):
description=markdown
)
# Show all symbols with the same name that were renamed in the footer.
- embed.set_footer(
- text=", ".join(renamed for renamed in self.renamed_symbols - {symbol} if renamed.endswith(f".{symbol}"))
- )
+ embed.set_footer(text=", ".join(self.renamed_symbols[symbol]))
return embed
@commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
--
cgit v1.2.3
From 8d927ff13e0fd93e80102b43c2568f1e74a29a7c Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 11 Jan 2021 02:10:35 +0100
Subject: Ensure footer fits into message
The footer also now says Moved: at the start to clarify the meaning
of the symbols to the user
---
bot/exts/info/doc/_cog.py | 7 ++++++-
bot/exts/info/doc/_parsing.py | 4 ++--
2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index deef37f8f..b8c1a10d4 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -4,6 +4,7 @@ import asyncio
import logging
import re
import sys
+import textwrap
from collections import defaultdict
from contextlib import suppress
from types import SimpleNamespace
@@ -266,7 +267,11 @@ class DocCog(commands.Cog):
description=markdown
)
# Show all symbols with the same name that were renamed in the footer.
- embed.set_footer(text=", ".join(self.renamed_symbols[symbol]))
+ if renamed_symbols := self.renamed_symbols[symbol]:
+ footer_text = f"Moved: {textwrap.shorten(', '.join(renamed_symbols), 100, placeholder=' ...')}"
+ else:
+ footer_text = ""
+ embed.set_footer(text=footer_text)
return embed
@commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
diff --git a/bot/exts/info/doc/_parsing.py b/bot/exts/info/doc/_parsing.py
index f07b530c1..45a81a4cb 100644
--- a/bot/exts/info/doc/_parsing.py
+++ b/bot/exts/info/doc/_parsing.py
@@ -33,8 +33,8 @@ _NO_SIGNATURE_GROUPS = {
_EMBED_CODE_BLOCK_LINE_LENGTH = 61
# _MAX_SIGNATURE_AMOUNT code block wrapped lines with py syntax highlight
_MAX_SIGNATURES_LENGTH = (_EMBED_CODE_BLOCK_LINE_LENGTH + 8) * MAX_SIGNATURE_AMOUNT
-# Maximum discord message length - signatures on top
-_MAX_DESCRIPTION_LENGTH = 2000 - _MAX_SIGNATURES_LENGTH
+# Maximum discord message length - signatures on top - space for footer
+_MAX_DESCRIPTION_LENGTH = 1900 - _MAX_SIGNATURES_LENGTH
_TRUNCATE_STRIP_CHARACTERS = "!?:;." + string.whitespace
BracketPair = namedtuple("BracketPair", ["opening_bracket", "closing_bracket"])
--
cgit v1.2.3
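
textwrap.shorten truncates on word boundaries and collapses whitespace, which keeps the footer readable; for example (sample data):

    import textwrap

    renamed = ["queue.Queue", "asyncio.queues.Queue", "multiprocessing.Queue"]
    footer = f"Moved: {textwrap.shorten(', '.join(renamed), 100, placeholder=' ...')}"
    print(footer)  # truncated with ' ...' once the joined names exceed 100 chars
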
From 5c97efab1bf3d15911a343687b50af92b57bc036 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 11 Jan 2021 02:13:58 +0100
Subject: Don't convert package names into lowercase
The converter used to set them already ensures this for us, making the
call redundant
---
bot/exts/info/doc/_cog.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index b8c1a10d4..0e7eff9d9 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -250,7 +250,7 @@ class DocCog(commands.Cog):
if symbol_info is None:
log.debug("Symbol does not exist.")
return None
- self.bot.stats.incr(f"doc_fetches.{symbol_info.package.lower()}")
+ self.bot.stats.incr(f"doc_fetches.{symbol_info.package}")
markdown = await doc_cache.get(symbol_info)
if markdown is None:
--
cgit v1.2.3
From a7ba149904ac0643cc7e267d219fe86c159816e0 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 11 Jan 2021 02:37:53 +0100
Subject: Notify the user that inventories were refreshed on cache clears
---
bot/exts/info/doc/_cog.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 0e7eff9d9..822c984d7 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -410,7 +410,7 @@ class DocCog(commands.Cog):
"""Clear the persistent redis cache for `package`."""
if await doc_cache.delete(package_name):
await self.refresh_inventory()
- await ctx.send(f"Successfully cleared the cache for `{package_name}`.")
+ await ctx.send(f"Successfully cleared the cache for `{package_name}` and refreshed the inventories.")
else:
await ctx.send("No keys matching the package found.")
--
cgit v1.2.3
From f5235b16343816b02ceef56d1e753cb0167c6b03 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 11 Jan 2021 02:42:19 +0100
Subject: Check for containment instead of always getting the value from the
dict
Getting the value from a defaultdict will always create the key for it,
adding unnecessary entries every time a symbol is fetched from the bot.
---
bot/exts/info/doc/_cog.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 822c984d7..b35469787 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -267,8 +267,9 @@ class DocCog(commands.Cog):
description=markdown
)
# Show all symbols with the same name that were renamed in the footer.
- if renamed_symbols := self.renamed_symbols[symbol]:
- footer_text = f"Moved: {textwrap.shorten(', '.join(renamed_symbols), 100, placeholder=' ...')}"
+ if symbol in self.renamed_symbols:
+ renamed_symbols = ', '.join(self.renamed_symbols[symbol])
+ footer_text = f"Moved: {textwrap.shorten(renamed_symbols, 100, placeholder=' ...')}"
else:
footer_text = ""
embed.set_footer(text=footer_text)
--
cgit v1.2.3
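
The pitfall this commit fixes is easy to reproduce:

    from collections import defaultdict

    renamed_symbols = defaultdict(list)

    if renamed_symbols["Queue"]:  # subscripting inserts an empty list as a side effect
        pass
    print(len(renamed_symbols))  # 1

    renamed_symbols.clear()
    if "Queue" in renamed_symbols:  # a containment check does not insert anything
        pass
    print(len(renamed_symbols))  # 0
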
From 780dbc7683c7ce9cece6f0707840f56005466dfe Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 11 Jan 2021 02:52:34 +0100
Subject: Remove old reference to CachedParser and unused const
---
bot/exts/info/doc/_cog.py | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index b35469787..bc230b74b 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -2,7 +2,6 @@ from __future__ import annotations
import asyncio
import logging
-import re
import sys
import textwrap
from collections import defaultdict
@@ -34,7 +33,6 @@ FORCE_PREFIX_GROUPS = (
"pdbcommand",
"term",
)
-WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
# Delay to wait before trying to reach a rescheduled inventory again, in minutes
FETCH_RESCHEDULE_DELAY = SimpleNamespace(first=2, repeated=5)
@@ -238,8 +236,7 @@ class DocCog(commands.Cog):
If the symbol is known, an Embed with documentation about it is returned.
- First check the DocRedisCache before querying the cog's `CachedParser`,
- if not present also create a redis entry for the symbol.
+ First check the DocRedisCache before querying the cog's `BatchParser`.
"""
log.trace(f"Building embed for symbol `{symbol}`")
if not REFRESH_EVENT.is_set():
--
cgit v1.2.3
From a2c1e67ac764b363d48d685ace707a650279e009 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 11 Jan 2021 04:32:10 +0100
Subject: Make REFRESH_EVENT an instance variable
---
bot/exts/info/doc/_cog.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index bc230b74b..7bb819987 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -37,8 +37,6 @@ NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
# Delay to wait before trying to reach a rescheduled inventory again, in minutes
FETCH_RESCHEDULE_DELAY = SimpleNamespace(first=2, repeated=5)
-REFRESH_EVENT = asyncio.Event()
-REFRESH_EVENT.set()
COMMAND_LOCK_SINGLETON = "inventory refresh"
@@ -70,6 +68,8 @@ class DocCog(commands.Cog):
self.inventory_scheduler = Scheduler(self.__class__.__name__)
self.scheduled_inventories = set()
+ self.refresh_event = asyncio.Event()
+ self.refresh_event.set()
self.bot.loop.create_task(self.init_refresh_inventory())
@lock("doc", COMMAND_LOCK_SINGLETON, raise_error=True)
@@ -206,7 +206,7 @@ class DocCog(commands.Cog):
async def refresh_inventory(self) -> None:
"""Refresh internal documentation inventory."""
- REFRESH_EVENT.clear()
+ self.refresh_event.clear()
log.debug("Refreshing documentation inventory...")
self.inventory_scheduler.cancel_all()
@@ -228,7 +228,7 @@ class DocCog(commands.Cog):
]
await asyncio.gather(*coros)
log.debug("Finished inventory refresh.")
- REFRESH_EVENT.set()
+ self.refresh_event.set()
async def get_symbol_embed(self, symbol: str) -> Optional[discord.Embed]:
"""
@@ -239,9 +239,9 @@ class DocCog(commands.Cog):
First check the DocRedisCache before querying the cog's `BatchParser`.
"""
log.trace(f"Building embed for symbol `{symbol}`")
- if not REFRESH_EVENT.is_set():
+ if not self.refresh_event.is_set():
log.debug("Waiting for inventories to be refreshed before processing item.")
- await REFRESH_EVENT.wait()
+ await self.refresh_event.wait()
symbol_info = self.doc_symbols.get(symbol)
if symbol_info is None:
--
cgit v1.2.3
From 551c01e2537b036c17253d5cbfc4cfee6150cc4a Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 11 Jan 2021 04:59:25 +0100
Subject: Return whitespace to its previous state
---
bot/utils/lock.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/utils/lock.py b/bot/utils/lock.py
index 978e3ae94..997c653a1 100644
--- a/bot/utils/lock.py
+++ b/bot/utils/lock.py
@@ -94,8 +94,8 @@ def lock(namespace: Hashable, resource_id: ResourceId, *, raise_error: bool = Fa
log.info(f"{name}: aborted because resource {namespace!r}:{id_!r} is locked")
if raise_error:
raise LockedResourceError(str(namespace), id_)
- return wrapper
+ return wrapper
return decorator
--
cgit v1.2.3
From bf2d3d58dda76e7407b2d10f1dd9c89ce8f17d8f Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Mon, 11 Jan 2021 10:35:56 +0100
Subject: Fix docstring
The decorator works in reverse to what the docstring explained
---
bot/utils/function.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/utils/function.py b/bot/utils/function.py
index 5fd70e1e8..ab7f45761 100644
--- a/bot/utils/function.py
+++ b/bot/utils/function.py
@@ -108,7 +108,7 @@ def command_wraps(
assigned: t.Sequence[str] = functools.WRAPPER_ASSIGNMENTS,
updated: t.Sequence[str] = functools.WRAPPER_UPDATES,
) -> t.Callable[[types.FunctionType], types.FunctionType]:
- """Update `wrapped` to look like the decorated function and update globals for discordpy forwardref evaluation."""
+ """Update the decorated function to look like `wrapped` and update globals for discordpy forwardref evaluation."""
def decorator(wrapper: types.FunctionType) -> types.FunctionType:
return functools.update_wrapper(
update_wrapper_globals(wrapper, wrapped), wrapped, assigned, updated
--
cgit v1.2.3
From d32e8f1029be8deb76e8c0d9bb457c9768ca878e Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Wed, 13 Jan 2021 19:08:32 +0200
Subject: Better regex, moved pattern handlers to __init__, and constant header
---
bot/exts/info/code_snippets.py | 52 +++++++++++++++++++++++-------------------
1 file changed, 28 insertions(+), 24 deletions(-)
diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py
index 669a21c7d..1899b139b 100644
--- a/bot/exts/info/code_snippets.py
+++ b/bot/exts/info/code_snippets.py
@@ -12,24 +12,27 @@ from bot.utils.messages import wait_for_deletion
log = logging.getLogger(__name__)
GITHUB_RE = re.compile(
- r'https://github\.com/(?P<repo>.+?)/blob/(?P<path>.+/.+)'
- r'#L(?P<start_line>\d+)([-~]L(?P<end_line>\d+))?\b'
+ r'https://github\.com/(?P<repo>\S+?)/blob/(?P<path>\S+/[^\s#]+)'
+ r'(#L(?P<start_line>\d+)([-~:]L(?P<end_line>\d+))?)?($|\s)'
)
GITHUB_GIST_RE = re.compile(
r'https://gist\.github\.com/([^/]+)/(?P<gist_id>[^\W_]+)/*'
- r'(?P<revision>[^\W_]*)/*#file-(?P<file_path>.+?)'
- r'-L(?P<start_line>\d+)([-~]L(?P<end_line>\d+))?\b'
+ r'(?P<revision>[^\W_]*)/*#file-(?P<file_path>\S+?)'
+ r'(-L(?P<start_line>\d+)([-~:]L(?P<end_line>\d+))?)?($|\s)'
)
+GITHUB_HEADERS = {'Accept': 'application/vnd.github.v3.raw'}
+
GITLAB_RE = re.compile(
- r'https://gitlab\.com/(?P<repo>.+?)/\-/blob/(?P<path>.+/.+)'
- r'#L(?P<start_line>\d+)([-](?P<end_line>\d+))?\b'
+ r'https://gitlab\.com/(?P<repo>\S+?)/\-/blob/(?P<path>\S+/[^\s#]+)'
+ r'(#L(?P<start_line>\d+)([-](?P<end_line>\d+))?)?($|\s)'
)
BITBUCKET_RE = re.compile(
- r'https://bitbucket\.org/(?P<repo>.+?)/src/(?P<ref>.+?)/'
- r'(?P<file_path>.+?)#lines-(?P<start_line>\d+)(:(?P<end_line>\d+))?\b'
+ r'https://bitbucket\.org/(?P<repo>\S+?)/src/'
+ r'(?P<ref>\S+?)/(?P<file_path>[^\s#]+)'
+ r'(#lines-(?P<start_line>\d+)(:(?P<end_line>\d+))?)?($|\s)'
)
@@ -71,18 +74,20 @@ class CodeSnippets(Cog):
end_line: str
) -> str:
"""Fetches a snippet from a GitHub repo."""
- headers = {'Accept': 'application/vnd.github.v3.raw'}
-
# Search the GitHub API for the specified branch
- branches = await self._fetch_response(f'https://api.github.com/repos/{repo}/branches', 'json', headers=headers)
- tags = await self._fetch_response(f'https://api.github.com/repos/{repo}/tags', 'json', headers=headers)
+ branches = await self._fetch_response(
+ f'https://api.github.com/repos/{repo}/branches',
+ 'json',
+ headers=GITHUB_HEADERS
+ )
+ tags = await self._fetch_response(f'https://api.github.com/repos/{repo}/tags', 'json', headers=GITHUB_HEADERS)
refs = branches + tags
ref, file_path = self._find_ref(path, refs)
file_contents = await self._fetch_response(
f'https://api.github.com/repos/{repo}/contents/{file_path}?ref={ref}',
'text',
- headers=headers,
+ headers=GITHUB_HEADERS,
)
return self._snippet_to_codeblock(file_contents, file_path, start_line, end_line)
@@ -95,12 +100,10 @@ class CodeSnippets(Cog):
end_line: str
) -> str:
"""Fetches a snippet from a GitHub gist."""
- headers = {'Accept': 'application/vnd.github.v3.raw'}
-
gist_json = await self._fetch_response(
f'https://api.github.com/gists/{gist_id}{f"/{revision}" if len(revision) > 0 else ""}',
'json',
- headers=headers,
+ headers=GITHUB_HEADERS,
)
# Check each file in the gist for the specified file
@@ -207,19 +210,20 @@ class CodeSnippets(Cog):
"""Initializes the cog's bot."""
self.bot = bot
+ self.pattern_handlers = [
+ (GITHUB_RE, self._fetch_github_snippet),
+ (GITHUB_GIST_RE, self._fetch_github_gist_snippet),
+ (GITLAB_RE, self._fetch_gitlab_snippet),
+ (BITBUCKET_RE, self._fetch_bitbucket_snippet)
+ ]
+
@Cog.listener()
async def on_message(self, message: Message) -> None:
"""Checks if the message has a snippet link, removes the embed, then sends the snippet contents."""
if not message.author.bot:
message_to_send = ''
- pattern_handlers = [
- (GITHUB_RE, self._fetch_github_snippet),
- (GITHUB_GIST_RE, self._fetch_github_gist_snippet),
- (GITLAB_RE, self._fetch_gitlab_snippet),
- (BITBUCKET_RE, self._fetch_bitbucket_snippet)
- ]
-
- for pattern, handler in pattern_handlers:
+
+ for pattern, handler in self.pattern_handlers:
for match in pattern.finditer(message.content):
message_to_send += await handler(**match.groupdict())
--
cgit v1.2.3
From 1856ed852515c17c2095c10b93d4d418787ec178 Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Wed, 13 Jan 2021 19:10:03 +0200
Subject: Better regex now works for links wrapped in <>
---
bot/exts/info/code_snippets.py | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py
index 1899b139b..1d1bc2850 100644
--- a/bot/exts/info/code_snippets.py
+++ b/bot/exts/info/code_snippets.py
@@ -12,27 +12,27 @@ from bot.utils.messages import wait_for_deletion
log = logging.getLogger(__name__)
GITHUB_RE = re.compile(
- r'https://github\.com/(?P<repo>\S+?)/blob/(?P<path>\S+/[^\s#]+)'
- r'(#L(?P<start_line>\d+)([-~:]L(?P<end_line>\d+))?)?($|\s)'
+ r'https://github\.com/(?P<repo>\S+?)/blob/(?P<path>\S+/[^\s#,>]+)'
+ r'(#L(?P<start_line>\d+)([-~:]L(?P<end_line>\d+))?)?($|\s|,|>)'
)
GITHUB_GIST_RE = re.compile(
r'https://gist\.github\.com/([^/]+)/(?P<gist_id>[^\W_]+)/*'
r'(?P<revision>[^\W_]*)/*#file-(?P<file_path>\S+?)'
- r'(-L(?P<start_line>\d+)([-~:]L(?P<end_line>\d+))?)?($|\s)'
+ r'(-L(?P<start_line>\d+)([-~:]L(?P<end_line>\d+))?)?($|\s|,|>)'
)
GITHUB_HEADERS = {'Accept': 'application/vnd.github.v3.raw'}
GITLAB_RE = re.compile(
- r'https://gitlab\.com/(?P<repo>\S+?)/\-/blob/(?P<path>\S+/[^\s#]+)'
- r'(#L(?P<start_line>\d+)([-](?P<end_line>\d+))?)?($|\s)'
+ r'https://gitlab\.com/(?P<repo>\S+?)/\-/blob/(?P<path>\S+/[^\s#,>]+)'
+ r'(#L(?P<start_line>\d+)([-](?P<end_line>\d+))?)?($|\s|,|>)'
)
BITBUCKET_RE = re.compile(
r'https://bitbucket\.org/(?P<repo>\S+?)/src/'
- r'(?P<ref>\S+?)/(?P<file_path>[^\s#]+)'
- r'(#lines-(?P<start_line>\d+)(:(?P<end_line>\d+))?)?($|\s)'
+ r'(?P<ref>\S+?)/(?P<file_path>[^\s#,>]+)'
+ r'(#lines-(?P<start_line>\d+)(:(?P<end_line>\d+))?)?($|\s|,|>)'
)
--
cgit v1.2.3
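
Since on_message calls each handler with **match.groupdict(), the named groups have to line up with the handler's parameters. A sketch using the GitLab pattern from this commit (the handler body is hypothetical):

    import re

    GITLAB_RE = re.compile(
        r'https://gitlab\.com/(?P<repo>\S+?)/\-/blob/(?P<path>\S+/[^\s#,>]+)'
        r'(#L(?P<start_line>\d+)([-](?P<end_line>\d+))?)?($|\s|,|>)'
    )

    def handle(repo, path, start_line, end_line):
        print(repo, path, start_line, end_line)

    text = 'bug at https://gitlab.com/group/project/-/blob/main/app.py#L3-7, please look'
    for match in GITLAB_RE.finditer(text):
        handle(**match.groupdict())  # repo='group/project', path='main/app.py', lines 3-7
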
From 08b793024f271de009aab2391cd85576af5313cf Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Wed, 13 Jan 2021 19:19:49 +0200
Subject: Better error reporting in _fetch_response(?)
---
bot/exts/info/code_snippets.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py
index 1d1bc2850..3469b88f4 100644
--- a/bot/exts/info/code_snippets.py
+++ b/bot/exts/info/code_snippets.py
@@ -3,6 +3,7 @@ import re
import textwrap
from urllib.parse import quote_plus
+from aiohttp import ClientResponseError
from discord import Message
from discord.ext.commands import Cog
@@ -46,13 +47,13 @@ class CodeSnippets(Cog):
async def _fetch_response(self, url: str, response_format: str, **kwargs) -> str:
"""Makes http requests using aiohttp."""
try:
- async with self.bot.http_session.get(url, **kwargs) as response:
+ async with self.bot.http_session.get(url, raise_for_status=True, **kwargs) as response:
if response_format == 'text':
return await response.text()
elif response_format == 'json':
return await response.json()
- except Exception:
- log.exception(f'Failed to fetch code snippet from {url}.')
+ except ClientResponseError as error:
+ log.error(f'Failed to fetch code snippet from {url}. HTTP Status: {error.status}. Message: {str(error)}.')
def _find_ref(self, path: str, refs: tuple) -> tuple:
"""Loops through all branches and tags to find the required ref."""
--
cgit v1.2.3
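
A self-contained sketch of the aiohttp behaviour relied on here: with raise_for_status=True, non-2xx responses raise ClientResponseError instead of passing through silently (the URL is a placeholder):

    import asyncio

    import aiohttp

    async def fetch_text(url: str) -> str:
        async with aiohttp.ClientSession() as session:
            try:
                async with session.get(url, raise_for_status=True) as response:
                    return await response.text()
            except aiohttp.ClientResponseError as error:
                print(f'Failed to fetch {url}. HTTP Status: {error.status}. Message: {error}.')

    asyncio.run(fetch_text('https://httpbin.org/status/404'))
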
From a3145654ab5c90d16f9b4ff53f3df40d7e35f683 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 13 Jan 2021 12:56:57 +0100
Subject: Turn update_single into a normal function
The method no longer runs anything asynchronous
---
bot/exts/info/doc/_cog.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 7bb819987..f008f2c28 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -78,7 +78,7 @@ class DocCog(commands.Cog):
await self.bot.wait_until_guild_available()
await self.refresh_inventory()
- async def update_single(self, api_package_name: str, base_url: str, package: INVENTORY_DICT) -> None:
+ def update_single(self, api_package_name: str, base_url: str, package: INVENTORY_DICT) -> None:
"""
Rebuild the inventory for a single package.
@@ -148,7 +148,7 @@ class DocCog(commands.Cog):
return
self.scheduled_inventories.discard(api_package_name)
- await self.update_single(api_package_name, base_url, package)
+ self.update_single(api_package_name, base_url, package)
def ensure_unique_symbol_name(
self,
@@ -357,7 +357,7 @@ class DocCog(commands.Cog):
+ "\n".join(f"{key}: {value}" for key, value in body.items())
)
- await self.update_single(package_name, base_url, inventory_dict)
+ self.update_single(package_name, base_url, inventory_dict)
await ctx.send(f"Added the package `{package_name}` to the database and refreshed the inventory.")
@docs_group.command(name='deletedoc', aliases=('removedoc', 'rm', 'd'))
--
cgit v1.2.3
From a74d7f81f258b4e70221c445b351fe646d385dd5 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 13 Jan 2021 12:57:36 +0100
Subject: Correct return type annotation
---
bot/exts/info/doc/_cog.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index f008f2c28..ac74e7997 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -124,7 +124,7 @@ class DocCog(commands.Cog):
api_package_name: str,
base_url: str,
inventory_url: str
- ) -> Optional[INVENTORY_DICT]:
+ ) -> None:
"""
Update the cog's inventory, or reschedule this method to execute again if the remote inventory is unreachable.
--
cgit v1.2.3
From f3323503ff84b67ae2b8d4412001238937b7f684 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 13 Jan 2021 21:28:21 +0100
Subject: Use different task ids for every inventory reschedule attempt
The scheduler can't keep track of multiple tasks with the same id,
and rescheduling the update task using the same id within an already
scheduled update task caused the new task to get ignored as the old task
only got deleted from the scheduler after it was finished
---
bot/exts/info/doc/_cog.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index ac74e7997..43407d5ba 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -66,7 +66,7 @@ class DocCog(commands.Cog):
self.renamed_symbols = defaultdict(list)
self.inventory_scheduler = Scheduler(self.__class__.__name__)
- self.scheduled_inventories = set()
+ self.inventory_reschedule_attempts = defaultdict(int)
self.refresh_event = asyncio.Event()
self.refresh_event.set()
@@ -134,20 +134,20 @@ class DocCog(commands.Cog):
package = await fetch_inventory(inventory_url)
if not package:
- if inventory_url not in self.scheduled_inventories:
+ attempt = self.inventory_reschedule_attempts[package]
+ self.inventory_reschedule_attempts[package] += 1
+ if attempt == 0:
delay = FETCH_RESCHEDULE_DELAY.first
else:
delay = FETCH_RESCHEDULE_DELAY.repeated
log.info(f"Failed to fetch inventory; attempting again in {delay} minutes.")
self.inventory_scheduler.schedule_later(
delay*60,
- api_package_name,
+ (attempt, api_package_name),
self.update_or_reschedule_inventory(api_package_name, base_url, inventory_url)
)
- self.scheduled_inventories.add(api_package_name)
return
- self.scheduled_inventories.discard(api_package_name)
self.update_single(api_package_name, base_url, package)
def ensure_unique_symbol_name(
@@ -209,6 +209,7 @@ class DocCog(commands.Cog):
self.refresh_event.clear()
log.debug("Refreshing documentation inventory...")
self.inventory_scheduler.cancel_all()
+ self.inventory_reschedule_attempts.clear()
# Clear the old base URLS and doc symbols to ensure
# that we start from a fresh local dataset.
@@ -216,7 +217,6 @@ class DocCog(commands.Cog):
self.base_urls.clear()
self.doc_symbols.clear()
self.renamed_symbols.clear()
- self.scheduled_inventories.clear()
await self.item_fetcher.clear()
# Run all coroutines concurrently - since each of them performs an HTTP
--
cgit v1.2.3
From 93ef70f7bcbb638fbdf55fb278cf16c2605db63b Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 13 Jan 2021 21:30:18 +0100
Subject: Simplify control flow
Co-authored-by: MarkKoz
---
bot/exts/info/doc/_cog.py | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 43407d5ba..eea380fc0 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -146,9 +146,8 @@ class DocCog(commands.Cog):
(attempt, api_package_name),
self.update_or_reschedule_inventory(api_package_name, base_url, inventory_url)
)
- return
-
- self.update_single(api_package_name, base_url, package)
+ else:
+ self.update_single(api_package_name, base_url, package)
def ensure_unique_symbol_name(
self,
--
cgit v1.2.3
From e7b20b90efb50169aecf865168840a319037c776 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 13 Jan 2021 21:30:40 +0100
Subject: Keep track of the init task and cancel it when the cog is unloaded
---
bot/exts/info/doc/_cog.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index eea380fc0..aa9642016 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -70,7 +70,7 @@ class DocCog(commands.Cog):
self.refresh_event = asyncio.Event()
self.refresh_event.set()
- self.bot.loop.create_task(self.init_refresh_inventory())
+ self.init_refresh_task = self.bot.loop.create_task(self.init_refresh_inventory())
@lock("doc", COMMAND_LOCK_SINGLETON, raise_error=True)
async def init_refresh_inventory(self) -> None:
@@ -415,4 +415,5 @@ class DocCog(commands.Cog):
"""Clear scheduled inventories, queued symbols and cleanup task on cog unload."""
self.inventory_scheduler.cancel_all()
self.item_fetcher.cleanup_futures_task.cancel()
+ self.init_refresh_task.cancel()
asyncio.create_task(self.item_fetcher.clear())
--
cgit v1.2.3
From a4de9fe294b7626dc81ee191d2d6bce751ad91c7 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 13 Jan 2021 21:31:55 +0100
Subject: Change typehint name casing to PascalCase
---
bot/converters.py | 2 +-
bot/exts/info/doc/_cog.py | 4 ++--
bot/exts/info/doc/_inventory_parser.py | 11 ++++++-----
3 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/bot/converters.py b/bot/converters.py
index 6bbc22c3a..2b383636c 100644
--- a/bot/converters.py
+++ b/bot/converters.py
@@ -187,7 +187,7 @@ class Inventory(Converter):
"""
@staticmethod
- async def convert(ctx: Context, url: str) -> t.Tuple[str, _inventory_parser.INVENTORY_DICT]:
+ async def convert(ctx: Context, url: str) -> t.Tuple[str, _inventory_parser.InventoryDict]:
"""Convert url to Intersphinx inventory URL."""
await ctx.trigger_typing()
if (inventory := await _inventory_parser.fetch_inventory(url)) is None:
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index aa9642016..51283a67e 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -21,7 +21,7 @@ from bot.utils.messages import send_denial, wait_for_deletion
from bot.utils.scheduling import Scheduler
from . import PRIORITY_PACKAGES, doc_cache
from ._batch_parser import BatchParser
-from ._inventory_parser import INVENTORY_DICT, fetch_inventory
+from ._inventory_parser import InventoryDict, fetch_inventory
log = logging.getLogger(__name__)
@@ -78,7 +78,7 @@ class DocCog(commands.Cog):
await self.bot.wait_until_guild_available()
await self.refresh_inventory()
- def update_single(self, api_package_name: str, base_url: str, package: INVENTORY_DICT) -> None:
+ def update_single(self, api_package_name: str, base_url: str, package: InventoryDict) -> None:
"""
Rebuild the inventory for a single package.
diff --git a/bot/exts/info/doc/_inventory_parser.py b/bot/exts/info/doc/_inventory_parser.py
index 886708867..1615f15bd 100644
--- a/bot/exts/info/doc/_inventory_parser.py
+++ b/bot/exts/info/doc/_inventory_parser.py
@@ -11,9 +11,10 @@ import bot
log = logging.getLogger(__name__)
FAILED_REQUEST_ATTEMPTS = 3
-INVENTORY_DICT = DefaultDict[str, List[Tuple[str, str]]]
_V2_LINE_RE = re.compile(r'(?x)(.+?)\s+(\S*:\S*)\s+(-?\d+)\s+?(\S*)\s+(.*)')
+InventoryDict = DefaultDict[str, List[Tuple[str, str]]]
+
class ZlibStreamReader:
"""Class used for decoding zlib data of a stream line by line."""
@@ -43,7 +44,7 @@ class ZlibStreamReader:
pos = buf.find(b'\n')
-async def _load_v1(stream: aiohttp.StreamReader) -> INVENTORY_DICT:
+async def _load_v1(stream: aiohttp.StreamReader) -> InventoryDict:
invdata = defaultdict(list)
async for line in stream:
@@ -59,7 +60,7 @@ async def _load_v1(stream: aiohttp.StreamReader) -> INVENTORY_DICT:
return invdata
-async def _load_v2(stream: aiohttp.StreamReader) -> INVENTORY_DICT:
+async def _load_v2(stream: aiohttp.StreamReader) -> InventoryDict:
invdata = defaultdict(list)
async for line in ZlibStreamReader(stream):
@@ -72,7 +73,7 @@ async def _load_v2(stream: aiohttp.StreamReader) -> INVENTORY_DICT:
return invdata
-async def _fetch_inventory(url: str) -> INVENTORY_DICT:
+async def _fetch_inventory(url: str) -> InventoryDict:
"""Fetch, parse and return an intersphinx inventory file from an url."""
timeout = aiohttp.ClientTimeout(sock_connect=5, sock_read=5)
async with bot.instance.http_session.get(url, timeout=timeout, raise_for_status=True) as response:
@@ -94,7 +95,7 @@ async def _fetch_inventory(url: str) -> INVENTORY_DICT:
raise ValueError(f"Invalid inventory file at url {url}.")
-async def fetch_inventory(url: str) -> Optional[INVENTORY_DICT]:
+async def fetch_inventory(url: str) -> Optional[InventoryDict]:
"""
Get an inventory dict from `url`, retrying `FAILED_REQUEST_ATTEMPTS` times on errors.
--
cgit v1.2.3
From d972b7800346b4d1ee88c706354bb1c18ba4b725 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 13 Jan 2021 21:33:56 +0100
Subject: Reuse the redis key instead of creating a new string for the expires
set
---
bot/exts/info/doc/_redis_cache.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_redis_cache.py b/bot/exts/info/doc/_redis_cache.py
index 52cb2bc94..cab51c3f1 100644
--- a/bot/exts/info/doc/_redis_cache.py
+++ b/bot/exts/info/doc/_redis_cache.py
@@ -27,8 +27,8 @@ class DocRedisCache(RedisObject):
needs_expire = False
with await self._get_pool_connection() as connection:
- if item.package+url_key not in self._set_expires:
- self._set_expires.add(item.package+url_key)
+ if redis_key not in self._set_expires:
+ self._set_expires.add(redis_key)
needs_expire = not await connection.exists(redis_key)
await connection.hset(redis_key, item.symbol_id, value)
--
cgit v1.2.3
From 7342510667ea159fcc83927cb9caee14661c12a8 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Wed, 13 Jan 2021 23:18:33 +0100
Subject: Set the user_requested attribute at the start of the coroutine
A context switch may occur while we're waiting for the web page response,
during which a clear could be triggered. If the attribute is not set before
that, we could end up with the dictionary changing size, or, if a copy
was made, with a future that'd never finish as it'd already be
cleared from the queue and the futures dict.
---
bot/exts/info/doc/_batch_parser.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bot/exts/info/doc/_batch_parser.py b/bot/exts/info/doc/_batch_parser.py
index ebae6efb8..4a6d9b544 100644
--- a/bot/exts/info/doc/_batch_parser.py
+++ b/bot/exts/info/doc/_batch_parser.py
@@ -108,6 +108,7 @@ class BatchParser:
Not safe to run while `self.clear` is running.
"""
+ self._item_futures[doc_item].user_requested = True
if (symbols_to_queue := self._page_symbols.get(doc_item.url)) is not None:
async with bot.instance.http_session.get(doc_item.url) as response:
soup = BeautifulSoup(await response.text(encoding="utf8"), "lxml")
@@ -123,7 +124,6 @@ class BatchParser:
with suppress(ValueError):
# If the item is not in the list then the item is already parsed or is being parsed
self._move_to_front(doc_item)
- self._item_futures[doc_item].user_requested = True
return await self._item_futures[doc_item]
async def _parse_queue(self) -> None:
--
cgit v1.2.3
From 1bdfdac30d27d67d95c49b5b66a0a4de919afa21 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Thu, 14 Jan 2021 01:10:48 +0100
Subject: Ensure footer is actually max 100 chars
Shortening the renamed symbols string to 100 chars is not accurate,
as the footer also contains a string before that; subtracting its length
fixes this.
---
bot/exts/info/doc/_cog.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 51283a67e..942d685af 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -262,10 +262,11 @@ class DocCog(commands.Cog):
url=f"{symbol_info.url}#{symbol_info.symbol_id}",
description=markdown
)
- # Show all symbols with the same name that were renamed in the footer.
+ # Show all symbols with the same name that were renamed in the footer,
+ # with a max of 100 chars.
if symbol in self.renamed_symbols:
renamed_symbols = ', '.join(self.renamed_symbols[symbol])
- footer_text = f"Moved: {textwrap.shorten(renamed_symbols, 100, placeholder=' ...')}"
+ footer_text = f"Moved: {textwrap.shorten(renamed_symbols, 100-7, placeholder=' ...')}"
else:
footer_text = ""
embed.set_footer(text=footer_text)
--
cgit v1.2.3
From e86e9f921a4bbbe42a5fb6fd8486425f11af62cf Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Thu, 14 Jan 2021 05:00:22 +0100
Subject: Raise an error or log a warning if there's a global name conflict
When the wrapper uses a global name that conflicts with a global name
from wrapped's module which wrapped uses for its annotations, we run into
a situation that can't be solved without changing one of the names, so
an error is raised to make this explicit.
The check may be erroneous in some edge cases, or the objects the
conflicting names refer to may be functionally identical, so the error
can be turned into a logged warning.
---
bot/utils/function.py | 40 +++++++++++++++++++++++++++++++++++-----
1 file changed, 35 insertions(+), 5 deletions(-)
diff --git a/bot/utils/function.py b/bot/utils/function.py
index ab7f45761..4fa7a9f60 100644
--- a/bot/utils/function.py
+++ b/bot/utils/function.py
@@ -2,15 +2,22 @@
import functools
import inspect
+import logging
import types
import typing as t
+log = logging.getLogger(__name__)
+
Argument = t.Union[int, str]
BoundArgs = t.OrderedDict[str, t.Any]
Decorator = t.Callable[[t.Callable], t.Callable]
ArgValGetter = t.Callable[[BoundArgs], t.Any]
+class GlobalNameConflictError(Exception):
+ """Raised when there's a conflict between the globals used to resolve annotations of wrapped and its wrapper."""
+
+
def get_arg_value(name_or_pos: Argument, arguments: BoundArgs) -> t.Any:
"""
Return a value from `arguments` based on a name or position.
@@ -77,7 +84,12 @@ def get_bound_args(func: t.Callable, args: t.Tuple, kwargs: t.Dict[str, t.Any])
return bound_args.arguments
-def update_wrapper_globals(wrapper: types.FunctionType, wrapped: types.FunctionType) -> types.FunctionType:
+def update_wrapper_globals(
+ wrapper: types.FunctionType,
+ wrapped: types.FunctionType,
+ *,
+ error_on_conflict: bool = True,
+) -> types.FunctionType:
"""
Update globals of `wrapper` with the globals from `wrapped`.
@@ -88,10 +100,26 @@ def update_wrapper_globals(wrapper: types.FunctionType, wrapped: types.FunctionT
This function creates a new function functionally identical to `wrapper`, which has the globals replaced with
a merge of `wrapped`s globals and the `wrapper`s globals.
- In case a global name from `wrapped` conflicts with a name from `wrapper`'s globals, `wrapper` will win
- to keep it functional, but this may cause problems if the name is used as an annotation and
- discord.py uses it as a converter on a parameter from `wrapped`.
+ If `error_on_conflict` is True, an exception will be raised in case `wrapper` and `wrapped` share a global name
+ that is used by `wrapped`'s typehints, as this can cause incorrect objects being used by discordpy's converters.
+ The error can be turned into a warning by setting the argument to False.
"""
+ forwardrefs = (ann for ann in wrapped.__annotations__.values() if isinstance(ann, str))
+ annotation_global_names = (ann.split(".", maxsplit=1)[0] for ann in forwardrefs)
+ # Conflicting globals from both functions' modules that are also used in the wrapper and in wrapped's annotations.
+ shared_globals = set(wrapper.__code__.co_names) & set(annotation_global_names)
+ shared_globals &= set(wrapped.__globals__) & set(wrapper.__globals__)
+ if shared_globals:
+ message = (
+ f"wrapper and the wrapped function share the following "
+ f"global names used by annotations: {', '.join(shared_globals)}. "
+ f"Resolve the conflicts or pass error_on_conflict=False to suppress this error if this is intentional."
+ )
+ if error_on_conflict:
+ raise GlobalNameConflictError(message)
+ else:
+ log.info(message)
+
new_globals = wrapper.__globals__.copy()
new_globals.update((k, v) for k, v in wrapped.__globals__.items() if k not in wrapper.__code__.co_names)
return types.FunctionType(
@@ -107,11 +135,13 @@ def command_wraps(
wrapped: types.FunctionType,
assigned: t.Sequence[str] = functools.WRAPPER_ASSIGNMENTS,
updated: t.Sequence[str] = functools.WRAPPER_UPDATES,
+ *,
+ error_on_conflict: bool = True,
) -> t.Callable[[types.FunctionType], types.FunctionType]:
"""Update the decorated function to look like `wrapped` and update globals for discordpy forwardref evaluation."""
def decorator(wrapper: types.FunctionType) -> types.FunctionType:
return functools.update_wrapper(
- update_wrapper_globals(wrapper, wrapped), wrapped, assigned, updated
+ update_wrapper_globals(wrapper, wrapped, error_on_conflict=error_on_conflict), wrapped, assigned, updated
)
return decorator
--
cgit v1.2.3
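
The detection half of the diff can be exercised in isolation; a sketch (Converter exists only as a string annotation here, which is exactly the forwardref case):

    def wrapped(arg: "Converter") -> None:  # string annotation, resolved via __globals__
        ...

    forwardrefs = (ann for ann in wrapped.__annotations__.values() if isinstance(ann, str))
    annotation_global_names = {ann.split(".", maxsplit=1)[0] for ann in forwardrefs}
    print(annotation_global_names)  # {'Converter'}

If a name in this set also appears in the wrapper's co_names and in both functions' __globals__, the merged globals would silently pick the wrapper's object, which is what the new error surfaces.
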
From b1250515e7d6d3545bcfd850c6286c69239cb420 Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Thu, 14 Jan 2021 05:17:07 +0100
Subject: Prevent an inventory refresh while waiting for item cache
If an inventory refresh was started while the symbol embed coroutine
was suspended, it could cause the parser to try to fetch a non-existent
future if the markdown was requested after it was cleared but before
new inventories were loaded in.
---
bot/exts/info/doc/_cog.py | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/bot/exts/info/doc/_cog.py b/bot/exts/info/doc/_cog.py
index 942d685af..7b9dad135 100644
--- a/bot/exts/info/doc/_cog.py
+++ b/bot/exts/info/doc/_cog.py
@@ -70,6 +70,9 @@ class DocCog(commands.Cog):
self.refresh_event = asyncio.Event()
self.refresh_event.set()
+ self.symbol_get_event = asyncio.Event()
+ self.symbol_get_event.set()
+
self.init_refresh_task = self.bot.loop.create_task(self.init_refresh_inventory())
@lock("doc", COMMAND_LOCK_SINGLETON, raise_error=True)
@@ -206,6 +209,7 @@ class DocCog(commands.Cog):
async def refresh_inventory(self) -> None:
"""Refresh internal documentation inventory."""
self.refresh_event.clear()
+ await self.symbol_get_event.wait()
log.debug("Refreshing documentation inventory...")
self.inventory_scheduler.cancel_all()
self.inventory_reschedule_attempts.clear()
@@ -248,7 +252,10 @@ class DocCog(commands.Cog):
return None
self.bot.stats.incr(f"doc_fetches.{symbol_info.package}")
+ self.symbol_get_event.clear()
markdown = await doc_cache.get(symbol_info)
+ self.symbol_get_event.set()
+
if markdown is None:
log.debug(f"Redis cache miss for symbol `{symbol}`.")
markdown = await self.item_fetcher.get_markdown(symbol_info)
--
cgit v1.2.3
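
The two events form a small handshake: refreshes wait out in-flight cache reads, and reads wait out refreshes. A reduced sketch of the pattern (cache access elided):

    import asyncio

    class Gate:
        def __init__(self):
            self.refresh_event = asyncio.Event()
            self.refresh_event.set()
            self.symbol_get_event = asyncio.Event()
            self.symbol_get_event.set()

        async def refresh_inventory(self):
            self.refresh_event.clear()          # block new symbol reads
            await self.symbol_get_event.wait()  # let in-flight reads finish
            ...                                 # rebuild inventories here
            self.refresh_event.set()

        async def get_symbol(self):
            if not self.refresh_event.is_set():
                await self.refresh_event.wait()  # don't read mid-refresh
            self.symbol_get_event.clear()
            ...                                  # await doc_cache.get(...) here
            self.symbol_get_event.set()
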
From f1103aeade13f964282154d5d1597b81188ce98f Mon Sep 17 00:00:00 2001
From: Numerlor <25886452+Numerlor@users.noreply.github.com>
Date: Fri, 15 Jan 2021 23:11:57 +0100
Subject: Use a defaultdict for item futures
To be able to set the attribute at the start of the coro we need to
be able to access the item's future before we know about all the other
items. This also saves us from having to add them all manually, as the
queue parser or get_markdown will create the futures for us dynamically.
---
bot/exts/info/doc/_batch_parser.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/bot/exts/info/doc/_batch_parser.py b/bot/exts/info/doc/_batch_parser.py
index 4a6d9b544..606c5d803 100644
--- a/bot/exts/info/doc/_batch_parser.py
+++ b/bot/exts/info/doc/_batch_parser.py
@@ -92,7 +92,7 @@ class BatchParser:
def __init__(self):
self._queue: List[QueueItem] = []
self._page_symbols: Dict[str, List[DocItem]] = defaultdict(list)
- self._item_futures: Dict[DocItem, ParseResultFuture] = {}
+ self._item_futures: Dict[DocItem, ParseResultFuture] = defaultdict(ParseResultFuture)
self._parse_task = None
self.cleanup_futures_task = bot.instance.loop.create_task(self._cleanup_futures())
@@ -114,7 +114,6 @@ class BatchParser:
soup = BeautifulSoup(await response.text(encoding="utf8"), "lxml")
self._queue.extend(QueueItem(symbol, soup) for symbol in symbols_to_queue)
- self._item_futures.update((symbol, ParseResultFuture()) for symbol in symbols_to_queue)
del self._page_symbols[doc_item.url]
log.debug(f"Added symbols from {doc_item.url} to parse queue.")
@@ -168,6 +167,7 @@ class BatchParser:
queue_item = self._queue.pop(item_index)
self._queue.append(queue_item)
+ log.trace(f"Moved {item} to the front of the queue.")
def add_item(self, doc_item: DocItem) -> None:
"""Map a DocItem to its page so that the symbol will be parsed once the page is requested."""
--
cgit v1.2.3
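
With the defaultdict, the first subscript creates the future, so the flag can be set before the page's other symbols are known. A self-contained sketch:

    import asyncio
    from collections import defaultdict

    class ParseResultFuture(asyncio.Future):
        # Sketch of the cog's future subclass; it also carries user_requested.
        def __init__(self):
            super().__init__()
            self.user_requested = False

    async def main():
        futures = defaultdict(ParseResultFuture)
        futures["doc_item"].user_requested = True  # created on first access
        print(len(futures))  # 1

    asyncio.run(main())
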
From 318a0f6c5e597c61833984cd608359c8b4e5ddf0 Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Tue, 19 Jan 2021 21:00:34 +0200
Subject: Better GitHub regex
---
bot/exts/info/code_snippets.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py
index 3469b88f4..84f606036 100644
--- a/bot/exts/info/code_snippets.py
+++ b/bot/exts/info/code_snippets.py
@@ -13,27 +13,27 @@ from bot.utils.messages import wait_for_deletion
log = logging.getLogger(__name__)
GITHUB_RE = re.compile(
- r'https://github\.com/(?P<repo>\S+?)/blob/(?P<path>\S+/[^\s#,>]+)'
- r'(#L(?P<start_line>\d+)([-~:]L(?P<end_line>\d+))?)?($|\s|,|>)'
+ r'https://github\.com/(?P<repo>[a-zA-Z0-9-]+/[\w.-]+)/blob/'
+ r'(?P<path>[^#>]+/{0,1})(#L(?P<start_line>\d+)([-~:]L(?P<end_line>\d+))?)'
)
GITHUB_GIST_RE = re.compile(
r'https://gist\.github\.com/([^/]+)/(?P<gist_id>[^\W_]+)/*'
r'(?P<revision>[^\W_]*)/*#file-(?P<file_path>\S+?)'
- r'(-L(?P<start_line>\d+)([-~:]L(?P<end_line>\d+))?)?($|\s|,|>)'
+ r'(-L(?P<start_line>\d+)([-~:]L(?P<end_line>\d+))?)'
)
GITHUB_HEADERS = {'Accept': 'application/vnd.github.v3.raw'}
GITLAB_RE = re.compile(
- r'https://gitlab\.com/(?P<repo>\S+?)/\-/blob/(?P<path>\S+/[^\s#,>]+)'
- r'(#L(?P<start_line>\d+)([-](?P<end_line>\d+))?)?($|\s|,|>)'
+ r'(#L(?P<start_line>\d+)([-](?P<end_line>\d+))?)'
)
BITBUCKET_RE = re.compile(
r'https://bitbucket\.org/(?P<repo>\S+?)/src/'
r'(?P<ref>\S+?)/(?P<file_path>[^\s#,>]+)'
- r'(#lines-(?P<start_line>\d+)(:(?P<end_line>\d+))?)?($|\s|,|>)'
+ r'(#lines-(?P<start_line>\d+)(:(?P<end_line>\d+))?)'
)
--
cgit v1.2.3
From e9f48d83d482502a846dd8d37cee6ab4c01fdf7e Mon Sep 17 00:00:00 2001
From: Andi Qu
Date: Tue, 19 Jan 2021 21:14:19 +0200
Subject: Account for query params in bitbucket
---
bot/exts/info/code_snippets.py | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py
index 84f606036..75d8ac290 100644
--- a/bot/exts/info/code_snippets.py
+++ b/bot/exts/info/code_snippets.py
@@ -18,22 +18,21 @@ GITHUB_RE = re.compile(
)
GITHUB_GIST_RE = re.compile(
- r'https://gist\.github\.com/([^/]+)/(?P<gist_id>[^\W_]+)/*'
- r'(?P<revision>[^\W_]*)/*#file-(?P<file_path>\S+?)'
+ r'https://gist\.github\.com/([^/]+)/(?P<gist_id>[a-zA-Z0-9]+)/*'
+ r'(?P<revision>[a-zA-Z0-9-]*)/*#file-(?P<file_path>[^#>]+?)'
r'(-L(?P<start_line>\d+)([-~:]L(?P<end_line>\d+))?)'
)
GITHUB_HEADERS = {'Accept': 'application/vnd.github.v3.raw'}
GITLAB_RE = re.compile(
- r'https://gitlab\.com/(?P<repo>\S+?)/\-/blob/(?P<path>\S+/[^\s#,>]+)'
- r'(#L(?P<start_line>\d+)([-](?P<end_line>\d+))?)'
+ r'https://gitlab\.com/(?P<repo>[a-zA-Z0-9-]+?)/\-/blob/(?P<path>[^#>]+/{0,1})'
+ r'(#L(?P<start_line>\d+)(-(?P<end_line>\d+))?)'
)
BITBUCKET_RE = re.compile(
- r'https://bitbucket\.org/(?P<repo>\S+?)/src/'
- r'(?P<ref>\S+?)/(?P<file_path>[^\s#,>]+)'
- r'(#lines-(?P<start_line>\d+)(:(?P<end_line>\d+))?)'
+ r'https://bitbucket\.org/(?P<repo>[a-zA-Z0-9-]+/[\w.-]+?)/src/(?P<ref>[0-9a-zA-Z]+?)'
+ r'/(?P<file_path>