about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: scragly <[email protected]> 2019-11-15 23:52:00 +1000
committer: GitHub <[email protected]> 2019-11-15 23:52:00 +1000
commit: 1992cb248ba388aa7e171caef16a4c6f829e652a (patch)
tree: bfee0c74b71a870363c3ab2bcdbb940487aebe78
parent: Merge pull request #619 from python-discord/moderation-logging (diff)
parent: Merge branch 'master' into doc-command (diff)
Docs command improvements (#546)
Docs command improvements

Co-authored-by: Sebastiaan Zeeff <[email protected]>
-rw-r--r-- bot/cogs/doc.py 214
1 files changed, 177 insertions, 37 deletions
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py
index 65cabe46f..e5b3a4062 100644
--- a/bot/cogs/doc.py
+++ b/bot/cogs/doc.py
@@ -4,17 +4,20 @@ import logging
import re
import textwrap
from collections import OrderedDict
+from contextlib import suppress
from typing import Any, Callable, Optional, Tuple
import discord
from bs4 import BeautifulSoup
-from bs4.element import PageElement
+from bs4.element import PageElement, Tag
+from discord.errors import NotFound
from discord.ext import commands
from markdownify import MarkdownConverter
-from requests import ConnectionError
+from requests import ConnectTimeout, ConnectionError, HTTPError
from sphinx.ext import intersphinx
+from urllib3.exceptions import ProtocolError
-from bot.constants import MODERATION_ROLES
+from bot.constants import MODERATION_ROLES, RedirectOutput
from bot.converters import ValidPythonIdentifier, ValidURL
from bot.decorators import with_role
from bot.pagination import LinePaginator
@@ -23,10 +26,33 @@ from bot.pagination import LinePaginator
log = logging.getLogger(__name__)
logging.getLogger('urllib3').setLevel(logging.WARNING)
-
-UNWANTED_SIGNATURE_SYMBOLS = ('[source]', '¶')
+NO_OVERRIDE_GROUPS = (
+ "2to3fixer",
+ "token",
+ "label",
+ "pdbcommand",
+ "term",
+)
+NO_OVERRIDE_PACKAGES = (
+ "python",
+)
+
+SEARCH_END_TAG_ATTRS = (
+ "data",
+ "function",
+ "class",
+ "exception",
+ "seealso",
+ "section",
+ "rubric",
+ "sphinxsidebar",
+)
+UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")
WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)")
+FAILED_REQUEST_RETRY_AMOUNT = 3
+NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay
+
def async_cache(max_size: int = 128, arg_offset: int = 0) -> Callable:
"""
@@ -125,6 +151,7 @@ class Doc(commands.Cog):
self.base_urls = {}
self.bot = bot
self.inventories = {}
+ self.renamed_symbols = set()
self.bot.loop.create_task(self.init_refresh_inventory())
@@ -150,13 +177,32 @@ class Doc(commands.Cog):
"""
self.base_urls[package_name] = base_url
- fetch_func = functools.partial(intersphinx.fetch_inventory, config, '', inventory_url)
- for _, value in (await self.bot.loop.run_in_executor(None, fetch_func)).items():
- # Each value has a bunch of information in the form
- # `(package_name, version, relative_url, ???)`, and we only
- # need the relative documentation URL.
- for symbol, (_, _, relative_doc_url, _) in value.items():
+ package = await self._fetch_inventory(inventory_url, config)
+ if not package:
+ return None
+
+ for group, value in package.items():
+ for symbol, (package_name, _version, relative_doc_url, _) in value.items():
absolute_doc_url = base_url + relative_doc_url
+
+ if symbol in self.inventories:
+ group_name = group.split(":")[1]
+ symbol_base_url = self.inventories[symbol].split("/", 3)[2]
+ if (
+ group_name in NO_OVERRIDE_GROUPS
+ or any(package in symbol_base_url for package in NO_OVERRIDE_PACKAGES)
+ ):
+
+ symbol = f"{group_name}.{symbol}"
+ # If renamed `symbol` already exists, add library name in front to differentiate between them.
+ if symbol in self.renamed_symbols:
+ # Split `package_name` because of packages like Pillow that have spaces in them.
+ symbol = f"{package_name.split()[0]}.{symbol}"
+
+ self.inventories[symbol] = absolute_doc_url
+ self.renamed_symbols.add(symbol)
+ continue
+
self.inventories[symbol] = absolute_doc_url
log.trace(f"Fetched inventory for {package_name}.")
@@ -170,6 +216,7 @@ class Doc(commands.Cog):
# Also, reset the cache used for fetching documentation.
self.base_urls.clear()
self.inventories.clear()
+ self.renamed_symbols.clear()
async_cache.cache = OrderedDict()
# Since Intersphinx is intended to be used with Sphinx,
@@ -185,16 +232,15 @@ class Doc(commands.Cog):
]
await asyncio.gather(*coros)
- async def get_symbol_html(self, symbol: str) -> Optional[Tuple[str, str]]:
+ async def get_symbol_html(self, symbol: str) -> Optional[Tuple[list, str]]:
"""
Given a Python symbol, return its signature and description.
- Returns a tuple in the form (str, str), or `None`.
-
The first tuple element is the signature of the given symbol as a markup-free string, and
the second tuple element is the description of the given symbol with HTML markup included.
- If the given symbol could not be found, returns `None`.
+ If the given symbol is a module, returns a tuple `(None, str)`
+ else if the symbol could not be found, returns `None`.
"""
url = self.inventories.get(symbol)
if url is None:
@@ -207,21 +253,38 @@ class Doc(commands.Cog):
symbol_id = url.split('#')[-1]
soup = BeautifulSoup(html, 'lxml')
symbol_heading = soup.find(id=symbol_id)
- signature_buffer = []
+ search_html = str(soup)
if symbol_heading is None:
return None
- # Traverse the tags of the signature header and ignore any
- # unwanted symbols from it. Add all of it to a temporary buffer.
- for tag in symbol_heading.strings:
- if tag not in UNWANTED_SIGNATURE_SYMBOLS:
- signature_buffer.append(tag.replace('\\', ''))
+ if symbol_id == f"module-{symbol}":
+ # Get page content from the module headerlink to the
+ # first tag that has its class in `SEARCH_END_TAG_ATTRS`
+ start_tag = symbol_heading.find("a", attrs={"class": "headerlink"})
+ if start_tag is None:
+ return [], ""
+
+ end_tag = start_tag.find_next(self._match_end_tag)
+ if end_tag is None:
+ return [], ""
+
+ description_start_index = search_html.find(str(start_tag.parent)) + len(str(start_tag.parent))
+ description_end_index = search_html.find(str(end_tag))
+ description = search_html[description_start_index:description_end_index]
+ signatures = None
- signature = ''.join(signature_buffer)
- description = str(symbol_heading.next_sibling.next_sibling).replace('¶', '')
+ else:
+ signatures = []
+ description = str(symbol_heading.find_next_sibling("dd"))
+ description_pos = search_html.find(description)
+ # Get text of up to 3 signatures, remove unwanted symbols
+ for element in [symbol_heading] + symbol_heading.find_next_siblings("dt", limit=2):
+ signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text)
+ if signature and search_html.find(str(element)) < description_pos:
+ signatures.append(signature)
- return signature, description
+ return signatures, description.replace('¶', '')
@async_cache(arg_offset=1)
async def get_symbol_embed(self, symbol: str) -> Optional[discord.Embed]:
@@ -234,7 +297,7 @@ class Doc(commands.Cog):
if scraped_html is None:
return None
- signature = scraped_html[0]
+ signatures = scraped_html[0]
permalink = self.inventories[symbol]
description = markdownify(scraped_html[1])
@@ -242,26 +305,42 @@ class Doc(commands.Cog):
# of a double newline (interpreted as a paragraph) before index 1000.
if len(description) > 1000:
shortened = description[:1000]
- last_paragraph_end = shortened.rfind('\n\n')
- description = description[:last_paragraph_end] + f"... [read more]({permalink})"
+ last_paragraph_end = shortened.rfind('\n\n', 100)
+ if last_paragraph_end == -1:
+ last_paragraph_end = shortened.rfind('. ')
+ description = description[:last_paragraph_end]
+
+ # If there is an incomplete code block, cut it out
+ if description.count("```") % 2:
+ codeblock_start = description.rfind('```py')
+ description = description[:codeblock_start].rstrip()
+ description += f"... [read more]({permalink})"
description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description)
- if not signature:
+ if signatures is None:
+ # If symbol is a module, don't show signature.
+ embed_description = description
+
+ elif not signatures:
# It's some "meta-page", for example:
# https://docs.djangoproject.com/en/dev/ref/views/#module-django.views
- return discord.Embed(
- title=f'`{symbol}`',
- url=permalink,
- description="This appears to be a generic page not tied to a specific symbol."
- )
+ embed_description = "This appears to be a generic page not tied to a specific symbol."
- signature = textwrap.shorten(signature, 500)
- return discord.Embed(
+ else:
+ embed_description = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures)
+ embed_description += f"\n{description}"
+
+ embed = discord.Embed(
title=f'`{symbol}`',
url=permalink,
- description=f"```py\n{signature}```{description}"
+ description=embed_description
)
+ # Show all symbols with the same name that were renamed in the footer.
+ embed.set_footer(
+ text=", ".join(renamed for renamed in self.renamed_symbols - {symbol} if renamed.endswith(f".{symbol}"))
+ )
+ return embed
@commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)
async def docs_group(self, ctx: commands.Context, symbol: commands.clean_content = None) -> None:
@@ -307,7 +386,10 @@ class Doc(commands.Cog):
description=f"Sorry, I could not find any documentation for `{symbol}`.",
colour=discord.Colour.red()
)
- await ctx.send(embed=error_embed)
+ error_message = await ctx.send(embed=error_embed)
+ with suppress(NotFound):
+ await error_message.delete(delay=NOT_FOUND_DELETE_DELAY)
+ await ctx.message.delete(delay=NOT_FOUND_DELETE_DELAY)
else:
await ctx.send(embed=doc_embed)
@@ -365,6 +447,64 @@ class Doc(commands.Cog):
await self.refresh_inventory()
await ctx.send(f"Successfully deleted `{package_name}` and refreshed inventory.")
+ @docs_group.command(name="refresh", aliases=("rfsh", "r"))
+ @with_role(*MODERATION_ROLES)
+ async def refresh_command(self, ctx: commands.Context) -> None:
+ """Refresh inventories and send differences to channel."""
+ old_inventories = set(self.base_urls)
+ with ctx.typing():
+ await self.refresh_inventory()
+ # Get differences of added and removed inventories
+ added = ', '.join(inv for inv in self.base_urls if inv not in old_inventories)
+ if added:
+ added = f"+ {added}"
+
+ removed = ', '.join(inv for inv in old_inventories if inv not in self.base_urls)
+ if removed:
+ removed = f"- {removed}"
+
+ embed = discord.Embed(
+ title="Inventories refreshed",
+ description=f"```diff\n{added}\n{removed}```" if added or removed else ""
+ )
+ await ctx.send(embed=embed)
+
+ async def _fetch_inventory(self, inventory_url: str, config: SphinxConfiguration) -> Optional[dict]:
+ """Get and return inventory from `inventory_url`. If fetching fails, return None."""
+ fetch_func = functools.partial(intersphinx.fetch_inventory, config, '', inventory_url)
+ for retry in range(1, FAILED_REQUEST_RETRY_AMOUNT+1):
+ try:
+ package = await self.bot.loop.run_in_executor(None, fetch_func)
+ except ConnectTimeout:
+ log.error(
+ f"Fetching of inventory {inventory_url} timed out,"
+ f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})"
+ )
+ except ProtocolError:
+ log.error(
+ f"Connection lost while fetching inventory {inventory_url},"
+ f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})"
+ )
+ except HTTPError as e:
+ log.error(f"Fetching of inventory {inventory_url} failed with status code {e.response.status_code}.")
+ return None
+ except ConnectionError:
+ log.error(f"Couldn't establish connection to inventory {inventory_url}.")
+ return None
+ else:
+ return package
+ log.error(f"Fetching of inventory {inventory_url} failed.")
+ return None
+
+ @staticmethod
+ def _match_end_tag(tag: Tag) -> bool:
+ """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table."""
+ for attr in SEARCH_END_TAG_ATTRS:
+ if attr in tag.get("class", ()):
+ return True
+
+ return tag.name == "table"
+
def setup(bot: commands.Bot) -> None:
"""Doc cog load."""