From 4efb97c5020f591d8cdd1e214e06df294e72d8f1 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 20 Oct 2019 18:32:25 +0200 Subject: add handling for duplicate symbols in docs inventories --- bot/cogs/doc.py | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index a13464bff..43315f477 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -23,7 +23,17 @@ from bot.pagination import LinePaginator log = logging.getLogger(__name__) logging.getLogger('urllib3').setLevel(logging.WARNING) - +NO_OVERRIDE_GROUPS = ( + "2to3fixer", + "token", + "label", + "pdbcommand", + "term", + "function" +) +NO_OVERRIDE_PACKAGES = ( + "Python", +) UNWANTED_SIGNATURE_SYMBOLS = ('[source]', '¶') WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)") @@ -125,6 +135,7 @@ class Doc(commands.Cog): self.base_urls = {} self.bot = bot self.inventories = {} + self.renamed_symbols = set() self.bot.loop.create_task(self.init_refresh_inventory()) @@ -151,12 +162,32 @@ class Doc(commands.Cog): self.base_urls[package_name] = base_url fetch_func = functools.partial(intersphinx.fetch_inventory, config, '', inventory_url) - for _, value in (await self.bot.loop.run_in_executor(None, fetch_func)).items(): + for group, value in (await self.bot.loop.run_in_executor(None, fetch_func)).items(): # Each value has a bunch of information in the form # `(package_name, version, relative_url, ???)`, and we only - # need the relative documentation URL. - for symbol, (_, _, relative_doc_url, _) in value.items(): + # need the package_name and the relative documentation URL. + for symbol, (package_name, _, relative_doc_url, _) in value.items(): absolute_doc_url = base_url + relative_doc_url + + if symbol in self.inventories: + # get `group_name` from _:group_name + group_name = group.split(":")[1] + if (group_name in NO_OVERRIDE_GROUPS + # check if any package from `NO_OVERRIDE_PACKAGES` + # is in base URL of the symbol that would be overridden + or any(package in self.inventories[symbol].split("/", 3)[2] + for package in NO_OVERRIDE_PACKAGES)): + + symbol = f"{group_name}.{symbol}" + # if renamed `symbol` was already exists, add library name in front + if symbol in self.renamed_symbols: + # split `package_name` because of packages like Pillow that have spaces in them + symbol = f"{package_name.split()[0]}.{symbol}" + + self.inventories[symbol] = absolute_doc_url + self.renamed_symbols.add(symbol) + continue + self.inventories[symbol] = absolute_doc_url log.trace(f"Fetched inventory for {package_name}.") -- cgit v1.2.3 From f1dbb63e6c4a7ed38f8bed994c109e638498d546 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 20 Oct 2019 18:39:08 +0200 Subject: show renamed duplicates in embed footer --- bot/cogs/doc.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 43315f477..ecff43864 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -281,18 +281,23 @@ class Doc(commands.Cog): if not signature: # It's some "meta-page", for example: # https://docs.djangoproject.com/en/dev/ref/views/#module-django.views - return discord.Embed( + embed = discord.Embed( title=f'`{symbol}`', url=permalink, description="This appears to be a generic page not tied to a specific symbol." ) - - signature = textwrap.shorten(signature, 500) - return discord.Embed( - title=f'`{symbol}`', - url=permalink, - description=f"```py\n{signature}```{description}" - ) + else: + signature = textwrap.shorten(signature, 500) + embed = discord.Embed( + title=f'`{symbol}`', + url=permalink, + description=f"```py\n{signature}```{description}" + ) + # show all symbols with the same name that were renamed in the footer + embed.set_footer(text=", ".join(renamed for renamed in self.renamed_symbols - {symbol} + if renamed.endswith(f".{symbol}")) + ) + return embed @commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True) async def docs_group(self, ctx: commands.Context, symbol: commands.clean_content = None) -> None: -- cgit v1.2.3 From a05f28c97d0f2ea9d3dafcdbd24444c59905af84 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 20 Oct 2019 18:42:59 +0200 Subject: Auto delete messages when docs are not found --- bot/cogs/doc.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index ecff43864..9bb21cce3 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -4,17 +4,19 @@ import logging import re import textwrap from collections import OrderedDict +from contextlib import suppress from typing import Any, Callable, Optional, Tuple import discord from bs4 import BeautifulSoup from bs4.element import PageElement +from discord.errors import NotFound from discord.ext import commands from markdownify import MarkdownConverter from requests import ConnectionError from sphinx.ext import intersphinx -from bot.constants import MODERATION_ROLES +from bot.constants import MODERATION_ROLES, RedirectOutput from bot.converters import ValidPythonIdentifier, ValidURL from bot.decorators import with_role from bot.pagination import LinePaginator @@ -23,6 +25,7 @@ from bot.pagination import LinePaginator log = logging.getLogger(__name__) logging.getLogger('urllib3').setLevel(logging.WARNING) +NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay NO_OVERRIDE_GROUPS = ( "2to3fixer", "token", @@ -343,7 +346,10 @@ class Doc(commands.Cog): description=f"Sorry, I could not find any documentation for `{symbol}`.", colour=discord.Colour.red() ) - await ctx.send(embed=error_embed) + error_message = await ctx.send(embed=error_embed) + with suppress(NotFound): + await error_message.delete(delay=NOT_FOUND_DELETE_DELAY) + await ctx.message.delete(delay=NOT_FOUND_DELETE_DELAY) else: await ctx.send(embed=doc_embed) -- cgit v1.2.3 From eda6cd7ff818454ad7bf448040a87ff0077025bc Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 20 Oct 2019 21:15:12 +0200 Subject: remove "function" from NO_OVERRIDE_GROUPS --- bot/cogs/doc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 9bb21cce3..f1213d170 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -32,7 +32,6 @@ NO_OVERRIDE_GROUPS = ( "label", "pdbcommand", "term", - "function" ) NO_OVERRIDE_PACKAGES = ( "Python", -- cgit v1.2.3 From d5dea25fef79e16d726f1f0ce8d2bb25291d6c49 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Mon, 21 Oct 2019 22:09:46 +0200 Subject: Don't include a signature and only get first paragraphs when scraping when symbol is a module --- bot/cogs/doc.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index f1213d170..a13552ac0 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -222,12 +222,11 @@ class Doc(commands.Cog): """ Given a Python symbol, return its signature and description. - Returns a tuple in the form (str, str), or `None`. - The first tuple element is the signature of the given symbol as a markup-free string, and the second tuple element is the description of the given symbol with HTML markup included. - If the given symbol could not be found, returns `None`. + If the given symbol is a module, returns a tuple `(None, str)` + else if the symbol could not be found, returns `None`. """ url = self.inventories.get(symbol) if url is None: @@ -245,14 +244,23 @@ class Doc(commands.Cog): if symbol_heading is None: return None - # Traverse the tags of the signature header and ignore any - # unwanted symbols from it. Add all of it to a temporary buffer. - for tag in symbol_heading.strings: - if tag not in UNWANTED_SIGNATURE_SYMBOLS: - signature_buffer.append(tag.replace('\\', '')) + if symbol_id == f"module-{symbol}": + # Get all paragraphs until the first div after the section div + # if searched symbol is a module. + trailing_div = symbol_heading.findNext("div") + info_paragraphs = trailing_div.find_previous_siblings("p")[::-1] + signature = None + description = ''.join(str(paragraph) for paragraph in info_paragraphs).replace('¶', '') - signature = ''.join(signature_buffer) - description = str(symbol_heading.next_sibling.next_sibling).replace('¶', '') + else: + # Traverse the tags of the signature header and ignore any + # unwanted symbols from it. Add all of it to a temporary buffer. + + for tag in symbol_heading.strings: + if tag not in UNWANTED_SIGNATURE_SYMBOLS: + signature_buffer.append(tag.replace('\\', '')) + signature = ''.join(signature_buffer) + description = str(symbol_heading.next_sibling.next_sibling).replace('¶', '') return signature, description -- cgit v1.2.3 From 55b276a1f7e56a950e215bd8289b7f946b2f180e Mon Sep 17 00:00:00 2001 From: Numerlor Date: Mon, 21 Oct 2019 22:10:45 +0200 Subject: Allow embeds to not include signatures in case the symbol is a module --- bot/cogs/doc.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index a13552ac0..0c370f665 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -288,21 +288,24 @@ class Doc(commands.Cog): description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description) - if not signature: + if signature is None: + # If symbol is a module, don't show signature. + embed_description = description + + elif not signature: # It's some "meta-page", for example: # https://docs.djangoproject.com/en/dev/ref/views/#module-django.views - embed = discord.Embed( - title=f'`{symbol}`', - url=permalink, - description="This appears to be a generic page not tied to a specific symbol." - ) + embed_description = "This appears to be a generic page not tied to a specific symbol." + else: signature = textwrap.shorten(signature, 500) - embed = discord.Embed( - title=f'`{symbol}`', - url=permalink, - description=f"```py\n{signature}```{description}" - ) + embed_description = f"```py\n{signature}```{description}" + + embed = discord.Embed( + title=f'`{symbol}`', + url=permalink, + description=embed_description + ) # show all symbols with the same name that were renamed in the footer embed.set_footer(text=", ".join(renamed for renamed in self.renamed_symbols - {symbol} if renamed.endswith(f".{symbol}")) -- cgit v1.2.3 From 09f5cd78142201ff0133a25ee1ea6cff1c739e1f Mon Sep 17 00:00:00 2001 From: Numerlor Date: Mon, 21 Oct 2019 22:11:20 +0200 Subject: Grammar check comment --- bot/cogs/doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 0c370f665..8b81b3053 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -306,7 +306,7 @@ class Doc(commands.Cog): url=permalink, description=embed_description ) - # show all symbols with the same name that were renamed in the footer + # Show all symbols with the same name that were renamed in the footer. embed.set_footer(text=", ".join(renamed for renamed in self.renamed_symbols - {symbol} if renamed.endswith(f".{symbol}")) ) -- cgit v1.2.3 From efe592cc0420f325ab266afc822b8d4b8135d467 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sat, 2 Nov 2019 17:26:50 +0100 Subject: Do not cut off description in code blocks --- bot/cogs/doc.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 8b81b3053..4a095fa51 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -284,7 +284,13 @@ class Doc(commands.Cog): if len(description) > 1000: shortened = description[:1000] last_paragraph_end = shortened.rfind('\n\n') - description = description[:last_paragraph_end] + f"... [read more]({permalink})" + description = description[:last_paragraph_end] + + # If there is an incomplete code block, cut it out + if description.count("```") % 2: + codeblock_start = description.rfind('```py') + description = description[:codeblock_start].rstrip() + description += f"... [read more]({permalink})" description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description) -- cgit v1.2.3 From 82e1f3764ba0d102ede007ba6352406cfe3fb82a Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sat, 2 Nov 2019 17:37:42 +0100 Subject: Get symbol description by searching for a dd tag instead of traversing the siblings --- bot/cogs/doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 4a095fa51..96f737c03 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -260,7 +260,7 @@ class Doc(commands.Cog): if tag not in UNWANTED_SIGNATURE_SYMBOLS: signature_buffer.append(tag.replace('\\', '')) signature = ''.join(signature_buffer) - description = str(symbol_heading.next_sibling.next_sibling).replace('¶', '') + description = str(symbol_heading.find_next_sibling("dd")).replace('¶', '') return signature, description -- cgit v1.2.3 From ae8c862a353ddc10593d36d557fc7215232baf5b Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sat, 2 Nov 2019 17:44:02 +0100 Subject: Get up to 3 signatures of a symbol --- bot/cogs/doc.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 96f737c03..2987f7245 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -36,7 +36,7 @@ NO_OVERRIDE_GROUPS = ( NO_OVERRIDE_PACKAGES = ( "Python", ) -UNWANTED_SIGNATURE_SYMBOLS = ('[source]', '¶') +UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶") WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)") @@ -218,7 +218,7 @@ class Doc(commands.Cog): ] await asyncio.gather(*coros) - async def get_symbol_html(self, symbol: str) -> Optional[Tuple[str, str]]: + async def get_symbol_html(self, symbol: str) -> Optional[Tuple[list, str]]: """ Given a Python symbol, return its signature and description. @@ -239,7 +239,7 @@ class Doc(commands.Cog): symbol_id = url.split('#')[-1] soup = BeautifulSoup(html, 'lxml') symbol_heading = soup.find(id=symbol_id) - signature_buffer = [] + signatures = [] if symbol_heading is None: return None @@ -253,16 +253,14 @@ class Doc(commands.Cog): description = ''.join(str(paragraph) for paragraph in info_paragraphs).replace('¶', '') else: - # Traverse the tags of the signature header and ignore any - # unwanted symbols from it. Add all of it to a temporary buffer. - - for tag in symbol_heading.strings: - if tag not in UNWANTED_SIGNATURE_SYMBOLS: - signature_buffer.append(tag.replace('\\', '')) - signature = ''.join(signature_buffer) + # Get text of up to 3 signatures, remove unwanted symbols + for element in [symbol_heading] + symbol_heading.find_next_siblings("dt", limit=2): + signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text) + if signature: + signatures.append(signature) description = str(symbol_heading.find_next_sibling("dd")).replace('¶', '') - return signature, description + return signatures, description @async_cache(arg_offset=1) async def get_symbol_embed(self, symbol: str) -> Optional[discord.Embed]: @@ -275,7 +273,7 @@ class Doc(commands.Cog): if scraped_html is None: return None - signature = scraped_html[0] + signatures = scraped_html[0] permalink = self.inventories[symbol] description = markdownify(scraped_html[1]) @@ -294,18 +292,18 @@ class Doc(commands.Cog): description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description) - if signature is None: + if signatures is None: # If symbol is a module, don't show signature. embed_description = description - elif not signature: + elif not signatures: # It's some "meta-page", for example: # https://docs.djangoproject.com/en/dev/ref/views/#module-django.views embed_description = "This appears to be a generic page not tied to a specific symbol." else: - signature = textwrap.shorten(signature, 500) - embed_description = f"```py\n{signature}```{description}" + embed_description = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures) + embed_description += description embed = discord.Embed( title=f'`{symbol}`', -- cgit v1.2.3 From 1aed2e4f4996f5546652bbb26e8fbf403e28aac4 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sat, 2 Nov 2019 18:28:04 +0100 Subject: Improve module description searching --- bot/cogs/doc.py | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 2987f7245..30a14f26c 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -9,7 +9,7 @@ from typing import Any, Callable, Optional, Tuple import discord from bs4 import BeautifulSoup -from bs4.element import PageElement +from bs4.element import PageElement, Tag from discord.errors import NotFound from discord.ext import commands from markdownify import MarkdownConverter @@ -37,6 +37,16 @@ NO_OVERRIDE_PACKAGES = ( "Python", ) UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶") +SEARCH_END_TAG_ATTRS = ( + "data", + "function", + "class", + "exception", + "seealso", + "section", + "rubric", + "sphinxsidebar", +) WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)") @@ -245,12 +255,21 @@ class Doc(commands.Cog): return None if symbol_id == f"module-{symbol}": - # Get all paragraphs until the first div after the section div - # if searched symbol is a module. - trailing_div = symbol_heading.findNext("div") - info_paragraphs = trailing_div.find_previous_siblings("p")[::-1] - signature = None - description = ''.join(str(paragraph) for paragraph in info_paragraphs).replace('¶', '') + search_html = str(soup) + # Get page content from the module headerlink to the + # first tag that has its class in `SEARCH_END_TAG_ATTRS` + start_tag = symbol_heading.find("a", attrs={"class": "headerlink"}) + if start_tag is None: + return [], "" + + end_tag = start_tag.find_next(self._match_end_tag) + if end_tag is None: + return [], "" + + description_start_index = search_html.find(str(start_tag.parent)) + len(str(start_tag.parent)) + description_end_index = search_html.find(str(end_tag)) + description = search_html[description_start_index:description_end_index].replace('¶', '') + signatures = None else: # Get text of up to 3 signatures, remove unwanted symbols @@ -422,6 +441,15 @@ class Doc(commands.Cog): await self.refresh_inventory() await ctx.send(f"Successfully deleted `{package_name}` and refreshed inventory.") + @staticmethod + def _match_end_tag(tag: Tag) -> bool: + """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table.""" + for attr in SEARCH_END_TAG_ATTRS: + if attr in tag.get("class", ()): + return True + + return tag.name == "table" + def setup(bot: commands.Bot) -> None: """Doc cog load.""" -- cgit v1.2.3 From 3140b01bff9c4912b9f89589e3b3f200dbad99ee Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 3 Nov 2019 18:36:16 +0100 Subject: Handle exceptions when fetching inventories --- bot/cogs/doc.py | 88 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 31 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 30a14f26c..55b69e9a4 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -13,8 +13,9 @@ from bs4.element import PageElement, Tag from discord.errors import NotFound from discord.ext import commands from markdownify import MarkdownConverter -from requests import ConnectionError +from requests import ConnectTimeout, ConnectionError, HTTPError from sphinx.ext import intersphinx +from urllib3.exceptions import ProtocolError from bot.constants import MODERATION_ROLES, RedirectOutput from bot.converters import ValidPythonIdentifier, ValidURL @@ -36,6 +37,7 @@ NO_OVERRIDE_GROUPS = ( NO_OVERRIDE_PACKAGES = ( "Python", ) +FAILED_REQUEST_RETRY_AMOUNT = 3 UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶") SEARCH_END_TAG_ATTRS = ( "data", @@ -173,36 +175,37 @@ class Doc(commands.Cog): """ self.base_urls[package_name] = base_url - fetch_func = functools.partial(intersphinx.fetch_inventory, config, '', inventory_url) - for group, value in (await self.bot.loop.run_in_executor(None, fetch_func)).items(): - # Each value has a bunch of information in the form - # `(package_name, version, relative_url, ???)`, and we only - # need the package_name and the relative documentation URL. - for symbol, (package_name, _, relative_doc_url, _) in value.items(): - absolute_doc_url = base_url + relative_doc_url - - if symbol in self.inventories: - # get `group_name` from _:group_name - group_name = group.split(":")[1] - if (group_name in NO_OVERRIDE_GROUPS - # check if any package from `NO_OVERRIDE_PACKAGES` - # is in base URL of the symbol that would be overridden - or any(package in self.inventories[symbol].split("/", 3)[2] - for package in NO_OVERRIDE_PACKAGES)): - - symbol = f"{group_name}.{symbol}" - # if renamed `symbol` was already exists, add library name in front - if symbol in self.renamed_symbols: - # split `package_name` because of packages like Pillow that have spaces in them - symbol = f"{package_name.split()[0]}.{symbol}" - - self.inventories[symbol] = absolute_doc_url - self.renamed_symbols.add(symbol) - continue - - self.inventories[symbol] = absolute_doc_url - - log.trace(f"Fetched inventory for {package_name}.") + package = await self._fetch_inventory(inventory_url, config) + if package: + for group, value in package.items(): + # Each value has a bunch of information in the form + # `(package_name, version, relative_url, ???)`, and we only + # need the package_name and the relative documentation URL. + for symbol, (package_name, _, relative_doc_url, _) in value.items(): + absolute_doc_url = base_url + relative_doc_url + + if symbol in self.inventories: + # get `group_name` from _:group_name + group_name = group.split(":")[1] + if (group_name in NO_OVERRIDE_GROUPS + # check if any package from `NO_OVERRIDE_PACKAGES` + # is in base URL of the symbol that would be overridden + or any(package in self.inventories[symbol].split("/", 3)[2] + for package in NO_OVERRIDE_PACKAGES)): + + symbol = f"{group_name}.{symbol}" + # if renamed `symbol` was already exists, add library name in front + if symbol in self.renamed_symbols: + # split `package_name` because of packages like Pillow that have spaces in them + symbol = f"{package_name.split()[0]}.{symbol}" + + self.inventories[symbol] = absolute_doc_url + self.renamed_symbols.add(symbol) + continue + + self.inventories[symbol] = absolute_doc_url + + log.trace(f"Fetched inventory for {package_name}.") async def refresh_inventory(self) -> None: """Refresh internal documentation inventory.""" @@ -441,6 +444,29 @@ class Doc(commands.Cog): await self.refresh_inventory() await ctx.send(f"Successfully deleted `{package_name}` and refreshed inventory.") + async def _fetch_inventory(self, inventory_url: str, config: SphinxConfiguration) -> Optional[dict]: + """Get and return inventory from `inventory_url`. If fetching fails, return None.""" + fetch_func = functools.partial(intersphinx.fetch_inventory, config, '', inventory_url) + for retry in range(1, FAILED_REQUEST_RETRY_AMOUNT+1): + try: + package = await self.bot.loop.run_in_executor(None, fetch_func) + except ConnectTimeout: + log.error(f"Fetching of inventory {inventory_url} timed out," + f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})") + except ProtocolError: + log.error(f"Connection lost while fetching inventory {inventory_url}," + f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})") + except HTTPError as e: + log.error(f"Fetching of inventory {inventory_url} failed with status code {e.response.status_code}.") + return None + except ConnectionError: + log.error(f"Couldn't establish connection to inventory {inventory_url}.") + return None + else: + return package + log.error(f"Fetching of inventory {inventory_url} failed.") + return None + @staticmethod def _match_end_tag(tag: Tag) -> bool: """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table.""" -- cgit v1.2.3 From a8475f5fedb91c9e0f1c5c28c7d64aebbbef64f4 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 3 Nov 2019 20:06:15 +0100 Subject: Fix case for the python package name in `NO_OVERRIDE_PACKAGES` --- bot/cogs/doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 55b69e9a4..563f83040 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -35,7 +35,7 @@ NO_OVERRIDE_GROUPS = ( "term", ) NO_OVERRIDE_PACKAGES = ( - "Python", + "python", ) FAILED_REQUEST_RETRY_AMOUNT = 3 UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶") -- cgit v1.2.3 From 1b0a8c8109240615e5d9309937a434e1d29bcf24 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 3 Nov 2019 20:06:41 +0100 Subject: Comment grammar --- bot/cogs/doc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 563f83040..934cb2a6d 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -194,9 +194,9 @@ class Doc(commands.Cog): for package in NO_OVERRIDE_PACKAGES)): symbol = f"{group_name}.{symbol}" - # if renamed `symbol` was already exists, add library name in front + # If renamed `symbol` already exists, add library name in front. if symbol in self.renamed_symbols: - # split `package_name` because of packages like Pillow that have spaces in them + # Split `package_name` because of packages like Pillow that have spaces in them. symbol = f"{package_name.split()[0]}.{symbol}" self.inventories[symbol] = absolute_doc_url -- cgit v1.2.3 From 254dfbb616651f875936598c9884761921de7b76 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 3 Nov 2019 20:28:07 +0100 Subject: Make sure only signatures belonging to the symbol are fetched --- bot/cogs/doc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 934cb2a6d..dcbcfe3ad 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -253,12 +253,12 @@ class Doc(commands.Cog): soup = BeautifulSoup(html, 'lxml') symbol_heading = soup.find(id=symbol_id) signatures = [] + search_html = str(soup) if symbol_heading is None: return None if symbol_id == f"module-{symbol}": - search_html = str(soup) # Get page content from the module headerlink to the # first tag that has its class in `SEARCH_END_TAG_ATTRS` start_tag = symbol_heading.find("a", attrs={"class": "headerlink"}) @@ -275,12 +275,13 @@ class Doc(commands.Cog): signatures = None else: + description = str(symbol_heading.find_next_sibling("dd")).replace('¶', '') + description_pos = search_html.find(description) # Get text of up to 3 signatures, remove unwanted symbols for element in [symbol_heading] + symbol_heading.find_next_siblings("dt", limit=2): signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text) - if signature: + if signature and search_html.find(signature) < description_pos: signatures.append(signature) - description = str(symbol_heading.find_next_sibling("dd")).replace('¶', '') return signatures, description -- cgit v1.2.3 From 4d5d307f9a499cd874d90e6500f877ce560c012f Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 10 Nov 2019 19:34:28 +0100 Subject: fix signatures and descriptions not being found when present --- bot/cogs/doc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index dcbcfe3ad..6e50cd27d 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -271,19 +271,19 @@ class Doc(commands.Cog): description_start_index = search_html.find(str(start_tag.parent)) + len(str(start_tag.parent)) description_end_index = search_html.find(str(end_tag)) - description = search_html[description_start_index:description_end_index].replace('¶', '') + description = search_html[description_start_index:description_end_index] signatures = None else: - description = str(symbol_heading.find_next_sibling("dd")).replace('¶', '') + description = str(symbol_heading.find_next_sibling("dd")) description_pos = search_html.find(description) # Get text of up to 3 signatures, remove unwanted symbols for element in [symbol_heading] + symbol_heading.find_next_siblings("dt", limit=2): signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text) - if signature and search_html.find(signature) < description_pos: + if signature and search_html.find(str(element)) < description_pos: signatures.append(signature) - return signatures, description + return signatures, description.replace('¶', '') @async_cache(arg_offset=1) async def get_symbol_embed(self, symbol: str) -> Optional[discord.Embed]: -- cgit v1.2.3 From 7de5156a7719f0639021e8186f7ea17f5b853af7 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 10 Nov 2019 19:39:14 +0100 Subject: Add a newline after signatures for readability --- bot/cogs/doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 6e50cd27d..653d48528 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -326,7 +326,7 @@ class Doc(commands.Cog): else: embed_description = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures) - embed_description += description + embed_description += f"\n{description}" embed = discord.Embed( title=f'`{symbol}`', -- cgit v1.2.3 From 4795da86d0fef72ac677ae0a8f9e988da1923e17 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 10 Nov 2019 19:43:56 +0100 Subject: Cut off description at 1000 chars if paragraph is not found --- bot/cogs/doc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 653d48528..b04355e28 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -305,6 +305,8 @@ class Doc(commands.Cog): if len(description) > 1000: shortened = description[:1000] last_paragraph_end = shortened.rfind('\n\n') + if last_paragraph_end == -1: + last_paragraph_end = 1000 description = description[:last_paragraph_end] # If there is an incomplete code block, cut it out -- cgit v1.2.3 From 34510f52c6bbe5e2a8bbfc34f8e5d648d0d39a96 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 10 Nov 2019 20:03:48 +0100 Subject: Move paragraph search to not cut off long starting paragraphs Co-authored-by: scargly <29337040+scragly@users.noreply.github.com> --- bot/cogs/doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index b04355e28..73895e3eb 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -304,7 +304,7 @@ class Doc(commands.Cog): # of a double newline (interpreted as a paragraph) before index 1000. if len(description) > 1000: shortened = description[:1000] - last_paragraph_end = shortened.rfind('\n\n') + last_paragraph_end = shortened.rfind('\n\n', 100) if last_paragraph_end == -1: last_paragraph_end = 1000 description = description[:last_paragraph_end] -- cgit v1.2.3 From 219cde70f03476ac6ae4a7f84322757bebeec51e Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 10 Nov 2019 21:30:26 +0100 Subject: Add a command for refreshing inventories --- bot/cogs/doc.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 73895e3eb..8cf32fc7f 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -447,6 +447,28 @@ class Doc(commands.Cog): await self.refresh_inventory() await ctx.send(f"Successfully deleted `{package_name}` and refreshed inventory.") + @docs_group.command(name="refresh", aliases=("rfsh", "r")) + @with_role(*MODERATION_ROLES) + async def refresh_command(self, ctx: commands.Context) -> None: + """Refresh inventories and send differences to channel.""" + old_inventories = set(self.base_urls) + with ctx.typing(): + await self.refresh_inventory() + # Get differences of added and removed inventories + added = ', '.join(inv for inv in self.base_urls if inv not in old_inventories) + if added: + added = f"`+ {added}`" + + removed = ', '.join(inv for inv in old_inventories if inv not in self.base_urls) + if removed: + removed = f"`- {removed}`" + + embed = discord.Embed( + title="Inventories refreshed", + description=f"{added}\n{removed}" if added or removed else "" + ) + await ctx.send(embed=embed) + async def _fetch_inventory(self, inventory_url: str, config: SphinxConfiguration) -> Optional[dict]: """Get and return inventory from `inventory_url`. If fetching fails, return None.""" fetch_func = functools.partial(intersphinx.fetch_inventory, config, '', inventory_url) -- cgit v1.2.3 From 4f393d7b95101cc31269eb30742195e771deb705 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Sun, 10 Nov 2019 21:31:47 +0100 Subject: Move signatures definition --- bot/cogs/doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 8cf32fc7f..f7e8ae9d6 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -252,7 +252,6 @@ class Doc(commands.Cog): symbol_id = url.split('#')[-1] soup = BeautifulSoup(html, 'lxml') symbol_heading = soup.find(id=symbol_id) - signatures = [] search_html = str(soup) if symbol_heading is None: @@ -275,6 +274,7 @@ class Doc(commands.Cog): signatures = None else: + signatures = [] description = str(symbol_heading.find_next_sibling("dd")) description_pos = search_html.find(description) # Get text of up to 3 signatures, remove unwanted symbols -- cgit v1.2.3 From 6944175cea2c6595ec29b9ef67ff2ad9a8efb8ae Mon Sep 17 00:00:00 2001 From: Numerlor Date: Mon, 11 Nov 2019 00:58:32 +0100 Subject: clear renamed symbols on inventory refresh --- bot/cogs/doc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index f7e8ae9d6..90f496ceb 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -216,6 +216,7 @@ class Doc(commands.Cog): # Also, reset the cache used for fetching documentation. self.base_urls.clear() self.inventories.clear() + self.renamed_symbols.clear() async_cache.cache = OrderedDict() # Since Intersphinx is intended to be used with Sphinx, -- cgit v1.2.3 From 4a7de0bd155a4717f6cbc593a60dbec130e7ca40 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Mon, 11 Nov 2019 01:12:21 +0100 Subject: Do not cut off text arbitrarily but at last sentence to make sure no unfinished markdown is left in --- bot/cogs/doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 90f496ceb..bf6cee101 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -307,7 +307,7 @@ class Doc(commands.Cog): shortened = description[:1000] last_paragraph_end = shortened.rfind('\n\n', 100) if last_paragraph_end == -1: - last_paragraph_end = 1000 + last_paragraph_end = shortened.rfind('. ') description = description[:last_paragraph_end] # If there is an incomplete code block, cut it out -- cgit v1.2.3 From fb338545c4c2a133e23a664c77813d2ce9aba41c Mon Sep 17 00:00:00 2001 From: Numerlor Date: Mon, 11 Nov 2019 01:15:01 +0100 Subject: syntax highlight diff of reloaded inventories --- bot/cogs/doc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index bf6cee101..0d4884e8b 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -458,15 +458,15 @@ class Doc(commands.Cog): # Get differences of added and removed inventories added = ', '.join(inv for inv in self.base_urls if inv not in old_inventories) if added: - added = f"`+ {added}`" + added = f"+ {added}" removed = ', '.join(inv for inv in old_inventories if inv not in self.base_urls) if removed: - removed = f"`- {removed}`" + removed = f"- {removed}" embed = discord.Embed( title="Inventories refreshed", - description=f"{added}\n{removed}" if added or removed else "" + description=f"```diff\n{added}\n{removed}```" if added or removed else "" ) await ctx.send(embed=embed) -- cgit v1.2.3 From 160962a56110ed970c7419ed650d9d8a84dbaa9a Mon Sep 17 00:00:00 2001 From: Numerlor Date: Tue, 12 Nov 2019 16:46:51 +0100 Subject: Adjust code style and comments --- bot/cogs/doc.py | 77 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 0d4884e8b..b82eac5fe 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -176,36 +176,34 @@ class Doc(commands.Cog): self.base_urls[package_name] = base_url package = await self._fetch_inventory(inventory_url, config) - if package: - for group, value in package.items(): - # Each value has a bunch of information in the form - # `(package_name, version, relative_url, ???)`, and we only - # need the package_name and the relative documentation URL. - for symbol, (package_name, _, relative_doc_url, _) in value.items(): - absolute_doc_url = base_url + relative_doc_url - - if symbol in self.inventories: - # get `group_name` from _:group_name - group_name = group.split(":")[1] - if (group_name in NO_OVERRIDE_GROUPS - # check if any package from `NO_OVERRIDE_PACKAGES` - # is in base URL of the symbol that would be overridden - or any(package in self.inventories[symbol].split("/", 3)[2] - for package in NO_OVERRIDE_PACKAGES)): - - symbol = f"{group_name}.{symbol}" - # If renamed `symbol` already exists, add library name in front. - if symbol in self.renamed_symbols: - # Split `package_name` because of packages like Pillow that have spaces in them. - symbol = f"{package_name.split()[0]}.{symbol}" - - self.inventories[symbol] = absolute_doc_url - self.renamed_symbols.add(symbol) - continue - - self.inventories[symbol] = absolute_doc_url - - log.trace(f"Fetched inventory for {package_name}.") + if not package: + return None + + for group, value in package.items(): + for symbol, (package_name, _, relative_doc_url, _) in value.items(): + absolute_doc_url = base_url + relative_doc_url + + if symbol in self.inventories: + group_name = group.split(":")[1] + symbol_base_url = self.inventories[symbol].split("/", 3)[2] + if ( + group_name in NO_OVERRIDE_GROUPS + or any(package in symbol_base_url for package in NO_OVERRIDE_PACKAGES) + ): + + symbol = f"{group_name}.{symbol}" + # If renamed `symbol` already exists, add library name in front to differentiate between them. + if symbol in self.renamed_symbols: + # Split `package_name` because of packages like Pillow that have spaces in them. + symbol = f"{package_name.split()[0]}.{symbol}" + + self.inventories[symbol] = absolute_doc_url + self.renamed_symbols.add(symbol) + continue + + self.inventories[symbol] = absolute_doc_url + + log.trace(f"Fetched inventory for {package_name}.") async def refresh_inventory(self) -> None: """Refresh internal documentation inventory.""" @@ -337,9 +335,10 @@ class Doc(commands.Cog): description=embed_description ) # Show all symbols with the same name that were renamed in the footer. - embed.set_footer(text=", ".join(renamed for renamed in self.renamed_symbols - {symbol} - if renamed.endswith(f".{symbol}")) - ) + embed.set_footer( + text=", ".join(renamed for renamed in self.renamed_symbols - {symbol} + if renamed.endswith(f".{symbol}")) + ) return embed @commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True) @@ -477,11 +476,15 @@ class Doc(commands.Cog): try: package = await self.bot.loop.run_in_executor(None, fetch_func) except ConnectTimeout: - log.error(f"Fetching of inventory {inventory_url} timed out," - f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})") + log.error( + f"Fetching of inventory {inventory_url} timed out," + f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})" + ) except ProtocolError: - log.error(f"Connection lost while fetching inventory {inventory_url}," - f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})") + log.error( + f"Connection lost while fetching inventory {inventory_url}," + f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})" + ) except HTTPError as e: log.error(f"Fetching of inventory {inventory_url} failed with status code {e.response.status_code}.") return None -- cgit v1.2.3 From f212ddeea4de54d6eb75081c13162c2ad64bfeff Mon Sep 17 00:00:00 2001 From: Numerlor Date: Fri, 15 Nov 2019 13:10:19 +0100 Subject: join extra newline --- bot/cogs/doc.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index b82eac5fe..20bc010d9 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -336,8 +336,7 @@ class Doc(commands.Cog): ) # Show all symbols with the same name that were renamed in the footer. embed.set_footer( - text=", ".join(renamed for renamed in self.renamed_symbols - {symbol} - if renamed.endswith(f".{symbol}")) + text=", ".join(renamed for renamed in self.renamed_symbols - {symbol} if renamed.endswith(f".{symbol}")) ) return embed -- cgit v1.2.3 From a0ed0c1d6c6d3ba32df4d9bb355ffe1a59e8f76b Mon Sep 17 00:00:00 2001 From: Numerlor Date: Fri, 15 Nov 2019 13:13:18 +0100 Subject: Add variable info after comment was deleted Co-authored-by: scargly <29337040+scragly@users.noreply.github.com> --- bot/cogs/doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 20bc010d9..76fdcd831 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -180,7 +180,7 @@ class Doc(commands.Cog): return None for group, value in package.items(): - for symbol, (package_name, _, relative_doc_url, _) in value.items(): + for symbol, (package_name, _version, relative_doc_url, _) in value.items(): absolute_doc_url = base_url + relative_doc_url if symbol in self.inventories: -- cgit v1.2.3 From f1180d9cd05329f61439c8a45dedb47e841e7216 Mon Sep 17 00:00:00 2001 From: Numerlor Date: Fri, 15 Nov 2019 13:35:44 +0100 Subject: group and order constants --- bot/cogs/doc.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 76fdcd831..dc53937ee 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -26,7 +26,6 @@ from bot.pagination import LinePaginator log = logging.getLogger(__name__) logging.getLogger('urllib3').setLevel(logging.WARNING) -NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay NO_OVERRIDE_GROUPS = ( "2to3fixer", "token", @@ -37,8 +36,7 @@ NO_OVERRIDE_GROUPS = ( NO_OVERRIDE_PACKAGES = ( "python", ) -FAILED_REQUEST_RETRY_AMOUNT = 3 -UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶") + SEARCH_END_TAG_ATTRS = ( "data", "function", @@ -49,8 +47,12 @@ SEARCH_END_TAG_ATTRS = ( "rubric", "sphinxsidebar", ) +UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶") WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)") +FAILED_REQUEST_RETRY_AMOUNT = 3 +NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay + def async_cache(max_size: int = 128, arg_offset: int = 0) -> Callable: """ -- cgit v1.2.3