diff options
| author | 2019-11-15 23:52:00 +1000 | |
|---|---|---|
| committer | 2019-11-15 23:52:00 +1000 | |
| commit | 1992cb248ba388aa7e171caef16a4c6f829e652a (patch) | |
| tree | bfee0c74b71a870363c3ab2bcdbb940487aebe78 | |
| parent | Merge pull request #619 from python-discord/moderation-logging (diff) | |
| parent | Merge branch 'master' into doc-command (diff) | |
Docs command improvements (#546)
Docs command improvements
Co-authored-by: Sebastiaan Zeeff <[email protected]>
| -rw-r--r-- | bot/cogs/doc.py | 214 | 
1 files changed, 177 insertions, 37 deletions
diff --git a/bot/cogs/doc.py b/bot/cogs/doc.py index 65cabe46f..e5b3a4062 100644 --- a/bot/cogs/doc.py +++ b/bot/cogs/doc.py @@ -4,17 +4,20 @@ import logging  import re  import textwrap  from collections import OrderedDict +from contextlib import suppress  from typing import Any, Callable, Optional, Tuple  import discord  from bs4 import BeautifulSoup -from bs4.element import PageElement +from bs4.element import PageElement, Tag +from discord.errors import NotFound  from discord.ext import commands  from markdownify import MarkdownConverter -from requests import ConnectionError +from requests import ConnectTimeout, ConnectionError, HTTPError  from sphinx.ext import intersphinx +from urllib3.exceptions import ProtocolError -from bot.constants import MODERATION_ROLES +from bot.constants import MODERATION_ROLES, RedirectOutput  from bot.converters import ValidPythonIdentifier, ValidURL  from bot.decorators import with_role  from bot.pagination import LinePaginator @@ -23,10 +26,33 @@ from bot.pagination import LinePaginator  log = logging.getLogger(__name__)  logging.getLogger('urllib3').setLevel(logging.WARNING) - -UNWANTED_SIGNATURE_SYMBOLS = ('[source]', '¶') +NO_OVERRIDE_GROUPS = ( +    "2to3fixer", +    "token", +    "label", +    "pdbcommand", +    "term", +) +NO_OVERRIDE_PACKAGES = ( +    "python", +) + +SEARCH_END_TAG_ATTRS = ( +    "data", +    "function", +    "class", +    "exception", +    "seealso", +    "section", +    "rubric", +    "sphinxsidebar", +) +UNWANTED_SIGNATURE_SYMBOLS_RE = re.compile(r"\[source]|\\\\|¶")  WHITESPACE_AFTER_NEWLINES_RE = re.compile(r"(?<=\n\n)(\s+)") +FAILED_REQUEST_RETRY_AMOUNT = 3 +NOT_FOUND_DELETE_DELAY = RedirectOutput.delete_delay +  def async_cache(max_size: int = 128, arg_offset: int = 0) -> Callable:      """ @@ -125,6 +151,7 @@ class Doc(commands.Cog):          self.base_urls = {}          self.bot = bot          self.inventories = {} +        self.renamed_symbols = set()          self.bot.loop.create_task(self.init_refresh_inventory()) @@ -150,13 +177,32 @@ class Doc(commands.Cog):          """          self.base_urls[package_name] = base_url -        fetch_func = functools.partial(intersphinx.fetch_inventory, config, '', inventory_url) -        for _, value in (await self.bot.loop.run_in_executor(None, fetch_func)).items(): -            # Each value has a bunch of information in the form -            # `(package_name, version, relative_url, ???)`, and we only -            # need the relative documentation URL. -            for symbol, (_, _, relative_doc_url, _) in value.items(): +        package = await self._fetch_inventory(inventory_url, config) +        if not package: +            return None + +        for group, value in package.items(): +            for symbol, (package_name, _version, relative_doc_url, _) in value.items():                  absolute_doc_url = base_url + relative_doc_url + +                if symbol in self.inventories: +                    group_name = group.split(":")[1] +                    symbol_base_url = self.inventories[symbol].split("/", 3)[2] +                    if ( +                        group_name in NO_OVERRIDE_GROUPS +                        or any(package in symbol_base_url for package in NO_OVERRIDE_PACKAGES) +                    ): + +                        symbol = f"{group_name}.{symbol}" +                        # If renamed `symbol` already exists, add library name in front to differentiate between them. +                        if symbol in self.renamed_symbols: +                            # Split `package_name` because of packages like Pillow that have spaces in them. +                            symbol = f"{package_name.split()[0]}.{symbol}" + +                        self.inventories[symbol] = absolute_doc_url +                        self.renamed_symbols.add(symbol) +                        continue +                  self.inventories[symbol] = absolute_doc_url          log.trace(f"Fetched inventory for {package_name}.") @@ -170,6 +216,7 @@ class Doc(commands.Cog):          # Also, reset the cache used for fetching documentation.          self.base_urls.clear()          self.inventories.clear() +        self.renamed_symbols.clear()          async_cache.cache = OrderedDict()          # Since Intersphinx is intended to be used with Sphinx, @@ -185,16 +232,15 @@ class Doc(commands.Cog):          ]          await asyncio.gather(*coros) -    async def get_symbol_html(self, symbol: str) -> Optional[Tuple[str, str]]: +    async def get_symbol_html(self, symbol: str) -> Optional[Tuple[list, str]]:          """          Given a Python symbol, return its signature and description. -        Returns a tuple in the form (str, str), or `None`. -          The first tuple element is the signature of the given symbol as a markup-free string, and          the second tuple element is the description of the given symbol with HTML markup included. -        If the given symbol could not be found, returns `None`. +        If the given symbol is a module, returns a tuple `(None, str)` +        else if the symbol could not be found, returns `None`.          """          url = self.inventories.get(symbol)          if url is None: @@ -207,21 +253,38 @@ class Doc(commands.Cog):          symbol_id = url.split('#')[-1]          soup = BeautifulSoup(html, 'lxml')          symbol_heading = soup.find(id=symbol_id) -        signature_buffer = [] +        search_html = str(soup)          if symbol_heading is None:              return None -        # Traverse the tags of the signature header and ignore any -        # unwanted symbols from it. Add all of it to a temporary buffer. -        for tag in symbol_heading.strings: -            if tag not in UNWANTED_SIGNATURE_SYMBOLS: -                signature_buffer.append(tag.replace('\\', '')) +        if symbol_id == f"module-{symbol}": +            # Get page content from the module headerlink to the +            # first tag that has its class in `SEARCH_END_TAG_ATTRS` +            start_tag = symbol_heading.find("a", attrs={"class": "headerlink"}) +            if start_tag is None: +                return [], "" + +            end_tag = start_tag.find_next(self._match_end_tag) +            if end_tag is None: +                return [], "" + +            description_start_index = search_html.find(str(start_tag.parent)) + len(str(start_tag.parent)) +            description_end_index = search_html.find(str(end_tag)) +            description = search_html[description_start_index:description_end_index] +            signatures = None -        signature = ''.join(signature_buffer) -        description = str(symbol_heading.next_sibling.next_sibling).replace('¶', '') +        else: +            signatures = [] +            description = str(symbol_heading.find_next_sibling("dd")) +            description_pos = search_html.find(description) +            # Get text of up to 3 signatures, remove unwanted symbols +            for element in [symbol_heading] + symbol_heading.find_next_siblings("dt", limit=2): +                signature = UNWANTED_SIGNATURE_SYMBOLS_RE.sub("", element.text) +                if signature and search_html.find(str(element)) < description_pos: +                    signatures.append(signature) -        return signature, description +        return signatures, description.replace('¶', '')      @async_cache(arg_offset=1)      async def get_symbol_embed(self, symbol: str) -> Optional[discord.Embed]: @@ -234,7 +297,7 @@ class Doc(commands.Cog):          if scraped_html is None:              return None -        signature = scraped_html[0] +        signatures = scraped_html[0]          permalink = self.inventories[symbol]          description = markdownify(scraped_html[1]) @@ -242,26 +305,42 @@ class Doc(commands.Cog):          # of a double newline (interpreted as a paragraph) before index 1000.          if len(description) > 1000:              shortened = description[:1000] -            last_paragraph_end = shortened.rfind('\n\n') -            description = description[:last_paragraph_end] + f"... [read more]({permalink})" +            last_paragraph_end = shortened.rfind('\n\n', 100) +            if last_paragraph_end == -1: +                last_paragraph_end = shortened.rfind('. ') +            description = description[:last_paragraph_end] + +            # If there is an incomplete code block, cut it out +            if description.count("```") % 2: +                codeblock_start = description.rfind('```py') +                description = description[:codeblock_start].rstrip() +            description += f"... [read more]({permalink})"          description = WHITESPACE_AFTER_NEWLINES_RE.sub('', description) -        if not signature: +        if signatures is None: +            # If symbol is a module, don't show signature. +            embed_description = description + +        elif not signatures:              # It's some "meta-page", for example:              # https://docs.djangoproject.com/en/dev/ref/views/#module-django.views -            return discord.Embed( -                title=f'`{symbol}`', -                url=permalink, -                description="This appears to be a generic page not tied to a specific symbol." -            ) +            embed_description = "This appears to be a generic page not tied to a specific symbol." -        signature = textwrap.shorten(signature, 500) -        return discord.Embed( +        else: +            embed_description = "".join(f"```py\n{textwrap.shorten(signature, 500)}```" for signature in signatures) +            embed_description += f"\n{description}" + +        embed = discord.Embed(              title=f'`{symbol}`',              url=permalink, -            description=f"```py\n{signature}```{description}" +            description=embed_description          ) +        # Show all symbols with the same name that were renamed in the footer. +        embed.set_footer( +            text=", ".join(renamed for renamed in self.renamed_symbols - {symbol} if renamed.endswith(f".{symbol}")) +        ) +        return embed      @commands.group(name='docs', aliases=('doc', 'd'), invoke_without_command=True)      async def docs_group(self, ctx: commands.Context, symbol: commands.clean_content = None) -> None: @@ -307,7 +386,10 @@ class Doc(commands.Cog):                      description=f"Sorry, I could not find any documentation for `{symbol}`.",                      colour=discord.Colour.red()                  ) -                await ctx.send(embed=error_embed) +                error_message = await ctx.send(embed=error_embed) +                with suppress(NotFound): +                    await error_message.delete(delay=NOT_FOUND_DELETE_DELAY) +                    await ctx.message.delete(delay=NOT_FOUND_DELETE_DELAY)              else:                  await ctx.send(embed=doc_embed) @@ -365,6 +447,64 @@ class Doc(commands.Cog):              await self.refresh_inventory()          await ctx.send(f"Successfully deleted `{package_name}` and refreshed inventory.") +    @docs_group.command(name="refresh", aliases=("rfsh", "r")) +    @with_role(*MODERATION_ROLES) +    async def refresh_command(self, ctx: commands.Context) -> None: +        """Refresh inventories and send differences to channel.""" +        old_inventories = set(self.base_urls) +        with ctx.typing(): +            await self.refresh_inventory() +        # Get differences of added and removed inventories +        added = ', '.join(inv for inv in self.base_urls if inv not in old_inventories) +        if added: +            added = f"+ {added}" + +        removed = ', '.join(inv for inv in old_inventories if inv not in self.base_urls) +        if removed: +            removed = f"- {removed}" + +        embed = discord.Embed( +            title="Inventories refreshed", +            description=f"```diff\n{added}\n{removed}```" if added or removed else "" +        ) +        await ctx.send(embed=embed) + +    async def _fetch_inventory(self, inventory_url: str, config: SphinxConfiguration) -> Optional[dict]: +        """Get and return inventory from `inventory_url`. If fetching fails, return None.""" +        fetch_func = functools.partial(intersphinx.fetch_inventory, config, '', inventory_url) +        for retry in range(1, FAILED_REQUEST_RETRY_AMOUNT+1): +            try: +                package = await self.bot.loop.run_in_executor(None, fetch_func) +            except ConnectTimeout: +                log.error( +                    f"Fetching of inventory {inventory_url} timed out," +                    f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})" +                ) +            except ProtocolError: +                log.error( +                    f"Connection lost while fetching inventory {inventory_url}," +                    f" trying again. ({retry}/{FAILED_REQUEST_RETRY_AMOUNT})" +                ) +            except HTTPError as e: +                log.error(f"Fetching of inventory {inventory_url} failed with status code {e.response.status_code}.") +                return None +            except ConnectionError: +                log.error(f"Couldn't establish connection to inventory {inventory_url}.") +                return None +            else: +                return package +        log.error(f"Fetching of inventory {inventory_url} failed.") +        return None + +    @staticmethod +    def _match_end_tag(tag: Tag) -> bool: +        """Matches `tag` if its class value is in `SEARCH_END_TAG_ATTRS` or the tag is table.""" +        for attr in SEARCH_END_TAG_ATTRS: +            if attr in tag.get("class", ()): +                return True + +        return tag.name == "table" +  def setup(bot: commands.Bot) -> None:      """Doc cog load."""  |