diff options
| -rw-r--r-- | LICENSE-THIRD-PARTY | 36 | ||||
| -rw-r--r-- | bot/constants.py | 9 | ||||
| -rw-r--r-- | bot/exts/help_channels.py | 45 | ||||
| -rw-r--r-- | bot/exts/info/codeblock/__init__.py | 8 | ||||
| -rw-r--r-- | bot/exts/info/codeblock/_cog.py | 186 | ||||
| -rw-r--r-- | bot/exts/info/codeblock/_instructions.py | 184 | ||||
| -rw-r--r-- | bot/exts/info/codeblock/_parsing.py | 224 | ||||
| -rw-r--r-- | bot/exts/info/stats.py | 5 | ||||
| -rw-r--r-- | bot/exts/utils/bot.py | 326 | ||||
| -rw-r--r-- | bot/utils/__init__.py | 4 | ||||
| -rw-r--r-- | bot/utils/channel.py | 33 | ||||
| -rw-r--r-- | bot/utils/helpers.py | 9 | ||||
| -rw-r--r-- | config-default.yml | 21 | 
13 files changed, 733 insertions, 357 deletions
| diff --git a/LICENSE-THIRD-PARTY b/LICENSE-THIRD-PARTY new file mode 100644 index 000000000..3349d7c05 --- /dev/null +++ b/LICENSE-THIRD-PARTY @@ -0,0 +1,36 @@ +BSD 3-Clause License + +Applies to: +- _RE_PYTHON_REPL and portions of _RE_IPYTHON_REPL in bot/cogs/codeblock/parsing.py + +- Copyright (c) 2008-Present, IPython Development Team +- Copyright (c) 2001-2007, Fernando Perez <[email protected]> +- Copyright (c) 2001, Janko Hauser <[email protected]> +- Copyright (c) 2001, Nathaniel Gray <[email protected]> + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this +  list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, +  this list of conditions and the following disclaimer in the documentation +  and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its +  contributors may be used to endorse or promote products derived from +  this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/bot/constants.py b/bot/constants.py index c21fd52e0..6c8b933af 100644 --- a/bot/constants.py +++ b/bot/constants.py @@ -528,6 +528,15 @@ class BigBrother(metaclass=YAMLGetter):      header_message_limit: int +class CodeBlock(metaclass=YAMLGetter): +    section = 'code_block' + +    channel_whitelist: List[int] +    cooldown_channels: List[int] +    cooldown_seconds: int +    minimum_lines: int + +  class Free(metaclass=YAMLGetter):      section = 'free' diff --git a/bot/exts/help_channels.py b/bot/exts/help_channels.py index f5c9a5dd0..062d4fcfe 100644 --- a/bot/exts/help_channels.py +++ b/bot/exts/help_channels.py @@ -14,6 +14,7 @@ from discord.ext import commands  from bot import constants  from bot.bot import Bot +from bot.utils import channel as channel_utils  from bot.utils.scheduling import Scheduler  log = logging.getLogger(__name__) @@ -378,11 +379,18 @@ class HelpChannels(commands.Cog):          log.trace("Getting the CategoryChannel objects for the help categories.")          try: -            self.available_category = await self.try_get_channel( -                constants.Categories.help_available +            self.available_category = await channel_utils.try_get_channel( +                constants.Categories.help_available, +                self.bot +            ) +            self.in_use_category = await channel_utils.try_get_channel( +                constants.Categories.help_in_use, +                self.bot +            ) +            self.dormant_category = await channel_utils.try_get_channel( +                constants.Categories.help_dormant, +                self.bot              ) -            self.in_use_category = await self.try_get_channel(constants.Categories.help_in_use) -            self.dormant_category = await self.try_get_channel(constants.Categories.help_dormant)          except discord.HTTPException:              log.exception("Failed to get a category; cog will be removed")              self.bot.remove_cog(self.qualified_name) @@ -442,12 +450,6 @@ class HelpChannels(commands.Cog):              return False          return message.author == self.bot.user and bot_msg_desc.strip() == description.strip() -    @staticmethod -    def is_in_category(channel: discord.TextChannel, category_id: int) -> bool: -        """Return True if `channel` is within a category with `category_id`.""" -        actual_category = getattr(channel, "category", None) -        return actual_category is not None and actual_category.id == category_id -      async def move_idle_channel(self, channel: discord.TextChannel, has_task: bool = True) -> None:          """          Make the `channel` dormant if idle or schedule the move if still active. @@ -498,7 +500,7 @@ class HelpChannels(commands.Cog):          options should be avoided, as it may interfere with the category move we perform.          """          # Get a fresh copy of the category from the bot to avoid the cache mismatch issue we had. -        category = await self.try_get_channel(category_id) +        category = await channel_utils.try_get_channel(category_id, self.bot)          payload = [{"id": c.id, "position": c.position} for c in category.channels] @@ -646,7 +648,7 @@ class HelpChannels(commands.Cog):          channel = message.channel          # Confirm the channel is an in use help channel -        if self.is_in_category(channel, constants.Categories.help_in_use): +        if channel_utils.is_in_category(channel, constants.Categories.help_in_use):              log.trace(f"Checking if #{channel} ({channel.id}) has been answered.")              # Check if there is an entry in unanswered @@ -671,7 +673,8 @@ class HelpChannels(commands.Cog):          await self.check_for_answer(message) -        if not self.is_in_category(channel, constants.Categories.help_available) or self.is_excluded_channel(channel): +        is_available = channel_utils.is_in_category(channel, constants.Categories.help_available) +        if not is_available or self.is_excluded_channel(channel):              return  # Ignore messages outside the Available category or in excluded channels.          log.trace("Waiting for the cog to be ready before processing messages.") @@ -681,7 +684,7 @@ class HelpChannels(commands.Cog):          async with self.on_message_lock:              log.trace(f"on_message lock acquired for {message.id}.") -            if not self.is_in_category(channel, constants.Categories.help_available): +            if not channel_utils.is_in_category(channel, constants.Categories.help_available):                  log.debug(                      f"Message {message.id} will not make #{channel} ({channel.id}) in-use "                      f"because another message in the channel already triggered that." @@ -719,7 +722,7 @@ class HelpChannels(commands.Cog):          The new time for the dormant task is configured with `HelpChannels.deleted_idle_minutes`.          """ -        if not self.is_in_category(msg.channel, constants.Categories.help_in_use): +        if not channel_utils.is_in_category(msg.channel, constants.Categories.help_in_use):              return          if not await self.is_empty(msg.channel): @@ -844,18 +847,6 @@ class HelpChannels(commands.Cog):              log.trace(f"Dormant message not found in {channel_info}; sending a new message.")              await channel.send(embed=embed) -    async def try_get_channel(self, channel_id: int) -> discord.abc.GuildChannel: -        """Attempt to get or fetch a channel and return it.""" -        log.trace(f"Getting the channel {channel_id}.") - -        channel = self.bot.get_channel(channel_id) -        if not channel: -            log.debug(f"Channel {channel_id} is not in cache; fetching from API.") -            channel = await self.bot.fetch_channel(channel_id) - -        log.trace(f"Channel #{channel} ({channel_id}) retrieved.") -        return channel -      async def pin_wrapper(self, msg_id: int, channel: discord.TextChannel, *, pin: bool) -> bool:          """          Pin message `msg_id` in `channel` if `pin` is True or unpin if it's False. diff --git a/bot/exts/info/codeblock/__init__.py b/bot/exts/info/codeblock/__init__.py new file mode 100644 index 000000000..5c55bc5e3 --- /dev/null +++ b/bot/exts/info/codeblock/__init__.py @@ -0,0 +1,8 @@ +from bot.bot import Bot + + +def setup(bot: Bot) -> None: +    """Load the CodeBlockCog cog.""" +    # Defer import to reduce side effects from importing the codeblock package. +    from bot.exts.info.codeblock._cog import CodeBlockCog +    bot.add_cog(CodeBlockCog(bot)) diff --git a/bot/exts/info/codeblock/_cog.py b/bot/exts/info/codeblock/_cog.py new file mode 100644 index 000000000..1e0feab0d --- /dev/null +++ b/bot/exts/info/codeblock/_cog.py @@ -0,0 +1,186 @@ +import logging +import time +from typing import Optional + +import discord +from discord import Message, RawMessageUpdateEvent +from discord.ext.commands import Cog + +from bot import constants +from bot.bot import Bot +from bot.exts.filters.token_remover import TokenRemover +from bot.exts.filters.webhook_remover import WEBHOOK_URL_RE +from bot.exts.info.codeblock._instructions import get_instructions +from bot.utils import has_lines +from bot.utils.channel import is_help_channel +from bot.utils.messages import wait_for_deletion + +log = logging.getLogger(__name__) + + +class CodeBlockCog(Cog, name="Code Block"): +    """ +    Detect improperly formatted Markdown code blocks and suggest proper formatting. + +    There are four basic ways in which a code block is considered improperly formatted: + +    1. The code is not within a code block at all +        * Ignored if the code is not valid Python or Python REPL code +    2. Incorrect characters are used for backticks +    3. A language for syntax highlighting is not specified +        * Ignored if the code is not valid Python or Python REPL code +    4. A syntax highlighting language is incorrectly specified +        * Ignored if the language specified doesn't look like it was meant for Python +        * This can go wrong in two ways: +            1. Spaces before the language +            2. No newline immediately following the language + +    Messages or code blocks must meet a minimum line count to be detected. Detecting multiple code +    blocks is supported. However, if at least one code block is correct, then instructions will not +    be sent even if others are incorrect. When multiple incorrect code blocks are found, only the +    first one is used as the basis for the instructions sent. + +    When an issue is detected, an embed is sent containing specific instructions on fixing what +    is wrong. If the user edits their message to fix the code block, the instructions will be +    removed. If they fail to fix the code block with an edit, the instructions will be updated to +    show what is still incorrect after the user's edit. The embed can be manually deleted with a +    reaction. Otherwise, it will automatically be removed after 5 minutes. + +    The cog only detects messages in whitelisted channels. Channels may also have a cooldown on the +    instructions being sent. Note all help channels are also whitelisted with cooldowns enabled. + +    For configurable parameters, see the `code_block` section in config-default.py. +    """ + +    def __init__(self, bot: Bot): +        self.bot = bot + +        # Stores allowed channels plus epoch times since the last instructional messages sent. +        self.channel_cooldowns = dict.fromkeys(constants.CodeBlock.cooldown_channels, 0.0) + +        # Maps users' messages to the messages the bot sent with instructions. +        self.codeblock_message_ids = {} + +    @staticmethod +    def create_embed(instructions: str) -> discord.Embed: +        """Return an embed which displays code block formatting `instructions`.""" +        return discord.Embed(description=instructions) + +    async def get_sent_instructions(self, payload: RawMessageUpdateEvent) -> Optional[Message]: +        """ +        Return the bot's sent instructions message associated with a user's message `payload`. + +        Return None if the message cannot be found. In this case, it's likely the message was +        deleted either manually via a reaction or automatically by a timer. +        """ +        log.trace(f"Retrieving instructions message for ID {payload.message_id}") +        channel = self.bot.get_channel(payload.channel_id) + +        try: +            return await channel.fetch_message(self.codeblock_message_ids[payload.message_id]) +        except discord.NotFound: +            log.debug("Could not find instructions message; it was probably deleted.") +            return None + +    def is_on_cooldown(self, channel: discord.TextChannel) -> bool: +        """ +        Return True if an embed was sent too recently for `channel`. + +        The cooldown is configured by `constants.CodeBlock.cooldown_seconds`. +        Note: only channels in the `channel_cooldowns` have cooldowns enabled. +        """ +        log.trace(f"Checking if #{channel} is on cooldown.") +        cooldown = constants.CodeBlock.cooldown_seconds +        return (time.time() - self.channel_cooldowns.get(channel.id, 0)) < cooldown + +    def is_valid_channel(self, channel: discord.TextChannel) -> bool: +        """Return True if `channel` is a help channel, may be on a cooldown, or is whitelisted.""" +        log.trace(f"Checking if #{channel} qualifies for code block detection.") +        return ( +            is_help_channel(channel) +            or channel.id in self.channel_cooldowns +            or channel.id in constants.CodeBlock.channel_whitelist +        ) + +    async def send_instructions(self, message: discord.Message, instructions: str) -> None: +        """ +        Send an embed with `instructions` on fixing an incorrect code block in a `message`. + +        The embed will be deleted automatically after 5 minutes. +        """ +        log.info(f"Sending code block formatting instructions for message {message.id}.") + +        embed = self.create_embed(instructions) +        bot_message = await message.channel.send(f"Hey {message.author.mention}!", embed=embed) +        self.codeblock_message_ids[message.id] = bot_message.id + +        self.bot.loop.create_task(wait_for_deletion(bot_message, (message.author.id,), self.bot)) + +        # Increase amount of codeblock correction in stats +        self.bot.stats.incr("codeblock_corrections") + +    def should_parse(self, message: discord.Message) -> bool: +        """ +        Return True if `message` should be parsed. + +        A qualifying message: + +        1. Is not authored by a bot +        2. Is in a valid channel +        3. Has more than 3 lines +        4. Has no bot or webhook token +        """ +        return ( +            not message.author.bot +            and self.is_valid_channel(message.channel) +            and has_lines(message.content, constants.CodeBlock.minimum_lines) +            and not TokenRemover.find_token_in_message(message) +            and not WEBHOOK_URL_RE.search(message.content) +        ) + +    @Cog.listener() +    async def on_message(self, msg: Message) -> None: +        """Detect incorrect Markdown code blocks in `msg` and send instructions to fix them.""" +        if not self.should_parse(msg): +            log.trace(f"Skipping code block detection of {msg.id}: message doesn't qualify.") +            return + +        # When debugging, ignore cooldowns. +        if self.is_on_cooldown(msg.channel) and not constants.DEBUG_MODE: +            log.trace(f"Skipping code block detection of {msg.id}: #{msg.channel} is on cooldown.") +            return + +        instructions = get_instructions(msg.content) +        if instructions: +            await self.send_instructions(msg, instructions) + +            if msg.channel.id not in constants.CodeBlock.channel_whitelist: +                log.debug(f"Adding #{msg.channel} to the channel cooldowns.") +                self.channel_cooldowns[msg.channel.id] = time.time() + +    @Cog.listener() +    async def on_raw_message_edit(self, payload: RawMessageUpdateEvent) -> None: +        """Delete the instructional message if an edited message had its code blocks fixed.""" +        if payload.message_id not in self.codeblock_message_ids: +            log.trace(f"Ignoring message edit {payload.message_id}: message isn't being tracked.") +            return + +        if payload.data.get("content") is None or payload.data.get("channel_id") is None: +            log.trace(f"Ignoring message edit {payload.message_id}: missing content or channel ID.") +            return + +        # Parse the message to see if the code blocks have been fixed. +        content = payload.data.get("content") +        instructions = get_instructions(content) + +        bot_message = await self.get_sent_instructions(payload) +        if not bot_message: +            return + +        if not instructions: +            log.info("User's incorrect code block has been fixed. Removing instructions message.") +            await bot_message.delete() +            del self.codeblock_message_ids[payload.message_id] +        else: +            log.info("Message edited but still has invalid code blocks; editing the instructions.") +            await bot_message.edit(embed=self.create_embed(instructions)) diff --git a/bot/exts/info/codeblock/_instructions.py b/bot/exts/info/codeblock/_instructions.py new file mode 100644 index 000000000..508f157fb --- /dev/null +++ b/bot/exts/info/codeblock/_instructions.py @@ -0,0 +1,184 @@ +"""This module generates and formats instructional messages about fixing Markdown code blocks.""" + +import logging +from typing import Optional + +from bot.exts.info.codeblock import _parsing + +log = logging.getLogger(__name__) + +_EXAMPLE_PY = "{lang}\nprint('Hello, world!')"  # Make sure to escape any Markdown symbols here. +_EXAMPLE_CODE_BLOCKS = ( +    "\\`\\`\\`{content}\n\\`\\`\\`\n\n" +    "**This will result in the following:**\n" +    "```{content}```" +) + + +def _get_example(language: str) -> str: +    """Return an example of a correct code block using `language` for syntax highlighting.""" +    # Determine the example code to put in the code block based on the language specifier. +    if language.lower() in _parsing.PY_LANG_CODES: +        log.trace(f"Code block has a Python language specifier `{language}`.") +        content = _EXAMPLE_PY.format(lang=language) +    elif language: +        log.trace(f"Code block has a foreign language specifier `{language}`.") +        # It's not feasible to determine what would be a valid example for other languages. +        content = f"{language}\n..." +    else: +        log.trace("Code block has no language specifier.") +        content = "\nHello, world!" + +    return _EXAMPLE_CODE_BLOCKS.format(content=content) + + +def _get_bad_ticks_message(code_block: _parsing.CodeBlock) -> Optional[str]: +    """Return instructions on using the correct ticks for `code_block`.""" +    log.trace("Creating instructions for incorrect code block ticks.") + +    valid_ticks = f"\\{_parsing.BACKTICK}" * 3 +    instructions = ( +        "It looks like you are trying to paste code into this channel.\n\n" +        "You seem to be using the wrong symbols to indicate where the code block should start. " +        f"The correct symbols would be {valid_ticks}, not `{code_block.tick * 3}`." +    ) + +    log.trace("Check if the bad ticks code block also has issues with the language specifier.") +    addition_msg = _get_bad_lang_message(code_block.content) +    if not addition_msg and not code_block.language: +        addition_msg = _get_no_lang_message(code_block.content) + +    # Combine the back ticks message with the language specifier message. The latter will +    # already have an example code block. +    if addition_msg: +        log.trace("Language specifier issue found; appending additional instructions.") + +        # The first line has double newlines which are not desirable when appending the msg. +        addition_msg = addition_msg.replace("\n\n", " ", 1) + +        # Make the first character of the addition lower case. +        instructions += "\n\nFurthermore, " + addition_msg[0].lower() + addition_msg[1:] +    else: +        log.trace("No issues with the language specifier found.") +        example_blocks = _get_example(code_block.language) +        instructions += f"\n\n**Here is an example of how it should look:**\n{example_blocks}" + +    return instructions + + +def _get_no_ticks_message(content: str) -> Optional[str]: +    """If `content` is Python/REPL code, return instructions on using code blocks.""" +    log.trace("Creating instructions for a missing code block.") + +    if _parsing.is_python_code(content): +        example_blocks = _get_example("python") +        return ( +            "It looks like you're trying to paste code into this channel.\n\n" +            "Discord has support for Markdown, which allows you to post code with full " +            "syntax highlighting. Please use these whenever you paste code, as this " +            "helps improve the legibility and makes it easier for us to help you.\n\n" +            f"**To do this, use the following method:**\n{example_blocks}" +        ) +    else: +        log.trace("Aborting missing code block instructions: content is not Python code.") + + +def _get_bad_lang_message(content: str) -> Optional[str]: +    """ +    Return instructions on fixing the Python language specifier for a code block. + +    If `code_block` does not have a Python language specifier, return None. +    If there's nothing wrong with the language specifier, return None. +    """ +    log.trace("Creating instructions for a poorly specified language.") + +    info = _parsing.parse_bad_language(content) +    if not info: +        log.trace("Aborting bad language instructions: language specified isn't Python.") +        return + +    lines = [] +    language = info.language + +    if info.has_leading_spaces: +        log.trace("Language specifier was preceded by a space.") +        lines.append(f"Make sure there are no spaces between the back ticks and `{language}`.") + +    if not info.has_terminal_newline: +        log.trace("Language specifier was not followed by a newline.") +        lines.append( +            f"Make sure you put your code on a new line following `{language}`. " +            f"There must not be any spaces after `{language}`." +        ) + +    if lines: +        lines = " ".join(lines) +        example_blocks = _get_example(language) + +        # Note that _get_bad_ticks_message expects the first line to have two newlines. +        return ( +            f"It looks like you incorrectly specified a language for your code block.\n\n{lines}" +            f"\n\n**Here is an example of how it should look:**\n{example_blocks}" +        ) +    else: +        log.trace("Nothing wrong with the language specifier; no instructions to return.") + + +def _get_no_lang_message(content: str) -> Optional[str]: +    """ +    Return instructions on specifying a language for a code block. + +    If `content` is not valid Python or Python REPL code, return None. +    """ +    log.trace("Creating instructions for a missing language.") + +    if _parsing.is_python_code(content): +        example_blocks = _get_example("python") + +        # Note that _get_bad_ticks_message expects the first line to have two newlines. +        return ( +            "It looks like you pasted Python code without syntax highlighting.\n\n" +            "Please use syntax highlighting to improve the legibility of your code and make " +            "it easier for us to help you.\n\n" +            f"**To do this, use the following method:**\n{example_blocks}" +        ) +    else: +        log.trace("Aborting missing language instructions: content is not Python code.") + + +def get_instructions(content: str) -> Optional[str]: +    """ +    Parse `content` and return code block formatting instructions if something is wrong. + +    Return None if `content` lacks code block formatting issues. +    """ +    log.trace("Getting formatting instructions.") + +    blocks = _parsing.find_code_blocks(content) +    if blocks is None: +        log.trace("At least one valid code block found; no instructions to return.") +        return + +    if not blocks: +        log.trace("No code blocks were found in message.") +        instructions = _get_no_ticks_message(content) +    else: +        log.trace("Searching results for a code block with invalid ticks.") +        block = next((block for block in blocks if block.tick != _parsing.BACKTICK), None) + +        if block: +            log.trace("A code block exists but has invalid ticks.") +            instructions = _get_bad_ticks_message(block) +        else: +            log.trace("A code block exists but is missing a language.") +            block = blocks[0] + +            # Check for a bad language first to avoid parsing content into an AST. +            instructions = _get_bad_lang_message(block.content) +            if not instructions: +                instructions = _get_no_lang_message(block.content) + +    if instructions: +        instructions += "\nYou can **edit your original message** to correct your code block." + +    return instructions diff --git a/bot/exts/info/codeblock/_parsing.py b/bot/exts/info/codeblock/_parsing.py new file mode 100644 index 000000000..e67224494 --- /dev/null +++ b/bot/exts/info/codeblock/_parsing.py @@ -0,0 +1,224 @@ +"""This module provides functions for parsing Markdown code blocks.""" + +import ast +import logging +import re +import textwrap +from typing import NamedTuple, Optional, Sequence + +from bot import constants +from bot.utils import has_lines + +log = logging.getLogger(__name__) + +BACKTICK = "`" +PY_LANG_CODES = ("python", "pycon", "py")  # Order is important; "py" is last cause it's a subset. +_TICKS = { +    BACKTICK, +    "'", +    '"', +    "\u00b4",  # ACUTE ACCENT +    "\u2018",  # LEFT SINGLE QUOTATION MARK +    "\u2019",  # RIGHT SINGLE QUOTATION MARK +    "\u2032",  # PRIME +    "\u201c",  # LEFT DOUBLE QUOTATION MARK +    "\u201d",  # RIGHT DOUBLE QUOTATION MARK +    "\u2033",  # DOUBLE PRIME +    "\u3003",  # VERTICAL KANA REPEAT MARK UPPER HALF +} + +_RE_PYTHON_REPL = re.compile(r"^(>>>|\.\.\.)( |$)") +_RE_IPYTHON_REPL = re.compile(r"^((In|Out) \[\d+\]: |\s*\.{3,}: ?)") + +_RE_CODE_BLOCK = re.compile( +    fr""" +    (?P<ticks> +        (?P<tick>[{''.join(_TICKS)}]) # Put all ticks into a character class within a group. +        \2{{2}}                       # Match previous group 2 more times to ensure the same char. +    ) +    (?P<lang>[^\W_]+\n)?              # Optionally match a language specifier followed by a newline. +    (?P<code>.+?)                     # Match the actual code within the block. +    \1                                # Match the same 3 ticks used at the start of the block. +    """, +    re.DOTALL | re.VERBOSE +) + +_RE_LANGUAGE = re.compile( +    fr""" +    ^(?P<spaces>\s+)?                    # Optionally match leading spaces from the beginning. +    (?P<lang>{'|'.join(PY_LANG_CODES)})  # Match a Python language. +    (?P<newline>\n)?                     # Optionally match a newline following the language. +    """, +    re.IGNORECASE | re.VERBOSE +) + + +class CodeBlock(NamedTuple): +    """Represents a Markdown code block.""" + +    content: str +    language: str +    tick: str + + +class BadLanguage(NamedTuple): +    """Parsed information about a poorly formatted language specifier.""" + +    language: str +    has_leading_spaces: bool +    has_terminal_newline: bool + + +def find_code_blocks(message: str) -> Optional[Sequence[CodeBlock]]: +    """ +    Find and return all Markdown code blocks in the `message`. + +    Code blocks with 3 or fewer lines are excluded. + +    If the `message` contains at least one code block with valid ticks and a specified language, +    return None. This is based on the assumption that if the user managed to get one code block +    right, they already know how to fix the rest themselves. +    """ +    log.trace("Finding all code blocks in a message.") + +    code_blocks = [] +    for match in _RE_CODE_BLOCK.finditer(message): +        # Used to ensure non-matched groups have an empty string as the default value. +        groups = match.groupdict("") +        language = groups["lang"].strip()  # Strip the newline cause it's included in the group. + +        if groups["tick"] == BACKTICK and language: +            log.trace("Message has a valid code block with a language; returning None.") +            return None +        elif has_lines(groups["code"], constants.CodeBlock.minimum_lines): +            code_block = CodeBlock(groups["code"], language, groups["tick"]) +            code_blocks.append(code_block) +        else: +            log.trace("Skipped a code block shorter than 4 lines.") + +    return code_blocks + + +def _is_python_code(content: str) -> bool: +    """Return True if `content` is valid Python consisting of more than just expressions.""" +    log.trace("Checking if content is Python code.") +    try: +        # Attempt to parse the message into an AST node. +        # Invalid Python code will raise a SyntaxError. +        tree = ast.parse(content) +    except SyntaxError: +        log.trace("Code is not valid Python.") +        return False + +    # Multiple lines of single words could be interpreted as expressions. +    # This check is to avoid all nodes being parsed as expressions. +    # (e.g. words over multiple lines) +    if not all(isinstance(node, ast.Expr) for node in tree.body): +        log.trace("Code is valid python.") +        return True +    else: +        log.trace("Code consists only of expressions.") +        return False + + +def _is_repl_code(content: str, threshold: int = 3) -> bool: +    """Return True if `content` has at least `threshold` number of (I)Python REPL-like lines.""" +    log.trace(f"Checking if content is (I)Python REPL code using a threshold of {threshold}.") + +    repl_lines = 0 +    patterns = (_RE_PYTHON_REPL, _RE_IPYTHON_REPL) + +    for line in content.splitlines(): +        # Check the line against all patterns. +        for pattern in patterns: +            if pattern.match(line): +                repl_lines += 1 + +                # Once a pattern is matched, only use that pattern for the remaining lines. +                patterns = (pattern,) +                break + +        if repl_lines == threshold: +            log.trace("Content is (I)Python REPL code.") +            return True + +    log.trace("Content is not (I)Python REPL code.") +    return False + + +def is_python_code(content: str) -> bool: +    """Return True if `content` is valid Python code or (I)Python REPL output.""" +    dedented = textwrap.dedent(content) + +    # Parse AST twice in case _fix_indentation ends up breaking code due to its inaccuracies. +    return ( +        _is_python_code(dedented) +        or _is_repl_code(dedented) +        or _is_python_code(_fix_indentation(content)) +    ) + + +def parse_bad_language(content: str) -> Optional[BadLanguage]: +    """ +    Return information about a poorly formatted Python language in code block `content`. + +    If the language is not Python, return None. +    """ +    log.trace("Parsing bad language.") + +    match = _RE_LANGUAGE.match(content) +    if not match: +        return None + +    return BadLanguage( +        language=match["lang"], +        has_leading_spaces=match["spaces"] is not None, +        has_terminal_newline=match["newline"] is not None, +    ) + + +def _get_leading_spaces(content: str) -> int: +    """Return the number of spaces at the start of the first line in `content`.""" +    leading_spaces = 0 +    for char in content: +        if char == " ": +            leading_spaces += 1 +        else: +            return leading_spaces + + +def _fix_indentation(content: str) -> str: +    """ +    Attempt to fix badly indented code in `content`. + +    In most cases, this works like textwrap.dedent. However, if the first line ends with a colon, +    all subsequent lines are re-indented to only be one level deep relative to the first line. +    The intent is to fix cases where the leading spaces of the first line of code were accidentally +    not copied, which makes the first line appear not indented. + +    This is fairly naïve and inaccurate. Therefore, it may break some code that was otherwise valid. +    It's meant to catch really common cases, so that's acceptable. Its flaws are: + +    - It assumes that if the first line ends with a colon, it is the start of an indented block +    - It uses 4 spaces as the indentation, regardless of what the rest of the code uses +    """ +    lines = content.splitlines(keepends=True) + +    # Dedent the first line +    first_indent = _get_leading_spaces(content) +    first_line = lines[0][first_indent:] + +    second_indent = _get_leading_spaces(lines[1]) + +    # If the first line ends with a colon, all successive lines need to be indented one +    # additional level (assumes an indent width of 4). +    if first_line.rstrip().endswith(":"): +        second_indent -= 4 + +    # All lines must be dedented at least by the same amount as the first line. +    first_indent = max(first_indent, second_indent) + +    # Dedent the rest of the lines and join them together with the first line. +    content = first_line + "".join(line[first_indent:] for line in lines[1:]) + +    return content diff --git a/bot/exts/info/stats.py b/bot/exts/info/stats.py index 21aa91873..4d8bb645e 100644 --- a/bot/exts/info/stats.py +++ b/bot/exts/info/stats.py @@ -6,7 +6,7 @@ from discord.ext.tasks import loop  from bot.bot import Bot  from bot.constants import Categories, Channels, Guild - +from bot.utils.channel import is_in_category  CHANNEL_NAME_OVERRIDES = {      Channels.off_topic_0: "off_topic_0", @@ -35,8 +35,7 @@ class Stats(Cog):          if message.guild.id != Guild.id:              return -        cat = getattr(message.channel, "category", None) -        if cat is not None and cat.id == Categories.modmail: +        if is_in_category(message.channel, Categories.modmail):              if message.channel.id != Channels.incidents:                  # Do not report modmail channels to stats, there are too many                  # of them for interesting statistics to be drawn out of this. diff --git a/bot/exts/utils/bot.py b/bot/exts/utils/bot.py index ba1fd2a5c..69d623581 100644 --- a/bot/exts/utils/bot.py +++ b/bot/exts/utils/bot.py @@ -1,22 +1,14 @@ -import ast  import logging -import re -import time -from typing import Optional, Tuple +from typing import Optional -from discord import Embed, Message, RawMessageUpdateEvent, TextChannel +from discord import Embed, TextChannel  from discord.ext.commands import Cog, Context, command, group, has_any_role  from bot.bot import Bot -from bot.constants import Categories, Channels, DEBUG_MODE, Guild, MODERATION_ROLES, Roles, URLs -from bot.exts.filters.token_remover import TokenRemover -from bot.exts.filters.webhook_remover import WEBHOOK_URL_RE -from bot.utils.messages import wait_for_deletion +from bot.constants import Guild, MODERATION_ROLES, Roles, URLs  log = logging.getLogger(__name__) -RE_MARKDOWN = re.compile(r'([*_~`|>])') -  class BotCog(Cog, name="Bot"):      """Bot information commands.""" @@ -24,19 +16,6 @@ class BotCog(Cog, name="Bot"):      def __init__(self, bot: Bot):          self.bot = bot -        # Stores allowed channels plus epoch time since last call. -        self.channel_cooldowns = { -            Channels.python_discussion: 0, -        } - -        # These channels will also work, but will not be subject to cooldown -        self.channel_whitelist = ( -            Channels.bot_commands, -        ) - -        # Stores improperly formatted Python codeblock message ids and the corresponding bot message -        self.codeblock_message_ids = {} -      @group(invoke_without_command=True, name="bot", hidden=True)      @has_any_role(Roles.verified)      async def botinfo_group(self, ctx: Context) -> None: @@ -81,305 +60,6 @@ class BotCog(Cog, name="Bot"):          else:              await channel.send(embed=embed) -    def codeblock_stripping(self, msg: str, bad_ticks: bool) -> Optional[Tuple[Tuple[str, ...], str]]: -        """ -        Strip msg in order to find Python code. - -        Tries to strip out Python code out of msg and returns the stripped block or -        None if the block is a valid Python codeblock. -        """ -        if msg.count("\n") >= 3: -            # Filtering valid Python codeblocks and exiting if a valid Python codeblock is found. -            if re.search("```(?:py|python)\n(.*?)```", msg, re.IGNORECASE | re.DOTALL) and not bad_ticks: -                log.trace( -                    "Someone wrote a message that was already a " -                    "valid Python syntax highlighted code block. No action taken." -                ) -                return None - -            else: -                # Stripping backticks from every line of the message. -                log.trace(f"Stripping backticks from message.\n\n{msg}\n\n") -                content = "" -                for line in msg.splitlines(keepends=True): -                    content += line.strip("`") - -                content = content.strip() - -                # Remove "Python" or "Py" from start of the message if it exists. -                log.trace(f"Removing 'py' or 'python' from message.\n\n{content}\n\n") -                pycode = False -                if content.lower().startswith("python"): -                    content = content[6:] -                    pycode = True -                elif content.lower().startswith("py"): -                    content = content[2:] -                    pycode = True - -                if pycode: -                    content = content.splitlines(keepends=True) - -                    # Check if there might be code in the first line, and preserve it. -                    first_line = content[0] -                    if " " in content[0]: -                        first_space = first_line.index(" ") -                        content[0] = first_line[first_space:] -                        content = "".join(content) - -                    # If there's no code we can just get rid of the first line. -                    else: -                        content = "".join(content[1:]) - -                # Strip it again to remove any leading whitespace. This is necessary -                # if the first line of the message looked like ```python <code> -                old = content.strip() - -                # Strips REPL code out of the message if there is any. -                content, repl_code = self.repl_stripping(old) -                if old != content: -                    return (content, old), repl_code - -                # Try to apply indentation fixes to the code. -                content = self.fix_indentation(content) - -                # Check if the code contains backticks, if it does ignore the message. -                if "`" in content: -                    log.trace("Detected ` inside the code, won't reply") -                    return None -                else: -                    log.trace(f"Returning message.\n\n{content}\n\n") -                    return (content,), repl_code - -    def fix_indentation(self, msg: str) -> str: -        """Attempts to fix badly indented code.""" -        def unindent(code: str, skip_spaces: int = 0) -> str: -            """Unindents all code down to the number of spaces given in skip_spaces.""" -            final = "" -            current = code[0] -            leading_spaces = 0 - -            # Get numbers of spaces before code in the first line. -            while current == " ": -                current = code[leading_spaces + 1] -                leading_spaces += 1 -            leading_spaces -= skip_spaces - -            # If there are any, remove that number of spaces from every line. -            if leading_spaces > 0: -                for line in code.splitlines(keepends=True): -                    line = line[leading_spaces:] -                    final += line -                return final -            else: -                return code - -        # Apply fix for "all lines are overindented" case. -        msg = unindent(msg) - -        # If the first line does not end with a colon, we can be -        # certain the next line will be on the same indentation level. -        # -        # If it does end with a colon, we will need to indent all successive -        # lines one additional level. -        first_line = msg.splitlines()[0] -        code = "".join(msg.splitlines(keepends=True)[1:]) -        if not first_line.endswith(":"): -            msg = f"{first_line}\n{unindent(code)}" -        else: -            msg = f"{first_line}\n{unindent(code, 4)}" -        return msg - -    def repl_stripping(self, msg: str) -> Tuple[str, bool]: -        """ -        Strip msg in order to extract Python code out of REPL output. - -        Tries to strip out REPL Python code out of msg and returns the stripped msg. - -        Returns True for the boolean if REPL code was found in the input msg. -        """ -        final = "" -        for line in msg.splitlines(keepends=True): -            if line.startswith(">>>") or line.startswith("..."): -                final += line[4:] -        log.trace(f"Formatted: \n\n{msg}\n\n to \n\n{final}\n\n") -        if not final: -            log.trace(f"Found no REPL code in \n\n{msg}\n\n") -            return msg, False -        else: -            log.trace(f"Found REPL code in \n\n{msg}\n\n") -            return final.rstrip(), True - -    def has_bad_ticks(self, msg: Message) -> bool: -        """Check to see if msg contains ticks that aren't '`'.""" -        not_backticks = [ -            "'''", '"""', "\u00b4\u00b4\u00b4", "\u2018\u2018\u2018", "\u2019\u2019\u2019", -            "\u2032\u2032\u2032", "\u201c\u201c\u201c", "\u201d\u201d\u201d", "\u2033\u2033\u2033", -            "\u3003\u3003\u3003" -        ] - -        return msg.content[:3] in not_backticks - -    @Cog.listener() -    async def on_message(self, msg: Message) -> None: -        """ -        Detect poorly formatted Python code in new messages. - -        If poorly formatted code is detected, send the user a helpful message explaining how to do -        properly formatted Python syntax highlighting codeblocks. -        """ -        is_help_channel = ( -            getattr(msg.channel, "category", None) -            and msg.channel.category.id in (Categories.help_available, Categories.help_in_use) -        ) -        parse_codeblock = ( -            ( -                is_help_channel -                or msg.channel.id in self.channel_cooldowns -                or msg.channel.id in self.channel_whitelist -            ) -            and not msg.author.bot -            and len(msg.content.splitlines()) > 3 -            and not TokenRemover.find_token_in_message(msg) -            and not WEBHOOK_URL_RE.search(msg.content) -        ) - -        if parse_codeblock:  # no token in the msg -            on_cooldown = (time.time() - self.channel_cooldowns.get(msg.channel.id, 0)) < 300 -            if not on_cooldown or DEBUG_MODE: -                try: -                    if self.has_bad_ticks(msg): -                        ticks = msg.content[:3] -                        content = self.codeblock_stripping(f"```{msg.content[3:-3]}```", True) -                        if content is None: -                            return - -                        content, repl_code = content - -                        if len(content) == 2: -                            content = content[1] -                        else: -                            content = content[0] - -                        space_left = 204 -                        if len(content) >= space_left: -                            current_length = 0 -                            lines_walked = 0 -                            for line in content.splitlines(keepends=True): -                                if current_length + len(line) > space_left or lines_walked == 10: -                                    break -                                current_length += len(line) -                                lines_walked += 1 -                            content = content[:current_length] + "#..." -                        content_escaped_markdown = RE_MARKDOWN.sub(r'\\\1', content) -                        howto = ( -                            "It looks like you are trying to paste code into this channel.\n\n" -                            "You seem to be using the wrong symbols to indicate where the codeblock should start. " -                            f"The correct symbols would be \\`\\`\\`, not `{ticks}`.\n\n" -                            "**Here is an example of how it should look:**\n" -                            f"\\`\\`\\`python\n{content_escaped_markdown}\n\\`\\`\\`\n\n" -                            "**This will result in the following:**\n" -                            f"```python\n{content}\n```" -                        ) - -                    else: -                        howto = "" -                        content = self.codeblock_stripping(msg.content, False) -                        if content is None: -                            return - -                        content, repl_code = content -                        # Attempts to parse the message into an AST node. -                        # Invalid Python code will raise a SyntaxError. -                        tree = ast.parse(content[0]) - -                        # Multiple lines of single words could be interpreted as expressions. -                        # This check is to avoid all nodes being parsed as expressions. -                        # (e.g. words over multiple lines) -                        if not all(isinstance(node, ast.Expr) for node in tree.body) or repl_code: -                            # Shorten the code to 10 lines and/or 204 characters. -                            space_left = 204 -                            if content and repl_code: -                                content = content[1] -                            else: -                                content = content[0] - -                            if len(content) >= space_left: -                                current_length = 0 -                                lines_walked = 0 -                                for line in content.splitlines(keepends=True): -                                    if current_length + len(line) > space_left or lines_walked == 10: -                                        break -                                    current_length += len(line) -                                    lines_walked += 1 -                                content = content[:current_length] + "#..." - -                            content_escaped_markdown = RE_MARKDOWN.sub(r'\\\1', content) -                            howto += ( -                                "It looks like you're trying to paste code into this channel.\n\n" -                                "Discord has support for Markdown, which allows you to post code with full " -                                "syntax highlighting. Please use these whenever you paste code, as this " -                                "helps improve the legibility and makes it easier for us to help you.\n\n" -                                f"**To do this, use the following method:**\n" -                                f"\\`\\`\\`python\n{content_escaped_markdown}\n\\`\\`\\`\n\n" -                                "**This will result in the following:**\n" -                                f"```python\n{content}\n```" -                            ) - -                            log.debug(f"{msg.author} posted something that needed to be put inside python code " -                                      "blocks. Sending the user some instructions.") -                        else: -                            log.trace("The code consists only of expressions, not sending instructions") - -                    if howto != "": -                        # Increase amount of codeblock correction in stats -                        self.bot.stats.incr("codeblock_corrections") -                        howto_embed = Embed(description=howto) -                        bot_message = await msg.channel.send(f"Hey {msg.author.mention}!", embed=howto_embed) -                        self.codeblock_message_ids[msg.id] = bot_message.id - -                        self.bot.loop.create_task( -                            wait_for_deletion(bot_message, (msg.author.id,), self.bot) -                        ) -                    else: -                        return - -                    if msg.channel.id not in self.channel_whitelist: -                        self.channel_cooldowns[msg.channel.id] = time.time() - -                except SyntaxError: -                    log.trace( -                        f"{msg.author} posted in a help channel, and when we tried to parse it as Python code, " -                        "ast.parse raised a SyntaxError. This probably just means it wasn't Python code. " -                        f"The message that was posted was:\n\n{msg.content}\n\n" -                    ) - -    @Cog.listener() -    async def on_raw_message_edit(self, payload: RawMessageUpdateEvent) -> None: -        """Check to see if an edited message (previously called out) still contains poorly formatted code.""" -        if ( -            # Checks to see if the message was called out by the bot -            payload.message_id not in self.codeblock_message_ids -            # Makes sure that there is content in the message -            or payload.data.get("content") is None -            # Makes sure there's a channel id in the message payload -            or payload.data.get("channel_id") is None -        ): -            return - -        # Retrieve channel and message objects for use later -        channel = self.bot.get_channel(int(payload.data.get("channel_id"))) -        user_message = await channel.fetch_message(payload.message_id) - -        #  Checks to see if the user has corrected their codeblock.  If it's fixed, has_fixed_codeblock will be None -        has_fixed_codeblock = self.codeblock_stripping(payload.data.get("content"), self.has_bad_ticks(user_message)) - -        # If the message is fixed, delete the bot message and the entry from the id dictionary -        if has_fixed_codeblock is None: -            bot_message = await channel.fetch_message(self.codeblock_message_ids[payload.message_id]) -            await bot_message.delete() -            del self.codeblock_message_ids[payload.message_id] -            log.trace("User's incorrect code block has been fixed. Removing bot formatting message.") -  def setup(bot: Bot) -> None:      """Load the Bot cog.""" diff --git a/bot/utils/__init__.py b/bot/utils/__init__.py index 60170a88f..13533a467 100644 --- a/bot/utils/__init__.py +++ b/bot/utils/__init__.py @@ -1,4 +1,4 @@ -from bot.utils.helpers import CogABCMeta, find_nth_occurrence, pad_base64 +from bot.utils.helpers import CogABCMeta, find_nth_occurrence, has_lines, pad_base64  from bot.utils.services import send_to_paste_service -__all__ = ['CogABCMeta', 'find_nth_occurrence', 'pad_base64', 'send_to_paste_service'] +__all__ = ['CogABCMeta', 'find_nth_occurrence', 'has_lines', 'pad_base64', 'send_to_paste_service'] diff --git a/bot/utils/channel.py b/bot/utils/channel.py new file mode 100644 index 000000000..851f9e1fe --- /dev/null +++ b/bot/utils/channel.py @@ -0,0 +1,33 @@ +import logging + +import discord + +from bot.constants import Categories + +log = logging.getLogger(__name__) + + +def is_help_channel(channel: discord.TextChannel) -> bool: +    """Return True if `channel` is in one of the help categories (excluding dormant).""" +    log.trace(f"Checking if #{channel} is a help channel.") +    categories = (Categories.help_available, Categories.help_in_use) + +    return any(is_in_category(channel, category) for category in categories) + + +def is_in_category(channel: discord.TextChannel, category_id: int) -> bool: +    """Return True if `channel` is within a category with `category_id`.""" +    return getattr(channel, "category_id", None) == category_id + + +async def try_get_channel(channel_id: int, client: discord.Client) -> discord.abc.GuildChannel: +    """Attempt to get or fetch a channel and return it.""" +    log.trace(f"Getting the channel {channel_id}.") + +    channel = client.get_channel(channel_id) +    if not channel: +        log.debug(f"Channel {channel_id} is not in cache; fetching from API.") +        channel = await client.fetch_channel(channel_id) + +    log.trace(f"Channel #{channel} ({channel_id}) retrieved.") +    return channel diff --git a/bot/utils/helpers.py b/bot/utils/helpers.py index d9b60af07..3501a3933 100644 --- a/bot/utils/helpers.py +++ b/bot/utils/helpers.py @@ -18,6 +18,15 @@ def find_nth_occurrence(string: str, substring: str, n: int) -> Optional[int]:      return index +def has_lines(string: str, count: int) -> bool: +    """Return True if `string` has at least `count` lines.""" +    # Benchmarks show this is significantly faster than using str.count("\n") or a for loop & break. +    split = string.split("\n", count - 1) + +    # Make sure the last part isn't empty, which would happen if there was a final newline. +    return split[-1] and len(split) == count + +  def pad_base64(data: str) -> str:      """Return base64 `data` with padding characters to ensure its length is a multiple of 4."""      return data + "=" * (-len(data) % 4) diff --git a/config-default.yml b/config-default.yml index 4f7b1e217..0e7ebf2e3 100644 --- a/config-default.yml +++ b/config-default.yml @@ -145,8 +145,8 @@ guild:          dev_log:            &DEV_LOG        622895325144940554          # Discussion -        meta:               429409067623251969 -        python_discussion:  267624335836053506 +        meta:                               429409067623251969 +        python_discussion:  &PY_DISCUSSION  267624335836053506          # Python Help: Available          how_to_get_help:    704250143020417084 @@ -394,6 +394,23 @@ big_brother:      header_message_limit: 15 +code_block: +    # The channels in which code blocks will be detected. They are not subject to a cooldown. +    channel_whitelist: +        - *BOT_CMD + +    # The channels which will be affected by a cooldown. These channels are also whitelisted. +    cooldown_channels: +        - *PY_DISCUSSION + +    # Sending instructions triggers a cooldown on a per-channel basis. +    # More instruction messages will not be sent in the same channel until the cooldown has elapsed. +    cooldown_seconds: 300 + +    # The minimum amount of lines a message or code block must have for instructions to be sent. +    minimum_lines: 4 + +  free:      # Seconds to elapse for a channel      # to be considered inactive. | 
