diff options
| author | 2020-05-07 14:59:04 -0700 | |
|---|---|---|
| committer | 2020-06-13 11:21:05 -0700 | |
| commit | 89c54fbda81d790d09213fa3093772261d0c4947 (patch) | |
| tree | 9cafd0e983f41ce148300885dc6721871d3308a3 /bot/cogs/codeblock | |
| parent | Code block: remove obsolete functions (diff) | |
Code block: move parsing functions to a separate module
This reduces clutter in the cog. The cog should only have
Discord-related functionality.
Diffstat (limited to '')
| -rw-r--r-- | bot/cogs/codeblock/cog.py | 128 | ||||
| -rw-r--r-- | bot/cogs/codeblock/parsing.py | 117 | 
2 files changed, 126 insertions, 119 deletions
diff --git a/bot/cogs/codeblock/cog.py b/bot/cogs/codeblock/cog.py index d0ffcab3f..dad0cc9cc 100644 --- a/bot/cogs/codeblock/cog.py +++ b/bot/cogs/codeblock/cog.py @@ -1,8 +1,6 @@ -import ast  import logging -import re  import time -from typing import NamedTuple, Optional, Sequence +from typing import Optional  import discord  from discord import Embed, Message, RawMessageUpdateEvent @@ -11,46 +9,10 @@ from discord.ext.commands import Bot, Cog  from bot.cogs.token_remover import TokenRemover  from bot.constants import Categories, Channels, DEBUG_MODE  from bot.utils.messages import wait_for_deletion +from . import parsing  log = logging.getLogger(__name__) -RE_MARKDOWN = re.compile(r'([*_~`|>])') -RE_CODE_BLOCK_LANGUAGE = re.compile(r"```(?:[^\W_]+)\n(.*?)```", re.DOTALL) -BACKTICK = "`" -TICKS = { -    BACKTICK, -    "'", -    '"', -    "\u00b4",  # ACUTE ACCENT -    "\u2018",  # LEFT SINGLE QUOTATION MARK -    "\u2019",  # RIGHT SINGLE QUOTATION MARK -    "\u2032",  # PRIME -    "\u201c",  # LEFT DOUBLE QUOTATION MARK -    "\u201d",  # RIGHT DOUBLE QUOTATION MARK -    "\u2033",  # DOUBLE PRIME -    "\u3003",  # VERTICAL KANA REPEAT MARK UPPER HALF -} -RE_CODE_BLOCK = re.compile( -    fr""" -    ( -        ([{''.join(TICKS)}])  # Put all ticks into a character class within a group. -        \2{{2}}               # Match the previous group 2 more times to ensure it's the same char. -    ) -    ([^\W_]+\n)?              # Optionally match a language specifier followed by a newline. -    (.+?)                     # Match the actual code within the block. -    \1                        # Match the same 3 ticks used at the start of the block. 
-    """, -    re.DOTALL | re.VERBOSE -) - - -class CodeBlock(NamedTuple): -    """Represents a Markdown code block.""" - -    content: str -    language: str -    tick: str -  class CodeBlockCog(Cog, name="Code Block"):      """Detect improperly formatted code blocks and suggest proper formatting.""" @@ -85,8 +47,8 @@ class CodeBlockCog(Cog, name="Code Block"):          else:              content = content[0] -        content = self.truncate(content) -        content_escaped_markdown = RE_MARKDOWN.sub(r'\\\1', content) +        content = parsing.truncate(content) +        content_escaped_markdown = parsing.RE_MARKDOWN.sub(r'\\\1', content)          return (              "It looks like you are trying to paste code into this channel.\n\n" @@ -106,7 +68,7 @@ class CodeBlockCog(Cog, name="Code Block"):          content, repl_code = content -        if not repl_code and not self.is_python_code(content[0]): +        if not repl_code and not parsing.is_python_code(content[0]):              return          if content and repl_code: @@ -114,14 +76,14 @@ class CodeBlockCog(Cog, name="Code Block"):          else:              content = content[0] -        content = self.truncate(content) +        content = parsing.truncate(content)          log.debug(              f"{message.author} posted something that needed to be put inside python code "              f"blocks. Sending the user some instructions."          ) -        content_escaped_markdown = RE_MARKDOWN.sub(r'\\\1', content) +        content_escaped_markdown = parsing.RE_MARKDOWN.sub(r'\\\1', content)          return (              "It looks like you're trying to paste code into this channel.\n\n"              "Discord has support for Markdown, which allows you to post code with full " @@ -134,44 +96,6 @@ class CodeBlockCog(Cog, name="Code Block"):          )      @staticmethod -    def find_code_blocks(message: str) -> Sequence[CodeBlock]: -        """ -        Find and return all Markdown code blocks in the `message`. 
- -        Code blocks with 3 or less lines are excluded. - -        If the `message` contains at least one code block with valid ticks and a specified language, -        return an empty sequence. This is based on the assumption that if the user managed to get -        one code block right, they already know how to fix the rest themselves. -        """ -        code_blocks = [] -        for _, tick, language, content in RE_CODE_BLOCK.finditer(message): -            language = language.strip() -            if tick == BACKTICK and language: -                return () -            elif len(content.split("\n", 3)) > 3: -                code_block = CodeBlock(content, language, tick) -                code_blocks.append(code_block) - -    @staticmethod -    def is_repl_code(content: str, threshold: int = 3) -> bool: -        """Return True if `content` has at least `threshold` number of Python REPL-like lines.""" -        repl_lines = 0 -        for line in content.splitlines(): -            if line.startswith(">>> ") or line.startswith("... "): -                repl_lines += 1 - -            if repl_lines == threshold: -                return True - -        return False - -    @staticmethod -    def has_bad_ticks(message: discord.Message) -> bool: -        """Return True if `message` starts with 3 characters which look like but aren't '`'.""" -        return message.content[:3] in TICKS - -    @staticmethod      def is_help_channel(channel: discord.TextChannel) -> bool:          """Return True if `channel` is in one of the help categories."""          return ( @@ -187,26 +111,6 @@ class CodeBlockCog(Cog, name="Code Block"):          """          return (time.time() - self.channel_cooldowns.get(channel.id, 0)) < 300 -    @staticmethod -    def is_python_code(content: str) -> bool: -        """Return True if `content` is valid Python consisting of more than just expressions.""" -        try: -            # Attempt to parse the message into an AST node. 
-            # Invalid Python code will raise a SyntaxError. -            tree = ast.parse(content) -        except SyntaxError: -            log.trace("Code is not valid Python.") -            return False - -        # Multiple lines of single words could be interpreted as expressions. -        # This check is to avoid all nodes being parsed as expressions. -        # (e.g. words over multiple lines) -        if not all(isinstance(node, ast.Expr) for node in tree.body): -            return True -        else: -            log.trace("Code consists only of expressions.") -            return False -      def is_valid_channel(self, channel: discord.TextChannel) -> bool:          """Return True if `channel` is a help channel, may be on cooldown, or is whitelisted."""          return ( @@ -247,20 +151,6 @@ class CodeBlockCog(Cog, name="Code Block"):              and not TokenRemover.find_token_in_message(message)          ) -    @staticmethod -    def truncate(content: str, max_chars: int = 204, max_lines: int = 10) -> str: -        """Return `content` truncated to be at most `max_chars` or `max_lines` in length.""" -        current_length = 0 -        lines_walked = 0 - -        for line in content.splitlines(keepends=True): -            if current_length + len(line) > max_chars or lines_walked == max_lines: -                break -            current_length += len(line) -            lines_walked += 1 - -        return content[:current_length] + "#..." 
-      @Cog.listener()      async def on_message(self, msg: Message) -> None:          """ @@ -277,7 +167,7 @@ class CodeBlockCog(Cog, name="Code Block"):              return          try: -            if self.has_bad_ticks(msg): +            if parsing.has_bad_ticks(msg):                  description = self.format_bad_ticks_message(msg)              else:                  description = self.format_guide_message(msg) @@ -311,7 +201,7 @@ class CodeBlockCog(Cog, name="Code Block"):          user_message = await channel.fetch_message(payload.message_id)          #  Checks to see if the user has corrected their codeblock.  If it's fixed, has_fixed_codeblock will be None -        has_fixed_codeblock = self.codeblock_stripping(payload.data.get("content"), self.has_bad_ticks(user_message)) +        has_fixed_codeblock = self.codeblock_stripping(payload.data.get("content"), parsing.has_bad_ticks(user_message))          # If the message is fixed, delete the bot message and the entry from the id dictionary          if has_fixed_codeblock is None: diff --git a/bot/cogs/codeblock/parsing.py b/bot/cogs/codeblock/parsing.py new file mode 100644 index 000000000..7a096758b --- /dev/null +++ b/bot/cogs/codeblock/parsing.py @@ -0,0 +1,117 @@ +import ast +import logging +import re +from typing import NamedTuple, Sequence + +import discord + +log = logging.getLogger(__name__) + +RE_MARKDOWN = re.compile(r'([*_~`|>])') +RE_CODE_BLOCK_LANGUAGE = re.compile(r"```(?:[^\W_]+)\n(.*?)```", re.DOTALL) +BACKTICK = "`" +TICKS = { +    BACKTICK, +    "'", +    '"', +    "\u00b4",  # ACUTE ACCENT +    "\u2018",  # LEFT SINGLE QUOTATION MARK +    "\u2019",  # RIGHT SINGLE QUOTATION MARK +    "\u2032",  # PRIME +    "\u201c",  # LEFT DOUBLE QUOTATION MARK +    "\u201d",  # RIGHT DOUBLE QUOTATION MARK +    "\u2033",  # DOUBLE PRIME +    "\u3003",  # VERTICAL KANA REPEAT MARK UPPER HALF +} +RE_CODE_BLOCK = re.compile( +    fr""" +    ( +        ([{''.join(TICKS)}])  # Put all ticks into a character 
class within a group. +        \2{{2}}               # Match the previous group 2 more times to ensure it's the same char. +    ) +    ([^\W_]+\n)?              # Optionally match a language specifier followed by a newline. +    (.+?)                     # Match the actual code within the block. +    \1                        # Match the same 3 ticks used at the start of the block. +    """, +    re.DOTALL | re.VERBOSE +) + + +class CodeBlock(NamedTuple): +    """Represents a Markdown code block.""" + +    content: str +    language: str +    tick: str + + +def find_code_blocks(message: str) -> Sequence[CodeBlock]: +    """ +    Find and return all Markdown code blocks in the `message`. + +    Code blocks with 3 or less lines are excluded. + +    If the `message` contains at least one code block with valid ticks and a specified language, +    return an empty sequence. This is based on the assumption that if the user managed to get +    one code block right, they already know how to fix the rest themselves. +    """ +    code_blocks = [] +    for _, tick, language, content in RE_CODE_BLOCK.finditer(message): +        language = language.strip() +        if tick == BACKTICK and language: +            return () +        elif len(content.split("\n", 3)) > 3: +            code_block = CodeBlock(content, language, tick) +            code_blocks.append(code_block) + + +def has_bad_ticks(message: discord.Message) -> bool: +    """Return True if `message` starts with 3 characters which look like but aren't '`'.""" +    return message.content[:3] in TICKS + + +def is_python_code(content: str) -> bool: +    """Return True if `content` is valid Python consisting of more than just expressions.""" +    try: +        # Attempt to parse the message into an AST node. +        # Invalid Python code will raise a SyntaxError. 
+        tree = ast.parse(content) +    except SyntaxError: +        log.trace("Code is not valid Python.") +        return False + +    # Multiple lines of single words could be interpreted as expressions. +    # This check is to avoid all nodes being parsed as expressions. +    # (e.g. words over multiple lines) +    if not all(isinstance(node, ast.Expr) for node in tree.body): +        return True +    else: +        log.trace("Code consists only of expressions.") +        return False + + +def is_repl_code(content: str, threshold: int = 3) -> bool: +    """Return True if `content` has at least `threshold` number of Python REPL-like lines.""" +    repl_lines = 0 +    for line in content.splitlines(): +        if line.startswith(">>> ") or line.startswith("... "): +            repl_lines += 1 + +        if repl_lines == threshold: +            return True + +    return False + + +def truncate(content: str, max_chars: int = 204, max_lines: int = 10) -> str: +    """Return `content` truncated to be at most `max_chars` or `max_lines` in length.""" +    current_length = 0 +    lines_walked = 0 + +    for line in content.splitlines(keepends=True): +        if current_length + len(line) > max_chars or lines_walked == max_lines: +            break +        current_length += len(line) +        lines_walked += 1 + +    return content[:current_length] + "#..."  |