2 files changed, 126 insertions, 119 deletions
diff --git a/bot/cogs/codeblock/cog.py b/bot/cogs/codeblock/cog.py
index d0ffcab3f..dad0cc9cc 100644
--- a/bot/cogs/codeblock/cog.py
+++ b/bot/cogs/codeblock/cog.py
@@ -1,8 +1,6 @@
-import ast
 import logging
-import re
 import time
-from typing import NamedTuple, Optional, Sequence
+from typing import Optional
 
 import discord
 from discord import Embed, Message, RawMessageUpdateEvent
@@ -11,46 +9,10 @@ from discord.ext.commands import Bot, Cog
 from bot.cogs.token_remover import TokenRemover
 from bot.constants import Categories, Channels, DEBUG_MODE
 from bot.utils.messages import wait_for_deletion
+from . import parsing
 
 log = logging.getLogger(__name__)
 
-RE_MARKDOWN = re.compile(r'([*_~`|>])')
-RE_CODE_BLOCK_LANGUAGE = re.compile(r"```(?:[^\W_]+)\n(.*?)```", re.DOTALL)
-BACKTICK = "`"
-TICKS = {
-    BACKTICK,
-    "'",
-    '"',
-    "\u00b4",  # ACUTE ACCENT
-    "\u2018",  # LEFT SINGLE QUOTATION MARK
-    "\u2019",  # RIGHT SINGLE QUOTATION MARK
-    "\u2032",  # PRIME
-    "\u201c",  # LEFT DOUBLE QUOTATION MARK
-    "\u201d",  # RIGHT DOUBLE QUOTATION MARK
-    "\u2033",  # DOUBLE PRIME
-    "\u3003",  # VERTICAL KANA REPEAT MARK UPPER HALF
-}
-RE_CODE_BLOCK = re.compile(
-    fr"""
-    (
-        ([{''.join(TICKS)}])  # Put all ticks into a character class within a group.
-        \2{{2}}               # Match the previous group 2 more times to ensure it's the same char.
-    )
-    ([^\W_]+\n)?              # Optionally match a language specifier followed by a newline.
-    (.+?)                     # Match the actual code within the block.
-    \1                        # Match the same 3 ticks used at the start of the block.
-    """,
-    re.DOTALL | re.VERBOSE
-)
-
-
-class CodeBlock(NamedTuple):
-    """Represents a Markdown code block."""
-
-    content: str
-    language: str
-    tick: str
-
 
 class CodeBlockCog(Cog, name="Code Block"):
     """Detect improperly formatted code blocks and suggest proper formatting."""
@@ -85,8 +47,8 @@ class CodeBlockCog(Cog, name="Code Block"):
         else:
             content = content[0]
 
-        content = self.truncate(content)
-        content_escaped_markdown = RE_MARKDOWN.sub(r'\\\1', content)
+        content = parsing.truncate(content)
+        content_escaped_markdown = parsing.RE_MARKDOWN.sub(r'\\\1', content)
 
         return (
             "It looks like you are trying to paste code into this channel.\n\n"
@@ -106,7 +68,7 @@ class CodeBlockCog(Cog, name="Code Block"):
 
         content, repl_code = content
 
-        if not repl_code and not self.is_python_code(content[0]):
+        if not repl_code and not parsing.is_python_code(content[0]):
             return
 
         if content and repl_code:
@@ -114,14 +76,14 @@ class CodeBlockCog(Cog, name="Code Block"):
         else:
             content = content[0]
 
-        content = self.truncate(content)
+        content = parsing.truncate(content)
 
         log.debug(
             f"{message.author} posted something that needed to be put inside python code "
             f"blocks. Sending the user some instructions."
         )
 
-        content_escaped_markdown = RE_MARKDOWN.sub(r'\\\1', content)
+        content_escaped_markdown = parsing.RE_MARKDOWN.sub(r'\\\1', content)
         return (
             "It looks like you're trying to paste code into this channel.\n\n"
             "Discord has support for Markdown, which allows you to post code with full "
@@ -134,44 +96,6 @@ class CodeBlockCog(Cog, name="Code Block"):
         )
 
     @staticmethod
-    def find_code_blocks(message: str) -> Sequence[CodeBlock]:
-        """
-        Find and return all Markdown code blocks in the `message`.
-
-        Code blocks with 3 or less lines are excluded.
-
-        If the `message` contains at least one code block with valid ticks and a specified language,
-        return an empty sequence. This is based on the assumption that if the user managed to get
-        one code block right, they already know how to fix the rest themselves.
-        """
-        code_blocks = []
-        for _, tick, language, content in RE_CODE_BLOCK.finditer(message):
-            language = language.strip()
-            if tick == BACKTICK and language:
-                return ()
-            elif len(content.split("\n", 3)) > 3:
-                code_block = CodeBlock(content, language, tick)
-                code_blocks.append(code_block)
-
-    @staticmethod
-    def is_repl_code(content: str, threshold: int = 3) -> bool:
-        """Return True if `content` has at least `threshold` number of Python REPL-like lines."""
-        repl_lines = 0
-        for line in content.splitlines():
-            if line.startswith(">>> ") or line.startswith("... "):
-                repl_lines += 1
-
-            if repl_lines == threshold:
-                return True
-
-        return False
-
-    @staticmethod
-    def has_bad_ticks(message: discord.Message) -> bool:
-        """Return True if `message` starts with 3 characters which look like but aren't '`'."""
-        return message.content[:3] in TICKS
-
-    @staticmethod
     def is_help_channel(channel: discord.TextChannel) -> bool:
         """Return True if `channel` is in one of the help categories."""
         return (
@@ -187,26 +111,6 @@ class CodeBlockCog(Cog, name="Code Block"):
         """
         return (time.time() - self.channel_cooldowns.get(channel.id, 0)) < 300
 
-    @staticmethod
-    def is_python_code(content: str) -> bool:
-        """Return True if `content` is valid Python consisting of more than just expressions."""
-        try:
-            # Attempt to parse the message into an AST node.
-            # Invalid Python code will raise a SyntaxError.
-            tree = ast.parse(content)
-        except SyntaxError:
-            log.trace("Code is not valid Python.")
-            return False
-
-        # Multiple lines of single words could be interpreted as expressions.
-        # This check is to avoid all nodes being parsed as expressions.
-        # (e.g. words over multiple lines)
-        if not all(isinstance(node, ast.Expr) for node in tree.body):
-            return True
-        else:
-            log.trace("Code consists only of expressions.")
-            return False
-
     def is_valid_channel(self, channel: discord.TextChannel) -> bool:
         """Return True if `channel` is a help channel, may be on cooldown, or is whitelisted."""
         return (
@@ -247,20 +151,6 @@ class CodeBlockCog(Cog, name="Code Block"):
             and not TokenRemover.find_token_in_message(message)
         )
 
-    @staticmethod
-    def truncate(content: str, max_chars: int = 204, max_lines: int = 10) -> str:
-        """Return `content` truncated to be at most `max_chars` or `max_lines` in length."""
-        current_length = 0
-        lines_walked = 0
-
-        for line in content.splitlines(keepends=True):
-            if current_length + len(line) > max_chars or lines_walked == max_lines:
-                break
-            current_length += len(line)
-            lines_walked += 1
-
-        return content[:current_length] + "#..."
-
     @Cog.listener()
     async def on_message(self, msg: Message) -> None:
         """
@@ -277,7 +167,7 @@ class CodeBlockCog(Cog, name="Code Block"):
             return
 
         try:
-            if self.has_bad_ticks(msg):
+            if parsing.has_bad_ticks(msg):
                 description = self.format_bad_ticks_message(msg)
             else:
                 description = self.format_guide_message(msg)
@@ -311,7 +201,7 @@ class CodeBlockCog(Cog, name="Code Block"):
         user_message = await channel.fetch_message(payload.message_id)
 
         #  Checks to see if the user has corrected their codeblock.  If it's fixed, has_fixed_codeblock will be None
-        has_fixed_codeblock = self.codeblock_stripping(payload.data.get("content"), self.has_bad_ticks(user_message))
+        has_fixed_codeblock = self.codeblock_stripping(payload.data.get("content"), parsing.has_bad_ticks(user_message))
 
         # If the message is fixed, delete the bot message and the entry from the id dictionary
         if has_fixed_codeblock is None:
diff --git a/bot/cogs/codeblock/parsing.py b/bot/cogs/codeblock/parsing.py
new file mode 100644
index 000000000..7a096758b
--- /dev/null
+++ b/bot/cogs/codeblock/parsing.py
@@ -0,0 +1,117 @@
+import ast
+import logging
+import re
+from typing import NamedTuple, Sequence
+
+import discord
+
+log = logging.getLogger(__name__)
+
+RE_MARKDOWN = re.compile(r'([*_~`|>])')  # Captures one Markdown formatting character.
+RE_CODE_BLOCK_LANGUAGE = re.compile(r"```(?:[^\W_]+)\n(.*?)```", re.DOTALL)
+BACKTICK = "`"
+TICKS = {  # The backtick plus the characters accepted as code block delimiters by RE_CODE_BLOCK.
+    BACKTICK,
+    "'",
+    '"',
+    "\u00b4",  # ACUTE ACCENT
+    "\u2018",  # LEFT SINGLE QUOTATION MARK
+    "\u2019",  # RIGHT SINGLE QUOTATION MARK
+    "\u2032",  # PRIME
+    "\u201c",  # LEFT DOUBLE QUOTATION MARK
+    "\u201d",  # RIGHT DOUBLE QUOTATION MARK
+    "\u2033",  # DOUBLE PRIME
+    "\u3003",  # DITTO MARK
+}
+RE_CODE_BLOCK = re.compile(
+    fr"""
+    (
+        ([{''.join(TICKS)}])  # Put all ticks into a character class within a group.
+        \2{{2}}               # Match the previous group 2 more times to ensure it's the same char.
+    )
+    ([^\W_]+\n)?              # Optionally match a language specifier followed by a newline.
+    (.+?)                     # Match the actual code within the block.
+    \1                        # Match the same 3 ticks used at the start of the block.
+    """,
+    re.DOTALL | re.VERBOSE
+)
+
+
+class CodeBlock(NamedTuple):
+    """Represents a Markdown code block."""
+
+    content: str  # The text between the opening and closing ticks.
+    language: str  # The language specifier of the block, if any.
+    tick: str  # The single character that is repeated to delimit the block.
+
+
+def find_code_blocks(message: str) -> Sequence[CodeBlock]:
+    """
+    Find and return all Markdown code blocks in the `message`.
+
+    Code blocks with 3 or less lines are excluded.
+
+    If the `message` contains at least one code block with valid ticks and a specified language,
+    return an empty sequence. This is based on the assumption that if the user managed to get
+    one code block right, they already know how to fix the rest themselves.
+    """
+    code_blocks = []
+    for _, tick, language, content in RE_CODE_BLOCK.findall(message):  # Group tuples; unmatched groups are "".
+        language = language.strip()
+        if tick == BACKTICK and language:
+            return ()
+        elif len(content.split("\n", 3)) > 3:
+            code_blocks.append(CodeBlock(content, language, tick))
+    return code_blocks
+
+
+def has_bad_ticks(message: discord.Message) -> bool:
+    """Return True if `message` starts with 3 identical characters which look like but aren't '`'."""
+    return message.content[:3] in {tick * 3 for tick in TICKS - {BACKTICK}}
+
+
+def is_python_code(content: str) -> bool:
+    """Return True if `content` parses as Python and holds at least one non-expression statement."""
+    try:
+        # `ast.parse` raises a SyntaxError when `content` is not valid Python.
+        statements = ast.parse(content).body
+    except SyntaxError:
+        log.trace("Code is not valid Python.")
+        return False
+
+    # Plain words spread over several lines also parse, as bare expression statements,
+    # so valid syntax alone is not enough: at least one statement has to be
+    # something other than an expression.
+    only_expressions = all(isinstance(statement, ast.Expr) for statement in statements)
+    if only_expressions:
+        log.trace("Code consists only of expressions.")
+        return False
+
+    return True
+
+
+def is_repl_code(content: str, threshold: int = 3) -> bool:
+    """Return True once `threshold` lines of `content` are found to start with a Python REPL prompt."""
+    prompt_count = 0
+    for candidate in content.splitlines():
+        # A REPL-like line starts with either ">>> " or "... ", trailing space included.
+        if candidate.startswith((">>> ", "... ")):
+            prompt_count += 1
+        if prompt_count == threshold:
+            return True
+
+    return False
+
+
+def truncate(content: str, max_chars: int = 204, max_lines: int = 10) -> str:
+    """Return the leading whole lines of `content` within `max_chars` and `max_lines`, then `#...`."""
+    kept_chars = 0
+    # Only whole lines are kept; line endings count towards the character budget.
+    for index, text in enumerate(content.splitlines(keepends=True)):
+        fits = kept_chars + len(text) <= max_chars
+        if index == max_lines or not fits:
+            break
+        kept_chars += len(text)
+
+    # The marker is appended unconditionally, even when nothing was cut off.
+    return content[:kept_chars] + "#..."