aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author MarkKoz <[email protected]> 2020-05-07 14:59:04 -0700
committer MarkKoz <[email protected]> 2020-06-13 11:21:05 -0700
commit 89c54fbda81d790d09213fa3093772261d0c4947 (patch)
tree 9cafd0e983f41ce148300885dc6721871d3308a3
parent Code block: remove obsolete functions (diff)
Code block: move parsing functions to a separate module
This reduces clutter in the cog. The cog should only have Discord-related functionality.
-rw-r--r-- bot/cogs/codeblock/cog.py 128
-rw-r--r-- bot/cogs/codeblock/parsing.py 117
2 files changed, 126 insertions, 119 deletions
diff --git a/bot/cogs/codeblock/cog.py b/bot/cogs/codeblock/cog.py
index d0ffcab3f..dad0cc9cc 100644
--- a/bot/cogs/codeblock/cog.py
+++ b/bot/cogs/codeblock/cog.py
@@ -1,8 +1,6 @@
-import ast
import logging
-import re
import time
-from typing import NamedTuple, Optional, Sequence
+from typing import Optional
import discord
from discord import Embed, Message, RawMessageUpdateEvent
@@ -11,46 +9,10 @@ from discord.ext.commands import Bot, Cog
from bot.cogs.token_remover import TokenRemover
from bot.constants import Categories, Channels, DEBUG_MODE
from bot.utils.messages import wait_for_deletion
+from . import parsing
log = logging.getLogger(__name__)
-RE_MARKDOWN = re.compile(r'([*_~`|>])')
-RE_CODE_BLOCK_LANGUAGE = re.compile(r"```(?:[^\W_]+)\n(.*?)```", re.DOTALL)
-BACKTICK = "`"
-TICKS = {
- BACKTICK,
- "'",
- '"',
- "\u00b4", # ACUTE ACCENT
- "\u2018", # LEFT SINGLE QUOTATION MARK
- "\u2019", # RIGHT SINGLE QUOTATION MARK
- "\u2032", # PRIME
- "\u201c", # LEFT DOUBLE QUOTATION MARK
- "\u201d", # RIGHT DOUBLE QUOTATION MARK
- "\u2033", # DOUBLE PRIME
- "\u3003", # VERTICAL KANA REPEAT MARK UPPER HALF
-}
-RE_CODE_BLOCK = re.compile(
- fr"""
- (
- ([{''.join(TICKS)}]) # Put all ticks into a character class within a group.
- \2{{2}} # Match the previous group 2 more times to ensure it's the same char.
- )
- ([^\W_]+\n)? # Optionally match a language specifier followed by a newline.
- (.+?) # Match the actual code within the block.
- \1 # Match the same 3 ticks used at the start of the block.
- """,
- re.DOTALL | re.VERBOSE
-)
-
-
-class CodeBlock(NamedTuple):
- """Represents a Markdown code block."""
-
- content: str
- language: str
- tick: str
-
class CodeBlockCog(Cog, name="Code Block"):
"""Detect improperly formatted code blocks and suggest proper formatting."""
@@ -85,8 +47,8 @@ class CodeBlockCog(Cog, name="Code Block"):
else:
content = content[0]
- content = self.truncate(content)
- content_escaped_markdown = RE_MARKDOWN.sub(r'\\\1', content)
+ content = parsing.truncate(content)
+ content_escaped_markdown = parsing.RE_MARKDOWN.sub(r'\\\1', content)
return (
"It looks like you are trying to paste code into this channel.\n\n"
@@ -106,7 +68,7 @@ class CodeBlockCog(Cog, name="Code Block"):
content, repl_code = content
- if not repl_code and not self.is_python_code(content[0]):
+ if not repl_code and not parsing.is_python_code(content[0]):
return
if content and repl_code:
@@ -114,14 +76,14 @@ class CodeBlockCog(Cog, name="Code Block"):
else:
content = content[0]
- content = self.truncate(content)
+ content = parsing.truncate(content)
log.debug(
f"{message.author} posted something that needed to be put inside python code "
f"blocks. Sending the user some instructions."
)
- content_escaped_markdown = RE_MARKDOWN.sub(r'\\\1', content)
+ content_escaped_markdown = parsing.RE_MARKDOWN.sub(r'\\\1', content)
return (
"It looks like you're trying to paste code into this channel.\n\n"
"Discord has support for Markdown, which allows you to post code with full "
@@ -134,44 +96,6 @@ class CodeBlockCog(Cog, name="Code Block"):
)
@staticmethod
- def find_code_blocks(message: str) -> Sequence[CodeBlock]:
- """
- Find and return all Markdown code blocks in the `message`.
-
- Code blocks with 3 or less lines are excluded.
-
- If the `message` contains at least one code block with valid ticks and a specified language,
- return an empty sequence. This is based on the assumption that if the user managed to get
- one code block right, they already know how to fix the rest themselves.
- """
- code_blocks = []
- for _, tick, language, content in RE_CODE_BLOCK.finditer(message):
- language = language.strip()
- if tick == BACKTICK and language:
- return ()
- elif len(content.split("\n", 3)) > 3:
- code_block = CodeBlock(content, language, tick)
- code_blocks.append(code_block)
-
- @staticmethod
- def is_repl_code(content: str, threshold: int = 3) -> bool:
- """Return True if `content` has at least `threshold` number of Python REPL-like lines."""
- repl_lines = 0
- for line in content.splitlines():
- if line.startswith(">>> ") or line.startswith("... "):
- repl_lines += 1
-
- if repl_lines == threshold:
- return True
-
- return False
-
- @staticmethod
- def has_bad_ticks(message: discord.Message) -> bool:
- """Return True if `message` starts with 3 characters which look like but aren't '`'."""
- return message.content[:3] in TICKS
-
- @staticmethod
def is_help_channel(channel: discord.TextChannel) -> bool:
"""Return True if `channel` is in one of the help categories."""
return (
@@ -187,26 +111,6 @@ class CodeBlockCog(Cog, name="Code Block"):
"""
return (time.time() - self.channel_cooldowns.get(channel.id, 0)) < 300
- @staticmethod
- def is_python_code(content: str) -> bool:
- """Return True if `content` is valid Python consisting of more than just expressions."""
- try:
- # Attempt to parse the message into an AST node.
- # Invalid Python code will raise a SyntaxError.
- tree = ast.parse(content)
- except SyntaxError:
- log.trace("Code is not valid Python.")
- return False
-
- # Multiple lines of single words could be interpreted as expressions.
- # This check is to avoid all nodes being parsed as expressions.
- # (e.g. words over multiple lines)
- if not all(isinstance(node, ast.Expr) for node in tree.body):
- return True
- else:
- log.trace("Code consists only of expressions.")
- return False
-
def is_valid_channel(self, channel: discord.TextChannel) -> bool:
"""Return True if `channel` is a help channel, may be on cooldown, or is whitelisted."""
return (
@@ -247,20 +151,6 @@ class CodeBlockCog(Cog, name="Code Block"):
and not TokenRemover.find_token_in_message(message)
)
- @staticmethod
- def truncate(content: str, max_chars: int = 204, max_lines: int = 10) -> str:
- """Return `content` truncated to be at most `max_chars` or `max_lines` in length."""
- current_length = 0
- lines_walked = 0
-
- for line in content.splitlines(keepends=True):
- if current_length + len(line) > max_chars or lines_walked == max_lines:
- break
- current_length += len(line)
- lines_walked += 1
-
- return content[:current_length] + "#..."
-
@Cog.listener()
async def on_message(self, msg: Message) -> None:
"""
@@ -277,7 +167,7 @@ class CodeBlockCog(Cog, name="Code Block"):
return
try:
- if self.has_bad_ticks(msg):
+ if parsing.has_bad_ticks(msg):
description = self.format_bad_ticks_message(msg)
else:
description = self.format_guide_message(msg)
@@ -311,7 +201,7 @@ class CodeBlockCog(Cog, name="Code Block"):
user_message = await channel.fetch_message(payload.message_id)
# Checks to see if the user has corrected their codeblock. If it's fixed, has_fixed_codeblock will be None
- has_fixed_codeblock = self.codeblock_stripping(payload.data.get("content"), self.has_bad_ticks(user_message))
+ has_fixed_codeblock = self.codeblock_stripping(payload.data.get("content"), parsing.has_bad_ticks(user_message))
# If the message is fixed, delete the bot message and the entry from the id dictionary
if has_fixed_codeblock is None:
diff --git a/bot/cogs/codeblock/parsing.py b/bot/cogs/codeblock/parsing.py
new file mode 100644
index 000000000..7a096758b
--- /dev/null
+++ b/bot/cogs/codeblock/parsing.py
@@ -0,0 +1,117 @@
+import ast
+import logging
+import re
+from typing import NamedTuple, Sequence
+
+import discord
+
+log = logging.getLogger(__name__)
+
+# Captures a single one of the characters * _ ~ ` | > so that each match can be escaped.
+RE_MARKDOWN = re.compile(r'([*_~`|>])')
+# Matches a block fenced by 3 backticks which has a language specifier; captures only the code.
+RE_CODE_BLOCK_LANGUAGE = re.compile(r"```(?:[^\W_]+)\n(.*?)```", re.DOTALL)
+# The tick character which find_code_blocks treats as valid.
+BACKTICK = "`"
+# Every character RE_CODE_BLOCK accepts as a block delimiter: the backtick and its look-alikes.
+TICKS = {
+ BACKTICK,
+ "'",
+ '"',
+ "\u00b4", # ACUTE ACCENT
+ "\u2018", # LEFT SINGLE QUOTATION MARK
+ "\u2019", # RIGHT SINGLE QUOTATION MARK
+ "\u2032", # PRIME
+ "\u201c", # LEFT DOUBLE QUOTATION MARK
+ "\u201d", # RIGHT DOUBLE QUOTATION MARK
+ "\u2033", # DOUBLE PRIME
+ "\u3003", # DITTO MARK
+}
+# Matches a block delimited by 3 identical ticks from TICKS.
+# Capture groups: 1 = all 3 opening ticks, 2 = a single tick,
+# 3 = the optional language specifier including its trailing newline, 4 = the code.
+RE_CODE_BLOCK = re.compile(
+ fr"""
+ (
+ ([{''.join(TICKS)}]) # Put all ticks into a character class within a group.
+ \2{{2}} # Match the previous group 2 more times to ensure it's the same char.
+ )
+ ([^\W_]+\n)? # Optionally match a language specifier followed by a newline.
+ (.+?) # Match the actual code within the block.
+ \1 # Match the same 3 ticks used at the start of the block.
+ """,
+ re.DOTALL | re.VERBOSE
+)
+
+
+class CodeBlock(NamedTuple):
+ """Represents a Markdown code block."""
+
+ # The code found between the opening and closing ticks.
+ content: str
+ # The language specifier; find_code_blocks strips surrounding whitespace from it.
+ language: str
+ # The single tick character which is repeated to delimit the block.
+ tick: str
+
+
+def find_code_blocks(message: str) -> Sequence[CodeBlock]:
+    """
+    Find and return all Markdown code blocks in the `message`.
+
+    Code blocks with 3 or less lines are excluded.
+
+    If the `message` contains at least one code block with valid ticks and a specified language,
+    return an empty sequence. This is based on the assumption that if the user managed to get
+    one code block right, they already know how to fix the rest themselves.
+    """
+    code_blocks = []
+    for match in RE_CODE_BLOCK.finditer(message):
+        # finditer yields Match objects, which can't be unpacked directly; groups() returns
+        # the 4 capture groups. The first one (all 3 opening ticks) isn't needed.
+        _, tick, language, content = match.groups()
+
+        # The language group is optional and is therefore None when nothing was specified.
+        language = (language or "").strip()
+
+        if tick == BACKTICK and language:
+            return ()
+        elif len(content.split("\n", 3)) > 3:
+            code_blocks.append(CodeBlock(content, language, tick))
+
+    return code_blocks
+
+
+def has_bad_ticks(message: discord.Message) -> bool:
+    """
+    Return True if `message` starts with 3 characters which look like but aren't '`'.
+
+    The 3 characters must be the same character from `TICKS`, the backtick itself excluded.
+    """
+    opening = message.content[:3]
+
+    # `TICKS` contains single characters, so a 3-character slice can never be a member of it.
+    # Check the repeated character instead.
+    return (
+        len(opening) == 3
+        and opening == opening[0] * 3
+        and opening[0] != BACKTICK
+        and opening[0] in TICKS
+    )
+
+
+def is_python_code(content: str) -> bool:
+    """
+    Check whether `content` parses as Python and isn't made up solely of expressions.
+
+    Return False if parsing raises a SyntaxError or if every top-level node is an expression.
+    """
+    try:
+        # ast.parse raises a SyntaxError for text which isn't valid Python.
+        module = ast.parse(content)
+    except SyntaxError:
+        log.trace("Code is not valid Python.")
+        return False
+
+    # Plain words spread over multiple lines parse as bare expressions,
+    # so at least one node of another kind is required.
+    only_expressions = all(isinstance(node, ast.Expr) for node in module.body)
+    if only_expressions:
+        log.trace("Code consists only of expressions.")
+        return False
+
+    return True
+
+
+def is_repl_code(content: str, threshold: int = 3) -> bool:
+    """Return True once `threshold` lines of `content` starting with a REPL prompt are counted."""
+    prompts = (">>> ", "... ")
+    seen = 0
+    for line in content.splitlines():
+        if not line.startswith(prompts):
+            continue
+
+        seen += 1
+        if seen == threshold:
+            # Enough REPL-like lines were found; the remaining lines needn't be checked.
+            return True
+
+    return False
+
+
+def truncate(content: str, max_chars: int = 204, max_lines: int = 10) -> str:
+    """
+    Cut `content` off at a line boundary and append "#..." to the result.
+
+    Whole lines are kept until the next one would push the total past `max_chars` characters
+    or until `max_lines` lines have been kept.
+    """
+    kept_chars = 0
+    for index, line in enumerate(content.splitlines(keepends=True)):
+        # `index` equals the number of lines kept so far.
+        if index == max_lines or kept_chars + len(line) > max_chars:
+            break
+        kept_chars += len(line)
+
+    return content[:kept_chars] + "#..."