aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: MarkKoz <[email protected]> 2020-05-07 18:33:34 -0700
committer: MarkKoz <[email protected]> 2020-06-13 11:21:08 -0700
commit: 2bfac307c4b06682db93e2a75108012a586d1c7d (patch)
tree: 03e9cd15cf695db05e93b144f0f1209b61c8be04
parent: Code block: create a function to format the example code blocks (diff)
Code block: use regex to parse incorrect languages
Regex is simpler and more versatile in this case. The functions in the `instructions` module should be more focused on formatting than parsing, so the parsing was moved to the `parsing` module.

* Move `_PY_LANG_CODES` to the `parsing` module (where it becomes the public `PY_LANG_CODES`)
* Create a separate function in the `parsing` module (`parse_bad_language`) to parse bad languages
-rw-r--r-- bot/cogs/codeblock/instructions.py 30
-rw-r--r-- bot/cogs/codeblock/parsing.py 39
2 files changed, 51 insertions, 18 deletions
diff --git a/bot/cogs/codeblock/instructions.py b/bot/cogs/codeblock/instructions.py
index bba84c66a..c1a6645b3 100644
--- a/bot/cogs/codeblock/instructions.py
+++ b/bot/cogs/codeblock/instructions.py
@@ -5,7 +5,6 @@ from . import parsing
log = logging.getLogger(__name__)
-_PY_LANG_CODES = ("python", "py") # Order is important; "py" is second cause it's a subset.
_EXAMPLE_PY = "{lang}\nprint('Hello, world!')" # Make sure to escape any Markdown symbols here.
_EXAMPLE_CODE_BLOCKS = (
"\\`\\`\\`{content}\n\\`\\`\\`\n\n"
@@ -16,16 +15,14 @@ _EXAMPLE_CODE_BLOCKS = (
def _get_example(language: str) -> str:
"""Return an example of a correct code block using `language` for syntax highlighting."""
- language_lower = language.lower() # It's only valid if it's all lowercase.
-
# Determine the example code to put in the code block based on the language specifier.
- if language_lower in _PY_LANG_CODES:
+ if language.lower() in parsing.PY_LANG_CODES:
log.trace(f"Code block has a Python language specifier `{language}`.")
- content = _EXAMPLE_PY.format(lang=language_lower)
- elif language_lower:
+ content = _EXAMPLE_PY.format(lang=language)
+ elif language:
log.trace(f"Code block has a foreign language specifier `{language}`.")
# It's not feasible to determine what would be a valid example for other languages.
- content = f"{language_lower}\n..."
+ content = f"{language}\n..."
else:
log.trace("Code block has no language specifier.")
content = "Hello, world!"
@@ -92,26 +89,25 @@ def _get_bad_lang_message(content: str) -> Optional[str]:
If `content` doesn't start with "python" or "py" as the language specifier, return None.
"""
log.trace("Creating instructions for a poorly specified language.")
+ info = parsing.parse_bad_language(content)
- stripped = content.lstrip().lower()
- lang = next((lang for lang in _PY_LANG_CODES if stripped.startswith(lang)), None)
-
- if lang:
+ if info:
# Note that _get_bad_ticks_message expects the first line to have an extra newline.
lines = ["It looks like you incorrectly specified a language for your code block.\n"]
+ language = info.language
- if content.startswith(" "):
+ if info.leading_spaces:
log.trace("Language specifier was preceded by a space.")
- lines.append(f"Make sure there are no spaces between the back ticks and `{lang}`.")
+ lines.append(f"Make sure there are no spaces between the back ticks and `{language}`.")
- if stripped[len(lang)] != "\n":
+ if not info.terminal_newline:
log.trace("Language specifier was not followed by a newline.")
lines.append(
- f"Make sure you put your code on a new line following `{lang}`. "
- f"There must not be any spaces after `{lang}`."
+ f"Make sure you put your code on a new line following `{language}`. "
+ f"There must not be any spaces after `{language}`."
)
- example_blocks = _get_example(lang)
+ example_blocks = _get_example(language)
lines.append(f"\n**Here is an example of how it should look:**\n{example_blocks}")
return "\n".join(lines)
diff --git a/bot/cogs/codeblock/parsing.py b/bot/cogs/codeblock/parsing.py
index a49ecc8f7..6fa6811cc 100644
--- a/bot/cogs/codeblock/parsing.py
+++ b/bot/cogs/codeblock/parsing.py
@@ -22,7 +22,7 @@ _TICKS = {
_RE_CODE_BLOCK = re.compile(
fr"""
(?P<ticks>
- (?P<tick>[{''.join(_TICKS)}]) # Put all ticks into a character class within a group.
+ (?P<tick>[{''.join(_TICKS)}]) # Put all ticks into a character class within a group.
\2{{2}} # Match previous group 2 more times to ensure the same char.
)
(?P<lang>[^\W_]+\n)? # Optionally match a language specifier followed by a newline.
@@ -32,6 +32,16 @@ _RE_CODE_BLOCK = re.compile(
re.DOTALL | re.VERBOSE
)
+PY_LANG_CODES = ("python", "py") # Order is important; "py" is second cause it's a subset.
+_RE_LANGUAGE = re.compile(
+ fr"""
+ ^(?P<spaces>\s+)? # Optionally match leading spaces from the beginning.
+ (?P<lang>{'|'.join(PY_LANG_CODES)}) # Match a Python language.
+ (?P<newline>\n)? # Optionally match a newline following the language.
+ """,
+ re.IGNORECASE | re.VERBOSE
+)
+
class CodeBlock(NamedTuple):
"""Represents a Markdown code block."""
@@ -41,6 +51,14 @@ class CodeBlock(NamedTuple):
tick: str
+class BadLanguage(NamedTuple):
+ """Parsed information about a poorly formatted language specifier."""
+
+ language: str
+ leading_spaces: bool
+ terminal_newline: bool
+
+
def find_code_blocks(message: str) -> Optional[Sequence[CodeBlock]]:
"""
Find and return all Markdown code blocks in the `message`.
@@ -108,3 +126,22 @@ def is_repl_code(content: str, threshold: int = 3) -> bool:
log.trace("Content is not Python REPL code.")
return False
+
+
+def parse_bad_language(content: str) -> Optional[BadLanguage]:
+ """
+ Return information about a poorly formatted Python language in code block `content`.
+
+ If the language is not Python, return None.
+ """
+ log.trace("Parsing bad language.")
+
+ match = _RE_LANGUAGE.match(content)
+ if not match:
+ return None
+
+ return BadLanguage(
+ language=match["lang"],
+ leading_spaces=match["spaces"] is not None,
+ terminal_newline=match["newline"] is not None,
+ )