aboutsummaryrefslogtreecommitdiffstats
path: root/bot/cogs/codeblock/parsing.py
diff options
context:
space:
mode:
author: MarkKoz <[email protected]> 2020-05-07 18:33:34 -0700
committer: MarkKoz <[email protected]> 2020-06-13 11:21:08 -0700
commit: 2bfac307c4b06682db93e2a75108012a586d1c7d (patch)
tree: 03e9cd15cf695db05e93b144f0f1209b61c8be04 /bot/cogs/codeblock/parsing.py
parent: Code block: create a function to format the example code blocks (diff)
Code block: use regex to parse incorrect languages
Regex is simpler and more versatile in this case. The functions in the `instructions` module should be more focused on formatting than parsing, so the parsing was moved to the `parsing` module.

* Move _PY_LANG_CODES to the `parsing` module
* Create a separate function in the `parsing` module to parse bad languages
Diffstat (limited to '')
-rw-r--r-- bot/cogs/codeblock/parsing.py 39
1 files changed, 38 insertions, 1 deletions
diff --git a/bot/cogs/codeblock/parsing.py b/bot/cogs/codeblock/parsing.py
index a49ecc8f7..6fa6811cc 100644
--- a/bot/cogs/codeblock/parsing.py
+++ b/bot/cogs/codeblock/parsing.py
@@ -22,7 +22,7 @@ _TICKS = {
_RE_CODE_BLOCK = re.compile(
fr"""
(?P<ticks>
- (?P<tick>[{''.join(_TICKS)}]) # Put all ticks into a character class within a group.
+ (?P<tick>[{''.join(_TICKS)}]) # Put all ticks into a character class within a group.
\2{{2}} # Match previous group 2 more times to ensure the same char.
)
(?P<lang>[^\W_]+\n)? # Optionally match a language specifier followed by a newline.
@@ -32,6 +32,16 @@ _RE_CODE_BLOCK = re.compile(
re.DOTALL | re.VERBOSE
)
+PY_LANG_CODES = ("python", "py") # Order is important; "py" is second cause it's a subset.
+_RE_LANGUAGE = re.compile(
+ fr"""
+ ^(?P<spaces>\s+)? # Optionally match leading spaces from the beginning.
+ (?P<lang>{'|'.join(PY_LANG_CODES)}) # Match a Python language.
+ (?P<newline>\n)? # Optionally match a newline following the language.
+ """,
+ re.IGNORECASE | re.VERBOSE
+)
+
class CodeBlock(NamedTuple):
"""Represents a Markdown code block."""
@@ -41,6 +51,14 @@ class CodeBlock(NamedTuple):
tick: str
+class BadLanguage(NamedTuple):
+ """Parsed information about a poorly formatted language specifier."""
+
+ language: str
+ leading_spaces: bool
+ terminal_newline: bool
+
+
def find_code_blocks(message: str) -> Optional[Sequence[CodeBlock]]:
"""
Find and return all Markdown code blocks in the `message`.
@@ -108,3 +126,22 @@ def is_repl_code(content: str, threshold: int = 3) -> bool:
log.trace("Content is not Python REPL code.")
return False
+
+
+def parse_bad_language(content: str) -> Optional[BadLanguage]:
+ """
+ Return information about a poorly formatted Python language in code block `content`.
+
+ If the language is not Python, return None.
+ """
+ log.trace("Parsing bad language.")
+
+ match = _RE_LANGUAGE.match(content)
+ if not match:
+ return None
+
+ return BadLanguage(
+ language=match["lang"],
+ leading_spaces=match["spaces"] is not None,
+ terminal_newline=match["newline"] is not None,
+ )