diff options
| author | 2020-05-07 18:33:34 -0700 | |
|---|---|---|
| committer | 2020-06-13 11:21:08 -0700 | |
| commit | 2bfac307c4b06682db93e2a75108012a586d1c7d (patch) | |
| tree | 03e9cd15cf695db05e93b144f0f1209b61c8be04 | |
| parent | Code block: create a function to format the example code blocks (diff) | |
Code block: use regex to parse incorrect languages
Regex is simpler and more versatile in this case. The functions in the
`instructions` module should be more focused on formatting than parsing,
so the parsing was moved to the `parsing` module.
* Move _PY_LANG_CODES to the `parsing` module
* Create a separate function in the `parsing` module to parse bad
languages
| -rw-r--r-- | bot/cogs/codeblock/instructions.py | 30 | ||||
| -rw-r--r-- | bot/cogs/codeblock/parsing.py | 39 |
2 files changed, 51 insertions, 18 deletions
diff --git a/bot/cogs/codeblock/instructions.py b/bot/cogs/codeblock/instructions.py index bba84c66a..c1a6645b3 100644 --- a/bot/cogs/codeblock/instructions.py +++ b/bot/cogs/codeblock/instructions.py @@ -5,7 +5,6 @@ from . import parsing log = logging.getLogger(__name__) -_PY_LANG_CODES = ("python", "py") # Order is important; "py" is second cause it's a subset. _EXAMPLE_PY = "{lang}\nprint('Hello, world!')" # Make sure to escape any Markdown symbols here. _EXAMPLE_CODE_BLOCKS = ( "\\`\\`\\`{content}\n\\`\\`\\`\n\n" @@ -16,16 +15,14 @@ _EXAMPLE_CODE_BLOCKS = ( def _get_example(language: str) -> str: """Return an example of a correct code block using `language` for syntax highlighting.""" - language_lower = language.lower() # It's only valid if it's all lowercase. - # Determine the example code to put in the code block based on the language specifier. - if language_lower in _PY_LANG_CODES: + if language.lower() in parsing.PY_LANG_CODES: log.trace(f"Code block has a Python language specifier `{language}`.") - content = _EXAMPLE_PY.format(lang=language_lower) - elif language_lower: + content = _EXAMPLE_PY.format(lang=language) + elif language: log.trace(f"Code block has a foreign language specifier `{language}`.") # It's not feasible to determine what would be a valid example for other languages. - content = f"{language_lower}\n..." + content = f"{language}\n..." else: log.trace("Code block has no language specifier.") content = "Hello, world!" @@ -92,26 +89,25 @@ def _get_bad_lang_message(content: str) -> Optional[str]: If `content` doesn't start with "python" or "py" as the language specifier, return None. """ log.trace("Creating instructions for a poorly specified language.") + info = parsing.parse_bad_language(content) - stripped = content.lstrip().lower() - lang = next((lang for lang in _PY_LANG_CODES if stripped.startswith(lang)), None) - - if lang: + if info: # Note that _get_bad_ticks_message expects the first line to have an extra newline. lines = ["It looks like you incorrectly specified a language for your code block.\n"] + language = info.language - if content.startswith(" "): + if info.leading_spaces: log.trace("Language specifier was preceded by a space.") - lines.append(f"Make sure there are no spaces between the back ticks and `{lang}`.") + lines.append(f"Make sure there are no spaces between the back ticks and `{language}`.") - if stripped[len(lang)] != "\n": + if not info.terminal_newline: log.trace("Language specifier was not followed by a newline.") lines.append( - f"Make sure you put your code on a new line following `{lang}`. " - f"There must not be any spaces after `{lang}`." + f"Make sure you put your code on a new line following `{language}`. " + f"There must not be any spaces after `{language}`." ) - example_blocks = _get_example(lang) + example_blocks = _get_example(language) lines.append(f"\n**Here is an example of how it should look:**\n{example_blocks}") return "\n".join(lines) diff --git a/bot/cogs/codeblock/parsing.py b/bot/cogs/codeblock/parsing.py index a49ecc8f7..6fa6811cc 100644 --- a/bot/cogs/codeblock/parsing.py +++ b/bot/cogs/codeblock/parsing.py @@ -22,7 +22,7 @@ _TICKS = { _RE_CODE_BLOCK = re.compile( fr""" (?P<ticks> - (?P<tick>[{''.join(_TICKS)}]) # Put all ticks into a character class within a group. + (?P<tick>[{''.join(_TICKS)}]) # Put all ticks into a character class within a group. \2{{2}} # Match previous group 2 more times to ensure the same char. ) (?P<lang>[^\W_]+\n)? # Optionally match a language specifier followed by a newline. @@ -32,6 +32,16 @@ _RE_CODE_BLOCK = re.compile( re.DOTALL | re.VERBOSE ) +PY_LANG_CODES = ("python", "py") # Order is important; "py" is second cause it's a subset. +_RE_LANGUAGE = re.compile( + fr""" + ^(?P<spaces>\s+)? # Optionally match leading spaces from the beginning. + (?P<lang>{'|'.join(PY_LANG_CODES)}) # Match a Python language. + (?P<newline>\n)? # Optionally match a newline following the language. + """, + re.IGNORECASE | re.VERBOSE +) + class CodeBlock(NamedTuple): """Represents a Markdown code block.""" @@ -41,6 +51,14 @@ class CodeBlock(NamedTuple): tick: str +class BadLanguage(NamedTuple): + """Parsed information about a poorly formatted language specifier.""" + + language: str + leading_spaces: bool + terminal_newline: bool + + def find_code_blocks(message: str) -> Optional[Sequence[CodeBlock]]: """ Find and return all Markdown code blocks in the `message`. @@ -108,3 +126,22 @@ def is_repl_code(content: str, threshold: int = 3) -> bool: log.trace("Content is not Python REPL code.") return False + + +def parse_bad_language(content: str) -> Optional[BadLanguage]: + """ + Return information about a poorly formatted Python language in code block `content`. + + If the language is not Python, return None. + """ + log.trace("Parsing bad language.") + + match = _RE_LANGUAGE.match(content) + if not match: + return None + + return BadLanguage( + language=match["lang"], + leading_spaces=match["spaces"] is not None, + terminal_newline=match["newline"] is not None, + ) |