diff options
| author | 2020-05-07 18:33:34 -0700 | |
|---|---|---|
| committer | 2020-06-13 11:21:08 -0700 | |
| commit | 2bfac307c4b06682db93e2a75108012a586d1c7d (patch) | |
| tree | 03e9cd15cf695db05e93b144f0f1209b61c8be04 /bot/cogs/codeblock/parsing.py | |
| parent | Code block: create a function to format the example code blocks (diff) | |
Code block: use regex to parse incorrect languages
Regex is simpler and more versatile in this case. The functions in the
`instructions` module should be more focused on formatting than parsing,
so the parsing was moved to the `parsing` module.
* Move _PY_LANG_CODES to the `parsing` module
* Create a separate function in the `parsing` module to parse bad
  languages
Diffstat (limited to '')
| -rw-r--r-- | bot/cogs/codeblock/parsing.py | 39 | 
1 file changed, 38 insertions, 1 deletion
| diff --git a/bot/cogs/codeblock/parsing.py b/bot/cogs/codeblock/parsing.py index a49ecc8f7..6fa6811cc 100644 --- a/bot/cogs/codeblock/parsing.py +++ b/bot/cogs/codeblock/parsing.py @@ -22,7 +22,7 @@ _TICKS = {  _RE_CODE_BLOCK = re.compile(      fr"""      (?P<ticks> -        (?P<tick>[{''.join(_TICKS)}])  # Put all ticks into a character class within a group. +        (?P<tick>[{''.join(_TICKS)}]) # Put all ticks into a character class within a group.          \2{{2}}                       # Match previous group 2 more times to ensure the same char.      )      (?P<lang>[^\W_]+\n)?              # Optionally match a language specifier followed by a newline. @@ -32,6 +32,16 @@ _RE_CODE_BLOCK = re.compile(      re.DOTALL | re.VERBOSE  ) +PY_LANG_CODES = ("python", "py")  # Order is important; "py" is second cause it's a subset. +_RE_LANGUAGE = re.compile( +    fr""" +    ^(?P<spaces>\s+)?                    # Optionally match leading spaces from the beginning. +    (?P<lang>{'|'.join(PY_LANG_CODES)})  # Match a Python language. +    (?P<newline>\n)?                     # Optionally match a newline following the language. +    """, +    re.IGNORECASE | re.VERBOSE +) +  class CodeBlock(NamedTuple):      """Represents a Markdown code block.""" @@ -41,6 +51,14 @@ class CodeBlock(NamedTuple):      tick: str +class BadLanguage(NamedTuple): +    """Parsed information about a poorly formatted language specifier.""" + +    language: str +    leading_spaces: bool +    terminal_newline: bool + +  def find_code_blocks(message: str) -> Optional[Sequence[CodeBlock]]:      """      Find and return all Markdown code blocks in the `message`. @@ -108,3 +126,22 @@ def is_repl_code(content: str, threshold: int = 3) -> bool:      log.trace("Content is not Python REPL code.")      return False + + +def parse_bad_language(content: str) -> Optional[BadLanguage]: +    """ +    Return information about a poorly formatted Python language in code block `content`. 
+ +    If the language is not Python, return None. +    """ +    log.trace("Parsing bad language.") + +    match = _RE_LANGUAGE.match(content) +    if not match: +        return None + +    return BadLanguage( +        language=match["lang"], +        leading_spaces=match["spaces"] is not None, +        terminal_newline=match["newline"] is not None, +    ) | 
