From 20c5a6946a140ef9e79f8a7c4edb60e2d5372298 Mon Sep 17 00:00:00 2001 From: mbaruh Date: Sun, 4 Oct 2020 16:59:22 +0300 Subject: Added interleaving text in code blocks option If the message contains both plaintext and code blocks, the text will be ignored. If several code blocks are present, they are concatenated. --- bot/exts/utils/snekbox.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/bot/exts/utils/snekbox.py b/bot/exts/utils/snekbox.py index ca6fbf5cb..e1839bdf7 100644 --- a/bot/exts/utils/snekbox.py +++ b/bot/exts/utils/snekbox.py @@ -31,6 +31,15 @@ FORMATTED_CODE_REGEX = re.compile( r"\s*$", # any trailing whitespace until the end of the string re.DOTALL | re.IGNORECASE # "." also matches newlines, case insensitive ) +CODE_BLOCK_REGEX = re.compile( + r"```" # code block delimiter: 3 batckticks + r"([a-z]+\n)?" # match optional language (only letters plus newline) + r"(?:[ \t]*\n)*" # any blank (empty or tabs/spaces only) lines before the code + r"(?P<code>.*?)" # extract all code inside the markup + r"\s*" # any more whitespace before the end of the code markup + r"```", # code block end + re.DOTALL | re.IGNORECASE # "." also matches newlines, case insensitive +) RAW_CODE_REGEX = re.compile( r"^(?:[ \t]*\n)*" # any blank (empty or tabs/spaces only) lines before the code r"(?P<code>.*?)" # extract all the rest as code @@ -78,7 +87,9 @@ class Snekbox(Cog): def prepare_input(code: str) -> str: """Extract code from the Markdown, format it, and insert it into the code template.""" match = FORMATTED_CODE_REGEX.fullmatch(code) - if match: + + # Despite the wildcard being lazy, this is a fullmatch so we need to check the presence of the delim explicitly. 
+ if match and match.group("delim") not in match.group("code"): code, block, lang, delim = match.group("code", "block", "lang", "delim") code = textwrap.dedent(code) if block: @@ -86,12 +97,20 @@ class Snekbox(Cog): else: info = f"{delim}-enclosed inline code" log.trace(f"Extracted {info} for evaluation:\n{code}") + else: - code = textwrap.dedent(RAW_CODE_REGEX.fullmatch(code).group("code")) - log.trace( - f"Eval message contains unformatted or badly formatted code, " - f"stripping whitespace only:\n{code}" - ) + code_parts = CODE_BLOCK_REGEX.finditer(code) + merge = '\n'.join(map(lambda part: part.group("code"), code_parts)) + if merge: + code = textwrap.dedent(merge) + log.trace(f"Merged one or more code blocks from text combined with code:\n{code}") + + else: + code = textwrap.dedent(RAW_CODE_REGEX.fullmatch(code).group("code")) + log.trace( + f"Eval message contains unformatted or badly formatted code, " + f"stripping whitespace only:\n{code}" + ) return code -- cgit v1.2.3 From 08140e8ceab3ab46a1c956b7a4c90b771064d3c6 Mon Sep 17 00:00:00 2001 From: mbaruh Date: Sun, 4 Oct 2020 18:34:50 +0300 Subject: Improved style and fixed comment. --- bot/exts/utils/snekbox.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bot/exts/utils/snekbox.py b/bot/exts/utils/snekbox.py index e1839bdf7..e782ed745 100644 --- a/bot/exts/utils/snekbox.py +++ b/bot/exts/utils/snekbox.py @@ -88,7 +88,7 @@ class Snekbox(Cog): """Extract code from the Markdown, format it, and insert it into the code template.""" match = FORMATTED_CODE_REGEX.fullmatch(code) - # Despite the wildcard being lazy, this is a fullmatch so we need to check the presence of the delim explicitly. + # Despite the wildcard being lazy, the pattern is from start to end and will eat any delimiters in the middle. 
if match and match.group("delim") not in match.group("code"): code, block, lang, delim = match.group("code", "block", "lang", "delim") code = textwrap.dedent(code) @@ -100,7 +100,7 @@ class Snekbox(Cog): else: code_parts = CODE_BLOCK_REGEX.finditer(code) - merge = '\n'.join(map(lambda part: part.group("code"), code_parts)) + merge = '\n'.join(part.group("code") for part in code_parts) if merge: code = textwrap.dedent(merge) log.trace(f"Merged one or more code blocks from text combined with code:\n{code}") -- cgit v1.2.3 From 507451b8e67eb0a8425fa1dd2b5d386ead18ce00 Mon Sep 17 00:00:00 2001 From: mbaruh Date: Wed, 7 Oct 2020 01:44:01 +0300 Subject: prepare_input uses one regex less --- bot/exts/utils/snekbox.py | 52 ++++++++++++++++------------------------------- 1 file changed, 17 insertions(+), 35 deletions(-) diff --git a/bot/exts/utils/snekbox.py b/bot/exts/utils/snekbox.py index e782ed745..77830209e 100644 --- a/bot/exts/utils/snekbox.py +++ b/bot/exts/utils/snekbox.py @@ -21,23 +21,12 @@ log = logging.getLogger(__name__) ESCAPE_REGEX = re.compile("[`\u202E\u200B]{3,}") FORMATTED_CODE_REGEX = re.compile( - r"^\s*" # any leading whitespace from the beginning of the string r"(?P<delim>(?P<block>```)|``?)" # code delimiter: 1-3 backticks; (?P=block) only matches if it's a block r"(?(block)(?:(?P<lang>[a-z]+)\n)?)" # if we're in a block, match optional language (only letters plus newline) r"(?:[ \t]*\n)*" # any blank (empty or tabs/spaces only) lines before the code r"(?P<code>.*?)" # extract all code inside the markup r"\s*" # any more whitespace before the end of the code markup - r"(?P=delim)" # match the exact same delimiter from the start again - r"\s*$", # any trailing whitespace until the end of the string - re.DOTALL | re.IGNORECASE # "." also matches newlines, case insensitive -) -CODE_BLOCK_REGEX = re.compile( - r"```" # code block delimiter: 3 batckticks - r"([a-z]+\n)?" 
# match optional language (only letters plus newline) - r"(?:[ \t]*\n)*" # any blank (empty or tabs/spaces only) lines before the code - r"(?P<code>.*?)" # extract all code inside the markup - r"\s*" # any more whitespace before the end of the code markup - r"```", # code block end + r"(?P=delim)", # match the exact same delimiter from the start again re.DOTALL | re.IGNORECASE # "." also matches newlines, case insensitive ) RAW_CODE_REGEX = re.compile( @@ -86,32 +75,25 @@ class Snekbox(Cog): @staticmethod def prepare_input(code: str) -> str: """Extract code from the Markdown, format it, and insert it into the code template.""" - match = FORMATTED_CODE_REGEX.fullmatch(code) - - # Despite the wildcard being lazy, the pattern is from start to end and will eat any delimiters in the middle. - if match and match.group("delim") not in match.group("code"): - code, block, lang, delim = match.group("code", "block", "lang", "delim") - code = textwrap.dedent(code) - if block: - info = (f"'{lang}' highlighted" if lang else "plain") + " code block" - else: - info = f"{delim}-enclosed inline code" - log.trace(f"Extracted {info} for evaluation:\n{code}") - - else: - code_parts = CODE_BLOCK_REGEX.finditer(code) - merge = '\n'.join(part.group("code") for part in code_parts) - if merge: - code = textwrap.dedent(merge) - log.trace(f"Merged one or more code blocks from text combined with code:\n{code}") + if match := list(FORMATTED_CODE_REGEX.finditer(code)): + blocks = [block for block in match if block.group("block")] + if len(blocks) > 1: + code = '\n'.join(block.group("code") for block in blocks) + info = "several code blocks" else: - code = textwrap.dedent(RAW_CODE_REGEX.fullmatch(code).group("code")) - log.trace( - f"Eval message contains unformatted or badly formatted code, " - f"stripping whitespace only:\n{code}" - ) + match = match[0] if len(blocks) == 0 else blocks[0] + code, block, lang, delim = match.group("code", "block", "lang", "delim") + if block: + info = (f"'{lang}' 
highlighted" if lang else "plain") + " code block" + else: + info = f"{delim}-enclosed inline code" + else: + code = RAW_CODE_REGEX.fullmatch(code).group("code") + info = "unformatted or badly formatted code" + code = textwrap.dedent(code) + log.trace(f"Extracted {info} for evaluation:\n{code}") return code @staticmethod -- cgit v1.2.3 From d0635ea328ed5bc659d77820752dedef3c19df0c Mon Sep 17 00:00:00 2001 From: mbaruh Date: Thu, 8 Oct 2020 01:21:19 +0300 Subject: adjusted prepare_input docs and unittests --- bot/exts/utils/snekbox.py | 8 +++++++- tests/bot/exts/utils/test_snekbox.py | 7 +++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/bot/exts/utils/snekbox.py b/bot/exts/utils/snekbox.py index 77830209e..295c84901 100644 --- a/bot/exts/utils/snekbox.py +++ b/bot/exts/utils/snekbox.py @@ -74,7 +74,13 @@ class Snekbox(Cog): @staticmethod def prepare_input(code: str) -> str: - """Extract code from the Markdown, format it, and insert it into the code template.""" + """ + Extract code from the Markdown, format it, and insert it into the code template. + + If there is Markdown, ignores surrounding text. + If there are several Markdown parts in the message, concatenates only the code blocks. + If there is inline code but no code blocks, takes the first instance of inline code. 
+ """ if match := list(FORMATTED_CODE_REGEX.finditer(code)): blocks = [block for block in match if block.group("block")] diff --git a/tests/bot/exts/utils/test_snekbox.py b/tests/bot/exts/utils/test_snekbox.py index 6601fad2c..9a42d0610 100644 --- a/tests/bot/exts/utils/test_snekbox.py +++ b/tests/bot/exts/utils/test_snekbox.py @@ -52,6 +52,13 @@ class SnekboxTests(unittest.IsolatedAsyncioTestCase): ('`print("Hello world!")`', 'print("Hello world!")', 'one line code block'), ('```\nprint("Hello world!")```', 'print("Hello world!")', 'multiline code block'), ('```py\nprint("Hello world!")```', 'print("Hello world!")', 'multiline python code block'), + ('text```print("Hello world!")```text', 'print("Hello world!")', 'code block surrounded by text'), + ('```print("Hello world!")```\ntext\n```py\nprint("Hello world!")```', + 'print("Hello world!")\nprint("Hello world!")', 'two code blocks with text in-between'), + ('`print("Hello world!")`\ntext\n```print("How\'s it going?")```', + 'print("How\'s it going?")', 'code block preceded by inline code'), + ('`print("Hello world!")`\ntext\n`print("Hello world!")`', + 'print("Hello world!")', 'one inline code block of two') ) for case, expected, testname in cases: with self.subTest(msg=f'Extract code from {testname}.'): -- cgit v1.2.3 From b55ce89f01ef4d66a8b930dcbdc061cdef3563f3 Mon Sep 17 00:00:00 2001 From: Boris Muratov <8bee278@gmail.com> Date: Thu, 8 Oct 2020 03:05:02 +0300 Subject: clarify prepare_input doc Co-authored-by: Mark --- bot/exts/utils/snekbox.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bot/exts/utils/snekbox.py b/bot/exts/utils/snekbox.py index 295c84901..da3e07f42 100644 --- a/bot/exts/utils/snekbox.py +++ b/bot/exts/utils/snekbox.py @@ -77,9 +77,9 @@ class Snekbox(Cog): """ Extract code from the Markdown, format it, and insert it into the code template. - If there is Markdown, ignores surrounding text. 
- If there are several Markdown parts in the message, concatenates only the code blocks. - If there is inline code but no code blocks, takes the first instance of inline code. + If there is any code block, ignore text outside the code block. + Use the first code block, but prefer a fenced code block. + If there are several fenced code blocks, concatenate only the fenced code blocks. """ if match := list(FORMATTED_CODE_REGEX.finditer(code)): blocks = [block for block in match if block.group("block")] -- cgit v1.2.3