diff options
-rw-r--r-- | botcore/utils/regex.py | 7 | ||||
-rw-r--r-- | docs/changelog.rst | 3 | ||||
-rw-r--r-- | pyproject.toml | 2 | ||||
-rw-r--r-- | tests/botcore/utils/test_regex.py | 69 |
4 files changed, 50 insertions, 31 deletions
diff --git a/botcore/utils/regex.py b/botcore/utils/regex.py index 56c50dad..de82a1ed 100644 --- a/botcore/utils/regex.py +++ b/botcore/utils/regex.py @@ -3,6 +3,7 @@ import re DISCORD_INVITE = re.compile( + r"(https?://)?(www\.)?" # Optional http(s) and www. r"(discord([.,]|dot)gg|" # Could be discord.gg/ r"discord([.,]|dot)com(/|slash)invite|" # or discord.com/invite/ r"discordapp([.,]|dot)com(/|slash)invite|" # or discordapp.com/invite/ @@ -10,7 +11,7 @@ DISCORD_INVITE = re.compile( r"discord([.,]|dot)li|" # or discord.li r"discord([.,]|dot)io|" # or discord.io. r"((?<!\w)([.,]|dot))gg" # or .gg/ - r")([/]|slash)" # / or 'slash' + r")(/|slash)" # / or 'slash' r"(?P<invite>\S+)", # the invite code itself flags=re.IGNORECASE ) @@ -32,7 +33,7 @@ FORMATTED_CODE_REGEX = re.compile( r"(?P<code>.*?)" # extract all code inside the markup r"\s*" # any more whitespace before the end of the code markup r"(?P=delim)", # match the exact same delimiter from the start again - re.DOTALL | re.IGNORECASE # "." also matches newlines, case insensitive + flags=re.DOTALL | re.IGNORECASE # "." also matches newlines, case insensitive ) """ Regex for formatted code, using Discord's code blocks. @@ -44,7 +45,7 @@ RAW_CODE_REGEX = re.compile( r"^(?:[ \t]*\n)*" # any blank (empty or tabs/spaces only) lines before the code r"(?P<code>.*?)" # extract all the rest as code r"\s*$", # any trailing whitespace until the end of the string - re.DOTALL # "." also matches newlines + flags=re.DOTALL # "." also matches newlines ) """ Regex for raw code, *not* using Discord's code blocks. diff --git a/docs/changelog.rst b/docs/changelog.rst index f944c8d6..643a54fc 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -3,6 +3,9 @@ Changelog ========= +- :release:`8.1.0 <16th August 2022>` +- :support:`124` Updated :obj:`botcore.utils.regex.DISCORD_INVITE` regex to optionally match leading "http[s]" and "www". 
+ - :release:`8.0.0 <27th July 2022>` - :breaking:`110` Bump async-rediscache to v1.0.0-rc2 diff --git a/pyproject.toml b/pyproject.toml index 3448c67f..2680efab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bot-core" -version = "8.0.0" +version = "8.1.0" description = "Bot-Core provides the core functionality and utilities for the bots of the Python Discord community." authors = ["Python Discord <info@pythondiscord.com>"] license = "MIT" diff --git a/tests/botcore/utils/test_regex.py b/tests/botcore/utils/test_regex.py index 2ffd0e46..491e22bd 100644 --- a/tests/botcore/utils/test_regex.py +++ b/tests/botcore/utils/test_regex.py @@ -4,8 +4,18 @@ from typing import Optional from botcore.utils.regex import DISCORD_INVITE -def use_regex(s: str) -> Optional[str]: - """Helper function to run the Regex on a string. +def match_regex(s: str) -> Optional[str]: + """Helper function to run re.match on a string. + + Return the invite capture group, if the string matches the pattern + else return None + """ + result = DISCORD_INVITE.match(s) + return result if result is None else result.group("invite") + + +def search_regex(s: str) -> Optional[str]: + """Helper function to run re.search on a string. 
Return the invite capture group, if the string matches the pattern else return None @@ -19,32 +29,37 @@ class UtilsRegexTests(unittest.TestCase): def test_discord_invite_positives(self): """Test the DISCORD_INVITE regex on a set of strings we would expect to capture.""" - self.assertEqual(use_regex("discord.gg/python"), "python") - self.assertEqual(use_regex("https://discord.gg/python"), "python") - self.assertEqual(use_regex("discord.com/invite/python"), "python") - self.assertEqual(use_regex("discordapp.com/invite/python"), "python") - self.assertEqual(use_regex("discord.me/python"), "python") - self.assertEqual(use_regex("discord.li/python"), "python") - self.assertEqual(use_regex("discord.io/python"), "python") - self.assertEqual(use_regex(".gg/python"), "python") - - self.assertEqual(use_regex("discord.gg/python/but/extra"), "python/but/extra") - self.assertEqual(use_regex("discord.me/this/isnt/python"), "this/isnt/python") - self.assertEqual(use_regex(".gg/a/a/a/a/a/a/a/a/a/a/a"), "a/a/a/a/a/a/a/a/a/a/a") - self.assertEqual(use_regex("discordapp.com/invite/python/snakescord"), "python/snakescord") - self.assertEqual(use_regex("http://discord.gg/python/%20/notpython"), "python/%20/notpython") - self.assertEqual(use_regex("discord.gg/python?=ts/notpython"), "python?=ts/notpython") - self.assertEqual(use_regex("https://discord.gg/python#fragment/notpython"), "python#fragment/notpython") - self.assertEqual(use_regex("https://discord.gg/python/~/notpython"), "python/~/notpython") - - self.assertEqual(use_regex("https://discord.gg/python with whitespace"), "python") - self.assertEqual(use_regex(" https://discord.gg/python "), "python") + self.assertEqual(match_regex("discord.gg/python"), "python") + self.assertEqual(match_regex("https://discord.gg/python"), "python") + self.assertEqual(match_regex("https://www.discord.gg/python"), "python") + self.assertEqual(match_regex("discord.com/invite/python"), "python") + 
self.assertEqual(match_regex("www.discord.com/invite/python"), "python") + self.assertEqual(match_regex("discordapp.com/invite/python"), "python") + self.assertEqual(match_regex("discord.me/python"), "python") + self.assertEqual(match_regex("discord.li/python"), "python") + self.assertEqual(match_regex("discord.io/python"), "python") + self.assertEqual(match_regex(".gg/python"), "python") + + self.assertEqual(match_regex("discord.gg/python/but/extra"), "python/but/extra") + self.assertEqual(match_regex("discord.me/this/isnt/python"), "this/isnt/python") + self.assertEqual(match_regex(".gg/a/a/a/a/a/a/a/a/a/a/a"), "a/a/a/a/a/a/a/a/a/a/a") + self.assertEqual(match_regex("discordapp.com/invite/python/snakescord"), "python/snakescord") + self.assertEqual(match_regex("http://discord.gg/python/%20/notpython"), "python/%20/notpython") + self.assertEqual(match_regex("discord.gg/python?=ts/notpython"), "python?=ts/notpython") + self.assertEqual(match_regex("https://discord.gg/python#fragment/notpython"), "python#fragment/notpython") + self.assertEqual(match_regex("https://discord.gg/python/~/notpython"), "python/~/notpython") + + self.assertEqual(search_regex("https://discord.gg/python with whitespace"), "python") + self.assertEqual(search_regex(" https://discord.gg/python "), "python") def test_discord_invite_negatives(self): """Test the DISCORD_INVITE regex on a set of strings we would expect to not capture.""" - self.assertEqual(use_regex("another string"), None) - self.assertEqual(use_regex("https://pythondiscord.com"), None) - self.assertEqual(use_regex("https://discord.com"), None) - self.assertEqual(use_regex("https://discord.gg"), None) - self.assertEqual(use_regex("https://discord.gg/ python"), None) + self.assertEqual(match_regex("another string"), None) + self.assertEqual(match_regex("https://pythondiscord.com"), None) + self.assertEqual(match_regex("https://discord.com"), None) + self.assertEqual(match_regex("https://discord.gg"), None) + 
self.assertEqual(match_regex("https://discord.gg/ python"), None) + + self.assertEqual(search_regex("https://discord.com with whitespace"), None) + self.assertEqual(search_regex(" https://discord.com "), None) |