diff options
author | 2019-09-24 13:35:28 -0400 | |
---|---|---|
committer | 2019-09-24 13:35:28 -0400 | |
commit | cea29d3600f83ffcf4cd4593024ae435c9d8979d (patch) | |
tree | ebcc8c52948189abd5da9048cf789507c1cfc48d | |
parent | Merge pull request #443 from Akarys42/master (diff) | |
parent | Merge branch 'master' into rich-embed-false-positive-fix (diff) |
Merge pull request #450 from python-discord/rich-embed-false-positive-fix
Prevent false-positives of the rich embed filter
-rw-r--r-- | bot/cogs/filtering.py | 48 |
1 files changed, 33 insertions, 15 deletions
diff --git a/bot/cogs/filtering.py b/bot/cogs/filtering.py index 9cd1b7203..bd8c6ed67 100644 --- a/bot/cogs/filtering.py +++ b/bot/cogs/filtering.py @@ -15,18 +15,26 @@ from bot.constants import ( log = logging.getLogger(__name__) -INVITE_RE = ( +INVITE_RE = re.compile( r"(?:discord(?:[\.,]|dot)gg|" # Could be discord.gg/ r"discord(?:[\.,]|dot)com(?:\/|slash)invite|" # or discord.com/invite/ r"discordapp(?:[\.,]|dot)com(?:\/|slash)invite|" # or discordapp.com/invite/ r"discord(?:[\.,]|dot)me|" # or discord.me r"discord(?:[\.,]|dot)io" # or discord.io. r")(?:[\/]|slash)" # / or 'slash' - r"([a-zA-Z0-9]+)" # the invite code itself + r"([a-zA-Z0-9]+)", # the invite code itself + flags=re.IGNORECASE ) -URL_RE = r"(https?://[^\s]+)" -ZALGO_RE = r"[\u0300-\u036F\u0489]" +URL_RE = re.compile(r"(https?://[^\s]+)", flags=re.IGNORECASE) +ZALGO_RE = re.compile(r"[\u0300-\u036F\u0489]") + +WORD_WATCHLIST_PATTERNS = [ + re.compile(fr'\b{expression}\b', flags=re.IGNORECASE) for expression in Filter.word_watchlist +] +TOKEN_WATCHLIST_PATTERNS = [ + re.compile(fr'{expression}', flags=re.IGNORECASE) for expression in Filter.token_watchlist +] class Filtering(Cog): @@ -228,8 +236,8 @@ class Filtering(Cog): Only matches words with boundaries before and after the expression. """ - for expression in Filter.word_watchlist: - if re.search(fr"\b{expression}\b", text, re.IGNORECASE): + for regex_pattern in WORD_WATCHLIST_PATTERNS: + if regex_pattern.search(text): return True return False @@ -241,11 +249,11 @@ class Filtering(Cog): This will match the expression even if it does not have boundaries before and after. """ - for expression in Filter.token_watchlist: - if re.search(fr"{expression}", text, re.IGNORECASE): + for regex_pattern in TOKEN_WATCHLIST_PATTERNS: + if regex_pattern.search(text): # Make sure it's not a URL - if not re.search(URL_RE, text, re.IGNORECASE): + if not URL_RE.search(text): return True return False @@ -253,7 +261,7 @@ class Filtering(Cog): @staticmethod async def _has_urls(text: str) -> bool: """Returns True if the text contains one of the blacklisted URLs from the config file.""" - if not re.search(URL_RE, text, re.IGNORECASE): + if not URL_RE.search(text): return False text = text.lower() @@ -271,7 +279,7 @@ class Filtering(Cog): Zalgo range is \u0300 – \u036F and \u0489. """ - return bool(re.search(ZALGO_RE, text)) + return bool(ZALGO_RE.search(text)) async def _has_invites(self, text: str) -> Union[dict, bool]: """ @@ -286,7 +294,7 @@ class Filtering(Cog): # discord\.gg/gdudes-pony-farm text = text.replace("\\", "") - invites = re.findall(INVITE_RE, text, re.IGNORECASE) + invites = INVITE_RE.findall(text) invite_data = dict() for invite in invites: if invite in invite_data: @@ -323,11 +331,21 @@ class Filtering(Cog): @staticmethod async def _has_rich_embed(msg: Message) -> bool: - """Returns True if any of the embeds in the message are of type 'rich', but are not twitter embeds.""" + """Determines if `msg` contains any rich embeds not auto-generated from a URL.""" if msg.embeds: for embed in msg.embeds: - if embed.type == "rich" and (not embed.url or "twitter.com" not in embed.url): - return True + if embed.type == "rich": + urls = URL_RE.findall(msg.content) + if not embed.url or embed.url not in urls: + # If `embed.url` does not exist or if `embed.url` is not part of the content + # of the message, it's unlikely to be an auto-generated embed by Discord. + return True + else: + log.trace( + "Found a rich embed sent by a regular user account, " + "but it was likely just an automatic URL embed." + ) + return False return False async def notify_member(self, filtered_member: Member, reason: str, channel: TextChannel) -> None: |