aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar S. Co1 <[email protected]>2019-09-24 13:35:28 -0400
committerGravatar GitHub <[email protected]>2019-09-24 13:35:28 -0400
commitcea29d3600f83ffcf4cd4593024ae435c9d8979d (patch)
treeebcc8c52948189abd5da9048cf789507c1cfc48d
parentMerge pull request #443 from Akarys42/master (diff)
parentMerge branch 'master' into rich-embed-false-positive-fix (diff)
Merge pull request #450 from python-discord/rich-embed-false-positive-fix
Prevent false-positives of the rich embed filter
-rw-r--r--bot/cogs/filtering.py48
1 files changed, 33 insertions, 15 deletions
diff --git a/bot/cogs/filtering.py b/bot/cogs/filtering.py
index 9cd1b7203..bd8c6ed67 100644
--- a/bot/cogs/filtering.py
+++ b/bot/cogs/filtering.py
@@ -15,18 +15,26 @@ from bot.constants import (
log = logging.getLogger(__name__)
-INVITE_RE = (
+INVITE_RE = re.compile(
r"(?:discord(?:[\.,]|dot)gg|" # Could be discord.gg/
r"discord(?:[\.,]|dot)com(?:\/|slash)invite|" # or discord.com/invite/
r"discordapp(?:[\.,]|dot)com(?:\/|slash)invite|" # or discordapp.com/invite/
r"discord(?:[\.,]|dot)me|" # or discord.me
r"discord(?:[\.,]|dot)io" # or discord.io.
r")(?:[\/]|slash)" # / or 'slash'
- r"([a-zA-Z0-9]+)" # the invite code itself
+ r"([a-zA-Z0-9]+)", # the invite code itself
+ flags=re.IGNORECASE
)
-URL_RE = r"(https?://[^\s]+)"
-ZALGO_RE = r"[\u0300-\u036F\u0489]"
+URL_RE = re.compile(r"(https?://[^\s]+)", flags=re.IGNORECASE)
+ZALGO_RE = re.compile(r"[\u0300-\u036F\u0489]")
+
+WORD_WATCHLIST_PATTERNS = [
+ re.compile(fr'\b{expression}\b', flags=re.IGNORECASE) for expression in Filter.word_watchlist
+]
+TOKEN_WATCHLIST_PATTERNS = [
+ re.compile(fr'{expression}', flags=re.IGNORECASE) for expression in Filter.token_watchlist
+]
class Filtering(Cog):
@@ -228,8 +236,8 @@ class Filtering(Cog):
Only matches words with boundaries before and after the expression.
"""
- for expression in Filter.word_watchlist:
- if re.search(fr"\b{expression}\b", text, re.IGNORECASE):
+ for regex_pattern in WORD_WATCHLIST_PATTERNS:
+ if regex_pattern.search(text):
return True
return False
@@ -241,11 +249,11 @@ class Filtering(Cog):
This will match the expression even if it does not have boundaries before and after.
"""
- for expression in Filter.token_watchlist:
- if re.search(fr"{expression}", text, re.IGNORECASE):
+ for regex_pattern in TOKEN_WATCHLIST_PATTERNS:
+ if regex_pattern.search(text):
# Make sure it's not a URL
- if not re.search(URL_RE, text, re.IGNORECASE):
+ if not URL_RE.search(text):
return True
return False
@@ -253,7 +261,7 @@ class Filtering(Cog):
@staticmethod
async def _has_urls(text: str) -> bool:
"""Returns True if the text contains one of the blacklisted URLs from the config file."""
- if not re.search(URL_RE, text, re.IGNORECASE):
+ if not URL_RE.search(text):
return False
text = text.lower()
@@ -271,7 +279,7 @@ class Filtering(Cog):
Zalgo range is \u0300 – \u036F and \u0489.
"""
- return bool(re.search(ZALGO_RE, text))
+ return bool(ZALGO_RE.search(text))
async def _has_invites(self, text: str) -> Union[dict, bool]:
"""
@@ -286,7 +294,7 @@ class Filtering(Cog):
# discord\.gg/gdudes-pony-farm
text = text.replace("\\", "")
- invites = re.findall(INVITE_RE, text, re.IGNORECASE)
+ invites = INVITE_RE.findall(text)
invite_data = dict()
for invite in invites:
if invite in invite_data:
@@ -323,11 +331,21 @@ class Filtering(Cog):
@staticmethod
async def _has_rich_embed(msg: Message) -> bool:
- """Returns True if any of the embeds in the message are of type 'rich', but are not twitter embeds."""
+ """Determines if `msg` contains any rich embeds not auto-generated from a URL."""
if msg.embeds:
for embed in msg.embeds:
- if embed.type == "rich" and (not embed.url or "twitter.com" not in embed.url):
- return True
+ if embed.type == "rich":
+ urls = URL_RE.findall(msg.content)
+ if not embed.url or embed.url not in urls:
+ # If `embed.url` does not exist or if `embed.url` is not part of the content
+ # of the message, it's unlikely to be an auto-generated embed by Discord.
+ return True
+ else:
+ log.trace(
+ "Found a rich embed sent by a regular user account, "
+ "but it was likely just an automatic URL embed."
+ )
+ return False
return False
async def notify_member(self, filtered_member: Member, reason: str, channel: TextChannel) -> None: