From 2a83c5f8f7b9bfd60a50e67f20c04f31651bbcaa Mon Sep 17 00:00:00 2001 From: Chris Lovering Date: Sat, 28 Aug 2021 11:38:38 +0100 Subject: Only check URL-like objects against domain filters Previously a message such as 'https://google.com hello! flask.request.method' would be filtered due to us filtering the URL shortener t.me. This commit changes the logic so that we only check parts of the messages that matched the URL regex against our blacklist, to avoid these false-positives. --- bot/exts/filters/filtering.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py index 10cc7885d..b7e91395e 100644 --- a/bot/exts/filters/filtering.py +++ b/bot/exts/filters/filtering.py @@ -478,15 +478,16 @@ class Filtering(Cog): Second return value is a reason of URL blacklisting (can be None). """ text = self.clean_input(text) - if not URL_RE.search(text): + matches = URL_RE.findall(text) + if not matches: return False, None - text = text.lower() domain_blacklist = self._get_filterlist_items("domain_name", allowed=False) for url in domain_blacklist: - if url.lower() in text: - return True, self._get_filterlist_value("domain_name", url, allowed=False)["comment"] + for match in matches: + if url.lower() in match.lower(): + return True, self._get_filterlist_value("domain_name", url, allowed=False)["comment"] return False, None -- cgit v1.2.3 From fddd34158b3fda284bd39970297c00e7d54d122a Mon Sep 17 00:00:00 2001 From: Chris Lovering Date: Mon, 6 Sep 2021 13:36:42 +0100 Subject: Refactor & simplify domain filter check --- bot/exts/filters/filtering.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py index b7e91395e..7e698880f 100644 --- a/bot/exts/filters/filtering.py +++ b/bot/exts/filters/filtering.py @@ -478,17 +478,12 @@ class Filtering(Cog): Second return value is a reason of URL blacklisting (can be None). 
""" text = self.clean_input(text) - matches = URL_RE.findall(text) - if not matches: - return False, None domain_blacklist = self._get_filterlist_items("domain_name", allowed=False) - - for url in domain_blacklist: - for match in matches: - if url.lower() in match.lower(): + for match in URL_RE.finditer(text): + for url in domain_blacklist: + if url.lower() in match.group(1).lower(): return True, self._get_filterlist_value("domain_name", url, allowed=False)["comment"] - return False, None @staticmethod -- cgit v1.2.3