Drop newlines and url unquote messages before passing through invite filter (#3184)

author: ChrisJL <[email protected]> 2024-11-23 18:52:26 +0000
committer: GitHub <[email protected]> 2024-11-23 18:52:26 +0000
commit: 161d928fffdd908161795f30dbfaba22ea39d915 (patch)
tree: afb1b870452ae77b87510e4c1d7c9999fe14dc61
parent: fix: Replace unfriendly error messages (#3194) (diff)
2 files changed, 11 insertions, 5 deletions
diff --git a/bot/exts/filtering/_filter_lists/invite.py b/bot/exts/filtering/_filter_lists/invite.py
index b43e1bb7c..669281818 100644
--- a/bot/exts/filtering/_filter_lists/invite.py
+++ b/bot/exts/filtering/_filter_lists/invite.py
@@ -62,9 +62,6 @@ class InviteList(FilterList[InviteFilter]):
         """Dispatch the given event to the list's filters, and return actions to take and messages to relay to mods."""
         text = clean_input(ctx.content)
 
-        # Avoid escape characters
-        text = text.replace("\\", "")
-
         matches = list(DISCORD_INVITE.finditer(text))
         invite_codes = {m.group("invite") for m in matches}
         if not invite_codes:
diff --git a/bot/exts/filtering/_utils.py b/bot/exts/filtering/_utils.py
index a12d09875..9861f9ddc 100644
--- a/bot/exts/filtering/_utils.py
+++ b/bot/exts/filtering/_utils.py
@@ -5,6 +5,7 @@ import importlib.util
 import inspect
 import pkgutil
 import types
+import urllib.parse
 import warnings
 from abc import ABC, abstractmethod
 from collections import defaultdict
@@ -55,8 +56,16 @@ def clean_input(string: str) -> str:
     # For future consideration: remove characters in the Mc, Sk, and Lm categories too.
     # Can be normalised with form C to merge char + combining char into a single char to avoid
     # removing legit diacritics, but this would open up a way to bypass _filters.
-    no_zalgo = ZALGO_RE.sub("", string)
-    return INVISIBLE_RE.sub("", no_zalgo)
+    content = ZALGO_RE.sub("", string)
+
+    # URL quoted strings can be used to hide links to servers
+    content = urllib.parse.unquote(content)
+    # Drop newlines that can be used to bypass filter
+    content = content.replace("\n", "")
+    # Avoid escape characters
+    content = content.replace("\\", "")
+
+    return INVISIBLE_RE.sub("", content)
 
 
 def past_tense(word: str) -> str:
author	ChrisJL <[email protected]>	2024-11-23 18:52:26 +0000
committer	GitHub <[email protected]>	2024-11-23 18:52:26 +0000
commit	161d928fffdd908161795f30dbfaba22ea39d915 (patch)
tree	afb1b870452ae77b87510e4c1d7c9999fe14dc61
parent	fix: Replace unfriendly error messages (#3194) (diff)