about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: ChrisJL <[email protected]> 2024-11-23 18:52:26 +0000
committer: GitHub <[email protected]> 2024-11-23 18:52:26 +0000
commit: 161d928fffdd908161795f30dbfaba22ea39d915 (patch)
tree: afb1b870452ae77b87510e4c1d7c9999fe14dc61
parent: fix: Replace unfriendly error messages (#3194) (diff)
Drop newlines and url unquote messages before passing through invite filter (#3184)
-rw-r--r--  bot/exts/filtering/_filter_lists/invite.py |  3
-rw-r--r--  bot/exts/filtering/_utils.py               | 13
2 files changed, 11 insertions, 5 deletions
diff --git a/bot/exts/filtering/_filter_lists/invite.py b/bot/exts/filtering/_filter_lists/invite.py
index b43e1bb7c..669281818 100644
--- a/bot/exts/filtering/_filter_lists/invite.py
+++ b/bot/exts/filtering/_filter_lists/invite.py
@@ -62,9 +62,6 @@ class InviteList(FilterList[InviteFilter]):
"""Dispatch the given event to the list's filters, and return actions to take and messages to relay to mods."""
text = clean_input(ctx.content)
- # Avoid escape characters
- text = text.replace("\\", "")
-
matches = list(DISCORD_INVITE.finditer(text))
invite_codes = {m.group("invite") for m in matches}
if not invite_codes:
diff --git a/bot/exts/filtering/_utils.py b/bot/exts/filtering/_utils.py
index a12d09875..9861f9ddc 100644
--- a/bot/exts/filtering/_utils.py
+++ b/bot/exts/filtering/_utils.py
@@ -5,6 +5,7 @@ import importlib.util
import inspect
import pkgutil
import types
+import urllib.parse
import warnings
from abc import ABC, abstractmethod
from collections import defaultdict
@@ -55,8 +56,16 @@ def clean_input(string: str) -> str:
# For future consideration: remove characters in the Mc, Sk, and Lm categories too.
# Can be normalised with form C to merge char + combining char into a single char to avoid
# removing legit diacritics, but this would open up a way to bypass _filters.
- no_zalgo = ZALGO_RE.sub("", string)
- return INVISIBLE_RE.sub("", no_zalgo)
+ content = ZALGO_RE.sub("", string)
+
+ # URL quoted strings can be used to hide links to servers
+ content = urllib.parse.unquote(content)
+ # Drop newlines that can be used to bypass filter
+ content = content.replace("\n", "")
+ # Avoid escape characters
+ content = content.replace("\\", "")
+
+ return INVISIBLE_RE.sub("", content)
def past_tense(word: str) -> str: