From aa08fe2258ce4205272c7f27e1e2380c37275552 Mon Sep 17 00:00:00 2001
From: Chris Lovering <chris.lovering.95@gmail.com>
Date: Mon, 18 Oct 2021 22:22:47 +0100
Subject: Normalise names before checking for matches

---
 bot/exts/filters/filtering.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py
index 79b7abe9f..e51d2aad6 100644
--- a/bot/exts/filters/filtering.py
+++ b/bot/exts/filters/filtering.py
@@ -2,6 +2,7 @@ import asyncio
 import re
 from datetime import timedelta
 from typing import Any, Dict, List, Mapping, NamedTuple, Optional, Tuple, Union
+from unicodedata import normalize
 
 import arrow
 import dateutil.parser
@@ -207,12 +208,19 @@ class Filtering(Cog):
 
     def get_name_matches(self, name: str) -> List[re.Match]:
         """Check bad words from passed string (name). Return list of matches."""
-        name = self.clean_input(name)
+        normalised_name = normalize("NFKC", name)
         matches = []
+
+        # Run filters against normalized and original version,
+        # in case we have filters for one but not the other.
+        names_to_check = (name, normalised_name)
+
         watchlist_patterns = self._get_filterlist_items('filter_token', allowed=False)
         for pattern in watchlist_patterns:
-            if match := re.search(pattern, name, flags=re.IGNORECASE):
-                matches.append(match)
+            for name in names_to_check:
+                if match := re.search(pattern, name, flags=re.IGNORECASE):
+                    matches.append(match)
+                    break  # No need to see if other variations of this name match too.
         return matches
 
     async def check_send_alert(self, member: Member) -> bool:
-- 
cgit v1.2.3


From baf8239be8c6a4f6da4bd7ce8f8b2abeaf55e58a Mon Sep 17 00:00:00 2001
From: Chris Lovering <chris.lovering.95@gmail.com>
Date: Mon, 18 Oct 2021 22:51:31 +0100
Subject: Check if we recently alerted about a bad name before running all
 filter tokens again

---
 bot/exts/filters/filtering.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py
index e51d2aad6..4b1de9638 100644
--- a/bot/exts/filters/filtering.py
+++ b/bot/exts/filters/filtering.py
@@ -237,10 +237,14 @@ class Filtering(Cog):
         """Send a mod alert every 3 days if a username still matches a watchlist pattern."""
         # Use lock to avoid race conditions
         async with self.name_lock:
+            # Check if we recently alerted about this user first,
+            # to avoid running all the filter tokens against their name again.
+            if not await self.check_send_alert(member):
+                return
+
             # Check whether the users display name contains any words in our blacklist
             matches = self.get_name_matches(member.display_name)
-
-            if not matches or not await self.check_send_alert(member):
+            if not matches:
                 return
 
             log.info(f"Sending bad nickname alert for '{member.display_name}' ({member.id}).")
-- 
cgit v1.2.3


From 8efbff61aa9a8697ddb140fa5978630a6c609054 Mon Sep 17 00:00:00 2001
From: Chris Lovering <chris.lovering.95@gmail.com>
Date: Mon, 18 Oct 2021 22:56:22 +0100
Subject: Return early when getting name matches

As soon as we get a match for a bad name, return it, rather than running it against the rest of the filters.
---
 bot/exts/filters/filtering.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py
index 4b1de9638..fb1d62e48 100644
--- a/bot/exts/filters/filtering.py
+++ b/bot/exts/filters/filtering.py
@@ -206,10 +206,9 @@ class Filtering(Cog):
             delta = relativedelta(after.edited_at, before.edited_at).microseconds
         await self._filter_message(after, delta)
 
-    def get_name_matches(self, name: str) -> List[re.Match]:
-        """Check bad words from passed string (name). Return list of matches."""
+    def get_name_match(self, name: str) -> Optional[re.Match]:
+        """Check bad words from passed string (name). Return the first match found."""
         normalised_name = normalize("NFKC", name)
-        matches = []
 
         # Run filters against normalized and original version,
         # in case we have filters for one but not the other.
@@ -219,9 +218,8 @@ class Filtering(Cog):
         for pattern in watchlist_patterns:
             for name in names_to_check:
                 if match := re.search(pattern, name, flags=re.IGNORECASE):
-                    matches.append(match)
-                    break  # No need to see if other variations of this name match too.
-        return matches
+                    return match
+        return None
 
     async def check_send_alert(self, member: Member) -> bool:
         """When there is less than 3 days after last alert, return `False`, otherwise `True`."""
@@ -243,8 +241,8 @@ class Filtering(Cog):
                 return
 
             # Check whether the users display name contains any words in our blacklist
-            matches = self.get_name_matches(member.display_name)
-            if not matches:
+            match = self.get_name_match(member.display_name)
+            if not match:
                 return
 
             log.info(f"Sending bad nickname alert for '{member.display_name}' ({member.id}).")
@@ -252,7 +250,7 @@ class Filtering(Cog):
             log_string = (
                 f"**User:** {format_user(member)}\n"
                 f"**Display Name:** {escape_markdown(member.display_name)}\n"
-                f"**Bad Matches:** {', '.join(match.group() for match in matches)}"
+                f"**Bad Match:** {match.group()}"
             )
 
             await self.mod_log.send_log_message(
-- 
cgit v1.2.3


From 5901ac0ba4544f2bd479a74d5d6a345b3d31cb01 Mon Sep 17 00:00:00 2001
From: Chris Lovering <chris.lovering.95@gmail.com>
Date: Tue, 19 Oct 2021 17:00:30 +0100
Subject: Also run name filters against a cleaned version of the normalised
 name

---
 bot/exts/filters/filtering.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py
index fb1d62e48..21ed090ea 100644
--- a/bot/exts/filters/filtering.py
+++ b/bot/exts/filters/filtering.py
@@ -1,8 +1,8 @@
 import asyncio
 import re
+import unicodedata
 from datetime import timedelta
 from typing import Any, Dict, List, Mapping, NamedTuple, Optional, Tuple, Union
-from unicodedata import normalize
 
 import arrow
 import dateutil.parser
@@ -208,11 +208,12 @@ class Filtering(Cog):
 
     def get_name_match(self, name: str) -> Optional[re.Match]:
         """Check bad words from passed string (name). Return the first match found."""
-        normalised_name = normalize("NFKC", name)
+        normalised_name = unicodedata.normalize("NFKC", name)
+        cleaned_normalised_name = "".join(c for c in normalised_name if not unicodedata.combining(c))
 
-        # Run filters against normalized and original version,
+        # Run filters against normalised, cleaned normalised and the original name,
         # in case we have filters for one but not the other.
-        names_to_check = (name, normalised_name)
+        names_to_check = (name, normalised_name, cleaned_normalised_name)
 
         watchlist_patterns = self._get_filterlist_items('filter_token', allowed=False)
         for pattern in watchlist_patterns:
-- 
cgit v1.2.3


From d0dc7a0e4e3fc6618ae49d43b24938c84793dcf0 Mon Sep 17 00:00:00 2001
From: Chris Lovering <chris.lovering.95@gmail.com>
Date: Mon, 6 Dec 2021 22:59:35 +0000
Subject: Build an intermediate list for speed in filtering cog

---
 bot/exts/filters/filtering.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py
index 21ed090ea..8accc61f8 100644
--- a/bot/exts/filters/filtering.py
+++ b/bot/exts/filters/filtering.py
@@ -209,7 +209,7 @@ class Filtering(Cog):
     def get_name_match(self, name: str) -> Optional[re.Match]:
         """Check bad words from passed string (name). Return the first match found."""
         normalised_name = unicodedata.normalize("NFKC", name)
-        cleaned_normalised_name = "".join(c for c in normalised_name if not unicodedata.combining(c))
+        cleaned_normalised_name = "".join([c for c in normalised_name if not unicodedata.combining(c)])
 
         # Run filters against normalised, cleaned normalised and the original name,
         # in case we have filters for one but not the other.
-- 
cgit v1.2.3