aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: MarkKoz <[email protected]> 2020-03-27 11:22:34 -0700
committer: MarkKoz <[email protected]> 2020-03-27 11:30:27 -0700
commit: e88bb946fea8c4bc861f17772f8aca28f99be512 (patch)
tree: 6b46dd9150bf8f517665e013ed825a1981da8df1
parent: Merge pull request #842 from python-discord/bug/util/831/snekbox-reeval-parsing (diff)
Filtering: merge the word and token watch filters
The only difference between the two filters was the automatic addition of word boundaries. Otherwise, they shared a lot of code. The regex lists were kept separate in the config to retain the convenience of word boundaries automatically being added.

* Rename filter to `watch_regex`
* Expand spoilers for both words and tokens
* Ignore URLs for both words and tokens
-rw-r--r-- bot/cogs/filtering.py 56
-rw-r--r-- bot/constants.py 3
-rw-r--r-- config-default.yml 3
3 files changed, 21 insertions, 41 deletions
diff --git a/bot/cogs/filtering.py b/bot/cogs/filtering.py
index 6651d38e4..3f3dbb853 100644
--- a/bot/cogs/filtering.py
+++ b/bot/cogs/filtering.py
@@ -38,6 +38,7 @@ WORD_WATCHLIST_PATTERNS = [
TOKEN_WATCHLIST_PATTERNS = [
re.compile(fr'{expression}', flags=re.IGNORECASE) for expression in Filter.token_watchlist
]
+WATCHLIST_PATTERNS = WORD_WATCHLIST_PATTERNS + TOKEN_WATCHLIST_PATTERNS
def expand_spoilers(text: str) -> str:
@@ -88,24 +89,18 @@ class Filtering(Cog):
f"Your URL has been removed because it matched a blacklisted domain. {staff_mistake_str}"
)
},
+ "watch_regex": {
+ "enabled": Filter.watch_regex,
+ "function": self._has_watch_regex_match,
+ "type": "watchlist",
+ "content_only": True,
+ },
"watch_rich_embeds": {
"enabled": Filter.watch_rich_embeds,
"function": self._has_rich_embed,
"type": "watchlist",
"content_only": False,
},
- "watch_words": {
- "enabled": Filter.watch_words,
- "function": self._has_watchlist_words,
- "type": "watchlist",
- "content_only": True,
- },
- "watch_tokens": {
- "enabled": Filter.watch_tokens,
- "function": self._has_watchlist_tokens,
- "type": "watchlist",
- "content_only": True,
- },
}
@property
@@ -191,8 +186,8 @@ class Filtering(Cog):
else:
channel_str = f"in {msg.channel.mention}"
- # Word and match stats for watch_words and watch_tokens
- if filter_name in ("watch_words", "watch_tokens"):
+ # Word and match stats for watch_regex
+ if filter_name == "watch_regex":
surroundings = match.string[max(match.start() - 10, 0): match.end() + 10]
message_content = (
f"**Match:** '{match[0]}'\n"
@@ -248,37 +243,24 @@ class Filtering(Cog):
break # We don't want multiple filters to trigger
@staticmethod
- async def _has_watchlist_words(text: str) -> Union[bool, re.Match]:
+ async def _has_watch_regex_match(text: str) -> Union[bool, re.Match]:
"""
- Returns True if the text contains one of the regular expressions from the word_watchlist in our filter config.
+ Return True if `text` matches any regex from `word_watchlist` or `token_watchlist` configs.
- Only matches words with boundaries before and after the expression.
+ `word_watchlist`'s patterns are placed between word boundaries while `token_watchlist` is
+ matched as-is. Spoilers are expanded, if any, and URLs are ignored.
"""
if SPOILER_RE.search(text):
text = expand_spoilers(text)
- for regex_pattern in WORD_WATCHLIST_PATTERNS:
- match = regex_pattern.search(text)
- if match:
- return match # match objects always have a boolean value of True
- return False
-
- @staticmethod
- async def _has_watchlist_tokens(text: str) -> Union[bool, re.Match]:
- """
- Returns True if the text contains one of the regular expressions from the token_watchlist in our filter config.
+ # Make sure it's not a URL
+ if URL_RE.search(text):
+ return False
- This will match the expression even if it does not have boundaries before and after.
- """
- for regex_pattern in TOKEN_WATCHLIST_PATTERNS:
- match = regex_pattern.search(text)
+ for pattern in WATCHLIST_PATTERNS:
+ match = pattern.search(text)
if match:
-
- # Make sure it's not a URL
- if not URL_RE.search(text):
- return match # match objects always have a boolean value of True
-
- return False
+ return match
@staticmethod
async def _has_urls(text: str) -> bool:
diff --git a/bot/constants.py b/bot/constants.py
index 14f8dc094..549e69c8f 100644
--- a/bot/constants.py
+++ b/bot/constants.py
@@ -206,9 +206,8 @@ class Filter(metaclass=YAMLGetter):
filter_zalgo: bool
filter_invites: bool
filter_domains: bool
+ watch_regex: bool
watch_rich_embeds: bool
- watch_words: bool
- watch_tokens: bool
# Notifications are not expected for "watchlist" type filters
notify_user_zalgo: bool
diff --git a/config-default.yml b/config-default.yml
index 5788d1e12..ef0ed970f 100644
--- a/config-default.yml
+++ b/config-default.yml
@@ -248,9 +248,8 @@ filter:
filter_zalgo: false
filter_invites: true
filter_domains: true
+ watch_regex: true
watch_rich_embeds: true
- watch_words: true
- watch_tokens: true
# Notify user on filter?
# Notifications are not expected for "watchlist" type filters