diff options
| author | 2020-03-27 11:22:34 -0700 | |
|---|---|---|
| committer | 2020-03-27 11:30:27 -0700 | |
| commit | e88bb946fea8c4bc861f17772f8aca28f99be512 (patch) | |
| tree | 6b46dd9150bf8f517665e013ed825a1981da8df1 | |
| parent | Merge pull request #842 from python-discord/bug/util/831/snekbox-reeval-parsing (diff) | |
Filtering: merge the word and token watch filters
The only difference was the automatic addition of word boundaries.
Otherwise, they shared a lot of code. The regex lists were kept separate
in the config to retain the convenience of word boundaries automatically
being added.
* Rename filter to `watch_regex`
* Expand spoilers for both words and tokens
* Ignore URLs for both words and tokens
| -rw-r--r-- | bot/cogs/filtering.py | 56 | ||||
| -rw-r--r-- | bot/constants.py | 3 | ||||
| -rw-r--r-- | config-default.yml | 3 |
3 files changed, 21 insertions, 41 deletions
```diff
diff --git a/bot/cogs/filtering.py b/bot/cogs/filtering.py
index 6651d38e4..3f3dbb853 100644
--- a/bot/cogs/filtering.py
+++ b/bot/cogs/filtering.py
@@ -38,6 +38,7 @@ WORD_WATCHLIST_PATTERNS = [
 TOKEN_WATCHLIST_PATTERNS = [
     re.compile(fr'{expression}', flags=re.IGNORECASE) for expression in Filter.token_watchlist
 ]
+WATCHLIST_PATTERNS = WORD_WATCHLIST_PATTERNS + TOKEN_WATCHLIST_PATTERNS
 
 
 def expand_spoilers(text: str) -> str:
@@ -88,24 +89,18 @@ class Filtering(Cog):
                     f"Your URL has been removed because it matched a blacklisted domain. {staff_mistake_str}"
                 )
             },
+            "watch_regex": {
+                "enabled": Filter.watch_regex,
+                "function": self._has_watch_regex_match,
+                "type": "watchlist",
+                "content_only": True,
+            },
             "watch_rich_embeds": {
                 "enabled": Filter.watch_rich_embeds,
                 "function": self._has_rich_embed,
                 "type": "watchlist",
                 "content_only": False,
             },
-            "watch_words": {
-                "enabled": Filter.watch_words,
-                "function": self._has_watchlist_words,
-                "type": "watchlist",
-                "content_only": True,
-            },
-            "watch_tokens": {
-                "enabled": Filter.watch_tokens,
-                "function": self._has_watchlist_tokens,
-                "type": "watchlist",
-                "content_only": True,
-            },
         }
 
     @property
@@ -191,8 +186,8 @@ class Filtering(Cog):
                         else:
                             channel_str = f"in {msg.channel.mention}"
 
-                        # Word and match stats for watch_words and watch_tokens
-                        if filter_name in ("watch_words", "watch_tokens"):
+                        # Word and match stats for watch_regex
+                        if filter_name == "watch_regex":
                             surroundings = match.string[max(match.start() - 10, 0): match.end() + 10]
                             message_content = (
                                 f"**Match:** '{match[0]}'\n"
@@ -248,37 +243,24 @@ class Filtering(Cog):
                         break  # We don't want multiple filters to trigger
 
     @staticmethod
-    async def _has_watchlist_words(text: str) -> Union[bool, re.Match]:
+    async def _has_watch_regex_match(text: str) -> Union[bool, re.Match]:
         """
-        Returns True if the text contains one of the regular expressions from the word_watchlist in our filter config.
+        Return True if `text` matches any regex from `word_watchlist` or `token_watchlist` configs.
 
-        Only matches words with boundaries before and after the expression.
+        `word_watchlist`'s patterns are placed between word boundaries while `token_watchlist` is
+        matched as-is. Spoilers are expanded, if any, and URLs are ignored.
         """
         if SPOILER_RE.search(text):
             text = expand_spoilers(text)
 
-        for regex_pattern in WORD_WATCHLIST_PATTERNS:
-            match = regex_pattern.search(text)
-            if match:
-                return match  # match objects always have a boolean value of True
-        return False
-
-    @staticmethod
-    async def _has_watchlist_tokens(text: str) -> Union[bool, re.Match]:
-        """
-        Returns True if the text contains one of the regular expressions from the token_watchlist in our filter config.
+        # Make sure it's not a URL
+        if URL_RE.search(text):
+            return False
 
-        This will match the expression even if it does not have boundaries before and after.
-        """
-        for regex_pattern in TOKEN_WATCHLIST_PATTERNS:
-            match = regex_pattern.search(text)
+        for pattern in WATCHLIST_PATTERNS:
+            match = pattern.search(text)
             if match:
-
-                # Make sure it's not a URL
-                if not URL_RE.search(text):
-                    return match  # match objects always have a boolean value of True
-
-        return False
+                return match
 
     @staticmethod
     async def _has_urls(text: str) -> bool:
diff --git a/bot/constants.py b/bot/constants.py
index 14f8dc094..549e69c8f 100644
--- a/bot/constants.py
+++ b/bot/constants.py
@@ -206,9 +206,8 @@ class Filter(metaclass=YAMLGetter):
     filter_zalgo: bool
     filter_invites: bool
     filter_domains: bool
+    watch_regex: bool
     watch_rich_embeds: bool
-    watch_words: bool
-    watch_tokens: bool
 
     # Notifications are not expected for "watchlist" type filters
     notify_user_zalgo: bool
diff --git a/config-default.yml b/config-default.yml
index 5788d1e12..ef0ed970f 100644
--- a/config-default.yml
+++ b/config-default.yml
@@ -248,9 +248,8 @@ filter:
     filter_zalgo: false
     filter_invites: true
     filter_domains: true
+    watch_regex: true
     watch_rich_embeds: true
-    watch_words: true
-    watch_tokens: true
 
     # Notify user on filter?
     # Notifications are not expected for "watchlist" type filters
```