diff options
| author | 2020-03-31 13:46:22 -0400 | |
|---|---|---|
| committer | 2020-03-31 13:46:22 -0400 | |
| commit | 624748b26003b3758d9f2b8dd71a68728caa48ea (patch) | |
| tree | 486464fc26f8db4ab5fb234136ecb1a230d5e334 | |
| parent | Merge pull request #833 from ks129/zen-match-fix (diff) | |
| parent | Merge branch 'master' into feat/filter/322/merge-tokens-words (diff) | |
Merge pull request #846 from python-discord/feat/filter/322/merge-tokens-words
Filtering: merge the word and token watch filters
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | bot/cogs/filtering.py | 56 |
| -rw-r--r-- | bot/constants.py | 3 |
| -rw-r--r-- | config-default.yml | 3 |
3 files changed, 21 insertions, 41 deletions
| diff --git a/bot/cogs/filtering.py b/bot/cogs/filtering.py index 6651d38e4..3f3dbb853 100644 --- a/bot/cogs/filtering.py +++ b/bot/cogs/filtering.py @@ -38,6 +38,7 @@ WORD_WATCHLIST_PATTERNS = [  TOKEN_WATCHLIST_PATTERNS = [      re.compile(fr'{expression}', flags=re.IGNORECASE) for expression in Filter.token_watchlist  ] +WATCHLIST_PATTERNS = WORD_WATCHLIST_PATTERNS + TOKEN_WATCHLIST_PATTERNS  def expand_spoilers(text: str) -> str: @@ -88,24 +89,18 @@ class Filtering(Cog):                      f"Your URL has been removed because it matched a blacklisted domain. {staff_mistake_str}"                  )              }, +            "watch_regex": { +                "enabled": Filter.watch_regex, +                "function": self._has_watch_regex_match, +                "type": "watchlist", +                "content_only": True, +            },              "watch_rich_embeds": {                  "enabled": Filter.watch_rich_embeds,                  "function": self._has_rich_embed,                  "type": "watchlist",                  "content_only": False,              }, -            "watch_words": { -                "enabled": Filter.watch_words, -                "function": self._has_watchlist_words, -                "type": "watchlist", -                "content_only": True, -            }, -            "watch_tokens": { -                "enabled": Filter.watch_tokens, -                "function": self._has_watchlist_tokens, -                "type": "watchlist", -                "content_only": True, -            },          }      @property @@ -191,8 +186,8 @@ class Filtering(Cog):                          else:                              channel_str = f"in {msg.channel.mention}" -                        # Word and match stats for watch_words and watch_tokens -                        if filter_name in ("watch_words", "watch_tokens"): +                        # Word and match stats for watch_regex +                        if filter_name == "watch_regex":    
                          surroundings = match.string[max(match.start() - 10, 0): match.end() + 10]                              message_content = (                                  f"**Match:** '{match[0]}'\n" @@ -248,37 +243,24 @@ class Filtering(Cog):                          break  # We don't want multiple filters to trigger      @staticmethod -    async def _has_watchlist_words(text: str) -> Union[bool, re.Match]: +    async def _has_watch_regex_match(text: str) -> Union[bool, re.Match]:          """ -        Returns True if the text contains one of the regular expressions from the word_watchlist in our filter config. +        Return True if `text` matches any regex from `word_watchlist` or `token_watchlist` configs. -        Only matches words with boundaries before and after the expression. +        `word_watchlist`'s patterns are placed between word boundaries while `token_watchlist` is +        matched as-is. Spoilers are expanded, if any, and URLs are ignored.          """          if SPOILER_RE.search(text):              text = expand_spoilers(text) -        for regex_pattern in WORD_WATCHLIST_PATTERNS: -            match = regex_pattern.search(text) -            if match: -                return match  # match objects always have a boolean value of True -        return False - -    @staticmethod -    async def _has_watchlist_tokens(text: str) -> Union[bool, re.Match]: -        """ -        Returns True if the text contains one of the regular expressions from the token_watchlist in our filter config. +        # Make sure it's not a URL +        if URL_RE.search(text): +            return False -        This will match the expression even if it does not have boundaries before and after. 
-        """ -        for regex_pattern in TOKEN_WATCHLIST_PATTERNS: -            match = regex_pattern.search(text) +        for pattern in WATCHLIST_PATTERNS: +            match = pattern.search(text)              if match: - -                # Make sure it's not a URL -                if not URL_RE.search(text): -                    return match  # match objects always have a boolean value of True - -        return False +                return match      @staticmethod      async def _has_urls(text: str) -> bool: diff --git a/bot/constants.py b/bot/constants.py index 14f8dc094..549e69c8f 100644 --- a/bot/constants.py +++ b/bot/constants.py @@ -206,9 +206,8 @@ class Filter(metaclass=YAMLGetter):      filter_zalgo: bool      filter_invites: bool      filter_domains: bool +    watch_regex: bool      watch_rich_embeds: bool -    watch_words: bool -    watch_tokens: bool      # Notifications are not expected for "watchlist" type filters      notify_user_zalgo: bool diff --git a/config-default.yml b/config-default.yml index 5788d1e12..ef0ed970f 100644 --- a/config-default.yml +++ b/config-default.yml @@ -248,9 +248,8 @@ filter:      filter_zalgo:       false      filter_invites:     true      filter_domains:     true +    watch_regex:        true      watch_rich_embeds:  true -    watch_words:        true -    watch_tokens:       true      # Notify user on filter?      # Notifications are not expected for "watchlist" type filters | 
