diff options
| author | 2020-07-18 15:55:56 +0200 | |
|---|---|---|
| committer | 2020-07-18 15:55:56 +0200 | |
| commit | d07b1af634787f53ee381d31a4c125498af52beb (patch) | |
| tree | a3971cecabe394af0eaf5e063bf6004845e069cd | |
| parent | Add more logging to AllowDenyLists cog. (diff) | |
Remove Filtering constants, use cache data.
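Instead of reading the guild invite whitelist, the domain blacklist and the word watchlist from config-default.yml, the Filtering cog
now looks them up in the AllowDenyList cache. The corresponding entries (including token_watchlist) are removed from the Filter constants and from config-default.yml.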
| -rw-r--r-- | bot/cogs/filtering.py | 62 | ||||
| -rw-r--r-- | bot/constants.py | 4 | ||||
| -rw-r--r-- | config-default.yml | 101 |
3 files changed, 32 insertions, 135 deletions
diff --git a/bot/cogs/filtering.py b/bot/cogs/filtering.py index bd665f424..9e35a83d1 100644 --- a/bot/cogs/filtering.py +++ b/bot/cogs/filtering.py @@ -22,6 +22,7 @@ from bot.utils.scheduling import Scheduler log = logging.getLogger(__name__) +# Regular expressions INVITE_RE = re.compile( r"(?:discord(?:[\.,]|dot)gg|" # Could be discord.gg/ r"discord(?:[\.,]|dot)com(?:\/|slash)invite|" # or discord.com/invite/ @@ -37,25 +38,8 @@ SPOILER_RE = re.compile(r"(\|\|.+?\|\|)", re.DOTALL) URL_RE = re.compile(r"(https?://[^\s]+)", flags=re.IGNORECASE) ZALGO_RE = re.compile(r"[\u0300-\u036F\u0489]") -WORD_WATCHLIST_PATTERNS = [ - re.compile(fr'\b{expression}\b', flags=re.IGNORECASE) for expression in Filter.word_watchlist -] -TOKEN_WATCHLIST_PATTERNS = [ - re.compile(fr'{expression}', flags=re.IGNORECASE) for expression in Filter.token_watchlist -] -WATCHLIST_PATTERNS = WORD_WATCHLIST_PATTERNS + TOKEN_WATCHLIST_PATTERNS - +# Other constants. DAYS_BETWEEN_ALERTS = 3 - - -def expand_spoilers(text: str) -> str: - """Return a string containing all interpretations of a spoilered message.""" - split_text = SPOILER_RE.split(text) - return ''.join( - split_text[0::2] + split_text[1::2] + split_text - ) - - OFFENSIVE_MSG_DELETE_TIME = timedelta(days=Filter.offensive_msg_delete_days) @@ -125,6 +109,23 @@ class Filtering(Cog): self.bot.loop.create_task(self.reschedule_offensive_msg_deletion()) + def _get_allowlist_items(self, allow: bool, list_type: str, compiled: Optional[bool] = False) -> list: + """Fetch items from the allow_deny_list_cache.""" + items = self.bot.allow_deny_list_cache[f"{list_type}.{allow}"] + + if compiled: + return [re.compile(fr'{item.get("content")}', flags=re.IGNORECASE) for item in items] + else: + return [item.get("content") for item in items] + + @staticmethod + def _expand_spoilers(text: str) -> str: + """Return a string containing all interpretations of a spoilered message.""" + split_text = SPOILER_RE.split(text) + return ''.join( + split_text[0::2] + 
split_text[1::2] + split_text + ) + @property def mod_log(self) -> ModLog: """Get currently loaded ModLog cog instance.""" @@ -149,11 +150,11 @@ class Filtering(Cog): delta = relativedelta(after.edited_at, before.edited_at).microseconds await self._filter_message(after, delta) - @staticmethod - def get_name_matches(name: str) -> List[re.Match]: + def get_name_matches(self, name: str) -> List[re.Match]: """Check bad words from passed string (name). Return list of matches.""" matches = [] - for pattern in WATCHLIST_PATTERNS: + watchlist_patterns = self._get_allowlist_items(False, 'word_watchlist', compiled=True) + for pattern in watchlist_patterns: if match := pattern.search(name): matches.append(match) return matches @@ -403,8 +404,7 @@ class Filtering(Cog): and not msg.author.bot # Author not a bot ) - @staticmethod - async def _has_watch_regex_match(text: str) -> Union[bool, re.Match]: + async def _has_watch_regex_match(self, text: str) -> Union[bool, re.Match]: """ Return True if `text` matches any regex from `word_watchlist` or `token_watchlist` configs. @@ -412,26 +412,27 @@ class Filtering(Cog): matched as-is. Spoilers are expanded, if any, and URLs are ignored. 
""" if SPOILER_RE.search(text): - text = expand_spoilers(text) + text = self._expand_spoilers(text) # Make sure it's not a URL if URL_RE.search(text): return False - for pattern in WATCHLIST_PATTERNS: + watchlist_patterns = self._get_allowlist_items(False, 'word_watchlist', compiled=True) + for pattern in watchlist_patterns: match = pattern.search(text) if match: return match - @staticmethod - async def _has_urls(text: str) -> bool: + async def _has_urls(self, text: str) -> bool: """Returns True if the text contains one of the blacklisted URLs from the config file.""" if not URL_RE.search(text): return False text = text.lower() + domain_blacklist = self._get_allowlist_items(False, "domain_name") - for url in Filter.domain_blacklist: + for url in domain_blacklist: if url.lower() in text: return True @@ -476,9 +477,10 @@ class Filtering(Cog): # between invalid and expired invites return True - guild_id = int(guild.get("id")) + guild_id = guild.get("id") + guild_invite_whitelist = self._get_allowlist_items(True, "guild_invite_id") - if guild_id not in Filter.guild_invite_whitelist: + if guild_id not in guild_invite_whitelist: guild_icon_hash = guild["icon"] guild_icon = ( "https://cdn.discordapp.com/icons/" diff --git a/bot/constants.py b/bot/constants.py index 778bc093c..f5245ca50 100644 --- a/bot/constants.py +++ b/bot/constants.py @@ -227,10 +227,6 @@ class Filter(metaclass=YAMLGetter): ping_everyone: bool offensive_msg_delete_days: int - guild_invite_whitelist: List[int] - domain_blacklist: List[str] - word_watchlist: List[str] - token_watchlist: List[str] channel_whitelist: List[int] role_whitelist: List[int] diff --git a/config-default.yml b/config-default.yml index f2eb17b89..81c8c40d5 100644 --- a/config-default.yml +++ b/config-default.yml @@ -272,107 +272,6 @@ filter: ping_everyone: true offensive_msg_delete_days: 7 # How many days before deleting an offensive message? 
- guild_invite_whitelist: - - 280033776820813825 # Functional Programming - - 267624335836053506 # Python Discord - - 440186186024222721 # Python Discord: Emojis 1 - - 578587418123304970 # Python Discord: Emojis 2 - - 273944235143593984 # STEM - - 348658686962696195 # RLBot - - 531221516914917387 # Pallets - - 249111029668249601 # Gentoo - - 327254708534116352 # Adafruit - - 544525886180032552 # kennethreitz.org - - 590806733924859943 # Discord Hack Week - - 423249981340778496 # Kivy - - 197038439483310086 # Discord Testers - - 286633898581164032 # Ren'Py - - 349505959032389632 # PyGame - - 438622377094414346 # Pyglet - - 524691714909274162 # Panda3D - - 336642139381301249 # discord.py - - 405403391410438165 # Sentdex - - 172018499005317120 # The Coding Den - - 666560367173828639 # PyWeek - - 702724176489873509 # Microsoft Python - - 150662382874525696 # Microsoft Community - - 81384788765712384 # Discord API - - 613425648685547541 # Discord Developers - - 185590609631903755 # Blender Hub - - 420324994703163402 # /r/FlutterDev - - 488751051629920277 # Python Atlanta - - 143867839282020352 # C# - - 159039020565790721 # Django - - 238666723824238602 # Programming Discussions - - 433980600391696384 # JetBrains Community - - 204621105720328193 # Raspberry Pi - - 244230771232079873 # Programmers Hangout - - 239433591950540801 # SpeakJS - - 174075418410876928 # DevCord - - 489222168727519232 # Unity - - 494558898880118785 # Programmer Humor - - domain_blacklist: - - pornhub.com - - liveleak.com - - grabify.link - - bmwforum.co - - leancoding.co - - spottyfly.com - - stopify.co - - yoütu.be - - discörd.com - - minecräft.com - - freegiftcards.co - - disçordapp.com - - fortnight.space - - fortnitechat.site - - joinmy.site - - curiouscat.club - - catsnthings.fun - - yourtube.site - - youtubeshort.watch - - catsnthing.com - - youtubeshort.pro - - canadianlumberjacks.online - - poweredbydialup.club - - poweredbydialup.online - - poweredbysecurity.org - - 
poweredbysecurity.online - - ssteam.site - - steamwalletgift.com - - discord.gift - - lmgtfy.com - - word_watchlist: - - goo+ks* - - ky+s+ - - ki+ke+s* - - beaner+s? - - coo+ns* - - nig+lets* - - slant-eyes* - - towe?l-?head+s* - - chi*n+k+s* - - spick*s* - - kill* +(?:yo)?urself+ - - jew+s* - - suicide - - rape - - (re+)tar+(d+|t+)(ed)? - - ta+r+d+ - - cunts* - - trann*y - - shemale - - token_watchlist: - - fa+g+s* - - 卐 - - 卍 - - cuck(?!oo+) - - nigg+(?:e*r+|a+h*?|u+h+)s? - - fag+o+t+s* - # Censor doesn't apply to these channel_whitelist: - *ADMINS |