From e06c7771dd30813271072c508d9e26e77fc6c5f3 Mon Sep 17 00:00:00 2001 From: Leon Sandøy Date: Fri, 28 Sep 2018 11:14:49 +0000 Subject: Filtering fixes --- bot/cogs/filtering.py | 36 +++++++++++++++++++++++++++--------- bot/constants.py | 12 +++++++++--- config-default.yml | 18 ++++++++++-------- 3 files changed, 46 insertions(+), 20 deletions(-) diff --git a/bot/cogs/filtering.py b/bot/cogs/filtering.py index 70254fd88..36be78a7e 100644 --- a/bot/cogs/filtering.py +++ b/bot/cogs/filtering.py @@ -7,7 +7,7 @@ from discord.ext.commands import Bot from bot.cogs.modlog import ModLog from bot.constants import ( Channels, Colours, DEBUG_MODE, - Filter, Icons + Filter, Icons, URLs ) log = logging.getLogger(__name__) @@ -24,6 +24,9 @@ INVITE_RE = ( URL_RE = "(https?://[^\s]+)" ZALGO_RE = r"[\u0300-\u036F\u0489]" +RETARDED_RE = r"(re+)tar+(d+|t+)(ed)?" +SELF_DEPRECATION_RE = fr"((i'?m)|(i am)|(it'?s)|(it is)) (.+? )?{RETARDED_RE}" +RETARDED_QUESTIONS_RE = fr"{RETARDED_RE} questions?" class Filtering: @@ -148,6 +151,18 @@ class Filtering: for expression in Filter.word_watchlist: if re.search(fr"\b{expression}\b", text, re.IGNORECASE): + + # Special handling for `retarded` + if expression == RETARDED_RE: + + # stuff like "I'm just retarded" + if re.search(SELF_DEPRECATION_RE, text, re.IGNORECASE): + return False + + # stuff like "sorry for all the retarded questions" + elif re.search(RETARDED_QUESTIONS_RE, text, re.IGNORECASE): + return False + return True return False @@ -165,7 +180,10 @@ class Filtering: for expression in Filter.token_watchlist: if re.search(fr"{expression}", text, re.IGNORECASE): - return True + + # Make sure it's not a URL + if not re.search(URL_RE, text, re.IGNORECASE): + return True return False @@ -197,8 +215,7 @@ class Filtering: return bool(re.search(ZALGO_RE, text)) - @staticmethod - async def _has_invites(text: str) -> bool: + async def _has_invites(self, text: str) -> bool: """ Returns True if the text contains an invite which is not on the guild_invite_whitelist in config.yml. @@ -207,7 +224,7 @@ class Filtering: """ # Remove spaces to prevent cases like - # d i s c o r d . c o m / i n v i t e / p y t h o n + # d i s c o r d . c o m / i n v i t e / s e x y t e e n s text = text.replace(" ", "") # Remove backslashes to prevent escape character aroundfuckery like @@ -217,12 +234,13 @@ class Filtering: invites = re.findall(INVITE_RE, text, re.IGNORECASE) for invite in invites: - filter_invite = ( - invite not in Filter.guild_invite_whitelist - and invite.lower() not in Filter.vanity_url_whitelist + response = await self.bot.http_session.get( + f"{URLs.discord_invite_api}/{invite}" ) + response = await response.json() + guild_id = int(response.get("guild", {}).get("id")) - if filter_invite: + if guild_id not in Filter.guild_invite_whitelist: return True return False diff --git a/bot/constants.py b/bot/constants.py index 3ade4ac7b..68fbc2bc4 100644 --- a/bot/constants.py +++ b/bot/constants.py @@ -202,8 +202,7 @@ class Filter(metaclass=YAMLGetter): watch_tokens: bool ping_everyone: bool - guild_invite_whitelist: List[str] - vanity_url_whitelist: List[str] + guild_invite_whitelist: List[int] domain_blacklist: List[str] word_watchlist: List[str] token_watchlist: List[str] @@ -375,10 +374,18 @@ class RabbitMQ(metaclass=YAMLGetter): class URLs(metaclass=YAMLGetter): section = "urls" + # Discord API endpoints + discord_api: str + discord_invite_api: str + + # Misc endpoints bot_avatar: str deploy: str gitlab_bot_repo: str omdb: str + status: str + + # Site endpoints site: str site_api: str site_facts_api: str @@ -401,7 +408,6 @@ class URLs(metaclass=YAMLGetter): site_infractions_by_id: str site_infractions_user_type_current: str site_infractions_user_type: str - status: str paste_service: str diff --git a/config-default.yml b/config-default.yml index b621c5b90..ce7639186 100644 --- a/config-default.yml +++ b/config-default.yml @@ -134,11 +134,10 @@ filter: ping_everyone: true # Ping @everyone when we send a mod-alert? guild_invite_whitelist: - - kWJYurV # Functional Programming - - XBGetGp # STEM - - vanity_url_whitelist: - - python # Python Discord + - 280033776820813825 # Functional Programming + - 267624335836053506 # Python Discord + - 440186186024222721 # Python Discord: ModLog Emojis + - 273944235143593984 # STEM domain_blacklist: - pornhub.com @@ -147,7 +146,6 @@ filter: word_watchlist: - goo+ks* - ky+s+ - - gh?[ae]+y+s* - ki+ke+s* - beaner+s? - coo+ns* @@ -209,6 +207,7 @@ urls: # PyDis site vars site: &DOMAIN "pythondiscord.com" site_api: &API !JOIN ["api.", *DOMAIN] + site_paste: &PASTE !JOIN ["paste.", *DOMAIN] site_schema: &SCHEMA "https://" site_bigbrother_api: !JOIN [*SCHEMA, *API, "/bot/bigbrother"] @@ -231,17 +230,20 @@ urls: site_tags_api: !JOIN [*SCHEMA, *API, "/bot/tags"] site_user_api: !JOIN [*SCHEMA, *API, "/bot/users"] site_user_complete_api: !JOIN [*SCHEMA, *API, "/bot/users/complete"] + paste_service: !JOIN [*SCHEMA, *PASTE, "/{key}"] # Env vars deploy: !ENV "DEPLOY_URL" status: !ENV "STATUS_URL" + # Discord API URLs + discord_api: &DISCORD_API "https://discordapp.com/api/v7/" + discord_invite_api: !JOIN [*DISCORD_API, "invites"] + # Misc URLs bot_avatar: "https://raw.githubusercontent.com/discord-python/branding/master/logos/logo_circle/logo_circle.png" gitlab_bot_repo: "https://gitlab.com/python-discord/projects/bot" omdb: "http://omdbapi.com" - paste_service: "https://paste.pydis.com/{key}" - anti_spam: # Clean messages that violate a rule. -- cgit v1.2.3