From 77eaae2434aaa0894fe9dd888b7a208285c2665b Mon Sep 17 00:00:00 2001 From: Matteo Bertucci Date: Sun, 17 Oct 2021 11:48:00 +0200 Subject: Regex: add a word boundary before .gg (#1817) * Regex: add a word boundary before .gg and use named groups Before this commit, `an-arbitrary-domain.gg/notaninvite` would trigger the filter. This solves the issue by adding a word boundary before this branch of the pattern. * Regex: replace the word boundary by a word char Co-authored-by: ChrisJL Co-authored-by: ChrisJL Co-authored-by: Xithrius <15021300+Xithrius@users.noreply.github.com> --- bot/converters.py | 4 ++-- bot/exts/filters/filtering.py | 2 +- bot/utils/regex.py | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bot/converters.py b/bot/converters.py index 4a4d3b544..dd02f6ae6 100644 --- a/bot/converters.py +++ b/bot/converters.py @@ -71,10 +71,10 @@ class ValidDiscordServerInvite(Converter): async def convert(self, ctx: Context, server_invite: str) -> dict: """Check whether the string is a valid Discord server invite.""" - invite_code = INVITE_RE.search(server_invite) + invite_code = INVITE_RE.match(server_invite) if invite_code: response = await ctx.bot.http_session.get( - f"{URLs.discord_invite_api}/{invite_code[1]}" + f"{URLs.discord_invite_api}/{invite_code.group('invite')}" ) if response.status != 404: invite_data = await response.json() diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py index 7faf063b9..a151db1f0 100644 --- a/bot/exts/filters/filtering.py +++ b/bot/exts/filters/filtering.py @@ -507,7 +507,7 @@ class Filtering(Cog): # discord\.gg/gdudes-pony-farm text = text.replace("\\", "") - invites = INVITE_RE.findall(text) + invites = [m.group("invite") for m in INVITE_RE.finditer(text)] invite_data = dict() for invite in invites: if invite in invite_data: diff --git a/bot/utils/regex.py b/bot/utils/regex.py index 7bad1e627..d77f5950b 100644 --- a/bot/utils/regex.py +++ b/bot/utils/regex.py @@ -1,14 
+1,14 @@ import re INVITE_RE = re.compile( - r"(?:discord(?:[\.,]|dot)gg|" # Could be discord.gg/ - r"discord(?:[\.,]|dot)com(?:\/|slash)invite|" # or discord.com/invite/ - r"discordapp(?:[\.,]|dot)com(?:\/|slash)invite|" # or discordapp.com/invite/ - r"discord(?:[\.,]|dot)me|" # or discord.me - r"discord(?:[\.,]|dot)li|" # or discord.li - r"discord(?:[\.,]|dot)io|" # or discord.io. - r"(?:[\.,]|dot)gg" # or .gg/ - r")(?:[\/]|slash)" # / or 'slash' - r"([a-zA-Z0-9\-]+)", # the invite code itself + r"(discord([\.,]|dot)gg|" # Could be discord.gg/ + r"discord([\.,]|dot)com(\/|slash)invite|" # or discord.com/invite/ + r"discordapp([\.,]|dot)com(\/|slash)invite|" # or discordapp.com/invite/ + r"discord([\.,]|dot)me|" # or discord.me + r"discord([\.,]|dot)li|" # or discord.li + r"discord([\.,]|dot)io|" # or discord.io. + r"((?<!\w)([\.,]|dot))gg" # or .gg/ + r")([\/]|slash)" # / or 'slash' + r"(?P<invite>[a-zA-Z0-9\-]+)", # the invite code itself flags=re.IGNORECASE ) -- cgit v1.2.3