diff options
-rw-r--r-- | bot/exts/filters/token_remover.py | 77 | ||||
-rw-r--r-- | tests/bot/exts/filters/test_token_remover.py | 150 |
2 files changed, 188 insertions, 39 deletions
diff --git a/bot/exts/filters/token_remover.py b/bot/exts/filters/token_remover.py index ba86e557a..bd6a1f97a 100644 --- a/bot/exts/filters/token_remover.py +++ b/bot/exts/filters/token_remover.py @@ -19,6 +19,11 @@ LOG_MESSAGE = ( "Censored a seemingly valid token sent by {author} in {channel}, " "token was `{user_id}.{timestamp}.{hmac}`" ) +UNKNOWN_USER_LOG_MESSAGE = "Decoded user ID: `{user_id}` (Not present in server)." +KNOWN_USER_LOG_MESSAGE = ( + "Decoded user ID: `{user_id}` **(Present in server)**.\n" + "This matches `{user_name}` and means this is likely a valid **{kind}** token." +) DELETION_MESSAGE_TEMPLATE = ( "Hey {mention}! I noticed you posted a seemingly valid Discord API " "token in your message and have removed your message. " @@ -94,6 +99,7 @@ class TokenRemover(Cog): await msg.channel.send(DELETION_MESSAGE_TEMPLATE.format(mention=msg.author.mention)) log_message = self.format_log_message(msg, found_token) + userid_message, mention_everyone = self.format_userid_log_message(msg, found_token) log.debug(log_message) # Send pretty mod log embed to mod-alerts @@ -101,16 +107,41 @@ class TokenRemover(Cog): icon_url=Icons.token_removed, colour=Colour(Colours.soft_red), title="Token removed!", - text=log_message, + text=log_message + "\n" + userid_message, thumbnail=msg.author.avatar_url_as(static_format="png"), channel_id=Channels.mod_alerts, + ping_everyone=mention_everyone, ) self.bot.stats.incr("tokens.removed_tokens") + @classmethod + def format_userid_log_message(cls, msg: Message, token: Token) -> t.Tuple[str, bool]: + """ + Format the portion of the log message that includes details about the detected user ID. + + If the user is resolved to a member, the format includes the user ID, name, and the + kind of user detected. + + If we resolve to a member and it is not a bot, we also return True to ping everyone. + + Returns a tuple of (log_message, mention_everyone) + """ + user_id = cls.extract_user_id(token.user_id) + user = msg.guild.get_member(user_id) + + if user: + return KNOWN_USER_LOG_MESSAGE.format( + user_id=user_id, + user_name=str(user), + kind="BOT" if user.bot else "USER", + ), not user.bot + else: + return UNKNOWN_USER_LOG_MESSAGE.format(user_id=user_id), False + @staticmethod def format_log_message(msg: Message, token: Token) -> str: - """Return the log message to send for `token` being censored in `msg`.""" + """Return the generic portion of the log message to send for `token` being censored in `msg`.""" return LOG_MESSAGE.format( author=format_user(msg.author), channel=msg.channel.mention, @@ -126,7 +157,11 @@ class TokenRemover(Cog): # token check (e.g. `message.channel.send` also matches our token pattern) for match in TOKEN_RE.finditer(msg.content): token = Token(*match.groups()) - if cls.is_valid_user_id(token.user_id) and cls.is_valid_timestamp(token.timestamp): + if ( + (cls.extract_user_id(token.user_id) is not None) + and cls.is_valid_timestamp(token.timestamp) + and cls.is_maybe_valid_hmac(token.hmac) + ): # Short-circuit on first match return token @@ -134,22 +169,20 @@ class TokenRemover(Cog): return @staticmethod - def is_valid_user_id(b64_content: str) -> bool: - """ - Check potential token to see if it contains a valid Discord user ID. - - See: https://discordapp.com/developers/docs/reference#snowflakes - """ + def extract_user_id(b64_content: str) -> t.Optional[int]: + """Return a user ID integer from part of a potential token, or None if it couldn't be decoded.""" b64_content = utils.pad_base64(b64_content) try: decoded_bytes = base64.urlsafe_b64decode(b64_content) string = decoded_bytes.decode('utf-8') - - # isdigit on its own would match a lot of other Unicode characters, hence the isascii. - return string.isascii() and string.isdigit() + if not (string.isascii() and string.isdigit()): + # This case triggers if there are fancy unicode digits in the base64 encoding, + # that means it's not a valid user id. + return None + return int(string) except (binascii.Error, ValueError): - return False + return None @staticmethod def is_valid_timestamp(b64_content: str) -> bool: @@ -176,6 +209,24 @@ class TokenRemover(Cog): log.debug(f"Invalid token timestamp '{b64_content}': smaller than Discord epoch") return False + @staticmethod + def is_maybe_valid_hmac(b64_content: str) -> bool: + """ + Determine if a given HMAC portion of a token is potentially valid. + + If the HMAC has 3 or less characters, it's probably a dummy value like "xxxxxxxxxx", + and thus the token can probably be skipped. + """ + unique = len(set(b64_content.lower())) + if unique <= 3: + log.debug( + f"Considering the HMAC {b64_content} a dummy because it has {unique}" + " case-insensitively unique characters" + ) + return False + else: + return True + def setup(bot: Bot) -> None: """Load the TokenRemover cog.""" diff --git a/tests/bot/exts/filters/test_token_remover.py b/tests/bot/exts/filters/test_token_remover.py index ea822053b..f99cc3370 100644 --- a/tests/bot/exts/filters/test_token_remover.py +++ b/tests/bot/exts/filters/test_token_remover.py @@ -23,23 +23,25 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): self.msg = MockMessage(id=555, content="hello world") self.msg.channel.mention = "#lemonade-stand" + self.msg.guild.get_member.return_value.bot = False + self.msg.guild.get_member.return_value.__str__.return_value = "Woody" self.msg.author.__str__ = MagicMock(return_value=self.msg.author.name) self.msg.author.avatar_url_as.return_value = "picture-lemon.png" - def test_is_valid_user_id_valid(self): - """Should consider user IDs valid if they decode entirely to ASCII digits.""" - ids = ( - "NDcyMjY1OTQzMDYyNDEzMzMy", - "NDc1MDczNjI5Mzk5NTQ3OTA0", - "NDY3MjIzMjMwNjUwNzc3NjQx", + def test_extract_user_id_valid(self): + """Should consider user IDs valid if they decode into an integer ID.""" + id_pairs = ( + ("NDcyMjY1OTQzMDYyNDEzMzMy", 472265943062413332), + ("NDc1MDczNjI5Mzk5NTQ3OTA0", 475073629399547904), + ("NDY3MjIzMjMwNjUwNzc3NjQx", 467223230650777641), ) - for user_id in ids: - with self.subTest(user_id=user_id): - result = TokenRemover.is_valid_user_id(user_id) - self.assertTrue(result) + for token_id, user_id in id_pairs: + with self.subTest(token_id=token_id): + result = TokenRemover.extract_user_id(token_id) + self.assertEqual(result, user_id) - def test_is_valid_user_id_invalid(self): + def test_extract_user_id_invalid(self): """Should consider non-digit and non-ASCII IDs invalid.""" ids = ( ("SGVsbG8gd29ybGQ", "non-digit ASCII"), @@ -53,8 +55,8 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): for user_id, msg in ids: with self.subTest(msg=msg): - result = TokenRemover.is_valid_user_id(user_id) - self.assertFalse(result) + result = TokenRemover.extract_user_id(user_id) + self.assertIsNone(result) def test_is_valid_timestamp_valid(self): """Should consider timestamps valid if they're greater than the Discord epoch.""" @@ -86,6 +88,34 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): result = TokenRemover.is_valid_timestamp(timestamp) self.assertFalse(result) + def test_is_valid_hmac_valid(self): + """Should consider an HMAC valid if it has at least 3 unique characters.""" + valid_hmacs = ( + "VXmErH7j511turNpfURmb0rVNm8", + "Ysnu2wacjaKs7qnoo46S8Dm2us8", + "sJf6omBPORBPju3WJEIAcwW9Zds", + "s45jqDV_Iisn-symw0yDRrk_jf4", + ) + + for hmac in valid_hmacs: + with self.subTest(msg=hmac): + result = TokenRemover.is_maybe_valid_hmac(hmac) + self.assertTrue(result) + + def test_is_invalid_hmac_invalid(self): + """Should consider an HMAC invalid if has fewer than 3 unique characters.""" + invalid_hmacs = ( + ("xxxxxxxxxxxxxxxxxx", "Single character"), + ("XxXxXxXxXxXxXxXxXx", "Single character alternating case"), + ("ASFasfASFasfASFASsf", "Three characters alternating-case"), + ("asdasdasdasdasdasdasd", "Three characters one case"), + ) + + for hmac, msg in invalid_hmacs: + with self.subTest(msg=msg): + result = TokenRemover.is_maybe_valid_hmac(hmac) + self.assertFalse(result) + def test_mod_log_property(self): """The `mod_log` property should ask the bot to return the `ModLog` cog.""" self.bot.get_cog.return_value = 'lemon' @@ -143,11 +173,18 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): self.assertIsNone(return_value) token_re.finditer.assert_called_once_with(self.msg.content) - @autospec(TokenRemover, "is_valid_user_id", "is_valid_timestamp") + @autospec(TokenRemover, "extract_user_id", "is_valid_timestamp", "is_maybe_valid_hmac") @autospec("bot.exts.filters.token_remover", "Token") @autospec("bot.exts.filters.token_remover", "TOKEN_RE") - def test_find_token_valid_match(self, token_re, token_cls, is_valid_id, is_valid_timestamp): - """The first match with a valid user ID and timestamp should be returned as a `Token`.""" + def test_find_token_valid_match( + self, + token_re, + token_cls, + extract_user_id, + is_valid_timestamp, + is_maybe_valid_hmac, + ): + """The first match with a valid user ID, timestamp, and HMAC should be returned as a `Token`.""" matches = [ mock.create_autospec(Match, spec_set=True, instance=True), mock.create_autospec(Match, spec_set=True, instance=True), @@ -159,23 +196,32 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): token_re.finditer.return_value = matches token_cls.side_effect = tokens - is_valid_id.side_effect = (False, True) # The 1st match will be invalid, 2nd one valid. + extract_user_id.side_effect = (None, True) # The 1st match will be invalid, 2nd one valid. is_valid_timestamp.return_value = True + is_maybe_valid_hmac.return_value = True return_value = TokenRemover.find_token_in_message(self.msg) self.assertEqual(tokens[1], return_value) token_re.finditer.assert_called_once_with(self.msg.content) - @autospec(TokenRemover, "is_valid_user_id", "is_valid_timestamp") + @autospec(TokenRemover, "extract_user_id", "is_valid_timestamp", "is_maybe_valid_hmac") @autospec("bot.exts.filters.token_remover", "Token") @autospec("bot.exts.filters.token_remover", "TOKEN_RE") - def test_find_token_invalid_matches(self, token_re, token_cls, is_valid_id, is_valid_timestamp): - """None should be returned if no matches have valid user IDs or timestamps.""" + def test_find_token_invalid_matches( + self, + token_re, + token_cls, + extract_user_id, + is_valid_timestamp, + is_maybe_valid_hmac, + ): + """None should be returned if no matches have valid user IDs, HMACs, and timestamps.""" token_re.finditer.return_value = [mock.create_autospec(Match, spec_set=True, instance=True)] token_cls.return_value = mock.create_autospec(Token, spec_set=True, instance=True) - is_valid_id.return_value = False + extract_user_id.return_value = None is_valid_timestamp.return_value = False + is_maybe_valid_hmac.return_value = False return_value = TokenRemover.find_token_in_message(self.msg) @@ -234,7 +280,7 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): @autospec("bot.exts.filters.token_remover", "LOG_MESSAGE") def test_format_log_message(self, log_message): """Should correctly format the log message with info from the message and token.""" - token = Token("NDY3MjIzMjMwNjUwNzc3NjQx", "XsySD_", "s45jqDV_Iisn-symw0yDRrk_jf4") + token = Token("NDcyMjY1OTQzMDYyNDEzMzMy", "XsySD_", "s45jqDV_Iisn-symw0yDRrk_jf4") log_message.format.return_value = "Howdy" return_value = TokenRemover.format_log_message(self.msg, token) @@ -248,18 +294,68 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): hmac="x" * len(token.hmac), ) + @autospec("bot.exts.filters.token_remover", "UNKNOWN_USER_LOG_MESSAGE") + def test_format_userid_log_message_unknown(self, unknown_user_log_message): + """Should correctly format the user ID portion when the actual user it belongs to is unknown.""" + token = Token("NDcyMjY1OTQzMDYyNDEzMzMy", "XsySD_", "s45jqDV_Iisn-symw0yDRrk_jf4") + unknown_user_log_message.format.return_value = " Partner" + msg = MockMessage(id=555, content="hello world") + msg.guild.get_member.return_value = None + + return_value = TokenRemover.format_userid_log_message(msg, token) + + self.assertEqual(return_value, (unknown_user_log_message.format.return_value, False)) + unknown_user_log_message.format.assert_called_once_with(user_id=472265943062413332) + + @autospec("bot.exts.filters.token_remover", "KNOWN_USER_LOG_MESSAGE") + def test_format_userid_log_message_bot(self, known_user_log_message): + """Should correctly format the user ID portion when the ID belongs to a known bot.""" + token = Token("NDcyMjY1OTQzMDYyNDEzMzMy", "XsySD_", "s45jqDV_Iisn-symw0yDRrk_jf4") + known_user_log_message.format.return_value = " Partner" + msg = MockMessage(id=555, content="hello world") + msg.guild.get_member.return_value.__str__.return_value = "Sam" + msg.guild.get_member.return_value.bot = True + + return_value = TokenRemover.format_userid_log_message(msg, token) + + self.assertEqual(return_value, (known_user_log_message.format.return_value, False)) + + known_user_log_message.format.assert_called_once_with( + user_id=472265943062413332, + user_name="Sam", + kind="BOT", + ) + + @autospec("bot.exts.filters.token_remover", "KNOWN_USER_LOG_MESSAGE") + def test_format_log_message_user_token_user(self, user_token_message): + """Should correctly format the user ID portion when the ID belongs to a known user.""" + token = Token("NDY3MjIzMjMwNjUwNzc3NjQx", "XsySD_", "s45jqDV_Iisn-symw0yDRrk_jf4") + user_token_message.format.return_value = "Partner" + + return_value = TokenRemover.format_userid_log_message(self.msg, token) + + self.assertEqual(return_value, (user_token_message.format.return_value, True)) + user_token_message.format.assert_called_once_with( + user_id=467223230650777641, + user_name="Woody", + kind="USER", + ) + @mock.patch.object(TokenRemover, "mod_log", new_callable=mock.PropertyMock) @autospec("bot.exts.filters.token_remover", "log") - @autospec(TokenRemover, "format_log_message") - async def test_take_action(self, format_log_message, logger, mod_log_property): + @autospec(TokenRemover, "format_log_message", "format_userid_log_message") + async def test_take_action(self, format_log_message, format_userid_log_message, logger, mod_log_property): """Should delete the message and send a mod log.""" cog = TokenRemover(self.bot) mod_log = mock.create_autospec(ModLog, spec_set=True, instance=True) token = mock.create_autospec(Token, spec_set=True, instance=True) + token.user_id = "no-id" log_msg = "testing123" + userid_log_message = "userid-log-message" mod_log_property.return_value = mod_log format_log_message.return_value = log_msg + format_userid_log_message.return_value = (userid_log_message, True) await cog.take_action(self.msg, token) @@ -269,6 +365,7 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): ) format_log_message.assert_called_once_with(self.msg, token) + format_userid_log_message.assert_called_once_with(self.msg, token) logger.debug.assert_called_with(log_msg) self.bot.stats.incr.assert_called_once_with("tokens.removed_tokens") @@ -277,9 +374,10 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): icon_url=constants.Icons.token_removed, colour=Colour(constants.Colours.soft_red), title="Token removed!", - text=log_msg, + text=log_msg + "\n" + userid_log_message, thumbnail=self.msg.author.avatar_url_as.return_value, - channel_id=constants.Channels.mod_alerts + channel_id=constants.Channels.mod_alerts, + ping_everyone=True, ) @mock.patch.object(TokenRemover, "mod_log", new_callable=mock.PropertyMock) |