diff options
-rw-r--r-- | bot/exts/filters/token_remover.py | 85 | ||||
-rw-r--r-- | tests/bot/exts/filters/test_token_remover.py | 85 |
2 files changed, 145 insertions, 25 deletions
diff --git a/bot/exts/filters/token_remover.py b/bot/exts/filters/token_remover.py index 0eda3dc6a..a31912d5b 100644 --- a/bot/exts/filters/token_remover.py +++ b/bot/exts/filters/token_remover.py @@ -1,5 +1,6 @@ import base64 import binascii +import collections import logging import re import typing as t @@ -18,6 +19,11 @@ LOG_MESSAGE = ( "Censored a seemingly valid token sent by {author} (`{author_id}`) in {channel}, " "token was `{user_id}.{timestamp}.{hmac}`" ) +DECODED_LOG_MESSAGE = "The token user_id decodes into {user_id}." +USER_TOKEN_MESSAGE = ( + "The token user_id decodes into {user_id}, " + "which matches `{user_name}` and means this is a valid USER token." +) DELETION_MESSAGE_TEMPLATE = ( "Hey {mention}! I noticed you posted a seemingly valid Discord API " "token in your message and have removed your message. " @@ -92,7 +98,14 @@ class TokenRemover(Cog): await msg.channel.send(DELETION_MESSAGE_TEMPLATE.format(mention=msg.author.mention)) - log_message = self.format_log_message(msg, found_token) + user_name = None + user_id = self.extract_user_id(found_token.user_id) + user = msg.guild.get_member(user_id) + + if user: + user_name = str(user) + + log_message = self.format_log_message(msg, found_token, user_id, user_name) log.debug(log_message) # Send pretty mod log embed to mod-alerts @@ -103,14 +116,24 @@ class TokenRemover(Cog): text=log_message, thumbnail=msg.author.avatar_url_as(static_format="png"), channel_id=Channels.mod_alerts, + ping_everyone=user_name is not None, ) self.bot.stats.incr("tokens.removed_tokens") @staticmethod - def format_log_message(msg: Message, token: Token) -> str: - """Return the log message to send for `token` being censored in `msg`.""" - return LOG_MESSAGE.format( + def format_log_message( + msg: Message, + token: Token, + user_id: int, + user_name: t.Optional[str] = None, + ) -> str: + """ + Return the log message to send for `token` being censored in `msg`. 
+ + Additionally, mention if the token was decodable into a user id, and if that resolves to a user on the server. + """ + message = LOG_MESSAGE.format( author=msg.author, author_id=msg.author.id, channel=msg.channel.mention, @@ -118,6 +141,11 @@ class TokenRemover(Cog): timestamp=token.timestamp, hmac='x' * len(token.hmac), ) + if user_name: + more = USER_TOKEN_MESSAGE.format(user_id=user_id, user_name=user_name) + else: + more = DECODED_LOG_MESSAGE.format(user_id=user_id) + return message + "\n" + more @classmethod def find_token_in_message(cls, msg: Message) -> t.Optional[Token]: @@ -126,7 +154,9 @@ class TokenRemover(Cog): # token check (e.g. `message.channel.send` also matches our token pattern) for match in TOKEN_RE.finditer(msg.content): token = Token(*match.groups()) - if cls.is_valid_user_id(token.user_id) and cls.is_valid_timestamp(token.timestamp): + if cls.is_valid_user_id(token.user_id) \ + and cls.is_valid_timestamp(token.timestamp) \ + and cls.is_maybevalid_hmac(token.hmac): # Short-circuit on first match return token @@ -134,23 +164,34 @@ class TokenRemover(Cog): return @staticmethod - def is_valid_user_id(b64_content: str) -> bool: - """ - Check potential token to see if it contains a valid Discord user ID. - - See: https://discordapp.com/developers/docs/reference#snowflakes - """ + def extract_user_id(b64_content: str) -> t.Optional[int]: + """Return a userid integer from part of a potential token, or None if it couldn't be decoded.""" b64_content = utils.pad_base64(b64_content) try: decoded_bytes = base64.urlsafe_b64decode(b64_content) string = decoded_bytes.decode('utf-8') - - # isdigit on its own would match a lot of other Unicode characters, hence the isascii. - return string.isascii() and string.isdigit() + if not (string.isascii() and string.isdigit()): + # This case triggers if there are fancy unicode digits in the base64 encoding, + # that means it's not a valid user id. 
+ return None + return int(string) except (binascii.Error, ValueError): + return None + + @classmethod + def is_valid_user_id(cls, b64_content: str) -> bool: + """ + Check potential token to see if it contains a valid Discord user ID. + + See: https://discordapp.com/developers/docs/reference#snowflakes + """ + decoded_id = cls.extract_user_id(b64_content) + if not decoded_id: return False + return True + @staticmethod def is_valid_timestamp(b64_content: str) -> bool: """ @@ -176,6 +217,22 @@ class TokenRemover(Cog): log.debug(f"Invalid token timestamp '{b64_content}': smaller than Discord epoch") return False + @staticmethod + def is_maybevalid_hmac(b64_content: str) -> bool: + """ + Determine if a given hmac portion of a token is potentially valid. + + If the HMAC has 3 or less characters, it's probably a dummy value like "xxxxxxxxxx", + and thus the token can probably be skipped. + """ + unique = len(collections.Counter(b64_content.lower()).keys()) + if unique <= 3: + log.debug(f"Considering the hmac {b64_content} a dummy because it has {unique}" + " case-insensitively unique characters") + return False + else: + return True + def setup(bot: Bot) -> None: """Load the TokenRemover cog.""" diff --git a/tests/bot/exts/filters/test_token_remover.py b/tests/bot/exts/filters/test_token_remover.py index a0ff8a877..8742b73c5 100644 --- a/tests/bot/exts/filters/test_token_remover.py +++ b/tests/bot/exts/filters/test_token_remover.py @@ -22,6 +22,7 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): self.msg = MockMessage(id=555, content="hello world") self.msg.channel.mention = "#lemonade-stand" + self.msg.guild.get_member = MagicMock(return_value="Bob") self.msg.author.__str__ = MagicMock(return_value=self.msg.author.name) self.msg.author.avatar_url_as.return_value = "picture-lemon.png" @@ -85,6 +86,34 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): result = TokenRemover.is_valid_timestamp(timestamp) self.assertFalse(result) + def 
test_is_valid_hmac_valid(self): + """Should consider hmac valid if it is a valid hmac with a variety of characters.""" + valid_hmacs = ( + "VXmErH7j511turNpfURmb0rVNm8", + "Ysnu2wacjaKs7qnoo46S8Dm2us8", + "sJf6omBPORBPju3WJEIAcwW9Zds", + "s45jqDV_Iisn-symw0yDRrk_jf4", + ) + + for hmac in valid_hmacs: + with self.subTest(msg=hmac): + result = TokenRemover.is_maybevalid_hmac(hmac) + self.assertTrue(result) + + def test_is_invalid_hmac_invalid(self): + """Should consider hmac invalid if it possesses too little variety.""" + invalid_hmacs = ( + ("xxxxxxxxxxxxxxxxxx", "Single character"), + ("XxXxXxXxXxXxXxXxXx", "Single character alternating case"), + ("ASFasfASFasfASFASsf", "Three characters alternating-case"), + ("asdasdasdasdasdasdasd", "Three characters one case"), + ) + + for hmac, msg in invalid_hmacs: + with self.subTest(msg=msg): + result = TokenRemover.is_maybevalid_hmac(hmac) + self.assertFalse(result) + def test_mod_log_property(self): """The `mod_log` property should ask the bot to return the `ModLog` cog.""" self.bot.get_cog.return_value = 'lemon' @@ -142,11 +171,11 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): self.assertIsNone(return_value) token_re.finditer.assert_called_once_with(self.msg.content) - @autospec(TokenRemover, "is_valid_user_id", "is_valid_timestamp") + @autospec(TokenRemover, "is_valid_user_id", "is_valid_timestamp", "is_maybevalid_hmac") @autospec("bot.exts.filters.token_remover", "Token") @autospec("bot.exts.filters.token_remover", "TOKEN_RE") - def test_find_token_valid_match(self, token_re, token_cls, is_valid_id, is_valid_timestamp): - """The first match with a valid user ID and timestamp should be returned as a `Token`.""" + def test_find_token_valid_match(self, token_re, token_cls, is_valid_id, is_valid_timestamp, is_maybevalid_hmac): + """The first match with a valid user ID. 
timestamp and hmac should be returned as a `Token`.""" matches = [ mock.create_autospec(Match, spec_set=True, instance=True), mock.create_autospec(Match, spec_set=True, instance=True), @@ -160,21 +189,23 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): token_cls.side_effect = tokens is_valid_id.side_effect = (False, True) # The 1st match will be invalid, 2nd one valid. is_valid_timestamp.return_value = True + is_maybevalid_hmac.return_value = True return_value = TokenRemover.find_token_in_message(self.msg) self.assertEqual(tokens[1], return_value) token_re.finditer.assert_called_once_with(self.msg.content) - @autospec(TokenRemover, "is_valid_user_id", "is_valid_timestamp") + @autospec(TokenRemover, "is_valid_user_id", "is_valid_timestamp", "is_maybevalid_hmac") @autospec("bot.exts.filters.token_remover", "Token") @autospec("bot.exts.filters.token_remover", "TOKEN_RE") - def test_find_token_invalid_matches(self, token_re, token_cls, is_valid_id, is_valid_timestamp): + def test_find_token_invalid_matches(self, token_re, token_cls, is_valid_id, is_valid_timestamp, is_maybevalid_hmac): """None should be returned if no matches have valid user IDs or timestamps.""" token_re.finditer.return_value = [mock.create_autospec(Match, spec_set=True, instance=True)] token_cls.return_value = mock.create_autospec(Token, spec_set=True, instance=True) is_valid_id.return_value = False is_valid_timestamp.return_value = False + is_maybevalid_hmac.return_value = False return_value = TokenRemover.find_token_in_message(self.msg) @@ -230,15 +261,41 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): results = [match[0] for match in results] self.assertCountEqual((token_1, token_2), results) - @autospec("bot.exts.filters.token_remover", "LOG_MESSAGE") - def test_format_log_message(self, log_message): + @autospec("bot.exts.filters.token_remover", "LOG_MESSAGE", "DECODED_LOG_MESSAGE") + def test_format_log_message(self, log_message, decoded_log_message): + """Should 
correctly format the log message with info from the message and token.""" + token = Token("NDcyMjY1OTQzMDYyNDEzMzMy", "XsySD_", "s45jqDV_Iisn-symw0yDRrk_jf4") + log_message.format.return_value = "Howdy" + decoded_log_message.format.return_value = " Partner" + + return_value = TokenRemover.format_log_message(self.msg, token, 472265943062413332, None) + + self.assertEqual( + return_value, + log_message.format.return_value + "\n" + decoded_log_message.format.return_value, + ) + log_message.format.assert_called_once_with( + author=self.msg.author, + author_id=self.msg.author.id, + channel=self.msg.channel.mention, + user_id=token.user_id, + timestamp=token.timestamp, + hmac="x" * len(token.hmac), + ) + + @autospec("bot.exts.filters.token_remover", "LOG_MESSAGE", "USER_TOKEN_MESSAGE") + def test_format_log_message_user_token(self, log_message, user_token_message): """Should correctly format the log message with info from the message and token.""" token = Token("NDY3MjIzMjMwNjUwNzc3NjQx", "XsySD_", "s45jqDV_Iisn-symw0yDRrk_jf4") log_message.format.return_value = "Howdy" + user_token_message.format.return_value = "Partner" - return_value = TokenRemover.format_log_message(self.msg, token) + return_value = TokenRemover.format_log_message(self.msg, token, 467223230650777641, "Bob") - self.assertEqual(return_value, log_message.format.return_value) + self.assertEqual( + return_value, + log_message.format.return_value + "\n" + user_token_message.format.return_value, + ) log_message.format.assert_called_once_with( author=self.msg.author, author_id=self.msg.author.id, @@ -247,6 +304,10 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): timestamp=token.timestamp, hmac="x" * len(token.hmac), ) + user_token_message.format.assert_called_once_with( + user_id=467223230650777641, + user_name="Bob", + ) @mock.patch.object(TokenRemover, "mod_log", new_callable=mock.PropertyMock) @autospec("bot.exts.filters.token_remover", "log") @@ -256,6 +317,7 @@ class 
TokenRemoverTests(unittest.IsolatedAsyncioTestCase): cog = TokenRemover(self.bot) mod_log = mock.create_autospec(ModLog, spec_set=True, instance=True) token = mock.create_autospec(Token, spec_set=True, instance=True) + token.user_id = "no-id" log_msg = "testing123" mod_log_property.return_value = mod_log @@ -268,7 +330,7 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): token_remover.DELETION_MESSAGE_TEMPLATE.format(mention=self.msg.author.mention) ) - format_log_message.assert_called_once_with(self.msg, token) + format_log_message.assert_called_once_with(self.msg, token, None, "Bob") logger.debug.assert_called_with(log_msg) self.bot.stats.incr.assert_called_once_with("tokens.removed_tokens") @@ -279,7 +341,8 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase): title="Token removed!", text=log_msg, thumbnail=self.msg.author.avatar_url_as.return_value, - channel_id=constants.Channels.mod_alerts + channel_id=constants.Channels.mod_alerts, + ping_everyone=True, ) @mock.patch.object(TokenRemover, "mod_log", new_callable=mock.PropertyMock) |