aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Bast <[email protected]>2020-09-19 12:16:40 -0700
committerGravatar Bast <[email protected]>2020-09-19 12:16:40 -0700
commit83e17627d3fa4e0eb135b5039decd02eaf3d060c (patch)
treeec4c017d10cf5861d84b69665c33a64ebdc7dd85
parentAdd feature to token_remover: log detected user ID, and ping if it's a user i... (diff)
Make token_remover check basic HMAC validity (not low entropy)
Handles cases like xxx.xxxxx.xxxxxxxx where a user has intentionally censored part of a token, and will not consider them "valid"
-rw-r--r--bot/cogs/token_remover.py21
-rw-r--r--tests/bot/cogs/test_token_remover.py40
2 files changed, 55 insertions, 6 deletions
diff --git a/bot/cogs/token_remover.py b/bot/cogs/token_remover.py
index 93ceda6be..17778b415 100644
--- a/bot/cogs/token_remover.py
+++ b/bot/cogs/token_remover.py
@@ -1,5 +1,6 @@
import base64
import binascii
+import collections
import logging
import re
import typing as t
@@ -153,7 +154,9 @@ class TokenRemover(Cog):
# token check (e.g. `message.channel.send` also matches our token pattern)
for match in TOKEN_RE.finditer(msg.content):
token = Token(*match.groups())
- if cls.is_valid_user_id(token.user_id) and cls.is_valid_timestamp(token.timestamp):
+ if cls.is_valid_user_id(token.user_id) \
+ and cls.is_valid_timestamp(token.timestamp) \
+ and cls.is_maybevalid_hmac(token.hmac):
# Short-circuit on first match
return token
@@ -214,6 +217,22 @@ class TokenRemover(Cog):
log.debug(f"Invalid token timestamp '{b64_content}': smaller than Discord epoch")
return False
+ @staticmethod
+ def is_maybevalid_hmac(b64_content: str) -> bool:
+ """
+ Determine if a given hmac portion of a token is potentially valid.
+
+ If the HMAC has 3 or less characters, it's probably a dummy value like "xxxxxxxxxx",
+ and thus the token can probably be skipped.
+ """
+ unique = len(collections.Counter(b64_content.lower()).keys())
+ if unique <= 3:
+ log.debug(f"Considering the hmac {b64_content} a dummy because it has {unique}"
+ " case-insensitively unique characters")
+ return False
+ else:
+ return True
+
def setup(bot: Bot) -> None:
"""Load the TokenRemover cog."""
diff --git a/tests/bot/cogs/test_token_remover.py b/tests/bot/cogs/test_token_remover.py
index 275350144..56d269105 100644
--- a/tests/bot/cogs/test_token_remover.py
+++ b/tests/bot/cogs/test_token_remover.py
@@ -86,6 +86,34 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase):
result = TokenRemover.is_valid_timestamp(timestamp)
self.assertFalse(result)
+ def test_is_valid_hmac_valid(self):
+ """Should consider hmac valid if it is a valid hmac with a variety of characters."""
+ valid_hmacs = (
+ "VXmErH7j511turNpfURmb0rVNm8",
+ "Ysnu2wacjaKs7qnoo46S8Dm2us8",
+ "sJf6omBPORBPju3WJEIAcwW9Zds",
+ "s45jqDV_Iisn-symw0yDRrk_jf4",
+ )
+
+ for hmac in valid_hmacs:
+ with self.subTest(msg=hmac):
+ result = TokenRemover.is_maybevalid_hmac(hmac)
+ self.assertTrue(result)
+
+ def test_is_invalid_hmac_invalid(self):
+ """Should consider hmac invalid if it possesses too little variety."""
+ invalid_hmacs = (
+ ("xxxxxxxxxxxxxxxxxx", "Single character"),
+ ("XxXxXxXxXxXxXxXxXx", "Single character alternating case"),
+ ("ASFasfASFasfASFASsf", "Three characters alternating-case"),
+ ("asdasdasdasdasdasdasd", "Three characters one case"),
+ )
+
+ for hmac, msg in invalid_hmacs:
+ with self.subTest(msg=msg):
+ result = TokenRemover.is_maybevalid_hmac(hmac)
+ self.assertFalse(result)
+
def test_mod_log_property(self):
"""The `mod_log` property should ask the bot to return the `ModLog` cog."""
self.bot.get_cog.return_value = 'lemon'
@@ -143,11 +171,11 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase):
self.assertIsNone(return_value)
token_re.finditer.assert_called_once_with(self.msg.content)
- @autospec(TokenRemover, "is_valid_user_id", "is_valid_timestamp")
+ @autospec(TokenRemover, "is_valid_user_id", "is_valid_timestamp", "is_maybevalid_hmac")
@autospec("bot.cogs.token_remover", "Token")
@autospec("bot.cogs.token_remover", "TOKEN_RE")
- def test_find_token_valid_match(self, token_re, token_cls, is_valid_id, is_valid_timestamp):
- """The first match with a valid user ID and timestamp should be returned as a `Token`."""
+ def test_find_token_valid_match(self, token_re, token_cls, is_valid_id, is_valid_timestamp, is_maybevalid_hmac):
+ """The first match with a valid user ID. timestamp and hmac should be returned as a `Token`."""
matches = [
mock.create_autospec(Match, spec_set=True, instance=True),
mock.create_autospec(Match, spec_set=True, instance=True),
@@ -161,21 +189,23 @@ class TokenRemoverTests(unittest.IsolatedAsyncioTestCase):
token_cls.side_effect = tokens
is_valid_id.side_effect = (False, True) # The 1st match will be invalid, 2nd one valid.
is_valid_timestamp.return_value = True
+ is_maybevalid_hmac.return_value = True
return_value = TokenRemover.find_token_in_message(self.msg)
self.assertEqual(tokens[1], return_value)
token_re.finditer.assert_called_once_with(self.msg.content)
- @autospec(TokenRemover, "is_valid_user_id", "is_valid_timestamp")
+ @autospec(TokenRemover, "is_valid_user_id", "is_valid_timestamp", "is_maybevalid_hmac")
@autospec("bot.cogs.token_remover", "Token")
@autospec("bot.cogs.token_remover", "TOKEN_RE")
- def test_find_token_invalid_matches(self, token_re, token_cls, is_valid_id, is_valid_timestamp):
+ def test_find_token_invalid_matches(self, token_re, token_cls, is_valid_id, is_valid_timestamp, is_maybevalid_hmac):
"""None should be returned if no matches have valid user IDs or timestamps."""
token_re.finditer.return_value = [mock.create_autospec(Match, spec_set=True, instance=True)]
token_cls.return_value = mock.create_autospec(Token, spec_set=True, instance=True)
is_valid_id.return_value = False
is_valid_timestamp.return_value = False
+ is_maybevalid_hmac.return_value = False
return_value = TokenRemover.find_token_in_message(self.msg)