aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar MarkKoz <[email protected]>2020-05-21 21:34:10 -0700
committerGravatar MarkKoz <[email protected]>2020-05-21 21:34:10 -0700
commit95ef2dc01143902289c9aacde7969fb5c9e1a85c (patch)
treef0b66ab847b06fc5cd9ebf2f3a1221037fdd555b
parentToken remover: decode ID using URL-safe base64 (diff)
Token remover: match only base64 in regex
Making the regex more accurate reduces false positives at an earlier stage. There's no benefit to matching non-base64 as that would just be weeded out as invalid at a later stage anyway when it tries to decode it.
-rw-r--r--bot/cogs/token_remover.py13
1 files changed, 6 insertions, 7 deletions
diff --git a/bot/cogs/token_remover.py b/bot/cogs/token_remover.py
index 5b4598959..fa0647828 100644
--- a/bot/cogs/token_remover.py
+++ b/bot/cogs/token_remover.py
@@ -29,13 +29,12 @@ DELETION_MESSAGE_TEMPLATE = (
)
DISCORD_EPOCH = 1_420_070_400_000
TOKEN_EPOCH = 1_293_840_000
-TOKEN_RE = re.compile(
- r"[^\s\.()\"']+" # Matches token part 1: The user ID string, encoded as base64
- r"\." # Matches a literal dot between the token parts
- r"[^\s\.()\"']+" # Matches token part 2: The creation timestamp, as an integer
- r"\." # Matches a literal dot between the token parts
- r"[^\s\.()\"']+" # Matches token part 3: The HMAC, unused by us, but check that it isn't empty
-)
+
+# Three parts delimited by dots: user ID, creation timestamp, HMAC.
+# The HMAC isn't parsed further, but it's in the regex to ensure it at least exists in the string.
+# Each part only matches base64 URL-safe characters.
+# Padding has never been observed, but the padding character '=' is matched just in case.
+TOKEN_RE = re.compile(r"[\w\-=]+\.[\w\-=]+\.[\w\-=]+", re.ASCII)  # '-' escaped: unescaped after \w it is parsed as a (bad) range
class TokenRemover(Cog):