diff options
| author | 2020-05-21 21:34:10 -0700 | |
|---|---|---|
| committer | 2020-05-21 21:34:10 -0700 | |
| commit | 95ef2dc01143902289c9aacde7969fb5c9e1a85c (patch) | |
| tree | f0b66ab847b06fc5cd9ebf2f3a1221037fdd555b | |
| parent | Token remover: decode ID using URL-safe base64 (diff) | |
Token remover: match only base64 in regex
Making the regex more accurate reduces false positives at an earlier
stage. There's no benefit to matching non-base64 characters, as such a
match would just be weeded out as invalid at a later stage anyway, when
the token remover tries to decode it.
| -rw-r--r-- | bot/cogs/token_remover.py | 13 |
1 file changed, 6 insertions, 7 deletions
```diff
diff --git a/bot/cogs/token_remover.py b/bot/cogs/token_remover.py
index 5b4598959..fa0647828 100644
--- a/bot/cogs/token_remover.py
+++ b/bot/cogs/token_remover.py
@@ -29,13 +29,12 @@ DELETION_MESSAGE_TEMPLATE = (
 )
 DISCORD_EPOCH = 1_420_070_400_000
 TOKEN_EPOCH = 1_293_840_000
-TOKEN_RE = re.compile(
-    r"[^\s\.()\"']+" # Matches token part 1: The user ID string, encoded as base64
-    r"\." # Matches a literal dot between the token parts
-    r"[^\s\.()\"']+" # Matches token part 2: The creation timestamp, as an integer
-    r"\." # Matches a literal dot between the token parts
-    r"[^\s\.()\"']+" # Matches token part 3: The HMAC, unused by us, but check that it isn't empty
-)
+
+# Three parts delimited by dots: user ID, creation timestamp, HMAC.
+# The HMAC isn't parsed further, but it's in the regex to ensure it at least exists in the string.
+# Each part only matches base64 URL-safe characters.
+# Padding has never been observed, but the padding character '=' is matched just in case.
+TOKEN_RE = re.compile(r"[\w-=]+\.[\w-=]+\.[\w-=]+", re.ASCII)
 
 
 class TokenRemover(Cog):
```