From 5aa039845be7f0f972c3f6b622db402790268428 Mon Sep 17 00:00:00 2001 From: arielle Date: Mon, 13 Oct 2025 13:48:35 -0400 Subject: Implement URL normalization in code snippet handler Add URL normalization checks using yarl in code snippets. --- bot/exts/info/code_snippets.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py index 6f67eda3c..0d890a853 100644 --- a/bot/exts/info/code_snippets.py +++ b/bot/exts/info/code_snippets.py @@ -5,6 +5,7 @@ from typing import Any from urllib.parse import quote_plus import discord +import yarl from aiohttp import ClientResponseError from discord.ext.commands import Cog @@ -272,6 +273,20 @@ class CodeSnippets(Cog): for pattern, handler in self.pattern_handlers: for match in pattern.finditer(content): + # ensure that the matched URL meets url normalization rules. + # parsing with yarl resolves all parent urls such as `/../`, + # we then check the regex again to make sure our groups stay the same + unsanitized = match.group(0) + normalized = str(yarl.URL(unsanitized)) + if normalized != unsanitized: + match = pattern.fullmatch(normalized) + if not match: + log.info( + "Received code snippet url %s which " + "attempted to circumvent url normalisation.", + unsanitized + ) + continue try: result = await handler(**match.groupdict()) except ClientResponseError as error: -- cgit v1.2.3 From b8ab1139cead37655654486517ac2db4d2e94613 Mon Sep 17 00:00:00 2001 From: onerandomusername Date: Tue, 21 Oct 2025 14:55:32 -0400 Subject: build: add explicit dependency on yarl --- pyproject.toml | 1 + uv.lock | 2 ++ 2 files changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index e58ea4a50..65d6ab42a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "sentry-sdk==2.22.0", "tenacity==9.0.0", "tldextract==5.1.3", + "yarl==1.22.0", ] name = "bot" version = "1.0.1" diff --git a/uv.lock b/uv.lock index 693f72d8d..77e612bd4 
100644 --- a/uv.lock +++ b/uv.lock @@ -205,6 +205,7 @@ dependencies = [ { name = "sentry-sdk" }, { name = "tenacity" }, { name = "tldextract" }, + { name = "yarl" }, ] [package.dev-dependencies] @@ -242,6 +243,7 @@ requires-dist = [ { name = "sentry-sdk", specifier = "==2.22.0" }, { name = "tenacity", specifier = "==9.0.0" }, { name = "tldextract", specifier = "==5.1.3" }, + { name = "yarl", specifier = "==1.22.0" }, ] [package.metadata.requires-dev] -- cgit v1.2.3 From 4b36bfcee7f223793ec3532a6dc5244156cca701 Mon Sep 17 00:00:00 2001 From: onerandomusername Date: Tue, 21 Oct 2025 14:57:17 -0400 Subject: clarify when yarl performs this normalisation --- bot/exts/info/code_snippets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/exts/info/code_snippets.py b/bot/exts/info/code_snippets.py index 0d890a853..1ba4151c7 100644 --- a/bot/exts/info/code_snippets.py +++ b/bot/exts/info/code_snippets.py @@ -274,7 +274,7 @@ class CodeSnippets(Cog): for pattern, handler in self.pattern_handlers: for match in pattern.finditer(content): # ensure that the matched URL meets url normalization rules. - # parsing with yarl resolves all parent urls such as `/../`, + # parsing an absolute url with yarl resolves all parent urls such as `/../`, # we then check the regex again to make sure our groups stay the same unsanitized = match.group(0) normalized = str(yarl.URL(unsanitized)) -- cgit v1.2.3