From 0bf93ebc2a7b4b7bb0a3a5618353fda5c576939c Mon Sep 17 00:00:00 2001
From: Amrou Bellalouna
Date: Wed, 28 Sep 2022 13:38:35 +0100
Subject: use tldextract for a correct url decomposition

---
 bot/utils/helpers.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'bot/utils/helpers.py')

diff --git a/bot/utils/helpers.py b/bot/utils/helpers.py
index 3e45a71a3..75bcc2ede 100644
--- a/bot/utils/helpers.py
+++ b/bot/utils/helpers.py
@@ -3,6 +3,7 @@ from typing import Optional
 from urllib.parse import urlparse
 
 from discord.ext.commands import CogMeta
+from tldextract import extract
 
 
 class CogABCMeta(CogMeta, ABCMeta):
@@ -34,11 +35,10 @@ def pad_base64(data: str) -> str:
 
 
 def remove_subdomain_from_url(url: str) -> str:
-    """Transforms potential relative urls to absolute ones."""
+    """Removes subdomains from a URL whilst preserving the original URL composition."""
     parsed_url = urlparse(url)
-    netloc_components = parsed_url.netloc.split(".")
-    # Eliminate subdomain and use the second level domain and top level domain only
-    netloc_components[:] = netloc_components[-2:]
-    netloc = ".".join(netloc_components)
+    extracted_url = extract(url)
+    # Eliminate subdomain by using the registered domain only
+    netloc = extracted_url.registered_domain
     parsed_url = parsed_url._replace(netloc=netloc)
     return parsed_url.geturl()
-- 
cgit v1.2.3