diff options
author | 2022-09-28 13:38:35 +0100 | |
---|---|---|
committer | 2022-09-28 13:38:35 +0100 | |
commit | 0bf93ebc2a7b4b7bb0a3a5618353fda5c576939c (patch) | |
tree | 88443ed100802ac90e59312eda624d5e169b8a79 | |
parent | check for the url existence in the final_urls set (diff) |
use tldextract for a correct url decomposition
-rw-r--r-- | bot/utils/helpers.py | 10 |
1 files changed, 5 insertions, 5 deletions
```diff
diff --git a/bot/utils/helpers.py b/bot/utils/helpers.py
index 3e45a71a3..75bcc2ede 100644
--- a/bot/utils/helpers.py
+++ b/bot/utils/helpers.py
@@ -3,6 +3,7 @@ from typing import Optional
 from urllib.parse import urlparse
 
 from discord.ext.commands import CogMeta
+from tldextract import extract
 
 
 class CogABCMeta(CogMeta, ABCMeta):
@@ -34,11 +35,10 @@ def pad_base64(data: str) -> str:
 
 
 def remove_subdomain_from_url(url: str) -> str:
-    """Transforms potential relative urls to absolute ones."""
+    """Removes subdomains from a URL whilst preserving the original URL composition."""
     parsed_url = urlparse(url)
-    netloc_components = parsed_url.netloc.split(".")
-    # Eliminate subdomain and use the second level domain and top level domain only
-    netloc_components[:] = netloc_components[-2:]
-    netloc = ".".join(netloc_components)
+    extracted_url = extract(url)
+    # Eliminate subdomain by using the registered domain only
+    netloc = extracted_url.registered_domain
     parsed_url = parsed_url._replace(netloc=netloc)
     return parsed_url.geturl()
```