about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Amrou Bellalouna <[email protected]> 2022-09-28 13:38:35 +0100
committer Amrou Bellalouna <[email protected]> 2022-09-28 13:38:35 +0100
commit 0bf93ebc2a7b4b7bb0a3a5618353fda5c576939c (patch)
tree 88443ed100802ac90e59312eda624d5e169b8a79
parent check for the url existence in the final_urls set (diff)
use tldextract for a correct url decomposition
-rw-r--r-- bot/utils/helpers.py 10
1 files changed, 5 insertions, 5 deletions
diff --git a/bot/utils/helpers.py b/bot/utils/helpers.py
index 3e45a71a3..75bcc2ede 100644
--- a/bot/utils/helpers.py
+++ b/bot/utils/helpers.py
@@ -3,6 +3,7 @@ from typing import Optional
from urllib.parse import urlparse
from discord.ext.commands import CogMeta
+from tldextract import extract
class CogABCMeta(CogMeta, ABCMeta):
@@ -34,11 +35,10 @@ def pad_base64(data: str) -> str:
def remove_subdomain_from_url(url: str) -> str:
- """Transforms potential relative urls to absolute ones."""
+ """Removes subdomains from a URL whilst preserving the original URL composition."""
parsed_url = urlparse(url)
- netloc_components = parsed_url.netloc.split(".")
- # Eliminate subdomain and use the second level domain and top level domain only
- netloc_components[:] = netloc_components[-2:]
- netloc = ".".join(netloc_components)
+ extracted_url = extract(url)
+ # Eliminate subdomain by using the registered domain only
+ netloc = extracted_url.registered_domain
parsed_url = parsed_url._replace(netloc=netloc)
return parsed_url.geturl()