diff options
author | 2022-10-18 19:40:30 +0300 | |
---|---|---|
committer | 2022-10-18 19:40:30 +0300 | |
commit | 312bf4343e6002a4556214c0671c61c16ec6897c (patch) | |
tree | 457dc4de4e0ad6dc227012bb20f05349d37bfc5e | |
parent | Merge pull request #2293 from python-discord/fix-sequency-proxy-breaking-change (diff) | |
parent | Merge branch 'main' into 1379-tranform-urls-upon-rich-embed-filter-application (diff) |
Merge pull request #2283 from shtlrs/1379-tranform-urls-upon-rich-embed-filter-application
1379 transform urls upon rich embed filter application
-rw-r--r-- | bot/exts/filters/filtering.py | 12 | ||||
-rw-r--r-- | bot/utils/helpers.py | 12 |
2 files changed, 22 insertions, 2 deletions
diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py index e4df0b1fd..3fb40b719 100644 --- a/bot/exts/filters/filtering.py +++ b/bot/exts/filters/filtering.py @@ -23,10 +23,12 @@ from bot.constants import Bot as BotConfig, Channels, Colours, Filter, Guild, Ic from bot.exts.events.code_jams._channels import CATEGORY_NAME as JAM_CATEGORY_NAME from bot.exts.moderation.modlog import ModLog from bot.log import get_logger +from bot.utils.helpers import remove_subdomain_from_url from bot.utils.messages import format_user log = get_logger(__name__) + # Regular expressions CODE_BLOCK_RE = re.compile( r"(?P<delim>``?)[^`]+?(?P=delim)(?!`+)" # Inline codeblock @@ -583,7 +585,7 @@ class Filtering(Cog): """ text = self.clean_input(text) - # Remove backslashes to prevent escape character aroundfuckery like + # Remove backslashes to prevent escape character around fuckery like # discord\.gg/gdudes-pony-farm text = text.replace("\\", "") @@ -649,7 +651,13 @@ class Filtering(Cog): for embed in msg.embeds: if embed.type == "rich": urls = URL_RE.findall(msg.content) - if not embed.url or embed.url not in urls: + final_urls = set(urls) + # This is due to way discord renders relative urls in Embdes + # if we send the following url: https://mobile.twitter.com/something + # Discord renders it as https://twitter.com/something + for url in urls: + final_urls.add(remove_subdomain_from_url(url)) + if not embed.url or embed.url not in final_urls: # If `embed.url` does not exist or if `embed.url` is not part of the content # of the message, it's unlikely to be an auto-generated embed by Discord. 
return msg.embeds diff --git a/bot/utils/helpers.py b/bot/utils/helpers.py index 621198752..2a81fbed6 100644 --- a/bot/utils/helpers.py +++ b/bot/utils/helpers.py @@ -1,7 +1,9 @@ from abc import ABCMeta from typing import Optional +from urllib.parse import urlparse from discord.ext.commands import CogMeta +from tldextract import extract class CogABCMeta(CogMeta, ABCMeta): # noqa: B024 (Ignore abstract class with no abstract methods.) @@ -30,3 +32,13 @@ def has_lines(string: str, count: int) -> bool: def pad_base64(data: str) -> str: """Return base64 `data` with padding characters to ensure its length is a multiple of 4.""" return data + "=" * (-len(data) % 4) + + +def remove_subdomain_from_url(url: str) -> str: + """Removes subdomains from a URL whilst preserving the original URL composition.""" + parsed_url = urlparse(url) + extracted_url = extract(url) + # Eliminate subdomain by using the registered domain only + netloc = extracted_url.registered_domain + parsed_url = parsed_url._replace(netloc=netloc) + return parsed_url.geturl() |