aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Boris Muratov <[email protected]>2022-10-18 19:40:30 +0300
committerGravatar GitHub <[email protected]>2022-10-18 19:40:30 +0300
commit312bf4343e6002a4556214c0671c61c16ec6897c (patch)
tree457dc4de4e0ad6dc227012bb20f05349d37bfc5e
parentMerge pull request #2293 from python-discord/fix-sequency-proxy-breaking-change (diff)
parentMerge branch 'main' into 1379-tranform-urls-upon-rich-embed-filter-application (diff)
Merge pull request #2283 from shtlrs/1379-tranform-urls-upon-rich-embed-filter-application
1379 tranform urls upon rich embed filter application
-rw-r--r--bot/exts/filters/filtering.py12
-rw-r--r--bot/utils/helpers.py12
2 files changed, 22 insertions, 2 deletions
diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py
index e4df0b1fd..3fb40b719 100644
--- a/bot/exts/filters/filtering.py
+++ b/bot/exts/filters/filtering.py
@@ -23,10 +23,12 @@ from bot.constants import Bot as BotConfig, Channels, Colours, Filter, Guild, Ic
from bot.exts.events.code_jams._channels import CATEGORY_NAME as JAM_CATEGORY_NAME
from bot.exts.moderation.modlog import ModLog
from bot.log import get_logger
+from bot.utils.helpers import remove_subdomain_from_url
from bot.utils.messages import format_user
log = get_logger(__name__)
+
# Regular expressions
CODE_BLOCK_RE = re.compile(
r"(?P<delim>``?)[^`]+?(?P=delim)(?!`+)" # Inline codeblock
@@ -583,7 +585,7 @@ class Filtering(Cog):
"""
text = self.clean_input(text)
- # Remove backslashes to prevent escape character aroundfuckery like
+ # Remove backslashes to prevent escape character around fuckery like
# discord\.gg/gdudes-pony-farm
text = text.replace("\\", "")
@@ -649,7 +651,13 @@ class Filtering(Cog):
for embed in msg.embeds:
if embed.type == "rich":
urls = URL_RE.findall(msg.content)
- if not embed.url or embed.url not in urls:
+ final_urls = set(urls)
+ # This is due to the way Discord renders URLs in embeds:
+ # if we send the following URL: https://mobile.twitter.com/something
+ # Discord renders it as https://twitter.com/something
+ for url in urls:
+ final_urls.add(remove_subdomain_from_url(url))
+ if not embed.url or embed.url not in final_urls:
# If `embed.url` does not exist or if `embed.url` is not part of the content
# of the message, it's unlikely to be an auto-generated embed by Discord.
return msg.embeds
diff --git a/bot/utils/helpers.py b/bot/utils/helpers.py
index 621198752..2a81fbed6 100644
--- a/bot/utils/helpers.py
+++ b/bot/utils/helpers.py
@@ -1,7 +1,9 @@
from abc import ABCMeta
from typing import Optional
+from urllib.parse import urlparse
from discord.ext.commands import CogMeta
+from tldextract import extract
class CogABCMeta(CogMeta, ABCMeta): # noqa: B024 (Ignore abstract class with no abstract methods.)
@@ -30,3 +32,13 @@ def has_lines(string: str, count: int) -> bool:
def pad_base64(data: str) -> str:
"""Return base64 `data` with padding characters to ensure its length is a multiple of 4."""
return data + "=" * (-len(data) % 4)
+
+
+def remove_subdomain_from_url(url: str) -> str:
+ """Return `url` with its netloc replaced by its registered domain; other URL components are kept."""
+ parsed_url = urlparse(url)
+ extracted_url = extract(url)
+ # Eliminate subdomain by using the registered domain only
+ netloc = extracted_url.registered_domain
+ parsed_url = parsed_url._replace(netloc=netloc)
+ return parsed_url.geturl()