diff options
| -rw-r--r-- | bot/exts/filters/filtering.py | 12 | ||||
| -rw-r--r-- | bot/utils/helpers.py | 12 | 
2 files changed, 22 insertions, 2 deletions
| diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py index e4df0b1fd..3fb40b719 100644 --- a/bot/exts/filters/filtering.py +++ b/bot/exts/filters/filtering.py @@ -23,10 +23,12 @@ from bot.constants import Bot as BotConfig, Channels, Colours, Filter, Guild, Ic  from bot.exts.events.code_jams._channels import CATEGORY_NAME as JAM_CATEGORY_NAME  from bot.exts.moderation.modlog import ModLog  from bot.log import get_logger +from bot.utils.helpers import remove_subdomain_from_url  from bot.utils.messages import format_user  log = get_logger(__name__) +  # Regular expressions  CODE_BLOCK_RE = re.compile(      r"(?P<delim>``?)[^`]+?(?P=delim)(?!`+)"  # Inline codeblock @@ -583,7 +585,7 @@ class Filtering(Cog):          """          text = self.clean_input(text) -        # Remove backslashes to prevent escape character aroundfuckery like +        # Remove backslashes to prevent escape character around fuckery like          # discord\.gg/gdudes-pony-farm          text = text.replace("\\", "") @@ -649,7 +651,13 @@ class Filtering(Cog):              for embed in msg.embeds:                  if embed.type == "rich":                      urls = URL_RE.findall(msg.content) -                    if not embed.url or embed.url not in urls: +                    final_urls = set(urls) +                    # This is due to the way Discord renders relative urls in embeds: +                    # if we send the following url: https://mobile.twitter.com/something +                    # Discord renders it as https://twitter.com/something +                    for url in urls: +                        final_urls.add(remove_subdomain_from_url(url)) +                    if not embed.url or embed.url not in final_urls:                          # If `embed.url` does not exist or if `embed.url` is not part of the content                          # of the message, it's unlikely to be an auto-generated embed by Discord.                          
return msg.embeds diff --git a/bot/utils/helpers.py b/bot/utils/helpers.py index 621198752..2a81fbed6 100644 --- a/bot/utils/helpers.py +++ b/bot/utils/helpers.py @@ -1,7 +1,9 @@  from abc import ABCMeta  from typing import Optional +from urllib.parse import urlparse  from discord.ext.commands import CogMeta +from tldextract import extract  class CogABCMeta(CogMeta, ABCMeta):  # noqa: B024 (Ignore abstract class with no abstract methods.) @@ -30,3 +32,13 @@ def has_lines(string: str, count: int) -> bool:  def pad_base64(data: str) -> str:      """Return base64 `data` with padding characters to ensure its length is a multiple of 4."""      return data + "=" * (-len(data) % 4) + + +def remove_subdomain_from_url(url: str) -> str: +    """Removes subdomains from a URL whilst preserving the original URL composition.""" +    parsed_url = urlparse(url) +    extracted_url = extract(url) +    # Eliminate subdomain by using the registered domain only +    netloc = extracted_url.registered_domain +    parsed_url = parsed_url._replace(netloc=netloc) +    return parsed_url.geturl() | 
