From 296e5656242edda2eebe8777b631dc7cb1cd75e3 Mon Sep 17 00:00:00 2001 From: Kronifer <44979306+Kronifer@users.noreply.github.com> Date: Sun, 17 Oct 2021 17:39:52 -0500 Subject: feat: added url parsing to filters with support for relative URLs --- bot/exts/filters/filtering.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/bot/exts/filters/filtering.py b/bot/exts/filters/filtering.py index 8accc61f8..a1362d791 100644 --- a/bot/exts/filters/filtering.py +++ b/bot/exts/filters/filtering.py @@ -2,6 +2,8 @@ import asyncio import re import unicodedata from datetime import timedelta +import urllib.parse +from datetime import timedelta from typing import Any, Dict, List, Mapping, NamedTuple, Optional, Tuple, Union import arrow @@ -534,8 +536,14 @@ class Filtering(Cog): domain_blacklist = self._get_filterlist_items("domain_name", allowed=False) for match in URL_RE.finditer(text): for url in domain_blacklist: - if url.lower() in match.group(1).lower(): - return True, self._get_filterlist_value("domain_name", url, allowed=False)["comment"] + blacklisted_parsed = urllib.parse.urlparse(url) + url_parsed = urllib.parse.urlparse(match.group(1).lower()) + if blacklisted_parsed.netloc != "": + if url_parsed.netloc in (f"www.{blacklisted_parsed.netloc}", blacklisted_parsed.netloc): + return True, self._get_filterlist_value("domain_name", url, allowed=False)["comment"] + else: + if url_parsed.netloc in (f"www.{blacklisted_parsed.path}", blacklisted_parsed.path): + return True, self._get_filterlist_value("domain_name", url, allowed=False)["comment"] return False, None @staticmethod -- cgit v1.2.3