aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Steele Farnsworth <[email protected]>2025-01-30 18:32:22 -0500
committerGravatar GitHub <[email protected]>2025-01-30 18:32:22 -0500
commit186d5e95a5560cc9789c168412805e8bc6c796de (patch)
treea0a95bcc632eb0251a284f48fdffb54c79bd952e
parentMerge pull request #3241 from python-discord/pastebin-auto-upload (diff)
Revert "Apply filtering to text attachments; offer to auto-upload text attachments to paste bin"
-rw-r--r--bot/exts/filtering/_filter_lists/extension.py39
-rw-r--r--bot/exts/filtering/_filter_lists/filter_list.py5
-rw-r--r--bot/exts/filtering/_settings.py14
-rw-r--r--bot/exts/filtering/filtering.py19
-rw-r--r--bot/exts/utils/attachment_pastebin_uploader.py144
-rw-r--r--bot/exts/utils/snekbox/_cog.py3
6 files changed, 44 insertions, 180 deletions
diff --git a/bot/exts/filtering/_filter_lists/extension.py b/bot/exts/filtering/_filter_lists/extension.py
index e48564092..d656bc6d2 100644
--- a/bot/exts/filtering/_filter_lists/extension.py
+++ b/bot/exts/filtering/_filter_lists/extension.py
@@ -14,6 +14,18 @@ from bot.exts.filtering._settings import ActionSettings
if typing.TYPE_CHECKING:
from bot.exts.filtering.filtering import Filtering
+PASTE_URL = "https://paste.pythondiscord.com"
+PY_EMBED_DESCRIPTION = (
+ "It looks like you tried to attach a Python file - "
+ f"please use a code-pasting service such as {PASTE_URL}"
+)
+
+TXT_LIKE_FILES = {".txt", ".csv", ".json"}
+TXT_EMBED_DESCRIPTION = (
+ "You either uploaded a `{blocked_extension}` file or entered a message that was too long. "
+ f"Please use our [paste bin]({PASTE_URL}) instead."
+)
+
DISALLOWED_EMBED_DESCRIPTION = (
"It looks like you tried to attach file type(s) that we do not allow ({joined_blacklist}). "
"We currently allow the following file types: **{joined_whitelist}**.\n\n"
@@ -75,23 +87,30 @@ class ExtensionsList(FilterList[ExtensionFilter]):
not_allowed = {ext: filename for ext, filename in all_ext if ext not in allowed_ext}
if ctx.event == Event.SNEKBOX:
- not_allowed = dict(not_allowed.items())
+ not_allowed = {ext: filename for ext, filename in not_allowed.items() if ext not in TXT_LIKE_FILES}
if not not_allowed: # Yes, it's a double negative. Meaning all attachments are allowed :)
return None, [], {ListType.ALLOW: triggered}
# At this point, something is disallowed.
if ctx.event != Event.SNEKBOX: # Don't post the embed if it's a snekbox response.
- meta_channel = bot.instance.get_channel(Channels.meta)
- if not self._whitelisted_description:
- self._whitelisted_description = ", ".join(
- filter_.content for filter_ in self[ListType.ALLOW].filters.values()
+ if ".py" in not_allowed:
+ # Provide a pastebin link for .py files.
+ ctx.dm_embed = PY_EMBED_DESCRIPTION
+ elif txt_extensions := {ext for ext in TXT_LIKE_FILES if ext in not_allowed}:
+ # Work around Discord auto-conversion of messages longer than 2000 chars to .txt
+ ctx.dm_embed = TXT_EMBED_DESCRIPTION.format(blocked_extension=txt_extensions.pop())
+ else:
+ meta_channel = bot.instance.get_channel(Channels.meta)
+ if not self._whitelisted_description:
+ self._whitelisted_description = ", ".join(
+ filter_.content for filter_ in self[ListType.ALLOW].filters.values()
+ )
+ ctx.dm_embed = DISALLOWED_EMBED_DESCRIPTION.format(
+ joined_whitelist=self._whitelisted_description,
+ joined_blacklist=", ".join(not_allowed),
+ meta_channel_mention=meta_channel.mention,
)
- ctx.dm_embed = DISALLOWED_EMBED_DESCRIPTION.format(
- joined_whitelist=self._whitelisted_description,
- joined_blacklist=", ".join(not_allowed),
- meta_channel_mention=meta_channel.mention,
- )
ctx.matches += not_allowed.values()
ctx.blocked_exts |= set(not_allowed)
diff --git a/bot/exts/filtering/_filter_lists/filter_list.py b/bot/exts/filtering/_filter_lists/filter_list.py
index 48d05c97a..2cc54e8fb 100644
--- a/bot/exts/filtering/_filter_lists/filter_list.py
+++ b/bot/exts/filtering/_filter_lists/filter_list.py
@@ -157,7 +157,10 @@ class AtomicList:
return hash(id(self))
-class FilterList[T: Filter](dict[ListType, AtomicList], FieldRequiring):
+T = typing.TypeVar("T", bound=Filter)
+
+
+class FilterList(dict[ListType, AtomicList], typing.Generic[T], FieldRequiring):
"""Dispatches events to lists of _filters, and aggregates the responses into a single list of actions to take."""
# Each subclass must define a name matching the filter_list name we're expecting to receive from the database.
diff --git a/bot/exts/filtering/_settings.py b/bot/exts/filtering/_settings.py
index 6760a3f01..7005dd2d1 100644
--- a/bot/exts/filtering/_settings.py
+++ b/bot/exts/filtering/_settings.py
@@ -5,7 +5,7 @@ import traceback
from abc import abstractmethod
from copy import copy
from functools import reduce
-from typing import Any, NamedTuple, Self
+from typing import Any, NamedTuple, Self, TypeVar
from bot.exts.filtering._filter_context import FilterContext
from bot.exts.filtering._settings_types import settings_types
@@ -13,9 +13,13 @@ from bot.exts.filtering._settings_types.settings_entry import ActionEntry, Setti
from bot.exts.filtering._utils import FieldRequiring
from bot.log import get_logger
+TSettings = TypeVar("TSettings", bound="Settings")
+
log = get_logger(__name__)
-_already_warned = set[str]()
+_already_warned: set[str] = set()
+
+T = TypeVar("T", bound=SettingsEntry)
def create_settings(
@@ -51,7 +55,7 @@ def create_settings(
)
-class Settings[T: SettingsEntry](FieldRequiring, dict[str, T]):
+class Settings(FieldRequiring, dict[str, T]):
"""
A collection of settings.
@@ -65,7 +69,7 @@ class Settings[T: SettingsEntry](FieldRequiring, dict[str, T]):
entry_type: type[T]
- _already_warned = set[str]()
+ _already_warned: set[str] = set()
@abstractmethod # ABCs have to have at least once abstract method to actually count as such.
def __init__(self, settings_data: dict, *, defaults: Settings | None = None, keep_empty: bool = False):
@@ -100,7 +104,7 @@ class Settings[T: SettingsEntry](FieldRequiring, dict[str, T]):
"""Return a dictionary of overrides across all entries."""
return reduce(operator.or_, (entry.overrides for entry in self.values() if entry), {})
- def copy(self: Self) -> Self:
+ def copy(self: TSettings) -> TSettings:
"""Create a shallow copy of the object."""
return copy(self)
diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py
index 377cff015..844f2942e 100644
--- a/bot/exts/filtering/filtering.py
+++ b/bot/exts/filtering/filtering.py
@@ -66,14 +66,6 @@ OFFENSIVE_MSG_DELETE_TIME = datetime.timedelta(days=7)
WEEKLY_REPORT_ISO_DAY = 3 # 1=Monday, 7=Sunday
-async def _extract_text_file_content(att: discord.Attachment) -> str:
- """Extract up to the first 30 lines and first 2000 characters (whichever is shorter) of an attachment."""
- file_encoding = re.search(r"charset=(\S+)", att.content_type).group(1)
- file_lines: list[str] = (await att.read()).decode(encoding=file_encoding).splitlines()
- first_n_lines = "\n".join(file_lines[:30])[:2_000]
- return f"{att.filename}: {first_n_lines}"
-
-
class Filtering(Cog):
"""Filtering and alerting for content posted on the server."""
@@ -88,7 +80,7 @@ class Filtering(Cog):
def __init__(self, bot: Bot):
self.bot = bot
self.filter_lists: dict[str, FilterList] = {}
- self._subscriptions = defaultdict[Event, list[FilterList]](list)
+ self._subscriptions: defaultdict[Event, list[FilterList]] = defaultdict(list)
self.delete_scheduler = scheduling.Scheduler(self.__class__.__name__)
self.webhook: discord.Webhook | None = None
@@ -231,15 +223,6 @@ class Filtering(Cog):
self.message_cache.append(msg)
ctx = FilterContext.from_message(Event.MESSAGE, msg, None, self.message_cache)
-
- text_contents = [
- await _extract_text_file_content(a)
- for a in msg.attachments if "charset" in a.content_type
- ]
- if text_contents:
- attachment_content = "\n\n".join(text_contents)
- ctx = ctx.replace(content=f"{ctx.content}\n\n{attachment_content}")
-
result_actions, list_messages, triggers = await self._resolve_action(ctx)
self.message_cache.update(msg, metadata=triggers)
if result_actions:
diff --git a/bot/exts/utils/attachment_pastebin_uploader.py b/bot/exts/utils/attachment_pastebin_uploader.py
deleted file mode 100644
index 805abd238..000000000
--- a/bot/exts/utils/attachment_pastebin_uploader.py
+++ /dev/null
@@ -1,144 +0,0 @@
-from __future__ import annotations
-
-import re
-
-import aiohttp
-import discord
-from discord.ext import commands
-from pydis_core.utils import paste_service
-
-from bot.bot import Bot
-from bot.constants import Emojis
-from bot.log import get_logger
-
-log = get_logger(__name__)
-
-PASTEBIN_UPLOAD_EMOJI = Emojis.check_mark
-DELETE_PASTE_EMOJI = Emojis.trashcan
-
-
-class EmbedFileHandler(commands.Cog):
- """
- Handles automatic uploading of attachments to the paste bin.
-
- Whenever a user uploads one or more attachments that is text-based (py, txt, csv, etc.), this cog offers to upload
- all the attachments to the paste bin automatically. The steps are as follows:
- - The bot replies to the message containing the attachments, asking the user to react with a checkmark to consent
- to having the content uploaded.
- - If consent is given, the bot uploads the contents and edits its own message to contain the link.
- - The bot DMs the user the delete link for the paste.
- - The bot waits for the user to react with a trashcan emoji, in which case the bot deletes the paste and its own
- message.
- """
-
- def __init__(self, bot: Bot):
- self.bot = bot
- self.pending_messages = set[int]()
-
- @staticmethod
- async def _convert_attachment(attachment: discord.Attachment) -> paste_service.PasteFile:
- """Converts an attachment to a PasteFile, according to the attachment's file encoding."""
- encoding = re.search(r"charset=(\S+)", attachment.content_type).group(1)
- file_content = (await attachment.read()).decode(encoding)
- return paste_service.PasteFile(content=file_content, name=attachment.filename)
-
- @commands.Cog.listener()
- async def on_message_delete(self, message: discord.Message) -> None:
- """Allows us to know which messages with attachments have been deleted."""
- self.pending_messages.discard(message.id)
-
- @commands.Cog.listener()
- async def on_message(self, message: discord.Message) -> None:
- """Listens for messages containing attachments and offers to upload them to the pastebin."""
- # Check if the message contains an embedded file and is not sent by a bot.
- if message.author.bot or not any(a.content_type.startswith("text") for a in message.attachments):
- return
-
- log.trace(f"Offering to upload attachments for {message.author} in {message.channel}, message {message.id}")
- self.pending_messages.add(message.id)
-
- # Offer to upload the attachments and wait for the user's reaction.
- bot_reply = await message.reply(
- f"Please react with {PASTEBIN_UPLOAD_EMOJI} to upload your file(s) to our "
- f"[paste bin](<https://paste.pythondiscord.com/>), which is more accessible for some users."
- )
- await bot_reply.add_reaction(PASTEBIN_UPLOAD_EMOJI)
-
- def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) -> bool:
- return (
- reaction.message.id == bot_reply.id
- and str(reaction.emoji) == PASTEBIN_UPLOAD_EMOJI
- and user == message.author
- )
-
- try:
- # Wait for the reaction with a timeout of 60 seconds.
- await self.bot.wait_for("reaction_add", timeout=60.0, check=wait_for_upload_permission)
- except TimeoutError:
- # The user does not grant permission before the timeout. Exit early.
- log.trace(f"{message.author} didn't give permission to upload {message.id} content; aborting.")
- await bot_reply.edit(content=f"~~{bot_reply.content}~~")
- await bot_reply.clear_reactions()
-
- if message.id not in self.pending_messages:
- log.trace(f"{message.author}'s message was deleted before the attachments could be uploaded; aborting.")
- await bot_reply.delete()
- return
-
- # In either case, we do not want the message ID in pending_messages anymore.
- self.pending_messages.discard(message.id)
-
- # Extract the attachments.
- files = [
- await self._convert_attachment(f)
- for f in message.attachments
- if "charset" in f.content_type
- ]
-
- # Upload the files to the paste bin, exiting early if there's an error.
- log.trace(f"Attempting to upload {len(files)} file(s) to pastebin.")
- try:
- async with aiohttp.ClientSession() as session:
- paste_response = await paste_service.send_to_paste_service(files=files, http_session=session)
- except (paste_service.PasteTooLongError, ValueError):
- log.trace(f"{message.author}'s attachments were too long.")
- await bot_reply.edit(content="Your paste is too long, and couldn't be uploaded.")
- return
- except paste_service.PasteUploadError:
- log.trace(f"Unexpected error uploading {message.author}'s attachments.")
- await bot_reply.edit(content="There was an error uploading your paste.")
- return
-
- # Send the user a DM with the delete link for the paste.
- # The angle brackets around the remove link are required to stop Discord from visiting the URL to produce a
- # preview, thereby deleting the paste
- await message.author.send(content=f"[Click here](<{paste_response.removal}>) to delete your recent paste.")
-
- # Edit the bot message to contain the link to the paste.
- await bot_reply.edit(content=f"[Click here]({paste_response.link}) to see this code in our pastebin.")
- await bot_reply.clear_reactions()
- await bot_reply.add_reaction(DELETE_PASTE_EMOJI)
-
- # Wait for the user to react with a trash can, which they can use to delete the paste.
-
- def wait_for_delete_reaction(reaction: discord.Reaction, user: discord.User) -> bool:
- return (
- reaction.message.id == bot_reply.id
- and str(reaction.emoji) == DELETE_PASTE_EMOJI
- and user == message.author
- )
-
- try:
- log.trace(f"Offering to delete {message.author}'s attachments in {message.channel}, message {message.id}")
- await self.bot.wait_for("reaction_add", timeout=60.0 * 10, check=wait_for_delete_reaction)
- # Delete the paste by visiting the removal URL.
- async with aiohttp.ClientSession() as session:
- await session.get(paste_response.removal)
- await bot_reply.delete()
- except TimeoutError:
- log.trace(f"Offer to delete {message.author}'s attachments timed out.")
-
-
-async def setup(bot: Bot) -> None:
- """Load the EmbedFileHandler cog."""
- await bot.add_cog(EmbedFileHandler(bot))
diff --git a/bot/exts/utils/snekbox/_cog.py b/bot/exts/utils/snekbox/_cog.py
index 9e635e18a..39f61c6e2 100644
--- a/bot/exts/utils/snekbox/_cog.py
+++ b/bot/exts/utils/snekbox/_cog.py
@@ -17,6 +17,7 @@ from pydis_core.utils.regex import FORMATTED_CODE_REGEX, RAW_CODE_REGEX
from bot.bot import Bot
from bot.constants import BaseURLs, Channels, Emojis, MODERATION_ROLES, Roles, URLs
from bot.decorators import redirect_output
+from bot.exts.filtering._filter_lists.extension import TXT_LIKE_FILES
from bot.exts.help_channels._channel import is_help_forum_post
from bot.exts.utils.snekbox._eval import EvalJob, EvalResult
from bot.exts.utils.snekbox._io import FileAttachment
@@ -31,8 +32,6 @@ log = get_logger(__name__)
ANSI_REGEX = re.compile(r"\N{ESC}\[[0-9;:]*m")
ESCAPE_REGEX = re.compile("[`\u202E\u200B]{3,}")
-TXT_LIKE_FILES = {".txt", ".csv", ".json", ".py"}
-
# The timeit command should only output the very last line, so all other output should be suppressed.
# This will be used as the setup code along with any setup code provided.
TIMEIT_SETUP_WRAPPER = """