diff options
| author | 2025-01-30 18:17:32 -0500 | |
|---|---|---|
| committer | 2025-01-30 18:17:32 -0500 | |
| commit | 1a8ee2c73fc6e04aaa4ee2c40f50f113e9dcd45d (patch) | |
| tree | 61c249ead436bf3c82ff3c30cb9c421ffb01af57 | |
| parent | Add missing arg in help post archive call (diff) | |
| parent | Merge branch 'main' into pastebin-auto-upload (diff) | |
Merge pull request #3241 from python-discord/pastebin-auto-upload
Apply filtering to text attachments; offer to auto-upload text attachments to paste bin
| -rw-r--r-- | bot/exts/filtering/_filter_lists/extension.py | 39 | ||||
| -rw-r--r-- | bot/exts/filtering/_filter_lists/filter_list.py | 5 | ||||
| -rw-r--r-- | bot/exts/filtering/_settings.py | 14 | ||||
| -rw-r--r-- | bot/exts/filtering/filtering.py | 19 | ||||
| -rw-r--r-- | bot/exts/utils/attachment_pastebin_uploader.py | 144 | ||||
| -rw-r--r-- | bot/exts/utils/snekbox/_cog.py | 3 | 
6 files changed, 180 insertions, 44 deletions
| diff --git a/bot/exts/filtering/_filter_lists/extension.py b/bot/exts/filtering/_filter_lists/extension.py index d656bc6d2..e48564092 100644 --- a/bot/exts/filtering/_filter_lists/extension.py +++ b/bot/exts/filtering/_filter_lists/extension.py @@ -14,18 +14,6 @@ from bot.exts.filtering._settings import ActionSettings  if typing.TYPE_CHECKING:      from bot.exts.filtering.filtering import Filtering -PASTE_URL = "https://paste.pythondiscord.com" -PY_EMBED_DESCRIPTION = ( -    "It looks like you tried to attach a Python file - " -    f"please use a code-pasting service such as {PASTE_URL}" -) - -TXT_LIKE_FILES = {".txt", ".csv", ".json"} -TXT_EMBED_DESCRIPTION = ( -    "You either uploaded a `{blocked_extension}` file or entered a message that was too long. " -    f"Please use our [paste bin]({PASTE_URL}) instead." -) -  DISALLOWED_EMBED_DESCRIPTION = (      "It looks like you tried to attach file type(s) that we do not allow ({joined_blacklist}). "      "We currently allow the following file types: **{joined_whitelist}**.\n\n" @@ -87,30 +75,23 @@ class ExtensionsList(FilterList[ExtensionFilter]):          not_allowed = {ext: filename for ext, filename in all_ext if ext not in allowed_ext}          if ctx.event == Event.SNEKBOX: -            not_allowed = {ext: filename for ext, filename in not_allowed.items() if ext not in TXT_LIKE_FILES} +            not_allowed = dict(not_allowed.items())          if not not_allowed:  # Yes, it's a double negative. Meaning all attachments are allowed :)              return None, [], {ListType.ALLOW: triggered}          # At this point, something is disallowed.          if ctx.event != Event.SNEKBOX:  # Don't post the embed if it's a snekbox response. -            if ".py" in not_allowed: -                # Provide a pastebin link for .py files. -                ctx.dm_embed = PY_EMBED_DESCRIPTION -            elif txt_extensions := {ext for ext in TXT_LIKE_FILES if ext in not_allowed}: -                # Work around Discord auto-conversion of messages longer than 2000 chars to .txt -                ctx.dm_embed = TXT_EMBED_DESCRIPTION.format(blocked_extension=txt_extensions.pop()) -            else: -                meta_channel = bot.instance.get_channel(Channels.meta) -                if not self._whitelisted_description: -                    self._whitelisted_description = ", ".join( -                        filter_.content for filter_ in self[ListType.ALLOW].filters.values() -                    ) -                ctx.dm_embed = DISALLOWED_EMBED_DESCRIPTION.format( -                    joined_whitelist=self._whitelisted_description, -                    joined_blacklist=", ".join(not_allowed), -                    meta_channel_mention=meta_channel.mention, +            meta_channel = bot.instance.get_channel(Channels.meta) +            if not self._whitelisted_description: +                self._whitelisted_description = ", ".join( +                    filter_.content for filter_ in self[ListType.ALLOW].filters.values()                  ) +            ctx.dm_embed = DISALLOWED_EMBED_DESCRIPTION.format( +                joined_whitelist=self._whitelisted_description, +                joined_blacklist=", ".join(not_allowed), +                meta_channel_mention=meta_channel.mention, +            )          ctx.matches += not_allowed.values()          ctx.blocked_exts |= set(not_allowed) diff --git a/bot/exts/filtering/_filter_lists/filter_list.py b/bot/exts/filtering/_filter_lists/filter_list.py index 2cc54e8fb..48d05c97a 100644 --- a/bot/exts/filtering/_filter_lists/filter_list.py +++ b/bot/exts/filtering/_filter_lists/filter_list.py @@ -157,10 +157,7 @@ class AtomicList:          return hash(id(self)) -T = typing.TypeVar("T", bound=Filter) - - -class FilterList(dict[ListType, AtomicList], typing.Generic[T], FieldRequiring): +class FilterList[T: Filter](dict[ListType, AtomicList], FieldRequiring):      """Dispatches events to lists of _filters, and aggregates the responses into a single list of actions to take."""      # Each subclass must define a name matching the filter_list name we're expecting to receive from the database. diff --git a/bot/exts/filtering/_settings.py b/bot/exts/filtering/_settings.py index 7005dd2d1..6760a3f01 100644 --- a/bot/exts/filtering/_settings.py +++ b/bot/exts/filtering/_settings.py @@ -5,7 +5,7 @@ import traceback  from abc import abstractmethod  from copy import copy  from functools import reduce -from typing import Any, NamedTuple, Self, TypeVar +from typing import Any, NamedTuple, Self  from bot.exts.filtering._filter_context import FilterContext  from bot.exts.filtering._settings_types import settings_types @@ -13,13 +13,9 @@ from bot.exts.filtering._settings_types.settings_entry import ActionEntry, Setti  from bot.exts.filtering._utils import FieldRequiring  from bot.log import get_logger -TSettings = TypeVar("TSettings", bound="Settings") -  log = get_logger(__name__) -_already_warned: set[str] = set() - -T = TypeVar("T", bound=SettingsEntry) +_already_warned = set[str]()  def create_settings( @@ -55,7 +51,7 @@ def create_settings(      ) -class Settings(FieldRequiring, dict[str, T]): +class Settings[T: SettingsEntry](FieldRequiring, dict[str, T]):      """      A collection of settings. @@ -69,7 +65,7 @@ class Settings(FieldRequiring, dict[str, T]):      entry_type: type[T] -    _already_warned: set[str] = set() +    _already_warned = set[str]()      @abstractmethod  # ABCs have to have at least once abstract method to actually count as such.      def __init__(self, settings_data: dict, *, defaults: Settings | None = None, keep_empty: bool = False): @@ -104,7 +100,7 @@ class Settings(FieldRequiring, dict[str, T]):          """Return a dictionary of overrides across all entries."""          return reduce(operator.or_, (entry.overrides for entry in self.values() if entry), {}) -    def copy(self: TSettings) -> TSettings: +    def copy(self: Self) -> Self:          """Create a shallow copy of the object."""          return copy(self) diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py index 844f2942e..377cff015 100644 --- a/bot/exts/filtering/filtering.py +++ b/bot/exts/filtering/filtering.py @@ -66,6 +66,14 @@ OFFENSIVE_MSG_DELETE_TIME = datetime.timedelta(days=7)  WEEKLY_REPORT_ISO_DAY = 3  # 1=Monday, 7=Sunday +async def _extract_text_file_content(att: discord.Attachment) -> str: +    """Extract up to the first 30 lines and first 2000 characters (whichever is shorter) of an attachment.""" +    file_encoding = re.search(r"charset=(\S+)", att.content_type).group(1) +    file_lines: list[str] = (await att.read()).decode(encoding=file_encoding).splitlines() +    first_n_lines = "\n".join(file_lines[:30])[:2_000] +    return f"{att.filename}: {first_n_lines}" + +  class Filtering(Cog):      """Filtering and alerting for content posted on the server.""" @@ -80,7 +88,7 @@ class Filtering(Cog):      def __init__(self, bot: Bot):          self.bot = bot          self.filter_lists: dict[str, FilterList] = {} -        self._subscriptions: defaultdict[Event, list[FilterList]] = defaultdict(list) +        self._subscriptions = defaultdict[Event, list[FilterList]](list)          self.delete_scheduler = scheduling.Scheduler(self.__class__.__name__)          self.webhook: discord.Webhook | None = None @@ -223,6 +231,15 @@ class Filtering(Cog):          self.message_cache.append(msg)          ctx = FilterContext.from_message(Event.MESSAGE, msg, None, self.message_cache) + +        text_contents = [ +            await _extract_text_file_content(a) +            for a in msg.attachments if "charset" in a.content_type +        ] +        if text_contents: +            attachment_content = "\n\n".join(text_contents) +            ctx = ctx.replace(content=f"{ctx.content}\n\n{attachment_content}") +          result_actions, list_messages, triggers = await self._resolve_action(ctx)          self.message_cache.update(msg, metadata=triggers)          if result_actions: diff --git a/bot/exts/utils/attachment_pastebin_uploader.py b/bot/exts/utils/attachment_pastebin_uploader.py new file mode 100644 index 000000000..805abd238 --- /dev/null +++ b/bot/exts/utils/attachment_pastebin_uploader.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +import re + +import aiohttp +import discord +from discord.ext import commands +from pydis_core.utils import paste_service + +from bot.bot import Bot +from bot.constants import Emojis +from bot.log import get_logger + +log = get_logger(__name__) + +PASTEBIN_UPLOAD_EMOJI = Emojis.check_mark +DELETE_PASTE_EMOJI = Emojis.trashcan + + +class EmbedFileHandler(commands.Cog): +    """ +    Handles automatic uploading of attachments to the paste bin. + +    Whenever a user uploads one or more attachments that is text-based (py, txt, csv, etc.), this cog offers to upload +    all the attachments to the paste bin automatically. The steps are as follows: +    - The bot replies to the message containing the attachments, asking the user to react with a checkmark to consent +        to having the content uploaded. +    - If consent is given, the bot uploads the contents and edits its own message to contain the link. +    - The bot DMs the user the delete link for the paste. +    - The bot waits for the user to react with a trashcan emoji, in which case the bot deletes the paste and its own +        message. +    """ + +    def __init__(self, bot: Bot): +        self.bot = bot +        self.pending_messages = set[int]() + +    @staticmethod +    async def _convert_attachment(attachment: discord.Attachment) -> paste_service.PasteFile: +        """Converts an attachment to a PasteFile, according to the attachment's file encoding.""" +        encoding = re.search(r"charset=(\S+)", attachment.content_type).group(1) +        file_content = (await attachment.read()).decode(encoding) +        return paste_service.PasteFile(content=file_content, name=attachment.filename) + +    @commands.Cog.listener() +    async def on_message_delete(self, message: discord.Message) -> None: +        """Allows us to know which messages with attachments have been deleted.""" +        self.pending_messages.discard(message.id) + +    @commands.Cog.listener() +    async def on_message(self, message: discord.Message) -> None: +        """Listens for messages containing attachments and offers to upload them to the pastebin.""" +        # Check if the message contains an embedded file and is not sent by a bot. +        if message.author.bot or not any(a.content_type.startswith("text") for a in message.attachments): +            return + +        log.trace(f"Offering to upload attachments for {message.author} in {message.channel}, message {message.id}") +        self.pending_messages.add(message.id) + +        # Offer to upload the attachments and wait for the user's reaction. +        bot_reply = await message.reply( +            f"Please react with {PASTEBIN_UPLOAD_EMOJI} to upload your file(s) to our " +            f"[paste bin](<https://paste.pythondiscord.com/>), which is more accessible for some users." +        ) +        await bot_reply.add_reaction(PASTEBIN_UPLOAD_EMOJI) + +        def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) -> bool: +            return ( +                reaction.message.id == bot_reply.id +                and str(reaction.emoji) == PASTEBIN_UPLOAD_EMOJI +                and user == message.author +            ) + +        try: +            # Wait for the reaction with a timeout of 60 seconds. +            await self.bot.wait_for("reaction_add", timeout=60.0, check=wait_for_upload_permission) +        except TimeoutError: +            # The user does not grant permission before the timeout. Exit early. +            log.trace(f"{message.author} didn't give permission to upload {message.id} content; aborting.") +            await bot_reply.edit(content=f"~~{bot_reply.content}~~") +            await bot_reply.clear_reactions() + +        if message.id not in self.pending_messages: +            log.trace(f"{message.author}'s message was deleted before the attachments could be uploaded; aborting.") +            await bot_reply.delete() +            return + +        # In either case, we do not want the message ID in pending_messages anymore. +        self.pending_messages.discard(message.id) + +        # Extract the attachments. +        files = [ +            await self._convert_attachment(f) +            for f in message.attachments +            if "charset" in f.content_type +        ] + +        # Upload the files to the paste bin, exiting early if there's an error. +        log.trace(f"Attempting to upload {len(files)} file(s) to pastebin.") +        try: +            async with aiohttp.ClientSession() as session: +                paste_response = await paste_service.send_to_paste_service(files=files, http_session=session) +        except (paste_service.PasteTooLongError, ValueError): +            log.trace(f"{message.author}'s attachments were too long.") +            await bot_reply.edit(content="Your paste is too long, and couldn't be uploaded.") +            return +        except paste_service.PasteUploadError: +            log.trace(f"Unexpected error uploading {message.author}'s attachments.") +            await bot_reply.edit(content="There was an error uploading your paste.") +            return + +        # Send the user a DM with the delete link for the paste. +        # The angle brackets around the remove link are required to stop Discord from visiting the URL to produce a +        # preview, thereby deleting the paste +        await message.author.send(content=f"[Click here](<{paste_response.removal}>) to delete your recent paste.") + +        # Edit the bot message to contain the link to the paste. +        await bot_reply.edit(content=f"[Click here]({paste_response.link}) to see this code in our pastebin.") +        await bot_reply.clear_reactions() +        await bot_reply.add_reaction(DELETE_PASTE_EMOJI) + +        # Wait for the user to react with a trash can, which they can use to delete the paste. + +        def wait_for_delete_reaction(reaction: discord.Reaction, user: discord.User) -> bool: +            return ( +                reaction.message.id == bot_reply.id +                and str(reaction.emoji) == DELETE_PASTE_EMOJI +                and user == message.author +            ) + +        try: +            log.trace(f"Offering to delete {message.author}'s attachments in {message.channel}, message {message.id}") +            await self.bot.wait_for("reaction_add", timeout=60.0 * 10, check=wait_for_delete_reaction) +            # Delete the paste by visiting the removal URL. +            async with aiohttp.ClientSession() as session: +                await session.get(paste_response.removal) +            await bot_reply.delete() +        except TimeoutError: +            log.trace(f"Offer to delete {message.author}'s attachments timed out.") + + +async def setup(bot: Bot) -> None: +    """Load the EmbedFileHandler cog.""" +    await bot.add_cog(EmbedFileHandler(bot)) diff --git a/bot/exts/utils/snekbox/_cog.py b/bot/exts/utils/snekbox/_cog.py index 39f61c6e2..9e635e18a 100644 --- a/bot/exts/utils/snekbox/_cog.py +++ b/bot/exts/utils/snekbox/_cog.py @@ -17,7 +17,6 @@ from pydis_core.utils.regex import FORMATTED_CODE_REGEX, RAW_CODE_REGEX  from bot.bot import Bot  from bot.constants import BaseURLs, Channels, Emojis, MODERATION_ROLES, Roles, URLs  from bot.decorators import redirect_output -from bot.exts.filtering._filter_lists.extension import TXT_LIKE_FILES  from bot.exts.help_channels._channel import is_help_forum_post  from bot.exts.utils.snekbox._eval import EvalJob, EvalResult  from bot.exts.utils.snekbox._io import FileAttachment @@ -32,6 +31,8 @@ log = get_logger(__name__)  ANSI_REGEX = re.compile(r"\N{ESC}\[[0-9;:]*m")  ESCAPE_REGEX = re.compile("[`\u202E\u200B]{3,}") +TXT_LIKE_FILES = {".txt", ".csv", ".json", ".py"} +  # The timeit command should only output the very last line, so all other output should be suppressed.  # This will be used as the setup code along with any setup code provided.  TIMEIT_SETUP_WRAPPER = """ | 
