From f022f5f814f7306991c23846a6640e0b13c58e15 Mon Sep 17 00:00:00 2001
From: Steele Farnsworth
Date: Thu, 30 Jan 2025 18:06:24 -0500
Subject: Change how a file is determined to be text-like. Decode text with correct encoding.

Attachments with "charset" in their content type are presumed to be text.
The specified charset is always used to decode the text.
---
 bot/exts/filtering/filtering.py                | 5 +++--
 bot/exts/utils/attachment_pastebin_uploader.py | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py
index e1483e18f..377cff015 100644
--- a/bot/exts/filtering/filtering.py
+++ b/bot/exts/filtering/filtering.py
@@ -68,7 +68,8 @@ WEEKLY_REPORT_ISO_DAY = 3 # 1=Monday, 7=Sunday
 
 async def _extract_text_file_content(att: discord.Attachment) -> str:
     """Extract up to the first 30 lines and first 2000 characters (whichever is shorter) of an attachment."""
-    file_lines: list[str] = (await att.read()).decode().splitlines()
+    file_encoding = re.search(r"charset=(\S+)", att.content_type).group(1)
+    file_lines: list[str] = (await att.read()).decode(encoding=file_encoding).splitlines()
     first_n_lines = "\n".join(file_lines[:30])[:2_000]
     return f"{att.filename}: {first_n_lines}"
 
@@ -233,7 +234,7 @@ class Filtering(Cog):
 
         text_contents = [
             await _extract_text_file_content(a)
-            for a in msg.attachments if a.content_type.startswith("text")
+            for a in msg.attachments if "charset" in a.content_type
         ]
         if text_contents:
             attachment_content = "\n\n".join(text_contents)
diff --git a/bot/exts/utils/attachment_pastebin_uploader.py b/bot/exts/utils/attachment_pastebin_uploader.py
index 6c54a4a2b..805abd238 100644
--- a/bot/exts/utils/attachment_pastebin_uploader.py
+++ b/bot/exts/utils/attachment_pastebin_uploader.py
@@ -92,7 +92,7 @@ class EmbedFileHandler(commands.Cog):
         files = [
             await self._convert_attachment(f)
             for f in message.attachments
-            if f.content_type.startswith("text")
+            if "charset" in f.content_type
         ]
 
         # Upload the files to the paste bin, exiting early if there's an error.
-- 
cgit v1.2.3