about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Steele Farnsworth <[email protected]> 2025-01-30 18:06:24 -0500
committer: Steele Farnsworth <[email protected]> 2025-01-30 18:06:24 -0500
commit: f022f5f814f7306991c23846a6640e0b13c58e15 (patch)
tree: a190d9d205fbc2e19b36519596e1f2d1445ccadd
parent: Remove messages related to disallowed text-like files. (diff)
Change how a file is determined to be text-like. Decode text with correct encoding.
Attachments with "charset" in their content type are presumed to be text. The specified charset is always used to decode the text.
-rw-r--r-- bot/exts/filtering/filtering.py 5
-rw-r--r-- bot/exts/utils/attachment_pastebin_uploader.py 2
2 files changed, 4 insertions, 3 deletions
diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py
index e1483e18f..377cff015 100644
--- a/bot/exts/filtering/filtering.py
+++ b/bot/exts/filtering/filtering.py
@@ -68,7 +68,8 @@ WEEKLY_REPORT_ISO_DAY = 3 # 1=Monday, 7=Sunday
async def _extract_text_file_content(att: discord.Attachment) -> str:
"""Extract up to the first 30 lines and first 2000 characters (whichever is shorter) of an attachment."""
- file_lines: list[str] = (await att.read()).decode().splitlines()
+ file_encoding = re.search(r"charset=(\S+)", att.content_type).group(1)
+ file_lines: list[str] = (await att.read()).decode(encoding=file_encoding).splitlines()
first_n_lines = "\n".join(file_lines[:30])[:2_000]
return f"{att.filename}: {first_n_lines}"
@@ -233,7 +234,7 @@ class Filtering(Cog):
text_contents = [
await _extract_text_file_content(a)
- for a in msg.attachments if a.content_type.startswith("text")
+ for a in msg.attachments if "charset" in a.content_type
]
if text_contents:
attachment_content = "\n\n".join(text_contents)
diff --git a/bot/exts/utils/attachment_pastebin_uploader.py b/bot/exts/utils/attachment_pastebin_uploader.py
index 6c54a4a2b..805abd238 100644
--- a/bot/exts/utils/attachment_pastebin_uploader.py
+++ b/bot/exts/utils/attachment_pastebin_uploader.py
@@ -92,7 +92,7 @@ class EmbedFileHandler(commands.Cog):
files = [
await self._convert_attachment(f)
for f in message.attachments
- if f.content_type.startswith("text")
+ if "charset" in f.content_type
]
# Upload the files to the paste bin, exiting early if there's an error.