about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Steele Farnsworth <[email protected]> 2025-01-30 18:06:24 -0500
committer: Steele Farnsworth <[email protected]> 2025-01-30 18:06:24 -0500
commit: f022f5f814f7306991c23846a6640e0b13c58e15 (patch)
tree: a190d9d205fbc2e19b36519596e1f2d1445ccadd
parent: Remove messages related to disallowed text-like files. (diff)
Change how a file is determined to be text-like. Decode text with correct encoding.
Attachments with "charset" in their content type are presumed to be text. The specified charset is always used to decode the text.
Diffstat (limited to '')
-rw-r--r-- bot/exts/filtering/filtering.py 5
-rw-r--r-- bot/exts/utils/attachment_pastebin_uploader.py 2
2 files changed, 4 insertions, 3 deletions
diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py
index e1483e18f..377cff015 100644
--- a/bot/exts/filtering/filtering.py
+++ b/bot/exts/filtering/filtering.py
@@ -68,7 +68,8 @@ WEEKLY_REPORT_ISO_DAY = 3 # 1=Monday, 7=Sunday
async def _extract_text_file_content(att: discord.Attachment) -> str:
"""Extract up to the first 30 lines and first 2000 characters (whichever is shorter) of an attachment."""
- file_lines: list[str] = (await att.read()).decode().splitlines()
+ file_encoding = re.search(r"charset=(\S+)", att.content_type).group(1)
+ file_lines: list[str] = (await att.read()).decode(encoding=file_encoding).splitlines()
first_n_lines = "\n".join(file_lines[:30])[:2_000]
return f"{att.filename}: {first_n_lines}"
@@ -233,7 +234,7 @@ class Filtering(Cog):
text_contents = [
await _extract_text_file_content(a)
- for a in msg.attachments if a.content_type.startswith("text")
+ for a in msg.attachments if "charset" in a.content_type
]
if text_contents:
attachment_content = "\n\n".join(text_contents)
diff --git a/bot/exts/utils/attachment_pastebin_uploader.py b/bot/exts/utils/attachment_pastebin_uploader.py
index 6c54a4a2b..805abd238 100644
--- a/bot/exts/utils/attachment_pastebin_uploader.py
+++ b/bot/exts/utils/attachment_pastebin_uploader.py
@@ -92,7 +92,7 @@ class EmbedFileHandler(commands.Cog):
files = [
await self._convert_attachment(f)
for f in message.attachments
- if f.content_type.startswith("text")
+ if "charset" in f.content_type
]
# Upload the files to the paste bin, exiting early if there's an error.