diff options
author | 2025-01-30 18:06:24 -0500 | |
---|---|---|
committer | 2025-01-30 18:06:24 -0500 | |
commit | f022f5f814f7306991c23846a6640e0b13c58e15 (patch) | |
tree | a190d9d205fbc2e19b36519596e1f2d1445ccadd | |
parent | Remove messages related to disallowed text-like files. (diff) |
Change how a file is determined to be text-like. Decode text with correct encoding.
Attachments with "charset" in their content type are presumed to be text. The specified charset is always used to decode the text.
-rw-r--r-- | bot/exts/filtering/filtering.py | 5 | ||||
-rw-r--r-- | bot/exts/utils/attachment_pastebin_uploader.py | 2 |
2 files changed, 4 insertions, 3 deletions
```diff
diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py
index e1483e18f..377cff015 100644
--- a/bot/exts/filtering/filtering.py
+++ b/bot/exts/filtering/filtering.py
@@ -68,7 +68,8 @@ WEEKLY_REPORT_ISO_DAY = 3 # 1=Monday, 7=Sunday
 
 async def _extract_text_file_content(att: discord.Attachment) -> str:
     """Extract up to the first 30 lines and first 2000 characters (whichever is shorter) of an attachment."""
-    file_lines: list[str] = (await att.read()).decode().splitlines()
+    file_encoding = re.search(r"charset=(\S+)", att.content_type).group(1)
+    file_lines: list[str] = (await att.read()).decode(encoding=file_encoding).splitlines()
     first_n_lines = "\n".join(file_lines[:30])[:2_000]
     return f"{att.filename}: {first_n_lines}"
 
@@ -233,7 +234,7 @@ class Filtering(Cog):
 
         text_contents = [
             await _extract_text_file_content(a)
-            for a in msg.attachments if a.content_type.startswith("text")
+            for a in msg.attachments if "charset" in a.content_type
         ]
         if text_contents:
             attachment_content = "\n\n".join(text_contents)
diff --git a/bot/exts/utils/attachment_pastebin_uploader.py b/bot/exts/utils/attachment_pastebin_uploader.py
index 6c54a4a2b..805abd238 100644
--- a/bot/exts/utils/attachment_pastebin_uploader.py
+++ b/bot/exts/utils/attachment_pastebin_uploader.py
@@ -92,7 +92,7 @@ class EmbedFileHandler(commands.Cog):
         files = [
             await self._convert_attachment(f)
             for f in message.attachments
-            if f.content_type.startswith("text")
+            if "charset" in f.content_type
         ]
 
         # Upload the files to the paste bin, exiting early if there's an error.
```