diff options
| author | 2025-01-30 18:06:24 -0500 | |
|---|---|---|
| committer | 2025-01-30 18:06:24 -0500 | |
| commit | f022f5f814f7306991c23846a6640e0b13c58e15 (patch) | |
| tree | a190d9d205fbc2e19b36519596e1f2d1445ccadd | |
| parent | Remove messages related to disallowed text-like files. (diff) | |
Change how a file is determined to be text-like. Decode text with correct encoding.
Attachments with "charset" in their content type are presumed to be text. The specified charset is always used to decode the text.
Diffstat (limited to '')
| -rw-r--r-- | bot/exts/filtering/filtering.py | 5 | ||||
| -rw-r--r-- | bot/exts/utils/attachment_pastebin_uploader.py | 2 |
2 files changed, 4 insertions, 3 deletions
```diff
diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py
index e1483e18f..377cff015 100644
--- a/bot/exts/filtering/filtering.py
+++ b/bot/exts/filtering/filtering.py
@@ -68,7 +68,8 @@ WEEKLY_REPORT_ISO_DAY = 3 # 1=Monday, 7=Sunday
 
 async def _extract_text_file_content(att: discord.Attachment) -> str:
     """Extract up to the first 30 lines and first 2000 characters (whichever is shorter) of an attachment."""
-    file_lines: list[str] = (await att.read()).decode().splitlines()
+    file_encoding = re.search(r"charset=(\S+)", att.content_type).group(1)
+    file_lines: list[str] = (await att.read()).decode(encoding=file_encoding).splitlines()
     first_n_lines = "\n".join(file_lines[:30])[:2_000]
     return f"{att.filename}: {first_n_lines}"
 
@@ -233,7 +234,7 @@ class Filtering(Cog):
 
         text_contents = [
             await _extract_text_file_content(a)
-            for a in msg.attachments if a.content_type.startswith("text")
+            for a in msg.attachments if "charset" in a.content_type
         ]
         if text_contents:
             attachment_content = "\n\n".join(text_contents)
diff --git a/bot/exts/utils/attachment_pastebin_uploader.py b/bot/exts/utils/attachment_pastebin_uploader.py
index 6c54a4a2b..805abd238 100644
--- a/bot/exts/utils/attachment_pastebin_uploader.py
+++ b/bot/exts/utils/attachment_pastebin_uploader.py
@@ -92,7 +92,7 @@ class EmbedFileHandler(commands.Cog):
         files = [
             await self._convert_attachment(f)
             for f in message.attachments
-            if f.content_type.startswith("text")
+            if "charset" in f.content_type
         ]
 
         # Upload the files to the paste bin, exiting early if there's an error.
```