aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Steele Farnsworth <[email protected]>2025-01-29 18:51:18 -0500
committerGravatar Steele Farnsworth <[email protected]>2025-01-29 18:51:18 -0500
commit7d424b8d2d5b1445e753cb9fbdffb91346a641e9 (patch)
treee1d863f0d4a983c8e678dd748cc5ec0299516705
parentApply token filters to text attachment content. (diff)
Add helper function for extracting attachment text.
Implements a somewhat arbitrary limit (the first 30 lines, capped at 2000 characters) on how much text content is passed along for filtering, to avoid wasting compute time on large attachments that aren't intended to be read (such as CSVs).
-rw-r--r--bot/exts/filtering/filtering.py9
1 files changed, 8 insertions, 1 deletions
diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py
index a281aff79..f902ee9ec 100644
--- a/bot/exts/filtering/filtering.py
+++ b/bot/exts/filtering/filtering.py
@@ -67,6 +67,13 @@ OFFENSIVE_MSG_DELETE_TIME = datetime.timedelta(days=7)
WEEKLY_REPORT_ISO_DAY = 3 # 1=Monday, 7=Sunday
+async def _extract_text_file_content(att: discord.Attachment) -> str:
+ """Return the attachment's filename and first 30 lines of text, with the text capped at 2000 characters."""
+ file_lines: list[str] = (await att.read()).decode().splitlines()
+ first_n_lines = "\n".join(file_lines[:30])[:2_000]
+ return f"{att.filename}: {first_n_lines}"
+
+
class Filtering(Cog):
"""Filtering and alerting for content posted on the server."""
@@ -226,7 +233,7 @@ class Filtering(Cog):
ctx = FilterContext.from_message(Event.MESSAGE, msg, None, self.message_cache)
text_contents = [
- f"{a.filename}: " + (await a.read()).decode()
+ await _extract_text_file_content(a)
for a in msg.attachments if a.content_type.startswith("text")
]
if text_contents: