about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Steele Farnsworth <[email protected]> 2025-01-29 18:51:18 -0500
committer: Steele Farnsworth <[email protected]> 2025-01-29 18:51:18 -0500
commit: 7d424b8d2d5b1445e753cb9fbdffb91346a641e9 (patch)
tree: e1d863f0d4a983c8e678dd748cc5ec0299516705
parent: Apply token filters to text attachment content. (diff)
Add helper function for extracting attachment text.
Implements a somewhat arbitrary limit on how much text content is passed along for filtering, to avoid wasting compute time on large attachments that aren't intended to be read (such as CSVs).
Diffstat (limited to '')
-rw-r--r-- bot/exts/filtering/filtering.py 9
1 files changed, 8 insertions, 1 deletions
diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py
index a281aff79..f902ee9ec 100644
--- a/bot/exts/filtering/filtering.py
+++ b/bot/exts/filtering/filtering.py
@@ -67,6 +67,13 @@ OFFENSIVE_MSG_DELETE_TIME = datetime.timedelta(days=7)
WEEKLY_REPORT_ISO_DAY = 3 # 1=Monday, 7=Sunday
+async def _extract_text_file_content(att: discord.Attachment) -> str:
+ """Extract up to the first 30 lines and first 2000 characters (whichever is shorter) of an attachment."""
+ file_lines: list[str] = (await att.read()).decode().splitlines()
+ first_n_lines = "\n".join(file_lines[:30])[:2_000]
+ return f"{att.filename}: {first_n_lines}"
+
+
class Filtering(Cog):
"""Filtering and alerting for content posted on the server."""
@@ -226,7 +233,7 @@ class Filtering(Cog):
ctx = FilterContext.from_message(Event.MESSAGE, msg, None, self.message_cache)
text_contents = [
- f"{a.filename}: " + (await a.read()).decode()
+ await _extract_text_file_content(a)
for a in msg.attachments if a.content_type.startswith("text")
]
if text_contents: