aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Amrou Bellalouna <[email protected]>2024-02-03 08:10:34 +0100
committerGravatar GitHub <[email protected]>2024-02-03 07:10:34 +0000
commit80d20dbfc51edb5883fbb127bda942895edb136f (patch)
treeab2b2dd6dc11f1699a1c5eca902c61d3c90adafa
parentReplace aliased errors with TimeoutError (diff)
Use the new mailing lists API to track news (#2852)
-rw-r--r--bot/exts/info/python_news.py100
1 files changed, 50 insertions, 50 deletions
diff --git a/bot/exts/info/python_news.py b/bot/exts/info/python_news.py
index 8507a0fec..e06c82bb8 100644
--- a/bot/exts/info/python_news.py
+++ b/bot/exts/info/python_news.py
@@ -7,6 +7,7 @@ import feedparser
from bs4 import BeautifulSoup
from discord.ext.commands import Cog
from discord.ext.tasks import loop
+from pydis_core.site_api import ResponseCodeError
from bot import constants
from bot.bot import Bot
@@ -40,7 +41,7 @@ class PythonNews(Cog):
self.bot = bot
self.webhook_names = {}
self.webhook: discord.Webhook | None = None
- self.fetch_new_media.start()
+ self.seen_items: dict[str, set[str]] = {}
async def cog_unload(self) -> None:
"""Stop news posting tasks on cog unload."""
@@ -48,41 +49,25 @@ class PythonNews(Cog):
async def get_webhooks(self) -> None:
"""Get webhook author names from maillist API."""
- await self.bot.wait_until_guild_available()
-
async with self.bot.http_session.get("https://mail.python.org/archives/api/lists") as resp:
lists = await resp.json()
for mail in lists:
- if mail["name"].split("@")[0] in constants.PythonNews.mail_lists:
- self.webhook_names[mail["name"].split("@")[0]] = mail["display_name"]
+ mailing_list_name = mail["name"].split("@")[0]
+ if mailing_list_name in constants.PythonNews.mail_lists:
+ self.webhook_names[mailing_list_name] = mail["display_name"]
self.webhook = await self.bot.fetch_webhook(constants.PythonNews.webhook)
@loop(minutes=20)
async def fetch_new_media(self) -> None:
"""Fetch new mailing list messages and then new PEPs."""
+ await self.bot.wait_until_guild_available()
if not self.webhook:
await self.get_webhooks()
await self.post_maillist_news()
await self.post_pep_news()
- async def sync_maillists(self) -> None:
- """Sync currently in-use maillists with API."""
- # Wait until guild is available to avoid running before everything is ready
- await self.bot.wait_until_guild_available()
-
- response = await self.bot.api_client.get("bot/bot-settings/news")
- for mail in constants.PythonNews.mail_lists:
- if mail not in response["data"]:
- response["data"][mail] = []
-
- # Because we are handling PEPs differently, we don't include it to mail lists
- if "pep" not in response["data"]:
- response["data"]["pep"] = []
-
- await self.bot.api_client.put("bot/bot-settings/news", json=response)
-
@staticmethod
def escape_markdown(content: str) -> str:
"""Escape the markdown underlines and spoilers that aren't in codeblocks."""
@@ -93,16 +78,10 @@ class PythonNews(Cog):
async def post_pep_news(self) -> None:
"""Fetch new PEPs and when they don't have announcement in #python-news, create it."""
- # Wait until everything is ready and http_session available
- await self.bot.wait_until_guild_available()
- await self.sync_maillists()
-
async with self.bot.http_session.get(PEPS_RSS_URL) as resp:
data = feedparser.parse(await resp.text("utf-8"))
- news_listing = await self.bot.api_client.get("bot/bot-settings/news")
- payload = news_listing.copy()
- pep_numbers = news_listing["data"]["pep"]
+ pep_numbers = self.seen_items["pep"]
# Reverse entries to send oldest first
data["entries"].reverse()
@@ -139,26 +118,19 @@ class PythonNews(Cog):
avatar_url=AVATAR_URL,
wait=True,
)
- payload["data"]["pep"].append(pep_nr)
-
- # Increase overall PEP new stat
- self.bot.stats.incr("python_news.posted.pep")
-
- if msg.channel.is_news():
+ pep_successfully_added = await self.add_item_to_mail_list("pep", pep_nr)
+ if pep_successfully_added and msg.channel.is_news():
log.trace("Publishing PEP announcement because it was in a news channel")
await msg.publish()
- # Apply new sent news to DB to avoid duplicate sending
- await self.bot.api_client.put("bot/bot-settings/news", json=payload)
-
async def post_maillist_news(self) -> None:
"""Send new maillist threads to #python-news that is listed in configuration."""
- await self.bot.wait_until_guild_available()
- await self.sync_maillists()
- existing_news = await self.bot.api_client.get("bot/bot-settings/news")
- payload = existing_news.copy()
-
for maillist in constants.PythonNews.mail_lists:
+ if maillist not in self.seen_items:
+ # If for some reason we have a mailing list that isn't tracked.
+ log.warning("Mailing list %s doesn't exist in the database", maillist)
+ continue
+
async with self.bot.http_session.get(RECENT_THREADS_TEMPLATE.format(name=maillist)) as resp:
recents = BeautifulSoup(await resp.text(), features="lxml")
@@ -183,8 +155,9 @@ class PythonNews(Cog):
log.warning(f"Invalid datetime from Thread email: {email_information['date']}")
continue
+ thread_id = thread_information["thread_id"]
if (
- thread_information["thread_id"] in existing_news["data"][maillist]
+ thread_id in self.seen_items[maillist]
or "Re: " in thread_information["subject"]
or new_date.date() < datetime.now(tz=UTC).date()
):
@@ -216,16 +189,30 @@ class PythonNews(Cog):
avatar_url=AVATAR_URL,
wait=True,
)
- payload["data"][maillist].append(thread_information["thread_id"])
-
- # Increase this specific maillist counter in stats
- self.bot.stats.incr(f"python_news.posted.{maillist.replace('-', '_')}")
-
- if msg.channel.is_news():
+ thread_successfully_added = await self.add_item_to_mail_list(maillist, thread_id)
+ if thread_successfully_added and msg.channel.is_news():
log.trace("Publishing mailing list message because it was in a news channel")
await msg.publish()
- await self.bot.api_client.put("bot/bot-settings/news", json=payload)
+ async def add_item_to_mail_list(self, mail_list: str, item_identifier: str) -> bool:
+ """Adds a new item to a particular mailing_list."""
+ try:
+ await self.bot.api_client.post(f"bot/mailing-lists/{mail_list}/seen-items", json=item_identifier)
+ self.seen_items[mail_list].add(item_identifier)
+ # Increase this specific mailing list counter in stats
+ self.bot.stats.incr(f"python_news.posted.{mail_list.replace('-', '_')}")
+ return True
+
+ except ResponseCodeError as e:
+ non_field_errors = e.response_json.get("non_field_errors", [])
+ if non_field_errors != ["Seen item already known."]:
+ raise e
+ log.trace(
+ "Item %s has already been seen in the following mailing list: %s",
+ item_identifier,
+ mail_list
+ )
+ return False
async def get_thread_and_first_mail(self, maillist: str, thread_identifier: str) -> tuple[t.Any, t.Any]:
"""Get mail thread and first mail from mail.python.org based on `maillist` and `thread_identifier`."""
@@ -238,6 +225,19 @@ class PythonNews(Cog):
email_information = await resp.json()
return thread_information, email_information
+ async def cog_load(self) -> None:
+ """Load all existing seen items from db and create any missing mailing lists."""
+ response = await self.bot.api_client.get("bot/mailing-lists")
+ for mailing_list in response:
+ self.seen_items[mailing_list["name"]] = set(mailing_list["seen_items"])
+
+ for mailing_list in ("pep", *constants.PythonNews.mail_lists):
+ if mailing_list not in self.seen_items:
+ await self.bot.api_client.post("bot/mailing-lists", json={"name": mailing_list})
+ self.seen_items[mailing_list] = set()
+
+ self.fetch_new_media.start()
+
async def setup(bot: Bot) -> None:
"""Add `News` cog."""