diff options
-rw-r--r-- | bot/exts/info/python_news.py | 100 |
1 files changed, 50 insertions, 50 deletions
diff --git a/bot/exts/info/python_news.py b/bot/exts/info/python_news.py index 8507a0fec..e06c82bb8 100644 --- a/bot/exts/info/python_news.py +++ b/bot/exts/info/python_news.py @@ -7,6 +7,7 @@ import feedparser from bs4 import BeautifulSoup from discord.ext.commands import Cog from discord.ext.tasks import loop +from pydis_core.site_api import ResponseCodeError from bot import constants from bot.bot import Bot @@ -40,7 +41,7 @@ class PythonNews(Cog): self.bot = bot self.webhook_names = {} self.webhook: discord.Webhook | None = None - self.fetch_new_media.start() + self.seen_items: dict[str, set[str]] = {} async def cog_unload(self) -> None: """Stop news posting tasks on cog unload.""" @@ -48,41 +49,25 @@ class PythonNews(Cog): async def get_webhooks(self) -> None: """Get webhook author names from maillist API.""" - await self.bot.wait_until_guild_available() - async with self.bot.http_session.get("https://mail.python.org/archives/api/lists") as resp: lists = await resp.json() for mail in lists: - if mail["name"].split("@")[0] in constants.PythonNews.mail_lists: - self.webhook_names[mail["name"].split("@")[0]] = mail["display_name"] + mailing_list_name = mail["name"].split("@")[0] + if mailing_list_name in constants.PythonNews.mail_lists: + self.webhook_names[mailing_list_name] = mail["display_name"] self.webhook = await self.bot.fetch_webhook(constants.PythonNews.webhook) @loop(minutes=20) async def fetch_new_media(self) -> None: """Fetch new mailing list messages and then new PEPs.""" + await self.bot.wait_until_guild_available() if not self.webhook: await self.get_webhooks() await self.post_maillist_news() await self.post_pep_news() - async def sync_maillists(self) -> None: - """Sync currently in-use maillists with API.""" - # Wait until guild is available to avoid running before everything is ready - await self.bot.wait_until_guild_available() - - response = await self.bot.api_client.get("bot/bot-settings/news") - for mail in constants.PythonNews.mail_lists: - if mail not in response["data"]: - response["data"][mail] = [] - - # Because we are handling PEPs differently, we don't include it to mail lists - if "pep" not in response["data"]: - response["data"]["pep"] = [] - - await self.bot.api_client.put("bot/bot-settings/news", json=response) - @staticmethod def escape_markdown(content: str) -> str: """Escape the markdown underlines and spoilers that aren't in codeblocks.""" @@ -93,16 +78,10 @@ class PythonNews(Cog): async def post_pep_news(self) -> None: """Fetch new PEPs and when they don't have announcement in #python-news, create it.""" - # Wait until everything is ready and http_session available - await self.bot.wait_until_guild_available() - await self.sync_maillists() - async with self.bot.http_session.get(PEPS_RSS_URL) as resp: data = feedparser.parse(await resp.text("utf-8")) - news_listing = await self.bot.api_client.get("bot/bot-settings/news") - payload = news_listing.copy() - pep_numbers = news_listing["data"]["pep"] + pep_numbers = self.seen_items["pep"] # Reverse entries to send oldest first data["entries"].reverse() @@ -139,26 +118,19 @@ class PythonNews(Cog): avatar_url=AVATAR_URL, wait=True, ) - payload["data"]["pep"].append(pep_nr) - - # Increase overall PEP new stat - self.bot.stats.incr("python_news.posted.pep") - - if msg.channel.is_news(): + pep_successfully_added = await self.add_item_to_mail_list("pep", pep_nr) + if pep_successfully_added and msg.channel.is_news(): log.trace("Publishing PEP announcement because it was in a news channel") await msg.publish() - # Apply new sent news to DB to avoid duplicate sending - await self.bot.api_client.put("bot/bot-settings/news", json=payload) - async def post_maillist_news(self) -> None: """Send new maillist threads to #python-news that is listed in configuration.""" - await self.bot.wait_until_guild_available() - await self.sync_maillists() - existing_news = await self.bot.api_client.get("bot/bot-settings/news") - payload = existing_news.copy() - for maillist in constants.PythonNews.mail_lists: + if maillist not in self.seen_items: + # If for some reason we have a mailing list that isn't tracked. + log.warning("Mailing list %s doesn't exist in the database", maillist) + continue + async with self.bot.http_session.get(RECENT_THREADS_TEMPLATE.format(name=maillist)) as resp: recents = BeautifulSoup(await resp.text(), features="lxml") @@ -183,8 +155,9 @@ class PythonNews(Cog): log.warning(f"Invalid datetime from Thread email: {email_information['date']}") continue + thread_id = thread_information["thread_id"] if ( - thread_information["thread_id"] in existing_news["data"][maillist] + thread_id in self.seen_items[maillist] or "Re: " in thread_information["subject"] or new_date.date() < datetime.now(tz=UTC).date() ): @@ -216,16 +189,30 @@ class PythonNews(Cog): avatar_url=AVATAR_URL, wait=True, ) - payload["data"][maillist].append(thread_information["thread_id"]) - - # Increase this specific maillist counter in stats - self.bot.stats.incr(f"python_news.posted.{maillist.replace('-', '_')}") - - if msg.channel.is_news(): + thread_successfully_added = await self.add_item_to_mail_list(maillist, thread_id) + if thread_successfully_added and msg.channel.is_news(): log.trace("Publishing mailing list message because it was in a news channel") await msg.publish() - await self.bot.api_client.put("bot/bot-settings/news", json=payload) + async def add_item_to_mail_list(self, mail_list: str, item_identifier: str) -> bool: + """Adds a new item to a particular mailing_list.""" + try: + await self.bot.api_client.post(f"bot/mailing-lists/{mail_list}/seen-items", json=item_identifier) + self.seen_items[mail_list].add(item_identifier) + # Increase this specific mailing list counter in stats + self.bot.stats.incr(f"python_news.posted.{mail_list.replace('-', '_')}") + return True + + except ResponseCodeError as e: + non_field_errors = e.response_json.get("non_field_errors", []) + if non_field_errors != ["Seen item already known."]: + raise e + log.trace( + "Item %s has already been seen in the following mailing list: %s", + item_identifier, + mail_list + ) + return False async def get_thread_and_first_mail(self, maillist: str, thread_identifier: str) -> tuple[t.Any, t.Any]: """Get mail thread and first mail from mail.python.org based on `maillist` and `thread_identifier`.""" @@ -238,6 +225,19 @@ class PythonNews(Cog): email_information = await resp.json() return thread_information, email_information + async def cog_load(self) -> None: + """Load all existing seen items from db and create any missing mailing lists.""" + response = await self.bot.api_client.get("bot/mailing-lists") + for mailing_list in response: + self.seen_items[mailing_list["name"]] = set(mailing_list["seen_items"]) + + for mailing_list in ("pep", *constants.PythonNews.mail_lists): + if mailing_list not in self.seen_items: + await self.bot.api_client.post("bot/mailing-lists", json={"name": mailing_list}) + self.seen_items[mailing_list] = set() + + self.fetch_new_media.start() + async def setup(bot: Bot) -> None: """Add `News` cog.""" |