about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- bot/cogs/news.py 150
1 files changed, 131 insertions, 19 deletions
diff --git a/bot/cogs/news.py b/bot/cogs/news.py
index 878e533ef..52c36da2e 100644
--- a/bot/cogs/news.py
+++ b/bot/cogs/news.py
@@ -1,8 +1,10 @@
import logging
-from datetime import datetime
+import typing as t
+from datetime import date, datetime
import discord
import feedparser
+from bs4 import BeautifulSoup
from discord.ext.commands import Cog
from discord.ext.tasks import loop
@@ -11,6 +13,13 @@ from bot.bot import Bot
PEPS_RSS_URL = "https://www.python.org/dev/peps/peps.rss/"
+RECENT_THREADS_TEMPLATE = "https://mail.python.org/archives/list/{name}@python.org/recent-threads"
+THREAD_TEMPLATE_URL = "https://mail.python.org/archives/api/list/{name}@python.org/thread/{id}/"
+MAILMAN_PROFILE_URL = "https://mail.python.org/archives/users/{id}/"
+THREAD_URL = "https://mail.python.org/archives/list/{list}@python.org/thread/{id}/"
+
+AVATAR_URL = "https://www.python.org/static/opengraph-icon-200x200.png"
+
log = logging.getLogger(__name__)
@@ -24,6 +33,7 @@ class News(Cog):
self.bot.loop.create_task(self.get_webhook_names())
self.post_pep_news.start()
+ self.post_maillist_news.start()
async def sync_maillists(self) -> None:
"""Sync currently in-use maillists with API."""
@@ -74,8 +84,8 @@ class News(Cog):
if message is None:
message = await news_channel.fetch_message(pep_id)
if message is None:
- log.warning(f"Can't fetch news message with ID {pep_id}. Deleting it entry from DB.")
- payload["data"]["pep"].remove(pep_id)
+ log.warning("Can't fetch PEP new message ID.")
+ continue
pep_news.append((message.embeds[0].title, message.embeds[0].timestamp))
# Reverse entries to send oldest first
@@ -87,30 +97,132 @@ class News(Cog):
log.warning(f"Wrong datetime format passed in PEP new: {new['published']}")
continue
if (
- any(pep_new[0] == new["title"] for pep_new in pep_news)
- and any(pep_new[1] == new_datetime for pep_new in pep_news)
+ (any(pep_new[0] == new["title"] for pep_new in pep_news)
+ and any(pep_new[1] == new_datetime for pep_new in pep_news))
+ or new_datetime.date() < date.today()
):
continue
- embed = discord.Embed(
- title=new["title"],
- description=new["summary"],
- timestamp=new_datetime,
- url=new["link"],
- colour=constants.Colours.soft_green
- )
-
- pep_msg = await webhook.send(
- embed=embed,
- username=data["feed"]["title"],
- avatar_url="https://www.python.org/static/opengraph-icon-200x200.png",
- wait=True
+ msg_id = await self.send_webhook(
+ webhook,
+ new["title"],
+ new["summary"],
+ new_datetime,
+ new["link"],
+ None,
+ None,
+ data["feed"]["title"]
)
- payload["data"]["pep"].append(pep_msg.id)
+ payload["data"]["pep"].append(msg_id)
# Apply new sent news to DB to avoid duplicate sending
await self.bot.api_client.put("bot/bot-settings/news", json=payload)
+    @loop(minutes=20)
+    async def post_maillist_news(self) -> None:
+        """
+        Post new threads from the configured mailing lists through the news webhook.
+
+        For every list in `constants.PythonNews.mail_lists` the recent-threads page is scraped for
+        thread links. A thread is posted when its starting email is not dated before today and no
+        matching entry is found by `check_new_exist`. IDs of the sent messages are appended to the
+        settings payload, which is written back to "bot/bot-settings/news" at the end.
+        """
+        await self.bot.wait_until_guild_available()
+        webhook = await self.bot.fetch_webhook(constants.PythonNews.webhook)
+        existing_news = await self.bot.api_client.get("bot/bot-settings/news")
+        payload = existing_news.copy()
+
+        for maillist in constants.PythonNews.mail_lists:
+            recent_threads_url = RECENT_THREADS_TEMPLATE.format(name=maillist)
+            async with self.bot.http_session.get(recent_threads_url) as resp:
+                # NOTE(review): no parser is named here, so BeautifulSoup picks one itself - confirm intended
+                recents = BeautifulSoup(await resp.text())
+
+            for anchor in recents.html.body.div.find_all("a", href=True):
+                href = anchor["href"]
+                # Only links that carry a thread identifier are wanted; "latest" links are skipped
+                if "latest" in href:
+                    continue
+
+                # The identifier is the second-to-last segment of the link path
+                thread_id = href.split("/")[-2]
+                thread_information, email_information = await self.get_thread_and_first_mail(maillist, thread_id)
+
+                raw_date = email_information["date"]
+                try:
+                    new_date = datetime.strptime(raw_date, "%Y-%m-%dT%X%z")
+                except ValueError:
+                    log.warning(f"Invalid datetime from Thread email: {raw_date}")
+                    continue
+
+                subject = thread_information["subject"]
+                # Already posted entries are checked first, then anything dated before today is dropped
+                if await self.check_new_exist(subject, new_date, maillist, existing_news):
+                    continue
+                if new_date.date() < date.today():
+                    continue
+
+                content = email_information["content"]
+                link = THREAD_URL.format(id=thread_id, list=maillist)
+                # Long mails are cut to 500 characters and followed by a link to the full thread
+                if len(content) > 500:
+                    description = content[:500] + f"... [continue reading]({link})"
+                else:
+                    description = content
+
+                sender = email_information["sender"]
+                msg_id = await self.send_webhook(
+                    webhook,
+                    subject,
+                    description,
+                    new_date,
+                    link,
+                    f"{email_information['sender_name']} ({sender['address']})",
+                    MAILMAN_PROFILE_URL.format(id=sender["mailman_id"]),
+                    self.webhook_names[maillist]
+                )
+                payload["data"][maillist].append(msg_id)
+
+        await self.bot.api_client.put("bot/bot-settings/news", json=payload)
+
+    async def check_new_exist(self, title: str, timestamp: datetime, maillist: str, news: t.Dict[str, t.Any]) -> bool:
+        """
+        Return whether an entry with this title and timestamp was already posted for `maillist`.
+
+        The values in `news["data"][maillist]` are treated as message IDs. Each message is looked up
+        in the bot's message cache first and fetched from the news channel otherwise; the title and
+        timestamp of its first embed are then compared with `title` and `timestamp`.
+
+        IDs whose message can't be found, or whose message has no embed, are skipped instead of
+        ending the search, so one deleted message can't hide a later match.
+        """
+        channel = await self.bot.fetch_channel(constants.PythonNews.channel)
+
+        for message_id in news["data"][maillist]:
+            message = discord.utils.get(self.bot.cached_messages, id=message_id)
+            if message is None:
+                try:
+                    message = await channel.fetch_message(message_id)
+                except discord.NotFound:
+                    # `fetch_message` raises for a missing message rather than returning None
+                    message = None
+
+            if message is None:
+                log.warning(f"Can't fetch news message with ID {message_id}. Skipping it.")
+                continue
+
+            if not message.embeds:
+                continue
+
+            embed = message.embeds[0]
+            # NOTE(review): the maillist caller parses `timestamp` with "%z" (timezone-aware); if the
+            # embed timestamp comes back naive this equality is always False - confirm.
+            if embed.title == title and embed.timestamp == timestamp:
+                return True
+
+        return False
+
+    async def send_webhook(self,
+                           webhook: discord.Webhook,
+                           title: str,
+                           description: str,
+                           timestamp: datetime,
+                           url: str,
+                           author: t.Optional[str],
+                           author_url: t.Optional[str],
+                           webhook_profile_name: str
+                           ) -> int:
+        """
+        Send one news entry as an embed through `webhook` and return the ID of the sent message.
+
+        `title`, `description`, `timestamp` and `url` fill the embed itself. `author` and `author_url`
+        are optional: the embed author is only set when `author` is given, and the author link only
+        when `author_url` is given as well. `webhook_profile_name` is used as the username the
+        webhook message is posted under.
+        """
+        embed = discord.Embed(
+            title=title,
+            description=description,
+            timestamp=timestamp,
+            url=url,
+            colour=constants.Colours.soft_green
+        )
+
+        # The PEP feed passes None for both author fields. `Embed.set_author` converts its
+        # arguments with `str()`, which would render them as the literal text "None".
+        if author is not None:
+            author_fields = {"name": author}
+            if author_url is not None:
+                author_fields["url"] = author_url
+            embed.set_author(**author_fields)
+
+        # `wait=True` makes the webhook return the created message, which is needed for its ID
+        msg = await webhook.send(
+            embed=embed,
+            username=webhook_profile_name,
+            avatar_url=AVATAR_URL,
+            wait=True
+        )
+        return msg.id
+
+    async def get_thread_and_first_mail(self, maillist: str, thread_identifier: str) -> t.Tuple[t.Any, t.Any]:
+        """
+        Fetch a mail thread and its starting email from mail.python.org.
+
+        The thread is requested by `maillist` and `thread_identifier`; the email is requested from
+        the URL found under the thread's "starting_email" key. Both decoded JSON responses are
+        returned as a `(thread, email)` tuple.
+        """
+        session = self.bot.http_session
+        thread_url = THREAD_TEMPLATE_URL.format(name=maillist, id=thread_identifier)
+
+        async with session.get(thread_url) as thread_resp:
+            thread = await thread_resp.json()
+
+        async with session.get(thread["starting_email"]) as mail_resp:
+            first_mail = await mail_resp.json()
+
+        return thread, first_mail
+
def setup(bot: Bot) -> None:
"""Add `News` cog."""