diff options
author | 2020-05-13 13:12:54 +0200 | |
---|---|---|
committer | 2020-05-13 13:12:54 +0200 | |
commit | 64e81d2b625591ec98e2e0ccd4eb59f89b1b33b1 (patch) | |
tree | 4dcea3ce0825c69148431d24cc2aa4bd35a99872 | |
parent | Expand guild whitelist (diff) | |
parent | Merge branch 'master' into python-news (diff) |
Merge pull request #899 from ks129/python-news
Python News implemention
-rw-r--r-- | Pipfile | 2 | ||||
-rw-r--r-- | Pipfile.lock | 110 | ||||
-rw-r--r-- | bot/__init__.py | 1 | ||||
-rw-r--r-- | bot/__main__.py | 1 | ||||
-rw-r--r-- | bot/cogs/python_news.py | 234 | ||||
-rw-r--r-- | bot/constants.py | 8 | ||||
-rw-r--r-- | config-default.yml | 20 |
7 files changed, 321 insertions, 55 deletions
@@ -21,6 +21,8 @@ sentry-sdk = "~=0.14" coloredlogs = "~=14.0" colorama = {version = "~=0.4.3",sys_platform = "== 'win32'"} statsd = "~=3.3" +feedparser = "~=5.2" +beautifulsoup4 = "~=4.9" [dev-packages] coverage = "~=5.0" diff --git a/Pipfile.lock b/Pipfile.lock index 19e03bda4..4e7050a13 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "10636aef5a07f17bd00608df2cc5214fcbfe3de4745cdeea7a076b871754620a" + "sha256": "64620e7e825c74fd3010821fb30843b19f5dafb2b5a1f6eafedc0a5febd99b69" }, "pipfile-spec": 6, "requires": { @@ -91,6 +91,7 @@ "sha256:a4bbe77fd30670455c5296242967a123ec28c37e9702a8a81bd2f20a4baf0368", "sha256:d4e96ac9b0c3a6d3f0caae2e4124e6055c5dcafde8e2f831ff194c104f0775a0" ], + "index": "pypi", "version": "==4.9.0" }, "certifi": { @@ -179,6 +180,15 @@ ], "version": "==0.16" }, + "feedparser": { + "hashes": [ + "sha256:bd030652c2d08532c034c27fcd7c85868e7fa3cb2b17f230a44a6bbc92519bf9", + "sha256:cd2485472e41471632ed3029d44033ee420ad0b57111db95c240c9160a85831c", + "sha256:ce875495c90ebd74b179855449040003a1beb40cd13d5f037a0654251e260b02" + ], + "index": "pypi", + "version": "==5.2.1" + }, "fuzzywuzzy": { "hashes": [ "sha256:45016e92264780e58972dca1b3d939ac864b78437422beecebb3095f8efd00e8", @@ -189,10 +199,10 @@ }, "humanfriendly": { "hashes": [ - "sha256:25c2108a45cfd1e8fbe9cdb30b825d34ef5d5675c8e11e4775c9aedbfb0bdee2", - "sha256:3a831920e40e55ad49adb64c9179ed50c604cabca72cd300e7bd5b51310e4ebb" + "sha256:bf52ec91244819c780341a3438d5d7b09f431d3f113a475147ac9b7b167a3d12", + "sha256:e78960b31198511f45fd455534ae7645a6207d33e512d2e842c766d15d9c8080" ], - "version": "==8.1" + "version": "==8.2" }, "idna": { "hashes": [ @@ -210,10 +220,10 @@ }, "jinja2": { "hashes": [ - "sha256:93187ffbc7808079673ef52771baa950426fd664d3aad1d0fa3e95644360e250", - "sha256:b0eaf100007721b5c16c1fc1eecb87409464edc10469ddc9a22a27a99123be49" + "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0", + "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035" ], - "version": "==2.11.1" + "version": "==2.11.2" }, "lxml": { "hashes": [ @@ -527,10 +537,10 @@ }, "urllib3": { "hashes": [ - "sha256:2f3db8b19923a873b3e5256dc9c2dedfa883e33d87c690d9c7913e1f40673cdc", - "sha256:87716c2d2a7121198ebcb7ce7cccf6ce5e9ba539041cfbaeecfb641dc0bf6acc" + "sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527", + "sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115" ], - "version": "==1.25.8" + "version": "==1.25.9" }, "websockets": { "hashes": [ @@ -606,40 +616,40 @@ }, "coverage": { "hashes": [ - "sha256:03f630aba2b9b0d69871c2e8d23a69b7fe94a1e2f5f10df5049c0df99db639a0", - "sha256:046a1a742e66d065d16fb564a26c2a15867f17695e7f3d358d7b1ad8a61bca30", - "sha256:0a907199566269e1cfa304325cc3b45c72ae341fbb3253ddde19fa820ded7a8b", - "sha256:165a48268bfb5a77e2d9dbb80de7ea917332a79c7adb747bd005b3a07ff8caf0", - "sha256:1b60a95fc995649464e0cd48cecc8288bac5f4198f21d04b8229dc4097d76823", - "sha256:1f66cf263ec77af5b8fe14ef14c5e46e2eb4a795ac495ad7c03adc72ae43fafe", - "sha256:2e08c32cbede4a29e2a701822291ae2bc9b5220a971bba9d1e7615312efd3037", - "sha256:3844c3dab800ca8536f75ae89f3cf566848a3eb2af4d9f7b1103b4f4f7a5dad6", - "sha256:408ce64078398b2ee2ec08199ea3fcf382828d2f8a19c5a5ba2946fe5ddc6c31", - "sha256:443be7602c790960b9514567917af538cac7807a7c0c0727c4d2bbd4014920fd", - "sha256:4482f69e0701139d0f2c44f3c395d1d1d37abd81bfafbf9b6efbe2542679d892", - "sha256:4a8a259bf990044351baf69d3b23e575699dd60b18460c71e81dc565f5819ac1", - "sha256:513e6526e0082c59a984448f4104c9bf346c2da9961779ede1fc458e8e8a1f78", - "sha256:5f587dfd83cb669933186661a351ad6fc7166273bc3e3a1531ec5c783d997aac", - "sha256:62061e87071497951155cbccee487980524d7abea647a1b2a6eb6b9647df9006", - "sha256:641e329e7f2c01531c45c687efcec8aeca2a78a4ff26d49184dce3d53fc35014", - "sha256:65a7e00c00472cd0f59ae09d2fb8a8aaae7f4a0cf54b2b74f3138d9f9ceb9cb2", - "sha256:6ad6ca45e9e92c05295f638e78cd42bfaaf8ee07878c9ed73e93190b26c125f7", - "sha256:73aa6e86034dad9f00f4bbf5a666a889d17d79db73bc5af04abd6c20a014d9c8", - "sha256:7c9762f80a25d8d0e4ab3cb1af5d9dffbddb3ee5d21c43e3474c84bf5ff941f7", - "sha256:85596aa5d9aac1bf39fe39d9fa1051b0f00823982a1de5766e35d495b4a36ca9", - "sha256:86a0ea78fd851b313b2e712266f663e13b6bc78c2fb260b079e8b67d970474b1", - "sha256:8a620767b8209f3446197c0e29ba895d75a1e272a36af0786ec70fe7834e4307", - "sha256:922fb9ef2c67c3ab20e22948dcfd783397e4c043a5c5fa5ff5e9df5529074b0a", - "sha256:9fad78c13e71546a76c2f8789623eec8e499f8d2d799f4b4547162ce0a4df435", - "sha256:a37c6233b28e5bc340054cf6170e7090a4e85069513320275a4dc929144dccf0", - "sha256:c3fc325ce4cbf902d05a80daa47b645d07e796a80682c1c5800d6ac5045193e5", - "sha256:cda33311cb9fb9323958a69499a667bd728a39a7aa4718d7622597a44c4f1441", - "sha256:db1d4e38c9b15be1521722e946ee24f6db95b189d1447fa9ff18dd16ba89f732", - "sha256:eda55e6e9ea258f5e4add23bcf33dc53b2c319e70806e180aecbff8d90ea24de", - "sha256:f372cdbb240e09ee855735b9d85e7f50730dcfb6296b74b95a3e5dea0615c4c1" - ], - "index": "pypi", - "version": "==5.0.4" + "sha256:00f1d23f4336efc3b311ed0d807feb45098fc86dee1ca13b3d6768cdab187c8a", + "sha256:01333e1bd22c59713ba8a79f088b3955946e293114479bbfc2e37d522be03355", + "sha256:0cb4be7e784dcdc050fc58ef05b71aa8e89b7e6636b99967fadbdba694cf2b65", + "sha256:0e61d9803d5851849c24f78227939c701ced6704f337cad0a91e0972c51c1ee7", + "sha256:1601e480b9b99697a570cea7ef749e88123c04b92d84cedaa01e117436b4a0a9", + "sha256:2742c7515b9eb368718cd091bad1a1b44135cc72468c731302b3d641895b83d1", + "sha256:2d27a3f742c98e5c6b461ee6ef7287400a1956c11421eb574d843d9ec1f772f0", + "sha256:402e1744733df483b93abbf209283898e9f0d67470707e3c7516d84f48524f55", + "sha256:5c542d1e62eece33c306d66fe0a5c4f7f7b3c08fecc46ead86d7916684b36d6c", + "sha256:5f2294dbf7875b991c381e3d5af2bcc3494d836affa52b809c91697449d0eda6", + "sha256:6402bd2fdedabbdb63a316308142597534ea8e1895f4e7d8bf7476c5e8751fef", + "sha256:66460ab1599d3cf894bb6baee8c684788819b71a5dc1e8fa2ecc152e5d752019", + "sha256:782caea581a6e9ff75eccda79287daefd1d2631cc09d642b6ee2d6da21fc0a4e", + "sha256:79a3cfd6346ce6c13145731d39db47b7a7b859c0272f02cdb89a3bdcbae233a0", + "sha256:7a5bdad4edec57b5fb8dae7d3ee58622d626fd3a0be0dfceda162a7035885ecf", + "sha256:8fa0cbc7ecad630e5b0f4f35b0f6ad419246b02bc750de7ac66db92667996d24", + "sha256:a027ef0492ede1e03a8054e3c37b8def89a1e3c471482e9f046906ba4f2aafd2", + "sha256:a3f3654d5734a3ece152636aad89f58afc9213c6520062db3978239db122f03c", + "sha256:a82b92b04a23d3c8a581fc049228bafde988abacba397d57ce95fe95e0338ab4", + "sha256:acf3763ed01af8410fc36afea23707d4ea58ba7e86a8ee915dfb9ceff9ef69d0", + "sha256:adeb4c5b608574a3d647011af36f7586811a2c1197c861aedb548dd2453b41cd", + "sha256:b83835506dfc185a319031cf853fa4bb1b3974b1f913f5bb1a0f3d98bdcded04", + "sha256:bb28a7245de68bf29f6fb199545d072d1036a1917dca17a1e75bbb919e14ee8e", + "sha256:bf9cb9a9fd8891e7efd2d44deb24b86d647394b9705b744ff6f8261e6f29a730", + "sha256:c317eaf5ff46a34305b202e73404f55f7389ef834b8dbf4da09b9b9b37f76dd2", + "sha256:dbe8c6ae7534b5b024296464f387d57c13caa942f6d8e6e0346f27e509f0f768", + "sha256:de807ae933cfb7f0c7d9d981a053772452217df2bf38e7e6267c9cbf9545a796", + "sha256:dead2ddede4c7ba6cb3a721870f5141c97dc7d85a079edb4bd8d88c3ad5b20c7", + "sha256:dec5202bfe6f672d4511086e125db035a52b00f1648d6407cc8e526912c0353a", + "sha256:e1ea316102ea1e1770724db01998d1603ed921c54a86a2efcb03428d5417e489", + "sha256:f90bfc4ad18450c80b024036eaf91e4a246ae287701aaa88eaebebf150868052" + ], + "index": "pypi", + "version": "==5.1" }, "distlib": { "hashes": [ @@ -671,11 +681,11 @@ }, "flake8-annotations": { "hashes": [ - "sha256:a38b44d01abd480586a92a02a2b0a36231ec42dcc5e114de78fa5db016d8d3f9", - "sha256:d5b0e8704e4e7728b352fa1464e23539ff2341ba11cc153b536fa2cf921ee659" + "sha256:9091d920406a7ff10e401e0dd1baa396d1d7d2e3d101a9beecf815f5894ad554", + "sha256:f59fdceb8c8f380a20aed20e1ba8a57bde05935958166c52be2249f113f7ab75" ], "index": "pypi", - "version": "==2.0.1" + "version": "==2.1.0" }, "flake8-bugbear": { "hashes": [ @@ -836,10 +846,10 @@ }, "virtualenv": { "hashes": [ - "sha256:00cfe8605fb97f5a59d52baab78e6070e72c12ca64f51151695407cc0eb8a431", - "sha256:c8364ec469084046c779c9a11ae6340094e8a0bf1d844330fc55c1cefe67c172" + "sha256:5021396e8f03d0d002a770da90e31e61159684db2859d0ba4850fbea752aa675", + "sha256:ac53ade75ca189bc97b6c1d9ec0f1a50efe33cbf178ae09452dcd9fd309013c1" ], - "version": "==20.0.17" + "version": "==20.0.18" } } } diff --git a/bot/__init__.py b/bot/__init__.py index 4131b69e9..d63086fe2 100644 --- a/bot/__init__.py +++ b/bot/__init__.py @@ -59,6 +59,7 @@ coloredlogs.install(logger=root_log, stream=sys.stdout) logging.getLogger("discord").setLevel(logging.WARNING) logging.getLogger("websockets").setLevel(logging.WARNING) +logging.getLogger("chardet").setLevel(logging.WARNING) logging.getLogger(__name__) diff --git a/bot/__main__.py b/bot/__main__.py index 3aa36bfc0..aa1d1aee8 100644 --- a/bot/__main__.py +++ b/bot/__main__.py @@ -51,6 +51,7 @@ bot.load_extension("bot.cogs.eval") bot.load_extension("bot.cogs.information") bot.load_extension("bot.cogs.jams") bot.load_extension("bot.cogs.moderation") +bot.load_extension("bot.cogs.python_news") bot.load_extension("bot.cogs.off_topic_names") bot.load_extension("bot.cogs.reddit") bot.load_extension("bot.cogs.reminders") diff --git a/bot/cogs/python_news.py b/bot/cogs/python_news.py new file mode 100644 index 000000000..57ce61638 --- /dev/null +++ b/bot/cogs/python_news.py @@ -0,0 +1,234 @@ +import logging +import typing as t +from datetime import date, datetime + +import discord +import feedparser +from bs4 import BeautifulSoup +from discord.ext.commands import Cog +from discord.ext.tasks import loop + +from bot import constants +from bot.bot import Bot + +PEPS_RSS_URL = "https://www.python.org/dev/peps/peps.rss/" + +RECENT_THREADS_TEMPLATE = "https://mail.python.org/archives/list/{name}@python.org/recent-threads" +THREAD_TEMPLATE_URL = "https://mail.python.org/archives/api/list/{name}@python.org/thread/{id}/" +MAILMAN_PROFILE_URL = "https://mail.python.org/archives/users/{id}/" +THREAD_URL = "https://mail.python.org/archives/list/{list}@python.org/thread/{id}/" + +AVATAR_URL = "https://www.python.org/static/opengraph-icon-200x200.png" + +log = logging.getLogger(__name__) + + +class PythonNews(Cog): + """Post new PEPs and Python News to `#python-news`.""" + + def __init__(self, bot: Bot): + self.bot = bot + self.webhook_names = {} + self.webhook: t.Optional[discord.Webhook] = None + + self.bot.loop.create_task(self.get_webhook_names()) + self.bot.loop.create_task(self.get_webhook_and_channel()) + + async def start_tasks(self) -> None: + """Start the tasks for fetching new PEPs and mailing list messages.""" + self.fetch_new_media.start() + + @loop(minutes=20) + async def fetch_new_media(self) -> None: + """Fetch new mailing list messages and then new PEPs.""" + await self.post_maillist_news() + await self.post_pep_news() + + async def sync_maillists(self) -> None: + """Sync currently in-use maillists with API.""" + # Wait until guild is available to avoid running before everything is ready + await self.bot.wait_until_guild_available() + + response = await self.bot.api_client.get("bot/bot-settings/news") + for mail in constants.PythonNews.mail_lists: + if mail not in response["data"]: + response["data"][mail] = [] + + # Because we are handling PEPs differently, we don't include it to mail lists + if "pep" not in response["data"]: + response["data"]["pep"] = [] + + await self.bot.api_client.put("bot/bot-settings/news", json=response) + + async def get_webhook_names(self) -> None: + """Get webhook author names from maillist API.""" + await self.bot.wait_until_guild_available() + + async with self.bot.http_session.get("https://mail.python.org/archives/api/lists") as resp: + lists = await resp.json() + + for mail in lists: + if mail["name"].split("@")[0] in constants.PythonNews.mail_lists: + self.webhook_names[mail["name"].split("@")[0]] = mail["display_name"] + + async def post_pep_news(self) -> None: + """Fetch new PEPs and when they don't have announcement in #python-news, create it.""" + # Wait until everything is ready and http_session available + await self.bot.wait_until_guild_available() + await self.sync_maillists() + + async with self.bot.http_session.get(PEPS_RSS_URL) as resp: + data = feedparser.parse(await resp.text("utf-8")) + + news_listing = await self.bot.api_client.get("bot/bot-settings/news") + payload = news_listing.copy() + pep_numbers = news_listing["data"]["pep"] + + # Reverse entries to send oldest first + data["entries"].reverse() + for new in data["entries"]: + try: + new_datetime = datetime.strptime(new["published"], "%a, %d %b %Y %X %Z") + except ValueError: + log.warning(f"Wrong datetime format passed in PEP new: {new['published']}") + continue + pep_nr = new["title"].split(":")[0].split()[1] + if ( + pep_nr in pep_numbers + or new_datetime.date() < date.today() + ): + continue + + msg = await self.send_webhook( + title=new["title"], + description=new["summary"], + timestamp=new_datetime, + url=new["link"], + webhook_profile_name=data["feed"]["title"], + footer=data["feed"]["title"] + ) + payload["data"]["pep"].append(pep_nr) + + if msg.channel.is_news(): + log.trace("Publishing PEP annnouncement because it was in a news channel") + await msg.publish() + + # Apply new sent news to DB to avoid duplicate sending + await self.bot.api_client.put("bot/bot-settings/news", json=payload) + + async def post_maillist_news(self) -> None: + """Send new maillist threads to #python-news that is listed in configuration.""" + await self.bot.wait_until_guild_available() + await self.sync_maillists() + existing_news = await self.bot.api_client.get("bot/bot-settings/news") + payload = existing_news.copy() + + for maillist in constants.PythonNews.mail_lists: + async with self.bot.http_session.get(RECENT_THREADS_TEMPLATE.format(name=maillist)) as resp: + recents = BeautifulSoup(await resp.text(), features="lxml") + + # When a <p> element is present in the response then the mailing list + # has not had any activity during the current month, so therefore it + # can be ignored. + if recents.p: + continue + + for thread in recents.html.body.div.find_all("a", href=True): + # We want only these threads that have identifiers + if "latest" in thread["href"]: + continue + + thread_information, email_information = await self.get_thread_and_first_mail( + maillist, thread["href"].split("/")[-2] + ) + + try: + new_date = datetime.strptime(email_information["date"], "%Y-%m-%dT%X%z") + except ValueError: + log.warning(f"Invalid datetime from Thread email: {email_information['date']}") + continue + + if ( + thread_information["thread_id"] in existing_news["data"][maillist] + or new_date.date() < date.today() + ): + continue + + content = email_information["content"] + link = THREAD_URL.format(id=thread["href"].split("/")[-2], list=maillist) + msg = await self.send_webhook( + title=thread_information["subject"], + description=content[:500] + f"... [continue reading]({link})" if len(content) > 500 else content, + timestamp=new_date, + url=link, + author=f"{email_information['sender_name']} ({email_information['sender']['address']})", + author_url=MAILMAN_PROFILE_URL.format(id=email_information["sender"]["mailman_id"]), + webhook_profile_name=self.webhook_names[maillist], + footer=f"Posted to {self.webhook_names[maillist]}" + ) + payload["data"][maillist].append(thread_information["thread_id"]) + + if msg.channel.is_news(): + log.trace("Publishing mailing list message because it was in a news channel") + await msg.publish() + + await self.bot.api_client.put("bot/bot-settings/news", json=payload) + + async def send_webhook(self, + title: str, + description: str, + timestamp: datetime, + url: str, + webhook_profile_name: str, + footer: str, + author: t.Optional[str] = None, + author_url: t.Optional[str] = None, + ) -> discord.Message: + """Send webhook entry and return sent message.""" + embed = discord.Embed( + title=title, + description=description, + timestamp=timestamp, + url=url, + colour=constants.Colours.soft_green + ) + if author and author_url: + embed.set_author( + name=author, + url=author_url + ) + embed.set_footer(text=footer, icon_url=AVATAR_URL) + + return await self.webhook.send( + embed=embed, + username=webhook_profile_name, + avatar_url=AVATAR_URL, + wait=True + ) + + async def get_thread_and_first_mail(self, maillist: str, thread_identifier: str) -> t.Tuple[t.Any, t.Any]: + """Get mail thread and first mail from mail.python.org based on `maillist` and `thread_identifier`.""" + async with self.bot.http_session.get( + THREAD_TEMPLATE_URL.format(name=maillist, id=thread_identifier) + ) as resp: + thread_information = await resp.json() + + async with self.bot.http_session.get(thread_information["starting_email"]) as resp: + email_information = await resp.json() + return thread_information, email_information + + async def get_webhook_and_channel(self) -> None: + """Storage #python-news channel Webhook and `TextChannel` to `News.webhook` and `channel`.""" + await self.bot.wait_until_guild_available() + self.webhook = await self.bot.fetch_webhook(constants.PythonNews.webhook) + + await self.start_tasks() + + def cog_unload(self) -> None: + """Stop news posting tasks on cog unload.""" + self.fetch_new_media.cancel() + + +def setup(bot: Bot) -> None: + """Add `News` cog.""" + bot.add_cog(PythonNews(bot)) diff --git a/bot/constants.py b/bot/constants.py index da29125eb..fd280e9de 100644 --- a/bot/constants.py +++ b/bot/constants.py @@ -564,6 +564,14 @@ class Sync(metaclass=YAMLGetter): max_diff: int +class PythonNews(metaclass=YAMLGetter): + section = 'python_news' + + mail_lists: List[str] + channel: int + webhook: int + + class Event(Enum): """ Event names. This does not include every event (for example, raw diff --git a/config-default.yml b/config-default.yml index 6d97b7f33..83ea59016 100644 --- a/config-default.yml +++ b/config-default.yml @@ -122,6 +122,7 @@ guild: channels: announcements: 354619224620138496 user_event_announcements: &USER_EVENT_A 592000283102674944 + python_news: &PYNEWS_CHANNEL 704372456592506880 # Development dev_contrib: &DEV_CONTRIB 635950537262759947 @@ -235,11 +236,12 @@ guild: - *HELPERS_ROLE webhooks: - talent_pool: 569145364800602132 - big_brother: 569133704568373283 - reddit: 635408384794951680 - duck_pond: 637821475327311927 - dev_log: 680501655111729222 + talent_pool: 569145364800602132 + big_brother: 569133704568373283 + reddit: 635408384794951680 + duck_pond: 637821475327311927 + dev_log: 680501655111729222 + python_news: &PYNEWS_WEBHOOK 704381182279942324 filter: @@ -575,5 +577,13 @@ duck_pond: - *DUCKY_MAUL - *DUCKY_SANTA +python_news: + mail_lists: + - 'python-ideas' + - 'python-announce-list' + - 'pypi-announce' + channel: *PYNEWS_CHANNEL + webhook: *PYNEWS_WEBHOOK + config: required_keys: ['bot.token'] |