diff options
author | 2020-11-30 15:18:01 +0100 | |
---|---|---|
committer | 2020-11-30 15:18:01 +0100 | |
commit | 556f0c5349cd5b4875953154242b863fdee510a2 (patch) | |
tree | a2a13567372ba380bda28ea53435d9a579a1d43b /bot/exts/halloween/hacktoberstats.py | |
parent | Make prideavatar support specifying the image by URL (diff) | |
parent | Merge pull request #532 from python-discord/sebastiaan/ci/add-core-dev-approv... (diff) |
Merge branch 'master' into prideavatar-url
Diffstat (limited to 'bot/exts/halloween/hacktoberstats.py')
-rw-r--r-- | bot/exts/halloween/hacktoberstats.py | 358 |
1 file changed, 220 insertions, 138 deletions
diff --git a/bot/exts/halloween/hacktoberstats.py b/bot/exts/halloween/hacktoberstats.py index ed1755e3..84b75022 100644 --- a/bot/exts/halloween/hacktoberstats.py +++ b/bot/exts/halloween/hacktoberstats.py @@ -1,28 +1,30 @@ -import json import logging import re from collections import Counter -from datetime import datetime -from pathlib import Path -from typing import List, Tuple +from datetime import datetime, timedelta +from typing import List, Tuple, Union import aiohttp import discord +from async_rediscache import RedisCache from discord.ext import commands from bot.constants import Channels, Month, Tokens, WHITELISTED_CHANNELS from bot.utils.decorators import in_month, override_in_channel -from bot.utils.persist import make_persistent log = logging.getLogger(__name__) CURRENT_YEAR = datetime.now().year # Used to construct GH API query PRS_FOR_SHIRT = 4 # Minimum number of PRs before a shirt is awarded +REVIEW_DAYS = 14 # number of days needed after PR can be mature HACKTOBER_WHITELIST = WHITELISTED_CHANNELS + (Channels.hacktoberfest_2020,) REQUEST_HEADERS = {"User-Agent": "Python Discord Hacktoberbot"} +# using repo topics API during preview period requires an accept header +GITHUB_TOPICS_ACCEPT_HEADER = {"Accept": "application/vnd.github.mercy-preview+json"} if GITHUB_TOKEN := Tokens.github: REQUEST_HEADERS["Authorization"] = f"token {GITHUB_TOKEN}" + GITHUB_TOPICS_ACCEPT_HEADER["Authorization"] = f"token {GITHUB_TOKEN}" GITHUB_NONEXISTENT_USER_MESSAGE = ( "The listed users cannot be searched either because the users do not exist " @@ -33,10 +35,11 @@ GITHUB_NONEXISTENT_USER_MESSAGE = ( class HacktoberStats(commands.Cog): """Hacktoberfest statistics Cog.""" + # Stores mapping of user IDs and GitHub usernames + linked_accounts = RedisCache() + def __init__(self, bot: commands.Bot): self.bot = bot - self.link_json = make_persistent(Path("bot", "resources", "halloween", "github_links.json")) - self.linked_accounts = self.load_linked_users() 
@in_month(Month.SEPTEMBER, Month.OCTOBER, Month.NOVEMBER) @commands.group(name="hacktoberstats", aliases=("hackstats",), invoke_without_command=True) @@ -50,10 +53,10 @@ class HacktoberStats(commands.Cog): get that user's contributions """ if not github_username: - author_id, author_mention = HacktoberStats._author_mention_from_context(ctx) + author_id, author_mention = self._author_mention_from_context(ctx) - if str(author_id) in self.linked_accounts.keys(): - github_username = self.linked_accounts[author_id]["github_username"] + if await self.linked_accounts.contains(author_id): + github_username = await self.linked_accounts.get(author_id) logging.info(f"Getting stats for {author_id} linked GitHub account '{github_username}'") else: msg = ( @@ -73,30 +76,19 @@ class HacktoberStats(commands.Cog): """ Link the invoking user's Github github_username to their Discord ID. - Linked users are stored as a nested dict: - { - Discord_ID: { - "github_username": str - "date_added": datetime - } - } + Linked users are stored in Redis: User ID => GitHub Username. 
""" - author_id, author_mention = HacktoberStats._author_mention_from_context(ctx) + author_id, author_mention = self._author_mention_from_context(ctx) if github_username: - if str(author_id) in self.linked_accounts.keys(): - old_username = self.linked_accounts[author_id]["github_username"] + if await self.linked_accounts.contains(author_id): + old_username = await self.linked_accounts.get(author_id) logging.info(f"{author_id} has changed their github link from '{old_username}' to '{github_username}'") await ctx.send(f"{author_mention}, your GitHub username has been updated to: '{github_username}'") else: logging.info(f"{author_id} has added a github link to '{github_username}'") await ctx.send(f"{author_mention}, your GitHub username has been added") - self.linked_accounts[author_id] = { - "github_username": github_username, - "date_added": datetime.now() - } - - self.save_linked_users() + await self.linked_accounts.set(author_id, github_username) else: logging.info(f"{author_id} tried to link a GitHub account but didn't provide a username") await ctx.send(f"{author_mention}, a GitHub username is required to link your account") @@ -106,9 +98,9 @@ class HacktoberStats(commands.Cog): @override_in_channel(HACKTOBER_WHITELIST) async def unlink_user(self, ctx: commands.Context) -> None: """Remove the invoking user's account link from the log.""" - author_id, author_mention = HacktoberStats._author_mention_from_context(ctx) + author_id, author_mention = self._author_mention_from_context(ctx) - stored_user = self.linked_accounts.pop(author_id, None) + stored_user = await self.linked_accounts.pop(author_id, None) if stored_user: await ctx.send(f"{author_mention}, your GitHub profile has been unlinked") logging.info(f"{author_id} has unlinked their GitHub account") @@ -116,53 +108,15 @@ class HacktoberStats(commands.Cog): await ctx.send(f"{author_mention}, you do not currently have a linked GitHub account") logging.info(f"{author_id} tried to unlink their GitHub account 
but no account was linked") - self.save_linked_users() - - def load_linked_users(self) -> dict: - """ - Load list of linked users from local JSON file. - - Linked users are stored as a nested dict: - { - Discord_ID: { - "github_username": str - "date_added": datetime - } - } - """ - if self.link_json.exists(): - logging.info(f"Loading linked GitHub accounts from '{self.link_json}'") - with open(self.link_json, 'r', encoding="utf8") as file: - linked_accounts = json.load(file) - - logging.info(f"Loaded {len(linked_accounts)} linked GitHub accounts from '{self.link_json}'") - return linked_accounts - else: - logging.info(f"Linked account log: '{self.link_json}' does not exist") - return {} - - def save_linked_users(self) -> None: - """ - Save list of linked users to local JSON file. - - Linked users are stored as a nested dict: - { - Discord_ID: { - "github_username": str - "date_added": datetime - } - } - """ - logging.info(f"Saving linked_accounts to '{self.link_json}'") - with open(self.link_json, 'w', encoding="utf8") as file: - json.dump(self.linked_accounts, file, default=str) - logging.info(f"linked_accounts saved to '{self.link_json}'") - async def get_stats(self, ctx: commands.Context, github_username: str) -> None: """ Query GitHub's API for PRs created by a GitHub user during the month of October. - PRs with the 'invalid' tag are ignored + PRs with an 'invalid' or 'spam' label are ignored + + For PRs created after October 3rd, they have to be in a repository that has a + 'hacktoberfest' topic, unless the PR is labelled 'hacktoberfest-accepted' for it + to count. 
If a valid github_username is provided, an embed is generated and posted to the channel @@ -172,19 +126,19 @@ class HacktoberStats(commands.Cog): prs = await self.get_october_prs(github_username) if prs: - stats_embed = self.build_embed(github_username, prs) + stats_embed = await self.build_embed(github_username, prs) await ctx.send('Here are some stats!', embed=stats_embed) else: - await ctx.send(f"No October GitHub contributions found for '{github_username}'") + await ctx.send(f"No valid October GitHub contributions found for '{github_username}'") - def build_embed(self, github_username: str, prs: List[dict]) -> discord.Embed: + async def build_embed(self, github_username: str, prs: List[dict]) -> discord.Embed: """Return a stats embed built from github_username's PRs.""" logging.info(f"Building Hacktoberfest embed for GitHub user: '{github_username}'") - pr_stats = self._summarize_prs(prs) + in_review, accepted = await self._categorize_prs(prs) - n = pr_stats['n_prs'] + n = len(accepted) + len(in_review) # total number of PRs if n >= PRS_FOR_SHIRT: - shirtstr = f"**{github_username} has earned a T-shirt or a tree!**" + shirtstr = f"**{github_username} is eligible for a T-shirt or a tree!**" elif n == PRS_FOR_SHIRT - 1: shirtstr = f"**{github_username} is 1 PR away from a T-shirt or a tree!**" else: @@ -194,8 +148,8 @@ class HacktoberStats(commands.Cog): title=f"{github_username}'s Hacktoberfest", color=discord.Color(0x9c4af7), description=( - f"{github_username} has made {n} " - f"{HacktoberStats._contributionator(n)} in " + f"{github_username} has made {n} valid " + f"{self._contributionator(n)} in " f"October\n\n" f"{shirtstr}\n\n" ) @@ -207,54 +161,64 @@ class HacktoberStats(commands.Cog): url="https://hacktoberfest.digitalocean.com", icon_url="https://avatars1.githubusercontent.com/u/35706162?s=200&v=4" ) + + # this will handle when no PRs in_review or accepted + review_str = self._build_prs_string(in_review, github_username) or "None" + accepted_str = 
self._build_prs_string(accepted, github_username) or "None" stats_embed.add_field( - name="Top 5 Repositories:", - value=self._build_top5str(pr_stats) + name=":clock1: In Review", + value=review_str + ) + stats_embed.add_field( + name=":tada: Accepted", + value=accepted_str ) logging.info(f"Hacktoberfest PR built for GitHub user '{github_username}'") return stats_embed @staticmethod - async def get_october_prs(github_username: str) -> List[dict]: + async def get_october_prs(github_username: str) -> Union[List[dict], None]: """ Query GitHub's API for PRs created during the month of October by github_username. - PRs with an 'invalid' tag are ignored + PRs with an 'invalid' or 'spam' label are ignored unless it is merged or approved + + For PRs created after October 3rd, they have to be in a repository that has a + 'hacktoberfest' topic, unless the PR is labelled 'hacktoberfest-accepted' for it + to count. If PRs are found, return a list of dicts with basic PR information For each PR: - { + { "repo_url": str - "repo_shortname": str (e.g. "python-discord/seasonalbot") + "repo_shortname": str (e.g. 
"python-discord/sir-lancebot") "created_at": datetime.datetime - } + "number": int + } Otherwise, return None """ - logging.info(f"Generating Hacktoberfest PR query for GitHub user: '{github_username}'") + logging.info(f"Fetching Hacktoberfest Stats for GitHub user: '{github_username}'") base_url = "https://api.github.com/search/issues?q=" - not_label = "invalid" action_type = "pr" - is_query = f"public+author:{github_username}" + is_query = "public" not_query = "draft" - date_range = f"{CURRENT_YEAR}-10-01T00:00:00%2B14:00..{CURRENT_YEAR}-10-31T23:59:59-11:00" + date_range = f"{CURRENT_YEAR}-09-30T10:00Z..{CURRENT_YEAR}-11-01T12:00Z" per_page = "300" query_url = ( f"{base_url}" - f"-label:{not_label}" f"+type:{action_type}" f"+is:{is_query}" + f"+author:{github_username}" f"+-is:{not_query}" f"+created:{date_range}" f"&per_page={per_page}" ) + logging.debug(f"GitHub query URL generated: {query_url}") - async with aiohttp.ClientSession() as session: - async with session.get(query_url, headers=REQUEST_HEADERS) as resp: - jsonresp = await resp.json() - + jsonresp = await HacktoberStats._fetch_url(query_url, REQUEST_HEADERS) if "message" in jsonresp.keys(): # One of the parameters is invalid, short circuit for now api_message = jsonresp["errors"][0]["message"] @@ -264,75 +228,193 @@ class HacktoberStats(commands.Cog): logging.debug(f"No GitHub user found named '{github_username}'") else: logging.error(f"GitHub API request for '{github_username}' failed with message: {api_message}") + return + if jsonresp["total_count"] == 0: + # Short circuit if there aren't any PRs + logging.info(f"No Hacktoberfest PRs found for GitHub user: '{github_username}'") return - else: - if jsonresp["total_count"] == 0: - # Short circuit if there aren't any PRs - logging.info(f"No Hacktoberfest PRs found for GitHub user: '{github_username}'") - return - else: - logging.info(f"Found {len(jsonresp['items'])} Hacktoberfest PRs for GitHub user: '{github_username}'") - outlist = [] - for item in 
jsonresp["items"]: - shortname = HacktoberStats._get_shortname(item["repository_url"]) - itemdict = { - "repo_url": f"https://www.github.com/{shortname}", - "repo_shortname": shortname, - "created_at": datetime.strptime( - item["created_at"], r"%Y-%m-%dT%H:%M:%SZ" - ), - } + logging.info(f"Found {len(jsonresp['items'])} Hacktoberfest PRs for GitHub user: '{github_username}'") + outlist = [] # list of pr information dicts that will get returned + oct3 = datetime(int(CURRENT_YEAR), 10, 3, 23, 59, 59, tzinfo=None) + hackto_topics = {} # cache whether each repo has the appropriate topic (bool values) + for item in jsonresp["items"]: + shortname = HacktoberStats._get_shortname(item["repository_url"]) + itemdict = { + "repo_url": f"https://www.github.com/{shortname}", + "repo_shortname": shortname, + "created_at": datetime.strptime( + item["created_at"], r"%Y-%m-%dT%H:%M:%SZ" + ), + "number": item["number"] + } + + # if the PR has 'invalid' or 'spam' labels, the PR must be + # either merged or approved for it to be included + if HacktoberStats._has_label(item, ["invalid", "spam"]): + if not await HacktoberStats._is_accepted(itemdict): + continue + + # PRs before oct 3 no need to check for topics + # continue the loop if 'hacktoberfest-accepted' is labelled then + # there is no need to check for its topics + if itemdict["created_at"] < oct3: + outlist.append(itemdict) + continue + + # checking PR's labels for "hacktoberfest-accepted" + if HacktoberStats._has_label(item, "hacktoberfest-accepted"): + outlist.append(itemdict) + continue + + # no need to query github if repo topics are fetched before already + if shortname in hackto_topics.keys(): + if hackto_topics[shortname]: outlist.append(itemdict) - return outlist + continue + # fetch topics for the pr repo + topics_query_url = f"https://api.github.com/repos/{shortname}/topics" + logging.debug(f"Fetching repo topics for {shortname} with url: {topics_query_url}") + jsonresp2 = await 
HacktoberStats._fetch_url(topics_query_url, GITHUB_TOPICS_ACCEPT_HEADER) + if jsonresp2.get("names") is None: + logging.error(f"Error fetching topics for {shortname}: {jsonresp2['message']}") + return + + # PRs after oct 3 that doesn't have 'hacktoberfest-accepted' label + # must be in repo with 'hacktoberfest' topic + if "hacktoberfest" in jsonresp2["names"]: + hackto_topics[shortname] = True # cache result in the dict for later use if needed + outlist.append(itemdict) + return outlist + + @staticmethod + async def _fetch_url(url: str, headers: dict) -> dict: + """Retrieve API response from URL.""" + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=headers) as resp: + jsonresp = await resp.json() + return jsonresp + + @staticmethod + def _has_label(pr: dict, labels: Union[List[str], str]) -> bool: + """ + Check if a PR has label 'labels'. + + 'labels' can be a string or a list of strings, if it's a list of strings + it will return true if any of the labels match. 
+ """ + if not pr.get("labels"): # if PR has no labels + return False + if (isinstance(labels, str)) and (any(label["name"].casefold() == labels for label in pr["labels"])): + return True + for item in labels: + if any(label["name"].casefold() == item for label in pr["labels"]): + return True + return False + + @staticmethod + async def _is_accepted(pr: dict) -> bool: + """Check if a PR is merged, approved, or labelled hacktoberfest-accepted.""" + # checking for merge status + query_url = f"https://api.github.com/repos/{pr['repo_shortname']}/pulls/" + query_url += str(pr["number"]) + jsonresp = await HacktoberStats._fetch_url(query_url, REQUEST_HEADERS) + + if "message" in jsonresp.keys(): + logging.error( + f"Error fetching PR stats for #{pr['number']} in repo {pr['repo_shortname']}:\n" + f"{jsonresp['message']}" + ) + return False + if ("merged" in jsonresp.keys()) and jsonresp["merged"]: + return True + + # checking for the label, using `jsonresp` which has the label information + if HacktoberStats._has_label(jsonresp, "hacktoberfest-accepted"): + return True + + # checking approval + query_url += "/reviews" + jsonresp2 = await HacktoberStats._fetch_url(query_url, REQUEST_HEADERS) + if isinstance(jsonresp2, dict): + # if API request is unsuccessful it will be a dict with the error in 'message' + logging.error( + f"Error fetching PR reviews for #{pr['number']} in repo {pr['repo_shortname']}:\n" + f"{jsonresp2['message']}" + ) + return False + # if it is successful it will be a list instead of a dict + if len(jsonresp2) == 0: # if PR has no reviews + return False + + # loop through reviews and check for approval + for item in jsonresp2: + if "status" in item.keys(): + if item['status'] == "APPROVED": + return True + return False @staticmethod def _get_shortname(in_url: str) -> str: """ Extract shortname from https://api.github.com/repos/* URL. - e.g. "https://api.github.com/repos/python-discord/seasonalbot" + e.g. 
"https://api.github.com/repos/python-discord/sir-lancebot" | V - "python-discord/seasonalbot" + "python-discord/sir-lancebot" """ exp = r"https?:\/\/api.github.com\/repos\/([/\-\_\.\w]+)" return re.findall(exp, in_url)[0] @staticmethod - def _summarize_prs(prs: List[dict]) -> dict: + async def _categorize_prs(prs: List[dict]) -> tuple: """ - Generate statistics from an input list of PR dictionaries, as output by get_october_prs. + Categorize PRs into 'in_review' and 'accepted' and returns as a tuple. - Return a dictionary containing: - { - "n_prs": int - "top5": [(repo_shortname, ncontributions), ...] - } + PRs created less than 14 days ago are 'in_review', PRs that are not + are 'accepted' (after 14 days review period). + + PRs that are accepted must either be merged, approved, or labelled + 'hacktoberfest-accepted. """ - contributed_repos = [pr["repo_shortname"] for pr in prs] - return {"n_prs": len(prs), "top5": Counter(contributed_repos).most_common(5)} + now = datetime.now() + oct3 = datetime(CURRENT_YEAR, 10, 3, 23, 59, 59, tzinfo=None) + in_review = [] + accepted = [] + for pr in prs: + if (pr['created_at'] + timedelta(REVIEW_DAYS)) > now: + in_review.append(pr) + elif (pr['created_at'] <= oct3) or await HacktoberStats._is_accepted(pr): + accepted.append(pr) + + return in_review, accepted @staticmethod - def _build_top5str(stats: List[tuple]) -> str: + def _build_prs_string(prs: List[tuple], user: str) -> str: """ - Build a string from the Top 5 contributions that is compatible with a discord.Embed field. - - Top 5 contributions should be a list of tuples, as output in the stats dictionary by - _summarize_prs + Builds a discord embed compatible string for a list of PRs. - String is of the form: - n contribution(s) to [shortname](url) - ... + Repository name with the link to pull requests authored by 'user' for + each PR. 
""" base_url = "https://www.github.com/" - contributionstrs = [] - for repo in stats['top5']: - n = repo[1] - contributionstrs.append(f"{n} {HacktoberStats._contributionator(n)} to [{repo[0]}]({base_url}{repo[0]})") - - return "\n".join(contributionstrs) + str_list = [] + repo_list = [pr["repo_shortname"] for pr in prs] + prs_list = Counter(repo_list).most_common(5) # get first 5 counted PRs + more = len(prs) - sum(i[1] for i in prs_list) + + for pr in prs_list: + # for example: https://www.github.com/python-discord/bot/pulls/octocat + # will display pull requests authored by octocat. + # pr[1] is the number of PRs to the repo + string = f"{pr[1]} to [{pr[0]}]({base_url}{pr[0]}/pulls/{user})" + str_list.append(string) + if more: + str_list.append(f"...and {more} more") + + return "\n".join(str_list) @staticmethod def _contributionator(n: int) -> str: |