diff options
| -rw-r--r-- | bot/exts/halloween/hacktoberstats.py | 275 | 
1 files changed, 205 insertions, 70 deletions
| diff --git a/bot/exts/halloween/hacktoberstats.py b/bot/exts/halloween/hacktoberstats.py index ed1755e3..94bfe138 100644 --- a/bot/exts/halloween/hacktoberstats.py +++ b/bot/exts/halloween/hacktoberstats.py @@ -2,9 +2,9 @@ import json  import logging  import re  from collections import Counter -from datetime import datetime +from datetime import datetime, timedelta  from pathlib import Path -from typing import List, Tuple +from typing import List, Tuple, Union  import aiohttp  import discord @@ -18,11 +18,15 @@ log = logging.getLogger(__name__)  CURRENT_YEAR = datetime.now().year  # Used to construct GH API query  PRS_FOR_SHIRT = 4  # Minimum number of PRs before a shirt is awarded +REVIEW_DAYS = 14  # number of days needed after PR can be mature  HACKTOBER_WHITELIST = WHITELISTED_CHANNELS + (Channels.hacktoberfest_2020,)  REQUEST_HEADERS = {"User-Agent": "Python Discord Hacktoberbot"} +# using repo topics API during preview period requires an accept header +GITHUB_TOPICS_ACCEPT_HEADER = {"Accept": "application/vnd.github.mercy-preview+json"}  if GITHUB_TOKEN := Tokens.github:      REQUEST_HEADERS["Authorization"] = f"token {GITHUB_TOKEN}" +    GITHUB_TOPICS_ACCEPT_HEADER["Authorization"] = f"token {GITHUB_TOKEN}"  GITHUB_NONEXISTENT_USER_MESSAGE = (      "The listed users cannot be searched either because the users do not exist " @@ -50,7 +54,7 @@ class HacktoberStats(commands.Cog):          get that user's contributions          """          if not github_username: -            author_id, author_mention = HacktoberStats._author_mention_from_context(ctx) +            author_id, author_mention = self._author_mention_from_context(ctx)              if str(author_id) in self.linked_accounts.keys():                  github_username = self.linked_accounts[author_id]["github_username"] @@ -81,7 +85,7 @@ class HacktoberStats(commands.Cog):                  }              }          """ -        author_id, author_mention = HacktoberStats._author_mention_from_context(ctx) +        author_id, author_mention = self._author_mention_from_context(ctx)          if github_username:              if str(author_id) in self.linked_accounts.keys():                  old_username = self.linked_accounts[author_id]["github_username"] @@ -106,7 +110,7 @@ class HacktoberStats(commands.Cog):      @override_in_channel(HACKTOBER_WHITELIST)      async def unlink_user(self, ctx: commands.Context) -> None:          """Remove the invoking user's account link from the log.""" -        author_id, author_mention = HacktoberStats._author_mention_from_context(ctx) +        author_id, author_mention = self._author_mention_from_context(ctx)          stored_user = self.linked_accounts.pop(author_id, None)          if stored_user: @@ -162,7 +166,11 @@ class HacktoberStats(commands.Cog):          """          Query GitHub's API for PRs created by a GitHub user during the month of October. -        PRs with the 'invalid' tag are ignored +        PRs with an 'invalid' or 'spam' label are ignored + +        For PRs created after October 3rd, they have to be in a repository that has a +        'hacktoberfest' topic, unless the PR is labelled 'hacktoberfest-accepted' for it +        to count.          If a valid github_username is provided, an embed is generated and posted to the channel @@ -172,19 +180,19 @@ class HacktoberStats(commands.Cog):              prs = await self.get_october_prs(github_username)              if prs: -                stats_embed = self.build_embed(github_username, prs) +                stats_embed = await self.build_embed(github_username, prs)                  await ctx.send('Here are some stats!', embed=stats_embed)              else: -                await ctx.send(f"No October GitHub contributions found for '{github_username}'") +                await ctx.send(f"No valid October GitHub contributions found for '{github_username}'") -    def build_embed(self, github_username: str, prs: List[dict]) -> discord.Embed: +    async def build_embed(self, github_username: str, prs: List[dict]) -> discord.Embed:          """Return a stats embed built from github_username's PRs."""          logging.info(f"Building Hacktoberfest embed for GitHub user: '{github_username}'") -        pr_stats = self._summarize_prs(prs) +        in_review, accepted = await self._categorize_prs(prs) -        n = pr_stats['n_prs'] +        n = len(accepted) + len(in_review)  # total number of PRs          if n >= PRS_FOR_SHIRT: -            shirtstr = f"**{github_username} has earned a T-shirt or a tree!**" +            shirtstr = f"**{github_username} is eligible for a T-shirt or a tree!**"          elif n == PRS_FOR_SHIRT - 1:              shirtstr = f"**{github_username} is 1 PR away from a T-shirt or a tree!**"          else: @@ -194,8 +202,8 @@ class HacktoberStats(commands.Cog):              title=f"{github_username}'s Hacktoberfest",              color=discord.Color(0x9c4af7),              description=( -                f"{github_username} has made {n} " -                f"{HacktoberStats._contributionator(n)} in " +                f"{github_username} has made {n} valid " +                f"{self._contributionator(n)} in "                  f"October\n\n"                  f"{shirtstr}\n\n"              ) @@ -207,54 +215,64 @@ class HacktoberStats(commands.Cog):              url="https://hacktoberfest.digitalocean.com",              icon_url="https://avatars1.githubusercontent.com/u/35706162?s=200&v=4"          ) + +        # this will handle when no PRs in_review or accepted +        review_str = self._build_prs_string(in_review, github_username) or "None" +        accepted_str = self._build_prs_string(accepted, github_username) or "None"          stats_embed.add_field( -            name="Top 5 Repositories:", -            value=self._build_top5str(pr_stats) +            name=":clock1: In Review", +            value=review_str +        ) +        stats_embed.add_field( +            name=":tada: Accepted", +            value=accepted_str          )          logging.info(f"Hacktoberfest PR built for GitHub user '{github_username}'")          return stats_embed      @staticmethod -    async def get_october_prs(github_username: str) -> List[dict]: +    async def get_october_prs(github_username: str) -> Union[List[dict], None]:          """          Query GitHub's API for PRs created during the month of October by github_username. -        PRs with an 'invalid' tag are ignored +        PRs with an 'invalid' or 'spam' label are ignored unless it is merged or approved + +        For PRs created after October 3rd, they have to be in a repository that has a +        'hacktoberfest' topic, unless the PR is labelled 'hacktoberfest-accepted' for it +        to count.          If PRs are found, return a list of dicts with basic PR information          For each PR: -            { +        {              "repo_url": str              "repo_shortname": str (e.g. "python-discord/seasonalbot")              "created_at": datetime.datetime -            } +            "number": int +        }          Otherwise, return None          """ -        logging.info(f"Generating Hacktoberfest PR query for GitHub user: '{github_username}'") +        logging.info(f"Fetching Hacktoberfest Stats for GitHub user: '{github_username}'")          base_url = "https://api.github.com/search/issues?q=" -        not_label = "invalid"          action_type = "pr" -        is_query = f"public+author:{github_username}" +        is_query = "public"          not_query = "draft" -        date_range = f"{CURRENT_YEAR}-10-01T00:00:00%2B14:00..{CURRENT_YEAR}-10-31T23:59:59-11:00" +        date_range = f"{CURRENT_YEAR}-10-01T00:00:00%2B14:00..{CURRENT_YEAR}-11-01T00:00:00-11:00"          per_page = "300"          query_url = (              f"{base_url}" -            f"-label:{not_label}"              f"+type:{action_type}"              f"+is:{is_query}" +            f"+author:{github_username}"              f"+-is:{not_query}"              f"+created:{date_range}"              f"&per_page={per_page}"          ) +        logging.debug(f"GitHub query URL generated: {query_url}") -        async with aiohttp.ClientSession() as session: -            async with session.get(query_url, headers=REQUEST_HEADERS) as resp: -                jsonresp = await resp.json() - +        jsonresp = await HacktoberStats._fetch_url(query_url, REQUEST_HEADERS)          if "message" in jsonresp.keys():              # One of the parameters is invalid, short circuit for now              api_message = jsonresp["errors"][0]["message"] @@ -264,28 +282,132 @@ class HacktoberStats(commands.Cog):                  logging.debug(f"No GitHub user found named '{github_username}'")              else:                  logging.error(f"GitHub API request for '{github_username}' failed with message: {api_message}") +            return +        if jsonresp["total_count"] == 0: +            # Short circuit if there aren't any PRs +            logging.info(f"No Hacktoberfest PRs found for GitHub user: '{github_username}'")              return -        else: -            if jsonresp["total_count"] == 0: -                # Short circuit if there aren't any PRs -                logging.info(f"No Hacktoberfest PRs found for GitHub user: '{github_username}'") -                return -            else: -                logging.info(f"Found {len(jsonresp['items'])} Hacktoberfest PRs for GitHub user: '{github_username}'") -                outlist = [] -                for item in jsonresp["items"]: -                    shortname = HacktoberStats._get_shortname(item["repository_url"]) -                    itemdict = { -                        "repo_url": f"https://www.github.com/{shortname}", -                        "repo_shortname": shortname, -                        "created_at": datetime.strptime( -                            item["created_at"], r"%Y-%m-%dT%H:%M:%SZ" -                        ), -                    } +        logging.info(f"Found {len(jsonresp['items'])} Hacktoberfest PRs for GitHub user: '{github_username}'") +        outlist = []  # list of pr information dicts that will get returned +        oct3 = datetime(int(CURRENT_YEAR), 10, 3, 0, 0, 0) +        hackto_topics = {}  # cache whether each repo has the appropriate topic (bool values) +        for item in jsonresp["items"]: +            shortname = HacktoberStats._get_shortname(item["repository_url"]) +            itemdict = { +                "repo_url": f"https://www.github.com/{shortname}", +                "repo_shortname": shortname, +                "created_at": datetime.strptime( +                    item["created_at"], r"%Y-%m-%dT%H:%M:%SZ" +                ), +                "number": item["number"] +            } + +            # if the PR has 'invalid' or 'spam' labels, the PR must be +            # either merged or approved for it to be included +            if HacktoberStats._has_label(item, ["invalid", "spam"]): +                if not await HacktoberStats._is_accepted(item): +                    continue + +            # PRs before oct 3 no need to check for topics +            # continue the loop if 'hacktoberfest-accepted' is labelled then +            # there is no need to check for its topics +            if itemdict["created_at"] < oct3: +                outlist.append(itemdict) +                continue + +            # checking PR's labels for "hacktoberfest-accepted" +            if HacktoberStats._has_label(item, "hacktoberfest-accepted"): +                outlist.append(itemdict) +                continue + +            # no need to query github if repo topics are fetched before already +            if shortname in hackto_topics.keys(): +                if hackto_topics[shortname]:                      outlist.append(itemdict) -                return outlist +                    continue +            # fetch topics for the pr repo +            topics_query_url = f"https://api.github.com/repos/{shortname}/topics" +            logging.debug(f"Fetching repo topics for {shortname} with url: {topics_query_url}") +            jsonresp2 = await HacktoberStats._fetch_url(topics_query_url, GITHUB_TOPICS_ACCEPT_HEADER) +            if jsonresp2.get("names") is None: +                logging.error(f"Error fetching topics for {shortname}: {jsonresp2['message']}") +                return + +            # PRs after oct 3 that doesn't have 'hacktoberfest-accepted' label +            # must be in repo with 'hacktoberfest' topic +            if "hacktoberfest" in jsonresp2["names"]: +                hackto_topics[shortname] = True  # cache result in the dict for later use if needed +                outlist.append(itemdict) +        return outlist + +    @staticmethod +    async def _fetch_url(url: str, headers: dict) -> dict: +        """Retrieve API response from URL.""" +        async with aiohttp.ClientSession() as session: +            async with session.get(url, headers=headers) as resp: +                jsonresp = await resp.json() +        return jsonresp + +    @staticmethod +    def _has_label(pr: dict, labels: Union[List[str], str]) -> bool: +        """ +        Check if a PR has label 'labels'. + +        'labels' can be a string or a list of strings, if it's a list of strings +        it will return true if any of the labels match. +        """ +        if not pr.get("labels"):  # if PR has no labels +            return False +        if (isinstance(labels, str)) and (any(label["name"].casefold() == labels for label in pr["labels"])): +            return True +        for item in labels: +            if any(label["name"].casefold() == item for label in pr["labels"]): +                return True +        return False + +    @staticmethod +    async def _is_accepted(pr: dict) -> bool: +        """Check if a PR is merged, approved, or labelled hacktoberfest-accepted.""" +        # checking for merge status +        query_url = f"https://api.github.com/repos/{pr['repo_shortname']}/pulls/" +        query_url += str(pr["number"]) +        jsonresp = await HacktoberStats._fetch_url(query_url, REQUEST_HEADERS) + +        if "message" in jsonresp.keys(): +            logging.error( +                f"Error fetching PR stats for #{pr['number']} in repo {pr['repo_shortname']}:\n" +                f"{jsonresp['message']}" +            ) +            return False +        if ("merged" in jsonresp.keys()) and jsonresp["merged"]: +            return True + +        # checking for the label, using `jsonresp` which has the label information +        if HacktoberStats._has_label(jsonresp, "hacktoberfest-accepted"): +            return True + +        # checking approval +        query_url += "/reviews" +        jsonresp2 = await HacktoberStats._fetch_url(query_url, REQUEST_HEADERS) +        if isinstance(jsonresp2, dict): +            # if API request is unsuccessful it will be a dict with the error in 'message' +            logging.error( +                f"Error fetching PR reviews for #{pr['number']} in repo {pr['repo_shortname']}:\n" +                f"{jsonresp2['message']}" +            ) +            return False +        # if it is successful it will be a list instead of a dict +        if len(jsonresp2) == 0:  # if PR has no reviews +            return False + +        # loop through reviews and check for approval +        for item in jsonresp2: +            if "status" in item.keys(): +                if item['status'] == "APPROVED": +                    return True +        return False      @staticmethod      def _get_shortname(in_url: str) -> str: @@ -301,38 +423,51 @@ class HacktoberStats(commands.Cog):          return re.findall(exp, in_url)[0]      @staticmethod -    def _summarize_prs(prs: List[dict]) -> dict: +    async def _categorize_prs(prs: List[dict]) -> tuple:          """ -        Generate statistics from an input list of PR dictionaries, as output by get_october_prs. +        Categorize PRs into 'in_review' and 'accepted' and returns as a tuple. -        Return a dictionary containing: -            { -            "n_prs": int -            "top5": [(repo_shortname, ncontributions), ...] -            } +        PRs created less than 14 days ago are 'in_review', PRs that are not +        are 'accepted' (after 14 days review period). + +        PRs that are accepted must either be merged, approved, or labelled +        'hacktoberfest-accepted.          """ -        contributed_repos = [pr["repo_shortname"] for pr in prs] -        return {"n_prs": len(prs), "top5": Counter(contributed_repos).most_common(5)} +        now = datetime.now() +        in_review = [] +        accepted = [] +        for pr in prs: +            if (pr['created_at'] + timedelta(REVIEW_DAYS)) > now: +                in_review.append(pr) +            elif await HacktoberStats._is_accepted(pr): +                accepted.append(pr) + +        return in_review, accepted      @staticmethod -    def _build_top5str(stats: List[tuple]) -> str: +    def _build_prs_string(prs: List[tuple], user: str) -> str:          """ -        Build a string from the Top 5 contributions that is compatible with a discord.Embed field. +        Builds a discord embed compatible string for a list of PRs. -        Top 5 contributions should be a list of tuples, as output in the stats dictionary by -        _summarize_prs - -        String is of the form: -           n contribution(s) to [shortname](url) -           ... +        Repository name with the link to pull requests authored by 'user' for +        each PR.          """          base_url = "https://www.github.com/" -        contributionstrs = [] -        for repo in stats['top5']: -            n = repo[1] -            contributionstrs.append(f"{n} {HacktoberStats._contributionator(n)} to [{repo[0]}]({base_url}{repo[0]})") - -        return "\n".join(contributionstrs) +        str_list = [] +        repo_list = [pr["repo_shortname"] for pr in prs] +        prs_list = Counter(repo_list).most_common(5)  # get first 5 counted PRs +        more = len(prs) - sum(i[1] for i in prs_list) + +        for pr in prs_list: +            # for example: https://www.github.com/python-discord/bot/pulls/octocat +            # will display pull requests authored by octocat. +            # pr[1] is the number of PRs to the repo +            string = f"{pr[1]} to [{pr[0]}]({base_url}{pr[0]}/pulls/{user})" +            str_list.append(string) +        if more: +            str_list.append(f"...and {more} more") + +        return "\n".join(str_list)      @staticmethod      def _contributionator(n: int) -> str: | 
