diff options
Diffstat (limited to 'pydis_site/apps/content')
-rw-r--r-- | pydis_site/apps/content/migrations/0001_add_tags.py | 5 | ||||
-rw-r--r-- | pydis_site/apps/content/models/tag.py | 14 | ||||
-rw-r--r-- | pydis_site/apps/content/utils.py | 124 |
3 files changed, 120 insertions, 23 deletions
diff --git a/pydis_site/apps/content/migrations/0001_add_tags.py b/pydis_site/apps/content/migrations/0001_add_tags.py index 2e9d8c45..73525243 100644 --- a/pydis_site/apps/content/migrations/0001_add_tags.py +++ b/pydis_site/apps/content/migrations/0001_add_tags.py @@ -1,4 +1,4 @@ -# Generated by Django 4.0.6 on 2022-08-16 16:17 +# Generated by Django 4.0.6 on 2022-08-16 17:38 import django.db.models.deletion from django.db import migrations, models @@ -25,10 +25,11 @@ class Migration(migrations.Migration): name='Tag', fields=[ ('last_updated', models.DateTimeField(auto_now=True, help_text='The date and time this data was last fetched.')), + ('sha', models.CharField(help_text="The tag's hash, as calculated by GitHub.", max_length=40)), ('name', models.CharField(help_text="The tag's name.", max_length=50, primary_key=True, serialize=False)), ('group', models.CharField(help_text='The group the tag belongs to.', max_length=50, null=True)), ('body', models.TextField(help_text='The content of the tag.')), - ('last_commit', models.OneToOneField(help_text='The commit this file was last touched in.', null=True, on_delete=django.db.models.deletion.CASCADE, to='content.commit')), + ('last_commit', models.ForeignKey(help_text='The commit this file was last touched in.', null=True, on_delete=django.db.models.deletion.CASCADE, to='content.commit')), ], ), ] diff --git a/pydis_site/apps/content/models/tag.py b/pydis_site/apps/content/models/tag.py index 1c89fe1e..3c729768 100644 --- a/pydis_site/apps/content/models/tag.py +++ b/pydis_site/apps/content/models/tag.py @@ -1,3 +1,4 @@ +import collections.abc import json from django.db import models @@ -22,13 +23,10 @@ class Commit(models.Model): """The URL to the commit on GitHub.""" return self.URL_BASE + self.sha - @property - def format_users(self) -> str: + def format_users(self) -> collections.abc.Iterable[str]: """Return a nice representation of the user(s)' name and email.""" - authors = [] for author in json.loads(self.author): - authors.append(f"{author['name']} <{author['email']}>") - return ", ".join(authors) + yield f"{author['name']} <{author['email']}>" class Tag(models.Model): @@ -40,7 +38,11 @@ class Tag(models.Model): help_text="The date and time this data was last fetched.", auto_now=True, ) - last_commit = models.OneToOneField( + sha = models.CharField( + help_text="The tag's hash, as calculated by GitHub.", + max_length=40, + ) + last_commit = models.ForeignKey( Commit, help_text="The commit this file was last touched in.", null=True, diff --git a/pydis_site/apps/content/utils.py b/pydis_site/apps/content/utils.py index 11100ba5..7b078de6 100644 --- a/pydis_site/apps/content/utils.py +++ b/pydis_site/apps/content/utils.py @@ -1,5 +1,6 @@ import datetime import functools +import json import tarfile import tempfile import typing @@ -15,11 +16,26 @@ from django.utils import timezone from markdown.extensions.toc import TocExtension from pydis_site import settings -from .models import Tag +from .models import Commit, Tag TAG_CACHE_TTL = datetime.timedelta(hours=1) +def github_client(**kwargs) -> httpx.Client: + """Get a client to access the GitHub API with important settings pre-configured.""" + client = httpx.Client( + base_url=settings.GITHUB_API, + follow_redirects=True, + timeout=settings.TIMEOUT_PERIOD, + **kwargs + ) + if settings.GITHUB_TOKEN: # pragma: no cover + if not client.headers.get("Authorization"): + client.headers = {"Authorization": f"token {settings.GITHUB_TOKEN}"} + + return client + + def get_category(path: Path) -> dict[str, str]: """Load category information by name from _info.yml.""" if not path.is_dir(): @@ -60,19 +76,31 @@ def fetch_tags() -> list[Tag]: The entire repository is downloaded and extracted locally because getting file content would require one request per file, and can get rate-limited. """ - if settings.GITHUB_TOKEN: # pragma: no cover - headers = {"Authorization": f"token {settings.GITHUB_TOKEN}"} - else: - headers = {} + client = github_client() + + # Grab metadata + metadata = client.get("/repos/python-discord/bot/contents/bot/resources") + metadata.raise_for_status() + + hashes = {} + for entry in metadata.json(): + if entry["type"] == "dir": + # Tag group + files = client.get(entry["url"]) + files.raise_for_status() + files = files.json() + else: + files = [entry] - tar_file = httpx.get( - f"{settings.GITHUB_API}/repos/python-discord/bot/tarball", - follow_redirects=True, - timeout=settings.TIMEOUT_PERIOD, - headers=headers, - ) + for file in files: + hashes[file["name"]] = file["sha"] + + # Download the files + tar_file = client.get("/repos/python-discord/bot/tarball") tar_file.raise_for_status() + client.close() + tags = [] with tempfile.TemporaryDirectory() as folder: with tarfile.open(fileobj=BytesIO(tar_file.content)) as repo: @@ -83,20 +111,83 @@ def fetch_tags() -> list[Tag]: repo.extractall(folder, included) for tag_file in Path(folder).rglob("*.md"): + name = tag_file.name group = None if tag_file.parent.name != "tags": # Tags in sub-folders are considered part of a group group = tag_file.parent.name tags.append(Tag( - name=tag_file.name.removesuffix(".md"), + name=name.removesuffix(".md"), + sha=hashes[name], group=group, body=tag_file.read_text(encoding="utf-8"), + last_commit=None, )) return tags +def set_tag_commit(tag: Tag) -> Tag: + """Fetch commit information from the API, and save it for the tag.""" + path = "/bot/resources/tags" + if tag.group: + path += f"/{tag.group}" + path += f"/{tag.name}.md" + + # Fetch and set the commit + with github_client() as client: + data = client.get("/repos/python-discord/bot/commits", params={"path": path}) + data.raise_for_status() + data = data.json()[0] + + commit = data["commit"] + author, committer = commit["author"], commit["committer"] + + date = datetime.datetime.strptime(committer["date"], settings.GITHUB_TIMESTAMP_FORMAT) + date = date.replace(tzinfo=datetime.timezone.utc) + + if author["email"] == committer["email"]: + commit_author = [author] + else: + commit_author = [author, committer] + + commit_obj, _ = Commit.objects.get_or_create( + sha=data["sha"], + message=commit["message"], + date=date, + author=json.dumps(commit_author), + ) + tag.last_commit = commit_obj + tag.save() + + return tag + + +def record_tags(tags: list[Tag]) -> None: + """Sync the database with an updated set of tags.""" + # Remove entries which no longer exist + Tag.objects.exclude(name__in=[tag.name for tag in tags]).delete() + + # Insert/update the tags + for tag in tags: + try: + old_tag = Tag.objects.get(name=tag.name) + except Tag.DoesNotExist: + # The tag is not in the database yet, + # pretend it's previous state is the current state + old_tag = tag + + if old_tag.sha == tag.sha and old_tag.last_commit is not None: + # We still have an up-to-date commit entry + tag.last_commit = old_tag.last_commit + + tag.save() + + # Drop old, unused commits + Commit.objects.filter(tag__isnull=True).delete() + + def get_tags() -> list[Tag]: """Return a list of all tags visible to the application, from the cache or API.""" if settings.STATIC_BUILD: # pragma: no cover @@ -113,9 +204,7 @@ def get_tags() -> list[Tag]: tags = get_tags_static() else: tags = fetch_tags() - Tag.objects.exclude(name__in=[tag.name for tag in tags]).delete() - for tag in tags: - tag.save() + record_tags(tags) return tags else: @@ -127,6 +216,9 @@ def get_tag(path: str) -> typing.Union[Tag, list[Tag]]: """ Return a tag based on the search location. + If certain tag data is out of sync (for instance a commit date is missing), + an extra request will be made to sync the information. + The tag name and group must match. If only one argument is provided in the path, it's assumed to either be a group name, or a no-group tag name. @@ -142,6 +234,8 @@ def get_tag(path: str) -> typing.Union[Tag, list[Tag]]: matches = [] for tag in get_tags(): if tag.name == name and tag.group == group: + if tag.last_commit is None: + set_tag_commit(tag) return tag elif tag.group == name and group is None: matches.append(tag) |