aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Johannes Christ <[email protected]> 2024-04-01 20:08:02 +0200
committer: Johannes Christ <[email protected]> 2024-04-01 20:21:49 +0200
commit: 084b52b1c5f7cf9e02c64361972b75b19bc1a8dc (patch)
tree: 84fb250db8e44bcb396a2bf0ed033cd5d97fe5ec
parent: Merge pull request #1286 from python-discord/jb3/filterlist-perf-improvements (diff)
Upsert tags in two queries
Before, we had an N+1 query, where each tag would be separately fetched and updated. Instead, first delete any tags we no longer care about, then perform an upsert of all fields except for `last_commit`, which we want to keep from the existing tag. This comes with one caveat: on the first update of the tags, the `tags` list is no longer modified in place, because mutation is bad; however, the database state is updated accordingly.
-rw-r--r-- pydis_site/apps/content/tests/test_utils.py 12
-rw-r--r-- pydis_site/apps/content/utils.py 31
2 files changed, 25 insertions, 18 deletions
diff --git a/pydis_site/apps/content/tests/test_utils.py b/pydis_site/apps/content/tests/test_utils.py
index 7f7736f9..d26c59d5 100644
--- a/pydis_site/apps/content/tests/test_utils.py
+++ b/pydis_site/apps/content/tests/test_utils.py
@@ -370,7 +370,7 @@ class TagUtilsTests(TestCase):
self.assertEqual(self.commit, models.Tag.objects.get(name=tag.name).last_commit)
@mock.patch.object(utils, "set_tag_commit")
- def test_exiting_commit(self, set_commit_mock: mock.Mock):
+ def test_existing_commit(self, set_commit_mock: mock.Mock):
"""Test that a commit is saved when the data has not changed."""
tag = models.Tag.objects.create(name="tag-name", body="old body", last_commit=self.commit)
@@ -378,8 +378,18 @@ class TagUtilsTests(TestCase):
tag.last_commit = None
utils.record_tags([tag])
+ tag.refresh_from_db()
self.assertEqual(self.commit, tag.last_commit)
result = utils.get_tag("tag-name")
self.assertEqual(tag, result)
set_commit_mock.assert_not_called()
+
+ def test_deletes_tags_no_longer_present(self):
+ """Test that no longer known tags are deleted."""
+ tag = models.Tag.objects.create(name="tag-name", body="old body", last_commit=self.commit)
+
+ utils.record_tags([])
+
+ with self.assertRaises(models.Tag.DoesNotExist):
+ tag.refresh_from_db()
diff --git a/pydis_site/apps/content/utils.py b/pydis_site/apps/content/utils.py
index 5a146e10..720063e4 100644
--- a/pydis_site/apps/content/utils.py
+++ b/pydis_site/apps/content/utils.py
@@ -12,6 +12,7 @@ import frontmatter
import httpx
import markdown
import yaml
+from django.db import transaction
from django.http import Http404
from django.utils import timezone
from markdown.extensions.toc import TocExtension
@@ -194,23 +195,19 @@ def set_tag_commit(tag: Tag) -> None:
def record_tags(tags: list[Tag]) -> None:
"""Sync the database with an updated set of tags."""
- # Remove entries which no longer exist
- Tag.objects.exclude(name__in=[tag.name for tag in tags]).delete()
-
- # Insert/update the tags
- for new_tag in tags:
- try:
- old_tag = Tag.objects.get(name=new_tag.name)
- except Tag.DoesNotExist:
- # The tag is not in the database yet,
- # pretend it's previous state is the current state
- old_tag = new_tag
-
- if old_tag.sha == new_tag.sha and old_tag.last_commit_id is not None:
- # We still have an up-to-date commit entry
- new_tag.last_commit_id = old_tag.last_commit_id
-
- new_tag.save()
+ with transaction.atomic():
+ # Remove any tags that we don't want to keep in the future
+ Tag.objects.exclude(name__in=(tag.name for tag in tags)).delete()
+
+ # Upsert the data!
+ Tag.objects.bulk_create(
+ tags,
+ update_conflicts=True,
+ # last_commit is not included here. We want to keep that
+ # from the tag that might already be in the database.
+ update_fields=('last_updated', 'sha', 'group', 'body'),
+ unique_fields=('name',),
+ )
# Drop old, unused commits
Commit.objects.filter(tag__isnull=True).delete()