From 084b52b1c5f7cf9e02c64361972b75b19bc1a8dc Mon Sep 17 00:00:00 2001 From: Johannes Christ Date: Mon, 1 Apr 2024 20:08:02 +0200 Subject: Upsert tags in two queries Before, we had an N+1 query, where each tag would be separately fetched and updated. Instead, first delete any tags we no longer care about, then perform an upsert of all fields except for `last_commit`, which we want to keep from the existing tag. This comes with one caveat: on the first update of the tags, the `tags` list is no longer modified in-place, because mutation is bad; however, the database state is updated accordingly. --- pydis_site/apps/content/tests/test_utils.py | 12 ++++++++++- pydis_site/apps/content/utils.py | 31 +++++++++++++---------------- 2 files changed, 25 insertions(+), 18 deletions(-) (limited to 'pydis_site/apps/content') diff --git a/pydis_site/apps/content/tests/test_utils.py b/pydis_site/apps/content/tests/test_utils.py index 7f7736f9..d26c59d5 100644 --- a/pydis_site/apps/content/tests/test_utils.py +++ b/pydis_site/apps/content/tests/test_utils.py @@ -370,7 +370,7 @@ class TagUtilsTests(TestCase): self.assertEqual(self.commit, models.Tag.objects.get(name=tag.name).last_commit) @mock.patch.object(utils, "set_tag_commit") - def test_exiting_commit(self, set_commit_mock: mock.Mock): + def test_existing_commit(self, set_commit_mock: mock.Mock): """Test that a commit is saved when the data has not changed.""" tag = models.Tag.objects.create(name="tag-name", body="old body", last_commit=self.commit) @@ -378,8 +378,18 @@ class TagUtilsTests(TestCase): tag.last_commit = None utils.record_tags([tag]) + tag.refresh_from_db() self.assertEqual(self.commit, tag.last_commit) result = utils.get_tag("tag-name") self.assertEqual(tag, result) set_commit_mock.assert_not_called() + + def test_deletes_tags_no_longer_present(self): + """Test that no longer known tags are deleted.""" + tag = models.Tag.objects.create(name="tag-name", body="old body", last_commit=self.commit) + + 
utils.record_tags([]) + + with self.assertRaises(models.Tag.DoesNotExist): + tag.refresh_from_db() diff --git a/pydis_site/apps/content/utils.py b/pydis_site/apps/content/utils.py index 5a146e10..720063e4 100644 --- a/pydis_site/apps/content/utils.py +++ b/pydis_site/apps/content/utils.py @@ -12,6 +12,7 @@ import frontmatter import httpx import markdown import yaml +from django.db import transaction from django.http import Http404 from django.utils import timezone from markdown.extensions.toc import TocExtension @@ -194,23 +195,19 @@ def set_tag_commit(tag: Tag) -> None: def record_tags(tags: list[Tag]) -> None: """Sync the database with an updated set of tags.""" - # Remove entries which no longer exist - Tag.objects.exclude(name__in=[tag.name for tag in tags]).delete() - - # Insert/update the tags - for new_tag in tags: - try: - old_tag = Tag.objects.get(name=new_tag.name) - except Tag.DoesNotExist: - # The tag is not in the database yet, - # pretend it's previous state is the current state - old_tag = new_tag - - if old_tag.sha == new_tag.sha and old_tag.last_commit_id is not None: - # We still have an up-to-date commit entry - new_tag.last_commit_id = old_tag.last_commit_id - - new_tag.save() + with transaction.atomic(): + # Remove any tags that we don't want to keep in the future + Tag.objects.exclude(name__in=(tag.name for tag in tags)).delete() + + # Upsert the data! + Tag.objects.bulk_create( + tags, + update_conflicts=True, + # last_commit is not included here. We want to keep that + # from the tag that might already be in the database. + update_fields=('last_updated', 'sha', 'group', 'body'), + unique_fields=('name',), + ) # Drop old, unused commits Commit.objects.filter(tag__isnull=True).delete() -- cgit v1.2.3