diff options
author | 2024-04-01 20:08:02 +0200 | |
---|---|---|
committer | 2024-04-01 20:21:49 +0200 | |
commit | 084b52b1c5f7cf9e02c64361972b75b19bc1a8dc (patch) | |
tree | 84fb250db8e44bcb396a2bf0ed033cd5d97fe5ec | |
parent | Merge pull request #1286 from python-discord/jb3/filterlist-perf-improvements (diff) |
Upsert tags in two queries
Before, we had an N+1 query, where each tag would be separately fetched
and updated. Instead, first delete any tags we no longer care about,
then perform an upsert of all fields except for `last_commit`, which we
want to keep from the existing tag.
This comes with one caveat: on the first update of the tags, the `tags`
list is no longer modified in-place, because mutation is bad. However,
the database state is updated accordingly.
-rw-r--r-- | pydis_site/apps/content/tests/test_utils.py | 12 | ||||
-rw-r--r-- | pydis_site/apps/content/utils.py | 31 |
2 files changed, 25 insertions, 18 deletions
diff --git a/pydis_site/apps/content/tests/test_utils.py b/pydis_site/apps/content/tests/test_utils.py index 7f7736f9..d26c59d5 100644 --- a/pydis_site/apps/content/tests/test_utils.py +++ b/pydis_site/apps/content/tests/test_utils.py @@ -370,7 +370,7 @@ class TagUtilsTests(TestCase): self.assertEqual(self.commit, models.Tag.objects.get(name=tag.name).last_commit) @mock.patch.object(utils, "set_tag_commit") - def test_exiting_commit(self, set_commit_mock: mock.Mock): + def test_existing_commit(self, set_commit_mock: mock.Mock): """Test that a commit is saved when the data has not changed.""" tag = models.Tag.objects.create(name="tag-name", body="old body", last_commit=self.commit) @@ -378,8 +378,18 @@ class TagUtilsTests(TestCase): tag.last_commit = None utils.record_tags([tag]) + tag.refresh_from_db() self.assertEqual(self.commit, tag.last_commit) result = utils.get_tag("tag-name") self.assertEqual(tag, result) set_commit_mock.assert_not_called() + + def test_deletes_tags_no_longer_present(self): + """Test that no longer known tags are deleted.""" + tag = models.Tag.objects.create(name="tag-name", body="old body", last_commit=self.commit) + + utils.record_tags([]) + + with self.assertRaises(models.Tag.DoesNotExist): + tag.refresh_from_db() diff --git a/pydis_site/apps/content/utils.py b/pydis_site/apps/content/utils.py index 5a146e10..720063e4 100644 --- a/pydis_site/apps/content/utils.py +++ b/pydis_site/apps/content/utils.py @@ -12,6 +12,7 @@ import frontmatter import httpx import markdown import yaml +from django.db import transaction from django.http import Http404 from django.utils import timezone from markdown.extensions.toc import TocExtension @@ -194,23 +195,19 @@ def set_tag_commit(tag: Tag) -> None: def record_tags(tags: list[Tag]) -> None: """Sync the database with an updated set of tags.""" - # Remove entries which no longer exist - Tag.objects.exclude(name__in=[tag.name for tag in tags]).delete() - - # Insert/update the tags - for new_tag in tags: - 
try: - old_tag = Tag.objects.get(name=new_tag.name) - except Tag.DoesNotExist: - # The tag is not in the database yet, - # pretend it's previous state is the current state - old_tag = new_tag - - if old_tag.sha == new_tag.sha and old_tag.last_commit_id is not None: - # We still have an up-to-date commit entry - new_tag.last_commit_id = old_tag.last_commit_id - - new_tag.save() + with transaction.atomic(): + # Remove any tags that we don't want to keep in the future + Tag.objects.exclude(name__in=(tag.name for tag in tags)).delete() + + # Upsert the data! + Tag.objects.bulk_create( + tags, + update_conflicts=True, + # last_commit is not included here. We want to keep that + # from the tag that might already be in the database. + update_fields=('last_updated', 'sha', 'group', 'body'), + unique_fields=('name',), + ) # Drop old, unused commits Commit.objects.filter(tag__isnull=True).delete() |