From 7898288420401566deef6c1cd23bde946bbfc614 Mon Sep 17 00:00:00 2001 From: Chris Lovering Date: Sun, 30 Jun 2024 18:57:23 +0100 Subject: Improve user in-guild sync process Previously we set all users in_guild to False, and relied on users being set back to in_guild when iterating through guild.members. However, this caused two problems: 1. For a short window a user's in_guild status was incorrect. 2. It required an update for all users in_guild to be sent to postgres to update in_guild back to True. This diff changes that, so instead only users who are not found in the guild have in_guild set to False. The bottleneck for this query is the number of users that are currently in_guild=False. Testing locally, with 360k users off guild, this took 7.4s to query out, and 0.5s to process, and 15.1s to commit. --- metricity/exts/event_listeners/startup_sync.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/metricity/exts/event_listeners/startup_sync.py b/metricity/exts/event_listeners/startup_sync.py index 0f6264f..017ea1f 100644 --- a/metricity/exts/event_listeners/startup_sync.py +++ b/metricity/exts/event_listeners/startup_sync.py @@ -5,7 +5,7 @@ import math import discord from discord.ext import commands from pydis_core.utils import logging, scheduling -from sqlalchemy import column, update +from sqlalchemy import column, select from sqlalchemy.dialects.postgresql import insert from metricity import models @@ -35,10 +35,6 @@ class StartupSyncer(commands.Cog): await _syncer_utils.sync_thread_archive_state(guild) log.info("Beginning user synchronisation process") - async with async_session() as sess: - await sess.execute(update(models.User).values(in_guild=False)) - await sess.commit() - users = ( { "id": str(user.id), @@ -85,7 +81,6 @@ class StartupSyncer(commands.Cog): )) objs = list(res) - created += [obj[0] == 0 for obj in objs].count(True) updated += [obj[0] != 0 for obj in objs].count(True) @@ -95,6 +90,20 @@ class 
StartupSyncer(commands.Cog): await sess.commit() log.info("User upsert complete") + log.info("Beginning user in_guild sync") + + users_updated = 0 + guild_member_ids = {str(member.id) for member in guild.members} + async with async_session() as sess: + res = await sess.execute(select(models.User).filter_by(in_guild=True)) + in_guild_users = res.scalars() + for user in in_guild_users: + if user.id not in guild_member_ids: + users_updated += 1 + user.in_guild = False + await sess.commit() + log.info("User in_guild sync updated %d users to be off guild", users_updated) + log.info("User sync complete") self.bot.sync_process_complete.set() -- cgit v1.2.3