Improve automatic post cleanup worker performances (#24785)

This commit is contained in:
Claire 2023-05-03 10:31:40 +02:00 committed by GitHub
parent a0c9f2447e
commit 8b636a29c6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 57 additions and 8 deletions

View file

@ -38,17 +38,37 @@ class Scheduler::AccountsStatusesCleanupScheduler
return if under_load?
budget = compute_budget
first_policy_id = last_processed_id
# If the budget allows it, we want to consider all accounts with enabled
# auto cleanup at least once.
#
# We start from `first_policy_id` (the last processed id in the previous
# run) and process each policy until we loop to `first_policy_id`,
# recording into `affected_policies` any policy that caused posts to be
# deleted.
#
# After that, we set `full_iteration` to `false` and continue looping on
# policies from `affected_policies`.
first_policy_id = last_processed_id || 0
first_iteration = true
full_iteration = true
affected_policies = []
loop do
num_processed_accounts = 0
scope = AccountStatusesCleanupPolicy.where(enabled: true)
scope = scope.where(id: first_policy_id...) if first_policy_id.present?
scope = cleanup_policies(first_policy_id, affected_policies, first_iteration, full_iteration)
scope.find_each(order: :asc) do |policy|
num_deleted = AccountStatusesCleanupService.new.call(policy, [budget, PER_ACCOUNT_BUDGET].min)
num_processed_accounts += 1 unless num_deleted.zero?
budget -= num_deleted
unless num_deleted.zero?
num_processed_accounts += 1
affected_policies << policy.id if full_iteration
end
full_iteration = false if !first_iteration && policy.id >= first_policy_id
if budget.zero?
save_last_processed_id(policy.id)
break
@ -57,9 +77,9 @@ class Scheduler::AccountsStatusesCleanupScheduler
# The idea here is to loop through all policies at least once until the budget is exhausted
# and start back after the last processed account otherwise
break if budget.zero? || (num_processed_accounts.zero? && first_policy_id.nil?)
break if budget.zero? || (num_processed_accounts.zero? && !full_iteration)
first_policy_id = nil
first_iteration = false
end
end
@ -76,12 +96,28 @@ class Scheduler::AccountsStatusesCleanupScheduler
private
def cleanup_policies(first_policy_id, affected_policies, first_iteration, full_iteration)
scope = AccountStatusesCleanupPolicy.where(enabled: true)
if full_iteration
# If we are doing a full iteration, examine all policies we have not examined yet
if first_iteration
scope.where(id: first_policy_id...)
else
scope.where(id: ..first_policy_id).or(scope.where(id: affected_policies))
end
else
# Otherwise, examine only policies that previously yielded posts to delete
scope.where(id: affected_policies)
end
end
def queue_under_load?(name, max_latency)
Sidekiq::Queue.new(name).latency > max_latency
end
def last_processed_id
redis.get('account_statuses_cleanup_scheduler:last_policy_id')
redis.get('account_statuses_cleanup_scheduler:last_policy_id')&.to_i
end
def save_last_processed_id(id)

View file

@ -75,7 +75,7 @@ describe Scheduler::AccountsStatusesCleanupScheduler do
end
end
describe '#get_budget' do
describe '#compute_budget' do
context 'on a single thread' do
let(:process_set_stub) { [{ 'concurrency' => 1, 'queues' => %w(push default) }] }
@ -130,6 +130,19 @@ describe Scheduler::AccountsStatusesCleanupScheduler do
.and change { account3.statuses.count }
.and change { account5.statuses.count }
end
context 'when given a big budget' do
let(:process_set_stub) { [{ 'concurrency' => 400, 'queues' => %w(push default) }] }
before do
stub_const 'Scheduler::AccountsStatusesCleanupScheduler::MAX_BUDGET', 400
end
it 'correctly handles looping in a single run' do
expect(subject.compute_budget).to eq(400)
expect { subject.perform }.to change { Status.count }.by(-30)
end
end
end
end
end