Change search indexing to use batches to minimize resource usage (#18451)

This commit is contained in:
Eugen Rochko 2022-05-18 23:29:14 +02:00 committed by GitHub
parent ded5a0254a
commit 679b7158e3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 64 additions and 19 deletions

View file

@ -81,7 +81,7 @@ gem 'scenic', '~> 1.6'
gem 'sidekiq', '~> 6.4'
gem 'sidekiq-scheduler', '~> 4.0'
gem 'sidekiq-unique-jobs', '~> 7.1'
gem 'sidekiq-bulk', '~>0.2.0'
gem 'sidekiq-bulk', '~> 0.2.0'
gem 'simple-navigation', '~> 4.3'
gem 'simple_form', '~> 5.1'
gem 'sprockets-rails', '~> 3.4', require: 'sprockets/railtie'

View file

@ -1,7 +1,7 @@
# frozen_string_literal: true
class AccountsIndex < Chewy::Index
settings index: { refresh_interval: '5m' }, analysis: {
settings index: { refresh_interval: '30s' }, analysis: {
analyzer: {
content: {
tokenizer: 'whitespace',

View file

@ -3,7 +3,7 @@
class StatusesIndex < Chewy::Index
include FormattingHelper
settings index: { refresh_interval: '15m' }, analysis: {
settings index: { refresh_interval: '30s' }, analysis: {
filter: {
english_stop: {
type: 'stop',

View file

@ -1,7 +1,7 @@
# frozen_string_literal: true
class TagsIndex < Chewy::Index
settings index: { refresh_interval: '15m' }, analysis: {
settings index: { refresh_interval: '30s' }, analysis: {
analyzer: {
content: {
tokenizer: 'keyword',

View file

@ -0,0 +1,26 @@
# frozen_string_literal: true
# Sidekiq job that drains the per-index Redis sets of queued IDs
# ("chewy:queue:<IndexName>") and imports them into each index returned by
# #indexes.
class Scheduler::IndexingScheduler
  include Sidekiq::Worker
  include Redisable

  sidekiq_options retry: 0

  # Upper bound on how many queued IDs are handed to a single import! call
  # and removed from the queue together.
  IMPORT_BATCH_SIZE = 1000

  # Imports every queued ID for every index, removing IDs from the queue as
  # each batch is imported.
  def perform
    # The strategy that fills these queues only does so while Chewy is
    # enabled, so there is nothing useful to do here when it is not.
    return unless Chewy.enabled?

    indexes.each do |type|
      queue_key = "chewy:queue:#{type.name}"

      with_redis do |redis|
        # Working in slices keeps each import bounded. An empty queue yields
        # no slices, so neither an import nor a pipeline is issued for it.
        redis.smembers(queue_key).each_slice(IMPORT_BATCH_SIZE) do |ids|
          type.import!(ids)

          # Dequeue only after the import call returned; import! is expected
          # to raise on failure (TODO confirm against the Chewy version in
          # use), which leaves the remaining IDs queued for the next run.
          redis.pipelined do |pipeline|
            ids.each { |id| pipeline.srem(queue_key, id) }
          end
        end
      end
    end
  end

  # Index classes whose queues this job drains, in processing order.
  def indexes
    [AccountsIndex, TagsIndex, StatusesIndex]
  end
end

View file

@ -38,7 +38,7 @@ require_relative '../lib/mastodon/version'
require_relative '../lib/mastodon/rack_middleware'
require_relative '../lib/devise/two_factor_ldap_authenticatable'
require_relative '../lib/devise/two_factor_pam_authenticatable'
require_relative '../lib/chewy/strategy/custom_sidekiq'
require_relative '../lib/chewy/strategy/mastodon'
require_relative '../lib/webpacker/manifest_extensions'
require_relative '../lib/webpacker/helper_extensions'
require_relative '../lib/rails/engine_extensions'

View file

@ -13,15 +13,14 @@ Chewy.settings = {
journal: false,
user: user,
password: password,
sidekiq: { queue: 'pull' },
}
# We use our own async strategy even outside the request-response
# cycle, which takes care of checking if Elasticsearch is enabled
# or not. However, mind that for the Rails console, the :urgent
# strategy is set automatically with no way to override it.
Chewy.root_strategy = :custom_sidekiq
Chewy.request_strategy = :custom_sidekiq
Chewy.root_strategy = :mastodon
Chewy.request_strategy = :mastodon
Chewy.use_after_commit_callbacks = false
module Chewy

View file

@ -21,6 +21,10 @@
every: '6h'
class: Scheduler::Trends::ReviewNotificationsScheduler
queue: scheduler
indexing_scheduler:
every: '5m'
class: Scheduler::IndexingScheduler
queue: scheduler
media_cleanup_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
class: Scheduler::MediaCleanupScheduler

View file

@ -1,11 +0,0 @@
# frozen_string_literal: true
module Chewy
  class Strategy
    # Variant of the parent Sidekiq strategy that forwards updates only while
    # Chewy is enabled.
    class CustomSidekiq < Sidekiq
      # Hands the call to the parent implementation with the same arguments
      # when Chewy.enabled? is truthy; otherwise does nothing and returns nil.
      def update(_type, _objects, _options = {})
        return unless Chewy.enabled?

        super
      end
    end
  end
end

View file

@ -0,0 +1,27 @@
# frozen_string_literal: true
module Chewy
  class Strategy
    # Update strategy that does not index anything itself. It collects the
    # identifiers passed to #update and, in #leave, adds them to one Redis set
    # per index type ("chewy:queue:<type name>").
    class Mastodon < Base
      def initialize
        super

        # Maps each index type to the identifiers stashed for it. The block
        # form gives every key its own array rather than one shared default.
        @stash = Hash.new { |hash, key| hash[key] = [] }
      end

      # Stashes identifiers for +type+. Does nothing (and returns nil) while
      # Chewy is disabled.
      #
      # @param type the index the objects belong to
      # @param objects a single object/ID or a collection of them
      # @param _options accepted for interface compatibility; unused
      def update(type, objects, _options = {})
        return unless Chewy.enabled?

        # When the index root defines `id`, the objects are stashed as given;
        # otherwise the adapter's `identify` converts them first.
        ids = type.root.id ? Array.wrap(objects) : type.adapter.identify(objects)

        @stash[type].concat(ids)
      end

      # Flushes the stash to Redis. Presumably invoked by Chewy when the
      # strategy's scope ends -- confirm against Chewy::Strategy::Base.
      def leave
        # Redis rejects SADD without at least one member, and an update with
        # an empty collection leaves an empty list behind, so drop those.
        pending = @stash.reject { |_type, ids| ids.empty? }

        # Avoid checking out a connection and opening a pipeline for nothing.
        return if pending.empty?

        RedisConfiguration.with do |redis|
          redis.pipelined do |pipeline|
            pending.each do |type, ids|
              pipeline.sadd("chewy:queue:#{type.name}", ids)
            end
          end
        end
      end
    end
  end
end