Fix resolving accounts sometimes creating duplicate records for a given AP id (#15364)

* Fix ResolveAccountService accepting mismatching acct: URI

* Set attributes that should be updated regardless of suspension

* Fix key fetching

* Automatically merge remote accounts with duplicate `uri`

* Add tests

* Add "tootctl accounts fix-duplicates"

Finds duplicate accounts sharing the same ActivityPub `id`, re-fetches them and
merges them under the canonical `acct:` URI.

Co-authored-by: Claire <claire.github-309c@sitedethib.com>
This commit is contained in:
ThibG 2020-12-18 23:26:26 +01:00 committed by GitHub
parent 052249588b
commit a60d9335d8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 116 additions and 24 deletions

View file

@ -28,7 +28,7 @@ class ActivityPub::FetchRemoteAccountService < BaseService
return unless only_key || verified_webfinger?
ActivityPub::ProcessAccountService.new.call(@username, @domain, @json, only_key: only_key)
ActivityPub::ProcessAccountService.new.call(@username, @domain, @json, only_key: only_key, verified_webfinger: !only_key)
rescue Oj::ParseError
nil
end

View file

@ -28,6 +28,8 @@ class ActivityPub::ProcessAccountService < BaseService
update_account
process_tags
process_attachments
process_duplicate_accounts! if @options[:verified_webfinger]
else
raise Mastodon::RaceConditionError
end
@ -69,34 +71,42 @@ class ActivityPub::ProcessAccountService < BaseService
@account.protocol = :activitypub
set_suspension!
set_immediate_protocol_attributes!
set_fetchable_key! unless @account.suspended? && @account.suspension_origin_local?
set_immediate_attributes! unless @account.suspended?
set_fetchable_attributes! unless @options[:only_keys] || @account.suspended?
set_fetchable_attributes! unless @options[:only_key] || @account.suspended?
@account.save_with_optional_media!
end
def set_immediate_attributes!
def set_immediate_protocol_attributes!
@account.inbox_url = @json['inbox'] || ''
@account.outbox_url = @json['outbox'] || ''
@account.shared_inbox_url = (@json['endpoints'].is_a?(Hash) ? @json['endpoints']['sharedInbox'] : @json['sharedInbox']) || ''
@account.followers_url = @json['followers'] || ''
@account.featured_collection_url = @json['featured'] || ''
@account.devices_url = @json['devices'] || ''
@account.url = url || @uri
@account.uri = @uri
@account.actor_type = actor_type
end
def set_immediate_attributes!
@account.featured_collection_url = @json['featured'] || ''
@account.devices_url = @json['devices'] || ''
@account.display_name = @json['name'] || ''
@account.note = @json['summary'] || ''
@account.locked = @json['manuallyApprovesFollowers'] || false
@account.fields = property_values || {}
@account.also_known_as = as_array(@json['alsoKnownAs'] || []).map { |item| value_or_id(item) }
@account.actor_type = actor_type
@account.discoverable = @json['discoverable'] || false
end
# Stores the actor's public key on the account, falling back to an empty
# string when `public_key` yields nil/false.
def set_fetchable_key!
  fetched_key = public_key
  @account.public_key = fetched_key || ''
end
def set_fetchable_attributes!
@account.avatar_remote_url = image_url('icon') || '' unless skip_download?
@account.header_remote_url = image_url('image') || '' unless skip_download?
@account.public_key = public_key || ''
@account.statuses_count = outbox_total_items if outbox_total_items.present?
@account.following_count = following_total_items if following_total_items.present?
@account.followers_count = followers_total_items if followers_total_items.present?
@ -140,6 +150,12 @@ class ActivityPub::ProcessAccountService < BaseService
VerifyAccountLinksWorker.perform_async(@account.id)
end
# Enqueues an AccountMergingWorker job for the current account when at least
# one other Account row has the same `uri`; otherwise does nothing and
# returns nil.
def process_duplicate_accounts!
  duplicates = Account.where(uri: @account.uri).where.not(id: @account.id)
  AccountMergingWorker.perform_async(@account.id) if duplicates.exists?
end
def actor_type
if @json['type'].is_a?(Array)
@json['type'].find { |type| ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES.include?(type) }

View file

@ -49,7 +49,7 @@ class ResolveAccountService < BaseService
# Now it is certain, it is definitely a remote account, and it
# either needs to be created, or updated from fresh data
process_account!
fetch_account!
rescue Webfinger::Error, Oj::ParseError => e
Rails.logger.debug "Webfinger query for #{@uri} failed: #{e}"
nil
@ -104,16 +104,12 @@ class ResolveAccountService < BaseService
acct.gsub(/\Aacct:/, '').split('@')
end
def process_account!
def fetch_account!
return unless activitypub_ready?
RedisLock.acquire(lock_options) do |lock|
if lock.acquired?
@account = Account.find_remote(@username, @domain)
next if actor_json.nil?
@account = ActivityPub::ProcessAccountService.new.call(@username, @domain, actor_json)
@account = ActivityPub::FetchRemoteAccountService.new.call(actor_url)
else
raise Mastodon::RaceConditionError
end
@ -136,13 +132,6 @@ class ResolveAccountService < BaseService
@actor_url ||= @webfinger.link('self', 'href')
end
def actor_json
return @actor_json if defined?(@actor_json)
json = fetch_resource(actor_url, false)
@actor_json = supported_context?(json) && equals_or_includes_any?(json['type'], ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) ? json : nil
end
# Predicate returning the current value of @gone (nil if it was never
# assigned). NOTE(review): where @gone is set is not visible in this
# excerpt — confirm against the rest of the service.
def gone_from_origin?
@gone
end

View file

@ -0,0 +1,18 @@
# frozen_string_literal: true
# Sidekiq worker (queue: 'pull') that merges every other Account row sharing
# the given account's `uri` into that account, then destroys the duplicates.
class AccountMergingWorker
  include Sidekiq::Worker

  sidekiq_options queue: 'pull'

  # @param account_id [Integer] primary key of the account to keep
  # @return [true, nil] true when there is nothing to do (account missing or
  #   local); otherwise whatever `find_each` returns
  def perform(account_id)
    # `find_by` returns nil for a missing row, so the guard below can
    # actually fire; `find` would raise ActiveRecord::RecordNotFound and
    # make the job fail for an account that no longer exists.
    account = Account.find_by(id: account_id)

    return true if account.nil? || account.local?

    Account.where(uri: account.uri).where.not(id: account.id).find_each do |duplicate|
      account.merge_with!(duplicate)
      duplicate.destroy
    end
  end
end

View file

@ -236,6 +236,25 @@ module Mastodon
say('OK', :green)
end
desc 'fix-duplicates', 'Find duplicate remote accounts and merge them'
option :dry_run, type: :boolean
long_desc <<-LONG_DESC
Merge known remote accounts sharing an ActivityPub actor identifier.
Such duplicates can occur when a remote server admin misconfigures their
domain configuration.
LONG_DESC
# Lists every `uri` shared by more than one remote account and, unless the
# `dry_run` option is set, re-fetches that actor through
# ActivityPub::FetchRemoteAccountService.
# NOTE(review): the actual merging is presumably triggered by the re-fetch
# rather than done here — confirm against the fetch/process services.
#
# Failures for one URI are reported in red and do not stop the loop.
def fix_duplicates
  Account.remote.select(:uri, 'count(*)').group(:uri).having('count(*) > 1').pluck_each(:uri) do |uri|
    say("Duplicates found for #{uri}")

    begin
      ActivityPub::FetchRemoteAccountService.new.call(uri) unless options[:dry_run]
    rescue => e
      say("Error processing #{uri}: #{e}", :red)
    end
  end
end
desc 'backup USERNAME', 'Request a backup for a user'
long_desc <<-LONG_DESC
Request a new backup for an account with a given USERNAME.

View file

@ -60,7 +60,22 @@ RSpec.describe ResolveAccountService, type: :service do
context 'with a legitimate webfinger redirection' do
before do
webfinger = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] }
webfinger = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] }
stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' })
end
it 'returns new remote account' do
account = subject.call('Foo@redirected.example.com')
expect(account.activitypub?).to eq true
expect(account.acct).to eq 'foo@ap.example.com'
expect(account.inbox_url).to eq 'https://ap.example.com/users/foo/inbox'
end
end
context 'with a misconfigured redirection' do
before do
webfinger = { subject: 'acct:Foo@redirected.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] }
stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' })
end
@ -75,9 +90,9 @@ RSpec.describe ResolveAccountService, type: :service do
context 'with too many webfinger redirections' do
before do
webfinger = { subject: 'acct:foo@evil.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] }
webfinger = { subject: 'acct:foo@evil.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] }
stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' })
webfinger2 = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] }
webfinger2 = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] }
stub_request(:get, 'https://evil.example.com/.well-known/webfinger?resource=acct:foo@evil.example.com').to_return(body: Oj.dump(webfinger2), headers: { 'Content-Type': 'application/jrd+json' })
end
@ -111,6 +126,41 @@ RSpec.describe ResolveAccountService, type: :service do
end
end
# An account row already exists for the same ActivityPub id under a different
# domain; resolving the new acct: URI should yield the new account and fold
# the old row's statuses into it.
context 'with an already-known actor changing acct: URI' do
  let!(:duplicate) { Fabricate(:account, username: 'foo', domain: 'old.example.com', uri: 'https://ap.example.com/users/foo') }
  let!(:status)    { Fabricate(:status, account: duplicate, text: 'foo') }

  it 'returns new remote account' do
    resolved = subject.call('foo@ap.example.com')

    expect(resolved.activitypub?).to be true
    expect(resolved).to have_attributes(
      domain: 'ap.example.com',
      inbox_url: 'https://ap.example.com/users/foo/inbox',
      uri: 'https://ap.example.com/users/foo'
    )
  end

  it 'merges accounts' do
    resolved = subject.call('foo@ap.example.com')

    # The pre-existing status must now belong to the resolved account...
    expect(status.reload.account_id).to eq resolved.id
    # ...and only one row may remain for that ActivityPub id.
    expect(Account.where(uri: resolved.uri).count).to eq 1
  end
end
# An account row already exists for foo@ap.example.com but with a different
# ActivityPub id; resolving should return an account carrying the new id.
context 'with an already-known acct: URI changing ActivityPub id' do
  let!(:old_account) { Fabricate(:account, username: 'foo', domain: 'ap.example.com', uri: 'https://old.example.com/users/foo', last_webfingered_at: nil) }
  let!(:status)      { Fabricate(:status, account: old_account, text: 'foo') }

  it 'returns new remote account' do
    resolved = subject.call('foo@ap.example.com')

    expect(resolved.activitypub?).to be true
    expect(resolved).to have_attributes(
      domain: 'ap.example.com',
      inbox_url: 'https://ap.example.com/users/foo/inbox',
      uri: 'https://ap.example.com/users/foo'
    )
  end
end
it 'processes one remote account at a time using locks' do
wait_for_start = true
fail_occurred = false