From a60d9335d8e7c4aa070f081719ee2a438b0e0202 Mon Sep 17 00:00:00 2001
From: ThibG <thib@sitedethib.com>
Date: Fri, 18 Dec 2020 23:26:26 +0100
Subject: [PATCH] Fix resolving accounts sometimes creating duplicate records
 for a given AP id (#15364)

* Fix ResolveAccountService accepting mismatching acct: URI

* Set attributes that should be updated regardless of suspension

* Fix key fetching

* Automatically merge remote accounts with duplicate `uri`

* Add tests

* Add "tootctl accounts fix-duplicates"

Finds duplicate accounts sharing a same ActivityPub `id`, re-fetch them and
merge them under the canonical `acct:` URI.

Co-authored-by: Claire <claire.github-309c@sitedethib.com>
---
 .../fetch_remote_account_service.rb           |  2 +-
 .../activitypub/process_account_service.rb    | 28 ++++++++--
 app/services/resolve_account_service.rb       | 17 +-----
 app/workers/account_merging_worker.rb         | 18 ++++++
 lib/mastodon/accounts_cli.rb                  | 19 +++++++
 spec/services/resolve_account_service_spec.rb | 56 ++++++++++++++++++-
 6 files changed, 116 insertions(+), 24 deletions(-)
 create mode 100644 app/workers/account_merging_worker.rb

diff --git a/app/services/activitypub/fetch_remote_account_service.rb b/app/services/activitypub/fetch_remote_account_service.rb
index e5bd0c47c..9d01f5386 100644
--- a/app/services/activitypub/fetch_remote_account_service.rb
+++ b/app/services/activitypub/fetch_remote_account_service.rb
@@ -28,7 +28,7 @@ class ActivityPub::FetchRemoteAccountService < BaseService
 
     return unless only_key || verified_webfinger?
 
-    ActivityPub::ProcessAccountService.new.call(@username, @domain, @json, only_key: only_key)
+    ActivityPub::ProcessAccountService.new.call(@username, @domain, @json, only_key: only_key, verified_webfinger: !only_key)
   rescue Oj::ParseError
     nil
   end
diff --git a/app/services/activitypub/process_account_service.rb b/app/services/activitypub/process_account_service.rb
index 4cb8e09db..6afeb92d6 100644
--- a/app/services/activitypub/process_account_service.rb
+++ b/app/services/activitypub/process_account_service.rb
@@ -28,6 +28,8 @@ class ActivityPub::ProcessAccountService < BaseService
         update_account
         process_tags
         process_attachments
+
+        process_duplicate_accounts! if @options[:verified_webfinger]
       else
         raise Mastodon::RaceConditionError
       end
@@ -69,34 +71,42 @@ class ActivityPub::ProcessAccountService < BaseService
     @account.protocol            = :activitypub
 
     set_suspension!
+    set_immediate_protocol_attributes!
+    set_fetchable_key! unless @account.suspended? && @account.suspension_origin_local?
     set_immediate_attributes! unless @account.suspended?
-    set_fetchable_attributes! unless @options[:only_keys] || @account.suspended?
+    set_fetchable_attributes! unless @options[:only_key] || @account.suspended?
 
     @account.save_with_optional_media!
   end
 
-  def set_immediate_attributes!
+  def set_immediate_protocol_attributes!
     @account.inbox_url               = @json['inbox'] || ''
     @account.outbox_url              = @json['outbox'] || ''
     @account.shared_inbox_url        = (@json['endpoints'].is_a?(Hash) ? @json['endpoints']['sharedInbox'] : @json['sharedInbox']) || ''
     @account.followers_url           = @json['followers'] || ''
-    @account.featured_collection_url = @json['featured'] || ''
-    @account.devices_url             = @json['devices'] || ''
     @account.url                     = url || @uri
     @account.uri                     = @uri
+    @account.actor_type              = actor_type
+  end
+
+  def set_immediate_attributes!
+    @account.featured_collection_url = @json['featured'] || ''
+    @account.devices_url             = @json['devices'] || ''
     @account.display_name            = @json['name'] || ''
     @account.note                    = @json['summary'] || ''
     @account.locked                  = @json['manuallyApprovesFollowers'] || false
     @account.fields                  = property_values || {}
     @account.also_known_as           = as_array(@json['alsoKnownAs'] || []).map { |item| value_or_id(item) }
-    @account.actor_type              = actor_type
     @account.discoverable            = @json['discoverable'] || false
   end
 
+  def set_fetchable_key!
+    @account.public_key        = public_key || ''
+  end
+
   def set_fetchable_attributes!
     @account.avatar_remote_url = image_url('icon')  || '' unless skip_download?
     @account.header_remote_url = image_url('image') || '' unless skip_download?
-    @account.public_key        = public_key || ''
     @account.statuses_count    = outbox_total_items    if outbox_total_items.present?
     @account.following_count   = following_total_items if following_total_items.present?
     @account.followers_count   = followers_total_items if followers_total_items.present?
@@ -140,6 +150,12 @@ class ActivityPub::ProcessAccountService < BaseService
     VerifyAccountLinksWorker.perform_async(@account.id)
   end
 
+  def process_duplicate_accounts!
+    return unless Account.where(uri: @account.uri).where.not(id: @account.id).exists?
+
+    AccountMergingWorker.perform_async(@account.id)
+  end
+
   def actor_type
     if @json['type'].is_a?(Array)
       @json['type'].find { |type| ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES.include?(type) }
diff --git a/app/services/resolve_account_service.rb b/app/services/resolve_account_service.rb
index 74b0b82d0..3301aaf51 100644
--- a/app/services/resolve_account_service.rb
+++ b/app/services/resolve_account_service.rb
@@ -49,7 +49,7 @@ class ResolveAccountService < BaseService
     # Now it is certain, it is definitely a remote account, and it
     # either needs to be created, or updated from fresh data
 
-    process_account!
+    fetch_account!
   rescue Webfinger::Error, Oj::ParseError => e
     Rails.logger.debug "Webfinger query for #{@uri} failed: #{e}"
     nil
@@ -104,16 +104,12 @@ class ResolveAccountService < BaseService
     acct.gsub(/\Aacct:/, '').split('@')
   end
 
-  def process_account!
+  def fetch_account!
     return unless activitypub_ready?
 
     RedisLock.acquire(lock_options) do |lock|
       if lock.acquired?
-        @account = Account.find_remote(@username, @domain)
-
-        next if actor_json.nil?
-
-        @account = ActivityPub::ProcessAccountService.new.call(@username, @domain, actor_json)
+        @account = ActivityPub::FetchRemoteAccountService.new.call(actor_url)
       else
         raise Mastodon::RaceConditionError
       end
@@ -136,13 +132,6 @@ class ResolveAccountService < BaseService
     @actor_url ||= @webfinger.link('self', 'href')
   end
 
-  def actor_json
-    return @actor_json if defined?(@actor_json)
-
-    json        = fetch_resource(actor_url, false)
-    @actor_json = supported_context?(json) && equals_or_includes_any?(json['type'], ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) ? json : nil
-  end
-
   def gone_from_origin?
     @gone
   end
diff --git a/app/workers/account_merging_worker.rb b/app/workers/account_merging_worker.rb
new file mode 100644
index 000000000..8c234e7ac
--- /dev/null
+++ b/app/workers/account_merging_worker.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+class AccountMergingWorker
+  include Sidekiq::Worker
+
+  sidekiq_options queue: 'pull'
+
+  def perform(account_id)
+    account = Account.find(account_id)
+
+    return true if account.nil? || account.local?
+
+    Account.where(uri: account.uri).where.not(id: account.id).find_each do |duplicate|
+      account.merge_with!(duplicate)
+      duplicate.destroy
+    end
+  end
+end
diff --git a/lib/mastodon/accounts_cli.rb b/lib/mastodon/accounts_cli.rb
index bef4093a8..474643869 100644
--- a/lib/mastodon/accounts_cli.rb
+++ b/lib/mastodon/accounts_cli.rb
@@ -236,6 +236,25 @@ module Mastodon
       say('OK', :green)
     end
 
+    desc 'fix-duplicates', 'Find duplicate remote accounts and merge them'
+    option :dry_run, type: :boolean
+    long_desc <<-LONG_DESC
+      Merge known remote accounts sharing an ActivityPub actor identifier.
+
+      Such duplicates can occur when a remote server admin misconfigures their
+      domain configuration.
+    LONG_DESC
+    def fix_duplicates
+      Account.remote.select(:uri, 'count(*)').group(:uri).having('count(*) > 1').pluck_each(:uri) do |uri|
+        say("Duplicates found for #{uri}")
+        begin
+          ActivityPub::FetchRemotAccountService.new.call(uri) unless options[:dry_run]
+        rescue => e
+          say("Error processing #{uri}: #{e}", :red)
+        end
+      end
+    end
+
     desc 'backup USERNAME', 'Request a backup for a user'
     long_desc <<-LONG_DESC
       Request a new backup for an account with a given USERNAME.
diff --git a/spec/services/resolve_account_service_spec.rb b/spec/services/resolve_account_service_spec.rb
index 5bd0ec264..a604e90b5 100644
--- a/spec/services/resolve_account_service_spec.rb
+++ b/spec/services/resolve_account_service_spec.rb
@@ -60,7 +60,22 @@ RSpec.describe ResolveAccountService, type: :service do
 
   context 'with a legitimate webfinger redirection' do
     before do
-      webfinger = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] }
+      webfinger = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] }
+      stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' })
+    end
+
+    it 'returns new remote account' do
+      account = subject.call('Foo@redirected.example.com')
+
+      expect(account.activitypub?).to eq true
+      expect(account.acct).to eq 'foo@ap.example.com'
+      expect(account.inbox_url).to eq 'https://ap.example.com/users/foo/inbox'
+    end
+  end
+
+  context 'with a misconfigured redirection' do
+    before do
+      webfinger = { subject: 'acct:Foo@redirected.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] }
       stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' })
     end
 
@@ -75,9 +90,9 @@ RSpec.describe ResolveAccountService, type: :service do
 
   context 'with too many webfinger redirections' do
     before do
-      webfinger = { subject: 'acct:foo@evil.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] }
+      webfinger = { subject: 'acct:foo@evil.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] }
       stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' })
-      webfinger2 = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] }
+      webfinger2 = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] }
       stub_request(:get, 'https://evil.example.com/.well-known/webfinger?resource=acct:foo@evil.example.com').to_return(body: Oj.dump(webfinger2), headers: { 'Content-Type': 'application/jrd+json' })
     end
 
@@ -111,6 +126,41 @@ RSpec.describe ResolveAccountService, type: :service do
     end
   end
 
+  context 'with an already-known actor changing acct: URI' do
+    let!(:duplicate) { Fabricate(:account, username: 'foo', domain: 'old.example.com', uri: 'https://ap.example.com/users/foo') }
+    let!(:status)    { Fabricate(:status, account: duplicate, text: 'foo') }
+
+    it 'returns new remote account' do
+      account = subject.call('foo@ap.example.com')
+
+      expect(account.activitypub?).to eq true
+      expect(account.domain).to eq 'ap.example.com'
+      expect(account.inbox_url).to eq 'https://ap.example.com/users/foo/inbox'
+      expect(account.uri).to eq 'https://ap.example.com/users/foo'
+    end
+
+    it 'merges accounts' do
+      account = subject.call('foo@ap.example.com')
+
+      expect(status.reload.account_id).to eq account.id
+      expect(Account.where(uri: account.uri).count).to eq 1
+    end
+  end
+
+  context 'with an already-known acct: URI changing ActivityPub id' do
+    let!(:old_account) { Fabricate(:account, username: 'foo', domain: 'ap.example.com', uri: 'https://old.example.com/users/foo', last_webfingered_at: nil) }
+    let!(:status)    { Fabricate(:status, account: old_account, text: 'foo') }
+
+    it 'returns new remote account' do
+      account = subject.call('foo@ap.example.com')
+
+      expect(account.activitypub?).to eq true
+      expect(account.domain).to eq 'ap.example.com'
+      expect(account.inbox_url).to eq 'https://ap.example.com/users/foo/inbox'
+      expect(account.uri).to eq 'https://ap.example.com/users/foo'
+    end
+  end
+
   it 'processes one remote account at a time using locks' do
     wait_for_start = true
     fail_occurred  = false