From 73850168373366376f3907299b2c32469d287180 Mon Sep 17 00:00:00 2001 From: Christian Schmidt Date: Thu, 21 Nov 2024 15:37:25 +0100 Subject: [PATCH] `rel="me"` check should be case-insensitive (#32238) --- app/lib/link_details_extractor.rb | 2 +- app/lib/nokogiri_handler.rb | 12 ++++++++++++ app/services/fetch_resource_service.rb | 2 +- app/services/verify_link_service.rb | 2 +- spec/services/verify_link_service_spec.rb | 15 +++++++++++++++ 5 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 app/lib/nokogiri_handler.rb diff --git a/app/lib/link_details_extractor.rb b/app/lib/link_details_extractor.rb index e4e815c38..56533f655 100644 --- a/app/lib/link_details_extractor.rb +++ b/app/lib/link_details_extractor.rb @@ -237,7 +237,7 @@ class LinkDetailsExtractor end def link_tag(name) - document.xpath("//link[@rel=\"#{name}\"]").pick('href') + document.xpath("//link[nokogiri:link_rel_include(@rel, '#{name}')]", NokogiriHandler).pick('href') end def opengraph_tag(name) diff --git a/app/lib/nokogiri_handler.rb b/app/lib/nokogiri_handler.rb new file mode 100644 index 000000000..804bcb9c0 --- /dev/null +++ b/app/lib/nokogiri_handler.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +class NokogiriHandler + class << self + # See "set of space-separated tokens" in the HTML5 spec. 
+ WHITE_SPACE = /[ \x09\x0A\x0C\x0D]+/ + + def link_rel_include(token_list, token) + token_list.to_s.downcase.split(WHITE_SPACE).include?(token.downcase) + end + end +end diff --git a/app/services/fetch_resource_service.rb b/app/services/fetch_resource_service.rb index 911950ccc..3fde78455 100644 --- a/app/services/fetch_resource_service.rb +++ b/app/services/fetch_resource_service.rb @@ -74,7 +74,7 @@ class FetchResourceService < BaseService def process_html(response) page = Nokogiri::HTML5(response.body_with_limit) - json_link = page.xpath('//link[@rel="alternate"]').find { |link| ACTIVITY_STREAM_LINK_TYPES.include?(link['type']) } + json_link = page.xpath('//link[nokogiri:link_rel_include(@rel, "alternate")]', NokogiriHandler).find { |link| ACTIVITY_STREAM_LINK_TYPES.include?(link['type']) } process(json_link['href'], terminal: true) unless json_link.nil? end diff --git a/app/services/verify_link_service.rb b/app/services/verify_link_service.rb index 17c86426b..fc3c4cbc2 100644 --- a/app/services/verify_link_service.rb +++ b/app/services/verify_link_service.rb @@ -26,7 +26,7 @@ class VerifyLinkService < BaseService def link_back_present? return false if @body.blank? - links = Nokogiri::HTML5(@body).css("a[rel~='me'],link[rel~='me']") + links = Nokogiri::HTML5(@body).xpath('(//a|//link)[@rel][nokogiri:link_rel_include(@rel, "me")]', NokogiriHandler) if links.any? 
{ |link| link['href']&.downcase == @link_back.downcase } true diff --git a/spec/services/verify_link_service_spec.rb b/spec/services/verify_link_service_spec.rb index a4fd19751..7e2f9607c 100644 --- a/spec/services/verify_link_service_spec.rb +++ b/spec/services/verify_link_service_spec.rb @@ -46,6 +46,21 @@ RSpec.describe VerifyLinkService do end end + context 'when a link contains an back' do + let(:html) do + <<~HTML + + + Follow me on Mastodon + + HTML + end + + it 'marks the field as verified' do + expect(field.verified?).to be true + end + end + context 'when a link contains a back' do let(:html) do <<~HTML