diff --git a/app/models/tag.rb b/app/models/tag.rb index 35be921e2..3f88cb068 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -37,7 +37,7 @@ class Tag < ApplicationRecord HASHTAG_LAST_SEQUENCE = '([[:word:]_]*[[:alpha:]][[:word:]_]*)' HASHTAG_NAME_PAT = "#{HASHTAG_FIRST_SEQUENCE}|#{HASHTAG_LAST_SEQUENCE}" - HASHTAG_RE = %r{(?<![=/)\w])#(#{HASHTAG_NAME_PAT})}i + HASHTAG_RE = %r{(?<![=/)\p{Alnum}])#(#{HASHTAG_NAME_PAT})}i HASHTAG_NAME_RE = /\A(#{HASHTAG_NAME_PAT})\z/i HASHTAG_INVALID_CHARS_RE = /[^[:alnum:]\u0E47-\u0E4E#{HASHTAG_SEPARATORS}]/ diff --git a/spec/models/tag_spec.rb b/spec/models/tag_spec.rb index 4c2bdd52f..7799afe44 100644 --- a/spec/models/tag_spec.rb +++ b/spec/models/tag_spec.rb @@ -36,6 +36,10 @@ RSpec.describe Tag do expect(subject.match('https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111895#c4')).to be_nil end + it 'does not match URLs with hashtag-like anchors after a non-ascii character' do + expect(subject.match('https://example.org/testé#foo')).to be_nil + end + it 'does not match URLs with hashtag-like anchors after an empty query parameter' do expect(subject.match('https://en.wikipedia.org/wiki/Ghostbusters_(song)?foo=#Lawsuit')).to be_nil end