Fix some link anchors being recognized as hashtags (#27271)

This commit is contained in:
Claire 2023-10-23 14:19:38 +02:00
parent 1210524a3d
commit a89a25714d
2 changed files with 16 additions and 12 deletions

View file

@ -33,7 +33,7 @@ class Tag < ApplicationRecord
# Name alternative made of word characters/underscores that contains at least
# one alphabetic character. The "HASTAG" spelling (sic) is kept unchanged so
# that existing references to this constant keep resolving.
HASTAG_LAST_SEQUENCE = '([[:word:]_]*[[:alpha:]][[:word:]_]*)'
# A hashtag name matches either the first-sequence or the last-sequence form.
HASHTAG_NAME_PAT = "#{HASHTAG_FIRST_SEQUENCE}|#{HASTAG_LAST_SEQUENCE}"
# Pre-change definition (the line this diff removes): it consumed the character
# in front of `#`, so matches carried a leading character such as a space.
HASHTAG_RE = /(?:^|[^\/\)\w])#(#{HASHTAG_NAME_PAT})/i
# Post-change definition: a zero-width lookbehind rejects `#` when it directly
# follows `=`, `/`, `)` or a word character, so the match starts at `#` itself.
# The POSIX class must be spelled `[:word:]`; without the closing colon the
# regexp engine reads `[:word]` as a nested set of the literal characters
# `:`, `w`, `o`, `r`, `d`, and anchors after other word characters still match.
HASHTAG_RE = %r{(?<![=/)[:word:]])#(#{HASHTAG_NAME_PAT})}i
# Whole-string form of the name pattern (anchored with \A and \z).
HASHTAG_NAME_RE = /\A(#{HASHTAG_NAME_PAT})\z/i
# Matches any single character that is neither alphanumeric nor one of
# HASHTAG_SEPARATORS.
HASHTAG_INVALID_CHARS_RE = /[^[:alnum:]#{HASHTAG_SEPARATORS}]/

View file

@ -31,44 +31,48 @@ RSpec.describe Tag do
expect(subject.match('https://en.wikipedia.org/wiki/Ghostbusters_(song)#Lawsuit')).to be_nil
end
it 'does not match URLs with hashtag-like anchors after an empty query parameter' do
# The `#` directly follows `=` (the empty `foo=` parameter), so the URL anchor
# must not be picked up as a hashtag.
expect(subject.match('https://en.wikipedia.org/wiki/Ghostbusters_(song)?foo=#Lawsuit')).to be_nil
end
it 'matches #' do
# NOTE(review): the sample text shows a bare `#` with no name after it;
# characters may have been lost from this example — verify against the
# original spec.
# Pre-change expectation: the match included the character before `#`.
expect(subject.match('this is #').to_s).to eq ' #'
# Post-change expectation: the match starts at `#`.
expect(subject.match('this is #').to_s).to eq '#'
end
it 'matches digits at the start' do
# Pre-change expectation: the match included the space before `#`.
expect(subject.match('hello #3d').to_s).to eq ' #3d'
# Post-change expectation: the match starts at `#`.
expect(subject.match('hello #3d').to_s).to eq '#3d'
end
it 'matches digits in the middle' do
# Pre-change expectation: the match included the space before `#`.
expect(subject.match('hello #l33ts35k').to_s).to eq ' #l33ts35k'
# Post-change expectation: the match starts at `#`.
expect(subject.match('hello #l33ts35k').to_s).to eq '#l33ts35k'
end
it 'matches digits at the end' do
# Pre-change expectation: the match included the space before `#`.
expect(subject.match('hello #world2016').to_s).to eq ' #world2016'
# Post-change expectation: the match starts at `#`.
expect(subject.match('hello #world2016').to_s).to eq '#world2016'
end
it 'matches underscores at the beginning' do
# Pre-change expectation: the match included the space before `#`.
expect(subject.match('hello #_test').to_s).to eq ' #_test'
# Post-change expectation: the match starts at `#`.
expect(subject.match('hello #_test').to_s).to eq '#_test'
end
it 'matches underscores at the end' do
# Pre-change expectation: the match included the space before `#`.
expect(subject.match('hello #test_').to_s).to eq ' #test_'
# Post-change expectation: the match starts at `#`.
expect(subject.match('hello #test_').to_s).to eq '#test_'
end
it 'matches underscores in the middle' do
# Pre-change expectation: the match included the space before `#`.
expect(subject.match('hello #one_two_three').to_s).to eq ' #one_two_three'
# Post-change expectation: the match starts at `#`.
expect(subject.match('hello #one_two_three').to_s).to eq '#one_two_three'
end
it 'matches middle dots' do
# Pre-change expectation: the match included the space before `#`.
expect(subject.match('hello #one·two·three').to_s).to eq ' #one·two·three'
# Post-change expectation: the match starts at `#`.
expect(subject.match('hello #one·two·three').to_s).to eq '#one·two·three'
end
it 'matches ・unicode in ぼっち・ざ・ろっく correctly' do
# Pre-change expectation: the match included the space before `#`.
expect(subject.match('testing #ぼっち・ざ・ろっく').to_s).to eq ' #ぼっち・ざ・ろっく'
# Post-change expectation: the match starts at `#`.
expect(subject.match('testing #ぼっち・ざ・ろっく').to_s).to eq '#ぼっち・ざ・ろっく'
end
it 'matches ZWNJ' do
# Pre-change expectation: the match included the space before `#`.
expect(subject.match('just add #نرم‌افزار and').to_s).to eq ' #نرم‌افزار'
# Post-change expectation: the match starts at `#`.
expect(subject.match('just add #نرم‌افزار and').to_s).to eq '#نرم‌افزار'
end
it 'does not match middle dots at the start' do
@ -76,7 +80,7 @@ RSpec.describe Tag do
end
it 'does not match middle dots at the end' do
# In both expectations the trailing `·` is left out of the match.
# Pre-change expectation: the match included the space before `#`.
expect(subject.match('hello #one·two·three·').to_s).to eq ' #one·two·three'
# Post-change expectation: the match starts at `#`.
expect(subject.match('hello #one·two·three·').to_s).to eq '#one·two·three'
end
it 'does not match purely-numeric hashtags' do