Fix hashtag matching pattern matching some URLs (#27584)

This commit is contained in:
Claire 2023-10-27 16:04:51 +02:00
parent a89a25714d
commit ab68df9af0
2 changed files with 5 additions and 1 deletions

View file

@ -33,7 +33,7 @@ class Tag < ApplicationRecord
HASTAG_LAST_SEQUENCE = '([[:word:]_]*[[:alpha:]][[:word:]_]*)' HASTAG_LAST_SEQUENCE = '([[:word:]_]*[[:alpha:]][[:word:]_]*)'
HASHTAG_NAME_PAT = "#{HASHTAG_FIRST_SEQUENCE}|#{HASTAG_LAST_SEQUENCE}" HASHTAG_NAME_PAT = "#{HASHTAG_FIRST_SEQUENCE}|#{HASTAG_LAST_SEQUENCE}"
HASHTAG_RE = %r{(?<![=/)[:word]])#(#{HASHTAG_NAME_PAT})}i HASHTAG_RE = %r{(?<![=/)\w])#(#{HASHTAG_NAME_PAT})}i
HASHTAG_NAME_RE = /\A(#{HASHTAG_NAME_PAT})\z/i HASHTAG_NAME_RE = /\A(#{HASHTAG_NAME_PAT})\z/i
HASHTAG_INVALID_CHARS_RE = /[^[:alnum:]#{HASHTAG_SEPARATORS}]/ HASHTAG_INVALID_CHARS_RE = /[^[:alnum:]#{HASHTAG_SEPARATORS}]/

View file

@ -31,6 +31,10 @@ RSpec.describe Tag do
expect(subject.match('https://en.wikipedia.org/wiki/Ghostbusters_(song)#Lawsuit')).to be_nil expect(subject.match('https://en.wikipedia.org/wiki/Ghostbusters_(song)#Lawsuit')).to be_nil
end end
it 'does not match URLs with hashtag-like anchors after a numeral' do
expect(subject.match('https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111895#c4')).to be_nil
end
it 'does not match URLs with hashtag-like anchors after an empty query parameter' do it 'does not match URLs with hashtag-like anchors after an empty query parameter' do
expect(subject.match('https://en.wikipedia.org/wiki/Ghostbusters_(song)?foo=#Lawsuit')).to be_nil expect(subject.match('https://en.wikipedia.org/wiki/Ghostbusters_(song)?foo=#Lawsuit')).to be_nil
end end