forked from CGM_Public/pretix_original
Markdown link parser: Fix fediverse URLs and URLs with user or path (#5563)
This commit is contained in:
@@ -54,6 +54,19 @@ from tlds import tld_set
|
|||||||
|
|
||||||
register = template.Library()
|
register = template.Library()
|
||||||
|
|
||||||
|
|
||||||
|
def build_fediverse_re(tlds):
    """Compile a regex matching fediverse handles such as ``@name@server.tld``.

    The pattern accepts any number of leading opening parentheses, then
    ``@<name>@`` (where the name is one or more non-``@`` characters), then a
    dotted host name ending in one of ``tlds`` with an optional ``:port``.
    Matching is case-insensitive.

    :param tlds: iterable of top-level-domain strings (without leading dot).
        Entries are matched literally. Empty strings are ignored; if no usable
        entry remains, the returned pattern never matches.
    :return: the compiled regular expression object.
    """
    # Try longer TLDs first so the alternation prefers the most specific
    # suffix; the secondary alphabetical key keeps the pattern deterministic
    # even when ``tlds`` is an unordered collection.
    ordered = sorted((tld for tld in tlds if tld), key=lambda tld: (-len(tld), tld))

    # Escape every entry: the pattern is compiled with re.VERBOSE, where
    # unescaped whitespace and "#" are dropped, and an unescaped "." would
    # match any character instead of a literal dot.
    # An empty alternation would match the empty string and thereby accept
    # arbitrary host names, so fall back to "(?!)", which can never match.
    alternation = "|".join(re.escape(tld) for tld in ordered) or "(?!)"

    return re.compile(
        r"""\(*  # Match any opening parentheses.
        @[^@]+@
        ([\w-]+\.)+(?:{0})(?:\:[0-9]+)?(?!\.\w)\b   # xx.yy.tld(:##)?
        """.format(alternation),
        re.IGNORECASE | re.VERBOSE | re.UNICODE,
    )
|
||||||
|
|
||||||
|
|
||||||
ALLOWED_TAGS_SNIPPET = {
|
ALLOWED_TAGS_SNIPPET = {
|
||||||
'a',
|
'a',
|
||||||
'abbr',
|
'abbr',
|
||||||
@@ -112,6 +125,8 @@ URL_RE = SimpleLazyObject(lambda: build_url_re(tlds=sorted(tld_set, key=len, rev
|
|||||||
|
|
||||||
EMAIL_RE = SimpleLazyObject(lambda: build_email_re(tlds=sorted(tld_set, key=len, reverse=True)))
|
EMAIL_RE = SimpleLazyObject(lambda: build_email_re(tlds=sorted(tld_set, key=len, reverse=True)))
|
||||||
|
|
||||||
|
FEDIVERSE_RE = SimpleLazyObject(lambda: build_fediverse_re(tlds=sorted(tld_set, key=len, reverse=True)))
|
||||||
|
|
||||||
DOT_ESCAPE = "|escaped-dot-sGnY9LMK|"
|
DOT_ESCAPE = "|escaped-dot-sGnY9LMK|"
|
||||||
|
|
||||||
|
|
||||||
@@ -144,9 +159,11 @@ def truelink_callback(attrs, new=False):
|
|||||||
|
|
||||||
<a href="https://maps.google.com/location/foo">https://maps.google.com</a>
|
<a href="https://maps.google.com/location/foo">https://maps.google.com</a>
|
||||||
"""
|
"""
|
||||||
text = re.sub(r'[^a-zA-Z0-9.\-/_ ]', '', attrs.get('_text')) # clean up link text
|
text = re.sub(r'[^a-zA-Z0-9.\-/_@: ]', '', attrs.get('_text')) # clean up link text
|
||||||
url = attrs.get((None, 'href'), '/')
|
url = attrs.get((None, 'href'), '/')
|
||||||
href_url = urllib.parse.urlparse(url)
|
href_url = urllib.parse.urlparse(url)
|
||||||
|
|
||||||
|
# Verify server name of URL names
|
||||||
if (None, 'href') in attrs and URL_RE.match(text) and href_url.scheme not in ('tel', 'mailto'):
|
if (None, 'href') in attrs and URL_RE.match(text) and href_url.scheme not in ('tel', 'mailto'):
|
||||||
# link text looks like a url
|
# link text looks like a url
|
||||||
if text.startswith('//'):
|
if text.startswith('//'):
|
||||||
@@ -154,10 +171,20 @@ def truelink_callback(attrs, new=False):
|
|||||||
elif not text.startswith('http'):
|
elif not text.startswith('http'):
|
||||||
text = 'https://' + text
|
text = 'https://' + text
|
||||||
|
|
||||||
|
text_url = urllib.parse.urlparse(text)
|
||||||
|
if text_url.netloc.split("@")[-1] != href_url.netloc.split("@")[-1] or not href_url.path.startswith(text_url.path):
|
||||||
|
# link text contains an URL that has a different base than the actual URL
|
||||||
|
attrs['_text'] = attrs[None, 'href']
|
||||||
|
|
||||||
|
# Verify server name of mastodon display names (@name@server.tld)
|
||||||
|
if (None, 'href') in attrs and FEDIVERSE_RE.match(text):
|
||||||
|
parts = text.split('@')
|
||||||
|
text = f'https://{parts[2]}/@{parts[1]}'
|
||||||
text_url = urllib.parse.urlparse(text)
|
text_url = urllib.parse.urlparse(text)
|
||||||
if text_url.netloc != href_url.netloc or not href_url.path.startswith(href_url.path):
|
if text_url.netloc != href_url.netloc or not href_url.path.startswith(href_url.path):
|
||||||
# link text contains an URL that has a different base than the actual URL
|
# link text contains an URL that has a different base than the actual URL
|
||||||
attrs['_text'] = attrs[None, 'href']
|
attrs['_text'] = attrs[None, 'href']
|
||||||
|
|
||||||
return attrs
|
return attrs
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -73,10 +73,34 @@ from pretix.base.templatetags.rich_text import (
|
|||||||
'<a href="https://goodsite.com.evilsite.com">goodsite.com</a>',
|
'<a href="https://goodsite.com.evilsite.com">goodsite.com</a>',
|
||||||
'<a href="https://goodsite.com.evilsite.com" rel="noopener" target="_blank">https://goodsite.com.evilsite.com</a>',
|
'<a href="https://goodsite.com.evilsite.com" rel="noopener" target="_blank">https://goodsite.com.evilsite.com</a>',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'<a href="https://evilsite.com/deep/path">evilsite.com/bad/path/</a>',
|
||||||
|
'<a href="https://evilsite.com/deep/path" rel="noopener" target="_blank">https://evilsite.com/deep/path</a>',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'<a href="https://evilsite.com/deep/path">evilsite.com/deep</a>',
|
||||||
|
'<a href="https://evilsite.com/deep/path" rel="noopener" target="_blank">evilsite.com/deep</a>',
|
||||||
|
),
|
||||||
(
|
(
|
||||||
'<a href="https://evilsite.com/deep/path">evilsite.com</a>',
|
'<a href="https://evilsite.com/deep/path">evilsite.com</a>',
|
||||||
'<a href="https://evilsite.com/deep/path" rel="noopener" target="_blank">evilsite.com</a>',
|
'<a href="https://evilsite.com/deep/path" rel="noopener" target="_blank">evilsite.com</a>',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'<a href="https://user:pass@evilsite.com/deep/path">evilsite.com</a>',
|
||||||
|
'<a href="https://user:pass@evilsite.com/deep/path" rel="noopener" target="_blank">evilsite.com</a>',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'<a href="https://foo:bar@evilsite.com/deep/path">https://foo:bar@goodsite.com</a>',
|
||||||
|
'<a href="https://foo:bar@evilsite.com/deep/path" rel="noopener" target="_blank">https://foo:bar@evilsite.com/deep/path</a>',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'<a href="https://pretix.social/@pretix">@pretix@pretix.social</a>',
|
||||||
|
'<a href="https://pretix.social/@pretix" rel="noopener" target="_blank">@pretix@pretix.social</a>',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'<a href="https://evilsite.social/@pretix">@pretix@pretix.social</a>',
|
||||||
|
'<a href="https://evilsite.social/@pretix" rel="noopener" target="_blank">https://evilsite.social/@pretix</a>',
|
||||||
|
),
|
||||||
("<a>broken</a>", "<a>broken</a>"),
|
("<a>broken</a>", "<a>broken</a>"),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user