def build_fediverse_re(tlds):
    """
    Build a regular expression that recognises fediverse (Mastodon-style)
    account handles of the form ``@name@server.tld``.

    The pattern accepts any number of opening parentheses, followed by ``@``,
    a non-empty account name that contains no ``@``, another ``@`` and a
    dotted host name whose last label is one of the given TLDs. An optional
    ``:port`` may follow the host name. The host name must not be continued
    by a further ``.label``.

    :param tlds: Iterable of top-level domains (without a leading dot) that
                 are accepted as the last label of the server name.
    :return: A pattern compiled with ``re.IGNORECASE | re.VERBOSE | re.UNICODE``.
    """
    # The pattern is compiled in VERBOSE mode, so every TLD is escaped: a
    # ".", "#" or blank inside an entry has to be matched literally instead
    # of being read as regex syntax, a comment start or ignorable whitespace.
    # Sorting keeps the generated pattern deterministic for unordered input.
    tld_alternatives = "|".join(re.escape(tld) for tld in sorted(tlds))
    return re.compile(
        r"""\(*  # Match any opening parentheses.
        @[^@]+@
        ([\w-]+\.)+(?:{0})(?:\:[0-9]+)?(?!\.\w)\b   # xx.yy.tld(:##)?
        """.format(tld_alternatives),
        re.IGNORECASE | re.VERBOSE | re.UNICODE,
    )
+ parts = text.split('@') + text = f'https://{parts[2]}/@{parts[1]}' text_url = urllib.parse.urlparse(text) if text_url.netloc != href_url.netloc or not href_url.path.startswith(href_url.path): # link text contains an URL that has a different base than the actual URL attrs['_text'] = attrs[None, 'href'] + return attrs diff --git a/src/tests/base/test_rich_text.py b/src/tests/base/test_rich_text.py index c17fb46546..c893b7d3bb 100644 --- a/src/tests/base/test_rich_text.py +++ b/src/tests/base/test_rich_text.py @@ -73,10 +73,34 @@ from pretix.base.templatetags.rich_text import ( 'goodsite.com', 'https://goodsite.com.evilsite.com', ), + ( + 'evilsite.com/bad/path/', + 'https://evilsite.com/deep/path', + ), + ( + 'evilsite.com/deep', + 'evilsite.com/deep', + ), ( 'evilsite.com', 'evilsite.com', ), + ( + 'evilsite.com', + 'evilsite.com', + ), + ( + 'https://foo:bar@goodsite.com', + 'https://foo:bar@evilsite.com/deep/path', + ), + ( + '@pretix@pretix.social', + '@pretix@pretix.social', + ), + ( + '@pretix@pretix.social', + 'https://evilsite.social/@pretix', + ), ("broken", "broken"), ], )