Compare commits

...

1 Commits

Author SHA1 Message Date
Raphael Michel
d6b002d4de Fix #5765 -- Email rendering: Ampersands and placeholders in URLs 2026-01-06 16:24:20 +01:00
2 changed files with 43 additions and 5 deletions

View File

@@ -32,13 +32,14 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under the License.
import html
import re
import urllib.parse
import bleach
import markdown
from bleach import DEFAULT_CALLBACKS
from bleach.linkifier import build_email_re, build_url_re
from bleach import DEFAULT_CALLBACKS, html5lib_shim
from bleach.linkifier import build_email_re
from django import template
from django.conf import settings
from django.core import signing
@@ -124,6 +125,23 @@ ALLOWED_ATTRIBUTES = {
ALLOWED_PROTOCOLS = {'http', 'https', 'mailto', 'tel'}
def build_url_re(tlds=tld_set, protocols=html5lib_shim.allowed_protocols):
# Differs from bleach regex by allowing { and } in URL to allow placeholders in URL parameters
return re.compile(
r"""\(* # Match any opening parentheses.
\b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)? # http://
([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b # xx.yy.tld(:##)?
(?:[/?][^\s\|\\\^`<>"]*)?
# /path/zz (excluding "unsafe" chars from RFC 3986,
# except for # and ~, which happen in practice)
""".format(
"|".join(sorted(protocols)), "|".join(sorted(tlds))
),
re.IGNORECASE | re.VERBOSE | re.UNICODE,
)
URL_RE = SimpleLazyObject(lambda: build_url_re(tlds=sorted(tld_set, key=len, reverse=True)))
EMAIL_RE = SimpleLazyObject(lambda: build_email_re(tlds=sorted(tld_set, key=len, reverse=True)))
@@ -333,8 +351,14 @@ def markdown_compile_email(source, allowed_tags=None, allowed_attributes=ALLOWED
# This is a workaround to fix placeholders in URL targets
def context_callback(attrs, new=False):
if (None, "href") in attrs and "{" in attrs[None, "href"]:
# Do not use MODE_RICH_TO_HTML to avoid recursive linkification
attrs[None, "href"] = escape(format_map(attrs[None, "href"], context=context, mode=SafeFormatter.MODE_RICH_TO_PLAIN))
# Do not use MODE_RICH_TO_HTML to avoid recursive linkification.
# We want to esacpe the end result, however, we need to unescape the input to prevent & being turned
# to &amp;amp; because the input is already escaped by the markdown parser.
attrs[None, "href"] = escape(format_map(
html.unescape(attrs[None, "href"]),
context=context,
mode=SafeFormatter.MODE_RICH_TO_PLAIN
))
return attrs
context_callbacks.append(context_callback)

View File

@@ -227,7 +227,9 @@ def test_placeholder_html_rendering_from_string(env):
"Event website: [{event}](https://example.org/{event_slug})\n\n"
"Other website: [{event}]({meta_Website})\n\n"
"URL: {url}\n\n"
"URL with text: <a href=\"{url}\">Test</a>"
"URL with text: <a href=\"{url}\">Test</a>\n\n"
"URL with params: https://example.com/form?action=foo&eventid={event_slug}\n\n"
"URL with params and text: [Link & Text](https://example.com/form?action=foo&eventid={event_slug})\n\n"
})
djmail.outbox = []
event, user, organizer = env
@@ -249,6 +251,8 @@ def test_placeholder_html_rendering_from_string(env):
assert '**Meta**: *Beep*' in djmail.outbox[0].body
assert 'URL: https://google.com' in djmail.outbox[0].body
assert 'URL with text: <a href="https://google.com">Test</a>' in djmail.outbox[0].body
assert 'URL with params: https://example.com/form?action=foo&eventid=dummy' in djmail.outbox[0].body
assert 'URL with params and text: [Link & Text](https://example.com/form?action=foo&eventid=dummy)' in djmail.outbox[0].body
assert '&lt;' not in djmail.outbox[0].body
assert '&amp;' not in djmail.outbox[0].body
html = _extract_html(djmail.outbox[0])
@@ -272,3 +276,13 @@ def test_placeholder_html_rendering_from_string(env):
r'URL with text: <a href="https://google.com" rel="noopener" style="[^"]+" target="_blank">Test</a>',
html
)
assert re.search(
r'URL with params: <a href="https://example.com/form\?action=foo&amp;eventid=dummy" rel="noopener" '
r'style="[^"]+" target="_blank">https://example.com/form\?action=foo&amp;eventid=dummy</a>',
html
)
assert re.search(
r'URL with params and text: <a href="https://example.com/form\?action=foo&amp;eventid=dummy" rel="noopener" '
r'style="[^"]+" target="_blank">Link &amp; Text</a>',
html
)