Fix #5765 -- Email rendering: Ampersands and placeholders in URLs (#5766)

This commit is contained in:
Raphael Michel
2026-01-09 13:01:21 +01:00
committed by GitHub
parent 1c684d62d4
commit c65fecf45e
2 changed files with 43 additions and 5 deletions

View File

@@ -32,13 +32,14 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under the License. # License for the specific language governing permissions and limitations under the License.
import html
import re import re
import urllib.parse import urllib.parse
import bleach import bleach
import markdown import markdown
from bleach import DEFAULT_CALLBACKS from bleach import DEFAULT_CALLBACKS, html5lib_shim
from bleach.linkifier import build_email_re, build_url_re from bleach.linkifier import build_email_re
from django import template from django import template
from django.conf import settings from django.conf import settings
from django.core import signing from django.core import signing
@@ -124,6 +125,23 @@ ALLOWED_ATTRIBUTES = {
ALLOWED_PROTOCOLS = {'http', 'https', 'mailto', 'tel'} ALLOWED_PROTOCOLS = {'http', 'https', 'mailto', 'tel'}
def build_url_re(tlds=tld_set, protocols=html5lib_shim.allowed_protocols):
    """Compile the regular expression used to detect URLs in text.

    Differs from the bleach regex by allowing ``{`` and ``}`` in the URL so
    that placeholders can be used in URL parameters.

    :param tlds: iterable of top-level domains; sorted and joined with ``|``
        to form the TLD alternation of the pattern.
    :param protocols: iterable of URL schemes; sorted and joined with ``|``
        to form the optional scheme prefix of the pattern.
    :return: pattern compiled with ``re.IGNORECASE | re.VERBOSE | re.UNICODE``.
    """
    protocol_alternation = "|".join(sorted(protocols))
    tld_alternation = "|".join(sorted(tlds))

    # The template goes through str.format(): {0} is the protocol alternation,
    # {1} the TLD alternation, and doubled braces are literal regex braces.
    # The pattern lines are kept flush-left so the pattern text stays unchanged;
    # under re.VERBOSE the whitespace and "#" comments are ignored anyway.
    url_template = r"""\(* # Match any opening parentheses.
\b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)? # http://
([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b # xx.yy.tld(:##)?
(?:[/?][^\s\|\\\^`<>"]*)?
# /path/zz (excluding "unsafe" chars from RFC 3986,
# except for # and ~, which happen in practice)
"""
    regex_flags = re.IGNORECASE | re.VERBOSE | re.UNICODE
    return re.compile(
        url_template.format(protocol_alternation, tld_alternation),
        regex_flags,
    )
URL_RE = SimpleLazyObject(lambda: build_url_re(tlds=sorted(tld_set, key=len, reverse=True))) URL_RE = SimpleLazyObject(lambda: build_url_re(tlds=sorted(tld_set, key=len, reverse=True)))
EMAIL_RE = SimpleLazyObject(lambda: build_email_re(tlds=sorted(tld_set, key=len, reverse=True))) EMAIL_RE = SimpleLazyObject(lambda: build_email_re(tlds=sorted(tld_set, key=len, reverse=True)))
@@ -333,8 +351,14 @@ def markdown_compile_email(source, allowed_tags=None, allowed_attributes=ALLOWED
# This is a workaround to fix placeholders in URL targets # This is a workaround to fix placeholders in URL targets
def context_callback(attrs, new=False): def context_callback(attrs, new=False):
if (None, "href") in attrs and "{" in attrs[None, "href"]: if (None, "href") in attrs and "{" in attrs[None, "href"]:
# Do not use MODE_RICH_TO_HTML to avoid recursive linkification # Do not use MODE_RICH_TO_HTML to avoid recursive linkification.
attrs[None, "href"] = escape(format_map(attrs[None, "href"], context=context, mode=SafeFormatter.MODE_RICH_TO_PLAIN)) # We want to escape the end result; however, we need to unescape the input to prevent & being turned
# to &amp;amp; because the input is already escaped by the markdown parser.
attrs[None, "href"] = escape(format_map(
html.unescape(attrs[None, "href"]),
context=context,
mode=SafeFormatter.MODE_RICH_TO_PLAIN
))
return attrs return attrs
context_callbacks.append(context_callback) context_callbacks.append(context_callback)

View File

@@ -227,7 +227,9 @@ def test_placeholder_html_rendering_from_string(env):
"Event website: [{event}](https://example.org/{event_slug})\n\n" "Event website: [{event}](https://example.org/{event_slug})\n\n"
"Other website: [{event}]({meta_Website})\n\n" "Other website: [{event}]({meta_Website})\n\n"
"URL: {url}\n\n" "URL: {url}\n\n"
"URL with text: <a href=\"{url}\">Test</a>" "URL with text: <a href=\"{url}\">Test</a>\n\n"
"URL with params: https://example.com/form?action=foo&eventid={event_slug}\n\n"
"URL with params and text: [Link & Text](https://example.com/form?action=foo&eventid={event_slug})\n\n"
}) })
djmail.outbox = [] djmail.outbox = []
event, user, organizer = env event, user, organizer = env
@@ -249,6 +251,8 @@ def test_placeholder_html_rendering_from_string(env):
assert '**Meta**: *Beep*' in djmail.outbox[0].body assert '**Meta**: *Beep*' in djmail.outbox[0].body
assert 'URL: https://google.com' in djmail.outbox[0].body assert 'URL: https://google.com' in djmail.outbox[0].body
assert 'URL with text: <a href="https://google.com">Test</a>' in djmail.outbox[0].body assert 'URL with text: <a href="https://google.com">Test</a>' in djmail.outbox[0].body
assert 'URL with params: https://example.com/form?action=foo&eventid=dummy' in djmail.outbox[0].body
assert 'URL with params and text: [Link & Text](https://example.com/form?action=foo&eventid=dummy)' in djmail.outbox[0].body
assert '&lt;' not in djmail.outbox[0].body assert '&lt;' not in djmail.outbox[0].body
assert '&amp;' not in djmail.outbox[0].body assert '&amp;' not in djmail.outbox[0].body
html = _extract_html(djmail.outbox[0]) html = _extract_html(djmail.outbox[0])
@@ -272,3 +276,13 @@ def test_placeholder_html_rendering_from_string(env):
r'URL with text: <a href="https://google.com" rel="noopener" style="[^"]+" target="_blank">Test</a>', r'URL with text: <a href="https://google.com" rel="noopener" style="[^"]+" target="_blank">Test</a>',
html html
) )
assert re.search(
r'URL with params: <a href="https://example.com/form\?action=foo&amp;eventid=dummy" rel="noopener" '
r'style="[^"]+" target="_blank">https://example.com/form\?action=foo&amp;eventid=dummy</a>',
html
)
assert re.search(
r'URL with params and text: <a href="https://example.com/form\?action=foo&amp;eventid=dummy" rel="noopener" '
r'style="[^"]+" target="_blank">Link &amp; Text</a>',
html
)