forked from CGM_Public/pretix_original
Markdown: Allow to escape domain name
This commit is contained in:
@@ -48,6 +48,8 @@ from django.utils.http import url_has_allowed_host_and_scheme
|
||||
from django.utils.safestring import mark_safe
|
||||
from markdown import Extension
|
||||
from markdown.inlinepatterns import SubstituteTagInlineProcessor
|
||||
from markdown.postprocessors import Postprocessor
|
||||
from markdown.treeprocessors import UnescapeTreeprocessor
|
||||
from tlds import tld_set
|
||||
|
||||
register = template.Library()
|
||||
@@ -185,6 +187,111 @@ class EmailNl2BrExtension(Extension):
|
||||
md.inlinePatterns.register(br_tag, 'nl', 5)
|
||||
|
||||
|
||||
class LinkifyPostprocessor(Postprocessor):
    """
    Markdown postprocessor that passes the serialized output through a linker.

    :param linker: Object providing a ``linkify(text)`` method. Whatever that
                   method returns becomes the result of this step.
    """

    def __init__(self, linker):
        super().__init__()
        self.linker = linker

    def run(self, text):
        """Return ``text`` after it has been processed by ``self.linker.linkify``."""
        linkified = self.linker.linkify(text)
        return linkified
|
||||
|
||||
|
||||
class CleanPostprocessor(Postprocessor):
    """
    Markdown postprocessor that sanitizes the serialized output with ``bleach.clean``.

    The constructor arguments are stored on the instance and forwarded
    unchanged as the ``tags``, ``attributes``, ``protocols`` and ``strip``
    keyword arguments of ``bleach.clean`` on every run.
    """

    def __init__(self, tags, attributes, protocols, strip):
        super().__init__()
        self.tags, self.attributes = tags, attributes
        self.protocols, self.strip = protocols, strip

    def run(self, text):
        """Return ``text`` cleaned by ``bleach.clean`` using the stored settings."""
        clean_options = {
            'tags': self.tags,
            'attributes': self.attributes,
            'protocols': self.protocols,
            'strip': self.strip,
        }
        return bleach.clean(text, **clean_options)
|
||||
|
||||
|
||||
class CustomUnescapeTreeprocessor(UnescapeTreeprocessor):
    """
    Un-escape every escaped character except the dot.

    An escaped dot (character code 46) is replaced by a visible placeholder
    string instead of a literal ".".
    """

    def _unescape(self, m):
        # Group 1 of the match holds the decimal character code as a string.
        char_code = m.group(1)
        if char_code != "46":
            return chr(int(char_code))
        # 46 == ord("."): emit the placeholder rather than the dot itself.
        return "|escaped-dot-sGnY9LMK|"
|
||||
|
||||
|
||||
class CustomUnescapePostprocessor(Postprocessor):
    """
    Restore escaped dots.

    ``CustomUnescapeTreeprocessor`` emits the placeholder
    ``|escaped-dot-sGnY9LMK|`` for every escaped dot. This postprocessor
    turns each placeholder back into a literal ".".
    """

    def run(self, text):
        """Return ``text`` with every escaped-dot placeholder replaced by "."."""
        # Plain string replacement is sufficient here; no regular expression
        # is needed because the placeholder is a fixed string.
        return text.replace("|escaped-dot-sGnY9LMK|", ".")
|
||||
|
||||
|
||||
class LinkifyAndCleanExtension(Extension):
    r"""
    We want to do:

        input --> markdown --> bleach clean --> linkify --> output

    Internally, the markdown library does:

        source --> parse --> (tree|inline)processors --> serializing --> postprocessors

    All escaped characters such as \. will be turned to something like <STX>46<ETX> in the processors
    step and then will be converted back to . again in the last tree processor, before serialization.
    Therefore, linkify does not see the escaped character anymore. This is annoying for the one case
    where you want to type "rich_text.py" and *not* have it turned into a link, since you can't type
    "rich_text\.py" either.

    A simple solution would be to run linkify before markdown, but that may cause other issues when
    linkify messes with the markdown syntax and it makes handling our attributes etc. harder.

    So we do a weird hack where we modify the unescape processor to unescape everything EXCEPT for the
    dot and then unescape that one manually after linkify. However, to make things even harder, the bleach
    clean step removes any invisible characters, so we need to cheat a bit more: the escaped dot is
    carried through clean and linkify as a visible placeholder string.

    :param linker: Object with a ``linkify(text)`` method, handed to ``LinkifyPostprocessor``.
    :param tags: Forwarded to ``CleanPostprocessor`` (``tags`` argument of ``bleach.clean``).
    :param attributes: Forwarded to ``CleanPostprocessor`` (``attributes`` argument of ``bleach.clean``).
    :param protocols: Forwarded to ``CleanPostprocessor`` (``protocols`` argument of ``bleach.clean``).
    :param strip: Forwarded to ``CleanPostprocessor`` (``strip`` argument of ``bleach.clean``).
    """

    def __init__(self, linker, tags, attributes, protocols, strip):
        self.linker = linker
        self.tags = tags
        self.attributes = attributes
        self.protocols = protocols
        self.strip = strip
        super().__init__()

    def extendMarkdown(self, md):
        """Register the custom unescape, clean, linkify and dot-restore processors on ``md``."""
        # Replace the stock unescape tree processor with the variant that
        # leaves a placeholder in place of escaped dots.
        md.treeprocessors.deregister('unescape')
        md.treeprocessors.register(
            CustomUnescapeTreeprocessor(md),
            'unescape',
            0
        )
        # The priorities 2 > 1 > 0 give the order described in the class
        # docstring: clean first, then linkify, then restore the dots.
        md.postprocessors.register(
            CleanPostprocessor(self.tags, self.attributes, self.protocols, self.strip),
            'clean',
            2
        )
        md.postprocessors.register(
            LinkifyPostprocessor(self.linker),
            'linkify',
            1
        )
        # The processor constructor's argument is the Markdown instance, so
        # pass ``md`` here (not the linker), as done for the tree processor above.
        md.postprocessors.register(
            CustomUnescapePostprocessor(md),
            'unescape_dot',
            0
        )
|
||||
|
||||
|
||||
def markdown_compile_email(source):
|
||||
linker = bleach.Linker(
|
||||
url_re=URL_RE,
|
||||
@@ -192,18 +299,20 @@ def markdown_compile_email(source):
|
||||
callbacks=DEFAULT_CALLBACKS + [truelink_callback, abslink_callback],
|
||||
parse_email=True
|
||||
)
|
||||
return linker.linkify(bleach.clean(
|
||||
markdown.markdown(
|
||||
source,
|
||||
extensions=[
|
||||
'markdown.extensions.sane_lists',
|
||||
EmailNl2BrExtension(),
|
||||
]
|
||||
),
|
||||
tags=ALLOWED_TAGS,
|
||||
attributes=ALLOWED_ATTRIBUTES,
|
||||
protocols=ALLOWED_PROTOCOLS,
|
||||
))
|
||||
return markdown.markdown(
|
||||
source,
|
||||
extensions=[
|
||||
'markdown.extensions.sane_lists',
|
||||
EmailNl2BrExtension(),
|
||||
LinkifyAndCleanExtension(
|
||||
linker,
|
||||
tags=ALLOWED_TAGS,
|
||||
attributes=ALLOWED_ATTRIBUTES,
|
||||
protocols=ALLOWED_PROTOCOLS,
|
||||
strip=False,
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
class SnippetExtension(markdown.extensions.Extension):
|
||||
@@ -213,23 +322,24 @@ class SnippetExtension(markdown.extensions.Extension):
|
||||
md.parser.blockprocessors.deregister('quote')
|
||||
|
||||
|
||||
def markdown_compile(source, snippet=False):
|
||||
def markdown_compile(source, linker, snippet=False):
|
||||
tags = ALLOWED_TAGS_SNIPPET if snippet else ALLOWED_TAGS
|
||||
exts = [
|
||||
'markdown.extensions.sane_lists',
|
||||
'markdown.extensions.nl2br'
|
||||
'markdown.extensions.nl2br',
|
||||
LinkifyAndCleanExtension(
|
||||
linker,
|
||||
tags=tags,
|
||||
attributes=ALLOWED_ATTRIBUTES,
|
||||
protocols=ALLOWED_PROTOCOLS,
|
||||
strip=snippet,
|
||||
)
|
||||
]
|
||||
if snippet:
|
||||
exts.append(SnippetExtension())
|
||||
return bleach.clean(
|
||||
markdown.markdown(
|
||||
source,
|
||||
extensions=exts
|
||||
),
|
||||
strip=snippet,
|
||||
tags=tags,
|
||||
attributes=ALLOWED_ATTRIBUTES,
|
||||
protocols=ALLOWED_PROTOCOLS,
|
||||
return markdown.markdown(
|
||||
source,
|
||||
extensions=exts
|
||||
)
|
||||
|
||||
|
||||
@@ -245,7 +355,7 @@ def rich_text(text: str, **kwargs):
|
||||
callbacks=DEFAULT_CALLBACKS + ([truelink_callback, safelink_callback] if kwargs.get('safelinks', True) else [truelink_callback, abslink_callback]),
|
||||
parse_email=True
|
||||
)
|
||||
body_md = linker.linkify(markdown_compile(text))
|
||||
body_md = markdown_compile(text, linker)
|
||||
return mark_safe(body_md)
|
||||
|
||||
|
||||
@@ -261,5 +371,5 @@ def rich_text_snippet(text: str, **kwargs):
|
||||
callbacks=DEFAULT_CALLBACKS + ([truelink_callback, safelink_callback] if kwargs.get('safelinks', True) else [truelink_callback, abslink_callback]),
|
||||
parse_email=True
|
||||
)
|
||||
body_md = linker.linkify(markdown_compile(text, snippet=True))
|
||||
body_md = markdown_compile(text, linker, snippet=True)
|
||||
return mark_safe(body_md)
|
||||
|
||||
@@ -30,6 +30,8 @@ from pretix.base.templatetags.rich_text import (
|
||||
# Test link detection
|
||||
("google.com",
|
||||
'<a href="http://google.com" rel="noopener" target="_blank">google.com</a>'),
|
||||
# Test link escaping
|
||||
("google\\.com", 'google.com'),
|
||||
# Test abslink_callback
|
||||
("[Call](tel:+12345)",
|
||||
'<a href="tel:+12345" rel="nofollow">Call</a>'),
|
||||
@@ -79,3 +81,20 @@ def test_newline_handling(content, result):
|
||||
])
|
||||
def test_newline_handling_email(content, result):
|
||||
assert markdown_compile_email(content) == result
|
||||
|
||||
|
||||
@pytest.mark.parametrize("content,result,result_snippet", [
    # attributes
    ('<a onclick="javascript:foo()">foo</a>', '<p><a>foo</a></p>', '<a>foo</a>'),
    ('<strong color="red">foo</strong>',
     '<p><strong>foo</strong></p>',
     '<strong>foo</strong>'),
    # protocols
    ('<a href="javascript:foo()">foo</a>', '<p><a>foo</a></p>', '<a>foo</a>'),
    # tags: a disallowed tag is entity-escaped when not stripping and
    # removed (keeping its text) in snippet mode
    ('<script>foo</script>', '&lt;script&gt;foo&lt;/script&gt;', 'foo'),
])
def test_cleanup(content, result, result_snippet):
    """
    Check that disallowed attributes, protocols and tags are cleaned.

    ``result`` is the expected output of both ``rich_text`` and
    ``markdown_compile_email``; ``result_snippet`` is the expected output of
    ``rich_text_snippet``.
    """
    assert rich_text(content) == result
    assert rich_text_snippet(content) == result_snippet
    assert markdown_compile_email(content) == result
|
||||
|
||||
Reference in New Issue
Block a user