From 482968175ba5784c2c4387c8b90a5443d3559bc5 Mon Sep 17 00:00:00 2001 From: Raphael Michel Date: Fri, 18 Feb 2022 13:56:06 +0100 Subject: [PATCH] [SECURITY] Prevent untrusted values from creating Excel formulas --- src/pretix/base/exporter.py | 31 ++----- src/pretix/helpers/safe_openpyxl.py | 113 ++++++++++++++++++++++++ src/tests/helpers/test_safe_openpyxl.py | 38 ++++++++ 3 files changed, 159 insertions(+), 23 deletions(-) create mode 100644 src/pretix/helpers/safe_openpyxl.py create mode 100644 src/tests/helpers/test_safe_openpyxl.py diff --git a/src/pretix/base/exporter.py b/src/pretix/base/exporter.py index fe3939c053..082f3adc95 100644 --- a/src/pretix/base/exporter.py +++ b/src/pretix/base/exporter.py @@ -33,7 +33,6 @@ # License for the specific language governing permissions and limitations under the License. import io -import re import tempfile from collections import OrderedDict, namedtuple from decimal import Decimal @@ -46,26 +45,13 @@ from django.conf import settings from django.db.models import QuerySet from django.utils.formats import localize from django.utils.translation import gettext, gettext_lazy as _ -from openpyxl import Workbook -from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE, KNOWN_TYPES, Cell from pretix.base.models import Event +from pretix.helpers.safe_openpyxl import ( # NOQA: backwards compatibility for plugins using excel_safe + SafeWorkbook, remove_invalid_excel_chars as excel_safe, +) - -def excel_safe(val): - if isinstance(val, Cell): - return val - - if not isinstance(val, KNOWN_TYPES): - val = str(val) - - if isinstance(val, bytes): - val = val.decode("utf-8", errors="ignore") - - if isinstance(val, str): - val = re.sub(ILLEGAL_CHARACTERS_RE, '', val) - - return val +__ = excel_safe # just so the compatbility import above is "used" and doesn't get removed by linter class BaseExporter: @@ -228,7 +214,7 @@ class ListExporter(BaseExporter): pass def _render_xlsx(self, form_data, output_file=None): - wb = Workbook(write_only=True) + wb = SafeWorkbook(write_only=True) ws = wb.create_sheet() self.prepare_xlsx_sheet(ws) try: @@ -242,7 +228,7 @@ class ListExporter(BaseExporter): total = line.total continue ws.append([ - excel_safe(val) for val in line + val for val in line ]) if total: counter += 1 @@ -347,7 +333,7 @@ class MultiSheetListExporter(ListExporter): return self.get_filename() + '.csv', 'text/csv', output.getvalue().encode("utf-8") def _render_xlsx(self, form_data, output_file=None): - wb = Workbook(write_only=True) + wb = SafeWorkbook(write_only=True) n_sheets = len(self.sheets) for i_sheet, (s, l) in enumerate(self.sheets): ws = wb.create_sheet(str(l)) @@ -361,8 +347,7 @@ class MultiSheetListExporter(ListExporter): total = line.total continue ws.append([ - excel_safe(val) - for val in line + val for val in line ]) if total: counter += 1 diff --git a/src/pretix/helpers/safe_openpyxl.py b/src/pretix/helpers/safe_openpyxl.py new file mode 100644 index 0000000000..6e4e9c0a5e --- /dev/null +++ b/src/pretix/helpers/safe_openpyxl.py @@ -0,0 +1,113 @@ +import re +from inspect import isgenerator + +from openpyxl import Workbook +from openpyxl.cell.cell import ( + ILLEGAL_CHARACTERS_RE, KNOWN_TYPES, TIME_TYPES, TYPE_FORMULA, TYPE_STRING, + Cell, +) +from openpyxl.compat import NUMERIC_TYPES +from openpyxl.utils import column_index_from_string +from openpyxl.utils.exceptions import ReadOnlyWorkbookException +from openpyxl.worksheet._write_only import WriteOnlyWorksheet +from openpyxl.worksheet.worksheet import Worksheet + +SAFE_TYPES = NUMERIC_TYPES + TIME_TYPES + (bool, type(None)) + + +""" +This module provides a safer version of openpyxl's `Workbook` class to generate XLSX files from +user-generated data using `WriteOnlyWorksheet` and `ws.append()`. We commonly use these methods +to output e.g. order data, which contains data from untrusted sources such as attendee names. + +There are mainly two problems this solves: + +- It makes sure strings starting with = are treated as text, not as a formula, as openpyxl will + otherwise assume, which can be used for remote code execution. + +- It removes characters considered invalid by Excel to avoid exporter crashes. +""" + + +def remove_invalid_excel_chars(val): + if isinstance(val, Cell): + return val + + if not isinstance(val, KNOWN_TYPES): + val = str(val) + + if isinstance(val, bytes): + val = val.decode("utf-8", errors="ignore") + + if isinstance(val, str): + val = re.sub(ILLEGAL_CHARACTERS_RE, '', val) + + return val + + +def SafeCell(*args, value=None, **kwargs): + value = remove_invalid_excel_chars(value) + c = Cell(*args, value=value, **kwargs) + if c.data_type == TYPE_FORMULA: + c.data_type = TYPE_STRING + return c + + +class SafeAppendMixin: + def append(self, iterable): + row_idx = self._current_row + 1 + + if isinstance(iterable, (list, tuple, range)) or isgenerator(iterable): + for col_idx, content in enumerate(iterable, 1): + if isinstance(content, Cell): + # compatible with write-only mode + cell = content + if cell.parent and cell.parent != self: + raise ValueError("Cells cannot be copied from other worksheets") + cell.parent = self + cell.column = col_idx + cell.row = row_idx + else: + cell = SafeCell(self, row=row_idx, column=col_idx, value=remove_invalid_excel_chars(content)) + self._cells[(row_idx, col_idx)] = cell + + elif isinstance(iterable, dict): + for col_idx, content in iterable.items(): + if isinstance(col_idx, str): + col_idx = column_index_from_string(col_idx) + cell = SafeCell(self, row=row_idx, column=col_idx, value=content) + self._cells[(row_idx, col_idx)] = cell + + else: + self._invalid_row(iterable) + + self._current_row = row_idx + + +class SafeWriteOnlyWorksheet(SafeAppendMixin, WriteOnlyWorksheet): + pass + + +class SafeWorksheet(SafeAppendMixin, Worksheet): + pass + + +class SafeWorkbook(Workbook): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + if self._sheets: + # monkeypatch existing sheets + for s in self._sheets: + s.append = SafeAppendMixin.append + + def create_sheet(self, title=None, index=None): + if self.read_only: + raise ReadOnlyWorkbookException('Cannot create new sheet in a read-only workbook') + + if self.write_only: + new_ws = SafeWriteOnlyWorksheet(parent=self, title=title) + else: + new_ws = SafeWorksheet(parent=self, title=title) + + self._add_sheet(sheet=new_ws, index=index) + return new_ws diff --git a/src/tests/helpers/test_safe_openpyxl.py b/src/tests/helpers/test_safe_openpyxl.py new file mode 100644 index 0000000000..e9a3145984 --- /dev/null +++ b/src/tests/helpers/test_safe_openpyxl.py @@ -0,0 +1,38 @@ +# +# This file is part of pretix (Community Edition). +# +# Copyright (C) 2014-2020 Raphael Michel and contributors +# Copyright (C) 2020-2021 rami.io GmbH and contributors +# +# This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General +# Public License as published by the Free Software Foundation in version 3 of the License. +# +# ADDITIONAL TERMS APPLY: Pursuant to Section 7 of the GNU Affero General Public License, additional terms are +# applicable granting you additional permissions and placing additional restrictions on your usage of this software. +# Please refer to the pretix LICENSE file to obtain the full terms applicable to this work. If you did not receive +# this file, see . +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along with this program. If not, see +# . +# +from openpyxl.cell.cell import TYPE_STRING + +from pretix.helpers.safe_openpyxl import SafeWorkbook + + +def test_nullbyte_removed(): + wb = SafeWorkbook() + ws = wb.create_sheet() + ws.append(["foo\u0000bar"]) + assert ws.cell(1, 1).value == "foobar" + + +def test_no_formulas(): + wb = SafeWorkbook() + ws = wb.create_sheet() + ws.append(["=1+1"]) + assert ws.cell(1, 1).data_type == TYPE_STRING