[SECURITY] Prevent untrusted values from creating Excel formulas

This commit is contained in:
Raphael Michel
2022-02-18 13:56:06 +01:00
parent 5d6302d5fd
commit 482968175b
3 changed files with 159 additions and 23 deletions

View File

@@ -0,0 +1,113 @@
import re
from inspect import isgenerator
from openpyxl import Workbook
from openpyxl.cell.cell import (
ILLEGAL_CHARACTERS_RE, KNOWN_TYPES, TIME_TYPES, TYPE_FORMULA, TYPE_STRING,
Cell,
)
from openpyxl.compat import NUMERIC_TYPES
from openpyxl.utils import column_index_from_string
from openpyxl.utils.exceptions import ReadOnlyWorkbookException
from openpyxl.worksheet._write_only import WriteOnlyWorksheet
from openpyxl.worksheet.worksheet import Worksheet
# Tuple of value types built from openpyxl's NUMERIC_TYPES and TIME_TYPES plus
# bool and NoneType. Judging by the name these are the types that need no
# sanitising; nothing in the visible part of this module references it.
SAFE_TYPES = NUMERIC_TYPES + TIME_TYPES + (bool, type(None))
"""
This module provides a safer version of openpyxl's `Workbook` class to generate XLSX files from
user-generated data using `WriteOnlyWorksheet` and `ws.append()`. We commonly use these methods
to output e.g. order data, which contains data from untrusted sources such as attendee names.
There are mainly two problems this solves:
- It makes sure strings starting with = are treated as text, not as a formula, as openpyxl will
otherwise assume, which can be used for remote code execution.
- It removes characters considered invalid by Excel to avoid exporter crashes.
"""
def remove_invalid_excel_chars(val):
    """
    Return ``val`` in a form that will not crash the XLSX export.

    * ``Cell`` instances are handed back untouched.
    * Values whose type is not in openpyxl's ``KNOWN_TYPES`` are converted with ``str()``.
    * ``bytes`` are decoded as UTF-8, silently dropping undecodable sequences.
    * Strings have every match of ``ILLEGAL_CHARACTERS_RE`` removed.

    Any other value is returned unchanged.
    """
    if isinstance(val, Cell):
        return val

    cleaned = val if isinstance(val, KNOWN_TYPES) else str(val)

    if isinstance(cleaned, bytes):
        cleaned = cleaned.decode("utf-8", errors="ignore")

    if not isinstance(cleaned, str):
        # Non-text values carry no characters to strip.
        return cleaned

    return re.sub(ILLEGAL_CHARACTERS_RE, '', cleaned)
def SafeCell(*args, value=None, **kwargs):
    """
    Build an openpyxl ``Cell`` from an untrusted value.

    The value is first passed through ``remove_invalid_excel_chars``. If the
    resulting cell ended up with the formula data type (the module description
    notes openpyxl assumes this for strings starting with ``=``), the type is
    forced back to plain string so the content is stored as text.

    All positional and remaining keyword arguments are forwarded to ``Cell``.
    """
    cell = Cell(*args, value=remove_invalid_excel_chars(value), **kwargs)

    if cell.data_type != TYPE_FORMULA:
        return cell

    # Untrusted input must never become a formula.
    cell.data_type = TYPE_STRING
    return cell
class SafeAppendMixin:
    """
    Mixin that replaces a worksheet's ``append()`` with a variant creating
    cells through ``SafeCell``, so appended values are sanitised and never
    stored as formulas.

    The host class must provide ``_current_row``, ``_cells`` and
    ``_invalid_row()``, all of which ``append()`` uses.
    """

    def append(self, iterable):
        """
        Append one row of values directly below the current last row.

        :param iterable: Either a list, tuple, range or generator, whose items
            are written to columns 1..n in order, or a dict mapping a column
            (1-based index or column letter string) to its value.
        :raises ValueError: If a ``Cell`` instance in a sequence already
            belongs to a different worksheet.

        ``Cell`` instances found in a sequence are adopted as they are (they
        are NOT sanitised); every other value is wrapped in a ``SafeCell``.
        Any other kind of ``iterable`` is passed to ``self._invalid_row()``
        (presumably raising, as in openpyxl -- confirm against the host class).
        """
        row_idx = self._current_row + 1

        if isinstance(iterable, (list, tuple, range)) or isgenerator(iterable):
            for col_idx, content in enumerate(iterable, 1):
                if isinstance(content, Cell):
                    # compatible with write-only mode
                    cell = content
                    if cell.parent and cell.parent != self:
                        raise ValueError("Cells cannot be copied from other worksheets")
                    cell.parent = self
                    cell.column = col_idx
                    cell.row = row_idx
                else:
                    # SafeCell sanitises the value itself, so it is passed in
                    # raw here, exactly like in the dict branch below.
                    cell = SafeCell(self, row=row_idx, column=col_idx, value=content)
                self._cells[(row_idx, col_idx)] = cell

        elif isinstance(iterable, dict):
            for col_idx, content in iterable.items():
                if isinstance(col_idx, str):
                    # Column given as a letter, e.g. "B" -> 2
                    col_idx = column_index_from_string(col_idx)
                cell = SafeCell(self, row=row_idx, column=col_idx, value=content)
                self._cells[(row_idx, col_idx)] = cell

        else:
            self._invalid_row(iterable)

        self._current_row = row_idx
class SafeWriteOnlyWorksheet(SafeAppendMixin, WriteOnlyWorksheet):
    """``WriteOnlyWorksheet`` whose ``append()`` is taken from ``SafeAppendMixin``."""
class SafeWorksheet(SafeAppendMixin, Worksheet):
    """``Worksheet`` whose ``append()`` is taken from ``SafeAppendMixin``."""
class SafeWorkbook(Workbook):
    """
    ``Workbook`` variant whose worksheets use the sanitising ``append()`` from
    ``SafeAppendMixin``.

    Sheets that already exist after the base constructor ran get the safe
    ``append()`` patched in; sheets created through ``create_sheet()`` are
    instances of ``SafeWorksheet`` / ``SafeWriteOnlyWorksheet``.
    """

    def __init__(self, *args, **kwargs):
        """Create the workbook and patch ``append()`` on all pre-existing sheets."""
        from types import MethodType

        super().__init__(*args, **kwargs)
        if self._sheets:
            # monkeypatch existing sheets
            for s in self._sheets:
                # A plain function stored on an instance is not bound
                # automatically, so bind it explicitly; otherwise
                # ``s.append(row)`` would receive ``row`` as ``self``.
                s.append = MethodType(SafeAppendMixin.append, s)

    def create_sheet(self, title=None, index=None):
        """
        Create a new safe worksheet and add it to this workbook.

        :param title: Title passed to the new worksheet.
        :param index: Position passed on to ``_add_sheet()``.
        :return: The new ``SafeWriteOnlyWorksheet`` (write-only workbooks) or
            ``SafeWorksheet`` (otherwise).
        :raises ReadOnlyWorkbookException: If the workbook is read-only.
        """
        if self.read_only:
            raise ReadOnlyWorkbookException('Cannot create new sheet in a read-only workbook')

        if self.write_only:
            new_ws = SafeWriteOnlyWorksheet(parent=self, title=title)
        else:
            new_ws = SafeWorksheet(parent=self, title=title)

        self._add_sheet(sheet=new_ws, index=index)
        return new_ws