Files
pretix_original/src/pretix/plugins/banktransfer/csvimport.py
Kian Cross 5d4b218aa6 Banktransfer: Handle trailing commas in headers for Lloyds Bank CSV files (#4782)
Lloyds Bank (UK) CSV files include a trailing comma in the header row
but not in the data rows, causing the `csvimport.parse` function to
skip the data rows. This occurs because the header length exceeds the
row length, making them unequal to `hint.cols`.

This commit adjusts the length check to allow a range of acceptable row
lengths, from the index of the last non-empty column in the header to
`hint.cols`. This ensures compatibility with headers containing one or
more trailing commas without affecting rows with correctly labelled columns.

The solution avoids breaking changes by leaving underlying data structures
untouched. Alternative approaches, such as dropping trailing commas before
parsing or removing empty elements after parsing, were avoided due to
potential risks. Specifically, trailing columns might contain data that
banks provide but fail to label in the header row.
2025-02-05 16:56:28 +01:00

136 lines
5.2 KiB
Python

#
# This file is part of pretix (Community Edition).
#
# Copyright (C) 2014-2020 Raphael Michel and contributors
# Copyright (C) 2020-2021 rami.io GmbH and contributors
#
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General
# Public License as published by the Free Software Foundation in version 3 of the License.
#
# ADDITIONAL TERMS APPLY: Pursuant to Section 7 of the GNU Affero General Public License, additional terms are
# applicable granting you additional permissions and placing additional restrictions on your usage of this software.
# Please refer to the pretix LICENSE file to obtain the full terms applicable to this work. If you did not receive
# this file, see <https://pretix.eu/about/en/license>.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along with this program. If not, see
# <https://www.gnu.org/licenses/>.
#
import csv
import io
import re
from django.utils.text import Truncator
class HintMismatchError(Exception):
    """Raised by ``parse`` when the supplied hint does not fit the CSV data."""
def check_row_length(data, hint, row):
    """
    Tell whether ``row`` has a column count that is acceptable for ``hint``.

    The nominal count is ``hint['cols']``. For every empty cell at the end of
    the header row (``data[0]``, scanned backwards and never including the
    very first cell) a correspondingly shorter row is accepted as well, so
    that files whose header carries trailing delimiters still match data rows
    that do not.

    Rows containing ``None`` are always rejected.
    """
    expected = hint['cols']
    header = data[0]
    width = len(header)

    accepted = [expected]
    idx = width - 1
    # Walk backwards over the trailing empty header cells; stop at the first
    # labelled column (index 0 is deliberately never inspected).
    while idx > 0 and not header[idx]:
        accepted.append(expected - (width - idx))
        idx -= 1

    if None in row:
        return False
    return len(row) in accepted
def parse(data, hint):
    """
    Extract transaction dictionaries from CSV rows according to a column hint.

    :param data: List of rows (lists of strings); ``data[0]`` is used as the
                 header row for the column-count checks.
    :param hint: Mapping with a mandatory ``cols`` entry (expected number of
                 columns) and optional ``payer`` / ``reference`` (iterables of
                 column indexes) and ``amount`` / ``date`` / ``iban`` / ``bic``
                 (single column indexes). Entries that are missing or ``None``
                 are ignored.
    :return: Tuple ``(result, good_hint)``. ``result`` is a list of dicts with
             the extracted, stripped values; ``good_hint`` is ``True`` once at
             least one extracted row has a non-empty reference or payer.
    :raises HintMismatchError: If the hint has no ``cols`` entry or the header
                               row does not have ``hint['cols']`` columns.
    """
    if 'cols' not in hint:
        raise HintMismatchError('Invalid hint')
    if len(data[0]) != hint['cols']:
        raise HintMismatchError('Wrong column count')

    def _cell(row, index):
        # check_row_length() accepts rows that are shorter than the header
        # when the header ends in empty columns. A hint pointing at such a
        # column must yield an empty value rather than an IndexError.
        index = int(index)
        return row[index].strip() if index < len(row) else ''

    # The hint does not change while iterating, so look everything up once.
    payer_cols = hint.get('payer')
    reference_cols = hint.get('reference')
    amount_col = hint.get('amount')
    date_col = hint.get('date')
    iban_col = hint.get('iban')
    bic_col = hint.get('bic')

    result = []
    good_hint = False
    for row in data:
        if not check_row_length(data, hint, row):
            # Wrong column count
            continue

        resrow = {}
        if payer_cols is not None:
            resrow['payer'] = "\n".join(_cell(row, i) for i in payer_cols).strip()
        if reference_cols is not None:
            resrow['reference'] = "\n".join(_cell(row, i) for i in reference_cols).strip()
        if amount_col is not None:
            # Keep only digits, separators and signs (drops currency symbols etc.)
            resrow['amount'] = re.sub('[^0-9,+.-]', '', _cell(row, amount_col))
        if date_col is not None:
            resrow['date'] = _cell(row, date_col)
        if iban_col is not None:
            resrow['iban'] = Truncator(_cell(row, iban_col)).chars(200)
        if bic_col is not None:
            resrow['bic'] = Truncator(_cell(row, bic_col)).chars(200)

        # Use .get() so that a hint without an amount column skips the row
        # instead of raising KeyError before the membership test is reached.
        if not resrow.get('amount') or resrow.get('date') == '':
            # This is probably a headline or something other special.
            continue
        if resrow.get('reference') or resrow.get('payer'):
            good_hint = True
        result.append(resrow)
    return result, good_hint
def get_rows_from_file(file):
    """
    Read an uploaded CSV file and return its rows as lists of strings.

    The raw bytes from ``file.read()`` are decoded with the encoding reported
    by ``chardet`` if that package can be imported, otherwise with
    ``file.charset``; UTF-8 is the fallback when neither yields a value. The
    CSV dialect is sniffed from the first non-empty line that the sniffer
    accepts.

    :raises csv.Error: If no line allows a dialect to be detected (the last
                       sniffing error is re-raised when there was one).
    """
    raw = file.read()
    try:
        import chardet
        encoding = chardet.detect(raw)['encoding']
    except ImportError:
        encoding = file.charset
    text = raw.decode(encoding or 'utf-8')

    # If the file was modified on a Mac, it only contains \r as line breaks
    if '\n' not in text and '\r' in text:
        text = text.replace('\r', '\n')

    # Sniffing line by line is necessary as some banks like to include
    # one-column garbage at the beginning of the file which breaks the sniffer.
    # See also: http://bugs.python.org/issue2078
    dialect = None
    sniff_error = None
    for candidate in filter(None, (chunk.strip() for chunk in text.split("\n"))):
        try:
            dialect = csv.Sniffer().sniff(candidate, delimiters=";,.#:")
        except Exception as exc:
            sniff_error = exc
            continue
        sniff_error = None
        break
    if dialect is None:
        raise sniff_error or csv.Error("No dialect detected")

    rows = []
    for record in csv.reader(io.StringIO(text), dialect):
        if rows and len(rows[0]) < len(record):
            # Some banks put metadata above the real data, things like
            # a headline, the bank's name, the user's name, etc.
            # In many cases, we can identify this because these rows
            # have fewer columns than the rows containing the real data.
            # Therefore, if the number of columns suddenly grows, we start
            # over with parsing.
            rows = [record]
        else:
            rows.append(record)
    return rows
def new_hint(data):
    """
    Build a hint dictionary from submitted form-like ``data``.

    ``payer`` and ``reference`` are taken as lists via ``data.getlist``; ``date``,
    ``amount``, ``cols``, ``iban`` and ``bic`` are converted with ``int``. Any
    key that is absent from ``data`` is stored as ``None``.
    """
    def _listing(key):
        return data.getlist(key) if key in data else None

    def _number(key):
        return int(data.get(key)) if key in data else None

    hint = {key: _listing(key) for key in ('payer', 'reference')}
    for key in ('date', 'amount', 'cols', 'iban', 'bic'):
        hint[key] = _number(key)
    return hint