mirror of
https://github.com/pretix/pretix.git
synced 2026-05-05 15:14:04 +00:00
Lloyds Bank (UK) CSV files include a trailing comma in the header row but not in the data rows, causing the `csvimport.parse` function to skip the data rows. This occurs because the header length exceeds the row length, making them unequal to `hint.cols`. This commit adjusts the length check to allow a range of acceptable row lengths, from the index of the last non-empty column in the header to `hint.cols`. This ensures compatibility with headers containing one or more trailing commas without affecting rows with correctly labelled columns. The solution avoids breaking changes by leaving underlying data structures untouched. Alternative approaches, such as dropping trailing commas before parsing or removing empty elements after parsing, were avoided due to potential risks. Specifically, trailing columns might contain data that banks provide but fail to label in the header row.
136 lines
5.2 KiB
Python
136 lines
5.2 KiB
Python
#
|
|
# This file is part of pretix (Community Edition).
|
|
#
|
|
# Copyright (C) 2014-2020 Raphael Michel and contributors
|
|
# Copyright (C) 2020-2021 rami.io GmbH and contributors
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General
|
|
# Public License as published by the Free Software Foundation in version 3 of the License.
|
|
#
|
|
# ADDITIONAL TERMS APPLY: Pursuant to Section 7 of the GNU Affero General Public License, additional terms are
|
|
# applicable granting you additional permissions and placing additional restrictions on your usage of this software.
|
|
# Please refer to the pretix LICENSE file to obtain the full terms applicable to this work. If you did not receive
|
|
# this file, see <https://pretix.eu/about/en/license>.
|
|
#
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
|
|
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
|
|
# details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License along with this program. If not, see
|
|
# <https://www.gnu.org/licenses/>.
|
|
#
|
|
import csv
|
|
import io
|
|
import re
|
|
|
|
from django.utils.text import Truncator
|
|
|
|
|
|
class HintMismatchError(Exception):
    """Signals that a column hint is invalid or does not fit the CSV data."""
|
|
|
|
|
|
def check_row_length(data, hint, row):
    """
    Tell whether ``row`` has a column count that is acceptable for ``hint``.

    The first entry of ``data`` is treated as the header row. A row is always
    acceptable if it has exactly ``hint['cols']`` columns. In addition, for
    every empty cell at the end of the header (index 0 is never inspected),
    a row that is shorter by that many columns is accepted as well. This
    covers files whose header row ends in one or more trailing delimiters
    that the data rows do not repeat.

    :param data: All parsed rows; ``data[0]`` is the header row.
    :param hint: Hint dictionary providing the expected column count as ``cols``.
    :param row: The row to check.
    :return: ``True`` if the row contains no ``None`` and has an accepted length.
    """
    expected = hint['cols']
    header = data[0]
    header_len = len(header)

    accepted = [expected]
    # Walk backwards over the trailing empty header cells; each one makes a
    # row that is one column shorter acceptable as well.
    idx = header_len - 1
    while idx > 0 and not header[idx]:
        accepted.append(expected - (header_len - idx))
        idx -= 1

    if None in row:
        return False
    return len(row) in accepted
|
|
|
|
|
|
def parse(data, hint):
    """
    Extract transaction fields from CSV rows according to a column hint.

    Rows whose column count is not accepted by ``check_row_length`` are
    skipped, as are rows without a usable amount or with an empty date
    (these are usually headlines or other special rows).

    :param data: List of rows (lists of strings); ``data[0]`` is used to
                 validate the column count against the hint.
    :param hint: Dictionary with the key ``cols`` (expected column count) and
                 optionally ``payer``/``reference`` (iterables of column
                 indexes) and ``amount``/``date``/``iban``/``bic`` (single
                 column indexes).
    :return: Tuple ``(result, good_hint)`` where ``result`` is a list of
             dictionaries with the extracted fields and ``good_hint`` is
             ``True`` if at least one row yielded a non-empty reference or payer.
    :raises HintMismatchError: If the hint has no ``cols`` key, if ``data`` is
                               empty, or if the first row's column count
                               differs from ``hint['cols']``.
    """
    result = []
    if 'cols' not in hint:
        raise HintMismatchError('Invalid hint')
    # An empty data set cannot match any column count; report it like any
    # other mismatch instead of failing with an IndexError on data[0].
    if not data or len(data[0]) != hint['cols']:
        raise HintMismatchError('Wrong column count')
    good_hint = False
    for row in data:
        if not check_row_length(data, hint, row):
            # Wrong column count
            continue
        resrow = {}
        if hint.get('payer') is not None:
            resrow['payer'] = "\n".join(row[int(i)].strip() for i in hint.get('payer')).strip()
        if hint.get('reference') is not None:
            resrow['reference'] = "\n".join(row[int(i)].strip() for i in hint.get('reference')).strip()
        if hint.get('amount') is not None:
            resrow['amount'] = row[int(hint.get('amount'))].strip()
            # Keep only digits, separators and signs
            resrow['amount'] = re.sub('[^0-9,+.-]', '', resrow['amount'])
        if hint.get('date') is not None:
            resrow['date'] = row[int(hint.get('date'))].strip()
        if hint.get('iban') is not None:
            resrow['iban'] = Truncator(row[int(hint.get('iban'))].strip()).chars(200)
        if hint.get('bic') is not None:
            resrow['bic'] = Truncator(row[int(hint.get('bic'))].strip()).chars(200)

        # The presence of the amount must be tested before its content is
        # inspected, otherwise a hint without an amount column raises KeyError.
        if not resrow.get('amount') or resrow.get('date') == '':
            # This is probably a headline or something other special.
            continue
        if resrow.get('reference') or resrow.get('payer'):
            good_hint = True
        result.append(resrow)
    return result, good_hint
|
|
|
|
|
|
def get_rows_from_file(file):
    """
    Read a CSV file and return its rows as lists of strings.

    The content is decoded using the encoding detected by ``chardet`` if that
    package can be imported, otherwise using the ``charset`` attribute of the
    file object if it has one; UTF-8 is used when neither yields an encoding.
    The CSV dialect is sniffed from the first non-empty line that the sniffer
    accepts. Rows that precede a row with more columns than the first
    collected row are discarded.

    :param file: File-like object whose ``read()`` returns bytes.
    :return: List of rows, each a list of strings.
    :raises csv.Error: If no dialect could be detected from any line (the
                       last sniffing error is re-raised if there was one).
    """
    raw = file.read()
    try:
        import chardet
        charset = chardet.detect(raw)['encoding']
    except ImportError:
        # Not every file-like object carries a ``charset`` attribute; fall
        # back to the UTF-8 default below instead of raising AttributeError.
        charset = getattr(file, 'charset', None)
    data = raw.decode(charset or 'utf-8')
    # If the file was modified on a Mac, it only contains \r as line breaks
    if '\r' in data and '\n' not in data:
        data = data.replace('\r', '\n')

    # Sniffing line by line is necessary as some banks like to include
    # one-column garbage at the beginning of the file which breaks the sniffer.
    # See also: http://bugs.python.org/issue2078
    sniffer = csv.Sniffer()
    last_e = None
    dialect = None
    for line in data.split("\n"):
        line = line.strip()
        if not line:
            continue
        try:
            dialect = sniffer.sniff(line, delimiters=";,.#:")
        except Exception as e:
            # Deliberately broad: any failure just means "try the next line".
            last_e = e
        else:
            last_e = None
            break
    if dialect is None:
        raise last_e or csv.Error("No dialect detected")

    rows = []
    for row in csv.reader(io.StringIO(data), dialect):
        if rows and len(row) > len(rows[0]):
            # Some banks put metadata above the real data, things like
            # a headline, the bank's name, the user's name, etc.
            # In many cases, we can identify this because these rows
            # have less columns than the rows containing the real data.
            # Therefore, if the number of columns suddenly grows, we start
            # over with parsing.
            rows = []
        rows.append(row)
    return rows
|
|
|
|
|
|
def new_hint(data):
    """
    Build a hint dictionary from submitted form data.

    ``payer`` and ``reference`` are read with ``data.getlist`` and kept as
    lists; ``date``, ``amount``, ``cols``, ``iban`` and ``bic`` are read with
    ``data.get`` and converted to ``int``. Keys missing from ``data`` are
    set to ``None``.

    :param data: Mapping-like object supporting ``in``, ``get`` and ``getlist``.
    :return: Dictionary with the keys ``payer``, ``reference``, ``date``,
             ``amount``, ``cols``, ``iban`` and ``bic``.
    """
    def _multi(key):
        # Multi-valued field: keep every submitted value.
        if key in data:
            return data.getlist(key)
        return None

    def _number(key):
        # Single-valued field holding a column index or count.
        if key in data:
            return int(data.get(key))
        return None

    readers = (
        ('payer', _multi),
        ('reference', _multi),
        ('date', _number),
        ('amount', _number),
        ('cols', _number),
        ('iban', _number),
        ('bic', _number),
    )
    return {key: read(key) for key, read in readers}
|