Handle duplicate column names in CSV import (#5681)

- display a warning message to the user
- automatically rename columns by adding "__1", "__2", ... suffixes
This commit is contained in:
luelista
2025-12-04 14:03:27 +01:00
committed by GitHub
parent 8d66e1e732
commit 609b7c82ee
2 changed files with 25 additions and 2 deletions

View File

@@ -47,6 +47,19 @@ class DataImportError(LazyLocaleException):
super().__init__(msg)
def rename_duplicates(values):
used = set()
had_duplicates = False
for i, value in enumerate(values):
c = 0
while values[i] in used:
c += 1
values[i] = f'{value}__{c}'
had_duplicates = True
used.add(values[i])
return had_duplicates
def parse_csv(file, length=None, mode="strict", charset=None):
file.seek(0)
data = file.read(length)
@@ -70,6 +83,7 @@ def parse_csv(file, length=None, mode="strict", charset=None):
return None
reader = csv.DictReader(io.StringIO(data), dialect=dialect)
reader._had_duplicates = rename_duplicates(reader.fieldnames)
return reader

View File

@@ -146,7 +146,7 @@ class BaseProcessView(AsyncAction, FormView):
else:
charset = None
try:
return parse_csv(self.file.file, 1024 * 1024, charset=charset)
reader = parse_csv(self.file.file, 1024 * 1024, charset=charset)
except UnicodeDecodeError:
messages.warning(
self.request,
@@ -155,7 +155,16 @@ class BaseProcessView(AsyncAction, FormView):
"Some characters were replaced with a placeholder."
)
)
return parse_csv(self.file.file, 1024 * 1024, "replace", charset=charset)
reader = parse_csv(self.file.file, 1024 * 1024, "replace", charset=charset)
if reader._had_duplicates:
messages.warning(
self.request,
_(
"Multiple columns of the CSV file have the same name and were renamed automatically. We "
"recommend that you rename these in your source file to avoid problems during import."
)
)
return reader
@cached_property
def parsed_list(self):