Order import: warn when characters are replaced because the file's encoding is unknown (#2184)

Co-authored-by: Raphael Michel <michel@rami.io>
This commit is contained in:
Richard Schreiber
2021-08-27 15:03:35 +02:00
committed by GitHub
parent e7068020d5
commit 529092a4ed
2 changed files with 14 additions and 3 deletions

View File

@@ -52,14 +52,15 @@ class DataImportError(LazyLocaleException):
super().__init__(msg)
def parse_csv(file, length=None):
def parse_csv(file, length=None, mode="strict"):
file.seek(0)
data = file.read(length)
try:
import chardet
charset = chardet.detect(data)['encoding']
except ImportError:
charset = file.charset
data = data.decode(charset or 'utf-8')
data = data.decode(charset or "utf-8", mode)
# If the file was modified on a Mac, it only contains \r as line breaks
if '\r' in data and '\n' not in data:
data = data.replace('\r', '\n')

View File

@@ -119,7 +119,17 @@ class ProcessView(EventPermissionRequiredMixin, AsyncAction, FormView):
@cached_property
def parsed(self):
return parse_csv(self.file.file, 1024 * 1024)
try:
return parse_csv(self.file.file, 1024 * 1024)
except UnicodeDecodeError:
messages.warning(
self.request,
_(
"We could not identify the character encoding of the CSV file. "
"Some characters were replaced with a placeholder."
)
)
return parse_csv(self.file.file, 1024 * 1024, "replace")
def get(self, request, *args, **kwargs):
if 'async_id' in request.GET and settings.HAS_CELERY: