Order import: Allow to manually specify character set

This commit is contained in:
Raphael Michel
2023-10-16 12:21:49 +02:00
parent 9d7d5389dc
commit 2dd51b6f62
3 changed files with 49 additions and 13 deletions

View File

@@ -54,14 +54,15 @@ class DataImportError(LazyLocaleException):
super().__init__(msg)
def parse_csv(file, length=None, mode="strict"):
def parse_csv(file, length=None, mode="strict", charset=None):
file.seek(0)
data = file.read(length)
try:
import chardet
charset = chardet.detect(data)['encoding']
except ImportError:
charset = file.charset
if not charset:
try:
import chardet
charset = chardet.detect(data)['encoding']
except ImportError:
charset = file.charset
data = data.decode(charset or "utf-8", mode)
# If the file was modified on a Mac, it only contains \r as line breaks
if '\r' in data and '\n' not in data:
@@ -85,12 +86,12 @@ def setif(record, obj, attr, setting):
@app.task(base=ProfiledEventTask, throws=(DataImportError,))
def import_orders(event: Event, fileid: str, settings: dict, locale: str, user) -> None:
def import_orders(event: Event, fileid: str, settings: dict, locale: str, user, charset=None) -> None:
cf = CachedFile.objects.get(id=fileid)
user = User.objects.get(pk=user)
with language(locale, event.settings.region):
cols = get_all_columns(event)
parsed = parse_csv(cf.file)
parsed = parse_csv(cf.file, charset=charset)
orders = []
order = None
data = []

View File

@@ -10,7 +10,7 @@
<h3 class="panel-title">{% trans "Upload a new file" %}</h3>
</div>
<div class="panel-body">
<form action="" method="post" enctype="multipart/form-data" class="form-inline">
<form action="" method="post" enctype="multipart/form-data">
{% csrf_token %}
<p>
{% blocktrans trimmed %}
@@ -21,6 +21,15 @@
<div class="form-group">
<label for="file">{% trans "Import file" %}: </label> <input id="file" type="file" name="file"/>
</div>
{# Optional manual character set override. The posted value is validated server-side against the offered list; the empty value (or anything not in the list) falls back to automatic detection. #}
<div class="form-group">
    <label for="charset">{% trans "Character set" %}: </label>
    <select id="charset" name="charset" class="form-control">
        <option value="">{% trans "Detect automatically" %}</option>
        {% for e in encodings %}
            <option value="{{ e }}">{{ e }}</option>
        {% endfor %}
    </select>
</div>
<div class="clearfix"></div>
<button class="btn btn-primary pull-right flip" type="submit">
<span class="icon icon-upload"></span> {% trans "Start import" %}

View File

@@ -51,6 +51,15 @@ from pretix.control.forms.orderimport import ProcessForm
from pretix.control.permissions import EventPermissionRequiredMixin
logger = logging.getLogger(__name__)
# Character sets offered in the import form and accepted as the ``charset``
# request parameter. The selected name ends up as the codec argument of
# ``data.decode(...)`` in ``parse_csv``, so every entry must be resolvable by
# Python's codec registry. "iso-8859-12" is intentionally not listed: Python
# ships no codec under that name, so decoding with it raises ``LookupError``.
ENCODINGS = (
    "utf8", "utf16", "utf32",
    "iso-8859-1", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7",
    "iso-8859-8", "iso-8859-9", "iso-8859-10", "iso-8859-11", "iso-8859-13", "iso-8859-14",
    "iso-8859-15", "iso-8859-16",
    "maccyrillic", "macgreek", "maciceland", "maclatin2", "macroman", "macturkish",
    "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255",
    "windows-1256", "windows-1257", "windows-1258",
)
class ImportView(EventPermissionRequiredMixin, TemplateView):
@@ -83,11 +92,20 @@ class ImportView(EventPermissionRequiredMixin, TemplateView):
type='text/csv',
)
cf.file.save('import.csv', request.FILES['file'])
if self.request.POST.get("charset") in ENCODINGS:
charset = self.request.POST.get("charset")
else:
charset = "auto"
return redirect(reverse('control:event.orders.import.process', kwargs={
'event': request.event.slug,
'organizer': request.organizer.slug,
'file': cf.id
}))
}) + "?charset=" + charset)
def get_context_data(self, **kwargs):
    """Return the template context with the selectable encodings added.

    Any keyword arguments supplied by the caller are forwarded to the parent
    implementation rather than being discarded; ``encodings`` is always set
    to the module-level ``ENCODINGS`` tuple so the template can render the
    character set dropdown.
    """
    kwargs['encodings'] = ENCODINGS
    return super().get_context_data(**kwargs)
class ProcessView(EventPermissionRequiredMixin, AsyncAction, FormView):
@@ -108,9 +126,13 @@ class ProcessView(EventPermissionRequiredMixin, AsyncAction, FormView):
def form_valid(self, form):
self.request.event.settings.order_import_settings = form.cleaned_data
if self.request.GET.get("charset") in ENCODINGS:
charset = self.request.GET.get("charset")
else:
charset = None
return self.do(
self.request.event.pk, self.file.id, form.cleaned_data, self.request.LANGUAGE_CODE,
self.request.user.pk
self.request.user.pk, charset
)
@cached_property
@@ -119,8 +141,12 @@ class ProcessView(EventPermissionRequiredMixin, AsyncAction, FormView):
@cached_property
def parsed(self):
if self.request.GET.get("charset") in ENCODINGS:
charset = self.request.GET.get("charset")
else:
charset = None
try:
return parse_csv(self.file.file, 1024 * 1024)
return parse_csv(self.file.file, 1024 * 1024, charset=charset)
except UnicodeDecodeError:
messages.warning(
self.request,
@@ -129,7 +155,7 @@ class ProcessView(EventPermissionRequiredMixin, AsyncAction, FormView):
"Some characters were replaced with a placeholder."
)
)
return parse_csv(self.file.file, 1024 * 1024, "replace")
return parse_csv(self.file.file, 1024 * 1024, "replace", charset=charset)
def get(self, request, *args, **kwargs):
if 'async_id' in request.GET and settings.HAS_CELERY: