Fix memory usage in exporters by using chunked iterators

This commit is contained in:
Raphael Michel
2020-07-23 20:39:49 +02:00
parent bff1041878
commit fc5c3caf66
3 changed files with 179 additions and 142 deletions

View File

@@ -16,6 +16,7 @@ from pretix.base.models import Invoice, InvoiceLine, OrderPayment
from ...control.forms.filter import get_all_payment_providers
from ...helpers import GroupConcat
from ...helpers.iter import chunked_iterable
from ..exporter import BaseExporter, MultiSheetListExporter
from ..services.invoices import invoice_pdf_task
from ..signals import (
@@ -100,7 +101,7 @@ class InvoiceExporter(InvoiceExporterMixin, BaseExporter):
with tempfile.TemporaryDirectory() as d:
any = False
with ZipFile(output_file or os.path.join(d, 'tmp.zip'), 'w') as zipf:
for i in qs:
for i in qs.iterator():
try:
if not i.file:
invoice_pdf_task.apply(args=(i.pk,))
@@ -198,7 +199,9 @@ class InvoiceDataExporter(InvoiceExporterMixin, MultiSheetListExporter):
'm'
).order_by()
qs = self.invoices_queryset(form_data).order_by('full_invoice_no').select_related(
base_qs = self.invoices_queryset(form_data)\
qs = base_qs.select_related(
'order', 'refers'
).prefetch_related('order__payments').annotate(
payment_providers=Subquery(p_providers, output_field=CharField()),
@@ -218,7 +221,11 @@ class InvoiceDataExporter(InvoiceExporterMixin, MultiSheetListExporter):
)
)
for i in qs:
all_ids = base_qs.order_by('full_invoice_no').values_list('pk', flat=True)
for ids in chunked_iterable(all_ids, 1000):
invs = sorted(qs.filter(id__in=ids), key=lambda k: ids.index(k.pk))
for i in invs:
pmis = []
for p in i.order.payments.all():
if p.state in (OrderPayment.PAYMENT_STATE_CONFIRMED, OrderPayment.PAYMENT_STATE_CREATED,
@@ -320,7 +327,7 @@ class InvoiceDataExporter(InvoiceExporterMixin, MultiSheetListExporter):
'invoice', 'invoice__order', 'invoice__refers'
)
for l in qs:
for l in qs.iterator():
i = l.invoice
yield [
i.full_invoice_no,

View File

@@ -12,7 +12,7 @@ from django.utils.functional import cached_property
from django.utils.translation import gettext as _, gettext_lazy, pgettext
from pretix.base.models import (
GiftCard, InvoiceAddress, Order, OrderPosition, Question,
GiftCard, Invoice, InvoiceAddress, Order, OrderPosition, Question,
)
from pretix.base.models.orders import OrderFee, OrderPayment, OrderRefund
from pretix.base.services.quotas import QuotaAvailability
@@ -20,6 +20,7 @@ from pretix.base.settings import PERSON_NAME_SCHEMES
from ...control.forms.filter import get_all_payment_providers
from ...helpers import GroupConcat
from ...helpers.iter import chunked_iterable
from ..exporter import ListExporter, MultiSheetListExporter
from ..signals import (
register_data_exporters, register_multievent_data_exporters,
@@ -79,6 +80,10 @@ class OrderListExporter(MultiSheetListExporter):
elif sheet == 'fees':
return self.iterate_fees(form_data)
@cached_property
def event_object_cache(self):
    """Return a dict mapping each event's primary key to the event object.

    Built from ``self.events``. Callers can use it to resolve an event from
    a foreign-key id (e.g. ``order.event_id``) without touching the related
    object on every row.
    """
    events_by_pk = {}
    for event in self.events:
        events_by_pk[event.pk] = event
    return events_by_pk
def iterate_orders(self, form_data: dict):
p_date = OrderPayment.objects.filter(
order=OuterRef('pk'),
@@ -98,6 +103,13 @@ class OrderListExporter(MultiSheetListExporter):
).values(
'm'
).order_by()
i_numbers = Invoice.objects.filter(
order=OuterRef('pk'),
).values('order').annotate(
m=GroupConcat('full_invoice_no', delimiter=', ')
).values(
'm'
).order_by()
s = OrderPosition.objects.filter(
order=OuterRef('pk')
@@ -105,8 +117,9 @@ class OrderListExporter(MultiSheetListExporter):
qs = Order.objects.filter(event__in=self.events).annotate(
payment_date=Subquery(p_date, output_field=DateTimeField()),
payment_providers=Subquery(p_providers, output_field=CharField()),
invoice_numbers=Subquery(i_numbers, output_field=CharField()),
pcnt=Subquery(s, output_field=IntegerField())
).select_related('invoice_address').prefetch_related('invoices').prefetch_related('event')
).select_related('invoice_address')
if form_data['paid_only']:
qs = qs.filter(status=Order.STATUS_PAID)
tax_rates = self._get_all_tax_rates(qs)
@@ -157,11 +170,11 @@ class OrderListExporter(MultiSheetListExporter):
)
}
for order in qs.order_by('datetime'):
tz = pytz.timezone(order.event.settings.timezone)
for order in qs.order_by('datetime').iterator():
tz = pytz.timezone(self.event_object_cache[order.event_id].settings.timezone)
row = [
order.event.slug,
self.event_object_cache[order.event_id].slug,
order.code,
order.total,
order.get_status_display(),
@@ -210,7 +223,7 @@ class OrderListExporter(MultiSheetListExporter):
taxrate_values['taxsum'] + fee_taxrate_values['taxsum'],
]
row.append(', '.join([i.number for i in order.invoices.all()]))
row.append(order.invoice_numbers)
row.append(order.sales_channel)
row.append(_('Yes') if order.checkin_attention else _('No'))
row.append(order.comment or "")
@@ -265,11 +278,11 @@ class OrderListExporter(MultiSheetListExporter):
headers.append(_('Payment providers'))
yield headers
for op in qs.order_by('order__datetime'):
for op in qs.order_by('order__datetime').iterator():
order = op.order
tz = pytz.timezone(order.event.settings.timezone)
row = [
order.event.slug,
self.event_object_cache[order.event_id].slug,
order.code,
order.get_status_display(),
order.email,
@@ -318,9 +331,10 @@ class OrderListExporter(MultiSheetListExporter):
).values(
'm'
).order_by()
qs = OrderPosition.objects.filter(
base_qs = OrderPosition.objects.filter(
order__event__in=self.events,
).annotate(
)
qs = base_qs.annotate(
payment_providers=Subquery(p_providers, output_field=CharField()),
).select_related(
'order', 'order__invoice_address', 'item', 'variation',
@@ -331,6 +345,8 @@ class OrderListExporter(MultiSheetListExporter):
if form_data['paid_only']:
qs = qs.filter(order__status=Order.STATUS_PAID)
has_subevents = self.events.filter(has_subevents=True).exists()
headers = [
_('Event slug'),
_('Order code'),
@@ -339,7 +355,7 @@ class OrderListExporter(MultiSheetListExporter):
_('Email'),
_('Order date'),
]
if self.events.filter(has_subevents=True).exists():
if has_subevents:
headers.append(pgettext('subevent', 'Date'))
headers.append(_('Start date'))
headers.append(_('End date'))
@@ -395,22 +411,26 @@ class OrderListExporter(MultiSheetListExporter):
yield headers
for op in qs.order_by('order__datetime', 'positionid'):
all_ids = base_qs.order_by('order__datetime', 'positionid').values_list('pk', flat=True)
for ids in chunked_iterable(all_ids, 1000):
ops = sorted(qs.filter(id__in=ids), key=lambda k: ids.index(k.pk))
for op in ops:
order = op.order
tz = pytz.timezone(order.event.settings.timezone)
tz = pytz.timezone(self.event_object_cache[order.event_id].settings.timezone)
row = [
order.event.slug,
self.event_object_cache[order.event_id].slug,
order.code,
op.positionid,
order.get_status_display(),
order.email,
order.datetime.astimezone(tz).strftime('%Y-%m-%d'),
]
if order.event.has_subevents:
if has_subevents:
row.append(op.subevent.name)
row.append(op.subevent.date_from.astimezone(order.event.timezone).strftime('%Y-%m-%d %H:%M:%S'))
row.append(op.subevent.date_from.astimezone(self.event_object_cache[order.event_id].timezone).strftime('%Y-%m-%d %H:%M:%S'))
if op.subevent.date_to:
row.append(op.subevent.date_to.astimezone(order.event.timezone).strftime('%Y-%m-%d %H:%M:%S'))
row.append(op.subevent.date_to.astimezone(self.event_object_cache[order.event_id].timezone).strftime('%Y-%m-%d %H:%M:%S'))
else:
row.append('')
row += [

View File

@@ -0,0 +1,10 @@
import itertools
def chunked_iterable(iterable, size):
    """Split ``iterable`` into successive tuples of at most ``size`` items.

    The input is consumed lazily, one chunk at a time. Every chunk holds
    exactly ``size`` items except possibly the last one, which holds the
    remainder. An empty input produces no chunks at all.

    :param iterable: Any iterable; it is iterated exactly once.
    :param size: Maximum number of items per chunk. ``None`` is handed to
        ``itertools.islice`` unchanged and therefore produces one single
        chunk containing everything.
    :raises ValueError: If ``size`` is smaller than 1.
    :return: A generator yielding tuples.
    """
    # A size of 0 would make the very first chunk empty and end the iteration
    # immediately, silently discarding every item. Reject it up front, in line
    # with the ValueError that islice already raises for negative sizes.
    if size is not None and size < 1:
        raise ValueError('size must be at least 1')

    def _chunks():
        it = iter(iterable)
        while True:
            chunk = tuple(itertools.islice(it, size))
            if not chunk:
                # islice produced nothing, so the source is exhausted.
                return
            yield chunk

    return _chunks()