Fix memory usage in exporters by using chunked iterators

This commit is contained in:
Raphael Michel
2020-07-23 20:39:49 +02:00
parent bff1041878
commit fc5c3caf66
3 changed files with 179 additions and 142 deletions

View File

@@ -16,6 +16,7 @@ from pretix.base.models import Invoice, InvoiceLine, OrderPayment
from ...control.forms.filter import get_all_payment_providers from ...control.forms.filter import get_all_payment_providers
from ...helpers import GroupConcat from ...helpers import GroupConcat
from ...helpers.iter import chunked_iterable
from ..exporter import BaseExporter, MultiSheetListExporter from ..exporter import BaseExporter, MultiSheetListExporter
from ..services.invoices import invoice_pdf_task from ..services.invoices import invoice_pdf_task
from ..signals import ( from ..signals import (
@@ -100,7 +101,7 @@ class InvoiceExporter(InvoiceExporterMixin, BaseExporter):
with tempfile.TemporaryDirectory() as d: with tempfile.TemporaryDirectory() as d:
any = False any = False
with ZipFile(output_file or os.path.join(d, 'tmp.zip'), 'w') as zipf: with ZipFile(output_file or os.path.join(d, 'tmp.zip'), 'w') as zipf:
for i in qs: for i in qs.iterator():
try: try:
if not i.file: if not i.file:
invoice_pdf_task.apply(args=(i.pk,)) invoice_pdf_task.apply(args=(i.pk,))
@@ -198,7 +199,9 @@ class InvoiceDataExporter(InvoiceExporterMixin, MultiSheetListExporter):
'm' 'm'
).order_by() ).order_by()
qs = self.invoices_queryset(form_data).order_by('full_invoice_no').select_related( base_qs = self.invoices_queryset(form_data)\
qs = base_qs.select_related(
'order', 'refers' 'order', 'refers'
).prefetch_related('order__payments').annotate( ).prefetch_related('order__payments').annotate(
payment_providers=Subquery(p_providers, output_field=CharField()), payment_providers=Subquery(p_providers, output_field=CharField()),
@@ -218,7 +221,11 @@ class InvoiceDataExporter(InvoiceExporterMixin, MultiSheetListExporter):
) )
) )
for i in qs: all_ids = base_qs.order_by('full_invoice_no').values_list('pk', flat=True)
for ids in chunked_iterable(all_ids, 1000):
invs = sorted(qs.filter(id__in=ids), key=lambda k: ids.index(k.pk))
for i in invs:
pmis = [] pmis = []
for p in i.order.payments.all(): for p in i.order.payments.all():
if p.state in (OrderPayment.PAYMENT_STATE_CONFIRMED, OrderPayment.PAYMENT_STATE_CREATED, if p.state in (OrderPayment.PAYMENT_STATE_CONFIRMED, OrderPayment.PAYMENT_STATE_CREATED,
@@ -320,7 +327,7 @@ class InvoiceDataExporter(InvoiceExporterMixin, MultiSheetListExporter):
'invoice', 'invoice__order', 'invoice__refers' 'invoice', 'invoice__order', 'invoice__refers'
) )
for l in qs: for l in qs.iterator():
i = l.invoice i = l.invoice
yield [ yield [
i.full_invoice_no, i.full_invoice_no,

View File

@@ -12,7 +12,7 @@ from django.utils.functional import cached_property
from django.utils.translation import gettext as _, gettext_lazy, pgettext from django.utils.translation import gettext as _, gettext_lazy, pgettext
from pretix.base.models import ( from pretix.base.models import (
GiftCard, InvoiceAddress, Order, OrderPosition, Question, GiftCard, Invoice, InvoiceAddress, Order, OrderPosition, Question,
) )
from pretix.base.models.orders import OrderFee, OrderPayment, OrderRefund from pretix.base.models.orders import OrderFee, OrderPayment, OrderRefund
from pretix.base.services.quotas import QuotaAvailability from pretix.base.services.quotas import QuotaAvailability
@@ -20,6 +20,7 @@ from pretix.base.settings import PERSON_NAME_SCHEMES
from ...control.forms.filter import get_all_payment_providers from ...control.forms.filter import get_all_payment_providers
from ...helpers import GroupConcat from ...helpers import GroupConcat
from ...helpers.iter import chunked_iterable
from ..exporter import ListExporter, MultiSheetListExporter from ..exporter import ListExporter, MultiSheetListExporter
from ..signals import ( from ..signals import (
register_data_exporters, register_multievent_data_exporters, register_data_exporters, register_multievent_data_exporters,
@@ -79,6 +80,10 @@ class OrderListExporter(MultiSheetListExporter):
elif sheet == 'fees': elif sheet == 'fees':
return self.iterate_fees(form_data) return self.iterate_fees(form_data)
@cached_property
def event_object_cache(self):
    """
    Build a lookup table from primary key to event for ``self.events``.

    Wrapped in ``cached_property``, so the table is built on first access
    and reused afterwards on the same instance.

    :return: A dict mapping each event's ``pk`` to the event object itself.
    """
    events_by_pk = {}
    for event in self.events:
        events_by_pk[event.pk] = event
    return events_by_pk
def iterate_orders(self, form_data: dict): def iterate_orders(self, form_data: dict):
p_date = OrderPayment.objects.filter( p_date = OrderPayment.objects.filter(
order=OuterRef('pk'), order=OuterRef('pk'),
@@ -98,6 +103,13 @@ class OrderListExporter(MultiSheetListExporter):
).values( ).values(
'm' 'm'
).order_by() ).order_by()
i_numbers = Invoice.objects.filter(
order=OuterRef('pk'),
).values('order').annotate(
m=GroupConcat('full_invoice_no', delimiter=', ')
).values(
'm'
).order_by()
s = OrderPosition.objects.filter( s = OrderPosition.objects.filter(
order=OuterRef('pk') order=OuterRef('pk')
@@ -105,8 +117,9 @@ class OrderListExporter(MultiSheetListExporter):
qs = Order.objects.filter(event__in=self.events).annotate( qs = Order.objects.filter(event__in=self.events).annotate(
payment_date=Subquery(p_date, output_field=DateTimeField()), payment_date=Subquery(p_date, output_field=DateTimeField()),
payment_providers=Subquery(p_providers, output_field=CharField()), payment_providers=Subquery(p_providers, output_field=CharField()),
invoice_numbers=Subquery(i_numbers, output_field=CharField()),
pcnt=Subquery(s, output_field=IntegerField()) pcnt=Subquery(s, output_field=IntegerField())
).select_related('invoice_address').prefetch_related('invoices').prefetch_related('event') ).select_related('invoice_address')
if form_data['paid_only']: if form_data['paid_only']:
qs = qs.filter(status=Order.STATUS_PAID) qs = qs.filter(status=Order.STATUS_PAID)
tax_rates = self._get_all_tax_rates(qs) tax_rates = self._get_all_tax_rates(qs)
@@ -157,11 +170,11 @@ class OrderListExporter(MultiSheetListExporter):
) )
} }
for order in qs.order_by('datetime'): for order in qs.order_by('datetime').iterator():
tz = pytz.timezone(order.event.settings.timezone) tz = pytz.timezone(self.event_object_cache[order.event_id].settings.timezone)
row = [ row = [
order.event.slug, self.event_object_cache[order.event_id].slug,
order.code, order.code,
order.total, order.total,
order.get_status_display(), order.get_status_display(),
@@ -210,7 +223,7 @@ class OrderListExporter(MultiSheetListExporter):
taxrate_values['taxsum'] + fee_taxrate_values['taxsum'], taxrate_values['taxsum'] + fee_taxrate_values['taxsum'],
] ]
row.append(', '.join([i.number for i in order.invoices.all()])) row.append(order.invoice_numbers)
row.append(order.sales_channel) row.append(order.sales_channel)
row.append(_('Yes') if order.checkin_attention else _('No')) row.append(_('Yes') if order.checkin_attention else _('No'))
row.append(order.comment or "") row.append(order.comment or "")
@@ -265,11 +278,11 @@ class OrderListExporter(MultiSheetListExporter):
headers.append(_('Payment providers')) headers.append(_('Payment providers'))
yield headers yield headers
for op in qs.order_by('order__datetime'): for op in qs.order_by('order__datetime').iterator():
order = op.order order = op.order
tz = pytz.timezone(order.event.settings.timezone) tz = pytz.timezone(order.event.settings.timezone)
row = [ row = [
order.event.slug, self.event_object_cache[order.event_id].slug,
order.code, order.code,
order.get_status_display(), order.get_status_display(),
order.email, order.email,
@@ -318,9 +331,10 @@ class OrderListExporter(MultiSheetListExporter):
).values( ).values(
'm' 'm'
).order_by() ).order_by()
qs = OrderPosition.objects.filter( base_qs = OrderPosition.objects.filter(
order__event__in=self.events, order__event__in=self.events,
).annotate( )
qs = base_qs.annotate(
payment_providers=Subquery(p_providers, output_field=CharField()), payment_providers=Subquery(p_providers, output_field=CharField()),
).select_related( ).select_related(
'order', 'order__invoice_address', 'item', 'variation', 'order', 'order__invoice_address', 'item', 'variation',
@@ -331,6 +345,8 @@ class OrderListExporter(MultiSheetListExporter):
if form_data['paid_only']: if form_data['paid_only']:
qs = qs.filter(order__status=Order.STATUS_PAID) qs = qs.filter(order__status=Order.STATUS_PAID)
has_subevents = self.events.filter(has_subevents=True).exists()
headers = [ headers = [
_('Event slug'), _('Event slug'),
_('Order code'), _('Order code'),
@@ -339,7 +355,7 @@ class OrderListExporter(MultiSheetListExporter):
_('Email'), _('Email'),
_('Order date'), _('Order date'),
] ]
if self.events.filter(has_subevents=True).exists(): if has_subevents:
headers.append(pgettext('subevent', 'Date')) headers.append(pgettext('subevent', 'Date'))
headers.append(_('Start date')) headers.append(_('Start date'))
headers.append(_('End date')) headers.append(_('End date'))
@@ -395,22 +411,26 @@ class OrderListExporter(MultiSheetListExporter):
yield headers yield headers
for op in qs.order_by('order__datetime', 'positionid'): all_ids = base_qs.order_by('order__datetime', 'positionid').values_list('pk', flat=True)
for ids in chunked_iterable(all_ids, 1000):
ops = sorted(qs.filter(id__in=ids), key=lambda k: ids.index(k.pk))
for op in ops:
order = op.order order = op.order
tz = pytz.timezone(order.event.settings.timezone) tz = pytz.timezone(self.event_object_cache[order.event_id].settings.timezone)
row = [ row = [
order.event.slug, self.event_object_cache[order.event_id].slug,
order.code, order.code,
op.positionid, op.positionid,
order.get_status_display(), order.get_status_display(),
order.email, order.email,
order.datetime.astimezone(tz).strftime('%Y-%m-%d'), order.datetime.astimezone(tz).strftime('%Y-%m-%d'),
] ]
if order.event.has_subevents: if has_subevents:
row.append(op.subevent.name) row.append(op.subevent.name)
row.append(op.subevent.date_from.astimezone(order.event.timezone).strftime('%Y-%m-%d %H:%M:%S')) row.append(op.subevent.date_from.astimezone(self.event_object_cache[order.event_id].timezone).strftime('%Y-%m-%d %H:%M:%S'))
if op.subevent.date_to: if op.subevent.date_to:
row.append(op.subevent.date_to.astimezone(order.event.timezone).strftime('%Y-%m-%d %H:%M:%S')) row.append(op.subevent.date_to.astimezone(self.event_object_cache[order.event_id].timezone).strftime('%Y-%m-%d %H:%M:%S'))
else: else:
row.append('') row.append('')
row += [ row += [

View File

@@ -0,0 +1,10 @@
import itertools
def chunked_iterable(iterable, size):
    """
    Lazily split ``iterable`` into consecutive tuples of at most ``size`` items.

    Items are pulled from the source only when the next chunk is requested,
    and only one chunk is built at a time.

    :param iterable: Any iterable; it is consumed once, in order.
    :param size: Maximum number of items per chunk. Must be a positive
                 integer, or ``None`` to receive all items in a single chunk
                 (``None`` is passed straight through to ``itertools.islice``).
    :return: A generator of non-empty tuples. Every tuple except possibly the
             last contains exactly ``size`` items.
    :raises ValueError: If ``size`` is less than 1. As this is a generator,
                        the error surfaces when it is first advanced.
    """
    if size is not None and size < 1:
        # islice(it, 0) always produces an empty chunk, which would end the
        # loop below immediately and silently discard every item.
        raise ValueError('size must be a positive integer, got %r' % (size,))

    it = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(it, size))
        if not chunk:
            # Source exhausted; never emit a trailing empty chunk.
            break
        yield chunk