Fix memory usage in exporters by using chunked iterators

This commit is contained in:
Raphael Michel
2020-07-23 20:39:49 +02:00
parent bff1041878
commit fc5c3caf66
3 changed files with 179 additions and 142 deletions

View File

@@ -16,6 +16,7 @@ from pretix.base.models import Invoice, InvoiceLine, OrderPayment
from ...control.forms.filter import get_all_payment_providers from ...control.forms.filter import get_all_payment_providers
from ...helpers import GroupConcat from ...helpers import GroupConcat
from ...helpers.iter import chunked_iterable
from ..exporter import BaseExporter, MultiSheetListExporter from ..exporter import BaseExporter, MultiSheetListExporter
from ..services.invoices import invoice_pdf_task from ..services.invoices import invoice_pdf_task
from ..signals import ( from ..signals import (
@@ -100,7 +101,7 @@ class InvoiceExporter(InvoiceExporterMixin, BaseExporter):
with tempfile.TemporaryDirectory() as d: with tempfile.TemporaryDirectory() as d:
any = False any = False
with ZipFile(output_file or os.path.join(d, 'tmp.zip'), 'w') as zipf: with ZipFile(output_file or os.path.join(d, 'tmp.zip'), 'w') as zipf:
for i in qs: for i in qs.iterator():
try: try:
if not i.file: if not i.file:
invoice_pdf_task.apply(args=(i.pk,)) invoice_pdf_task.apply(args=(i.pk,))
@@ -198,7 +199,9 @@ class InvoiceDataExporter(InvoiceExporterMixin, MultiSheetListExporter):
'm' 'm'
).order_by() ).order_by()
qs = self.invoices_queryset(form_data).order_by('full_invoice_no').select_related( base_qs = self.invoices_queryset(form_data)\
qs = base_qs.select_related(
'order', 'refers' 'order', 'refers'
).prefetch_related('order__payments').annotate( ).prefetch_related('order__payments').annotate(
payment_providers=Subquery(p_providers, output_field=CharField()), payment_providers=Subquery(p_providers, output_field=CharField()),
@@ -218,53 +221,57 @@ class InvoiceDataExporter(InvoiceExporterMixin, MultiSheetListExporter):
) )
) )
for i in qs: all_ids = base_qs.order_by('full_invoice_no').values_list('pk', flat=True)
pmis = [] for ids in chunked_iterable(all_ids, 1000):
for p in i.order.payments.all(): invs = sorted(qs.filter(id__in=ids), key=lambda k: ids.index(k.pk))
if p.state in (OrderPayment.PAYMENT_STATE_CONFIRMED, OrderPayment.PAYMENT_STATE_CREATED,
OrderPayment.PAYMENT_STATE_PENDING, OrderPayment.PAYMENT_STATE_REFUNDED): for i in invs:
pprov = p.payment_provider pmis = []
if pprov: for p in i.order.payments.all():
mid = pprov.matching_id(p) if p.state in (OrderPayment.PAYMENT_STATE_CONFIRMED, OrderPayment.PAYMENT_STATE_CREATED,
if mid: OrderPayment.PAYMENT_STATE_PENDING, OrderPayment.PAYMENT_STATE_REFUNDED):
pmis.append(mid) pprov = p.payment_provider
pmi = '\n'.join(pmis) if pprov:
yield [ mid = pprov.matching_id(p)
i.full_invoice_no, if mid:
date_format(i.date, "SHORT_DATE_FORMAT"), pmis.append(mid)
i.order.code, pmi = '\n'.join(pmis)
i.order.email, yield [
_('Cancellation') if i.is_cancellation else _('Invoice'), i.full_invoice_no,
i.refers.full_invoice_no if i.refers else '', date_format(i.date, "SHORT_DATE_FORMAT"),
i.locale, i.order.code,
i.invoice_from_name, i.order.email,
i.invoice_from, _('Cancellation') if i.is_cancellation else _('Invoice'),
i.invoice_from_zipcode, i.refers.full_invoice_no if i.refers else '',
i.invoice_from_city, i.locale,
i.invoice_from_country, i.invoice_from_name,
i.invoice_from_tax_id, i.invoice_from,
i.invoice_from_vat_id, i.invoice_from_zipcode,
i.invoice_to_company, i.invoice_from_city,
i.invoice_to_name, i.invoice_from_country,
i.invoice_to_street or i.invoice_to, i.invoice_from_tax_id,
i.invoice_to_zipcode, i.invoice_from_vat_id,
i.invoice_to_city, i.invoice_to_company,
i.invoice_to_country, i.invoice_to_name,
i.invoice_to_state, i.invoice_to_street or i.invoice_to,
i.invoice_to_vat_id, i.invoice_to_zipcode,
i.invoice_to_beneficiary, i.invoice_to_city,
i.internal_reference, i.invoice_to_country,
_('Yes') if i.reverse_charge else _('No'), i.invoice_to_state,
i.foreign_currency_display, i.invoice_to_vat_id,
i.foreign_currency_rate, i.invoice_to_beneficiary,
i.total_gross if i.total_gross else Decimal('0.00'), i.internal_reference,
Decimal(i.total_net if i.total_net else '0.00').quantize(Decimal('0.01')), _('Yes') if i.reverse_charge else _('No'),
pmi, i.foreign_currency_display,
', '.join([ i.foreign_currency_rate,
str(self.providers.get(p, p)) for p in sorted(set((i.payment_providers or '').split(','))) i.total_gross if i.total_gross else Decimal('0.00'),
if p and p != 'free' Decimal(i.total_net if i.total_net else '0.00').quantize(Decimal('0.01')),
]) pmi,
] ', '.join([
str(self.providers.get(p, p)) for p in sorted(set((i.payment_providers or '').split(',')))
if p and p != 'free'
])
]
elif sheet == 'lines': elif sheet == 'lines':
yield [ yield [
_('Invoice number'), _('Invoice number'),
@@ -320,7 +327,7 @@ class InvoiceDataExporter(InvoiceExporterMixin, MultiSheetListExporter):
'invoice', 'invoice__order', 'invoice__refers' 'invoice', 'invoice__order', 'invoice__refers'
) )
for l in qs: for l in qs.iterator():
i = l.invoice i = l.invoice
yield [ yield [
i.full_invoice_no, i.full_invoice_no,

View File

@@ -12,7 +12,7 @@ from django.utils.functional import cached_property
from django.utils.translation import gettext as _, gettext_lazy, pgettext from django.utils.translation import gettext as _, gettext_lazy, pgettext
from pretix.base.models import ( from pretix.base.models import (
GiftCard, InvoiceAddress, Order, OrderPosition, Question, GiftCard, Invoice, InvoiceAddress, Order, OrderPosition, Question,
) )
from pretix.base.models.orders import OrderFee, OrderPayment, OrderRefund from pretix.base.models.orders import OrderFee, OrderPayment, OrderRefund
from pretix.base.services.quotas import QuotaAvailability from pretix.base.services.quotas import QuotaAvailability
@@ -20,6 +20,7 @@ from pretix.base.settings import PERSON_NAME_SCHEMES
from ...control.forms.filter import get_all_payment_providers from ...control.forms.filter import get_all_payment_providers
from ...helpers import GroupConcat from ...helpers import GroupConcat
from ...helpers.iter import chunked_iterable
from ..exporter import ListExporter, MultiSheetListExporter from ..exporter import ListExporter, MultiSheetListExporter
from ..signals import ( from ..signals import (
register_data_exporters, register_multievent_data_exporters, register_data_exporters, register_multievent_data_exporters,
@@ -79,6 +80,10 @@ class OrderListExporter(MultiSheetListExporter):
elif sheet == 'fees': elif sheet == 'fees':
return self.iterate_fees(form_data) return self.iterate_fees(form_data)
@cached_property
def event_object_cache(self):
    """Return a dict mapping each event's primary key to the event object.

    The mapping is built with a single pass over ``self.events``. Callers
    in this exporter look events up by ``order.event_id`` in it instead of
    following ``order.event`` for every exported row.
    """
    events_by_pk = {}
    for event in self.events:
        # On duplicate keys the later event wins, as in a dict comprehension.
        events_by_pk[event.pk] = event
    return events_by_pk
def iterate_orders(self, form_data: dict): def iterate_orders(self, form_data: dict):
p_date = OrderPayment.objects.filter( p_date = OrderPayment.objects.filter(
order=OuterRef('pk'), order=OuterRef('pk'),
@@ -98,6 +103,13 @@ class OrderListExporter(MultiSheetListExporter):
).values( ).values(
'm' 'm'
).order_by() ).order_by()
i_numbers = Invoice.objects.filter(
order=OuterRef('pk'),
).values('order').annotate(
m=GroupConcat('full_invoice_no', delimiter=', ')
).values(
'm'
).order_by()
s = OrderPosition.objects.filter( s = OrderPosition.objects.filter(
order=OuterRef('pk') order=OuterRef('pk')
@@ -105,8 +117,9 @@ class OrderListExporter(MultiSheetListExporter):
qs = Order.objects.filter(event__in=self.events).annotate( qs = Order.objects.filter(event__in=self.events).annotate(
payment_date=Subquery(p_date, output_field=DateTimeField()), payment_date=Subquery(p_date, output_field=DateTimeField()),
payment_providers=Subquery(p_providers, output_field=CharField()), payment_providers=Subquery(p_providers, output_field=CharField()),
invoice_numbers=Subquery(i_numbers, output_field=CharField()),
pcnt=Subquery(s, output_field=IntegerField()) pcnt=Subquery(s, output_field=IntegerField())
).select_related('invoice_address').prefetch_related('invoices').prefetch_related('event') ).select_related('invoice_address')
if form_data['paid_only']: if form_data['paid_only']:
qs = qs.filter(status=Order.STATUS_PAID) qs = qs.filter(status=Order.STATUS_PAID)
tax_rates = self._get_all_tax_rates(qs) tax_rates = self._get_all_tax_rates(qs)
@@ -157,11 +170,11 @@ class OrderListExporter(MultiSheetListExporter):
) )
} }
for order in qs.order_by('datetime'): for order in qs.order_by('datetime').iterator():
tz = pytz.timezone(order.event.settings.timezone) tz = pytz.timezone(self.event_object_cache[order.event_id].settings.timezone)
row = [ row = [
order.event.slug, self.event_object_cache[order.event_id].slug,
order.code, order.code,
order.total, order.total,
order.get_status_display(), order.get_status_display(),
@@ -210,7 +223,7 @@ class OrderListExporter(MultiSheetListExporter):
taxrate_values['taxsum'] + fee_taxrate_values['taxsum'], taxrate_values['taxsum'] + fee_taxrate_values['taxsum'],
] ]
row.append(', '.join([i.number for i in order.invoices.all()])) row.append(order.invoice_numbers)
row.append(order.sales_channel) row.append(order.sales_channel)
row.append(_('Yes') if order.checkin_attention else _('No')) row.append(_('Yes') if order.checkin_attention else _('No'))
row.append(order.comment or "") row.append(order.comment or "")
@@ -265,11 +278,11 @@ class OrderListExporter(MultiSheetListExporter):
headers.append(_('Payment providers')) headers.append(_('Payment providers'))
yield headers yield headers
for op in qs.order_by('order__datetime'): for op in qs.order_by('order__datetime').iterator():
order = op.order order = op.order
tz = pytz.timezone(order.event.settings.timezone) tz = pytz.timezone(order.event.settings.timezone)
row = [ row = [
order.event.slug, self.event_object_cache[order.event_id].slug,
order.code, order.code,
order.get_status_display(), order.get_status_display(),
order.email, order.email,
@@ -318,9 +331,10 @@ class OrderListExporter(MultiSheetListExporter):
).values( ).values(
'm' 'm'
).order_by() ).order_by()
qs = OrderPosition.objects.filter( base_qs = OrderPosition.objects.filter(
order__event__in=self.events, order__event__in=self.events,
).annotate( )
qs = base_qs.annotate(
payment_providers=Subquery(p_providers, output_field=CharField()), payment_providers=Subquery(p_providers, output_field=CharField()),
).select_related( ).select_related(
'order', 'order__invoice_address', 'item', 'variation', 'order', 'order__invoice_address', 'item', 'variation',
@@ -331,6 +345,8 @@ class OrderListExporter(MultiSheetListExporter):
if form_data['paid_only']: if form_data['paid_only']:
qs = qs.filter(order__status=Order.STATUS_PAID) qs = qs.filter(order__status=Order.STATUS_PAID)
has_subevents = self.events.filter(has_subevents=True).exists()
headers = [ headers = [
_('Event slug'), _('Event slug'),
_('Order code'), _('Order code'),
@@ -339,7 +355,7 @@ class OrderListExporter(MultiSheetListExporter):
_('Email'), _('Email'),
_('Order date'), _('Order date'),
] ]
if self.events.filter(has_subevents=True).exists(): if has_subevents:
headers.append(pgettext('subevent', 'Date')) headers.append(pgettext('subevent', 'Date'))
headers.append(_('Start date')) headers.append(_('Start date'))
headers.append(_('End date')) headers.append(_('End date'))
@@ -395,96 +411,100 @@ class OrderListExporter(MultiSheetListExporter):
yield headers yield headers
for op in qs.order_by('order__datetime', 'positionid'): all_ids = base_qs.order_by('order__datetime', 'positionid').values_list('pk', flat=True)
order = op.order for ids in chunked_iterable(all_ids, 1000):
tz = pytz.timezone(order.event.settings.timezone) ops = sorted(qs.filter(id__in=ids), key=lambda k: ids.index(k.pk))
row = [
order.event.slug,
order.code,
op.positionid,
order.get_status_display(),
order.email,
order.datetime.astimezone(tz).strftime('%Y-%m-%d'),
]
if order.event.has_subevents:
row.append(op.subevent.name)
row.append(op.subevent.date_from.astimezone(order.event.timezone).strftime('%Y-%m-%d %H:%M:%S'))
if op.subevent.date_to:
row.append(op.subevent.date_to.astimezone(order.event.timezone).strftime('%Y-%m-%d %H:%M:%S'))
else:
row.append('')
row += [
str(op.item),
str(op.variation) if op.variation else '',
op.price,
op.tax_rate,
str(op.tax_rule) if op.tax_rule else '',
op.tax_value,
op.attendee_name,
]
if name_scheme and len(name_scheme['fields']) > 1:
for k, label, w in name_scheme['fields']:
row.append(
op.attendee_name_parts.get(k, '')
)
row += [
op.attendee_email,
op.company or '',
op.street or '',
op.zipcode or '',
op.city or '',
op.country if op.country else '',
op.state or '',
op.voucher.code if op.voucher else '',
op.pseudonymization_id,
]
acache = {}
for a in op.answers.all():
# We do not want to localize Date, Time and Datetime question answers, as those can lead
# to difficulties parsing the data (for example 2019-02-01 may become Février, 2019 01 in French).
if a.question.type == Question.TYPE_CHOICE_MULTIPLE:
acache[a.question_id] = set(o.pk for o in a.options.all())
elif a.question.type in Question.UNLOCALIZED_TYPES:
acache[a.question_id] = a.answer
else:
acache[a.question_id] = str(a)
for q in questions:
if q.type == Question.TYPE_CHOICE_MULTIPLE:
for o in options[q.pk]:
row.append(_('Yes') if o.pk in acache.get(q.pk, set()) else _('No'))
else:
row.append(acache.get(q.pk, ''))
try: for op in ops:
order = op.order
tz = pytz.timezone(self.event_object_cache[order.event_id].settings.timezone)
row = [
self.event_object_cache[order.event_id].slug,
order.code,
op.positionid,
order.get_status_display(),
order.email,
order.datetime.astimezone(tz).strftime('%Y-%m-%d'),
]
if has_subevents:
row.append(op.subevent.name)
row.append(op.subevent.date_from.astimezone(self.event_object_cache[order.event_id].timezone).strftime('%Y-%m-%d %H:%M:%S'))
if op.subevent.date_to:
row.append(op.subevent.date_to.astimezone(self.event_object_cache[order.event_id].timezone).strftime('%Y-%m-%d %H:%M:%S'))
else:
row.append('')
row += [ row += [
order.invoice_address.company, str(op.item),
order.invoice_address.name, str(op.variation) if op.variation else '',
op.price,
op.tax_rate,
str(op.tax_rule) if op.tax_rule else '',
op.tax_value,
op.attendee_name,
] ]
if name_scheme and len(name_scheme['fields']) > 1: if name_scheme and len(name_scheme['fields']) > 1:
for k, label, w in name_scheme['fields']: for k, label, w in name_scheme['fields']:
row.append( row.append(
order.invoice_address.name_parts.get(k, '') op.attendee_name_parts.get(k, '')
) )
row += [ row += [
order.invoice_address.street, op.attendee_email,
order.invoice_address.zipcode, op.company or '',
order.invoice_address.city, op.street or '',
order.invoice_address.country if order.invoice_address.country else op.zipcode or '',
order.invoice_address.country_old, op.city or '',
order.invoice_address.state, op.country if op.country else '',
order.invoice_address.vat_id, op.state or '',
op.voucher.code if op.voucher else '',
op.pseudonymization_id,
] ]
except InvoiceAddress.DoesNotExist: acache = {}
row += [''] * (8 + (len(name_scheme['fields']) if name_scheme and len(name_scheme['fields']) > 1 else 0)) for a in op.answers.all():
row += [ # We do not want to localize Date, Time and Datetime question answers, as those can lead
order.sales_channel, # to difficulties parsing the data (for example 2019-02-01 may become Février, 2019 01 in French).
order.locale if a.question.type == Question.TYPE_CHOICE_MULTIPLE:
] acache[a.question_id] = set(o.pk for o in a.options.all())
row.append(', '.join([ elif a.question.type in Question.UNLOCALIZED_TYPES:
str(self.providers.get(p, p)) for p in sorted(set((op.payment_providers or '').split(','))) acache[a.question_id] = a.answer
if p and p != 'free' else:
])) acache[a.question_id] = str(a)
yield row for q in questions:
if q.type == Question.TYPE_CHOICE_MULTIPLE:
for o in options[q.pk]:
row.append(_('Yes') if o.pk in acache.get(q.pk, set()) else _('No'))
else:
row.append(acache.get(q.pk, ''))
try:
row += [
order.invoice_address.company,
order.invoice_address.name,
]
if name_scheme and len(name_scheme['fields']) > 1:
for k, label, w in name_scheme['fields']:
row.append(
order.invoice_address.name_parts.get(k, '')
)
row += [
order.invoice_address.street,
order.invoice_address.zipcode,
order.invoice_address.city,
order.invoice_address.country if order.invoice_address.country else
order.invoice_address.country_old,
order.invoice_address.state,
order.invoice_address.vat_id,
]
except InvoiceAddress.DoesNotExist:
row += [''] * (8 + (len(name_scheme['fields']) if name_scheme and len(name_scheme['fields']) > 1 else 0))
row += [
order.sales_channel,
order.locale
]
row.append(', '.join([
str(self.providers.get(p, p)) for p in sorted(set((op.payment_providers or '').split(',')))
if p and p != 'free'
]))
yield row
def get_filename(self): def get_filename(self):
if self.is_multievent: if self.is_multievent:

View File

@@ -0,0 +1,10 @@
import itertools
def chunked_iterable(iterable, size):
    """Yield successive tuples of at most ``size`` items from ``iterable``.

    The input is consumed lazily through a single iterator, so only one
    chunk is held by this generator at a time. Every chunk has exactly
    ``size`` items except possibly the last, which holds the remainder.
    An empty input yields nothing.

    :param iterable: Any iterable; it is only iterated once, front to back.
    :param size: Maximum number of items per chunk; must be at least 1.
    :raises ValueError: If ``size`` is less than 1. Because this is a
        generator, the error surfaces when iteration starts.
    """
    # A size of 0 would make the first slice empty and end the loop at once,
    # silently discarding all input; fail loudly instead, as
    # itertools.batched (Python 3.12+) does.
    if size < 1:
        raise ValueError('size must be at least 1')

    it = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(it, size))
        if not chunk:
            # The underlying iterator is exhausted.
            return
        yield chunk