mirror of
https://github.com/pretix/pretix.git
synced 2026-05-05 15:14:04 +00:00
Order search: Fight the database optimizer to actually optimize the query
This commit is contained in:
@@ -46,7 +46,7 @@ class OrderSearch(PaginationMixin, ListView):
|
||||
return ctx
|
||||
|
||||
def get_queryset(self):
|
||||
qs = Order.objects.select_related('invoice_address').using(settings.DATABASE_REPLICA)
|
||||
qs = Order.objects.using(settings.DATABASE_REPLICA)
|
||||
|
||||
if not self.request.user.has_active_staff_session(self.request.session.session_key):
|
||||
qs = qs.filter(
|
||||
@@ -59,9 +59,47 @@ class OrderSearch(PaginationMixin, ListView):
|
||||
if self.filter_form.is_valid():
|
||||
qs = self.filter_form.filter_qs(qs)
|
||||
|
||||
if self.filter_form.cleaned_data.get('query'):
|
||||
"""
|
||||
We need to work around a bug in PostgreSQL's (and likely MySQL's) query plan optimizer here.
|
||||
The database lacks statistical data to predict how common our search filter is and therefore
|
||||
assumes that it is cheaper to first ORDER *all* orders in the system (since we got an index on
|
||||
datetime), then filter out with a full scan until OFFSET/LIMIT condition is fulfilled. If we
|
||||
look for something rare (such as an email address used once within hundreds of thousands of
|
||||
orders), this ends up being pathologically slow.
|
||||
|
||||
For some search queries on pretix.eu, we see search times of >30s, just due to the ORDER BY and
|
||||
LIMIT clause. Without them, the query runs in roughly 0.6s. This heuristic approach tries to
|
||||
detect these cases and rewrite the query as a nested subquery that strongly suggests filtering
|
||||
before sorting. However, since even that fails in some cases because PostgreSQL thinks it knows
|
||||
better, we literally force it by evaluating the subquery explicitly. We only do this for n<=200,
|
||||
to avoid memory leaks – and problems with maximum parameter count on SQLite. In cases where the
|
||||
search query yields lots of results, this will actually be slower since it requires two queries,
|
||||
sorry.
|
||||
|
||||
Phew.
|
||||
"""
|
||||
|
||||
page = self.kwargs.get(self.page_kwarg) or self.request.GET.get(self.page_kwarg) or 1
|
||||
limit = self.get_paginate_by(None)
|
||||
offset = (page - 1) * limit
|
||||
resultids = list(qs.order_by().values_list('id', flat=True)[:201])
|
||||
if len(resultids) <= 200 and len(resultids) <= offset + limit:
|
||||
qs = Order.objects.using(settings.DATABASE_REPLICA).filter(
|
||||
id__in=resultids
|
||||
)
|
||||
|
||||
"""
|
||||
We use prefetch_related here instead of select_related for a reason, even though select_related
|
||||
would be the common choice for a foreign key and doesn't require an additional database query.
|
||||
The problem is that if our results contain the same event 25 times, select_related will create
|
||||
25 Django objects which will all try to pull their own settings cache to show the event properly,
|
||||
leading to lots of unnecessary queries. Due to the way prefetch_related works differently, it
|
||||
will only create one shared Django object.
|
||||
"""
|
||||
return qs.only(
|
||||
'id', 'invoice_address__name_cached', 'invoice_address__name_parts', 'code', 'event', 'email',
|
||||
'datetime', 'total', 'status', 'require_approval'
|
||||
).prefetch_related(
|
||||
'event', 'event__organizer'
|
||||
)
|
||||
).select_related('invoice_address')
|
||||
|
||||
Reference in New Issue
Block a user