Order search: Further query-specific fine tuning

This commit is contained in:
Raphael Michel
2022-12-15 16:25:13 +01:00
parent 0cd8bbf9a9
commit bd5c9a4cb5
2 changed files with 16 additions and 11 deletions

View File

@@ -792,6 +792,12 @@ class OrderSearchFilterForm(OrderFilterForm):
)
)
def use_query_hack(self):
return (
self.cleaned_data.get('query') or
self.cleaned_data.get('status') in ('overpaid', 'partially_paid', 'underpaid', 'pendingpaid')
)
def filter_qs(self, qs):
fdata = self.cleaned_data
qs = super().filter_qs(qs)

View File

@@ -49,7 +49,7 @@ class OrderSearch(PaginationMixin, ListView):
self.filter_form[k] for k in self.filter_form.fields if k.startswith('meta_')
]
# Only compute this annotations for this page (query optimization)
# Only compute these annotations for this page (query optimization)
s = OrderPosition.objects.filter(
order=OuterRef('pk')
).order_by().values('order').annotate(k=Count('id')).values('k')
@@ -91,7 +91,7 @@ class OrderSearch(PaginationMixin, ListView):
if self.filter_form.is_valid():
qs = self.filter_form.filter_qs(qs)
if self.filter_form.cleaned_data.get('query'):
if self.filter_form.use_query_hack():
"""
We need to work around a bug in PostgreSQL's (and likely MySQL's) query plan optimizer here.
The database lacks statistical data to predict how common our search filter is and therefore
@@ -100,8 +100,14 @@ class OrderSearch(PaginationMixin, ListView):
look for something rare (such as an email address used once within hundreds of thousands of
orders, this ends up to be pathologically slow.
Generally, PostgreSQL tries to make these decisions on statistical data and generally, they *can*
only be made on statistical data, so it's a little bit of a stretch that we try to do it better
than PostgreSQL here. However, experience suggests applying this tricks works specifically in the
cases where the WHERE part of the statement is very hard to compute, e.g. uses a complicated
condition that can't utilize indices well.
For some search queries on pretix.eu, we see search times of >30s, just due to the ORDER BY and
LIMIT clause. Without them. the query runs in roughly 0.6s. This heuristical approach tries to
LIMIT clause. Without them. the query runs in roughly 0.6s. This heuristic approach tries to
detect these cases and rewrite the query as a nested subquery that strongly suggests sorting
before filtering. However, since even that fails in some cases because PostgreSQL thinks it knows
better, we literally force it by evaluating the subquery explicitly. We only do this for n<=200,
@@ -111,15 +117,8 @@ class OrderSearch(PaginationMixin, ListView):
Phew.
"""
page = self.kwargs.get(self.page_kwarg) or self.request.GET.get(self.page_kwarg) or 1
limit = self.get_paginate_by(None)
try:
offset = (int(page) - 1) * limit
except ValueError:
offset = 0
resultids = list(qs.order_by().values_list('id', flat=True)[:201])
if len(resultids) <= 200 and len(resultids) <= offset + limit:
if len(resultids) <= 200:
qs = Order.objects.using(settings.DATABASE_REPLICA).filter(
id__in=resultids
)