Order search: Further query-specific fine tuning

2022-12-15 16:25:13 +01:00
parent 0cd8bbf9a9
commit bd5c9a4cb5
2 changed files with 16 additions and 11 deletions
--- a/src/pretix/control/forms/filter.py
+++ b/src/pretix/control/forms/filter.py
@@ -792,6 +792,12 @@ class OrderSearchFilterForm(OrderFilterForm):
                )
            )

+    def use_query_hack(self):
+        return (
+            self.cleaned_data.get('query') or
+            self.cleaned_data.get('status') in ('overpaid', 'partially_paid', 'underpaid', 'pendingpaid')
+        )
+
    def filter_qs(self, qs):
        fdata = self.cleaned_data
        qs = super().filter_qs(qs)
--- a/src/pretix/control/views/search.py
+++ b/src/pretix/control/views/search.py
@@ -49,7 +49,7 @@ class OrderSearch(PaginationMixin, ListView):
            self.filter_form[k] for k in self.filter_form.fields if k.startswith('meta_')
        ]

-        # Only compute this annotations for this page (query optimization)
+        # Only compute these annotations for this page (query optimization)
        s = OrderPosition.objects.filter(
            order=OuterRef('pk')
        ).order_by().values('order').annotate(k=Count('id')).values('k')
@@ -91,7 +91,7 @@ class OrderSearch(PaginationMixin, ListView):
        if self.filter_form.is_valid():
            qs = self.filter_form.filter_qs(qs)

-            if self.filter_form.cleaned_data.get('query'):
+            if self.filter_form.use_query_hack():
                """
                We need to work around a bug in PostgreSQL's (and likely MySQL's) query plan optimizer here.
                The database lacks statistical data to predict how common our search filter is and therefore
@@ -100,8 +100,14 @@ class OrderSearch(PaginationMixin, ListView):
                look for something rare (such as an email address used once within hundreds of thousands of
                orders, this ends up to be pathologically slow.

+                Generally, PostgreSQL tries to make these decisions on statistical data and generally, they *can*
+                only be made on statistical data, so it's a little bit of a stretch that we try to do it better
+                than PostgreSQL here. However, experience suggests applying this tricks works specifically in the
+                cases where the WHERE part of the statement is very hard to compute, e.g. uses a complicated
+                condition that can't utilize indices well.
+
                For some search queries on pretix.eu, we see search times of >30s, just due to the ORDER BY and
-                LIMIT clause. Without them. the query runs in roughly 0.6s. This heuristical approach tries to
+                LIMIT clause. Without them. the query runs in roughly 0.6s. This heuristic approach tries to
                detect these cases and rewrite the query as a nested subquery that strongly suggests sorting
                before filtering. However, since even that fails in some cases because PostgreSQL thinks it knows
                better, we literally force it by evaluating the subquery explicitly. We only do this for n<=200,
@@ -111,15 +117,8 @@ class OrderSearch(PaginationMixin, ListView):

                Phew.
                """
-
-                page = self.kwargs.get(self.page_kwarg) or self.request.GET.get(self.page_kwarg) or 1
-                limit = self.get_paginate_by(None)
-                try:
-                    offset = (int(page) - 1) * limit
-                except ValueError:
-                    offset = 0
                resultids = list(qs.order_by().values_list('id', flat=True)[:201])
-                if len(resultids) <= 200 and len(resultids) <= offset + limit:
+                if len(resultids) <= 200:
                    qs = Order.objects.using(settings.DATABASE_REPLICA).filter(
                        id__in=resultids
                    )