Add in-flight state handling

This commit is contained in:
Mira Weller
2025-06-27 16:38:05 +02:00
parent b57e1e5ffc
commit 5d7cb6372b
5 changed files with 116 additions and 61 deletions

View File

@@ -47,11 +47,20 @@ from pretix.helpers import OF_SELF
logger = logging.getLogger(__name__)
@receiver(periodic_task, dispatch_uid="data_sync_periodic")
def on_periodic_task(sender, **kwargs):
@receiver(periodic_task, dispatch_uid="data_sync_periodic_sync_all")
def periodic_sync_all(sender, **kwargs):
sync_all.apply_async()
@receiver(periodic_task, dispatch_uid="data_sync_periodic_reset_in_flight")
def periodic_reset_in_flight(sender, **kwargs):
    """
    Release sync queue entries that have been marked in-flight for too long.

    Every ``OrderSyncQueue`` row with ``in_flight=True`` whose ``in_flight_since``
    is more than 20 minutes in the past is flagged with the ``timeout`` failure
    mode via ``set_sync_error``.

    :param sender: Signal sender (unused).
    :param kwargs: Additional signal arguments (unused).
    """
    # NOTE(review): sync_all() wraps its queue query in scopes_disabled();
    # confirm whether this receiver needs the same.
    stale_before = now() - timedelta(minutes=20)
    stale_entries = (
        OrderSyncQueue.objects
        .filter(in_flight=True, in_flight_since__lt=stale_before)
        # set_sync_error() reads sq.order for logging; join it here to avoid
        # one extra query per stale entry.
        .select_related("order")
    )
    for sq in stale_entries:
        sq.set_sync_error('timeout', [], 'Timeout')
sync_targets = EventPluginRegistry({"identifier": lambda o: o.identifier})
@@ -60,7 +69,11 @@ def sync_all():
with scopes_disabled():
queue = (
OrderSyncQueue.objects
.filter(not_before__lt=now(), need_manual_retry__isnull=True)
.filter(
in_flight=False,
not_before__lt=now(),
need_manual_retry__isnull=True,
)
.order_by(Window(
expression=RowNumber(),
partition_by=[F("event_id")],
@@ -93,7 +106,7 @@ class UnrecoverableSyncError(BaseSyncError):
"""
A SyncProvider encountered a permanent problem, where a retry will not be successful.
"""
log_action_type = "pretix.event.order.data_sync.failed.permanent"
failure_mode = "permanent"
class SyncConfigError(UnrecoverableSyncError):
@@ -101,7 +114,7 @@ class SyncConfigError(UnrecoverableSyncError):
A SyncProvider is misconfigured in a way where a retry without configuration change will
not be successful.
"""
log_action_type = "pretix.event.order.data_sync.failed.config"
failure_mode = "config"
class RecoverableSyncError(BaseSyncError):
@@ -152,6 +165,7 @@ class OutboundSyncProvider:
OrderSyncQueue.objects.update_or_create(
order=order,
sync_provider=cls.identifier,
in_flight=False,
defaults={
"event": order.event,
"triggered_by": triggered_by,
@@ -216,54 +230,35 @@ class OutboundSyncProvider:
.select_related("order")
.get(pk=queue_item.pk)
)
if sq.in_flight:
continue
sq.in_flight = True
sq.in_flight_since = now()
sq.save()
except DatabaseError:
continue
try:
mapped_objects = self.sync_order(sq.order)
except UnrecoverableSyncError as e:
logger.warning(
f"Could not sync order {sq.order.code} to {type(self).__name__}",
exc_info=True,
)
sq.order.log_action(e.log_action_type, {
"provider": self.identifier,
"error": e.messages,
"full_message": e.full_message,
})
sq.need_manual_retry = "unrecoverable"
sq.save()
sq.set_sync_error(e.failure_mode, e.messages, e.full_message)
except RecoverableSyncError as e:
sq.failed_attempts += 1
sq.not_before = self.next_retry_date(sq)
logger.info(
f"Could not sync order {sq.order.code} to {type(self).__name__} (transient error, attempt #{sq.failed_attempts})",
exc_info=True,
)
if sq.failed_attempts >= self.max_attempts:
sentry_sdk.capture_exception(e)
sq.order.log_action("pretix.event.order.data_sync.failed.exceeded", {
"provider": self.identifier,
"error": e.messages,
"full_message": e.full_message,
})
sq.need_manual_retry = "recoverable"
sq.save()
sq.set_sync_error("exceeded", e.messages, e.full_message)
else:
sq.save()
logger.info(
f"Could not sync order {sq.order.code} to {type(self).__name__} "
f"(transient error, attempt #{sq.failed_attempts}, next {sq.not_before})",
exc_info=True,
)
sq.clear_in_flight()
except Exception as e:
logger.exception(
f"Could not sync order {sq.order.code} to {type(self).__name__} (unhandled exception)"
)
sentry_sdk.capture_exception(e)
sq.order.log_action("pretix.event.order.data_sync.failed.internal", {
"provider": self.identifier,
"error": [],
"full_message": str(e),
})
sq.need_manual_retry = "unhandled"
sq.save()
sq.set_sync_error("internal", [], str(e))
else:
if not all(res.get("action", "") == "nothing_to_do" for res in mapped_objects.values()):
if not all(all(res.get("action", "") == "nothing_to_do" for res in res_list) for res_list in mapped_objects.values()):
sq.order.log_action("pretix.event.order.data_sync.success", {
"provider": self.identifier,
"objects": mapped_objects

View File

@@ -1,7 +1,7 @@
# Generated by Django 4.2.21 on 2025-06-26 16:59
# Generated by Django 4.2.21 on 2025-06-27 13:32
from django.db import migrations, models
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
@@ -39,12 +39,14 @@ class Migration(migrations.Migration):
('failed_attempts', models.PositiveIntegerField(default=0)),
('not_before', models.DateTimeField(db_index=True)),
('need_manual_retry', models.CharField(null=True)),
('in_flight', models.BooleanField(default=False)),
('in_flight_since', models.DateTimeField(blank=True, null=True)),
('event', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='queued_sync_jobs', to='pretixbase.event')),
('order', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='queued_sync_jobs', to='pretixbase.order')),
],
options={
'ordering': ('triggered',),
'unique_together': {('order', 'sync_provider')},
'unique_together': {('order', 'sync_provider', 'in_flight')},
},
),
]

View File

@@ -23,10 +23,8 @@
import logging
from functools import cached_property
from django.db import models
from django.utils.translation import (
gettext as _, gettext_lazy, ngettext_lazy, pgettext_lazy,
)
from django.db import IntegrityError, models
from django.utils.translation import gettext as _
from pretix.base.models import Event, Order, OrderPosition
@@ -52,13 +50,17 @@ class OrderSyncQueue(models.Model):
failed_attempts = models.PositiveIntegerField(default=0)
not_before = models.DateTimeField(blank=False, null=False, db_index=True)
need_manual_retry = models.CharField(blank=True, null=True, choices=[
('recoverable', _('Temporary error, auto-retry limit exceeded')),
('unrecoverable', _('Misconfiguration, please check provider settings')),
('unhandled', _('System error, needs manual intervention'))
('exceeded', _('Temporary error, auto-retry limit exceeded')),
('permanent', _('Misconfiguration, please check provider settings')),
('config', _('Misconfiguration, please check provider settings')),
('internal', _('System error, needs manual intervention')),
('timeout', _('System error, needs manual intervention')),
])
in_flight = models.BooleanField(default=False)
in_flight_since = models.DateTimeField(blank=True, null=True)
class Meta:
unique_together = (("order", "sync_provider"),)
unique_together = (("order", "sync_provider", "in_flight"),)
ordering = ("triggered",)
@cached_property
@@ -78,6 +80,27 @@ class OrderSyncQueue(models.Model):
def max_retry_attempts(self):
return self.provider_class.max_attempts
def set_sync_error(self, failure_mode, messages, full_message):
    """
    Record a failed sync attempt and flag this queue entry for manual retry.

    Writes a ``pretix.event.order.data_sync.failed.<failure_mode>`` entry to the
    order's log, stores ``failure_mode`` in ``need_manual_retry`` and finally
    calls ``clear_in_flight()``, which resets the in-flight marker and saves.

    :param failure_mode: Short identifier of the failure; used as suffix of the
                         log action type and as value of ``need_manual_retry``.
    :param messages: Error messages stored under ``error`` in the log entry.
    :param full_message: Text stored under ``full_message`` in the log entry.
    """
    import sys

    # This method is not only called from ``except`` blocks (the periodic
    # in-flight timeout reset calls it directly), so only attach a traceback
    # when an exception is actually being handled. Otherwise the log output
    # would carry a meaningless "NoneType: None" traceback.
    has_active_exception = sys.exc_info()[0] is not None
    logger.error(
        "Could not sync order %s to %s (%s)",
        self.order.code,
        # Name the sync target; type(self) would only ever be the queue model.
        self.sync_provider,
        failure_mode,
        exc_info=has_active_exception,
    )
    self.order.log_action(f"pretix.event.order.data_sync.failed.{failure_mode}", {
        "provider": self.sync_provider,
        "error": messages,
        "full_message": full_message,
    })
    self.need_manual_retry = failure_mode
    self.clear_in_flight()
def clear_in_flight(self):
    """
    Reset the in-flight marker of this queue entry and persist the change.

    If saving with ``in_flight=False`` raises an ``IntegrityError`` (the model's
    ``unique_together`` includes ``in_flight``), this instance is deleted
    instead.
    """
    from django.db import transaction

    self.in_flight = False
    self.in_flight_since = None
    try:
        # Run the save in its own atomic block (a savepoint when nested) so
        # that a constraint violation does not leave an enclosing transaction
        # in a broken state, which would make the fallback delete() fail too.
        with transaction.atomic():
            self.save()
    except IntegrityError:
        # if setting in_flight=False fails due to UNIQUE constraint, just delete the current instance
        self.delete()
class OrderSyncResult(models.Model):
order = models.ForeignKey(

View File

@@ -18,29 +18,60 @@
<th>{% trans "Sync provider" %}</th>
<th>{% trans "Date" %}</th>
<th>{% trans "Failure mode" %}</th>
{% if staff_session %}
<th>in_flight</th><th>retry</th>
{% endif %}
</tr>
</thead>
<tbody>
{% for item in queue_items %}
<tr>
<td><input type="checkbox" name="idlist" value="{{ item.pk }}"></td>
<td><a href="{% url "control:event.order" event=item.order.event.slug organizer=item.order.event.organizer.slug code=item.order.code %}">{{ item.order.full_code }}</a></td>
<td>
{% if staff_session %}{{ item.order.event.organizer.slug }} -{% endif %}
<a href="{% url "control:event.order" event=item.order.event.slug organizer=item.order.event.organizer.slug code=item.order.code %}">
{{ item.order.full_code }}
</a>
</td>
<td>{{ item.sync_provider }}</td>
<td>{{ item.triggered }}</td>
<td>{{ item.get_need_manual_retry_display }}</td>
<td>
{{ item.triggered }}
{% if staff_session %}({{ item.triggered_by }}){% endif %}
</td>
<td>
{% if item.need_manual_retry %}
{{ item.get_need_manual_retry_display }}
{% else %}
{% blocktrans trimmed with datetime=item.not_before|date:"SHORT_DATETIME_FORMAT" %}
Temporary error, will retry after {{ datetime }}
{% endblocktrans %}
{% endif %}
{% if staff_session %}({{ item.need_manual_retry }}){% endif %}
</td>
{% if staff_session %}
<td>{{ item.in_flight }} ({{ item.in_flight_since }})</td><td>{{ item.failed_attempts }} / {{ item.max_retry_attempts }} ({{ item.not_before }})</td>
{% endif %}
</tr>
{% empty %}
<tr>
<td colspan="5" align="center">{% trans "No problems." %}</td>
<td colspan="5" align="center">{% trans "No problems." %}</td>
{% if staff_session %}
<td></td><td></td>
{% endif %}
</tr>
{% endfor %}
</tbody>
{% if queue_items %}
<tfoot>
<tr><td colspan="5">
<button type="submit" name="action" value="retry" class="btn btn-primary"><i class="fa fa-refresh"></i> Retry selected</button>
<button type="submit" name="action" value="cancel" class="btn btn-danger"><i class="fa fa-times"></i> Cancel selected</button>
</td></tr>
<tr>
<td colspan="5">
<button type="submit" name="action" value="retry" class="btn btn-primary"><i class="fa fa-refresh"></i> Retry selected</button>
<button type="submit" name="action" value="cancel" class="btn btn-danger"><i class="fa fa-times"></i> Cancel selected</button>
</td>
{% if staff_session %}
<td></td><td></td>
{% endif %}
</tr>
</tfoot>
{% endif %}
</table>

View File

@@ -23,18 +23,21 @@
from itertools import groupby
from django.contrib import messages
from django.db.models import Q
from django.dispatch import receiver
from django.http import HttpResponseNotAllowed
from django.shortcuts import redirect
from django.template.loader import get_template
from django.utils.timezone import now
from django.utils.translation import gettext_lazy as _
from django.views.generic import TemplateView, ListView
from django.views.generic import ListView
from pretix.base.datasync.datasync import sync_targets
from pretix.base.models import Event, Order
from pretix.base.models.datasync import OrderSyncQueue
from pretix.control.permissions import AdministratorPermissionRequiredMixin, OrganizerPermissionRequiredMixin
from pretix.control.permissions import (
AdministratorPermissionRequiredMixin, OrganizerPermissionRequiredMixin,
)
from pretix.control.signals import order_info
from pretix.control.views.orders import OrderView
@@ -99,7 +102,8 @@ class FailedSyncJobsView(ListView):
def get_queryset(self):
return super().get_queryset().filter(
need_manual_retry__isnull=False,
Q(need_manual_retry__isnull=False)
| Q(failed_attempts__gt=0)
).select_related(
'order'
)