New locking mechanism (#2408)

Co-authored-by: Richard Schreiber <schreiber@rami.io>
This commit is contained in:
Raphael Michel
2023-09-11 11:44:50 +02:00
committed by GitHub
parent b2b3fa36be
commit c842ea597c
33 changed files with 1638 additions and 883 deletions

View File

@@ -20,31 +20,105 @@
# <https://www.gnu.org/licenses/>.
#
# This file is based on an earlier version of pretix which was released under the Apache License 2.0. The full text of
# the Apache License 2.0 can be obtained at <http://www.apache.org/licenses/LICENSE-2.0>.
#
# This file may have since been changed and any changes are released under the terms of AGPLv3 as described above. A
# full history of changes and contributors is available at <https://github.com/pretix/pretix>.
#
# This file contains Apache-licensed contributions copyrighted by: Tobias Kunze
#
# Unless required by applicable law or agreed to in writing, software distributed under the Apache License 2.0 is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under the License.
import logging
import time
import uuid
from datetime import timedelta
from itertools import groupby
from django.conf import settings
from django.db import transaction
from django.db import DatabaseError, connection
from django.utils.timezone import now
from pretix.base.models import EventLock
from pretix.base.models import Event, Membership, Quota, Seat, Voucher
from pretix.testutils.middleware import debugflags_var
logger = logging.getLogger('pretix.base.locking')
LOCK_TIMEOUT = 120
# A lock acquisition is aborted if it takes longer than LOCK_ACQUISITION_TIMEOUT to prevent connection starvation
LOCK_ACQUISITION_TIMEOUT = 3
# We make the assumption that it is safe to e.g. transform an order into a cart if the order has a lifetime of more than
# LOCK_TRUST_WINDOW into the future. In other words, we assume that a lock is never held longer than LOCK_TRUST_WINDOW.
# This assumption holds true for all in-request locks, since our gunicorn default settings kill a worker that takes
# longer than 60 seconds to process a request. It however does not hold true for celery tasks, especially long-running
# ones, so this does introduce *some* risk of incorrect locking.
LOCK_TRUST_WINDOW = 120
# These are different offsets for the different types of keys we want to lock
KEY_SPACES = {
Event: 1,
Quota: 2,
Seat: 3,
Voucher: 4,
Membership: 5
}
def pg_lock_key(obj):
"""
This maps the primary key space of multiple tables to a single bigint key space within postgres. It is not
an injective function, which is fine, as long as collisions are rare.
"""
keyspace = KEY_SPACES.get(type(obj))
objectid = obj.pk
if not keyspace:
raise ValueError(f"No key space defined for locking objects of type {type(obj)}")
assert isinstance(objectid, int)
# 64bit int: xxxxxxxx xxxxxxx xxxxxxx xxxxxxx xxxxxx xxxxxxx xxxxxxx xxxxxxx
# | objectid mod 2**48 | |index| |keysp.|
key = ((objectid % 281474976710656) << 16) | ((settings.DATABASE_ADVISORY_LOCK_INDEX % 256) << 8) | (keyspace % 256)
return key
class LockTimeoutException(Exception):
pass
def lock_objects(objects, *, shared_lock_objects=None, replace_exclusive_with_shared_when_exclusive_are_more_than=20):
"""
Create an exclusive lock on the objects passed in `objects`. This function MUST be called within an atomic
transaction and SHOULD be called only once per transaction to prevent deadlocks.
A shared lock will be created on objects passed in `shared_lock_objects`.
If `objects` contains more than `replace_exclusive_with_shared_when_exclusive_are_more_than` objects, `objects`
will be ignored and `shared_lock_objects` will be used in its place and receive an exclusive lock.
The idea behind it is this: Usually we create a lock on every quota, voucher, or seat contained in an order.
However, this has a large performance penalty in case we have hundreds of locks required. Therefore, we always
place a shared lock in the event, and if we have too many affected objects, we fall back to event-level locks.
"""
if (not objects and not shared_lock_objects) or 'skip-locking' in debugflags_var.get():
return
if 'fail-locking' in debugflags_var.get():
raise LockTimeoutException()
if not connection.in_atomic_block:
raise RuntimeError(
"You cannot create locks outside of an transaction"
)
if 'postgresql' in settings.DATABASES['default']['ENGINE']:
shared_keys = set(pg_lock_key(obj) for obj in shared_lock_objects) if shared_lock_objects else set()
exclusive_keys = set(pg_lock_key(obj) for obj in objects)
if replace_exclusive_with_shared_when_exclusive_are_more_than and len(exclusive_keys) > replace_exclusive_with_shared_when_exclusive_are_more_than:
exclusive_keys = shared_keys
keys = sorted(list(shared_keys | exclusive_keys))
calls = ", ".join([
(f"pg_advisory_xact_lock({k})" if k in exclusive_keys else f"pg_advisory_xact_lock_shared({k})") for k in keys
])
try:
with connection.cursor() as cursor:
cursor.execute(f"SET LOCAL lock_timeout = '{LOCK_ACQUISITION_TIMEOUT}s';")
cursor.execute(f"SELECT {calls};")
cursor.execute("SET LOCAL lock_timeout = '0';") # back to default
except DatabaseError as e:
logger.warning(f"Waiting for locks timed out: {e} on SELECT {calls};")
raise LockTimeoutException()
else:
for model, instances in groupby(objects, key=lambda o: type(o)):
model.objects.select_for_update().filter(pk__in=[o.pk for o in instances])
class NoLockManager:
@@ -57,128 +131,3 @@ class NoLockManager:
def __exit__(self, exc_type, exc_val, exc_tb):
if exc_type is not None:
return False
class LockManager:
def __init__(self, event):
self.event = event
def __enter__(self):
lock_event(self.event)
return now()
def __exit__(self, exc_type, exc_val, exc_tb):
release_event(self.event)
if exc_type is not None:
return False
class LockTimeoutException(Exception):
pass
class LockReleaseException(LockTimeoutException):
pass
def lock_event(event):
"""
Issue a lock on this event so nobody can book tickets for this event until
you release the lock. Will retry 5 times on failure.
:raises LockTimeoutException: if the event is locked every time we try
to obtain the lock
"""
if hasattr(event, '_lock') and event._lock:
return True
if settings.HAS_REDIS:
return lock_event_redis(event)
else:
return lock_event_db(event)
def release_event(event):
"""
Release a lock placed by :py:meth:`lock()`. If the parameter force is not set to ``True``,
the lock will only be released if it was issued in _this_ python
representation of the database object.
:raises LockReleaseException: if we do not own the lock
"""
if not hasattr(event, '_lock') or not event._lock:
raise LockReleaseException('Lock is not owned by this thread')
if settings.HAS_REDIS:
return release_event_redis(event)
else:
return release_event_db(event)
def lock_event_db(event):
retries = 5
for i in range(retries):
with transaction.atomic():
dt = now()
l, created = EventLock.objects.get_or_create(event=event.id)
if created:
event._lock = l
return True
elif l.date < now() - timedelta(seconds=LOCK_TIMEOUT):
newtoken = str(uuid.uuid4())
updated = EventLock.objects.filter(event=event.id, token=l.token).update(date=dt, token=newtoken)
if updated:
l.token = newtoken
event._lock = l
return True
time.sleep(2 ** i / 100)
raise LockTimeoutException()
@transaction.atomic
def release_event_db(event):
if not hasattr(event, '_lock') or not event._lock:
raise LockReleaseException('Lock is not owned by this thread')
try:
lock = EventLock.objects.get(event=event.id, token=event._lock.token)
lock.delete()
event._lock = None
except EventLock.DoesNotExist:
raise LockReleaseException('Lock is no longer owned by this thread')
def redis_lock_from_event(event):
from django_redis import get_redis_connection
from redis.lock import Lock
if not hasattr(event, '_lock') or not event._lock:
rc = get_redis_connection("redis")
event._lock = Lock(redis=rc, name='pretix_event_%s' % event.id, timeout=LOCK_TIMEOUT)
return event._lock
def lock_event_redis(event):
from redis.exceptions import RedisError
lock = redis_lock_from_event(event)
retries = 5
for i in range(retries):
try:
if lock.acquire(blocking=False):
return True
except RedisError:
logger.exception('Error locking an event')
raise LockTimeoutException()
time.sleep(2 ** i / 100)
raise LockTimeoutException()
def release_event_redis(event):
from redis import RedisError
lock = redis_lock_from_event(event)
try:
lock.release()
except RedisError:
logger.exception('Error releasing an event lock')
raise LockReleaseException()
event._lock = None