Fix #678 -- Data shredders for personally identifiable information (#817)

* Add data shredders for PII

* First working shredder

* Add more shredders

* Add new shredders and download confirmation

* tmp

* PayPal, Stripe, banktransfer

* Add icon to logs

* Untested payment log shredders

* Add waiting list shredder

* First tests

* Add tests for shredders

* Improve templates, link to shredder

* Test payment info shredders

* More tests

* Documentation

* Fix enabled flag in payment provider overview

* Fix minor issues
This commit is contained in:
Raphael Michel
2018-05-02 15:59:59 +02:00
committed by GitHub
parent 335838f2b2
commit 7bccd62a4f
41 changed files with 1728 additions and 21 deletions

View File

@@ -18,7 +18,7 @@ class InvoiceExporter(BaseExporter):
verbose_name = _('All invoices')
def render(self, form_data: dict):
qs = self.event.invoices.all()
qs = self.event.invoices.filter(shredded=False)
if form_data.get('payment_provider'):
qs = qs.filter(order__payment_provider=form_data.get('payment_provider'))

View File

@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.10 on 2018-03-15 13:22
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add a ``shredded`` boolean flag (default ``False``) to the ``logentry`` and ``invoice`` models."""

    dependencies = [
        ('pretixbase', '0088_auto_20180328_1217'),
    ]

    # Both models receive exactly the same field definition.
    operations = [
        migrations.AddField(
            model_name=model_name,
            name='shredded',
            field=models.BooleanField(default=False),
        )
        for model_name in ('logentry', 'invoice')
    ]

View File

@@ -495,6 +495,22 @@ class Event(EventMixin, LoggedModel):
renderers[pp.identifier] = pp
return renderers
def get_data_shredders(self) -> dict:
    """
    Collect all data shredders that are registered for this event.

    The ``register_data_shredders`` signal is sent with this event as the sender. Every receiver may answer
    with a single shredder class or with a list of classes; each class is instantiated with this event.

    :return: A dictionary mapping each shredder's ``identifier`` to the initialized shredder
    """
    from ..signals import register_data_shredders

    shredders = {}
    for _receiver, answer in register_data_shredders.send(self):
        # Receivers are allowed to return a bare class instead of a list.
        shredder_classes = answer if isinstance(answer, list) else [answer]
        for shredder_class in shredder_classes:
            instance = shredder_class(self)
            shredders[instance.identifier] = instance
    return shredders
@property
def invoice_renderer(self):
"""

View File

@@ -83,6 +83,7 @@ class Invoice(models.Model):
foreign_currency_display = models.CharField(max_length=50, null=True, blank=True)
foreign_currency_rate = models.DecimalField(decimal_places=4, max_digits=10, null=True, blank=True)
foreign_currency_rate_date = models.DateField(null=True, blank=True)
shredded = models.BooleanField(default=False)
file = models.FileField(null=True, blank=True, upload_to=invoice_filename, max_length=255)
internal_reference = models.TextField(blank=True)

View File

@@ -45,6 +45,7 @@ class LogEntry(models.Model):
action_type = models.CharField(max_length=255)
data = models.TextField(default='{}')
visible = models.BooleanField(default=True)
shredded = models.BooleanField(default=False)
objects = VisibleOnlyManager()
all = models.Manager()

View File

@@ -191,7 +191,10 @@ class Order(LoggedModel):
@cached_property
def meta_info_data(self):
return json.loads(self.meta_info)
try:
return json.loads(self.meta_info)
except TypeError:
return None
@property
def full_code(self):

View File

@@ -87,6 +87,8 @@ class WaitingListEntry(LoggedModel):
raise WaitingListException(_('This product is currently not available.'))
if self.voucher:
raise WaitingListException(_('A voucher has already been sent to this person.'))
if '@' not in self.email:
raise WaitingListException(_('This entry is anonymized and can no longer be used.'))
with transaction.atomic():
v = Voucher.objects.create(

View File

@@ -566,6 +566,19 @@ class BasePaymentProvider:
messages.success(request, _('The order has been marked as refunded. Please transfer the money '
'back to the buyer manually.'))
def shred_payment_info(self, order: Order):
    """
    Scrub payment-related data from an order when personal data is removed from an event.

    The default implementation wipes the ``payment_info`` attribute completely. Override this method if
    you want to retain attributes that are not personal data on their own, e.g. a reference to a
    transaction in an external system. You can also override it to scrub more data, e.g. data from
    external sources that is saved in LogEntry objects or other places.

    :param order: An order
    """
    shredded_fields = ['payment_info']
    order.payment_info = None
    # Only write the scrubbed column back to the database.
    order.save(update_fields=shredded_fields)
class PaymentException(Exception):
pass

View File

@@ -180,6 +180,8 @@ def generate_cancellation(invoice: Invoice, trigger_pdf=True):
def regenerate_invoice(invoice: Invoice):
if invoice.shredded:
return invoice
if invoice.is_cancellation:
invoice = build_cancellation(invoice)
else:
@@ -214,6 +216,10 @@ def generate_invoice(order: Order, trigger_pdf=True):
@app.task(base=TransactionAwareTask)
def invoice_pdf_task(invoice: int):
i = Invoice.objects.get(pk=invoice)
if i.shredded:
return None
if i.file:
i.file.delete()
with language(i.locale):
fname, ftype, fcontent = i.event.invoice_renderer.generate(i)
i.file.save(fname, ContentFile(fcontent))

View File

@@ -0,0 +1,91 @@
import json
from datetime import timedelta
from tempfile import NamedTemporaryFile
from typing import List
from zipfile import ZipFile
from dateutil.parser import parse
from django.conf import settings
from django.utils.crypto import get_random_string
from django.utils.timezone import now
from django.utils.translation import ugettext_lazy as _
from pretix.base.models import CachedFile, Event, cachedfile_name
from pretix.base.services.async import ProfiledTask
from pretix.base.shredder import ShredError
from pretix.celery_app import app
@app.task(base=ProfiledTask)
def export(event: str, shredders: List[str]) -> None:
    """
    Build the ZIP archive with all data that the selected shredders are about to remove.

    Besides the files produced by the shredders, the archive contains ``CONFIRM_CODE.txt`` with a random
    code and ``index.json`` with the instance URL, organizer and event slug, the export time, the list of
    requested shredders and the same confirm code.

    :param event: Primary key of the event to export data for
    :param shredders: Identifiers of the shredders to run; identifiers unknown to the event are skipped
    :return: The primary key of the ``CachedFile`` holding the archive, which expires after one hour
    """
    # NOTE(review): the ``-> None`` annotation does not match the returned primary key; it is left
    # untouched here to keep the task signature unchanged.
    event = Event.objects.get(id=event)
    known_shredders = event.get_data_shredders()

    with NamedTemporaryFile() as rawfile:
        with ZipFile(rawfile, 'w') as zipfile:
            ccode = get_random_string(6)
            zipfile.writestr(
                'CONFIRM_CODE.txt',
                ccode,
            )
            zipfile.writestr(
                'index.json',
                json.dumps({
                    'instance': settings.SITE_URL,
                    'organizer': event.organizer.slug,
                    'event': event.slug,
                    'time': now().isoformat(),
                    'shredders': shredders,
                    'confirm_code': ccode
                }, indent=4)
            )
            for s in shredders:
                shredder = known_shredders.get(s)
                if not shredder:
                    continue

                # Shredders that do not offer a download return nothing here.
                it = shredder.generate_files()
                if not it:
                    continue
                for fname, ftype, content in it:
                    zipfile.writestr(fname, content)

        # The ZIP directory is written when the ZipFile is closed, so rewind only afterwards.
        rawfile.seek(0)

        cf = CachedFile()
        cf.date = now()
        cf.filename = event.slug + '.zip'
        # The payload is a ZIP archive, so do not label it as a PDF.
        cf.type = 'application/zip'
        cf.expires = now() + timedelta(hours=1)
        cf.save()
        cf.file.save(cachedfile_name(cf, cf.filename), rawfile)

    return cf.pk
@app.task(base=ProfiledTask, throws=(ShredError,))
def shred(event: str, fileid: str, confirm_code: str) -> None:
    """
    Run the shredders recorded in a previously exported archive after validating the request.

    :param event: Primary key of the event to shred data for
    :param fileid: Primary key of the ``CachedFile`` holding the exported archive
    :param confirm_code: The code the user entered; it must match the code stored in the archive
    :raises ShredError: If the archive is gone, belongs to a different event, the confirm code is wrong,
                        or the event has log entries dated at or after the export time
    """
    event = Event.objects.get(id=event)
    known_shredders = event.get_data_shredders()
    try:
        cf = CachedFile.objects.get(pk=fileid)
    except CachedFile.DoesNotExist:
        raise ShredError(_("The download file could no longer be found on the server, please try to start again."))

    with ZipFile(cf.file.file, 'r') as zipfile:
        indexdata = json.loads(zipfile.read('index.json').decode())

    if indexdata['organizer'] != event.organizer.slug or indexdata['event'] != event.slug:
        raise ShredError(_("This file is from a different event."))
    if indexdata['confirm_code'] != confirm_code:
        raise ShredError(_("The confirm code you entered was incorrect."))
    # Only the existence of a newer log entry matters, so do not load the matching rows.
    if event.logentry_set.filter(datetime__gte=parse(indexdata['time'])).exists():
        raise ShredError(_("Something happened in your event after the export, please try again."))

    for s in indexdata['shredders']:
        shredder = known_shredders.get(s)
        if not shredder:
            continue
        shredder.shred_data()

    # The export itself contains the personal data, so remove it once shredding is done.
    cf.file.delete(save=False)
    cf.delete()

341
src/pretix/base/shredder.py Normal file
View File

@@ -0,0 +1,341 @@
import json
from datetime import timedelta
from typing import List, Tuple
from django.db import transaction
from django.db.models import Max
from django.db.models.functions import Greatest
from django.dispatch import receiver
from django.utils.timezone import now
from django.utils.translation import ugettext_lazy as _
from pretix.api.serializers.order import (
AnswerSerializer, InvoiceAddressSerializer,
)
from pretix.api.serializers.waitinglist import WaitingListSerializer
from pretix.base.i18n import LazyLocaleException
from pretix.base.models import (
CachedCombinedTicket, CachedTicket, Event, InvoiceAddress, OrderPosition,
QuestionAnswer,
)
from pretix.base.services.invoices import invoice_pdf_task
from pretix.base.signals import register_data_shredders
class ShredError(LazyLocaleException):
    """Raised with a translatable message when data shredding cannot be performed."""
def shred_constraints(event: Event):
    """
    Check whether the personal data of an event may be shredded right now.

    For an event with subevents, the latest subevent date is used; otherwise the event's ``date_to`` is
    used, falling back to ``date_from``. That date must lie at least 60 days in the past and the shop
    must not be live.

    :param event: The event to check
    :return: A translatable message describing the first violated constraint, or ``None`` if there is none
    """
    cutoff = now() - timedelta(days=60)

    if event.has_subevents:
        aggregates = event.subevents.aggregate(
            max_from=Max('date_from'),
            max_to=Max('date_to'),
            max_fromto=Greatest(Max('date_to'), Max('date_from'))
        )
        # ``max_fromto`` may be empty (presumably when the database's GREATEST yields NULL for a NULL
        # operand), so fall back to the individual maxima.
        max_date = aggregates['max_fromto'] or aggregates['max_to'] or aggregates['max_from']
        # Without any subevent there is no date at all and nothing to compare against.
        if max_date is not None and max_date > cutoff:
            return _('Your event needs to be over for at least 60 days to use this feature.')
    else:
        if (event.date_to or event.date_from) > cutoff:
            return _('Your event needs to be over for at least 60 days to use this feature.')

    if event.live:
        return _('Your ticket shop needs to be offline to use this feature.')
    return None
class BaseDataShredder:
    """
    Common interface that every data shredder has to implement.

    A shredder is bound to a single event. It can export the data it is responsible for and remove it.
    """

    def __init__(self, event: Event):
        self.event = event

    def __str__(self):
        return self.identifier

    def generate_files(self) -> List[Tuple[str, str, str]]:
        """
        Export the data that is about to be shredded.

        Return a list of tuples, each consisting of a filename, a file type and the file content.
        Instead of returning a list, you may implement this as a generator that ``yield``\\ s the tuples.
        """
        raise NotImplementedError()  # NOQA

    def shred_data(self):
        """
        Actually remove the data from the system. Any affected database objects should be removed here.

        ``LogEntry`` objects must never be deleted. You may modify them to strip personal data, though;
        if you do, set ``LogEntry.shredded`` to ``True`` to mark that this is no longer original log data.
        """
        raise NotImplementedError()  # NOQA

    @property
    def verbose_name(self) -> str:
        """
        A short, self-explanatory, human-readable name for what this shredder removes,
        for example 'E-Mail addresses' or 'Invoices'.
        """
        raise NotImplementedError()  # NOQA

    @property
    def identifier(self) -> str:
        """
        A short, unique identifier for this shredder. It should consist of lowercase letters only and
        will in most cases be identical to your package name.
        """
        raise NotImplementedError()  # NOQA

    @property
    def description(self) -> str:
        """
        A longer description of what this shredder does. HTML is allowed.
        """
        raise NotImplementedError()  # NOQA
def shred_log_fields(logentry, blacklist=None, whitelist=None):
    """
    Blank out fields in the parsed data of a log entry and mark the entry as shredded.

    If a non-empty ``whitelist`` is given, every key that is not listed is blanked. Otherwise, if a
    non-empty ``blacklist`` is given, every listed key that is present is blanked. In all cases the data
    is serialized back and the entry is saved with ``shredded`` set to ``True``.

    :param logentry: The log entry to modify
    :param blacklist: Keys to blank
    :param whitelist: Keys to keep; takes precedence over ``blacklist``
    """
    payload = logentry.parsed_data

    if whitelist:
        doomed = [key for key in payload if key not in whitelist]
    elif blacklist:
        doomed = [key for key in blacklist if key in payload]
    else:
        doomed = []

    for key in doomed:
        payload[key] = ''

    logentry.data = json.dumps(payload)
    logentry.shredded = True
    logentry.save(update_fields=['data', 'shredded'])
class EmailAddressShredder(BaseDataShredder):
    """
    Shredder for e-mail addresses stored on orders and order positions and for e-mail related log data.
    """
    verbose_name = _('E-mails')
    identifier = 'order_emails'
    description = _('This will remove all e-mail addresses from orders and attendees, as well as logged email '
                    'contents.')

    def generate_files(self) -> List[Tuple[str, str, str]]:
        """
        Yield two JSON files: order e-mail addresses keyed by order code and attendee e-mail addresses
        keyed by ``<order code>-<position id>``.
        """
        order_emails = {}
        for order in self.event.orders.filter(email__isnull=False):
            order_emails[order.code] = order.email
        yield 'emails-by-order.json', 'application/json', json.dumps(order_emails, indent=4)

        attendee_emails = {}
        for position in OrderPosition.objects.filter(order__event=self.event, attendee_email__isnull=False):
            key = '{}-{}'.format(position.order.code, position.positionid)
            attendee_emails[key] = position.attendee_email
        yield 'emails-by-attendee.json', 'application/json', json.dumps(attendee_emails, indent=4)

    @transaction.atomic
    def shred_data(self):
        """
        Clear e-mail addresses on positions and orders (including the stored contact form data) and
        blank e-mail related fields in the event's log entries.
        """
        positions = OrderPosition.objects.filter(order__event=self.event, attendee_email__isnull=False)
        positions.update(attendee_email=None)

        for order in self.event.orders.all():
            order.email = None
            meta = order.meta_info_data
            if meta:
                if 'contact_form_data' in meta:
                    contact_data = meta['contact_form_data']
                    if 'email' in contact_data:
                        del contact_data['email']
                order.meta_info = json.dumps(meta)
            order.save(update_fields=['meta_info', 'email'])

        logs = self.event.logentry_set
        for entry in logs.filter(action_type__contains="order.email"):
            shred_log_fields(entry, blacklist=['recipient', 'message', 'subject'])

        for entry in logs.filter(action_type="pretix.event.order.contact.changed"):
            shred_log_fields(entry, blacklist=['old_email', 'new_email'])

        for entry in logs.filter(action_type="pretix.event.order.modified").exclude(data=""):
            payload = entry.parsed_data
            if 'data' not in payload:
                continue
            for row in payload['data']:
                if 'attendee_email' in row:
                    row['attendee_email'] = ''
            entry.data = json.dumps(payload)
            entry.shredded = True
            entry.save(update_fields=['data', 'shredded'])
class WaitingListShredder(BaseDataShredder):
    """
    Shredder for e-mail addresses on the waiting list, in voucher comments and in related log entries.
    """
    verbose_name = _('Waiting list')
    identifier = 'waiting_list'
    description = _('This will remove all email addresses from the waiting list.')

    def generate_files(self) -> List[Tuple[str, str, str]]:
        """
        Yield one JSON file with the serialized waiting list entries of the event.
        """
        serialized = [
            WaitingListSerializer(entry).data
            for entry in self.event.waitinglistentries.all()
        ]
        yield 'waiting-list.json', 'application/json', json.dumps(serialized, indent=4)

    @transaction.atomic
    def shred_data(self):
        """
        Blank the e-mail address of every waiting list entry, clear voucher comments that contain an
        ``@`` and blank the ``email`` field of the matching voucher log entries.
        """
        self.event.waitinglistentries.update(email='')

        with_voucher = self.event.waitinglistentries.select_related('voucher').filter(voucher__isnull=False)
        for entry in with_voucher:
            voucher = entry.voucher
            if '@' in voucher.comment:
                voucher.comment = ''
                voucher.save(update_fields=['comment'])

        log_entries = self.event.logentry_set.filter(action_type="pretix.voucher.added.waitinglist")
        for log_entry in log_entries.exclude(data=""):
            payload = log_entry.parsed_data
            payload['email'] = ''
            log_entry.data = json.dumps(payload)
            log_entry.shredded = True
            log_entry.save(update_fields=['data', 'shredded'])
class AttendeeNameShredder(BaseDataShredder):
    """
    Shredder for attendee names on order positions and in logged order modifications.
    """
    verbose_name = _('Attendee names')
    identifier = 'attendee_names'
    description = _('This will remove all attendee names from order positions, as well as logged changes to them.')

    def generate_files(self) -> List[Tuple[str, str, str]]:
        """
        Yield one JSON file with attendee names keyed by ``<order code>-<position id>``.
        """
        names = {}
        for position in OrderPosition.objects.filter(order__event=self.event, attendee_name__isnull=False):
            names['{}-{}'.format(position.order.code, position.positionid)] = position.attendee_name
        yield 'attendee-names.json', 'application/json', json.dumps(names, indent=4)

    @transaction.atomic
    def shred_data(self):
        """
        Clear the attendee name of every position and blank ``attendee_name`` in logged modifications.
        """
        named_positions = OrderPosition.objects.filter(order__event=self.event, attendee_name__isnull=False)
        named_positions.update(attendee_name=None)

        modifications = self.event.logentry_set.filter(action_type="pretix.event.order.modified")
        for entry in modifications.exclude(data=""):
            payload = entry.parsed_data
            if 'data' not in payload:
                continue
            for row in payload['data']:
                if 'attendee_name' in row:
                    row['attendee_name'] = ''
            entry.data = json.dumps(payload)
            entry.shredded = True
            entry.save(update_fields=['data', 'shredded'])
class InvoiceAddressShredder(BaseDataShredder):
    """
    Shredder for invoice addresses of orders and for invoice data in logged order modifications.
    """
    verbose_name = _('Invoice addresses')
    identifier = 'invoice_addresses'
    description = _('This will remove all invoice addresses from orders, as well as logged changes to them.')

    def generate_files(self) -> List[Tuple[str, str, str]]:
        """
        Yield one JSON file with the serialized invoice addresses keyed by order code.
        """
        addresses = {}
        for address in InvoiceAddress.objects.filter(order__event=self.event):
            addresses[address.order.code] = InvoiceAddressSerializer(address).data
        yield 'invoice-addresses.json', 'application/json', json.dumps(addresses, indent=4)

    @transaction.atomic
    def shred_data(self):
        """
        Delete all invoice addresses of the event and blank every non-empty ``invoice_data`` field in
        logged modifications.
        """
        InvoiceAddress.objects.filter(order__event=self.event).delete()

        modifications = self.event.logentry_set.filter(action_type="pretix.event.order.modified")
        for entry in modifications.exclude(data=""):
            payload = entry.parsed_data
            # ``invoice_data`` may be a plain boolean, in which case there is nothing to blank.
            if 'invoice_data' not in payload or isinstance(payload['invoice_data'], bool):
                continue
            invoice_data = payload['invoice_data']
            for field in invoice_data:
                if invoice_data[field]:
                    invoice_data[field] = ''
            entry.data = json.dumps(payload)
            entry.shredded = True
            entry.save(update_fields=['data', 'shredded'])
class QuestionAnswerShredder(BaseDataShredder):
    """
    Shredder for answers to questions and for the corresponding data in logged order modifications.
    """
    verbose_name = _('Question answers')
    identifier = 'question_answers'
    description = _('This will remove all answers to questions, as well as logged changes to them.')

    def generate_files(self) -> List[Tuple[str, str, str]]:
        """
        Yield one JSON file with the serialized answers of each position, keyed by
        ``<order code>-<position id>``.
        """
        answers = {}
        positions = OrderPosition.objects.filter(order__event=self.event).prefetch_related('answers')
        for position in positions:
            key = '{}-{}'.format(position.order.code, position.positionid)
            answers[key] = AnswerSerializer(position.answers.all(), many=True).data
        yield 'question-answers.json', 'application/json', json.dumps(answers, indent=4)

    @transaction.atomic
    def shred_data(self):
        """
        Delete all question answers of the event and blank every field except ``attendee_name`` and
        ``attendee_email`` in the rows of logged modifications.
        """
        QuestionAnswer.objects.filter(orderposition__order__event=self.event).delete()

        kept_fields = ('attendee_name', 'attendee_email')
        modifications = self.event.logentry_set.filter(action_type="pretix.event.order.modified")
        for entry in modifications.exclude(data=""):
            payload = entry.parsed_data
            if 'data' not in payload:
                continue
            for row in payload['data']:
                for field in row:
                    if field not in kept_fields:
                        row[field] = ''
            entry.data = json.dumps(payload)
            entry.shredded = True
            entry.save(update_fields=['data', 'shredded'])
class InvoiceShredder(BaseDataShredder):
    """
    Shredder for invoice PDFs and the free-text content of invoices.
    """
    verbose_name = _('Invoices')
    identifier = 'invoices'
    description = _('This will remove all invoice PDFs, as well as any of their text content that might contain '
                    'personal data from the database. Invoice numbers and totals will be conserved.')

    def generate_files(self) -> List[Tuple[str, str, str]]:
        """
        Yield the PDF of every invoice that has not been shredded yet as ``invoices/<number>.pdf``.

        Invoices without a stored PDF are passed through ``invoice_pdf_task`` first.
        """
        for invoice in self.event.invoices.filter(shredded=False):
            if not invoice.file:
                # Run the task in-process so the file is available for this export.
                invoice_pdf_task.apply(args=(invoice.pk,))
                invoice.refresh_from_db()
            invoice.file.open('rb')
            try:
                content = invoice.file.read()
            finally:
                # Close before yielding so the handle is released even if the consumer stops early.
                invoice.file.close()
            yield 'invoices/{}.pdf'.format(invoice.number), 'application/pdf', content

    @transaction.atomic
    def shred_data(self):
        """
        Delete the PDF of every invoice that has not been shredded yet, blank its text fields and line
        descriptions and flag it as shredded.
        """
        for invoice in self.event.invoices.filter(shredded=False):
            if invoice.file:
                invoice.file.delete()
            # The text fields are scrubbed whether or not a PDF existed for this invoice.
            invoice.shredded = True
            invoice.introductory_text = ""
            invoice.additional_text = ""
            invoice.invoice_to = ""
            invoice.payment_provider_text = ""
            invoice.save()
            invoice.lines.update(description="")
class CachedTicketShredder(BaseDataShredder):
    """
    Shredder for cached ticket files of single positions and of whole orders.
    """
    verbose_name = _('Cached ticket files')
    identifier = 'cachedtickets'
    description = _('This will remove all cached ticket files. No download will be offered.')

    def generate_files(self) -> List[Tuple[str, str, str]]:
        """
        Produce nothing: this shredder offers no download.
        """
        return None

    @transaction.atomic
    def shred_data(self):
        """
        Delete all cached tickets and cached combined tickets that belong to the event.
        """
        per_position = CachedTicket.objects.filter(order_position__order__event=self.event)
        per_position.delete()
        per_order = CachedCombinedTicket.objects.filter(order__event=self.event)
        per_order.delete()
class PaymentInfoShredder(BaseDataShredder):
    """
    Shredder that delegates the removal of payment data to the payment provider of each order.
    """
    verbose_name = _('Payment information')
    identifier = 'payment_info'
    description = _('This will remove payment-related information. Depending on the payment method, all data will be '
                    'removed or personal data only. No download will be offered.')

    def generate_files(self) -> List[Tuple[str, str, str]]:
        """
        Produce nothing: this shredder offers no download.
        """
        return None

    @transaction.atomic
    def shred_data(self):
        """
        Call ``shred_payment_info`` of the matching payment provider for every order of the event.
        Orders whose provider is not known to the event are skipped.
        """
        providers = self.event.get_payment_providers()
        for order in self.event.orders.all():
            provider = providers.get(order.payment_provider)
            if not provider:
                continue
            provider.shred_payment_info(order)
@receiver(register_data_shredders, dispatch_uid="shredders_builtin")
def register_payment_provider(sender, **kwargs):
    """
    Signal receiver that announces the data shredders defined in this module.
    """
    # NOTE(review): despite its name, this receiver registers data shredders, not payment providers.
    builtin_shredders = [
        EmailAddressShredder,
        AttendeeNameShredder,
        InvoiceAddressShredder,
        QuestionAnswerShredder,
        InvoiceShredder,
        CachedTicketShredder,
        PaymentInfoShredder,
        WaitingListShredder,
    ]
    return builtin_shredders

View File

@@ -119,7 +119,7 @@ register_payment_providers = EventPluginSignal(
)
"""
This signal is sent out to get all known payment providers. Receivers should return a
subclass of pretix.base.payment.BasePaymentProvider
subclass of pretix.base.payment.BasePaymentProvider or a list of these
As with all event-plugin signals, the ``sender`` keyword argument will contain the event.
"""
@@ -129,7 +129,17 @@ register_invoice_renderers = EventPluginSignal(
)
"""
This signal is sent out to get all known invoice renderers. Receivers should return a
subclass of pretix.base.invoice.BaseInvoiceRenderer
subclass of pretix.base.invoice.BaseInvoiceRenderer or a list of these
As with all event-plugin signals, the ``sender`` keyword argument will contain the event.
"""
# Sent by ``Event.get_data_shredders()``, which instantiates every returned class with the event.
register_data_shredders = EventPluginSignal(
    providing_args=[]
)
"""
This signal is sent out to get all known data shredders. Receivers should return a
subclass of pretix.base.shredder.BaseDataShredder or a list of these
As with all event-plugin signals, the ``sender`` keyword argument will contain the event.
"""