Badges: reduce memory usage when placing multiple per page (Z#23125583) (#3472)

Co-authored-by: Raphael Michel <michel@rami.io>
This commit is contained in:
Richard Schreiber
2023-07-17 12:50:48 +02:00
committed by GitHub
parent 7b5789b110
commit 0969abb460
4 changed files with 255 additions and 56 deletions

View File

@@ -140,7 +140,7 @@ class BaseExporter:
"""
return {}
def render(self, form_data: dict) -> Tuple[str, str, bytes]:
def render(self, form_data: dict) -> Tuple[str, str, Optional[bytes]]:
"""
Render the exported file and return a tuple consisting of a filename, a file type
and file content.

View File

@@ -61,7 +61,8 @@ from django.utils.html import conditional_escape
from django.utils.timezone import now
from django.utils.translation import gettext_lazy as _, pgettext
from i18nfield.strings import LazyI18nString
from pypdf import PdfReader
from pypdf import PdfReader, PdfWriter, Transformation
from pypdf.generic import RectangleObject
from reportlab.graphics import renderPDF
from reportlab.graphics.barcode.qr import QrCodeWidget
from reportlab.graphics.shapes import Drawing
@@ -1027,14 +1028,12 @@ class Renderer:
with open(os.path.join(d, 'out.pdf'), 'rb') as f:
return BytesIO(f.read())
else:
from pypdf import PdfReader, PdfWriter, Transformation
from pypdf.generic import RectangleObject
buffer.seek(0)
new_pdf = PdfReader(buffer)
output = PdfWriter()
for i, page in enumerate(new_pdf.pages):
bg_page = copy.copy(self.bg_pdf.pages[i])
bg_page = copy.deepcopy(self.bg_pdf.pages[i])
bg_rotation = bg_page.get('/Rotate')
if bg_rotation:
# /Rotate is clockwise, transformation.rotate is counter-clockwise
@@ -1071,6 +1070,56 @@ class Renderer:
return outbuffer
def merge_background(fg_pdf, bg_pdf, out_file, compress):
if settings.PDFTK:
with tempfile.TemporaryDirectory() as d:
fg_filename = os.path.join(d, 'fg.pdf')
bg_filename = os.path.join(d, 'bg.pdf')
fg_pdf.write(fg_filename)
bg_pdf.write(bg_filename)
pdftk_cmd = [
settings.PDFTK,
fg_filename,
'multibackground',
bg_filename,
'output',
'-',
]
if compress:
pdftk_cmd.append('compress')
subprocess.run(pdftk_cmd, check=True, stdout=out_file)
else:
output = PdfWriter()
for i, page in enumerate(fg_pdf.pages):
bg_page = copy.deepcopy(bg_pdf.pages[i])
bg_rotation = bg_page.get('/Rotate')
if bg_rotation:
# /Rotate is clockwise, transformation.rotate is counter-clockwise
t = Transformation().rotate(bg_rotation)
w = float(page.mediabox.getWidth())
h = float(page.mediabox.getHeight())
if bg_rotation in (90, 270):
# offset due to rotation base
if bg_rotation == 90:
t = t.translate(h, 0)
else:
t = t.translate(0, w)
# rotate mediabox as well
page.mediabox = RectangleObject((
page.mediabox.left.as_numeric(),
page.mediabox.bottom.as_numeric(),
page.mediabox.top.as_numeric(),
page.mediabox.right.as_numeric(),
))
page.trimbox = page.mediabox
elif bg_rotation == 180:
t = t.translate(w, h)
page.add_transformation(t)
bg_page.merge_page(page)
output.add_page(bg_page)
output.write(out_file)
@deconstructible
class PdfLayoutValidator:
def __call__(self, value):

View File

@@ -34,24 +34,27 @@
import json
import logging
import os
import subprocess
import tempfile
from collections import OrderedDict
from datetime import date, datetime, time, timedelta
from decimal import Decimal
from io import BytesIO
from typing import Tuple
from typing import BinaryIO, List, Optional, Tuple
import dateutil.parser
from django import forms
from django.conf import settings
from django.contrib.staticfiles import finders
from django.core.files import File
from django.core.files.base import ContentFile
from django.core.files.storage import default_storage
from django.db import DataError, models
from django.db.models import Case, Exists, OuterRef, Q, Subquery, When
from django.db.models.functions import Cast, Coalesce
from django.utils.timezone import make_aware
from django.utils.translation import gettext as _, gettext_lazy, pgettext_lazy
from pypdf import PdfReader, PdfWriter, Transformation
from pypdf import PageObject, PdfReader, PdfWriter, Transformation
from pypdf.generic import RectangleObject
from reportlab.lib import pagesizes
from reportlab.lib.units import mm
@@ -59,8 +62,10 @@ from reportlab.pdfgen import canvas
from pretix.base.exporter import BaseExporter
from pretix.base.i18n import language
from pretix.base.models import Order, OrderPosition, Question, QuestionAnswer
from pretix.base.pdf import Renderer
from pretix.base.models import (
Event, Order, OrderPosition, Question, QuestionAnswer,
)
from pretix.base.pdf import Renderer, merge_background
from pretix.base.services.export import ExportError
from pretix.base.settings import PERSON_NAME_SCHEMES
from pretix.helpers.templatetags.jsonfield import JSONExtract
@@ -179,9 +184,137 @@ OPTIONS = OrderedDict([
])
def render_pdf(event, positions, opt):
Renderer._register_fonts()
def _chunks(lst, n):
"""
Yield successive n-sized chunks from lst.
"""
for i in range(0, len(lst), n):
yield lst[i:i + n]
def _render_nup_page(nup_pdf: PdfWriter, input_pages: PageObject, opt: dict) -> PageObject:
"""
Render the `Page` objects in `input_pages` onto one page of `nup_pdf` using the options given in `opt` and
return the newly created page.
"""
badges_per_page = opt['cols'] * opt['rows']
nup_page = nup_pdf.add_blank_page(
width=Decimal('%.5f' % (opt['pagesize'][0])),
height=Decimal('%.5f' % (opt['pagesize'][1])),
)
for i, page in enumerate(input_pages):
di = i % badges_per_page
tx = opt['margins'][3] + (di % opt['cols']) * opt['offsets'][0]
ty = opt['margins'][2] + (opt['rows'] - 1 - (di // opt['cols'])) * opt['offsets'][1]
page.add_transformation(Transformation().translate(tx, ty))
page.mediabox = RectangleObject((
Decimal('%.5f' % (page.mediabox.left.as_numeric() + tx)),
Decimal('%.5f' % (page.mediabox.bottom.as_numeric() + ty)),
Decimal('%.5f' % (page.mediabox.right.as_numeric() + tx)),
Decimal('%.5f' % (page.mediabox.top.as_numeric() + ty))
))
page.trimbox = page.mediabox
nup_page.merge_page(page)
return nup_page
def _merge_pages(file_paths: List[str], output_file: BinaryIO):
"""
Merge all pages from the PDF files named `file_paths` into the `output_file`.
"""
if settings.PDFTK:
subprocess.run([
settings.PDFTK,
*file_paths,
'cat',
'output',
'-',
'compress'
], check=True, stdout=output_file)
else:
merger = PdfWriter()
merger.add_metadata({
'/Title': 'Badges',
'/Creator': 'pretix',
})
# append all temp-PDFs
for pdf in file_paths:
merger.append(pdf)
# write merged PDFs to buffer
merger.write(output_file)
def _render_nup(input_files: List[str], num_pages: int, output_file: BytesIO, opt: dict):
"""
Render the pages from the PDF files listed in `input_files` (file names) with a total number of `num_pages` pages
into one file written to `output_file` using the -nup options given in `opt`.
"""
badges_per_page = opt['cols'] * opt['rows']
max_nup_pages = 20 # chunk size to prevent working with huge files
nup_pdf_files = []
temp_dir = None
if num_pages > badges_per_page * max_nup_pages:
# to reduce memory consumption with lots of badges
# we try to use temporary PDF-files with up to
# max_nup_pages pages
# If temp-files fail, we try to merge in-memory anyways
try:
temp_dir = tempfile.TemporaryDirectory()
except IOError:
pass
try:
badges_pdf = PdfReader(input_files.pop())
offset = 0
for i, chunk_indices in enumerate(_chunks(range(num_pages), badges_per_page * max_nup_pages)):
chunk = []
for j in chunk_indices:
# We need to dynamically switch to the next input file as we don't know how many pages each input
# file has beforehand
if j - offset >= len(badges_pdf.pages):
offset += len(badges_pdf.pages)
badges_pdf = PdfReader(input_files.pop())
chunk.append(badges_pdf.pages[j - offset])
# Reset some internal state from pypdf. This will make it a little slower, but will prevent us from
# running out of memory if we process a really large file.
badges_pdf.flattened_pages = None
nup_pdf = PdfWriter()
nup_pdf.add_metadata({
'/Title': 'Badges',
'/Creator': 'pretix',
})
for page_chunk in _chunks(chunk, badges_per_page):
_render_nup_page(nup_pdf, page_chunk, opt)
if temp_dir:
file_path = os.path.join(temp_dir.name, 'badges-%d.pdf' % i)
nup_pdf.write(file_path)
nup_pdf_files.append(file_path)
else:
# everything fitted into one nup_pdf -- we can save some work
nup_pdf.write(output_file)
return
del badges_pdf # free up memory
file_paths = [os.path.join(temp_dir.name, fp) for fp in nup_pdf_files]
_merge_pages(file_paths, output_file)
finally:
if temp_dir:
try:
temp_dir.cleanup()
except IOError:
pass
def _render_badges(event: Event, positions: List[OrderPosition], opt: dict) -> Tuple[PdfWriter, PdfWriter, int]:
"""
Render the badges for the given order positions into two different files, one with the foregrounds and one with
the backgrounds.
"""
renderermap = {
bi.item_id: _renderer(event, bi.layout)
for bi in BadgeItem.objects.select_related('layout').filter(item__event=event)
@@ -195,12 +328,13 @@ def render_pdf(event, positions, opt):
if not len(op_renderers):
raise ExportError(_("None of the selected products is configured to print badges."))
# render each badge on its own page first
merger = PdfWriter()
merger.add_metadata({
fg_pdf = PdfWriter()
fg_pdf.add_metadata({
'/Title': 'Badges',
'/Creator': 'pretix',
})
bg_pdf = PdfWriter()
num_pages = 0
for op, renderer in op_renderers:
buffer = BytesIO()
page = canvas.Canvas(buffer, pagesize=pagesizes.A4)
@@ -210,46 +344,52 @@ def render_pdf(event, positions, opt):
if opt['pagesize']:
page.setPageSize(opt['pagesize'])
page.save()
buffer = renderer.render_background(buffer, _('Badge'))
merger.append(ContentFile(buffer.read()))
# to reduce disk-IO render backgrounds in own PDF and merge later
fg_pdf.append(buffer)
new_num_pages = len(fg_pdf.pages)
for i in range(new_num_pages - num_pages):
bg_pdf.add_page(renderer.bg_pdf.pages[i])
num_pages = new_num_pages
outbuffer = BytesIO()
merger.write(outbuffer)
outbuffer.seek(0)
return fg_pdf, bg_pdf, num_pages
def render_pdf(event, positions, opt, output_file):
Renderer._register_fonts()
badges_per_page = opt['cols'] * opt['rows']
if badges_per_page == 1:
# no need to place multiple badges on one page
return outbuffer
fg_pdf, bg_pdf, _ = _render_badges(event, positions, opt)
merge_background(
fg_pdf,
bg_pdf,
output_file,
compress=True,
)
else:
# place n-up badges/pages per page
with tempfile.TemporaryDirectory() as tmp_dir:
page_pdfs = []
total_num_pages = 0
for position_chunk in _chunks(positions, 200):
# We first render the foreground and background of every individual badge and merge them, but we do
# so in chunks, since the n-up code is slower if it has to deal with huge PDFs. It doesn't matter
# that not every position has the same number of pages, as the n-up code can deal with that
fg_pdf, bg_pdf, num_pages = _render_badges(event, position_chunk, opt)
out_pdf_name = os.path.join(tmp_dir, f'chunk-{len(page_pdfs)}.pdf')
with open(out_pdf_name, 'wb') as out_pdf:
merge_background(
fg_pdf,
bg_pdf,
out_pdf,
compress=False,
)
page_pdfs.append(out_pdf_name)
total_num_pages += num_pages
del fg_pdf, bg_pdf # free up memory
# place n-up badges/pages per page
badges_pdf = PdfReader(outbuffer)
nup_pdf = PdfWriter()
nup_page = None
for i, page in enumerate(badges_pdf.pages):
di = i % badges_per_page
if di == 0:
nup_page = nup_pdf.add_blank_page(
width=Decimal('%.5f' % (opt['pagesize'][0])),
height=Decimal('%.5f' % (opt['pagesize'][1])),
)
tx = opt['margins'][3] + (di % opt['cols']) * opt['offsets'][0]
ty = opt['margins'][2] + (opt['rows'] - 1 - (di // opt['cols'])) * opt['offsets'][1]
page.add_transformation(Transformation().translate(tx, ty))
page.mediabox = RectangleObject((
Decimal('%.5f' % (page.mediabox.left.as_numeric() + tx)),
Decimal('%.5f' % (page.mediabox.bottom.as_numeric() + ty)),
Decimal('%.5f' % (page.mediabox.right.as_numeric() + tx)),
Decimal('%.5f' % (page.mediabox.top.as_numeric() + ty))
))
page.trimbox = page.mediabox
nup_page.merge_page(page)
outbuffer = BytesIO()
nup_pdf.write(outbuffer)
outbuffer.seek(0)
return outbuffer
# Actually render a n-up file
return _render_nup(page_pdfs, total_num_pages, output_file, opt)
class BadgeExporter(BaseExporter):
@@ -335,7 +475,7 @@ class BadgeExporter(BaseExporter):
)
return d
def render(self, form_data: dict) -> Tuple[str, str, str]:
def render(self, form_data: dict, output_file=None) -> Tuple[str, str, Optional[bytes]]:
qs = OrderPosition.objects.filter(
order__event=self.event, item_id__in=form_data['items']
).prefetch_related(
@@ -431,11 +571,17 @@ class BadgeExporter(BaseExporter):
)
try:
outbuffer = render_pdf(self.event, qs, OPTIONS[form_data.get('rendering', 'one')])
if output_file:
render_pdf(self.event, qs, OPTIONS[form_data.get('rendering', 'one')], output_file=output_file)
return 'badges.pdf', 'application/pdf', None
else:
with tempfile.NamedTemporaryFile(delete=True) as tmpfile:
render_pdf(self.event, qs, OPTIONS[form_data.get('rendering', 'one')], output_file=tmpfile)
tmpfile.seek(0)
return 'badges.pdf', 'application/pdf', tmpfile.read()
except DataError:
logging.exception('DataError during export')
raise ExportError(
_('Your data could not be converted as requested. This could be caused by invalid values in your '
'databases, such as answers to number questions which are not a number.')
)
return 'badges.pdf', 'application/pdf', outbuffer.read()

View File

@@ -20,6 +20,7 @@
# <https://www.gnu.org/licenses/>.
#
import logging
import tempfile
from typing import List
from django.core.files.base import ContentFile
@@ -41,7 +42,10 @@ logger = logging.getLogger(__name__)
def badges_create_pdf(event: Event, fileid: int, positions: List[int]) -> int:
file = CachedFile.objects.get(id=fileid)
pdfcontent = render_pdf(event, OrderPosition.objects.filter(id__in=positions), opt=OPTIONS['one'])
file.file.save(cachedfile_name(file, file.filename), ContentFile(pdfcontent.read()))
file.save()
with tempfile.TemporaryFile() as tmp_file:
render_pdf(event, OrderPosition.objects.filter(id__in=positions), opt=OPTIONS['one'],
output_file=tmp_file)
tmp_file.seek(0)
file.file.save(cachedfile_name(file, file.filename), ContentFile(tmp_file.read()))
file.save()
return file.pk