"""Playwright-based PDF renderer — Chromium headless PDF export. Replaces WeasyPrint. Uses Playwright to launch headless Chromium, load the fully-rendered HTML, wait for fonts/images/layout, and export a print-quality PDF. """ from __future__ import annotations import asyncio import logging import os import tempfile from pathlib import Path from typing import Optional logger = logging.getLogger(__name__) # Singleton browser instance for reuse across requests _browser = None _browser_lock = asyncio.Lock() async def _get_browser(): """Get or create a persistent Chromium browser instance.""" global _browser async with _browser_lock: if _browser is None or not _browser.is_connected(): from playwright.async_api import async_playwright pw = await async_playwright().start() _browser = await pw.chromium.launch( headless=True, args=[ "--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu", "--font-render-hinting=none", ], ) logger.info("Chromium browser launched for PDF rendering") return _browser async def shutdown_browser(): """Gracefully close the browser on application shutdown.""" global _browser async with _browser_lock: if _browser and _browser.is_connected(): await _browser.close() _browser = None logger.info("Chromium browser closed") async def render_pdf_from_html( html_content: str, *, format: str = "A4", print_background: bool = True, prefer_css_page_size: bool = True, wait_timeout: int = 30000, ) -> bytes: """Render HTML string to PDF bytes using Playwright Chromium. Generates a base PDF (content only, no decorative header/label), then creates a one-page overlay with the header image and right-side label, and stamps the overlay onto content pages (page 3 → last content page) using pypdf. Pages 1-2 (cover/TOC) and trailing full-page image pages get no overlay. Args: html_content: Complete HTML document string. format: Page format (default A4). print_background: Include background colors/images. prefer_css_page_size: Use @page CSS rules for sizing. wait_timeout: Max time (ms) to wait for page load. Returns: PDF file bytes. """ browser = await _get_browser() context = await browser.new_context( viewport={"width": 794, "height": 1123}, # A4 at 96dpi device_scale_factor=2, java_script_enabled=True, ) page = await context.new_page() try: # Write HTML to a temp file so Chromium can load local file:// resources with tempfile.NamedTemporaryFile( mode="w", suffix=".html", delete=False, encoding="utf-8", ) as tmp: tmp.write(html_content) tmp_path = tmp.name try: file_url = Path(tmp_path).as_uri() await page.goto(file_url, wait_until="networkidle", timeout=wait_timeout) # Wait for fonts and images to be fully loaded await page.evaluate("() => document.fonts.ready") await page.evaluate(""" () => { const images = Array.from(document.querySelectorAll('img')); return Promise.all(images.map(img => { if (img.complete) return Promise.resolve(); return new Promise(r => { img.addEventListener('load', r); img.addEventListener('error', r); }); })); } """) await page.wait_for_timeout(500) # ── Collect info from DOM before hiding elements ── header_src = await page.evaluate(""" () => { const img = document.querySelector('.page-header img'); return img ? img.src : ''; } """) label_src = await page.evaluate(""" () => { const img = document.querySelector('.hb-right-label img'); return img ? img.src : ''; } """) num_bottom_pages = await page.evaluate(""" () => document.querySelectorAll('.fullpage-img-wrap').length """) # Cover page count: cover + TOC image (each is a .cover-page) num_cover_pages = await page.evaluate(""" () => document.querySelectorAll('.cover-page').length """) logger.info( "Overlay info: header=%s, label=%s, covers=%d, bottoms=%d", bool(header_src), bool(label_src), num_cover_pages, num_bottom_pages, ) # ── Hide header, footer, and label from the base PDF ── await page.evaluate(""" () => { document.querySelectorAll('.page-header, .page-footer, .hb-right-label') .forEach(el => el.style.display = 'none'); } """) # ── Render BASE PDF (no header, no label) ── base_pdf = await page.pdf( format=format, print_background=print_background, prefer_css_page_size=prefer_css_page_size, margin={ "top": "2.54cm", "right": "2.54cm", "bottom": "2.54cm", "left": "2.54cm", }, display_header_footer=True, header_template='', footer_template=( '
' '
' ), ) logger.info("Base PDF rendered, size=%d bytes", len(base_pdf)) finally: os.unlink(tmp_path) # ── Build overlay (header + label) and stamp onto content pages ── if not header_src and not label_src: logger.info("No header or label to overlay, returning base PDF") return base_pdf overlay_pdf = await _build_overlay_pdf( page, header_src, label_src, format, wait_timeout ) merged = _stamp_overlay( base_pdf, overlay_pdf, skip_front=num_cover_pages, skip_back=num_bottom_pages, ) logger.info("Final PDF with overlay, size=%d bytes", len(merged)) return merged finally: await context.close() async def _build_overlay_pdf( page, header_src: str, label_src: str, format: str, timeout: int, ) -> bytes: """Render a single-page transparent overlay PDF with header + label.""" parts = [] if header_src: parts.append( f'
' f'
' ) if label_src: parts.append( f'
' f'
' ) overlay_html = ( '' '' + '\n'.join(parts) + '
' '' ) with tempfile.NamedTemporaryFile( mode="w", suffix=".html", delete=False, encoding="utf-8", ) as tmp: tmp.write(overlay_html) tmp_path = tmp.name try: await page.goto( Path(tmp_path).as_uri(), wait_until="networkidle", timeout=timeout, ) await page.evaluate("() => document.fonts.ready") await page.evaluate(""" () => { const images = Array.from(document.querySelectorAll('img')); return Promise.all(images.map(img => { if (img.complete) return Promise.resolve(); return new Promise(r => { img.addEventListener('load', r); img.addEventListener('error', r); }); })); } """) await page.wait_for_timeout(300) overlay_bytes = await page.pdf( format=format, print_background=True, prefer_css_page_size=True, margin={"top": "0", "right": "0", "bottom": "0", "left": "0"}, display_header_footer=False, ) logger.info("Overlay PDF rendered, size=%d bytes", len(overlay_bytes)) return overlay_bytes finally: os.unlink(tmp_path) def _stamp_overlay( base_pdf: bytes, overlay_pdf: bytes, skip_front: int = 2, skip_back: int = 4, ) -> bytes: """Merge overlay onto content pages of the base PDF. Pages 0..(skip_front-1) and (total-skip_back)..(total-1) are left untouched. All other pages get the overlay stamped on top. """ import io from pypdf import PdfReader, PdfWriter base = PdfReader(io.BytesIO(base_pdf)) overlay_reader = PdfReader(io.BytesIO(overlay_pdf)) overlay_page = overlay_reader.pages[0] writer = PdfWriter() total = len(base.pages) first_content = skip_front # e.g. page index 2 last_content = total - skip_back - 1 # e.g. total-5 for i, pg in enumerate(base.pages): if first_content <= i <= last_content: pg.merge_page(overlay_page) writer.add_page(pg) buf = io.BytesIO() writer.write(buf) return buf.getvalue()