"""Playwright-based PDF renderer — Chromium headless PDF export. Replaces WeasyPrint. Uses Playwright to launch headless Chromium, load the fully-rendered HTML, wait for fonts/images/layout, and export a print-quality PDF. """ from __future__ import annotations import asyncio import logging import os import tempfile from pathlib import Path from typing import Optional logger = logging.getLogger(__name__) # Singleton browser instance for reuse across requests _browser = None _browser_lock = asyncio.Lock() async def _get_browser(): """Get or create a persistent Chromium browser instance.""" global _browser async with _browser_lock: if _browser is None or not _browser.is_connected(): from playwright.async_api import async_playwright pw = await async_playwright().start() _browser = await pw.chromium.launch( headless=True, args=[ "--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage", "--disable-gpu", "--font-render-hinting=none", ], ) logger.info("Chromium browser launched for PDF rendering") return _browser async def shutdown_browser(): """Gracefully close the browser on application shutdown.""" global _browser async with _browser_lock: if _browser and _browser.is_connected(): await _browser.close() _browser = None logger.info("Chromium browser closed") async def render_pdf_from_html( html_content: str, *, format: str = "A4", print_background: bool = True, prefer_css_page_size: bool = True, wait_timeout: int = 30000, ) -> bytes: """Render HTML string to PDF bytes using Playwright Chromium. Generates a base PDF (content only, no decorative header/label), then creates a one-page overlay with the header image and right-side label, and stamps the overlay onto content pages (page 3 → last content page) using pypdf. Pages 1-2 (cover/TOC) and trailing full-page image pages get no overlay. Args: html_content: Complete HTML document string. format: Page format (default A4). print_background: Include background colors/images. prefer_css_page_size: Use @page CSS rules for sizing. wait_timeout: Max time (ms) to wait for page load. Returns: PDF file bytes. """ browser = await _get_browser() context = await browser.new_context( viewport={"width": 794, "height": 1123}, # A4 at 96dpi device_scale_factor=2, java_script_enabled=True, ) page = await context.new_page() try: # Write HTML to a temp file so Chromium can load local file:// resources with tempfile.NamedTemporaryFile( mode="w", suffix=".html", delete=False, encoding="utf-8", ) as tmp: tmp.write(html_content) tmp_path = tmp.name try: file_url = Path(tmp_path).as_uri() await page.goto(file_url, wait_until="networkidle", timeout=wait_timeout) # Wait for fonts and images to be fully loaded await page.evaluate("() => document.fonts.ready") await page.evaluate(""" () => { const images = Array.from(document.querySelectorAll('img')); return Promise.all(images.map(img => { if (img.complete) return Promise.resolve(); return new Promise(r => { img.addEventListener('load', r); img.addEventListener('error', r); }); })); } """) await page.wait_for_timeout(500) # ── Collect info from DOM before hiding elements ── header_src = await page.evaluate(""" () => { const img = document.querySelector('.page-header img'); return img ? img.src : ''; } """) label_src = await page.evaluate(""" () => { const img = document.querySelector('.hb-right-label img'); return img ? img.src : ''; } """) num_bottom_pages = await page.evaluate(""" () => document.querySelectorAll('.fullpage-img-wrap').length """) # Cover page count: cover + TOC image (each is a .cover-page) num_cover_pages = await page.evaluate(""" () => document.querySelectorAll('.cover-page').length """) logger.info( "Overlay info: header=%s, label=%s, covers=%d, bottoms=%d", bool(header_src), bool(label_src), num_cover_pages, num_bottom_pages, ) # ── Hide header, footer, and label from the base PDF ── await page.evaluate(""" () => { document.querySelectorAll('.page-header, .page-footer, .hb-right-label') .forEach(el => el.style.display = 'none'); } """) # ── Render BASE PDF (no header, no label) ── base_pdf = await page.pdf( format=format, print_background=print_background, prefer_css_page_size=prefer_css_page_size, margin={ "top": "2.54cm", "right": "2.54cm", "bottom": "2.54cm", "left": "2.54cm", }, display_header_footer=True, header_template='', footer_template=( '