| """Playwright-based PDF renderer — Chromium headless PDF export. |
| |
| Replaces WeasyPrint. Uses Playwright to launch headless Chromium, |
| load the fully-rendered HTML, wait for fonts/images/layout, and |
| export a print-quality PDF. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import asyncio |
| import logging |
| import os |
| import tempfile |
| from pathlib import Path |
| from typing import Optional |
|
|
| logger = logging.getLogger(__name__) |
|
|
| |
# Process-wide Chromium state shared by every render call.
# `_playwright` is the driver handle returned by `async_playwright().start()`;
# it has to be kept so it can be stopped again (closing the browser alone
# does not stop the driver that was started for it).
_playwright = None
_browser = None
# Serializes launch/shutdown so concurrent callers never start two browsers.
_browser_lock = asyncio.Lock()


async def _get_browser():
    """Return the shared Chromium browser, launching it when necessary.

    A new browser is launched when none exists yet or when the existing one
    reports that it is no longer connected. In the relaunch case the
    Playwright driver that belonged to the dead browser is stopped first so
    that driver handles do not accumulate.

    Returns:
        The connected Playwright browser instance.

    Raises:
        Whatever Playwright raises when starting the driver or launching
        Chromium; the freshly started driver is stopped before re-raising.
    """
    global _browser, _playwright
    async with _browser_lock:
        if _browser is not None and _browser.is_connected():
            return _browser

        # Imported lazily so importing this module does not require Playwright.
        from playwright.async_api import async_playwright

        # Drop the stale browser/driver pair before starting a new one.
        stale_driver = _playwright
        _browser = None
        _playwright = None
        if stale_driver is not None:
            try:
                await stale_driver.stop()
            except Exception:
                # Best effort: the old driver may already be gone.
                logger.warning(
                    "Failed to stop stale Playwright driver", exc_info=True
                )

        driver = await async_playwright().start()
        try:
            browser = await driver.chromium.launch(
                headless=True,
                args=[
                    "--no-sandbox",
                    "--disable-setuid-sandbox",
                    "--disable-dev-shm-usage",
                    "--disable-gpu",
                    "--font-render-hinting=none",
                ],
            )
        except BaseException:
            # Launch failed or was cancelled: do not leave the driver running.
            await driver.stop()
            raise

        _playwright = driver
        _browser = browser
        logger.info("Chromium browser launched for PDF rendering")
        return _browser


async def shutdown_browser() -> None:
    """Close the shared browser and stop its Playwright driver.

    Safe to call when nothing was ever launched (it is then a no-op) and
    safe to call more than once. Intended for application shutdown.
    """
    global _browser, _playwright
    async with _browser_lock:
        browser, driver = _browser, _playwright
        # Clear the globals first so a failure below cannot leave stale state.
        _browser = None
        _playwright = None
        try:
            if browser is not None and browser.is_connected():
                await browser.close()
                logger.info("Chromium browser closed")
        finally:
            if driver is not None:
                await driver.stop()
|
|
|
|
async def render_pdf_from_html(
    html_content: str,
    *,
    format: str = "A4",
    print_background: bool = True,
    prefer_css_page_size: bool = True,
    wait_timeout: int = 30000,
) -> bytes:
    """Render an HTML string to PDF bytes using Playwright Chromium.

    The HTML is written to a temporary file and loaded in a fresh browser
    context. After fonts and images have finished loading, the elements
    matching ``.page-header``, ``.page-footer`` and ``.hb-right-label`` are
    hidden and a base PDF is printed with 2.54cm margins and a centered
    page-number footer.

    If the document contained a ``.page-header img`` or a
    ``.hb-right-label img``, a one-page overlay PDF is built from those
    images (``_build_overlay_pdf``) and stamped onto the content pages
    (``_stamp_overlay``). The number of leading pages left without overlay
    is the count of ``.cover-page`` elements; the number of trailing pages
    left without overlay is the count of ``.fullpage-img-wrap`` elements.
    NOTE(review): this assumes each of those elements occupies exactly one
    PDF page -- confirm against the templates.

    When neither image is present the base PDF is returned unchanged.

    Args:
        html_content: Complete HTML document string.
        format: Page format passed to Chromium (default A4).
        print_background: Include background colors/images.
        prefer_css_page_size: Use @page CSS rules for sizing.
        wait_timeout: Max time (ms) to wait for page load.

    Returns:
        PDF file bytes.

    Raises:
        No exceptions are caught here; errors from Playwright (for example a
        navigation timeout), from the file system, or from the overlay
        helpers propagate to the caller. The browser context and the
        temporary file are cleaned up in every case.
    """
    browser = await _get_browser()
    context = await browser.new_context(
        # 794 x 1123 CSS px corresponds to A4 at 96 px per inch.
        viewport={"width": 794, "height": 1123},
        device_scale_factor=2,
        java_script_enabled=True,
    )

    try:
        # Created inside the try so the context is closed even if this fails.
        page = await context.new_page()

        # delete=False because Chromium opens the file by URL after we close
        # it; the path is recorded before writing so a failed write still
        # gets cleaned up by the finally below.
        tmp = tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".html",
            delete=False,
            encoding="utf-8",
        )
        tmp_path = tmp.name

        try:
            with tmp:
                tmp.write(html_content)

            file_url = Path(tmp_path).as_uri()
            await page.goto(
                file_url, wait_until="networkidle", timeout=wait_timeout
            )

            # Wait for web fonts, then for every <img> to load or fail.
            await page.evaluate("() => document.fonts.ready")
            await page.evaluate("""
                () => {
                    const images = Array.from(document.querySelectorAll('img'));
                    return Promise.all(images.map(img => {
                        if (img.complete) return Promise.resolve();
                        return new Promise(r => {
                            img.addEventListener('load', r);
                            img.addEventListener('error', r);
                        });
                    }));
                }
            """)
            # Short fixed pause; presumably lets layout settle before printing.
            await page.wait_for_timeout(500)

            # Collect what the overlay needs before the elements are hidden.
            header_src = await page.evaluate("""
                () => {
                    const img = document.querySelector('.page-header img');
                    return img ? img.src : '';
                }
            """)
            label_src = await page.evaluate("""
                () => {
                    const img = document.querySelector('.hb-right-label img');
                    return img ? img.src : '';
                }
            """)
            num_bottom_pages = await page.evaluate("""
                () => document.querySelectorAll('.fullpage-img-wrap').length
            """)
            num_cover_pages = await page.evaluate("""
                () => document.querySelectorAll('.cover-page').length
            """)

            logger.info(
                "Overlay info: header=%s, label=%s, covers=%d, bottoms=%d",
                bool(header_src), bool(label_src),
                num_cover_pages, num_bottom_pages,
            )

            # Hide the in-document decorations; they are re-applied later as
            # an overlay on the selected pages only.
            await page.evaluate("""
                () => {
                    document.querySelectorAll('.page-header, .page-footer, .hb-right-label')
                        .forEach(el => el.style.display = 'none');
                }
            """)

            # Base PDF: content plus a page-number footer, empty header.
            base_pdf = await page.pdf(
                format=format,
                print_background=print_background,
                prefer_css_page_size=prefer_css_page_size,
                margin={
                    "top": "2.54cm",
                    "right": "2.54cm",
                    "bottom": "2.54cm",
                    "left": "2.54cm",
                },
                display_header_footer=True,
                header_template='<span></span>',
                footer_template=(
                    '<div style="width:100%;text-align:center;font-size:10px;'
                    'font-family:Century Gothic,Segoe UI,sans-serif;color:#1C75BC;'
                    'padding:0 0 4px 0;">'
                    '<span class="pageNumber"></span></div>'
                ),
            )
            logger.info("Base PDF rendered, size=%d bytes", len(base_pdf))

        finally:
            os.unlink(tmp_path)

        # Nothing to stamp: the base PDF is the final result.
        if not header_src and not label_src:
            logger.info("No header or label to overlay, returning base PDF")
            return base_pdf

        # The same page is reused to render the overlay document.
        overlay_pdf = await _build_overlay_pdf(
            page, header_src, label_src, format, wait_timeout
        )

        # NOTE: _stamp_overlay is a plain (non-async) function and runs on
        # the event loop thread for the duration of the merge.
        merged = _stamp_overlay(
            base_pdf, overlay_pdf,
            skip_front=num_cover_pages,
            skip_back=num_bottom_pages,
        )
        logger.info("Final PDF with overlay, size=%d bytes", len(merged))
        return merged

    finally:
        await context.close()
|
|
|
|
def _overlay_html(header_src: str, label_src: str) -> str:
    """Return the overlay document markup for the given image URLs."""
    from html import escape

    parts = []
    if header_src:
        # The URL comes from the rendered document (it may be a data: URI
        # containing quotes, or contain '&'), so escape it for attribute use.
        header_attr = escape(header_src, quote=True)
        parts.append(
            '<div style="position:fixed;top:0;left:0;width:100%;height:2.54cm;'
            'margin:0;padding:0;overflow:hidden;z-index:1;">'
            f'<img src="{header_attr}" style="display:block;width:100%;'
            'height:100%;object-fit:fill;margin:0;padding:0;" /></div>'
        )
    if label_src:
        label_attr = escape(label_src, quote=True)
        parts.append(
            '<div style="position:fixed;top:3.14cm;right:0;width:1.65cm;'
            'height:23.42cm;z-index:2;overflow:hidden;">'
            f'<img src="{label_attr}" style="display:block;width:100%;'
            'height:100%;object-fit:fill;" /></div>'
        )

    return (
        '<!doctype html><html><head><meta charset="utf-8">'
        '<style>'
        '@page{size:A4;margin:0}'
        'html,body{margin:0;padding:0;background:transparent}'
        '</style></head><body>'
        + '\n'.join(parts)
        # Filler block sized to one A4 sheet so the document has a page.
        + '<div style="height:297mm;width:210mm;"></div>'
        '</body></html>'
    )


async def _build_overlay_pdf(
    page, header_src: str, label_src: str,
    format: str, timeout: int,
) -> bytes:
    """Render the overlay PDF containing the header image and side label.

    The overlay document places the header image in a fixed 2.54cm-high
    strip at the top of the page and the label image in a fixed 1.65cm-wide
    strip on the right edge. Either image is omitted when its URL is empty.
    The document is written to a temporary file, loaded into ``page``
    (replacing whatever that page showed before) and printed with zero
    margins and no header/footer.

    Note that the overlay stylesheet hard-codes ``@page{size:A4}`` and the
    PDF is printed with ``prefer_css_page_size=True``, so ``format`` is
    passed through to Chromium but the CSS page size is A4 regardless.

    Args:
        page: Playwright page used for rendering; it is navigated away from
            its current document.
        header_src: URL of the header image, or an empty string.
        label_src: URL of the right-side label image, or an empty string.
        format: Page format passed to ``page.pdf``.
        timeout: Max time (ms) to wait for the overlay document to load.

    Returns:
        Overlay PDF file bytes.
    """
    overlay_html = _overlay_html(header_src, label_src)

    # delete=False because Chromium opens the file by URL after we close it;
    # the path is recorded before writing so a failed write is still cleaned
    # up by the finally below.
    tmp = tempfile.NamedTemporaryFile(
        mode="w", suffix=".html", delete=False, encoding="utf-8",
    )
    tmp_path = tmp.name

    try:
        with tmp:
            tmp.write(overlay_html)

        await page.goto(
            Path(tmp_path).as_uri(),
            wait_until="networkidle",
            timeout=timeout,
        )
        # Wait for fonts, then for every <img> to load or fail.
        await page.evaluate("() => document.fonts.ready")
        await page.evaluate("""
            () => {
                const images = Array.from(document.querySelectorAll('img'));
                return Promise.all(images.map(img => {
                    if (img.complete) return Promise.resolve();
                    return new Promise(r => {
                        img.addEventListener('load', r);
                        img.addEventListener('error', r);
                    });
                }));
            }
        """)
        # Short fixed pause; presumably lets layout settle before printing.
        await page.wait_for_timeout(300)

        overlay_bytes = await page.pdf(
            format=format,
            print_background=True,
            prefer_css_page_size=True,
            margin={"top": "0", "right": "0", "bottom": "0", "left": "0"},
            display_header_footer=False,
        )
        logger.info("Overlay PDF rendered, size=%d bytes", len(overlay_bytes))
        return overlay_bytes
    finally:
        os.unlink(tmp_path)
|
|
|
|
def _stamp_overlay(
    base_pdf: bytes,
    overlay_pdf: bytes,
    skip_front: int = 2,
    skip_back: int = 4,
) -> bytes:
    """Stamp the first overlay page onto the middle pages of the base PDF.

    The first ``skip_front`` pages and the last ``skip_back`` pages of the
    base document are copied through unchanged; every page in between has
    page 0 of ``overlay_pdf`` merged on top of it.

    Args:
        base_pdf: Bytes of the document to decorate.
        overlay_pdf: Bytes of a PDF whose first page is the overlay.
        skip_front: Number of leading pages that receive no overlay.
        skip_back: Number of trailing pages that receive no overlay.

    Returns:
        Bytes of the merged PDF, with the same page count as ``base_pdf``.
    """
    import io
    from pypdf import PdfReader, PdfWriter

    source_doc = PdfReader(io.BytesIO(base_pdf))
    stamp_doc = PdfReader(io.BytesIO(overlay_pdf))
    stamp = stamp_doc.pages[0]
    output = PdfWriter()

    page_count = len(source_doc.pages)
    # Inclusive index bounds of the pages that get the overlay.
    lowest_stamped = skip_front
    highest_stamped = page_count - skip_back - 1

    for index, sheet in enumerate(source_doc.pages):
        needs_stamp = lowest_stamped <= index <= highest_stamped
        if needs_stamp:
            sheet.merge_page(stamp)
        output.add_page(sheet)

    sink = io.BytesIO()
    output.write(sink)
    return sink.getvalue()
|
|