from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import List

import fitz  # PyMuPDF
from PIL import Image


@dataclass
class RenderedImage:
    """A PNG file written to disk for one rendered PDF page."""

    path: Path  # location of the PNG file
    page_index: int  # zero-based index of the source page in the PDF


def render_pdf_to_pngs(
    pdf_path: Path, out_dir: Path, pages: int = 2, dpi: int = 200
) -> List[RenderedImage]:
    """Render the leading pages of a PDF to RGB PNG files.

    Files are written to ``out_dir`` (created if missing) and are named
    ``<pdf stem>_p<N>.png`` where ``N`` is the one-based page number.

    Args:
        pdf_path: PDF document to render.
        out_dir: Directory that receives the PNG files.
        pages: Maximum number of pages to render, counted from the first
            page. Capped at the document's page count.
        dpi: Render resolution; converted to a zoom factor relative to the
            72-points-per-inch PDF coordinate space.

    Returns:
        One ``RenderedImage`` per page written, in page order. Each
        ``page_index`` is zero-based.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    zoom = dpi / 72.0
    mat = fitz.Matrix(zoom, zoom)
    rendered: List[RenderedImage] = []

    # The context manager closes the document even if rendering or saving
    # one of the pages raises.
    with fitz.open(pdf_path) as doc:
        n = min(pages, doc.page_count)
        for i in range(n):
            page = doc.load_page(i)
            pix = page.get_pixmap(matrix=mat, alpha=False)

            img_path = out_dir / f"{pdf_path.stem}_p{i+1}.png"
            pix.save(str(img_path))

            # normalize to RGB with PIL (avoids weird modes)
            # convert() loads the pixel data into a new image, so the file
            # handle can be released before the same path is overwritten.
            with Image.open(img_path) as raw:
                im = raw.convert("RGB")
            im.save(img_path)

            rendered.append(RenderedImage(path=img_path, page_index=i))

    return rendered