Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from dataclasses import dataclass | |
| from pathlib import Path | |
| from typing import List | |
| import fitz # PyMuPDF | |
| from PIL import Image | |
@dataclass
class RenderedImage:
    """A single PDF page that has been rendered to an image file on disk."""

    # Location of the written PNG file.
    path: Path
    # Zero-based index of the source page within the PDF.
    page_index: int
def render_pdf_to_pngs(pdf_path: Path, out_dir: Path, pages: int = 2, dpi: int = 200) -> List[RenderedImage]:
    """Render the first pages of a PDF to RGB PNG files.

    Args:
        pdf_path: PDF file to render; its stem is used in the output names.
        out_dir: Directory that receives the PNGs; created if missing.
        pages: Maximum number of leading pages to render. Fewer are
            rendered when the document is shorter.
        dpi: Target rendering resolution.

    Returns:
        One ``RenderedImage`` per rendered page, in page order. Files are
        named ``<pdf stem>_p<N>.png`` with ``N`` starting at 1, while
        ``page_index`` is zero-based.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    # PyMuPDF renders at 72 dpi with an identity matrix, so scale both
    # axes by dpi / 72 to reach the requested resolution.
    zoom = dpi / 72.0
    mat = fitz.Matrix(zoom, zoom)

    rendered: List[RenderedImage] = []
    # The context manager closes the document even if rendering or saving
    # a page raises part-way through.
    with fitz.open(pdf_path) as doc:
        n = min(pages, doc.page_count)
        for i in range(n):
            page = doc.load_page(i)
            pix = page.get_pixmap(matrix=mat, alpha=False)
            img_path = out_dir / f"{pdf_path.stem}_p{i+1}.png"
            pix.save(str(img_path))

            # normalize to RGB with PIL (avoids weird modes)
            # convert() returns a fully loaded copy, so the source file
            # handle can be released before the same path is overwritten.
            with Image.open(img_path) as im:
                rgb = im.convert("RGB")
            rgb.save(img_path)

            rendered.append(RenderedImage(path=img_path, page_index=i))
    return rendered