File size: 996 Bytes
7fd3f6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import List

import fitz  # PyMuPDF
from PIL import Image


@dataclass
class RenderedImage:
    """Record describing one PDF page that was rendered to an image file."""
    # Filesystem location of the PNG written for this page.
    path: Path
    # Zero-based index of the source page within the PDF document.
    page_index: int


def render_pdf_to_pngs(pdf_path: Path, out_dir: Path, pages: int = 2, dpi: int = 200) -> List[RenderedImage]:
    """Render the leading pages of a PDF to RGB PNG files.

    Each rendered page is written to ``out_dir`` as
    ``<pdf stem>_p<N>.png``, where ``N`` is the 1-based page number.

    Args:
        pdf_path: Path of the PDF to render (a ``str`` is accepted too).
        out_dir: Directory that receives the PNG files; it is created,
            including missing parents, if it does not exist.
        pages: Maximum number of pages to render, counted from the first
            page. Values larger than the document's page count are capped;
            zero or negative values render nothing.
        dpi: Target resolution. The page is scaled by ``dpi / 72``.

    Returns:
        One ``RenderedImage`` per rendered page, in page order, holding the
        PNG path and the zero-based page index.

    Raises:
        ValueError: If ``dpi`` is not strictly positive.
    """
    # Fail fast, before touching the filesystem, on a meaningless resolution.
    if dpi <= 0:
        raise ValueError(f"dpi must be positive, got {dpi!r}")

    pdf_path = Path(pdf_path)
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # PDF user space is 72 units per inch, so dpi / 72 is the scale factor
    # that yields the requested pixel density.
    zoom = dpi / 72.0
    mat = fitz.Matrix(zoom, zoom)

    rendered: List[RenderedImage] = []
    # The context manager closes the document even if rendering or saving
    # a page raises part-way through.
    with fitz.open(pdf_path) as doc:
        n = max(0, min(pages, doc.page_count))
        for i in range(n):
            page = doc.load_page(i)
            pix = page.get_pixmap(matrix=mat, alpha=False)

            img_path = out_dir / f"{pdf_path.stem}_p{i+1}.png"
            pix.save(str(img_path))

            # normalize to RGB with PIL (avoids weird modes); the source
            # handle is closed before the file is overwritten in place.
            with Image.open(img_path) as im:
                rgb = im.convert("RGB")
            rgb.save(img_path)

            rendered.append(RenderedImage(path=img_path, page_index=i))

    return rendered