File size: 3,171 Bytes
cbf3423
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
"""
PDF Utilities -- AI Reel Creator Platform
=========================================

Helpers for converting PDFs to images (for Gemini multimodal parsing)
and extracting embedded images.

Primary backend: pdf2image  (poppler required)
Fallback backend: PyMuPDF   (self-contained wheel, no external system deps)
"""

import os
import io
from pathlib import Path
from typing import List, Tuple, Optional
from PIL import Image


def _try_pdf2image(
    pdf_path: str,
    dpi: int = 200,
    first_page: int = 1,
    last_page: Optional[int] = None,
) -> Optional[Tuple[List[Image.Image], List[int]]]:
    """Try rendering PDF pages with pdf2image.

    Args:
        pdf_path: Path of the PDF file to render.
        dpi: Rendering resolution forwarded to pdf2image.
        first_page: 1-based number of the first page to render.
        last_page: 1-based number of the last page to render; forwarded
            unchanged to pdf2image (``None`` included).

    Returns:
        ``(images, page_numbers)`` on success, where ``page_numbers[i]`` is
        the 1-based page number of ``images[i]``. ``None`` if pdf2image
        cannot be imported or the conversion raises for any reason.
    """
    try:
        from pdf2image import convert_from_path

        images = convert_from_path(
            pdf_path, dpi=dpi, first_page=first_page, last_page=last_page
        )
        # pdf2image renders from page 1 when first_page is below 1, so the
        # numbering has to start from the same clamped value.
        start = max(first_page, 1)
        return images, list(range(start, start + len(images)))
    except Exception:
        # Deliberately broad: any failure here (package missing, conversion
        # error, bad arguments) tells the caller to try the fallback backend.
        return None


def _try_pymupdf(
    pdf_path: str,
    dpi: int = 200,
    first_page: int = 1,
    last_page: Optional[int] = None,
) -> Tuple[List[Image.Image], List[int]]:
    """Render PDF pages with PyMuPDF (fitz).

    Args:
        pdf_path: Path of the PDF file to render.
        dpi: Target resolution; pages are scaled by ``dpi / 72``.
        first_page: 1-based number of the first page to render. Values
            below 1 are treated as 1.
        last_page: 1-based number of the last page to render. A falsy value
            (``None`` or ``0``) means "through the last page"; values past
            the end of the document are clamped to the page count.

    Returns:
        ``(images, page_numbers)`` where ``page_numbers[i]`` is the 1-based
        page number of ``images[i]``. Both lists are empty when the
        requested range contains no pages.

    Raises:
        ImportError: If PyMuPDF is not installed.
        Exception: Any error raised by PyMuPDF or Pillow while opening or
            rendering; the document is closed before it propagates.
    """
    import fitz

    doc = fitz.open(pdf_path)
    try:
        total = len(doc)
        # Clamp the requested range to the document so an oversized
        # last_page or a first_page below 1 cannot index outside it.
        stop = min(last_page, total) if last_page else total
        start_idx = max(first_page, 1) - 1
        # The zoom matrix is the same for every page, so build it once.
        mat = fitz.Matrix(dpi / 72, dpi / 72)

        images = []
        page_numbers = []
        for i in range(start_idx, stop):
            pix = doc.load_page(i).get_pixmap(matrix=mat)
            images.append(
                Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            )
            page_numbers.append(i + 1)  # report 1-based page numbers
    finally:
        # Release the document even when a page fails to render.
        doc.close()
    return images, page_numbers


def pdf_pages_to_images(
    pdf_path: str,
    dpi: int = 200,
    first_page: int = 1,
    last_page: Optional[int] = None,
) -> Tuple[List[Image.Image], List[int]]:
    """Render PDF pages as PIL Images, preferring pdf2image over PyMuPDF.

    The path is resolved to an absolute path first. pdf2image is attempted;
    if that attempt yields ``None`` the PyMuPDF renderer is used instead.

    Returns:
        The ``(images, page_numbers)`` pair produced by whichever backend
        handled the request.

    Raises:
        RuntimeError: If the PyMuPDF fallback fails with ``ImportError``.
            Other exceptions from the fallback propagate unchanged.
    """
    resolved = str(Path(pdf_path).resolve())

    rendered = _try_pdf2image(resolved, dpi, first_page, last_page)
    if rendered is None:
        # Primary backend unavailable or failed; use the fallback renderer.
        try:
            rendered = _try_pymupdf(resolved, dpi, first_page, last_page)
        except ImportError as exc:
            raise RuntimeError("No PDF-to-image library available. Install one of: pdf2image (+ poppler), PyMuPDF.") from exc
    return rendered


def extract_pdf_embedded_images(pdf_path: str) -> List[Image.Image]:
    """Extract all embedded raster images from a PDF.

    Every image reference listed on every page is extracted in page order;
    no de-duplication is performed. Images that Pillow cannot open or
    convert are skipped.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted images, each converted to ``"RGB"`` mode if it was
        not already.

    Raises:
        RuntimeError: If PyMuPDF is not installed.
        Exception: Any error raised by PyMuPDF while opening the file or
            extracting an image; the document is closed before it
            propagates.
    """
    try:
        import fitz
    except ImportError as exc:
        raise RuntimeError("PyMuPDF is required for embedded-image extraction.") from exc

    images = []
    doc = fitz.open(pdf_path)
    try:
        for page in doc:
            for img in page.get_images(full=True):
                xref = img[0]  # first entry of each record is the xref passed to extract_image
                image_bytes = doc.extract_image(xref)["image"]
                try:
                    pil_img = Image.open(io.BytesIO(image_bytes))
                    if pil_img.mode != "RGB":
                        pil_img = pil_img.convert("RGB")
                except Exception:
                    # Best effort: skip payloads Pillow cannot handle.
                    continue
                images.append(pil_img)
    finally:
        # Release the document even if extraction fails part-way through.
        doc.close()
    return images