# acd23's picture
# Upload src/utils/pdf_utils.py
# cbf3423 verified
"""
PDF Utilities -- AI Reel Creator Platform
=========================================
Helpers for converting PDFs to images (for Gemini multimodal parsing)
and extracting embedded images.
Primary backend: pdf2image (poppler required)
Fallback backend: PyMuPDF (self-contained binary wheel, no external system deps)
"""
import os
import io
from pathlib import Path
from typing import List, Tuple, Optional
from PIL import Image
def _try_pdf2image(
pdf_path: str,
dpi: int = 200,
first_page: int = 1,
last_page: Optional[int] = None,
) -> Optional[Tuple[List[Image.Image], List[int]]]:
"""Try rendering PDF pages with pdf2image. Returns None on failure."""
try:
from pdf2image import convert_from_path
images = convert_from_path(pdf_path, dpi=dpi, first_page=first_page, last_page=last_page)
page_numbers = list(range(first_page, first_page + len(images)))
return images, page_numbers
except Exception:
return None
def _try_pymupdf(
    pdf_path: str,
    dpi: int = 200,
    first_page: int = 1,
    last_page: Optional[int] = None,
) -> Tuple[List[Image.Image], List[int]]:
    """Render PDF pages with PyMuPDF (fitz). Raises on failure.

    Args:
        pdf_path: Path of the PDF file to render.
        dpi: Target resolution; pages are scaled by ``dpi / 72``.
        first_page: 1-based number of the first page to render; values
            below 1 are treated as 1.
        last_page: 1-based number of the last page to render (inclusive).
            None means "through the last page"; values past the end of
            the document are clamped to the page count.

    Returns:
        A ``(images, page_numbers)`` tuple where ``page_numbers[i]`` is the
        1-based page number of ``images[i]``. Both lists are empty when the
        requested range contains no pages.

    Raises:
        ImportError: If PyMuPDF is not installed.
        Exception: Whatever PyMuPDF or Pillow raises while opening or
            rendering the document.
    """
    import fitz

    images = []
    page_numbers = []
    doc = fitz.open(pdf_path)
    try:
        page_count = len(doc)
        # Clamp the requested range to the document. A negative index or
        # one past the end would otherwise reach load_page().
        start_idx = max(first_page, 1) - 1
        # Test against None explicitly: `last_page or page_count` would
        # turn last_page=0 into "render everything".
        end = page_count if last_page is None else min(last_page, page_count)
        # The scale factor is the same for every page, so build it once.
        zoom = dpi / 72
        mat = fitz.Matrix(zoom, zoom)
        for i in range(start_idx, end):
            page = doc.load_page(i)
            pix = page.get_pixmap(matrix=mat)
            # NOTE(review): assumes get_pixmap's defaults yield RGB samples
            # without an alpha channel -- confirm against the PyMuPDF version
            # in use.
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            images.append(img)
            page_numbers.append(i + 1)
    finally:
        # Close the document even when a page fails to render.
        doc.close()
    return images, page_numbers
def pdf_pages_to_images(
    pdf_path: str,
    dpi: int = 200,
    first_page: int = 1,
    last_page: Optional[int] = None,
) -> Tuple[List[Image.Image], List[int]]:
    """Render the pages of a PDF as PIL images.

    The path is resolved to an absolute path, then pdf2image is tried
    first. If that backend returns nothing usable, PyMuPDF is used instead.

    Args:
        pdf_path: Path of the PDF file to render.
        dpi: Rendering resolution handed to the backend.
        first_page: 1-based number of the first page to render.
        last_page: 1-based number of the last page to render, or None.

    Returns:
        The ``(images, page_numbers)`` tuple produced by whichever backend
        succeeded.

    Raises:
        RuntimeError: If pdf2image produced no result and PyMuPDF cannot
            be imported.
    """
    absolute_path = str(Path(pdf_path).resolve())
    rendered = _try_pdf2image(absolute_path, dpi, first_page, last_page)
    if rendered is None:
        # Primary backend unavailable or failed; fall back to PyMuPDF.
        try:
            rendered = _try_pymupdf(absolute_path, dpi, first_page, last_page)
        except ImportError as exc:
            raise RuntimeError("No PDF-to-image library available. Install one of: pdf2image (+ poppler), PyMuPDF.") from exc
    return rendered
def extract_pdf_embedded_images(pdf_path: str) -> List[Image.Image]:
    """Extract all embedded raster images from a PDF.

    Every image referenced by every page is extracted, decoded with Pillow
    and returned in RGB mode. Images that cannot be extracted or decoded
    are skipped so the remaining images are still returned.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The decoded images, in page order.

    Raises:
        RuntimeError: If PyMuPDF is not installed.
    """
    try:
        import fitz
    except ImportError as exc:
        raise RuntimeError("PyMuPDF is required for embedded-image extraction.") from exc

    images = []
    doc = fitz.open(pdf_path)
    try:
        for page in doc:
            for img in page.get_images(full=True):
                xref = img[0]
                try:
                    base_image = doc.extract_image(xref)
                    pil_img = Image.open(io.BytesIO(base_image["image"]))
                    if pil_img.mode != "RGB":
                        pil_img = pil_img.convert("RGB")
                    else:
                        # Image.open is lazy; decode now so corrupt data is
                        # skipped here and does not fail later in the caller.
                        pil_img.load()
                except Exception:
                    # Best-effort: one bad image must not discard the rest.
                    continue
                images.append(pil_img)
    finally:
        # Close the document even if iterating the pages raises.
        doc.close()
    return images