"""Image and PDF rendering utilities.""" import base64 import io from pathlib import Path from typing import List import pypdfium2 as pdfium from PIL import Image def render_pdf_to_images(pdf_path: Path, target_width: int = 2000) -> List[Image.Image]: """ Render PDF pages to PIL images (layout-preserving). Args: pdf_path: Path to the PDF file. target_width: Target width for rendering (scales proportionally). Returns: List of PIL Image objects, one per page. """ doc = pdfium.PdfDocument(str(pdf_path)) images: List[Image.Image] = [] for index in range(len(doc)): page = doc[index] width_pt, height_pt = page.get_size() scale = max(1.0, float(target_width) / float(max(1.0, width_pt))) bitmap = page.render(scale=scale) img = bitmap.to_pil() images.append(img) page.close() return images def pil_to_png_data_uri(img: Image.Image) -> str: """Convert a PIL image to a PNG data URI (base64).""" buf = io.BytesIO() img.save(buf, format="PNG") b64 = base64.b64encode(buf.getvalue()).decode("ascii") return f"data:image/png;base64,{b64}" def split_halves(img: Image.Image, overlap_px: int = 40) -> List[Image.Image]: """ Create left/right column crops with small overlap. Useful for two-column CV layouts where GPT-4 Vision might miss content in narrow columns. Args: img: PIL Image to split. overlap_px: Pixels of overlap in the middle. Returns: List of [left_half, right_half] images. """ w, h = img.size mid = w // 2 left_box = (0, 0, min(mid + overlap_px, w), h) right_box = (max(mid - overlap_px, 0), 0, w, h) return [img.crop(left_box), img.crop(right_box)]