owenkaplinsky's picture
update from github stable code (#3)
3370983 verified
raw
history blame
1.81 kB
"""Image and PDF rendering utilities."""
import base64
import io
from pathlib import Path
from typing import List
import pypdfium2 as pdfium
from PIL import Image
def render_pdf_to_images(pdf_path: Path, target_width: int = 2000) -> List[Image.Image]:
"""
Render PDF pages to PIL images (layout-preserving).
Args:
pdf_path: Path to the PDF file.
target_width: Target width for rendering (scales proportionally).
Returns:
List of PIL Image objects, one per page.
"""
doc = pdfium.PdfDocument(str(pdf_path))
images: List[Image.Image] = []
for index in range(len(doc)):
page = doc[index]
width_pt, height_pt = page.get_size()
scale = max(1.0, float(target_width) / float(max(1.0, width_pt)))
bitmap = page.render(scale=scale)
img = bitmap.to_pil()
images.append(img)
page.close()
return images
def pil_to_png_data_uri(img: Image.Image) -> str:
"""Convert a PIL image to a PNG data URI (base64)."""
buf = io.BytesIO()
img.save(buf, format="PNG")
b64 = base64.b64encode(buf.getvalue()).decode("ascii")
return f"data:image/png;base64,{b64}"
def split_halves(img: Image.Image, overlap_px: int = 40) -> List[Image.Image]:
"""
Create left/right column crops with small overlap.
Useful for two-column CV layouts where GPT-4 Vision might
miss content in narrow columns.
Args:
img: PIL Image to split.
overlap_px: Pixels of overlap in the middle.
Returns:
List of [left_half, right_half] images.
"""
w, h = img.size
mid = w // 2
left_box = (0, 0, min(mid + overlap_px, w), h)
right_box = (max(mid - overlap_px, 0), 0, w, h)
return [img.crop(left_box), img.crop(right_box)]