File size: 1,809 Bytes
363cda9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
"""Image and PDF rendering utilities."""
import base64
import io
from pathlib import Path
from typing import List
import pypdfium2 as pdfium
from PIL import Image
def render_pdf_to_images(pdf_path: Path, target_width: int = 2000) -> List[Image.Image]:
    """
    Render PDF pages to PIL images (layout-preserving).

    Each page is rasterized with a single uniform scale factor derived from
    ``target_width`` and the page width reported by ``page.get_size()``.
    The factor is floored at 1.0, so a page whose reported width already
    exceeds ``target_width`` is rendered at scale 1.0 rather than shrunk.

    The document handle and every page handle are released before the
    function returns, including when rendering a page raises.

    Args:
        pdf_path: Path to the PDF file.
        target_width: Target width for rendering (scales proportionally,
            never below a scale factor of 1.0).

    Returns:
        List of PIL Image objects, one per page, in page order.
    """
    doc = pdfium.PdfDocument(str(pdf_path))
    try:
        images: List[Image.Image] = []
        for index in range(len(doc)):
            page = doc[index]
            try:
                # Only the width drives the scale; the height follows
                # proportionally because the same factor is used on both axes.
                width_pt, _height_pt = page.get_size()
                # Inner max() guards against a zero/tiny width (division by
                # zero); outer max() prevents downscaling below 1.0.
                scale = max(1.0, float(target_width) / float(max(1.0, width_pt)))
                bitmap = page.render(scale=scale)
                images.append(bitmap.to_pil())
            finally:
                # Release the page even if rendering failed.
                page.close()
        return images
    finally:
        # Pages are closed above, so the document can be released safely.
        doc.close()
def pil_to_png_data_uri(img: Image.Image) -> str:
    """
    Encode a PIL image as a base64 PNG data URI.

    Args:
        img: PIL Image to serialize.

    Returns:
        A string of the form ``data:image/png;base64,<payload>`` where the
        payload is the base64-encoded PNG bytes of ``img``.
    """
    # Serialize to an in-memory buffer; nothing is written to disk.
    with io.BytesIO() as stream:
        img.save(stream, format="PNG")
        png_bytes = stream.getvalue()

    # base64 output is pure ASCII, so decoding as ASCII is lossless.
    payload = base64.b64encode(png_bytes).decode("ascii")
    return "data:image/png;base64," + payload
def split_halves(img: Image.Image, overlap_px: int = 40) -> List[Image.Image]:
    """
    Crop an image into left and right columns that overlap in the middle.

    Intended for two-column CV layouts, where GPT-4 Vision might miss
    content in narrow columns when given the full page.

    Args:
        img: PIL Image to split.
        overlap_px: Number of pixels each half extends past the horizontal
            midpoint into the other half.

    Returns:
        List of [left_half, right_half] images, both with the full height
        of ``img``.
    """
    width, height = img.size
    center = width // 2

    # Clamp the shared band to the image bounds so a large overlap cannot
    # push either crop box outside the image.
    left_half_end = min(center + overlap_px, width)
    right_half_start = max(center - overlap_px, 0)

    crop_boxes = [
        (0, 0, left_half_end, height),
        (right_half_start, 0, width, height),
    ]
    return [img.crop(box) for box in crop_boxes]
|