| | """ |
| | PDF I/O utilities - Load and render PDFs |
| | Placeholder for Task 2 |
| | """ |
| |
|
| | from typing import List, Tuple |
| | from PIL import Image |
| | import io |
| |
|
| |
|
def load_pdf_pages(pdf_bytes: bytes, dpi: int = 150, max_pages: int = 10) -> List[Image.Image]:
    """
    Load a PDF from memory and render its leading pages to PIL images.

    Args:
        pdf_bytes: PDF file contents as bytes.
        dpi: Rendering resolution in dots per inch (default 150).
        max_pages: Maximum number of pages to render (default 10).

    Returns:
        A list of RGB PIL Images, one per rendered page, in page order.
        When PyMuPDF is not installed, a warning is printed and up to three
        independent grey 800x1000 placeholder images are returned instead
        (never more than ``max_pages``).

    Raises:
        Whatever PyMuPDF raises when the bytes cannot be opened or rendered
        as a PDF; those errors are intentionally not swallowed here.
    """
    # Only the import is guarded: an ImportError raised later, while
    # rendering, must not be mistaken for "PyMuPDF is missing".
    try:
        import fitz
    except ImportError:
        print("⚠️ PyMuPDF not available, returning placeholder")
        placeholder = Image.new("RGB", (800, 1000), color=(200, 200, 200))
        # Hand out separate copies so that editing one page does not change
        # the others, and honour the caller's page limit.
        return [placeholder.copy() for _ in range(min(3, max_pages))]

    # PDF coordinates are 72 units per inch, so dpi / 72 is the zoom factor.
    zoom = dpi / 72
    matrix = fitz.Matrix(zoom, zoom)

    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        pages = []
        for index in range(min(len(doc), max_pages)):
            # alpha=False keeps the pixmap at 3 bytes per pixel, which is the
            # layout Image.frombytes("RGB", ...) expects.
            pix = doc[index].get_pixmap(matrix=matrix, alpha=False)
            pages.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
        return pages
    finally:
        # Release the document even if rendering a page fails.
        doc.close()
| |
|
| |
|
def get_page_count(pdf_bytes: bytes) -> int:
    """
    Get the total page count of a PDF held in memory.

    This is a best-effort helper: it never raises.

    Args:
        pdf_bytes: PDF file contents as bytes.

    Returns:
        The number of pages in the document, or 0 when the input is empty,
        PyMuPDF is not installed, or the bytes cannot be opened as a PDF.
    """
    # Nothing to count; skip the import and the parser entirely.
    if not pdf_bytes:
        return 0

    try:
        import fitz

        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            return len(doc)
        finally:
            # Close the document even if counting its pages fails.
            doc.close()
    except Exception:
        # Deliberate catch-all: a missing dependency or unreadable data is
        # reported to callers as "0 pages" rather than as an error.
        return 0
| |
|