from __future__ import annotations
from pathlib import Path
from typing import List

import fitz  # PyMuPDF
from PIL import Image


def pdf_to_images(pdf_path: Path, dpi: int = 200) -> List[Image.Image]:
    """
    Render each PDF page to a PIL Image (RGB).

    Args:
        pdf_path: path to a .pdf file
        dpi: target DPI for rasterization (higher = sharper but slower);
            must be greater than zero

    Returns:
        List of PIL Images, one per page, in page order.

    Raises:
        ValueError: if ``dpi`` is zero or negative.
    """
    # Fail early with a clear message: a zero/negative zoom would otherwise
    # surface as an obscure error (or a mirrored render) further down.
    if dpi <= 0:
        raise ValueError(f"dpi must be positive, got {dpi!r}")

    zoom = dpi / 72.0  # 72 dpi is the PDF default
    mat = fitz.Matrix(zoom, zoom)
    images: List[Image.Image] = []
    with fitz.open(pdf_path) as doc:
        for page in doc:
            # Ask explicitly for an opaque RGB pixmap so the sample buffer
            # is 3 bytes per pixel, matching the "RGB" mode decoded below.
            pix = page.get_pixmap(matrix=mat, colorspace=fitz.csRGB, alpha=False)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            images.append(img)
    return images

from .schema import OCRBlock

def pdf_native_blocks(pdf_path: Path) -> list[OCRBlock]:
    """
    Extract the PDF's embedded (native) text as a list of OCRBlock records.

    Args:
        pdf_path: path to a .pdf file

    Returns:
        One OCRBlock per non-empty text block, page by page in the order
        PyMuPDF reports them. Pages are numbered from 1, the text is
        stripped of surrounding whitespace, and confidence is always 1.0.
        Bounding boxes are the coordinates reported by PyMuPDF, truncated
        to ints; they are not scaled to any rendering DPI.
    """
    blocks: list[OCRBlock] = []
    with fitz.open(pdf_path) as doc:
        for page_no, page in enumerate(doc, start=1):
            # Each entry is (x0, y0, x1, y1, text, block_no, block_type).
            for entry in page.get_text("blocks"):
                x0, y0, x1, y1, txt, *rest = entry
                # block_type != 0 marks a non-text (image) block whose
                # "text" is only a metadata placeholder, not page content.
                # The length guard tolerates tuples without that field.
                if len(rest) >= 2 and rest[1] != 0:
                    continue
                text = txt.strip() if txt else ""
                if not text:
                    continue
                blocks.append(
                    OCRBlock(
                        page=page_no,
                        bbox=(int(x0), int(y0), int(x1), int(y1)),
                        text=text,
                        confidence=1.0,
                    )
                )
    return blocks