from __future__ import annotations
import json
from pathlib import Path
from typing import Iterable, List, Tuple

import click
import numpy as np
from PIL import Image

from .models import get_reader, get_paddle_reader
from .utils import preprocess, quad_to_bbox
from .schema import OCRBlock
from .pdf import pdf_to_images

# Supported extensions: lower-case suffixes, each including the leading dot.
IMAGE_EXTS = {
    ".bmp",
    ".gif",
    ".jpeg",
    ".jpg",
    ".png",
    ".tif",
    ".tiff",
    ".webp",
}
PDF_EXTS = {
    ".pdf",
}


# ---------------- EasyOCR (base) ----------------
def run_ocr_on_image(
    img: Image.Image,
    langs: Iterable[str] = ("en",),
    conf_threshold: float = 0.3,
    page: int = 1,
) -> List[OCRBlock]:
    """Recognise text in one PIL image with EasyOCR.

    Args:
        img: Image to read; it is passed through ``preprocess`` first.
        langs: Language codes handed to ``get_reader`` (as a tuple).
        conf_threshold: Detections scoring below this value are discarded.
        page: Page number stamped on every returned block.

    Returns:
        One ``OCRBlock`` per kept detection, in the order the reader
        reported them.
    """
    ocr_reader = get_reader(tuple(langs))
    prepared = np.array(preprocess(img))
    # With detail=1 each detection is unpacked as a (quad, text, conf) triple.
    detections = ocr_reader.readtext(prepared, detail=1, paragraph=False)

    # Keep a detection only when it has a confidence, that confidence is not
    # below the threshold, and its text is not blank.
    return [
        OCRBlock(
            page=page,
            bbox=quad_to_bbox(quad),
            text=str(text),
            confidence=float(conf),
        )
        for quad, text, conf in detections
        if not (conf is None or conf < conf_threshold) and str(text).strip()
    ]


# ---------------- PaddleOCR (high quality) ----------------
def run_ocr_on_image_paddle(
    img: Image.Image,
    lang: str = "en",
    conf_threshold: float = 0.3,
    page: int = 1,
) -> List[OCRBlock]:
    """Run PaddleOCR (det + rec) on a PIL image and return OCRBlocks.

    Args:
        img: Image to read; it is converted to RGB and then to BGR channel
            order before being handed to the engine.
        lang: Language code passed to ``get_paddle_reader``.
        conf_threshold: Detections scoring below this value are discarded.
        page: Page number stamped on every returned block.

    Returns:
        One ``OCRBlock`` per kept detection. The bounding box is the
        axis-aligned ``(min_x, min_y, max_x, max_y)`` envelope of the
        detected quadrilateral, with coordinates converted via ``int()``.
        An empty list is returned when nothing is detected.
    """
    import cv2

    ocr = get_paddle_reader(lang)
    # NOTE(review): BGR presumably matches what PaddleOCR expects from
    # OpenCV-style arrays -- confirm against the engine's documentation.
    arr = cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2BGR)
    result = ocr.ocr(arr, cls=True)

    blocks: List[OCRBlock] = []
    # PaddleOCR may report "no text found" as None or as a list holding None
    # (behaviour varies between releases), so guard both levels instead of
    # crashing with "'NoneType' object is not iterable".
    for line in result or []:
        if not line:
            continue
        for det in line:
            quad, (text, conf) = det
            if conf is None or conf < conf_threshold or not str(text).strip():
                continue
            xs = [int(p[0]) for p in quad]
            ys = [int(p[1]) for p in quad]
            bbox = (min(xs), min(ys), max(xs), max(ys))
            blocks.append(
                OCRBlock(page=page, bbox=bbox, text=str(text), confidence=float(conf))
            )
    return blocks


# ---------------- File routing ----------------
def render_input_to_pages(path: Path, dpi: int = 200) -> List[Image.Image]:
    """Convert a file (image or PDF) into a list of PIL pages.

    Args:
        path: File to load; the type is chosen from its suffix,
            case-insensitively.
        dpi: Resolution forwarded to ``pdf_to_images``; unused for images.

    Returns:
        For a PDF, whatever ``pdf_to_images`` returns. For an image, a
        single-element list holding an RGB copy of the image.

    Raises:
        ValueError: If the suffix is in neither ``PDF_EXTS`` nor
            ``IMAGE_EXTS``.
    """
    suffix = path.suffix.lower()
    if suffix in PDF_EXTS:
        return pdf_to_images(path, dpi=dpi)
    if suffix in IMAGE_EXTS:
        # Image.open is lazy and can keep the file handle open (notably for
        # multi-frame formats such as GIF/TIFF). convert() returns an
        # independent copy, so the source can be closed right away.
        with Image.open(path) as src:
            return [src.convert("RGB")]
    raise ValueError(f"Unsupported file type: {path.suffix}")


def ocr_file(
    input_path: Path,
    langs: Iterable[str] = ("en",),
    dpi: int = 200,
    conf_threshold: float = 0.3,
) -> List[OCRBlock]:
    """OCR every page of a single file with EasyOCR (the default engine).

    Args:
        input_path: File handed to ``render_input_to_pages``.
        langs: Language codes forwarded to ``run_ocr_on_image``.
        dpi: Render resolution forwarded to ``render_input_to_pages``.
        conf_threshold: Minimum confidence forwarded to ``run_ocr_on_image``.

    Returns:
        The blocks of all pages concatenated in page order; pages are
        numbered from 1.
    """
    page_images = render_input_to_pages(input_path, dpi=dpi)
    return [
        block
        for page_no, page_img in enumerate(page_images, start=1)
        for block in run_ocr_on_image(
            page_img,
            langs=langs,
            conf_threshold=conf_threshold,
            page=page_no,
        )
    ]


# ---------------- Save helpers ----------------
def save_json(blocks: List[OCRBlock], out_path: Path) -> None:
    """Write OCR blocks to ``out_path`` as a pretty-printed JSON array.

    Missing parent directories are created. Each block is serialised via
    its ``model_dump()`` method; non-ASCII text is written as-is in UTF-8.
    """
    target_dir = out_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    records = []
    for block in blocks:
        records.append(block.model_dump())

    serialized = json.dumps(records, ensure_ascii=False, indent=2)
    out_path.write_text(serialized, encoding="utf-8")


def save_csv(blocks: List[OCRBlock], out_path: Path) -> None:
    """Save OCR results to CSV (via pandas).

    Missing parent directories are created. Each block becomes one row with
    the columns ``page, x1, y1, x2, y2, text, confidence``, where
    ``x1, y1, x2, y2`` are the four items of ``block.bbox`` in order.

    The header row is always written, even when ``blocks`` is empty, so
    every output file has the same schema.

    Args:
        blocks: Blocks to export.
        out_path: Destination CSV file.
    """
    import pandas as pd

    columns = ["page", "x1", "y1", "x2", "y2", "text", "confidence"]
    out_path.parent.mkdir(parents=True, exist_ok=True)

    rows = []
    for b in blocks:
        x1, y1, x2, y2 = b.bbox
        rows.append(
            {
                "page": b.page,
                "x1": x1,
                "y1": y1,
                "x2": x2,
                "y2": y2,
                "text": b.text,
                "confidence": b.confidence,
            }
        )
    # Passing the columns explicitly keeps the header when there are no
    # rows; without it an empty frame has no columns to write.
    pd.DataFrame(rows, columns=columns).to_csv(out_path, index=False)


# ---------------- CLI ----------------
@click.command(context_settings=dict(help_option_names=["-h", "--help"]))
@click.argument("input_path", type=click.Path(exists=True, path_type=Path))
@click.argument("output_dir", type=click.Path(path_type=Path))
@click.option("--lang", "langs", multiple=True, default=["en"], show_default=True,
              help="Languages for EasyOCR (e.g., en, fr, de)")
@click.option("--dpi", default=200, show_default=True, help="PDF render DPI")
@click.option("--conf-threshold", default=0.3, show_default=True,
              help="Min confidence to keep a block")
def main(input_path: Path, output_dir: Path, langs: list[str], dpi: int, conf_threshold: float):
    """Run OCR on a file or a folder recursively, save JSON + CSV results (EasyOCR)."""
    # The docstring above doubles as the --help text, so details live here.
    #
    # input_path: a single file, or a directory that is searched recursively
    #             for files whose suffix is in IMAGE_EXTS or PDF_EXTS.
    # output_dir: created if missing; receives <stem>.json and <stem>.csv per
    #             input. Outputs are flat (not mirrored into subfolders).
    # A failure on one input is reported on stderr and does not stop the run.
    if input_path.is_dir():
        supported = IMAGE_EXTS | PDF_EXTS  # built once, not per candidate
        # is_file() skips directories that merely look like images
        # ("scans.png/"); sorting makes processing order and collision
        # suffixes reproducible across runs.
        inputs: list[Path] = sorted(
            p
            for p in input_path.rglob("*")
            if p.is_file() and p.suffix.lower() in supported
        )
    else:
        inputs = [input_path]

    output_dir.mkdir(parents=True, exist_ok=True)

    # Output names are derived from the stem only, so "a/scan.png" and
    # "b/scan.pdf" would otherwise overwrite each other's results. The first
    # input keeps the plain stem; later ones get "_1", "_2", ... appended.
    used_stems: set[str] = set()
    for p in inputs:
        base = p.stem
        attempt = 1
        while base in used_stems:
            base = f"{p.stem}_{attempt}"
            attempt += 1
        used_stems.add(base)

        json_out = output_dir / f"{base}.json"
        csv_out = output_dir / f"{base}.csv"
        try:
            blocks = ocr_file(p, langs=langs, dpi=dpi, conf_threshold=conf_threshold)
            save_json(blocks, json_out)
            save_csv(blocks, csv_out)
        except Exception as e:
            # Best-effort batch: report the failure and move on to the next file.
            click.echo(f"[ERR] {p}: {e}", err=True)
        else:
            click.echo(f"[OK] {p} -> {json_out.name}, {csv_out.name}")


# Script entry point: invoke the click command only when this module is run
# directly, not when it is imported.
if __name__ == "__main__":
    main()