"""Task definitions with PIL-generated synthetic document images and ground truth."""
from __future__ import annotations
import base64
import io
import math
import random

try:
    from PIL import Image, ImageDraw, ImageFont, ImageFilter
    PIL_AVAILABLE = True
except ImportError:
    PIL_AVAILABLE = False


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _b64_png(img) -> str:
    """Encode *img* as PNG in memory and return the bytes as base64 text.

    Only ``img.save(fileobj, format="PNG")`` is called on the argument, so any
    object exposing that method works.
    """
    with io.BytesIO() as stream:
        img.save(stream, format="PNG")
        png_bytes = stream.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode()


def _try_font(size: int = 14):
    """Return a font object for drawing text, or ``None`` without Pillow.

    Candidate TrueType fonts are tried in order (Arial, then the DejaVu Sans
    path); the first one that loads at *size* is returned. If none can be
    loaded, Pillow's built-in default font is returned instead.
    """
    if not PIL_AVAILABLE:
        return None

    candidates = (
        "arial.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    )
    for font_path in candidates:
        try:
            return ImageFont.truetype(font_path, size)
        except Exception:
            # Best effort: any failure just moves on to the next candidate.
            continue
    return ImageFont.load_default()


def _add_noise(img, intensity: float = 30.0):
    """Perturb every pixel of *img* in place with Gaussian noise.

    For each pixel one sample is drawn from ``gauss(0, intensity)``, truncated
    to an int, and added to all three channels (so the noise is grey, not
    coloured). Each channel is clamped to 0..255. Pixels are unpacked as
    3-tuples, so the image is expected to be RGB.

    The samples come from the ``random`` module's global generator; seed it
    beforehand for reproducible output. Returns the same *img* object.
    """
    from random import gauss

    def _clamp(channel):
        return max(0, min(255, channel))

    px = img.load()
    width, height = img.size
    # Column-major traversal: the order decides which sample hits which pixel.
    for col in range(width):
        for row in range(height):
            red, green, blue = px[col, row]
            delta = int(gauss(0, intensity))
            px[col, row] = (
                _clamp(red + delta),
                _clamp(green + delta),
                _clamp(blue + delta),
            )
    return img


def _rotate_image(img, degrees: float):
    """Return ``img.rotate(degrees, ...)`` with canvas expansion and white fill.

    ``expand=True`` and a white ``fillcolor`` are passed through to the image's
    own ``rotate`` method; its result is returned unchanged.
    """
    white = (255, 255, 255)
    rotated = img.rotate(degrees, expand=True, fillcolor=white)
    return rotated


# ---------------------------------------------------------------------------
# Task 1 — Clean Printed Table
# ---------------------------------------------------------------------------

# Ground truth for Task 1: a seven-product quarterly sales table plus the KPI
# values that generate_task1() both returns and prints under the table.
# All cells are strings because they are drawn and joined into Markdown as-is.
TASK1_DATA = {
    "headers": ["Product", "Q1 Sales", "Q2 Sales", "Q3 Sales", "Q4 Sales"],
    "rows": [
        ["Widget A", "1200", "1350", "1100", "1500"],
        ["Widget B", "800", "950", "870", "920"],
        ["Widget C", "2100", "2300", "2050", "2400"],
        ["Widget D", "450", "480", "510", "490"],
        ["Widget E", "3200", "3100", "3300", "3500"],
        ["Widget F", "670", "720", "690", "750"],
        ["Widget G", "1800", "1950", "1870", "2000"],
    ],
    "kpis": {
        # Column totals over the rows above; keep them in sync with the table.
        "total_q1": "10220",
        # Was "13060", which matches no column sum; the Q4 column adds to 11560.
        "total_q4": "11560",
        # Product with the largest sales (Widget E leads overall).
        "best_product": "Widget E",
    },
}


def _md_from_data(data: dict) -> str:
    """Render ``data["headers"]`` and ``data["rows"]`` as a Markdown pipe table.

    Output is the header line, a ``---`` separator with one entry per header,
    then one line per row, joined with newlines (no trailing newline). Cells
    are joined verbatim, so they must already be strings.
    """
    def _line(cells):
        return "| " + " | ".join(cells) + " |"

    column_names = data["headers"]
    records = data["rows"]
    table = [_line(column_names), _line(["---"] * len(column_names))]
    table.extend(_line(record) for record in records)
    return "\n".join(table)


def generate_task1() -> dict:
    """Build Task 1: a clean, undistorted rendering of the sales table.

    Returns a dict with keys ``image_b64`` (base64 PNG, or ``None`` when
    Pillow is not installed), ``text_hint`` and ``gt_md`` (both the exact
    Markdown table), ``gt_kpis`` (the ``TASK1_DATA["kpis"]`` dict itself, not
    a copy), ``gt_cells`` (empty), ``max_steps`` (5) and ``task_id`` (1).
    """
    gt_md = _md_from_data(TASK1_DATA)
    gt_kpis = TASK1_DATA["kpis"]

    task = {
        "image_b64": None,
        "text_hint": gt_md,  # clean hint for task 1
        "gt_md": gt_md,
        "gt_kpis": gt_kpis,
        "gt_cells": {},
        "max_steps": 5,
        "task_id": 1,
    }
    if not PIL_AVAILABLE:
        # Text-only fallback: same ground truth, no image.
        return task

    column_widths = [110, 80, 80, 80, 80]
    row_height = 28
    margin = 20
    data_rows = TASK1_DATA["rows"]

    # Height budget: title line + header row + one line per data row.
    canvas_w = sum(column_widths) + margin * 2
    canvas_h = row_height * (len(data_rows) + 2) + margin * 2

    img = Image.new("RGB", (canvas_w, canvas_h), (255, 255, 255))
    draw = ImageDraw.Draw(img)
    cell_font = _try_font(13)
    title_font = _try_font(13)

    # Title
    draw.text(
        (margin, margin),
        "Sales Report — Annual Summary",
        fill=(30, 30, 30),
        font=title_font,
    )

    # Header row: white text on blue cells.
    top = margin + row_height
    left = margin
    for label, width in zip(TASK1_DATA["headers"], column_widths):
        draw.rectangle([left, top, left + width, top + row_height], fill=(50, 100, 180))
        draw.text((left + 4, top + 6), label, fill=(255, 255, 255), font=cell_font)
        left += width
    top += row_height

    # Data rows with alternating background (even rows tinted).
    for index, record in enumerate(data_rows):
        background = (255, 255, 255) if index % 2 else (240, 245, 255)
        left = margin
        for value, width in zip(record, column_widths):
            draw.rectangle(
                [left, top, left + width, top + row_height],
                fill=background,
                outline=(200, 200, 200),
            )
            draw.text((left + 4, top + 7), value, fill=(20, 20, 20), font=cell_font)
            left += width
        top += row_height

    # KPI summary line directly under the table.
    top += 5
    summary = f"Total Q1: {gt_kpis['total_q1']}  |  Total Q4: {gt_kpis['total_q4']}  |  Best: {gt_kpis['best_product']}"
    draw.text((margin, top), summary, fill=(80, 80, 80), font=cell_font)

    task["image_b64"] = _b64_png(img)
    return task


# ---------------------------------------------------------------------------
# Task 2 — Noisy Financial Statement
# ---------------------------------------------------------------------------

# Ground truth for Task 2: a five-metric financial statement over three fiscal
# years, the KPI values expected from it, and a set of flagged cells.
TASK2_DATA = {
    "headers": ["Metric", "FY2022", "FY2023", "FY2024", "YoY%"],
    "rows": [
        ["Revenue", "$4,200K", "$5,100K", "$6,300K", "+23.5%"],
        ["Gross Profit", "$1,890K", "$2,346K", "$3,024K", "+28.9%"],
        ["EBITDA", "$840K", "$1,020K", "$1,386K", "+35.9%"],
        ["Net Income", "$504K", "$663K", "$945K", "+42.5%"],
        ["EPS", "$1.26", "$1.66", "$2.36", "+42.2%"],
    ],
    "kpis": dict(
        revenue_fy2024="$6,300K",
        ebitda_margin="22.0%",  # FY2024 EBITDA / FY2024 revenue
        yoy_growth_pct="+23.5%",  # the Revenue row's YoY% value
        net_income_fy2024="$945K",
    ),
    # NOTE(review): keys look like "(row,col)" cell coordinates; what a True
    # entry means is decided by the consumer of gt_cells - confirm there.
    "gt_cells": {
        f"({row},{col})": True
        for row, col in [(0, 0), (0, 1), (0, 2), (1, 0), (1, 3), (2, 0), (2, 3)]
    },
}


def generate_task2() -> dict:
    """Build Task 2: a noisy, slightly rotated financial statement.

    Returns a dict with keys ``image_b64`` (base64 PNG, or ``None`` when
    Pillow is not installed), ``text_hint`` (the Markdown table with
    OCR-style character corruption), ``gt_md`` (the clean Markdown table),
    ``gt_kpis`` and ``gt_cells`` (the ``TASK2_DATA`` dicts themselves, not
    copies), ``max_steps`` (10) and ``task_id`` (2).

    The image noise comes from ``_add_noise``, which samples the ``random``
    module, so the PNG bytes vary between calls unless the caller seeds it.
    """
    gt_md = _md_from_data(TASK2_DATA)
    gt_kpis = TASK2_DATA["kpis"]
    gt_cells = TASK2_DATA["gt_cells"]

    # Noisy text hint — simulate OCR errors, applied in this order.
    ocr_confusions = (("$", "S"), ("%", "°/o"), ("K", "lK"), ("+", "÷"))
    noisy_hint = gt_md
    for clean, garbled in ocr_confusions:
        noisy_hint = noisy_hint.replace(clean, garbled)

    task = {
        "image_b64": None,
        "text_hint": noisy_hint,
        "gt_md": gt_md,
        "gt_kpis": gt_kpis,
        "gt_cells": gt_cells,
        "max_steps": 10,
        "task_id": 2,
    }
    if not PIL_AVAILABLE:
        # Text-only fallback: same ground truth, no image.
        return task

    column_widths = [120, 80, 80, 80, 70]
    row_height = 30
    margin = 20
    data_rows = TASK2_DATA["rows"]

    canvas_w = sum(column_widths) + margin * 2
    canvas_h = row_height * (len(data_rows) + 3) + margin * 2

    img = Image.new("RGB", (canvas_w, canvas_h), (252, 252, 248))
    draw = ImageDraw.Draw(img)
    font = _try_font(13)

    draw.text(
        (margin, margin),
        "Annual Financial Statement — CONFIDENTIAL",
        fill=(20, 20, 80),
        font=font,
    )

    top = margin + row_height

    # Two-level header simulation: a grey group caption half a row above
    # the real column headers.
    draw.text((margin, top), "Financials (USD)", fill=(100, 100, 100), font=font)
    top += row_height // 2

    left = margin
    for label, width in zip(TASK2_DATA["headers"], column_widths):
        draw.rectangle([left, top, left + width, top + row_height], fill=(30, 60, 120))
        draw.text((left + 4, top + 7), label, fill=(255, 255, 255), font=font)
        left += width
    top += row_height

    for index, record in enumerate(data_rows):
        background = (255, 255, 255) if index % 2 else (245, 248, 255)
        left = margin
        for value, width in zip(record, column_widths):
            draw.rectangle(
                [left, top, left + width, top + row_height],
                fill=background,
                outline=(180, 180, 200),
            )
            # Green for values containing "+", red for "-", near-black otherwise.
            if "+" in value:
                ink = (0, 120, 0)
            elif "-" in value:
                ink = (180, 20, 20)
            else:
                ink = (20, 20, 20)
            draw.text((left + 4, top + 8), value, fill=ink, font=font)
            left += width
        top += row_height

    # Add noise + slight blur + slight rotation
    img = _add_noise(img, intensity=18)
    img = img.filter(ImageFilter.GaussianBlur(radius=0.4))
    img = _rotate_image(img, -1.5)

    task["image_b64"] = _b64_png(img)
    return task


# ---------------------------------------------------------------------------
# Task 3 — Degraded Multi-Table Report
# ---------------------------------------------------------------------------

# Ground truth for Task 3: two independent tables on one page, each with its
# own headers, rows, and expected KPI values.
TASK3_DATA = {
    # Operational KPIs: target vs. actual, with the signed difference.
    "table1": {
        "headers": ["KPI", "Target", "Actual", "Variance"],
        "rows": [
            ["Customer Satisfaction", "90%", "87%", "-3%"],
            ["Ticket Resolution Rate", "95%", "93%", "-2%"],
            ["Avg Handle Time", "4.5min", "5.1min", "+0.6min"],
            ["First Call Resolution", "80%", "78%", "-2%"],
        ],
        "kpis": dict(
            csat_actual="87%",
            ticket_resolution="93%",
            avg_handle_time="5.1min",
        ),
    },
    # Budget by category; Remaining = Budget - Spent.
    "table2": {
        "headers": ["Category", "Budget", "Spent", "Remaining"],
        "rows": [
            ["Operations", "$500K", "$472K", "$28K"],
            ["Marketing", "$200K", "$198K", "$2K"],
            ["R&D", "$300K", "$275K", "$25K"],
            ["Support", "$150K", "$163K", "-$13K"],
        ],
        "kpis": dict(
            operations_budget="$500K",
            support_overspend="-$13K",
            total_remaining="$42K",  # 28 + 2 + 25 - 13
        ),
    },
}


def generate_task3() -> dict:
    """Build Task 3: two tables on one heavily degraded, rotated page.

    Returns a dict with keys ``image_b64`` (base64 PNG, or ``None`` when
    Pillow is not installed), ``text_hint`` (both Markdown tables with
    OCR-style corruption), ``gt_md`` (the two clean tables separated by a
    ``---`` line), ``gt_kpis`` (``{"table1": ..., "table2": ...}`` referencing
    the ``TASK3_DATA`` KPI dicts), ``gt_cells`` (empty), ``max_steps`` (15)
    and ``task_id`` (3).

    The image noise comes from ``_add_noise``, which samples the ``random``
    module, so the PNG bytes vary between calls unless the caller seeds it.
    """
    t1 = TASK3_DATA["table1"]
    t2 = TASK3_DATA["table2"]
    gt_md = "\n---\n".join([_md_from_data(t1), _md_from_data(t2)])

    gt_kpis = {"table1": t1["kpis"], "table2": t2["kpis"]}

    # Very noisy hint. NOTE(review): the "-" substitution also rewrites the
    # Markdown separator dashes, not just negative values.
    ocr_confusions = (
        ("$", "S"),
        ("%", "°/o"),
        ("K", "lK"),
        ("-", "—"),
        ("+", "÷"),
    )
    noisy = gt_md
    for clean, garbled in ocr_confusions:
        noisy = noisy.replace(clean, garbled)

    task = {
        "image_b64": None,
        "text_hint": noisy,
        "gt_md": gt_md,
        "gt_kpis": gt_kpis,
        "gt_cells": {},
        "max_steps": 15,
        "task_id": 3,
    }
    if not PIL_AVAILABLE:
        # Text-only fallback: same ground truth, no image.
        return task

    widths_t1 = [180, 80, 80, 80]
    widths_t2 = [120, 80, 80, 80]
    row_height = 28
    margin = 20

    canvas_w = max(sum(widths_t1), sum(widths_t2)) + margin * 2
    canvas_h = row_height * (len(t1["rows"]) + len(t2["rows"]) + 7) + margin * 2

    img = Image.new("RGB", (canvas_w, canvas_h), (248, 245, 240))
    draw = ImageDraw.Draw(img)
    font = _try_font(12)

    def _render_table(table, widths, caption, top):
        """Draw caption, header row and data rows at *top*; return next y."""
        draw.text((margin, top), caption, fill=(50, 30, 10), font=font)
        top += row_height

        left = margin
        for label, width in zip(table["headers"], widths):
            draw.rectangle([left, top, left + width, top + row_height], fill=(80, 50, 20))
            draw.text((left + 3, top + 7), label, fill=(255, 240, 200), font=font)
            left += width
        top += row_height

        for index, record in enumerate(table["rows"]):
            background = (240, 235, 225) if index % 2 else (250, 245, 235)
            left = margin
            for value, width in zip(record, widths):
                draw.rectangle(
                    [left, top, left + width, top + row_height],
                    fill=background,
                    outline=(170, 150, 130),
                )
                draw.text((left + 3, top + 8), value, fill=(30, 20, 10), font=font)
                left += width
            top += row_height

        return top + 10

    cursor = _render_table(t1, widths_t1, "Table 1: Operational KPIs", margin)
    # Horizontal rule between the two tables.
    draw.line([(margin, cursor), (canvas_w - margin, cursor)], fill=(100, 80, 60), width=2)
    cursor = _render_table(t2, widths_t2, "Table 2: Financial Summary", cursor + 10)

    # Heavy degradation
    img = _add_noise(img, intensity=35)
    img = img.filter(ImageFilter.GaussianBlur(radius=0.7))
    img = _rotate_image(img, 3.5)

    task["image_b64"] = _b64_png(img)
    return task


# ---------------------------------------------------------------------------
# Public registry
# ---------------------------------------------------------------------------

# Maps each public task id to the zero-argument function that builds it.
# The ids match the "id" fields in TASK_METADATA.
TASK_REGISTRY = dict(
    clean_table=generate_task1,
    noisy_financial=generate_task2,
    degraded_report=generate_task3,
)

# Human-readable catalogue of the tasks, ordered easy -> hard. Each "id" is a
# key of TASK_REGISTRY, and "max_steps" repeats the value the matching
# generator returns.
TASK_METADATA = [
    dict(
        id="clean_table",
        title="Clean Printed Table",
        difficulty="easy",
        description="Extract a clean single-table sales report into Markdown + JSON KPIs.",
        optimal_steps=2,
        max_steps=5,
    ),
    dict(
        id="noisy_financial",
        title="Noisy Financial Statement",
        difficulty="medium",
        description="Extract a noisy multi-header financial table with merged context. Output Markdown + labeled KPIs. Optional per-cell confidence scoring.",
        optimal_steps=4,
        max_steps=10,
    ),
    dict(
        id="degraded_report",
        title="Degraded Multi-Table Report",
        difficulty="hard",
        description="Extract two tables from a heavily degraded rotated document. Output separate Markdowns + cross-table KPI JSON.",
        optimal_steps=8,
        max_steps=15,
    ),
]