"""
evaluate.py — Compare baseline vs trained RL policy
=====================================================
Run after train.py completes.
"""
from __future__ import annotations
import os
import json
import requests
from openai import OpenAI
from dotenv import load_dotenv

# Merge variables from a .env file (if one is found) into the process
# environment so the configuration lookups below can pick them up.
load_dotenv()

# Endpoints and credentials. Each one can be overridden through the
# environment; the second argument is the fallback used otherwise.
ENV_URL = os.environ.get("ENV_BASE_URL", "https://sejal-k-ai-sprint-manager.hf.space")
BASELINE_MODEL = os.environ.get("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
API_BASE_URL = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
API_KEY = os.environ.get("HF_TOKEN", "dummy")

# Evaluation settings: where the trained weights live, which tasks are
# scored, and how many seeded episodes are averaged per task.
POLICY_PATH = "./results/best_policy.json"
TASKS = ["easy_sprint", "medium_sprint", "hard_sprint"]
N_SEEDS = 5

# System prompt for the baseline LLM: three lines, the middle one being an
# example of the JSON action it is expected to emit.
SYSTEM = (
    "You are a Tech Lead. Output ONLY JSON.\n"
    '{"action_type": "assign", "task_id": "T1", "dev_id": "dev1", "new_priority": null}\n'
    "Only assign backlog tasks to available skill-matched developers."
)


# ── Shared helpers ────────────────────────────────────────────────────────────

def env_reset(task_name, seed=42):
    """Ask the environment server to start a new episode.

    POSTs ``task_name`` and ``seed`` to ``{ENV_URL}/reset`` with a 30 second
    timeout and returns the decoded JSON body of the reply.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    payload = {"task_name": task_name, "seed": seed}
    response = requests.post(ENV_URL + "/reset", json=payload, timeout=30)
    response.raise_for_status()
    return response.json()

def env_step(action):
    """Send one action to the environment server and return its JSON reply.

    Only the four known action fields are forwarded. A missing
    ``action_type`` is sent as ``"skip"``; the other fields are sent as
    ``None`` when absent. Any extra keys in *action* are dropped.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    optional_fields = ("task_id", "dev_id", "new_priority")
    payload = {"action_type": action.get("action_type", "skip")}
    payload.update((field, action.get(field)) for field in optional_fields)

    response = requests.post(ENV_URL + "/step", json={"action": payload}, timeout=30)
    response.raise_for_status()
    return response.json()

def run_episode(act_fn, task_name, seed=42, max_steps=12):
    """Play one episode with *act_fn* and return its clamped final score.

    Args:
        act_fn: Callable that receives the current observation dict and
            returns an action dict suitable for ``env_step``.
        task_name: Task identifier forwarded to ``env_reset``.
        seed: Seed forwarded to ``env_reset``.
        max_steps: Upper bound on the number of actions sent to the server.
            Defaults to 12, the value that used to be hard-coded.

    Returns:
        ``info["final_score"]`` from the last step reply, clamped to
        ``[0.01, 0.99]``. The floor ``0.01`` is returned when no step was
        taken, or when the last reply carries no usable (numeric, non-NaN)
        final score.

    Raises:
        Whatever ``env_reset``, ``env_step`` or *act_fn* raise; failures are
        not handled here.
    """
    obs = env_reset(task_name, seed)
    result = {"info": {}}
    for _ in range(max_steps):
        if obs.get("done"):
            break
        result = env_step(act_fn(obs))
        obs = result["observation"]

    # "info" may be missing or null in the reply; treat both as empty.
    info = result.get("info") or {}
    score = info.get("final_score") if isinstance(info, dict) else None
    # A null / non-numeric score would make min() raise, and NaN would slip
    # through min() as 0.99; both count as "no score".
    if not isinstance(score, (int, float)) or score != score:
        score = 0.01
    return max(0.01, min(0.99, score))

def evaluate(act_fn, name):
    """Score *act_fn* on every task in ``TASKS`` and print a small report.

    For each task, ``N_SEEDS`` episodes are run with seeds ``42, 55, 68, ...``
    (``i * 13 + 42``) and their scores averaged.

    Args:
        act_fn: Policy callable passed through to ``run_episode``.
        name: Heading printed above the per-task results.

    Returns:
        Dict mapping each task name to its mean score rounded to 4 decimals,
        plus an ``"average"`` entry holding the mean of those rounded values.
    """
    print(f"\n  {name}")
    print(f"  {'─'*45}")
    scores = {}
    for task in TASKS:
        vals = []
        for seed in range(N_SEEDS):
            episode_seed = seed * 13 + 42
            try:
                vals.append(run_episode(act_fn, task, seed=episode_seed))
            except Exception as exc:
                # Best effort: a crashed episode still counts as the floor
                # score, but say so -- otherwise an unreachable server is
                # indistinguishable from a genuinely bad policy.
                print(f"  ! {task} seed={episode_seed} failed "
                      f"({type(exc).__name__}: {exc}); scored as 0.01")
                vals.append(0.01)
        # Guard the N_SEEDS == 0 case instead of dividing by zero.
        avg = sum(vals) / len(vals) if vals else 0.01
        scores[task] = round(avg, 4)
        bar = "█" * int(avg * 20)
        print(f"  {task:<20} {avg:.4f}  {bar}")
    # Same guard for an empty TASKS list.
    overall = sum(scores.values()) / len(scores) if scores else 0.01
    scores["average"] = round(overall, 4)
    print(f"  {'AVERAGE':<20} {overall:.4f}")
    return scores


# ── Trained policy agent ──────────────────────────────────────────────────────

class TrainedPolicy:
    """Greedy assignment agent driven by four weights read from a JSON file.

    Every (backlog task, available developer) pair gets a linear score built
    from task priority, deadline urgency, skill match and developer load;
    the highest-scoring pair is assigned.
    """

    _WEIGHT_NAMES = ("priority_weight", "deadline_weight",
                     "skill_weight", "load_weight")

    def __init__(self, path):
        """Load the four weights from the JSON file at *path*.

        Raises:
            KeyError: if one of the expected weight keys is missing.
        """
        with open(path) as handle:
            weights = json.load(handle)
        for weight_name in self._WEIGHT_NAMES:
            setattr(self, weight_name, weights[weight_name])

    @staticmethod
    def _skip():
        """Return a fresh no-op action."""
        return {"action_type": "skip", "task_id": None,
                "dev_id": None, "new_priority": None}

    def _pair_score(self, task, dev, today):
        """Score assigning *task* to *dev* on day *today* (higher is better)."""
        # Lower priority numbers score higher.
        total = self.priority_weight * (6 - task["priority"])
        # Urgency grows as the deadline nears; due or overdue tasks cap at 10.
        total += self.deadline_weight * (10 / max(1, task["deadline"] - today))

        dev_skill = dev["skill"]
        if dev_skill == task["required_skill"]:
            skill_factor = 3
        elif dev_skill == "fullstack":
            skill_factor = 2
        else:
            skill_factor = -2  # mismatched skill is penalised
        total += self.skill_weight * skill_factor

        utilisation = dev["current_load"] / max(dev["capacity"], 1)
        total -= self.load_weight * utilisation * 2
        return total

    def act(self, obs):
        """Pick the best assignment for observation *obs*.

        Returns an ``assign`` action for the top-scoring pair, or a ``skip``
        action when there is no backlog task or no developer with spare
        capacity. Ties go to the pair encountered first (tasks outer,
        developers inner, both in observation order).
        """
        today = obs.get("current_day", 1)
        open_tasks = [task for task in obs["tasks"]
                      if task["status"] == "backlog"]
        free_devs = [dev for dev in obs["developers"]
                     if dev["is_available"]
                     and dev["current_load"] < dev["capacity"]]
        if not (open_tasks and free_devs):
            return self._skip()

        top_score = float("-inf")
        chosen_task = chosen_dev = None
        for task, dev in ((t, d) for t in open_tasks for d in free_devs):
            score = self._pair_score(task, dev, today)
            if score > top_score:
                top_score, chosen_task, chosen_dev = score, task, dev

        if not chosen_task or not chosen_dev:
            return self._skip()
        return {"action_type": "assign", "task_id": chosen_task["id"],
                "dev_id": chosen_dev["id"], "new_priority": None}


# ── Baseline LLM agent ────────────────────────────────────────────────────────

_VALID_ACTION_TYPES = ("assign", "reassign", "skip", "unblock")


def _parse_action(text):
    """Return the first JSON object in *text* that is a valid action, else None.

    Chat models often wrap the JSON in code fences or add a sentence around
    it, so the text is scanned for ``{`` and decoded from there instead of
    requiring the whole reply to be JSON.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if (isinstance(candidate, dict)
                and candidate.get("action_type") in _VALID_ACTION_TYPES):
            return candidate
        start = text.find("{", start + 1)
    return None


def make_baseline_act():
    """Build the baseline agent: an ``act(obs)`` closure backed by a chat LLM.

    The closure describes the current day, up to four backlog tasks (sorted
    by priority, then deadline) and the developers with spare capacity, asks
    ``BASELINE_MODEL`` for a JSON action, and returns it when its
    ``action_type`` is one of assign / reassign / skip / unblock.

    Returns:
        A function ``act(obs) -> dict``. It falls back to a ``skip`` action
        when the API call fails or the reply contains no valid action. The
        first API failure of each exception type is printed, so a wrong key
        or endpoint does not silently look like a bad baseline.
    """
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    reported = set()  # exception type names already printed, to avoid spam

    def skip():
        return {"action_type": "skip", "task_id": None,
                "dev_id": None, "new_priority": None}

    def act(obs):
        backlog = sorted([t for t in obs["tasks"] if t["status"] == "backlog"],
                         key=lambda t: (t["priority"], t["deadline"]))
        avail   = [d for d in obs["developers"]
                   if d["is_available"] and d["current_load"] < d["capacity"]]
        prompt  = (
            f"Day {obs['current_day']}/{obs['sprint_length']}\n"
            f"Backlog: {[t['id']+':'+t['required_skill'] for t in backlog[:4]]}\n"
            f"Devs: {[d['id']+':'+d['skill'] for d in avail]}\n"
            f"JSON action:"
        )
        try:
            resp = client.chat.completions.create(
                model=BASELINE_MODEL,
                messages=[
                    {"role": "system", "content": SYSTEM},
                    {"role": "user",   "content": prompt},
                ],
                temperature=0.1, max_tokens=80,
            )
            text = resp.choices[0].message.content or ""
        except Exception as exc:
            # Boundary with a network client: keep the episode going, but
            # surface the cause once per error type.
            kind = type(exc).__name__
            if kind not in reported:
                reported.add(kind)
                print(f"  ! baseline LLM call failed ({kind}: {exc}); "
                      f"falling back to skip")
            return skip()

        action = _parse_action(text)
        return action if action is not None else skip()
    return act


# ── Main ──────────────────────────────────────────────────────────────────────

def main():
    """Evaluate the baseline and (if present) the trained policy, then save.

    Steps:
      1. Check ``{ENV_URL}/health``; print the cause and return if it fails.
      2. Evaluate the baseline LLM agent.
      3. If ``POLICY_PATH`` exists, evaluate the trained policy and print a
         per-task comparison table; otherwise print a hint to run train.py.
      4. Write all collected scores to ``results/evaluation.json``.
    """
    try:
        r = requests.get(f"{ENV_URL}/health", timeout=10)
        # An error status means the server is not usable even if it answers.
        r.raise_for_status()
        print(f"Server: {r.json()}")
    except (requests.RequestException, ValueError) as exc:
        print("ERROR: Start server first: python ui.py")
        print(f"  ({ENV_URL}/health -> {type(exc).__name__}: {exc})")
        return

    print("\n" + "="*55)
    print("  EVALUATION: Baseline vs Trained RL Policy")
    print("="*55)

    results = {}

    # Baseline
    results["baseline"] = evaluate(make_baseline_act(), f"Baseline LLM ({BASELINE_MODEL})")

    # Trained policy
    if os.path.exists(POLICY_PATH):
        policy = TrainedPolicy(POLICY_PATH)
        results["trained"] = evaluate(policy.act, f"Trained RL Policy ({POLICY_PATH})")

        # Comparison table
        print(f"\n{'='*55}")
        print("  IMPROVEMENT SUMMARY")
        print(f"{'='*55}")
        print(f"  {'Task':<20} {'Baseline':>10} {'Trained':>10} {'Delta':>10}")
        print(f"  {'─'*48}")
        for task in TASKS + ["average"]:
            b     = results["baseline"].get(task, 0)
            t     = results["trained"].get(task, 0)
            delta = t - b
            # "+" in the format spec keeps the sign attached to the number and
            # every row exactly 10 wide, matching the 'Delta' header column.
            print(f"  {task:<20} {b:>10.4f} {t:>10.4f} {delta:>+10.4f}")
    else:
        print(f"\n  No trained policy at {POLICY_PATH}")
        print("  Run python train.py first.")

    # Save
    os.makedirs("results", exist_ok=True)
    with open("results/evaluation.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print("\n  Saved → results/evaluation.json")


# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()