import os
import requests
from openai import OpenAI

API_BASE_URL = os.getenv("API_BASE_URL", "https://api-inference.huggingface.co/v1/")
MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct")
HF_TOKEN = os.getenv("HF_TOKEN", "dummy_token")

ENV_URL = "http://127.0.0.1:7860"

client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)


def log_start(task, env, model):
    print(f"[START] task={task} env={env} model={model}", flush=True)


def log_step(step, action, reward, done, error):
    done_str = "true" if done else "false"
    err_str = "null" if error is None else f'"{error}"'
    print(
        f"[STEP] step={step} action={action} reward={float(reward):.2f} "
        f"done={done_str} error={err_str}",
        flush=True,
    )


def log_end(success, steps, score, rewards):
    succ_str = "true" if success else "false"
    rews_str = ",".join([f"{float(r):.2f}" for r in rewards])
    print(
        f"[END] success={succ_str} steps={steps} score={score:.2f} rewards={rews_str}",
        flush=True,
    )


def _classify_trend(history, key):
    if len(history) < 2:
        return "UNKNOWN"
    delta = history[-1][key] - history[0][key]
    if key == "pps":
        if delta > 1000:
            return "SURGING"
        if delta > 200:
            return "RISING"
        if delta < -200:
            return "FALLING"
    else:
        if delta > 10:
            return "RISING"
        if delta < -5:
            return "FALLING"
    return "STABLE"


def get_action(history):
    entries = []
    for h in history:
        entries.append(
            f"(CPU {h['cpu']:.1f}%, PPS {h['pps']:.0f}, "
            f"BW {h['bw']:.1f}Mbps, Health {h['health']:.0f}%)"
        )
    telemetry = " -> ".join(entries)

    pps_trend = _classify_trend(history, "pps")
    cpu_trend = _classify_trend(history, "cpu")

    prompt = (
        f"Network telemetry (last {len(history)} snapshots):\n"
        f"  {telemetry}\n"
        f"PPS trend: {pps_trend}  |  CPU trend: {cpu_trend}\n\n"
        f"Respond with exactly one word: monitor, rate_limit, or block."
    )

    system_msg = (
        "You are a strict IoT network defense AI protecting critical infrastructure. "
        "Analyse the telemetry trend and choose the optimal mitigation action. "
        "Output ONLY one word — no explanation."
    )

    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": system_msg},
                {"role": "user", "content": prompt},
            ],
            max_tokens=10,
            temperature=0.1,
        )
        text = response.choices[0].message.content.strip().lower()

        if "block" in text:
            return "block"
        if "limit" in text or "rate" in text:
            return "rate_limit"
        return "monitor"
    except Exception:
        return "monitor"


def run_episode(task_id):
    log_start(task=task_id, env="clairs-network-defense", model=MODEL_NAME)

    try:
        res = requests.post(f"{ENV_URL}/reset", json={"task_id": task_id}).json()
        obs = res if "cpu_usage_percent" in res else res.get("observation", {})
    except Exception:
        obs = {
            "cpu_usage_percent": 0.0,
            "packet_rate_pps": 0.0,
            "active_connections": 0,
            "bandwidth_mbps": 0.0,
            "memory_usage_percent": 30.0,
            "system_health": 100.0,
        }

    done = False
    step_count = 0
    rewards = []
    history = []

    while not done and step_count < 10:
        step_count += 1

        cpu = obs.get("cpu_usage_percent", 0.0)
        pps = obs.get("packet_rate_pps", 0.0)
        bw = obs.get("bandwidth_mbps", 0.0)
        health = obs.get("system_health", 100.0)
        history.append({"cpu": cpu, "pps": pps, "bw": bw, "health": health})

        if len(history) > 3:
            history.pop(0)

        action = get_action(history)

        try:
            step_res = requests.post(
                f"{ENV_URL}/step", json={"decision": action}
            ).json()
            obs = step_res.get("observation", obs)
            reward = step_res.get("reward", 0.01)
            done = step_res.get("done", True)
            error = None
        except Exception as e:
            reward = 0.01
            done = True
            error = str(e)

        rewards.append(reward)
        log_step(step=step_count, action=action, reward=reward, done=done, error=error)

    raw_score = sum(rewards) / len(rewards) if rewards else 0.01
    score = max(0.01, min(0.99, raw_score))
    success = score >= 0.5
    log_end(success=success, steps=step_count, score=score, rewards=rewards)


if __name__ == "__main__":
    tasks = ["task_1_easy", "task_2_medium", "task_3_hard", "task_4_expert"]
    for t in tasks:
        run_episode(t)