"""
RL Environment for Datacenter Cooling and Operations — GRPO Training — Gradio Space launcher.

Starts training in a background thread immediately on Space startup.
Gradio keeps the Space healthy (port 7860 responding) while training runs.
Checkpoints are pushed to HF Hub every 10 iterations.
"""

import os
import sys
import threading
import time

# Absolute directory that contains this launcher file.
ROOT = os.path.split(os.path.abspath(__file__))[0]

# Put that directory at the front of the import search path (once), so the
# later function-scope import of ``training.train_grpo`` can be resolved
# relative to this file.
if ROOT not in sys.path:
    sys.path.insert(0, ROOT)

import gradio as gr

# Iteration total shown in the status header.
N_ITERATIONS = 50  # must match training/train_grpo.py

# Mutable progress snapshot shared by the background training thread and the
# Gradio callbacks. "state" and "log" are written by the helpers in this file;
# "iteration" and "reward" are only read here.
# NOTE(review): nothing visible in this file updates "iteration"/"reward" —
# confirm whether the training module is expected to write them.
_status = dict(state="starting", iteration=0, reward=0.0, log=[])

# NOTE(review): this event is neither set nor waited on anywhere in the
# visible part of this file — confirm whether it is still needed.
_started = threading.Event()


def _log(msg: str) -> None:
    """Print *msg* (flushed immediately) and record it in the shared log.

    The in-memory log stored under ``_status["log"]`` is capped at the 200
    most recent entries; once the cap is exceeded the list is replaced by a
    new list holding only the tail.
    """
    print(msg, flush=True)
    history = _status["log"]
    history.append(msg)
    if len(history) > 200:
        # Rebind to a fresh, trimmed list rather than trimming in place.
        _status["log"] = history[-200:]


def _download_ppo() -> None:
    """Ensure the PPO cooling checkpoint ``best_model.zip`` exists locally.

    The expected location is ``<ROOT>/training/cooling_controller_best/``.
    When the file is already there, nothing is downloaded. Otherwise it is
    fetched from the ``Mephisto2412/clusterenv-ppo-cooling`` Hub repository
    into that directory.

    This is best-effort: any exception (including a missing
    ``huggingface_hub`` package) is logged and swallowed so that start-up
    continues. The log message refers to a heuristic fallback; that fallback
    is not implemented in this function.
    """
    model_dir = os.path.join(ROOT, "training", "cooling_controller_best")
    model_path = os.path.join(model_dir, "best_model.zip")

    if os.path.exists(model_path):
        _log("PPO model already present.")
        return

    try:
        # Imported lazily so that an absent package is handled like any
        # other download failure.
        from huggingface_hub import hf_hub_download

        os.makedirs(model_dir, exist_ok=True)
        hf_hub_download(
            repo_id="Mephisto2412/clusterenv-ppo-cooling",
            filename="best_model.zip",
            local_dir=model_dir,
        )
        _log("PPO cooling model downloaded from HF Hub.")
    except Exception as e:
        _log(f"PPO download failed ({e}) — heuristic fallback active.")


def _run_training() -> None:
    """Run the full training job; intended as a background-thread target.

    Steps, each reflected in ``_status["state"]``:

    1. ``"downloading PPO model"`` — call ``_download_ppo()``.
    2. ``"training"`` — import and call ``training.train_grpo.main()``.
    3. ``"complete"`` on success, or ``"error: <exception>"`` on failure.

    No exception derived from ``Exception`` propagates out of the training
    step. On failure the error message *and the full traceback* are sent
    through ``_log`` so that they appear in the UI log, not only in the
    process output.
    """
    _status["state"] = "downloading PPO model"
    _download_ppo()

    _status["state"] = "training"
    _log("Starting GRPO training...")
    try:
        # Imported lazily: a broken or missing training module is then
        # reported through the status/log instead of crashing at import time.
        from training.train_grpo import main
        main()
    except Exception as e:
        import traceback
        _status["state"] = f"error: {e}"
        _log(f"Training error: {e}")
        # traceback.print_exc() would only reach stderr; logging the
        # formatted traceback line by line also makes it visible in the UI.
        for tb_line in traceback.format_exc().splitlines():
            _log(tb_line)
    else:
        _status["state"] = "complete"
        _log("Training complete.")


# Self-ping loop: HF sleeps Spaces with no HTTP traffic; this prevents that.
def _keepalive() -> None:
    """Periodically send an HTTP GET to the local Gradio server, forever.

    Waits 60 s before the first request, then issues one request to
    ``http://127.0.0.1:7860/`` every 45 s with a 5 s timeout. Failures are
    ignored on purpose: a missed ping is harmless and must not terminate the
    loop. The function never returns, so it should run in a daemon thread.
    """
    import urllib.request
    time.sleep(60)  # let Gradio start first
    while True:
        try:
            # The context manager closes the response (and its socket)
            # immediately instead of leaving that to garbage collection.
            with urllib.request.urlopen("http://127.0.0.1:7860/", timeout=5):
                pass
        except Exception:
            # Best-effort ping: the server may be starting up or busy.
            pass
        time.sleep(45)  # ping every 45 s — well under any inactivity threshold

# Both threads below are started as a side effect of loading this module.

# Keep-alive pinger: a daemon thread, so it never blocks interpreter exit.
threading.Thread(target=_keepalive, daemon=True).start()

# Start training immediately in background.
# daemon=False: the interpreter will not exit while this thread is still
# running, even if the main thread has finished.
_thread = threading.Thread(target=_run_training, daemon=False)
_thread.start()


# ── Gradio interface (keeps Space healthy) ────────────────────────────────────

def get_status() -> tuple[str, str]:
    """Build the two strings displayed by the status UI.

    Returns:
        A ``(header, log)`` pair. ``header`` is a one-line summary of the
        current state, iteration count (out of ``N_ITERATIONS``) and last
        reward formatted with an explicit sign and four decimals. ``log`` is
        the newline-joined text of the 50 most recent log entries.
    """
    summary = (
        f"State: {_status['state']} | "
        f"Iteration: {_status['iteration']}/{N_ITERATIONS} | "
        f"Last reward: {_status['reward']:+.4f}"
    )
    recent_entries = _status["log"][-50:]  # last 50 lines
    return summary, "\n".join(recent_entries)


# Status page: a one-line status header plus a scrolling log view. Both are
# filled by get_status() on page load, on button click and on a 30 s timer.
with gr.Blocks(title="RL Environment for Datacenter Cooling and Operations — GRPO Training") as demo:
    gr.Markdown(
        "## RL Environment for Datacenter Cooling and Operations — GRPO Scheduler Training\n\n"
        "Training runs in the background. "
        "Checkpoints pushed to [Mephisto2412/clusterenv-grpo-adapter]"
        "(https://huggingface.co/Mephisto2412/clusterenv-grpo-adapter) "
        "every 10 iterations.\n\n"
        "Refresh this page to see updated status."
    )
    status_box = gr.Textbox(label="Status", lines=1, interactive=False)
    log_box    = gr.Textbox(label="Training log (last 50 lines)", lines=25,
                            interactive=False, max_lines=25)

    refresh_btn = gr.Button("Refresh logs")
    refresh_btn.click(fn=get_status, outputs=[status_box, log_box])

    # Fill both boxes as soon as the page is opened or refreshed. Without
    # this they stay empty until the first timer tick or a button click,
    # which contradicts the "Refresh this page" hint above.
    demo.load(fn=get_status, outputs=[status_box, log_box])

    # Auto-refresh every 30 seconds via gr.Timer.
    timer = gr.Timer(value=30)
    timer.tick(fn=get_status, outputs=[status_box, log_box])


# Bind to all interfaces on port 7860 (the same port _keepalive pings).
demo.launch(server_name="0.0.0.0", server_port=7860)