Spaces:

Dash10107
/

rocket-lander-sac

Paused

Daksh C Jain Claude Sonnet 4.6 committed on May 13

Commit

d7456d6

1 Parent(s): 3fca800

Upgrade to full production MARL masterclass app

- 3-tab Gradio UI: Mission Control, Training Lab, Algorithm Guide
- Animated GIF replay with HUD overlay (step, reward, throttle bars)
- Side-by-side comparison GIF for multi-episode runs
- 4-panel mission overview: reward bars, 2D trajectory, cumulative reward, engine throttle
- 6-panel episode deep-dive: trajectory, altitude, angle, throttle timelines
- SAC fine-tuning in background thread with live metrics refresh
- Training dashboard: reward history, actor/critic loss, entropy coefficient
- Environment controls: gravity, wind, turbulence sliders
- Modular structure: core/mission.py, core/trainer.py, viz/charts.py, viz/replay.py

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (14) hide show

app.py +432 -300
core/__init__.py +0 -0
core/__pycache__/__init__.cpython-311.pyc +0 -0
core/__pycache__/mission.cpython-311.pyc +0 -0
core/__pycache__/trainer.cpython-311.pyc +0 -0
core/mission.py +178 -0
core/trainer.py +120 -0
requirements.txt +2 -0
viz/__init__.py +0 -0
viz/__pycache__/__init__.cpython-311.pyc +0 -0
viz/__pycache__/charts.cpython-311.pyc +0 -0
viz/__pycache__/replay.cpython-311.pyc +0 -0
viz/charts.py +246 -0
viz/replay.py +160 -0

app.py CHANGED Viewed

@@ -1,69 +1,194 @@
-import gradio as gr
-import gymnasium as gym
 import numpy as np
 from stable_baselines3 import SAC
-import time
-# Load the model
-model = SAC.load("sac_rocket_lander.zip")
-# ── Mission logic ──────────────────────────────────────────────────────────────
-def run_mission(episodes, progress=gr.Progress()):
-    env = gym.make("LunarLander-v3", continuous=True)
-    total_rewards = []
-    episode_logs = []
-    for i in range(int(episodes)):
-        progress((i) / int(episodes), desc=f"Running landing attempt {i+1} of {int(episodes)}...")
-        obs, _ = env.reset()
-        done = False
-        ep_reward = 0
-        steps = 0
-        while not done:
-            action, _ = model.predict(obs, deterministic=True)
-            obs, reward, terminated, truncated, _ = env.step(action)
-            ep_reward += reward
-            steps += 1
-            done = terminated or truncated
-        total_rewards.append(ep_reward)
-        status = "✅ LANDED" if ep_reward > 150 else ("⚠️ PARTIAL" if ep_reward > 0 else "💥 CRASHED")
-        episode_logs.append(f"Attempt {i+1:02d}  |  Score: {ep_reward:+.1f}  |  Steps: {steps:4d}  |  {status}")
-    env.close()
-    progress(1.0, desc="Mission complete.")
-    avg = np.mean(total_rewards)
-    best = np.max(total_rewards)
-    worst = np.min(total_rewards)
-    success_rate = sum(1 for r in total_rewards if r > 150) / len(total_rewards) * 100
-    mission_status = "MISSION SUCCESS" if avg > 150 else "MISSION FAILURE"
-    status_icon = "🚀" if avg > 150 else "💥"
-    log_output = "\n".join(episode_logs)
-    summary = (
-        f"{status_icon}  {mission_status}\n\n"
-        f"{'─'*38}\n"
-        f"  Average Score   {avg:>+10.2f}\n"
-        f"  Best Landing    {best:>+10.2f}\n"
-        f"  Worst Landing   {worst:>+10.2f}\n"
-        f"  Success Rate    {success_rate:>9.1f}%\n"
-        f"{'─'*38}\n\n"
-        f"FLIGHT LOG\n{log_output}"
     )
-    return summary
-# ── Custom CSS ─────────────────────────────────────────────────────────────────
-custom_css = """
-/* ── Google Font: Space Grotesk not used — using Orbitron + Share Tech Mono ── */
 @import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700;900&family=Share+Tech+Mono&family=Exo+2:wght@300;400;600&display=swap');
-/* Reset & base */
 *, *::before, *::after { box-sizing: border-box; }
 body, .gradio-container {
@@ -72,296 +197,303 @@ body, .gradio-container {
     font-family: 'Exo 2', sans-serif !important;
 }
-.gradio-container {
-    max-width: 860px !important;
-    margin: 0 auto !important;
-    padding: 0 1rem 3rem !important;
-}
-/* ── Header ── */
-.mission-header {
-    text-align: center;
-    padding: 2.5rem 1rem 1.5rem;
-    position: relative;
 }
-.mission-header h1 {
     font-family: 'Orbitron', monospace !important;
-    font-size: clamp(1.4rem, 4vw, 2.4rem) !important;
-    font-weight: 900 !important;
-    letter-spacing: 0.08em !important;
-    color: #e8f4ff !important;
-    margin: 0 0 0.35rem !important;
-    text-transform: uppercase !important;
 }
-.mission-header .sub {
     font-family: 'Share Tech Mono', monospace;
-    font-size: 0.78rem;
-    color: #5b8fb5;
-    letter-spacing: 0.25em;
-    text-transform: uppercase;
 }
-.divider {
-    border: none;
-    border-top: 1px solid #0d2540;
-    margin: 1.5rem 0;
-}
-/* ── Status badge strip ── */
 .status-strip {
-    display: flex;
-    gap: 0.75rem;
-    justify-content: center;
-    flex-wrap: wrap;
-    margin: 1.2rem 0 2rem;
 }
 .badge {
     font-family: 'Share Tech Mono', monospace;
-    font-size: 0.72rem;
-    letter-spacing: 0.15em;
-    padding: 5px 14px;
-    border-radius: 3px;
-    text-transform: uppercase;
-}
-.badge-green {
-    background: #041e12;
-    color: #2ddb7c;
-    border: 1px solid #0a5530;
 }
-.badge-blue {
-    background: #020f20;
-    color: #4fb3ff;
-    border: 1px solid #0b3362;
-}
-.badge-amber {
-    background: #1a1002;
-    color: #f5a623;
-    border: 1px solid #5c3700;
 }
-/* ── Panel cards ── */
-.panel {
-    background: #060f1e;
-    border: 1px solid #0d2540;
-    border-radius: 6px;
-    padding: 1.5rem;
-    margin-bottom: 1rem;
 }
-.panel-label {
-    font-family: 'Share Tech Mono', monospace;
-    font-size: 0.68rem;
-    letter-spacing: 0.22em;
-    text-transform: uppercase;
-    color: #2d6a9f;
-    margin-bottom: 1rem;
 }
-/* ── Slider ── */
-.slider-wrap label,
-.gradio-container label span {
     font-family: 'Share Tech Mono', monospace !important;
-    font-size: 0.75rem !important;
-    letter-spacing: 0.15em !important;
-    text-transform: uppercase !important;
-    color: #4fb3ff !important;
 }
 input[type=range] {
-    -webkit-appearance: none;
-    appearance: none;
-    width: 100%;
-    height: 3px;
-    background: #0d2540;
-    border-radius: 2px;
-    outline: none;
-    margin: 0.5rem 0;
 }
 input[type=range]::-webkit-slider-thumb {
-    -webkit-appearance: none;
-    width: 18px;
-    height: 18px;
-    border-radius: 50%;
-    background: #4fb3ff;
-    cursor: pointer;
-    border: 2px solid #030b1a;
-    box-shadow: 0 0 8px rgba(79,179,255,0.5);
 }
-input[type=range]::-moz-range-thumb {
-    width: 18px;
-    height: 18px;
-    border-radius: 50%;
-    background: #4fb3ff;
-    cursor: pointer;
-    border: 2px solid #030b1a;
 }
-/* ── Launch button ── */
-#launch-btn {
-    font-family: 'Orbitron', monospace !important;
-    font-size: 0.9rem !important;
-    font-weight: 700 !important;
-    letter-spacing: 0.18em !important;
-    text-transform: uppercase !important;
-    background: linear-gradient(135deg, #0a2a52 0%, #0d3a72 100%) !important;
-    color: #4fb3ff !important;
-    border: 1px solid #1a5a9e !important;
-    border-radius: 4px !important;
-    padding: 0.85rem 2rem !important;
-    cursor: pointer !important;
-    width: 100% !important;
-    transition: all 0.2s ease !important;
-}
-#launch-btn:hover {
-    background: linear-gradient(135deg, #0d3a72 0%, #1150a0 100%) !important;
-    border-color: #4fb3ff !important;
-    color: #a8d8ff !important;
-    transform: translateY(-1px) !important;
-    box-shadow: 0 4px 20px rgba(79,179,255,0.25) !important;
-}
-#launch-btn:active {
-    transform: translateY(0) !important;
-}
-/* ── Output telemetry box ── */
-.telemetry textarea,
-#output-box textarea,
-.gradio-container textarea {
-    font-family: 'Share Tech Mono', monospace !important;
-    font-size: 0.82rem !important;
-    line-height: 1.7 !important;
-    background: #020810 !important;
-    color: #7fcfff !important;
-    border: 1px solid #0d2540 !important;
-    border-radius: 4px !important;
-    padding: 1.2rem !important;
-    resize: none !important;
-    caret-color: #4fb3ff !important;
-}
-.gradio-container textarea::selection {
-    background: #0d3a72;
-}
-/* Progress bar */
-.progress-bar {
-    background: #0d2540 !important;
-    border-radius: 3px !important;
-}
-.progress-bar > div {
-    background: linear-gradient(90deg, #1150a0, #4fb3ff) !important;
-    border-radius: 3px !important;
-}
-/* ── Footer ── */
-.mission-footer {
-    text-align: center;
-    font-family: 'Share Tech Mono', monospace;
-    font-size: 0.65rem;
-    color: #1e3d5c;
-    letter-spacing: 0.2em;
-    text-transform: uppercase;
-    padding: 2rem 0 0;
-}
-/* ── Misc Gradio overrides ── */
-.gradio-container .prose,
-.gradio-container p {
-    color: #5b8fb5 !important;
-    font-family: 'Exo 2', sans-serif !important;
-    font-size: 0.85rem !important;
-}
-footer { display: none !important; }
-.gradio-container .output-class,
-.gradio-container .block {
-    background: transparent !important;
-    border: none !important;
-}
-.gradio-container .form {
-    background: transparent !important;
-}
-"""
-# ── Header HTML ───────────────────────────────────────────────────────────────
-header_html = """
-<div class="mission-header">
-    <div class="sub">Autonomous Flight Intelligence System  ·  v2.0</div>
-    <h1>⬡ SpaceX Mission Control</h1>
-    <div class="sub">SAC Neural Lander  ·  LunarLander-v3 Simulation</div>
-</div>
-<hr class="divider"/>
-<div class="status-strip">
-    <span class="badge badge-green">● SAC MODEL LOADED</span>
-    <span class="badge badge-blue">● SIMULATION READY</span>
-    <span class="badge badge-amber">◈ AWAITING LAUNCH</span>
-</div>
-"""
-footer_html = """
-<div class="mission-footer">
-    Powered by Stable-Baselines3 · Soft Actor-Critic · Gymnasium LunarLander-v3
-</div>
-"""
-# ── Build Gradio UI ───────────────────────────────────────────────────────────
-with gr.Blocks(title="SpaceX Mission Control") as demo:
-    gr.HTML(header_html)
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("**MISSION PARAMETERS**", elem_classes=["panel-label"])
-            attempts = gr.Slider(
-                minimum=1,
-                maximum=5,
-                value=1,
-                step=1,
-                label="Landing Attempts",
-                info="Select the number of autonomous landing simulations to execute",
-                elem_id="attempts-slider",
             )
-            launch_btn = gr.Button(
-                "🚀  INITIATE LAUNCH SEQUENCE",
-                elem_id="launch-btn",
-                variant="primary",
             )
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("**FLIGHT TELEMETRY  ·  MISSION REPORT**", elem_classes=["panel-label"])
-            output = gr.Textbox(
-                label="",
-                lines=18,
-                max_lines=24,
-                placeholder="Awaiting telemetry data...\n\nPress INITIATE LAUNCH SEQUENCE to begin simulation.",
-                elem_id="output-box",
-                elem_classes=["telemetry"],
-            )
-    launch_btn.click(
-        fn=run_mission,
-        inputs=[attempts],
-        outputs=[output],
-        show_progress="full",
-    )
-    gr.HTML(footer_html)
 if __name__ == "__main__":
-    demo.launch()

+"""
+SpaceX Mission Control — SAC Rocket Lander
+Production Gradio application: simulate, visualise, analyse, and train
+a Soft Actor-Critic agent on the LunarLander-v3 continuous control task.
+"""
+from __future__ import annotations
+import os
 import numpy as np
+import gradio as gr
 from stable_baselines3 import SAC
+from core.mission import run_mission, MissionResult
+from core.trainer import TrainingState, start_training
+from viz.charts import (
+    mission_overview, single_episode_detail,
+    training_dashboard, empty_figure,
+)
+from viz.replay import make_episode_gif, make_comparison_gif
+# ── Model loading ─────────────────────────────────────────────────────────────
+_MODEL_PATHS = ["sac_finetuned.zip", "sac_rocket_lander.zip"]
+_model: SAC | None = None
+def _load_model(path: str | None = None) -> tuple[SAC, str]:
+    candidates = ([path] if path else []) + _MODEL_PATHS
+    for p in candidates:
+        if p and os.path.exists(p):
+            try:
+                return SAC.load(p), p
+            except Exception:
+                continue
+    raise FileNotFoundError("No valid SAC checkpoint found.")
+def _get_model() -> SAC:
+    global _model
+    if _model is None:
+        _model, _ = _load_model()
+    return _model
+# ── Global training state ─────────────────────────────────────────────────────
+_train_state = TrainingState()
+# ── Callbacks ─────────────────────────────────────────────────────────────────
+def cb_run_mission(
+    n_episodes: int,
+    gravity: float,
+    enable_wind: bool,
+    wind_power: float,
+    turbulence: float,
+    render_gif: bool,
+    progress: gr.Progress = gr.Progress(),
+) -> tuple:
+    """Run a mission with the current model and build every Mission Control output.
+
+    Returns a 5-tuple: (overview figure, replay GIF path or None, detail figure
+    for the best episode, stats Markdown, dropdown update with one choice per
+    episode). If no checkpoint can be loaded, both figures are placeholders
+    carrying the error text and the dropdown choices are cleared.
+    """
+    try:
+        model = _get_model()
+    except FileNotFoundError as e:
+        empty = empty_figure(str(e))
+        return empty, None, empty, str(e), gr.update(choices=[])
+    mission, all_frames = run_mission(
+        model,
+        n_episodes=int(n_episodes),
+        gravity=float(gravity),
+        enable_wind=bool(enable_wind),
+        wind_power=float(wind_power),
+        turbulence_power=float(turbulence),
+        render=bool(render_gif),
+        progress_cb=progress,
     )
+    overview_fig = mission_overview(mission)
+    # A GIF is only produced when rendering was requested and frames came back:
+    # a side-by-side comparison for 2+ episodes, a single replay otherwise.
+    gif_path = None
+    if render_gif and all_frames:
+        if n_episodes >= 2:
+            gif_path = make_comparison_gif(all_frames, mission.episodes, fps=15)
+        else:
+            gif_path = make_episode_gif(all_frames[0], mission.episodes[0], fps=15)
+    best = mission.best
+    detail_fig = single_episode_detail(best)
+    sr = mission.success_rate * 100  # fraction -> percent for display
+    icon = "🚀" if mission.avg_reward >= 150 else "💥"
+    stats_md = f"""
+### {icon} Mission Complete
+| Metric | Value |
+|---|---|
+| **Avg Reward** | `{mission.avg_reward:+.2f}` |
+| **Best** | `{best.total_reward:+.2f}` ({best.status_emoji} Ep {best.episode_idx+1}) |
+| **Worst** | `{mission.worst.total_reward:+.2f}` ({mission.worst.status_emoji} Ep {mission.worst.episode_idx+1}) |
+| **Success Rate** | `{sr:.1f}%` |
+| **Episodes** | `{len(mission.episodes)}` |
+**Per-Episode Scores:**
+"""
+    per_ep = "".join(
+        f"- `#{e.episode_idx+1}` {e.status_emoji} **{e.status}** — {e.total_reward:+.1f} ({len(e.steps)} steps)\n"
+        for e in mission.episodes
+    )
+    stats_md += per_ep
+    # Dropdown labels start with "#<1-based episode number>"; cb_select_episode
+    # parses that prefix to find the episode again.
+    ep_choices = [
+        f"#{e.episode_idx+1} — {e.status_emoji} {e.status} ({e.total_reward:+.1f})"
+        for e in mission.episodes
+    ]
+    # Cache the run so a later dropdown selection can re-render any episode.
+    _last_mission["data"] = mission
+    _last_mission["frames"] = all_frames
+    return overview_fig, gif_path, detail_fig, stats_md, gr.update(choices=ep_choices, value=ep_choices[0])
+_last_mission: dict = {"data": None, "frames": []}
+def cb_select_episode(selection: str) -> tuple:
+    mission: MissionResult | None = _last_mission.get("data")
+    all_frames = _last_mission.get("frames", [])
+    if not mission or not selection:
+        return empty_figure("Run a mission first."), None
+    try:
+        idx = int(selection.split("#")[1].split(" ")[0]) - 1
+    except Exception:
+        idx = 0
+    ep = mission.episodes[idx]
+    fig = single_episode_detail(ep)
+    gif = None
+    if all_frames and idx < len(all_frames):
+        gif = make_episode_gif(all_frames[idx], ep, fps=15)
+    return fig, gif
+def cb_start_training(total_steps: int, lr: float, batch_size: int) -> str:
+    global _train_state
+    if _train_state.running:
+        return "Training already in progress."
+    _train_state = TrainingState()
+    start_training(
+        base_model_path="sac_rocket_lander.zip",
+        total_timesteps=int(total_steps),
+        learning_rate=float(lr),
+        batch_size=int(batch_size),
+        state=_train_state,
+        save_path="sac_finetuned.zip",
+    )
+    return "Training started. Click **Refresh** to update charts."
+def cb_stop_training() -> str:
+    """Request a stop by clearing the ``running`` flag on the shared training state."""
+    # NOTE(review): presumably the training thread polls this flag — confirm in core/trainer.py.
+    _train_state.running = False
+    return "Stop signal sent."
+def cb_refresh_training() -> tuple:
+    fig = training_dashboard(_train_state)
+    n_ep = len(_train_state.episode_rewards)
+    rolling = float(np.mean(_train_state.episode_rewards[-20:])) if n_ep else 0.0
+    progress_pct = (_train_state.timestep / max(_train_state.total_timesteps, 1)) * 100
+    status_md = f"""
+| | |
+|---|---|
+| **Status** | `{_train_state.status}` |
+| **Progress** | `{progress_pct:.1f}%` |
+| **Episodes** | `{n_ep}` |
+| **Rolling Reward (20)** | `{rolling:+.1f}` |
+| **Best Reward** | `{_train_state.best_reward:+.1f}` |
+"""
+    return fig, status_md
+def cb_load_finetuned() -> str:
+    global _model
+    for path in _MODEL_PATHS:
+        if os.path.exists(path):
+            try:
+                _model = SAC.load(path)
+                return f"Model loaded from `{path}`."
+            except Exception as e:
+                return f"Failed to load `{path}`: {e}"
+    return "No checkpoint found."
+# ── CSS ───────────────────────────────────────────────────────────────────────
+CSS = """
 @import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700;900&family=Share+Tech+Mono&family=Exo+2:wght@300;400;600&display=swap');
 *, *::before, *::after { box-sizing: border-box; }
 body, .gradio-container {
     font-family: 'Exo 2', sans-serif !important;
 }
+.gradio-container { max-width: 1200px !important; margin: 0 auto !important; }
+.tab-nav { background: #060f1e !important; border-bottom: 1px solid #0d2540 !important; }
+.tab-nav button {
+    font-family: 'Share Tech Mono', monospace !important;
+    font-size: 0.72rem !important; letter-spacing: 0.18em !important;
+    color: #3a6080 !important; background: transparent !important;
+    border: none !important; text-transform: uppercase !important;
+    padding: 0.7rem 1.4rem !important;
+}
+.tab-nav button.selected {
+    color: #4fb3ff !important;
+    border-bottom: 2px solid #4fb3ff !important;
 }
+.mc-header {
+    text-align: center; padding: 2rem 1rem 1rem;
+    border-bottom: 1px solid #0d2540; margin-bottom: 1.5rem;
+}
+.mc-header h1 {
     font-family: 'Orbitron', monospace !important;
+    font-size: clamp(1.4rem, 3.5vw, 2.2rem) !important;
+    font-weight: 900 !important; letter-spacing: 0.1em !important;
+    color: #e8f4ff !important; margin: 0 !important;
 }
+.mc-sub {
     font-family: 'Share Tech Mono', monospace;
+    font-size: 0.72rem; color: #2d6a9f;
+    letter-spacing: 0.3em; text-transform: uppercase; margin-top: 0.3rem;
 }
 .status-strip {
+    display: flex; gap: 0.6rem; justify-content: center;
+    flex-wrap: wrap; margin: 1rem 0;
 }
 .badge {
     font-family: 'Share Tech Mono', monospace;
+    font-size: 0.68rem; letter-spacing: 0.15em;
+    padding: 4px 12px; border-radius: 3px; text-transform: uppercase;
 }
+.badge-green  { background:#041e12; color:#2ddb7c; border:1px solid #0a5530; }
+.badge-blue   { background:#020f20; color:#4fb3ff; border:1px solid #0b3362; }
+.badge-amber  { background:#1a1002; color:#f5a623; border:1px solid #5c3700; }
+.badge-purple { background:#120920; color:#c77dff; border:1px solid #4a1a7a; }
+button.primary {
+    font-family: 'Orbitron', monospace !important;
+    font-size: 0.82rem !important; font-weight: 700 !important;
+    letter-spacing: 0.15em !important; text-transform: uppercase !important;
+    background: linear-gradient(135deg,#0a2a52,#0d3a72) !important;
+    color: #4fb3ff !important; border: 1px solid #1a5a9e !important;
+    border-radius: 4px !important; transition: all 0.2s !important;
 }
+button.primary:hover {
+    background: linear-gradient(135deg,#0d3a72,#1150a0) !important;
+    border-color: #4fb3ff !important;
+    box-shadow: 0 4px 20px rgba(79,179,255,0.25) !important;
 }
+button.stop {
+    background: linear-gradient(135deg,#2a0a0a,#4a1010) !important;
+    color: #ff4d6d !important; border: 1px solid #7a1a1a !important;
+    font-family: 'Share Tech Mono', monospace !important;
 }
+label span, .gradio-container label {
     font-family: 'Share Tech Mono', monospace !important;
+    font-size: 0.72rem !important; letter-spacing: 0.15em !important;
+    text-transform: uppercase !important; color: #4fb3ff !important;
 }
 input[type=range] {
+    -webkit-appearance: none; height: 3px;
+    background: #0d2540; border-radius: 2px; outline: none;
 }
 input[type=range]::-webkit-slider-thumb {
+    -webkit-appearance: none; width: 16px; height: 16px;
+    border-radius: 50%; background: #4fb3ff; cursor: pointer;
+    border: 2px solid #030b1a; box-shadow: 0 0 8px rgba(79,179,255,0.5);
 }
+textarea, .gradio-container textarea {
+    font-family: 'Share Tech Mono', monospace !important;
+    font-size: 0.82rem !important; line-height: 1.7 !important;
+    background: #020810 !important; color: #7fcfff !important;
+    border: 1px solid #0d2540 !important; border-radius: 4px !important;
 }
+table { width: 100%; border-collapse: collapse; }
+th { background: #060f1e; color: #4fb3ff;
+     font-family: 'Share Tech Mono', monospace;
+     font-size: 0.7rem; letter-spacing: 0.1em; padding: 6px 10px; }
+td { border-top: 1px solid #0d2540; padding: 6px 10px;
+     color: #c8ddf0; font-size: 0.85rem; }
+footer { display: none !important; }
+.gradio-container .block { background: transparent !important; border: none !important; }
+"""
+# ── Theory ────────────────────────────────────────────────────────────────────
+THEORY_MD = """
+## Soft Actor-Critic (SAC)
+SAC is an **off-policy, maximum-entropy** deep RL algorithm for continuous
+action spaces. It simultaneously maximises expected return *and* policy entropy,
+encouraging exploration while converging to a stable policy.
+### Objective
+$$J(\\pi) = \\sum_t \\mathbb{E}_{(s_t,a_t)\\sim\\rho_\\pi}\\left[ r(s_t,a_t) + \\alpha\\,\\mathcal{H}(\\pi(\\cdot|s_t)) \\right]$$
+The temperature $\\alpha$ is **auto-tuned** to a target entropy level.
+### Architecture
+| Component | Role |
+|---|---|
+| **Actor** $\\pi_\\phi(a\\|s)$ | Gaussian policy — outputs mean & log-std |
+| **Critic 1** $Q_{\\theta_1}(s,a)$ | Q-value estimator |
+| **Critic 2** $Q_{\\theta_2}(s,a)$ | Clipped double-Q: take min to reduce overestimation |
+| **Target Critics** | Soft-updated copies ($\\tau=0.005$) for stable TD targets |
+### Update Rules
+**Critic** — minimise Bellman residual:
+$$y = r + \\gamma\\left(\\min_i Q_{\\bar\\theta_i}(s',\\tilde a') - \\alpha\\log\\pi(\\tilde a'|s')\\right)$$
+**Actor** — maximise Q + entropy:
+$$\\mathcal{L}(\\phi) = \\mathbb{E}\\left[\\alpha\\log\\pi_\\phi(a|s) - \\min_i Q_{\\theta_i}(s,a)\\right]$$
+**Temperature** — match target entropy $\\bar{\\mathcal{H}}$:
+$$\\mathcal{L}(\\alpha) = \\mathbb{E}\\left[-\\alpha(\\log\\pi(a|s)+\\bar{\\mathcal{H}})\\right]$$
+---
+## LunarLander-v3 (Continuous)
+| Property | Value |
+|---|---|
+| **State** | 8-dim: pos (x,y), vel (vx,vy), angle, angular vel, leg contacts |
+| **Action** | 2-dim continuous: main throttle, lateral thrust ∈ [−1,1] |
+| **Reward** | +10 each leg contact, +100 landing, −100 crash |
+| **Solved** | Episode reward ≥ 200 |
+---
+## Model Hyperparameters
+| Parameter | Value |
+|---|---|
+| `learning_rate` | 3×10⁻⁴ |
+| `buffer_size` | 1,000,000 |
+| `batch_size` | 256 |
+| `tau` | 0.005 |
+| `gamma` | 0.99 |
+| `target_entropy` | −2.0 |
+---
+## Reading the Charts
+- **Reward bars**: green ≥ 150, amber ≥ 0, red < 0
+- **Trajectory plot**: `★` = successful landing, `×` = crash
+- **Engine throttle**: main (blue) fires downward; lateral (amber) steers
+- **Training reward**: smoothed line (solid) trends matter more than raw (faded)
+- **Actor loss**: negative values normal — actor maximises Q plus entropy, so loss = α·log π − Q
+- **Entropy coef**: starts high, decreases as policy converges
+"""
+# ── Build UI ──────────────────────────────────────────────────────────────────
+with gr.Blocks(title="SpaceX Mission Control — SAC Rocket Lander") as demo:
+    gr.HTML("""
+    <div class="mc-header">
+        <div class="mc-sub">Autonomous Flight Intelligence System · SAC v2.0</div>
+        <h1>⬡ SpaceX Mission Control</h1>
+        <div class="mc-sub">Soft Actor-Critic · LunarLander-v3 · Continuous Control</div>
+    </div>
+    <div class="status-strip">
+        <span class="badge badge-green">● SAC MODEL LOADED</span>
+        <span class="badge badge-blue">● PHYSICS ENGINE READY</span>
+        <span class="badge badge-amber">● TELEMETRY ONLINE</span>
+        <span class="badge badge-purple">● TRAINING MODULE ARMED</span>
+    </div>
+    """)
+    with gr.Tabs():
+        # ── Mission Control ────────────────────────────────────────────────
+        with gr.Tab("🚀 MISSION CONTROL"):
+            with gr.Row():
+                with gr.Column(scale=1, min_width=300):
+                    gr.HTML('<div class="mc-sub" style="margin-bottom:0.8rem">MISSION PARAMETERS</div>')
+                    n_episodes = gr.Slider(1, 10, value=3, step=1,
+                                           label="Landing Attempts")
+                    gravity = gr.Slider(-20.0, -1.0, value=-10.0, step=0.5,
+                                        label="Gravity (m/s²)")
+                    enable_wind = gr.Checkbox(label="Enable Wind Disturbance", value=False)
+                    wind_power = gr.Slider(0.0, 20.0, value=5.0, step=0.5,
+                                           label="Wind Power", visible=False)
+                    turbulence = gr.Slider(0.0, 2.0, value=0.5, step=0.1,
+                                           label="Turbulence Power", visible=False)
+                    render_gif = gr.Checkbox(label="Render Animated Replay", value=True)
+                    enable_wind.change(
+                        lambda v: (gr.update(visible=v), gr.update(visible=v)),
+                        inputs=enable_wind,
+                        outputs=[wind_power, turbulence],
+                    )
+                    launch_btn = gr.Button("🚀  INITIATE LAUNCH SEQUENCE", variant="primary")
+                    gr.HTML('<div class="mc-sub" style="margin-top:1.2rem;margin-bottom:0.4rem">MODEL</div>')
+                    load_btn = gr.Button("📂 Reload Checkpoint")
+                    load_status = gr.Textbox(label="", lines=1, interactive=False,
+                                             placeholder="Model status…")
+                    load_btn.click(cb_load_finetuned, outputs=load_status)
+                with gr.Column(scale=2):
+                    stats_md = gr.Markdown("*Configure mission parameters and click Launch.*")
+                    episode_selector = gr.Dropdown(
+                        choices=[], label="Inspect Episode", interactive=True,
+                    )
+            with gr.Row():
+                overview_plot = gr.Plot(label="Mission Overview Dashboard")
+            with gr.Row():
+                with gr.Column(scale=1):
+                    detail_plot = gr.Plot(label="Episode Deep-Dive")
+                with gr.Column(scale=1):
+                    replay_gif = gr.Image(
+                        label="Episode Replay (GIF with HUD)",
+                        type="filepath",
+                    )
+            episode_selector.change(
+                cb_select_episode,
+                inputs=episode_selector,
+                outputs=[detail_plot, replay_gif],
             )
+            launch_btn.click(
+                cb_run_mission,
+                inputs=[n_episodes, gravity, enable_wind, wind_power, turbulence, render_gif],
+                outputs=[overview_plot, replay_gif, detail_plot, stats_md, episode_selector],
             )
+        # ── Training Lab ───────────────────────────────────────────────────
+        with gr.Tab("🧪 TRAINING LAB"):
+            gr.Markdown("### Fine-tune the SAC agent in your browser")
+            gr.Markdown(
+                "Runs in a background thread — click **Refresh Metrics** to pull updates. "
+                "The fine-tuned model saves to `sac_finetuned.zip` and is used automatically."
+            )
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.HTML('<div class="mc-sub" style="margin-bottom:0.8rem">HYPERPARAMETERS</div>')
+                    train_steps = gr.Slider(5_000, 200_000, value=20_000, step=5_000,
+                                            label="Total Timesteps")
+                    train_lr = gr.Slider(1e-5, 1e-3, value=3e-4, step=1e-5,
+                                         label="Learning Rate")
+                    train_batch = gr.Slider(64, 512, value=256, step=64,
+                                            label="Batch Size")
+                    with gr.Row():
+                        btn_train_start = gr.Button("▶ Start Training", variant="primary")
+                        btn_train_stop  = gr.Button("⏹ Stop", variant="stop")
+                    btn_refresh = gr.Button("🔄 Refresh Metrics")
+                    train_msg = gr.Textbox(label="", lines=2, interactive=False)
+                with gr.Column(scale=2):
+                    train_status_md = gr.Markdown("*Start training to see live metrics.*")
+                    train_plot = gr.Plot(label="Live Training Dashboard")
+            btn_train_start.click(
+                cb_start_training,
+                inputs=[train_steps, train_lr, train_batch],
+                outputs=train_msg,
+            )
+            btn_train_stop.click(cb_stop_training, outputs=train_msg)
+            btn_refresh.click(cb_refresh_training, outputs=[train_plot, train_status_md])
+        # ── Algorithm Guide ────────────────────────────────────────────────
+        with gr.Tab("📚 ALGORITHM GUIDE"):
+            gr.Markdown(THEORY_MD)
+    gr.HTML("""
+    <div style="text-align:center;font-family:'Share Tech Mono',monospace;
+                font-size:0.65rem;color:#1e3d5c;letter-spacing:0.2em;
+                text-transform:uppercase;padding:2rem 0 1rem;">
+        Powered by Stable-Baselines3 · Soft Actor-Critic ·
+        Gymnasium LunarLander-v3 · Gradio
+    </div>
+    """)
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, share=False, css=CSS)

core/__init__.py ADDED Viewed

File without changes

core/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (197 Bytes). View file

core/__pycache__/mission.cpython-311.pyc ADDED Viewed

Binary file (10.4 kB). View file

core/__pycache__/trainer.cpython-311.pyc ADDED Viewed

Binary file (7.17 kB). View file

core/mission.py ADDED Viewed

	@@ -0,0 +1,178 @@

+"""
+Mission runner — executes SAC agent episodes, collects full telemetry.
+Returns structured data for both the UI and the visualization layer.
+"""
+from __future__ import annotations
+import numpy as np
+import gymnasium as gym
+from dataclasses import dataclass, field
+from stable_baselines3 import SAC
+# ── Telemetry data structures ─────────────────────────────────────────────────
+@dataclass
+class StepData:
+    """Telemetry for one environment step: lander state, reward, and the action applied."""
+    # Position and linear velocity components.
+    x: float
+    y: float
+    vx: float
+    vy: float
+    angle: float  # EpisodeResult.angles passes this through np.degrees, so presumably radians
+    angular_vel: float
+    # Leg contact flags.
+    left_leg: bool
+    right_leg: bool
+    reward: float  # reward for this single step (summed by EpisodeResult.cumulative_rewards)
+    action_main: float   # main engine throttle [-1, 1]
+    action_lateral: float  # lateral thruster [-1, 1]
+@dataclass
+class EpisodeResult:
+    """Telemetry and outcome of one episode.
+
+    ``steps`` holds one record per environment step and ``total_reward`` is
+    the episode score. Every property below is a read-only view derived from
+    those two attributes and is recomputed on each access.
+    """
+    episode_idx: int
+    steps: list[StepData] = field(default_factory=list)
+    total_reward: float = 0.0
+    # run_mission leaves these two flags at their defaults.
+    landed: bool = False
+    crashed: bool = False
+
+    @property
+    def status(self) -> str:
+        """Outcome label: the first score floor that ``total_reward`` reaches."""
+        score = self.total_reward
+        for floor, label in ((200, "PERFECT"), (150, "LANDED"), (0, "PARTIAL")):
+            if score >= floor:
+                return label
+        return "CRASHED"
+
+    @property
+    def status_emoji(self) -> str:
+        """Emoji matching ``status``."""
+        icons = {"PERFECT": "🏆", "LANDED": "✅", "PARTIAL": "⚠️", "CRASHED": "💥"}
+        return icons[self.status]
+
+    @property
+    def xs(self) -> list[float]:
+        """X coordinate of every step."""
+        return [step.x for step in self.steps]
+
+    @property
+    def ys(self) -> list[float]:
+        """Y coordinate of every step."""
+        return [step.y for step in self.steps]
+
+    @property
+    def cumulative_rewards(self) -> list[float]:
+        """Running reward total after each step (empty when there are no steps)."""
+        totals: list[float] = []
+        for step in self.steps:
+            previous = totals[-1] if totals else 0.0
+            totals.append(previous + step.reward)
+        return totals
+
+    @property
+    def main_throttle(self) -> list[float]:
+        """Main-engine action of every step."""
+        return [step.action_main for step in self.steps]
+
+    @property
+    def lateral_throttle(self) -> list[float]:
+        """Lateral-thruster action of every step."""
+        return [step.action_lateral for step in self.steps]
+
+    @property
+    def angles(self) -> list[float]:
+        """Per-step ``angle`` passed through ``np.degrees``."""
+        return [np.degrees(step.angle) for step in self.steps]
+@dataclass
+class MissionResult:
+    """Ordered collection of episode results with aggregate statistics."""
+    episodes: list[EpisodeResult] = field(default_factory=list)
+
+    @property
+    def rewards(self) -> list[float]:
+        """Total reward of each episode, in run order."""
+        return [episode.total_reward for episode in self.episodes]
+
+    @property
+    def success_rate(self) -> float:
+        """Fraction of episodes scoring at least 150; 0.0 when there are none."""
+        count = len(self.episodes)
+        if count == 0:
+            return 0.0
+        successes = [episode for episode in self.episodes if episode.total_reward >= 150]
+        return len(successes) / count
+
+    @property
+    def avg_reward(self) -> float:
+        """Mean episode reward; 0.0 when there are no episodes."""
+        scores = self.rewards
+        if not scores:
+            return 0.0
+        return float(np.mean(scores))
+
+    @property
+    def best(self) -> EpisodeResult:
+        """Highest-scoring episode (first one on ties). Raises ValueError if empty."""
+        return max(self.episodes, key=lambda episode: episode.total_reward)
+
+    @property
+    def worst(self) -> EpisodeResult:
+        """Lowest-scoring episode (first one on ties). Raises ValueError if empty."""
+        return min(self.episodes, key=lambda episode: episode.total_reward)
+# ── Runner ────────────────────────────────────────────────────────────────────
+def run_mission(
+    model: SAC,
+    n_episodes: int = 5,
+    gravity: float = -10.0,
+    enable_wind: bool = False,
+    wind_power: float = 5.0,
+    turbulence_power: float = 0.5,
+    render: bool = True,
+    progress_cb=None,
+) -> tuple[MissionResult, list[list[np.ndarray]]]:
+    """
+    Run `n_episodes` of the lander with deterministic policy actions.
+
+    A fresh "LunarLander-v3" environment (continuous actions) is created for
+    every episode and is always closed, even when the episode raises.
+
+    Args:
+        model: Policy used to choose actions via ``model.predict``.
+        n_episodes: Number of episodes to run; coerced with ``int()``.
+        gravity: Passed through to the environment.
+        enable_wind: Passed through; when False, wind and turbulence power are
+            forced to 0.0 regardless of the two arguments below.
+        wind_power: Wind strength used only when ``enable_wind`` is True.
+        turbulence_power: Turbulence strength used only when ``enable_wind`` is True.
+        render: When True the environment renders "rgb_array" frames and one
+            frame is captured after every step.
+        progress_cb: Optional ``callable(fraction, message)`` invoked before
+            each episode and once more with 1.0 at the end.
+
+    Returns (MissionResult, list_of_frame_lists) — one frame list per episode
+    (empty lists when ``render`` is False).
+    """
+    # range() rejects floats, which a numeric UI control may deliver.
+    n_episodes = int(n_episodes)
+    mission = MissionResult()
+    all_frames: list[list[np.ndarray]] = []
+    env_kwargs = dict(
+        continuous=True,
+        gravity=gravity,
+        enable_wind=enable_wind,
+        wind_power=wind_power if enable_wind else 0.0,
+        turbulence_power=turbulence_power if enable_wind else 0.0,
+        render_mode="rgb_array" if render else None,
+    )
+    for ep_idx in range(n_episodes):
+        if progress_cb:
+            progress_cb(ep_idx / n_episodes, f"Running mission {ep_idx + 1}/{n_episodes}…")
+        env = gym.make("LunarLander-v3", **env_kwargs)
+        result = EpisodeResult(episode_idx=ep_idx)
+        frames: list[np.ndarray] = []
+        try:
+            obs, _ = env.reset()
+            done = False
+            while not done:
+                action, _ = model.predict(obs, deterministic=True)
+                next_obs, reward, terminated, truncated, _ = env.step(action)
+                # The record pairs the observation the policy acted on with
+                # the action it chose and the reward that action earned.
+                result.steps.append(StepData(
+                    x=float(obs[0]), y=float(obs[1]),
+                    vx=float(obs[2]), vy=float(obs[3]),
+                    angle=float(obs[4]), angular_vel=float(obs[5]),
+                    left_leg=bool(obs[6]), right_leg=bool(obs[7]),
+                    reward=float(reward),
+                    action_main=float(action[0]),
+                    action_lateral=float(action[1]),
+                ))
+                result.total_reward += float(reward)
+                if render:
+                    frame = env.render()
+                    if frame is not None:
+                        frames.append(frame)
+                obs = next_obs
+                done = terminated or truncated
+        finally:
+            # Release the environment even if the policy or env raised mid-episode.
+            env.close()
+        mission.episodes.append(result)
+        all_frames.append(frames)
+    if progress_cb:
+        progress_cb(1.0, "Mission complete.")
+    return mission, all_frames

core/trainer.py ADDED Viewed

	@@ -0,0 +1,120 @@

+"""
+SAC training pipeline — fine-tune or train from scratch with live callbacks.
+"""
+from __future__ import annotations
+import os
+import threading
+from dataclasses import dataclass, field
+from stable_baselines3 import SAC
+from stable_baselines3.common.callbacks import BaseCallback
+import gymnasium as gym
+import numpy as np
+@dataclass
+class TrainingState:
+    """Mutable progress record shared between the training thread and its observers.
+
+    ``start_training`` and ``_LiveCallback`` write to it; readers poll it.
+    """
+    running: bool = False  # set True by the training thread; the callback aborts training once it is False
+    timestep: int = 0  # latest step count copied from the callback
+    total_timesteps: int = 0  # step budget requested for the run
+    episode_rewards: list[float] = field(default_factory=list)  # one entry per finished training episode
+    actor_losses: list[float] = field(default_factory=list)  # sampled together with log_steps
+    critic_losses: list[float] = field(default_factory=list)  # sampled together with log_steps
+    ent_coefs: list[float] = field(default_factory=list)  # sampled together with log_steps
+    log_steps: list[int] = field(default_factory=list)  # step count at which each metric sample was taken
+    status: str = "idle"  # human-readable progress line
+    best_reward: float = float("-inf")  # highest episode reward seen so far; -inf until one finishes
+class _LiveCallback(BaseCallback):
+    """Copies training progress into a shared ``TrainingState`` on every step.
+
+    Step counts are reported relative to the start of the current ``learn``
+    call. ``start_training`` resumes with ``reset_num_timesteps=False``, so a
+    loaded model's ``num_timesteps`` already includes its earlier training;
+    comparing that raw counter with ``state.total_timesteps`` would report
+    progress far beyond 100%.
+
+    Setting ``state.running`` to False makes ``_on_step`` return False, which
+    asks Stable-Baselines3 to stop training.
+    """
+
+    def __init__(self, state: TrainingState, log_interval: int = 500):
+        """Store the shared ``state`` and the number of steps between metric samples."""
+        super().__init__()
+        self._state = state
+        self._log_interval = log_interval
+        self._ep_rewards: list[float] = []
+        # Value of num_timesteps when this run started; 0 for a fresh model.
+        self._start_timestep = 0
+
+    def _on_training_start(self) -> None:
+        """Remember the step counter the run starts from."""
+        self._start_timestep = self.num_timesteps
+
+    def _elapsed(self) -> int:
+        """Steps taken since the current run started."""
+        return self.num_timesteps - self._start_timestep
+
+    def _on_step(self) -> bool:
+        """Update the shared state; return False to abort once ``running`` is cleared."""
+        state = self._state
+        if not state.running:
+            return False  # abort training
+        elapsed = self._elapsed()
+        state.timestep = elapsed
+        # Collect episode rewards from monitor wrapper
+        for info in self.locals.get("infos", []):
+            if "episode" in info:
+                r = float(info["episode"]["r"])
+                self._ep_rewards.append(r)
+                state.episode_rewards.append(r)
+                if r > state.best_reward:
+                    state.best_reward = r
+        if elapsed % self._log_interval == 0:
+            losses = self.model.logger.name_to_value
+            # Metrics absent from the logger at this moment are recorded as 0.
+            state.actor_losses.append(float(losses.get("train/actor_loss", 0)))
+            state.critic_losses.append(float(losses.get("train/critic_loss", 0)))
+            state.ent_coefs.append(float(losses.get("train/ent_coef", 0)))
+            # Appended last so readers can treat log_steps as the committed length.
+            state.log_steps.append(elapsed)
+        pct = elapsed / max(state.total_timesteps, 1)
+        rolling = float(np.mean(self._ep_rewards[-20:])) if self._ep_rewards else 0.0
+        state.status = (
+            f"Step {elapsed:,}/{state.total_timesteps:,} "
+            f"({pct*100:.1f}%)  |  Rolling reward: {rolling:+.1f}  |  "
+            f"Best: {state.best_reward:+.1f}"
+        )
+        return True
+
+    def _on_training_end(self) -> None:
+        """Publish the final status line and clear the running flag."""
+        self._state.status = (
+            f"Training complete — {self._elapsed():,} steps.  "
+            f"Best reward: {self._state.best_reward:+.1f}"
+        )
+        self._state.running = False
+def start_training(
+    base_model_path: str,
+    total_timesteps: int,
+    learning_rate: float,
+    batch_size: int,
+    state: TrainingState,
+    save_path: str = "sac_finetuned.zip",
+) -> threading.Thread:
+    """Launches training in a daemon thread. Progress written to `state`.
+
+    If ``base_model_path`` exists the model is loaded from it and fine-tuned;
+    otherwise a new ``MlpPolicy`` SAC model is trained from scratch. The
+    result is saved to ``save_path`` when training finishes.
+
+    ``state.running`` is True from before this function returns until the
+    thread ends, whether it finishes, is aborted or fails. On failure
+    ``state.status`` carries the error message and the exception is re-raised
+    inside the thread.
+
+    Args:
+        base_model_path: Checkpoint to resume from, if present on disk.
+        total_timesteps: Number of additional environment steps; coerced with ``int()``.
+        learning_rate: Learning rate for the run.
+        batch_size: Minibatch size; coerced with ``int()``.
+        state: Shared progress record updated by the training thread.
+        save_path: Destination of the trained model.
+
+    Returns:
+        The already-started daemon thread.
+    """
+    # Numeric UI controls may hand over floats; both values are used as integer counts.
+    total_timesteps = int(total_timesteps)
+    batch_size = int(batch_size)
+    # Publish the running flag before the thread exists, so a caller that polls
+    # `state` right after this call never sees a stale "not running" value and a
+    # stop request issued immediately is not overwritten by the thread.
+    state.running = True
+    state.total_timesteps = total_timesteps
+    state.status = "Initialising environment…"
+
+    def _train():
+        from stable_baselines3.common.monitor import Monitor
+        env = None
+        try:
+            env = Monitor(gym.make("LunarLander-v3", continuous=True))
+            if os.path.exists(base_model_path):
+                # The learning rate must be substituted while loading: the
+                # schedule is built from it during load, so assigning
+                # `model.learning_rate` afterwards would leave the saved rate
+                # in effect.
+                model = SAC.load(
+                    base_model_path,
+                    env=env,
+                    custom_objects={"learning_rate": learning_rate},
+                )
+                model.batch_size = batch_size
+            else:
+                model = SAC(
+                    "MlpPolicy", env,
+                    learning_rate=learning_rate,
+                    batch_size=batch_size,
+                    verbose=0,
+                )
+            cb = _LiveCallback(state, log_interval=max(total_timesteps // 200, 200))
+            model.learn(
+                total_timesteps=total_timesteps,
+                callback=cb,
+                reset_num_timesteps=False,
+                progress_bar=False,
+                log_interval=1,
+            )
+            model.save(save_path)
+        except Exception as exc:
+            # Surface the failure to pollers, then let the thread report the traceback.
+            state.status = f"Training failed: {exc}"
+            raise
+        finally:
+            # Without this a failed run would look like it is still training forever.
+            state.running = False
+            if env is not None:
+                env.close()
+
+    thread = threading.Thread(target=_train, daemon=True)
+    thread.start()
+    return thread

requirements.txt CHANGED Viewed

@@ -1,3 +1,5 @@
 stable-baselines3[extra]
 gymnasium[box2d]
 shimmy

 stable-baselines3[extra]
 gymnasium[box2d]
 shimmy
+matplotlib
+gradio>=6.0.0

viz/__init__.py ADDED Viewed

File without changes

viz/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (196 Bytes). View file

viz/__pycache__/charts.cpython-311.pyc ADDED Viewed

Binary file (18.5 kB). View file

viz/__pycache__/replay.cpython-311.pyc ADDED Viewed

Binary file (8.43 kB). View file

viz/charts.py ADDED Viewed

	@@ -0,0 +1,246 @@

+"""
+All matplotlib figure generation for the dashboard.
+Every function returns a plt.Figure — caller closes or passes to Gradio.
+"""
+from __future__ import annotations
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+import matplotlib.patches as mpatches
+from matplotlib.collections import LineCollection
+from core.mission import MissionResult, EpisodeResult
+# ── Palette ───────────────────────────────────────────────────────────────────
+# Dark theme colours shared by every figure in this module.
+BG       = "#030b1a"  # figure background
+BG2      = "#060f1e"  # axes background and legend fill
+GRID     = "#0d2540"  # grid lines, spines and legend borders
+ACCENT   = "#4fb3ff"
+GREEN    = "#2ddb7c"
+AMBER    = "#f5a623"
+RED      = "#ff4d6d"
+PURPLE   = "#c77dff"
+TEXT     = "#c8ddf0"  # titles
+DIM      = "#3a6080"  # ticks, axis labels and legend text
+# Per-episode line colours; the plots index this list modulo its length.
+EP_COLORS = [ACCENT, GREEN, AMBER, PURPLE, "#ff9f1c", "#e9c46a", "#f4a261"]
+def _style_ax(ax, title: str = "", xlabel: str = "", ylabel: str = ""):
+    """Apply the dashboard theme to ``ax`` and set whichever labels are non-empty."""
+    ax.set_facecolor(BG2)
+    ax.tick_params(colors=DIM, labelsize=8)
+    for border in ax.spines.values():
+        border.set_color(GRID)
+    ax.grid(color=GRID, linewidth=0.5, linestyle="--", alpha=0.6)
+    if title:
+        ax.set_title(title, color=TEXT, fontsize=10, pad=8, fontfamily="monospace")
+    # Both axis labels share one style; empty strings leave the label untouched.
+    for text, apply_label in ((xlabel, ax.set_xlabel), (ylabel, ax.set_ylabel)):
+        if text:
+            apply_label(text, color=DIM, fontsize=8)
+def mission_overview(mission: MissionResult) -> plt.Figure:
+    """Build the 2x2 mission summary figure.
+
+    Panels: per-episode reward bars, x/y flight paths, cumulative reward per
+    step, and the main/lateral actions of the best-scoring episode. The
+    mission must contain at least one episode (``mission.best`` is read).
+    """
+    episodes = mission.episodes
+    scores = mission.rewards
+    legend_style = dict(fontsize=7, facecolor=BG2, edgecolor=GRID, labelcolor=DIM)
+
+    def episode_colour(position: int) -> str:
+        # Colours repeat once there are more episodes than palette entries.
+        return EP_COLORS[position % len(EP_COLORS)]
+
+    fig = plt.figure(figsize=(14, 9), facecolor=BG)
+    layout = gridspec.GridSpec(2, 2, figure=fig, hspace=0.45, wspace=0.32,
+                               left=0.07, right=0.97, top=0.90, bottom=0.08)
+
+    # ── Panel 1: one bar per episode, coloured by score band ────────────────
+    reward_ax = fig.add_subplot(layout[0, 0])
+    _style_ax(reward_ax, "EPISODE REWARDS", "Episode", "Score")
+    tick_labels = [f"#{episode.episode_idx+1}" for episode in episodes]
+    bar_colours = []
+    for score in scores:
+        if score >= 150:
+            bar_colours.append(GREEN)
+        elif score >= 0:
+            bar_colours.append(AMBER)
+        else:
+            bar_colours.append(RED)
+    bars = reward_ax.bar(tick_labels, scores, color=bar_colours, edgecolor=BG, linewidth=0.8)
+    reward_ax.axhline(200, color=GREEN, linestyle="--", linewidth=1, alpha=0.5, label="Perfect (200)")
+    reward_ax.axhline(150, color=ACCENT, linestyle="--", linewidth=1, alpha=0.5, label="Success (150)")
+    reward_ax.axhline(0, color=RED, linestyle="--", linewidth=1, alpha=0.3)
+    reward_ax.legend(**legend_style)
+    for bar, score in zip(bars, scores):
+        label_x = bar.get_x() + bar.get_width()/2
+        label_y = bar.get_height() + 3
+        reward_ax.text(label_x, label_y, f"{score:.0f}",
+                       ha="center", va="bottom", color=TEXT, fontsize=8)
+
+    # ── Panel 2: x/y path of every episode ───────────────────────────────────
+    path_ax = fig.add_subplot(layout[0, 1])
+    _style_ax(path_ax, "FLIGHT TRAJECTORIES", "X Position", "Altitude")
+    for position, episode in enumerate(episodes):
+        colour = episode_colour(position)
+        xs, ys = episode.xs, episode.ys
+        # Consecutive point pairs become line segments, all drawn in the
+        # episode's single colour.
+        vertices = np.array([xs, ys]).T.reshape(-1, 1, 2)
+        pairs = np.concatenate([vertices[:-1], vertices[1:]], axis=1)
+        path_ax.add_collection(
+            LineCollection(pairs, colors=colour, linewidth=1.2, alpha=0.7)
+        )
+        # Final position: star for a score of 150 or more, cross otherwise.
+        end_marker = "*" if episode.total_reward >= 150 else "x"
+        path_ax.scatter(xs[-1], ys[-1], marker=end_marker, s=80, color=colour, zorder=5)
+    # Collections do not update the view limits on their own.
+    path_ax.autoscale()
+    path_ax.axhline(0, color=GRID, linewidth=1)
+    legend_handles = [
+        mpatches.Patch(color=episode_colour(position),
+                       label=f"#{episode.episode_idx+1} {episode.status_emoji}")
+        for position, episode in enumerate(episodes)
+    ]
+    path_ax.legend(handles=legend_handles, loc="upper right", **legend_style)
+
+    # ── Panel 3: running reward total per step ───────────────────────────────
+    total_ax = fig.add_subplot(layout[1, 0])
+    _style_ax(total_ax, "CUMULATIVE REWARD", "Step", "Reward")
+    for position, episode in enumerate(episodes):
+        total_ax.plot(episode.cumulative_rewards, color=episode_colour(position),
+                      linewidth=1.5, label=f"#{episode.episode_idx+1}", alpha=0.85)
+    total_ax.axhline(0, color=RED, linestyle="--", linewidth=0.8, alpha=0.4)
+    total_ax.legend(**legend_style)
+
+    # ── Panel 4: actions of the best-scoring episode ─────────────────────────
+    throttle_ax = fig.add_subplot(layout[1, 1])
+    _style_ax(throttle_ax, "ENGINE THROTTLE — BEST EPISODE", "Step", "Throttle")
+    top_episode = mission.best
+    step_axis = range(len(top_episode.steps))
+    main_values = top_episode.main_throttle
+    lateral_values = top_episode.lateral_throttle
+    throttle_ax.fill_between(step_axis, 0, main_values,
+                             color=ACCENT, alpha=0.35, label="Main Engine")
+    throttle_ax.plot(step_axis, main_values, color=ACCENT, linewidth=1.2)
+    throttle_ax.fill_between(step_axis, 0, lateral_values,
+                             color=AMBER, alpha=0.25, label="Lateral Thrusters")
+    throttle_ax.plot(step_axis, lateral_values, color=AMBER, linewidth=1.0)
+    throttle_ax.axhline(0, color=GRID, linewidth=0.8)
+    throttle_ax.set_ylim(-1.1, 1.1)
+    throttle_ax.legend(**legend_style)
+
+    # ── Figure title ─────────────────────────────────────────────────────────
+    success_pct = mission.success_rate * 100
+    fig.suptitle(
+        f"MISSION REPORT  ·  {len(episodes)} episodes  ·  "
+        f"Avg {mission.avg_reward:+.1f}  ·  Success {success_pct:.0f}%",
+        color=TEXT, fontsize=12, fontfamily="monospace", y=0.96,
+    )
+    return fig
+def single_episode_detail(ep: EpisodeResult) -> plt.Figure:
+    """Build the 2x3 detail figure for one episode.
+
+    Top row: trajectory, cumulative reward, altitude. Bottom row: body angle,
+    main-engine action, lateral-thruster action. The episode must contain at
+    least one step (its first and last positions are plotted).
+    """
+    succeeded = ep.total_reward >= 150
+    outcome_colour = GREEN if succeeded else RED
+    xs, ys = ep.xs, ep.ys
+    step_axis = list(range(len(ep.steps)))
+
+    fig = plt.figure(figsize=(14, 8), facecolor=BG)
+    layout = gridspec.GridSpec(2, 3, figure=fig, hspace=0.5, wspace=0.38,
+                               left=0.07, right=0.97, top=0.88, bottom=0.08)
+
+    # Trajectory with start and end markers
+    path_ax = fig.add_subplot(layout[0, 0])
+    _style_ax(path_ax, "TRAJECTORY", "X", "Y")
+    path_ax.plot(xs, ys, color=ACCENT, linewidth=1.5)
+    path_ax.scatter(xs[0], ys[0], s=60, color=GREEN, zorder=5, label="Start")
+    path_ax.scatter(xs[-1], ys[-1], s=80,
+                    marker="*" if succeeded else "x",
+                    color=outcome_colour, zorder=5, label="End")
+    path_ax.axhline(0, color=GRID, linewidth=1)
+    path_ax.legend(fontsize=7, facecolor=BG2, edgecolor=GRID, labelcolor=DIM)
+
+    # Running reward total, tinted by outcome
+    total_ax = fig.add_subplot(layout[0, 1])
+    _style_ax(total_ax, "CUMULATIVE REWARD", "Step", "Reward")
+    running_total = ep.cumulative_rewards
+    total_ax.fill_between(step_axis, 0, running_total, color=outcome_colour, alpha=0.2)
+    total_ax.plot(step_axis, running_total, color=outcome_colour, linewidth=1.5)
+    total_ax.axhline(0, color=GRID, linewidth=0.8, linestyle="--")
+
+    # Y coordinate per step
+    altitude_ax = fig.add_subplot(layout[0, 2])
+    _style_ax(altitude_ax, "ALTITUDE", "Step", "Y")
+    altitude_ax.fill_between(step_axis, 0, ys, color=ACCENT, alpha=0.15)
+    altitude_ax.plot(step_axis, ys, color=ACCENT, linewidth=1.5)
+    altitude_ax.axhline(0, color=RED, linewidth=1, linestyle="--", alpha=0.5)
+
+    # Body angle in degrees
+    angle_ax = fig.add_subplot(layout[1, 0])
+    _style_ax(angle_ax, "BODY ANGLE", "Step", "Degrees")
+    degrees = ep.angles
+    angle_ax.fill_between(step_axis, 0, degrees, color=AMBER, alpha=0.2)
+    angle_ax.plot(step_axis, degrees, color=AMBER, linewidth=1.3)
+    angle_ax.axhline(0, color=GRID, linewidth=0.8, linestyle="--")
+
+    # Main-engine action
+    main_ax = fig.add_subplot(layout[1, 1])
+    _style_ax(main_ax, "MAIN ENGINE", "Step", "Throttle")
+    main_values = ep.main_throttle
+    main_ax.fill_between(step_axis, 0, main_values, color=ACCENT, alpha=0.3)
+    main_ax.plot(step_axis, main_values, color=ACCENT, linewidth=1.2)
+    main_ax.set_ylim(-1.1, 1.1)
+    main_ax.axhline(0, color=GRID, linewidth=0.8)
+
+    # Lateral-thruster action
+    lateral_ax = fig.add_subplot(layout[1, 2])
+    _style_ax(lateral_ax, "LATERAL THRUSTERS", "Step", "Throttle")
+    lateral_values = ep.lateral_throttle
+    lateral_ax.fill_between(step_axis, 0, lateral_values, color=PURPLE, alpha=0.3)
+    lateral_ax.plot(step_axis, lateral_values, color=PURPLE, linewidth=1.2)
+    lateral_ax.set_ylim(-1.1, 1.1)
+    lateral_ax.axhline(0, color=GRID, linewidth=0.8)
+
+    fig.suptitle(
+        f"EPISODE {ep.episode_idx+1} DEEP-DIVE  ·  "
+        f"{ep.status_emoji} {ep.status}  ·  Score: {ep.total_reward:+.1f}  ·  "
+        f"{len(ep.steps)} steps",
+        color=TEXT, fontsize=11, fontfamily="monospace", y=0.95,
+    )
+    return fig
+def training_dashboard(state) -> plt.Figure:
+    """Live training metrics: reward history + losses + entropy.
+
+    ``state`` is the training progress record (``episode_rewards``,
+    ``log_steps``, ``actor_losses``, ``critic_losses``, ``ent_coefs``,
+    ``timestep``, ``total_timesteps``, ``best_reward``). Its lists may be
+    appended to by the training thread while this function runs, so each one
+    is copied once and the metric series are cut to a common length before
+    plotting; otherwise x and y could differ in length and ``plot`` would
+    raise.
+    """
+    rewards = list(state.episode_rewards)
+    log_steps = list(state.log_steps)
+    actor_losses = list(state.actor_losses)
+    critic_losses = list(state.critic_losses)
+    ent_coefs = list(state.ent_coefs)
+    n_logged = min(len(log_steps), len(actor_losses), len(critic_losses), len(ent_coefs))
+    log_steps = log_steps[:n_logged]
+    actor_losses = actor_losses[:n_logged]
+    critic_losses = critic_losses[:n_logged]
+    ent_coefs = ent_coefs[:n_logged]
+
+    fig = plt.figure(figsize=(14, 5), facecolor=BG)
+    gs = gridspec.GridSpec(1, 3, figure=fig, wspace=0.38,
+                           left=0.06, right=0.97, top=0.85, bottom=0.12)
+    # Reward curve: raw values plus a moving average once there are enough episodes
+    ax = fig.add_subplot(gs[0])
+    _style_ax(ax, "EPISODE REWARD", "Episode", "Reward")
+    if rewards:
+        eps = list(range(len(rewards)))
+        ax.plot(eps, rewards, color=ACCENT, linewidth=0.8, alpha=0.4)
+        if len(eps) > 20:
+            k = max(5, len(eps) // 30)
+            smooth = np.convolve(rewards, np.ones(k)/k, "valid")
+            # "valid" drops the first k-1 points, so the average starts at index k-1.
+            ax.plot(range(k-1, len(eps)), smooth, color=ACCENT, linewidth=2)
+        ax.axhline(200, color=GREEN, linestyle="--", linewidth=1, alpha=0.5)
+        ax.axhline(150, color=AMBER, linestyle="--", linewidth=1, alpha=0.5)
+    # Losses
+    ax2 = fig.add_subplot(gs[1])
+    _style_ax(ax2, "ACTOR / CRITIC LOSS", "Log Step", "Loss")
+    if log_steps:
+        ax2.plot(log_steps, actor_losses, color=ACCENT,
+                 linewidth=1.5, label="Actor")
+        ax2.plot(log_steps, critic_losses, color=AMBER,
+                 linewidth=1.5, label="Critic")
+        ax2.legend(fontsize=7, facecolor=BG2, edgecolor=GRID, labelcolor=DIM)
+    # Entropy coef
+    ax3 = fig.add_subplot(gs[2])
+    _style_ax(ax3, "ENTROPY COEFFICIENT", "Log Step", "α")
+    if log_steps:
+        ax3.plot(log_steps, ent_coefs, color=PURPLE, linewidth=1.5)
+        ax3.axhline(0, color=GRID, linewidth=0.8, linestyle="--")
+    n_ep = len(rewards)
+    best = state.best_reward
+    # best_reward stays at -inf until an episode finishes; show a placeholder instead.
+    best_text = f"{best:+.1f}" if np.isfinite(best) else "n/a"
+    fig.suptitle(
+        f"SAC TRAINING  ·  {state.timestep:,}/{state.total_timesteps:,} steps  ·  "
+        f"{n_ep} episodes  ·  Best: {best_text}",
+        color=TEXT, fontsize=10, fontfamily="monospace",
+    )
+    return fig
+def empty_figure(message: str = "Run a mission to see charts.") -> plt.Figure:
+    """Return a themed placeholder figure showing ``message`` centred, with no axes."""
+    text_style = dict(ha="center", va="center", color=DIM,
+                      fontsize=13, fontfamily="monospace")
+    fig, ax = plt.subplots(figsize=(12, 5), facecolor=BG)
+    fig.patch.set_facecolor(BG)
+    ax.set_facecolor(BG2)
+    # Axes-fraction coordinates keep the text centred whatever the data limits are.
+    ax.text(0.5, 0.5, message, transform=ax.transAxes, **text_style)
+    ax.axis("off")
+    return fig

viz/replay.py ADDED Viewed

	@@ -0,0 +1,160 @@

+"""
+Animated GIF generation from raw RGB frames.
+Adds HUD overlay (step, reward, throttle bars) using PIL drawing — no matplotlib overhead.
+"""
+from __future__ import annotations
+import tempfile
+import numpy as np
+import PIL.Image
+import PIL.ImageDraw
+import PIL.ImageFont
+from core.mission import EpisodeResult
+# ── HUD rendering ─────────────────────────────────────────────────────────────
+def _draw_hud(
+    img: PIL.Image.Image,
+    step: int,
+    cumulative_reward: float,
+    main_throttle: float,
+    lateral_throttle: float,
+    status: str,
+) -> PIL.Image.Image:
+    """Draw the HUD onto ``img`` in place and return it.
+
+    The HUD is a top bar with the step number, cumulative reward and status
+    text, plus two throttle bars along the bottom edge.
+
+    Args:
+        img: Frame to draw on (modified in place).
+        step: Step number shown in the top bar.
+        cumulative_reward: Running reward; green when >= 0, red otherwise.
+        main_throttle: Main-engine action. Only the positive part is drawn,
+            because LunarLander's continuous main engine is off for negative
+            values.
+        lateral_throttle: Lateral action; bar length is its magnitude and the
+            bar colour reflects its sign.
+        status: Text shown at the right of the top bar.
+    """
+    # Drawing in "RGBA" mode on an RGB image makes Pillow blend fills by their
+    # alpha, which is what makes the top bar translucent. Pillow only allows
+    # that combination for RGB images, so other modes keep the default.
+    draw = PIL.ImageDraw.Draw(img, "RGBA" if img.mode == "RGB" else None)
+    W, H = img.size
+    # Semi-transparent top bar
+    draw.rectangle([(0, 0), (W, 22)], fill=(3, 11, 26, 200))
+    # Step & reward text
+    draw.text((6, 4), f"STEP {step:03d}", fill=(79, 179, 255), font=None)
+    rcolor = (45, 219, 124) if cumulative_reward >= 0 else (255, 77, 109)
+    draw.text((W//2 - 40, 4), f"REWARD {cumulative_reward:+.1f}", fill=rcolor, font=None)
+    draw.text((W - 80, 4), status, fill=(248, 166, 35), font=None)
+    # Throttle bars at bottom
+    BAR_H = 6
+    BAR_Y = H - BAR_H - 4
+    bar_max = W // 2 - 20
+    # Main engine bar (blue): negative actions mean "engine off", so they
+    # must not show up as thrust.
+    main_level = min(max(main_throttle, 0.0), 1.0)
+    bar_w = int(main_level * bar_max)
+    draw.rectangle([(10, BAR_Y), (10 + bar_max, BAR_Y + BAR_H)],
+                   fill=(13, 37, 64))
+    if bar_w > 0:
+        draw.rectangle([(10, BAR_Y), (10 + bar_w, BAR_Y + BAR_H)],
+                       fill=(79, 179, 255))
+    draw.text((10, BAR_Y - 11), "MAIN", fill=(79, 179, 255), font=None)
+    # Lateral bar: amber for non-negative actions, red for negative ones.
+    lx = W // 2 + 10
+    # Clamp so an out-of-range value cannot draw past the bar's track.
+    lat_w = int(min(abs(lateral_throttle), 1.0) * bar_max)
+    draw.rectangle([(lx, BAR_Y), (lx + bar_max, BAR_Y + BAR_H)],
+                   fill=(13, 37, 64))
+    col = (245, 166, 35) if lateral_throttle >= 0 else (255, 77, 109)
+    if lat_w > 0:
+        draw.rectangle([(lx, BAR_Y), (lx + lat_w, BAR_Y + BAR_H)], fill=col)
+    draw.text((lx, BAR_Y - 11), "LATERAL", fill=(245, 166, 35), font=None)
+    return img
+def make_episode_gif(
+    frames: list[np.ndarray],
+    episode: EpisodeResult,
+    fps: int = 15,
+) -> str:
+    """Overlay HUD on every frame, save as animated GIF. Returns temp file path.
+
+    Frame ``i`` is annotated with the telemetry of step ``i``; frames beyond
+    the last recorded step reuse the final step's values, and an episode with
+    no recorded steps is annotated with zeros. Returns "" when ``frames`` is
+    empty. The GIF is written to a temporary file that is not deleted
+    automatically; removing it is left to the caller.
+    """
+    if not frames:
+        return ""
+    cum_rewards = episode.cumulative_rewards
+    steps = episode.steps
+    last_step = len(steps) - 1
+    pil_frames: list[PIL.Image.Image] = []
+    for i, frame in enumerate(frames):
+        if last_step >= 0:
+            idx = min(i, last_step)
+            cum_r = cum_rewards[idx]
+            main = steps[idx].action_main
+            lateral = steps[idx].action_lateral
+        else:
+            # No telemetry at all: still produce a replay rather than crash.
+            cum_r, main, lateral = 0.0, 0.0, 0.0
+        img = _draw_hud(
+            PIL.Image.fromarray(frame).convert("RGB"),
+            step=i + 1,
+            cumulative_reward=cum_r,
+            main_throttle=main,
+            lateral_throttle=lateral,
+            status=episode.status,
+        )
+        pil_frames.append(img)
+    # Only the unique name is needed; close the handle before writing so it is
+    # not leaked and the path can be reopened on platforms that lock open files.
+    with tempfile.NamedTemporaryFile(suffix=".gif", delete=False) as tmp:
+        gif_path = tmp.name
+    pil_frames[0].save(
+        gif_path,
+        save_all=True,
+        append_images=pil_frames[1:],
+        duration=int(1000 / fps),
+        loop=0,
+        optimize=False,
+    )
+    return gif_path
+def make_comparison_gif(
+    all_frames: list[list[np.ndarray]],
+    episodes: list[EpisodeResult],
+    fps: int = 12,
+    max_episodes: int = 4,
+) -> str:
+    """
+    Side-by-side grid GIF comparing up to `max_episodes` episodes.
+    Pads shorter episodes with their last frame.
+
+    Cells are laid out in at most two columns and sized after the first
+    episode's first frame. Each cell carries a label with the episode number,
+    status and score. An episode without frames leaves its cell blank.
+    Returns "" when there is nothing to draw (no episodes, a non-positive
+    `max_episodes`, or no frames for the first episode); otherwise the path
+    of a temporary GIF file that is not deleted automatically.
+    """
+    n = min(len(all_frames), max_episodes)
+    if n <= 0:
+        return ""
+    frame_lists = [all_frames[i] for i in range(n)]
+    ep_list = [episodes[i] for i in range(n)]
+    max_len = max(len(fl) for fl in frame_lists)
+    # Pad each episode to max_len
+    padded = [fl + [fl[-1]] * (max_len - len(fl)) if fl else [] for fl in frame_lists]
+    if not padded[0]:
+        return ""
+    h, w = padded[0][0].shape[:2]
+    cols = 2 if n > 2 else n
+    rows = (n + cols - 1) // cols
+    grid_w, grid_h = cols * w, rows * h
+    # Labels do not change between frames, so build them once; episodes
+    # without frames are never drawn and get no label.
+    labels = [
+        (
+            f"#{ep.episode_idx+1} {ep.status} {ep.total_reward:+.0f}",
+            (45, 219, 124) if ep.total_reward >= 150 else (255, 77, 109),
+        ) if fl else None
+        for ep, fl in zip(ep_list, padded)
+    ]
+    pil_frames: list[PIL.Image.Image] = []
+    for step_i in range(max_len):
+        canvas = PIL.Image.new("RGB", (grid_w, grid_h), (3, 11, 26))
+        for ep_i, ep_frames in enumerate(padded):
+            if not ep_frames:
+                continue
+            cell = PIL.Image.fromarray(ep_frames[step_i])
+            text, col = labels[ep_i]
+            draw = PIL.ImageDraw.Draw(cell)
+            draw.rectangle([(0, 0), (cell.width, 16)], fill=(3, 11, 26))
+            draw.text((4, 2), text, fill=col, font=None)
+            cx = (ep_i % cols) * w
+            cy = (ep_i // cols) * h
+            canvas.paste(cell, (cx, cy))
+        pil_frames.append(canvas)
+    # Only the unique name is needed; close the handle before writing so it is
+    # not leaked and the path can be reopened on platforms that lock open files.
+    with tempfile.NamedTemporaryFile(suffix=".gif", delete=False) as tmp:
+        gif_path = tmp.name
+    pil_frames[0].save(
+        gif_path,
+        save_all=True,
+        append_images=pil_frames[1:],
+        duration=int(1000 / fps),
+        loop=0,
+        optimize=False,
+    )
+    return gif_path