"""
HF Spaces Gradio App — Training results & architecture overview
for the Nested RL Environments system.
"""
import gradio as gr
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
# ── Training episode data (from Supabase export) ──
# Episodes 0–75, sorted by step → episode → created_at
# Per-episode reward values plotted by create_reward_chart().
# 76 entries (episode indices 0-75), laid out ten per row so the first
# value on row r is episode 10 * r.
EPISODE_REWARDS = [
    39.7, -20.1, -14.2, 96.6, -60.3, -60.1, 35.8, 96.6, -60.3, -5.1,
    35.8, 41.6, 39.7, 19.9, 90.8, 41.6, 94.7, -60.1, -124.2, 41.6,
    46.7, -56.3, 50.6, 48.6, 101.7, -56.3, 50.6, 48.6, 46.7, -1.3,
    50.6, -11.4, 46.7, -56.3, 50.6, 103.6, 46.7, -56.3, 5.6, -121.4,
    -13.9, 41.3, 100.3, -52.3, 46.1, 96.3, 45.3, -142.3, 91.1, 96.3,
    100.3, -52.3, 46.1, 96.3, 0.3, -52.3, -13.9, 41.3, 45.3, -12.3,
    33.8, 54.8, 91.0, -64.3, 33.8, 54.8, 36.0, 90.7, 33.8, 109.8,
    36.0, -64.3, 33.8, 54.8, 91.0, -19.3,
]
def compute_rolling_avg(rewards, window=35):
    """Return the trailing rolling mean of ``rewards``.

    Element ``i`` of the result is the mean of
    ``rewards[max(0, i - window + 1) : i + 1]``, so the first
    ``window - 1`` entries are averaged over fewer than ``window`` values
    (the window grows until it is full).

    Args:
        rewards: Sliceable sequence of numbers (e.g. a list of floats).
        window: Maximum number of trailing values per average; must be >= 1.

    Returns:
        A list of floats with the same length as ``rewards``. An empty
        input yields an empty list.

    Raises:
        ValueError: If ``window`` is smaller than 1. Without this check the
            slice would be empty and the division would fail with an
            opaque ``ZeroDivisionError``.
    """
    if window < 1:
        raise ValueError(f"window must be a positive integer, got {window!r}")

    averages = []
    for end in range(1, len(rewards) + 1):
        # ``end`` is exclusive, so this is the last ``window`` values up to
        # and including index ``end - 1``.
        chunk = rewards[max(0, end - window):end]
        averages.append(sum(chunk) / len(chunk))
    return averages
def create_reward_chart():
    """Build the reward-trend chart as a matplotlib figure.

    Plots the rolling average of ``EPISODE_REWARDS`` (computed with
    ``compute_rolling_avg``) against the episode index on a dark-themed
    axes, with a translucent fill under the curve and a dashed reference
    line at reward 0.

    Returns:
        The matplotlib ``Figure`` holding the chart. The figure is not
        shown or saved here; the caller decides how to render it.
    """
    window = 35
    episodes = list(range(len(EPISODE_REWARDS)))
    rolling = compute_rolling_avg(EPISODE_REWARDS, window=window)

    # ── Dark theme palette ──
    bg = "#0a0e17"
    card = "#111827"
    border = "#1e293b"
    cyan = "#22d3ee"
    text = "#e2e8f0"
    muted = "#475569"

    fig, ax = plt.subplots(figsize=(14, 6), facecolor=bg)
    ax.set_facecolor(card)

    # Subtle border
    for spine in ax.spines.values():
        spine.set_color(border)
        spine.set_linewidth(0.8)

    # Rolling average line + fill (the fill reuses the line colour at 15% alpha)
    ax.plot(episodes, rolling, color=cyan, linewidth=2.8, zorder=3)
    ax.fill_between(
        episodes, rolling, alpha=0.15, color=cyan, zorder=2,
    )

    # Zero reference line
    ax.axhline(y=0, color=border, linewidth=0.8, linestyle="--", zorder=1)

    # Axis styling
    ax.set_xlabel("Episode", color=muted, fontsize=11, fontfamily="monospace")
    ax.set_ylabel("Reward", color=muted, fontsize=11, fontfamily="monospace")
    ax.tick_params(colors=muted, labelsize=9)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(10))
    ax.grid(False)

    # Title: the episode range and window size are derived from the data so
    # the caption cannot drift from what is actually plotted.
    last_episode = len(EPISODE_REWARDS) - 1
    ax.set_title(
        f"Reward Trend · Episodes 0–{last_episode} · {window}-ep Rolling Avg",
        color=text,
        fontsize=14,
        fontfamily="monospace",
        fontweight="bold",
        pad=16,
    )

    fig.tight_layout(pad=1.5)
    return fig
# ── Build the Gradio app ──
# Gradio base theme with a cyan primary hue on slate neutrals; handed to
# demo.launch() in the __main__ guard.
_theme = gr.themes.Base(primary_hue="cyan", neutral_hue="slate")
_css = """
.gradio-container { background: #0a0e17 !important; }
.main-header { text-align: center; margin-bottom: 8px; }
.main-header h1 { color: #e2e8f0; font-family: monospace; }
.main-header p { color: #64748b; font-family: monospace; font-size: 14px; }
.section-label { color: #94a3b8 !important; font-family: monospace !important; }
"""
# Two-tab UI: a static architecture image with prize-target notes, and the
# training reward chart.
# NOTE(review): the non-ASCII punctuation in the user-facing strings below
# (em dash, middle dot, arrow) was restored from mis-decoded text — confirm
# the glyphs against the original file.
with gr.Blocks(
    title="Nested RL Environments — AI Oversight",
) as demo:
    # Header
    gr.HTML(
        "\n"
        '<div class="main-header">\n'
        "<h1>Nested RL Environments</h1>\n"
        "<p>Self-Improving Oversight for AI Customer Support · Team Ludes Magnus</p>\n"
        "</div>\n"
    )

    # ── Tab layout ──
    with gr.Tabs():
        # Tab 1: Architecture (default)
        with gr.Tab("Architecture"):
            gr.Image(
                value="assets/architecture.png",
                label="3-Layer Architecture",
                show_label=False,
            )
            gr.Markdown(
                "\n"
                "---\n"
                "## Prize Targets\n"
                "- **Main Track — Statement 4:** Layer 0 generates reward functions → new domain = new RL environment automatically\n"
                "- **Fleet AI $10k:** Layer 1 provides scalable oversight — add intents, retrain\n"
                "- **Halluminate $10k:** Layer 2 is a multi-actor environment with 100 diverse adversarial customers\n"
            )

        # Tab 2: Training Results
        with gr.Tab("Training Results"):
            gr.Markdown(
                "### Reward Trend — GRPO Prompt Optimization",
                elem_classes=["section-label"],
            )
            # The chart is rendered once, when the module is imported.
            reward_plot = gr.Plot(value=create_reward_chart(), label="Reward Trend")
            gr.Markdown(
                "\n"
                '<div style="color: #64748b; font-family: monospace; font-size: 12px; text-align: right; margin-top: -8px;">\n'
                "Run 20260308_135709 · 300 total episodes · All data authentic\n"
                "</div>\n",
            )
if __name__ == "__main__":
    # Listen on all interfaces at port 7860.
    # NOTE(review): theme/css are passed to launch() rather than to
    # gr.Blocks(); this matches the Gradio 6 API — confirm the pinned
    # Gradio version before changing it.
    demo.launch(server_name="0.0.0.0", server_port=7860, theme=_theme, css=_css)