File size: 4,832 Bytes
e6b0e2f
19157df
 
e6b0e2f
3502162
19157df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7dddaa
 
 
 
 
 
 
 
 
 
 
19157df
 
 
 
 
 
 
 
 
 
 
 
934b4ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19157df
 
 
 
 
 
 
 
 
 
 
 
 
e6b0e2f
c7dddaa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""
HF Spaces Gradio App — Training results & architecture overview
for the Nested RL Environments system.
"""
import gradio as gr
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
# ── Training episode data (from Supabase export) ──
# Episodes 0–75, sorted by step → episode → created_at
# 76 values in total; a value's list index is the episode number that
# create_reward_chart() puts on the x-axis.
EPISODE_REWARDS = [
    39.7, -20.1, -14.2, 96.6, -60.3, -60.1, 35.8, 96.6, -60.3, -5.1,
    35.8, 41.6, 39.7, 19.9, 90.8, 41.6, 94.7, -60.1, -124.2, 41.6,
    46.7, -56.3, 50.6, 48.6, 101.7, -56.3, 50.6, 48.6, 46.7, -1.3,
    50.6, -11.4, 46.7, -56.3, 50.6, 103.6, 46.7, -56.3, 5.6, -121.4,
    -13.9, 41.3, 100.3, -52.3, 46.1, 96.3, 45.3, -142.3, 91.1, 96.3,
    100.3, -52.3, 46.1, 96.3, 0.3, -52.3, -13.9, 41.3, 45.3, -12.3,
    33.8, 54.8, 91.0, -64.3, 33.8, 54.8, 36.0, 90.7, 33.8, 109.8,
    36.0, -64.3, 33.8, 54.8, 91.0, -19.3,
]
def compute_rolling_avg(rewards, window=35):
    """Return the trailing rolling mean of *rewards*.

    Element ``i`` of the result is the mean of
    ``rewards[max(0, i - window + 1) : i + 1]``. The first ``window - 1``
    entries therefore average over however many values are available so
    far, instead of being dropped or padded.

    Args:
        rewards: Sequence of numbers supporting ``len()`` and slicing.
        window: Maximum number of trailing values per average; must be >= 1.

    Returns:
        A list with one average per input element (empty for empty input).

    Raises:
        ValueError: If ``window`` is less than 1. Such a window selects an
            empty slice, which previously surfaced as an unexplained
            ``ZeroDivisionError``.
    """
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window!r}")

    averages = []
    for end in range(1, len(rewards) + 1):
        # Slice covers at most `window` items ending at position end - 1.
        chunk = rewards[max(0, end - window):end]
        averages.append(sum(chunk) / len(chunk))
    return averages
def create_reward_chart():
    """Build the reward-trend chart as a dark-themed matplotlib figure.

    Plots the rolling average of ``EPISODE_REWARDS`` (one point per episode
    index) as a cyan line with a translucent fill, plus a dashed reference
    line at zero. Only the rolling average is drawn; the raw per-episode
    rewards are not plotted.

    Returns:
        The matplotlib ``Figure`` holding the chart.
    """
    # Single source of truth for the window so the curve and title agree.
    window = 35
    episodes = list(range(len(EPISODE_REWARDS)))
    rolling = compute_rolling_avg(EPISODE_REWARDS, window=window)
    last_episode = max(len(EPISODE_REWARDS) - 1, 0)

    # ── Dark theme ──
    bg = "#0a0e17"
    card = "#111827"
    border = "#1e293b"
    cyan = "#22d3ee"
    text = "#e2e8f0"
    muted = "#475569"

    fig, ax = plt.subplots(figsize=(14, 6), facecolor=bg)
    ax.set_facecolor(card)

    # Subtle border
    for spine in ax.spines.values():
        spine.set_color(border)
        spine.set_linewidth(0.8)

    # Rolling average line + fill
    ax.plot(episodes, rolling, color=cyan, linewidth=2.8, zorder=3)
    ax.fill_between(
        episodes, rolling, alpha=0.15, color=cyan, zorder=2,
    )

    # Zero reference line
    ax.axhline(y=0, color=border, linewidth=0.8, linestyle="--", zorder=1)

    # Axis styling
    ax.set_xlabel("Episode", color=muted, fontsize=11, fontfamily="monospace")
    ax.set_ylabel("Reward", color=muted, fontsize=11, fontfamily="monospace")
    ax.tick_params(colors=muted, labelsize=9)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(10))
    ax.grid(False)

    # Title (episode range and window size are derived from the data above)
    ax.set_title(
        f"Reward Trend  ·  Episodes 0–{last_episode}  ·  {window}-ep Rolling Avg",
        color=text,
        fontsize=14,
        fontfamily="monospace",
        fontweight="bold",
        pad=16,
    )

    fig.tight_layout(pad=1.5)
    return fig
# ── Build the Gradio app ──
# Theme and stylesheet are defined here and handed to demo.launch() at the
# bottom of the file.
_theme = gr.themes.Base(primary_hue="cyan", neutral_hue="slate")

# Custom CSS: dark page background plus the monospace header / section-label
# classes referenced by the layout.
_css = """
    .gradio-container { background: #0a0e17 !important; }
    .main-header { text-align: center; margin-bottom: 8px; }
    .main-header h1 { color: #e2e8f0; font-family: monospace; }
    .main-header p { color: #64748b; font-family: monospace; font-size: 14px; }
    .section-label { color: #94a3b8 !important; font-family: monospace !important; }
"""
# Page layout: a centered header followed by two tabs (architecture diagram
# with prize targets, and the training reward chart).
with gr.Blocks(
    title="Nested RL Environments — AI Oversight",
) as demo:
    # Header
    gr.HTML("""
        <div class="main-header">
            <h1>Nested RL Environments</h1>
            <p>Self-Improving Oversight for AI Customer Support · Team Ludes Magnus</p>
        </div>
    """)
    # ── Tab layout ──
    with gr.Tabs():
        # Tab 1: Architecture (default)
        with gr.Tab("Architecture"):
            gr.Image(
                value="assets/architecture.png",
                label="3-Layer Architecture",
                show_label=False,
            )
            gr.Markdown("""
---
## Prize Targets
- **Main Track — Statement 4:** Layer 0 generates reward functions → new domain = new RL environment automatically
- **Fleet AI $10k:** Layer 1 provides scalable oversight — add intents, retrain
- **Halluminate $10k:** Layer 2 is a multi-actor environment with 100 diverse adversarial customers
""")
        # Tab 2: Training Results
        with gr.Tab("Training Results"):
            gr.Markdown(
                "### Reward Trend — GRPO Prompt Optimization",
                elem_classes=["section-label"],
            )
            # The chart is rendered once, when the layout is built.
            reward_plot = gr.Plot(value=create_reward_chart(), label="Reward Trend")
            gr.Markdown(
                """
                <div style="color: #64748b; font-family: monospace; font-size: 12px; text-align: right; margin-top: -8px;">
                    Run 20260308_135709 · 300 total episodes · All data authentic
                </div>
                """,
            )
if __name__ == "__main__":
    # Serve on all interfaces at port 7860; the theme and custom CSS are
    # supplied at launch time.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        theme=_theme,
        css=_css,
    )