# Openenv / cloud_arena / visualization.py
# saravanatanjiro's picture
# Migrate LLM pipeline to custom GRPO with robust rewards
# dfc5996
# Cloud Arena Visualization — Mathematical Model
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
# Shared dark-theme palette (hex RGB strings) used by the dashboards below.
# Figure and axes background colour.
REF_BG = '#0e1117'
# Line colour of the reward ("Learning Curve") panel.
REF_CYAN = '#00d4ff'
# Line colour of the savings ("Cost Optimization %") panel.
REF_AMBER = '#ffa500'
# Line colour of the "Security Score" panel.
REF_NEON = '#39ff14'
# Colour of titles, axis labels and tick labels.
TEXT_COLOR = '#e6e6e6'
def smooth(y, box_pts=50):
    """Return a moving average of ``y`` over a window of ``box_pts`` samples.

    The average is a uniform (box) filter evaluated only where the window
    fully overlaps the data (``mode='valid'``), so a smoothed result has
    ``len(y) - box_pts + 1`` points.

    Args:
        y: One-dimensional sequence of numbers (list or array).
        box_pts: Window width in samples.

    Returns:
        A float ``numpy.ndarray``. The data is returned unsmoothed when it is
        shorter than the window, or when ``box_pts`` is less than 1 (no
        meaningful window exists in that case).
    """
    # Always hand back an ndarray so callers get one type on every path.
    values = np.asarray(y, dtype=float)
    # A non-positive window would make np.ones / np.convolve raise; treat it
    # like a too-short series and skip smoothing instead of crashing.
    if box_pts < 1 or values.size < box_pts:
        return values
    box = np.ones(box_pts) / box_pts
    return np.convolve(values, box, mode='valid')
def generate_dashboard(callback, output_path="outputs/training_dashboard.png"):
    """Render the three-panel training dashboard and save it as an image.

    Each panel shows one per-episode series as a faint raw line with a bold
    moving-average line (see ``smooth``) drawn on top.

    Args:
        callback: Object exposing ``episode_rewards``, ``episode_savings``
            and ``episode_security`` sequences of numbers.
        output_path: Destination passed to ``Figure.savefig``. When it is a
            filesystem path, missing parent directories are created.

    Returns:
        ``output_path``, unchanged.
    """
    import os

    panels = (
        ("Learning Curve", np.array(callback.episode_rewards), REF_CYAN),
        ("Cost Optimization %", np.array(callback.episode_savings), REF_AMBER),
        ("Security Score", np.array(callback.episode_security), REF_NEON),
    )

    fig, axes = plt.subplots(1, 3, figsize=(22, 6), facecolor=REF_BG)
    try:
        for ax, (title, series, color) in zip(axes, panels):
            ax.set_facecolor(REF_BG)
            ax.grid(True, alpha=0.05, color='white')
            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
            ax.spines['left'].set_color('#333333')
            ax.spines['bottom'].set_color('#333333')
            ax.tick_params(colors=TEXT_COLOR, labelsize=10)

            ax.plot(series, color=color, alpha=0.15)
            trend = smooth(series)
            # The smoothed series is shorter than the raw one; anchor it to
            # the trailing edge so each point sits at the last episode of its
            # averaging window rather than being shifted left to x=0.
            start = len(series) - len(trend)
            ax.plot(np.arange(start, len(series)), trend, color=color, lw=3)
            ax.set_title(title, color=TEXT_COLOR, fontsize=14, fontweight='bold')

        # Fixed ranges for the savings and security panels.
        axes[1].set_ylim(0, 100)
        axes[2].set_ylim(0, 1)

        fig.tight_layout()

        # savefig does not create directories; make sure the target exists.
        # Skipped for non-path targets such as file-like objects.
        if isinstance(output_path, (str, os.PathLike)):
            out_dir = os.path.dirname(output_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
        fig.savefig(output_path, dpi=200, bbox_inches='tight', facecolor=REF_BG)
    finally:
        # Close this specific figure even if plotting or saving failed.
        plt.close(fig)
    return output_path
def generate_grpo_dashboard(all_results, all_stats, output_path="outputs/grpo_dashboard.png"):
    """Render the 2x2 GRPO comparison dashboard and save it as an image.

    Panels: smoothed reward, KL, entropy, and veto rate, with one line per
    model in each panel.

    Args:
        all_results: Mapping of model name to its sequence of rewards.
        all_stats: Mapping of model name to a sequence of dicts; the keys
            ``"kl"``, ``"entropy"`` and ``"veto_rate"`` are read from each
            dict, defaulting to 0.0 when absent. Models missing from this
            mapping get empty curves.
        output_path: Destination passed to ``Figure.savefig``. When it is a
            filesystem path, missing parent directories are created.

    Returns:
        ``output_path``, unchanged.
    """
    import os

    fig, axs = plt.subplots(2, 2, figsize=(16, 10), facecolor=REF_BG)
    try:
        ax1, ax2, ax3, ax4 = axs.flatten()
        for ax in (ax1, ax2, ax3, ax4):
            ax.set_facecolor(REF_BG)
            ax.grid(True, alpha=0.08, color="white")
            ax.spines["top"].set_visible(False)
            ax.spines["right"].set_visible(False)
            ax.spines["left"].set_color("#333333")
            ax.spines["bottom"].set_color("#333333")
            ax.tick_params(colors=TEXT_COLOR, labelsize=9)

        palette = ["#00d4ff", "#ffa500", "#39ff14", "#ff6b6b", "#b47eff"]
        for i, (name, rewards) in enumerate(all_results.items()):
            # Cycle through the palette when there are more models than colours.
            c = palette[i % len(palette)]

            rewards = np.array(rewards)
            # Window scales with run length, clamped to the range [3, 20].
            window = min(20, max(3, len(rewards) // 5))
            trend = smooth(rewards, box_pts=window)
            # The smoothed series is shorter than the raw one; anchor it to
            # the trailing edge so x positions match real episode indices.
            start = len(rewards) - len(trend)
            ax1.plot(np.arange(start, len(rewards)), trend, color=c, lw=2, label=name)

            # Look the model's stats up once and reuse them for all panels.
            stats = all_stats.get(name, [])
            ax2.plot([s.get("kl", 0.0) for s in stats], color=c, lw=1.8, label=name)
            ax3.plot([s.get("entropy", 0.0) for s in stats], color=c, lw=1.8, label=name)
            ax4.plot([s.get("veto_rate", 0.0) for s in stats], color=c, lw=1.8, label=name)

        ax1.set_title("GRPO Reward (Smoothed)", color=TEXT_COLOR, fontsize=12, fontweight="bold")
        ax1.set_xlabel("Episode", color=TEXT_COLOR)
        ax1.set_ylabel("Reward", color=TEXT_COLOR)
        # legend() warns when there are no labelled lines; skip it if empty.
        if all_results:
            ax1.legend(facecolor="#1a1a2e", edgecolor="#333", labelcolor=TEXT_COLOR, fontsize=8)

        ax2.set_title("KL Trend", color=TEXT_COLOR, fontsize=12, fontweight="bold")
        ax2.set_xlabel("Episode", color=TEXT_COLOR)
        ax2.set_ylabel("KL", color=TEXT_COLOR)

        ax3.set_title("Entropy Trend", color=TEXT_COLOR, fontsize=12, fontweight="bold")
        ax3.set_xlabel("Episode", color=TEXT_COLOR)
        ax3.set_ylabel("Entropy", color=TEXT_COLOR)

        ax4.set_title("Safety Violation / Veto Rate", color=TEXT_COLOR, fontsize=12, fontweight="bold")
        ax4.set_xlabel("Episode", color=TEXT_COLOR)
        ax4.set_ylabel("Rate", color=TEXT_COLOR)
        ax4.set_ylim(0, 1)

        fig.tight_layout()

        # savefig does not create directories; make sure the target exists.
        # Skipped for non-path targets such as file-like objects.
        if isinstance(output_path, (str, os.PathLike)):
            out_dir = os.path.dirname(output_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
        fig.savefig(output_path, dpi=200, bbox_inches="tight", facecolor=REF_BG)
    finally:
        # Close this specific figure even if plotting or saving failed.
        plt.close(fig)
    return output_path