Spaces:

saravanatanjiro
/

Openenv

Paused

App Files Files Community

Openenv / cloud_arena /visualization.py

saravanatanjiro

Migrate LLM pipeline to custom GRPO with robust rewards

dfc5996 16 days ago

raw

history blame contribute delete

4.14 kB

	# Cloud Arena Visualization — Mathematical Model

	import matplotlib
	matplotlib.use('Agg')
	import matplotlib.pyplot as plt
	import numpy as np


	REF_BG = '#0e1117'
	REF_CYAN = '#00d4ff'
	REF_AMBER = '#ffa500'
	REF_NEON = '#39ff14'
	TEXT_COLOR = '#e6e6e6'


	def smooth(y, box_pts=50):
	if len(y) < box_pts:
	return y
	box = np.ones(box_pts) / box_pts
	return np.convolve(y, box, mode='valid')


	def generate_dashboard(callback, output_path="outputs/training_dashboard.png"):
	rewards = np.array(callback.episode_rewards)
	savings = np.array(callback.episode_savings)
	security = np.array(callback.episode_security)

	fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(22, 6), facecolor=REF_BG)

	for ax in [ax1, ax2, ax3]:
	ax.set_facecolor(REF_BG)
	ax.grid(True, alpha=0.05, color='white')
	ax.spines['top'].set_visible(False)
	ax.spines['right'].set_visible(False)
	ax.spines['left'].set_color('#333333')
	ax.spines['bottom'].set_color('#333333')
	ax.tick_params(colors=TEXT_COLOR, labelsize=10)

	ax1.plot(rewards, color=REF_CYAN, alpha=0.15)
	ax1.plot(smooth(rewards), color=REF_CYAN, lw=3)
	ax1.set_title("Learning Curve", color=TEXT_COLOR, fontsize=14, fontweight='bold')

	ax2.plot(savings, color=REF_AMBER, alpha=0.15)
	ax2.plot(smooth(savings), color=REF_AMBER, lw=3)
	ax2.set_title("Cost Optimization %", color=TEXT_COLOR, fontsize=14, fontweight='bold')
	ax2.set_ylim(0, 100)

	ax3.plot(security, color=REF_NEON, alpha=0.15)
	ax3.plot(smooth(security), color=REF_NEON, lw=3)
	ax3.set_title("Security Score", color=TEXT_COLOR, fontsize=14, fontweight='bold')
	ax3.set_ylim(0, 1)

	plt.tight_layout()
	plt.savefig(output_path, dpi=200, bbox_inches='tight', facecolor=REF_BG)
	plt.close()
	return output_path


	def generate_grpo_dashboard(all_results, all_stats, output_path="outputs/grpo_dashboard.png"):
	fig, axs = plt.subplots(2, 2, figsize=(16, 10), facecolor=REF_BG)
	ax1, ax2, ax3, ax4 = axs.flatten()
	for ax in [ax1, ax2, ax3, ax4]:
	ax.set_facecolor(REF_BG)
	ax.grid(True, alpha=0.08, color="white")
	ax.spines["top"].set_visible(False)
	ax.spines["right"].set_visible(False)
	ax.spines["left"].set_color("#333333")
	ax.spines["bottom"].set_color("#333333")
	ax.tick_params(colors=TEXT_COLOR, labelsize=9)

	palette = ["#00d4ff", "#ffa500", "#39ff14", "#ff6b6b", "#b47eff"]
	model_names = list(all_results.keys())
	for i, name in enumerate(model_names):
	c = palette[i % len(palette)]
	rewards = all_results[name]
	ax1.plot(smooth(np.array(rewards), box_pts=min(20, max(3, len(rewards) // 5))), color=c, lw=2, label=name)

	kl_curve = [s.get("kl", 0.0) for s in all_stats.get(name, [])]
	ent_curve = [s.get("entropy", 0.0) for s in all_stats.get(name, [])]
	veto_curve = [s.get("veto_rate", 0.0) for s in all_stats.get(name, [])]

	ax2.plot(kl_curve, color=c, lw=1.8, label=name)
	ax3.plot(ent_curve, color=c, lw=1.8, label=name)
	ax4.plot(veto_curve, color=c, lw=1.8, label=name)

	ax1.set_title("GRPO Reward (Smoothed)", color=TEXT_COLOR, fontsize=12, fontweight="bold")
	ax1.set_xlabel("Episode", color=TEXT_COLOR)
	ax1.set_ylabel("Reward", color=TEXT_COLOR)
	ax1.legend(facecolor="#1a1a2e", edgecolor="#333", labelcolor=TEXT_COLOR, fontsize=8)

	ax2.set_title("KL Trend", color=TEXT_COLOR, fontsize=12, fontweight="bold")
	ax2.set_xlabel("Episode", color=TEXT_COLOR)
	ax2.set_ylabel("KL", color=TEXT_COLOR)

	ax3.set_title("Entropy Trend", color=TEXT_COLOR, fontsize=12, fontweight="bold")
	ax3.set_xlabel("Episode", color=TEXT_COLOR)
	ax3.set_ylabel("Entropy", color=TEXT_COLOR)

	ax4.set_title("Safety Violation / Veto Rate", color=TEXT_COLOR, fontsize=12, fontweight="bold")
	ax4.set_xlabel("Episode", color=TEXT_COLOR)
	ax4.set_ylabel("Rate", color=TEXT_COLOR)
	ax4.set_ylim(0, 1)

	plt.tight_layout()
	plt.savefig(output_path, dpi=200, bbox_inches="tight", facecolor=REF_BG)
	plt.close()
	return output_path