Spaces:

junaid0600
/

sql-db-engineer-agent

Sleeping

App Files Files Community

sql-db-engineer-agent / training /generate_plots.py

junaid0600

Update training/generate_plots.py

8ff0a54 verified 2 months ago

Raw

History Blame Contribute Delete

11 kB

	"""
	training/generate_plots.py
	Run this after training to generate clean publication-ready plots.
	Fixes all 6 issues:
	1. Loss annotations use scientific notation
	2. Zero division guard → shows infinity symbol
	3. Y-axis scale absorbed into label
	4. Zero bars get "0" text label
	5. 10-step moving average smoothing
	6. Outlier annotation with ★ (values beyond ±1.5σ of the mean)
	"""

	import json, os, sys
	import numpy as np
	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt

	# Repository root: the parent of the directory that contains this file.
	ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	# ROOT goes first on sys.path, presumably so the project-local
	# `env.db_simulator` import below resolves when this file is run as a script.
	sys.path.insert(0, ROOT)
	from env.db_simulator import DatabaseSimulator

	# Directory that is searched for training_logs.json; override it with the
	# OUTPUT_DIR environment variable.
	OUTPUT_DIR = os.getenv("OUTPUT_DIR", "./sdea-trained")


	# ─────────────────────────────────────────────
	# LOSS CURVE (from trainer.state.log_history)
	# ─────────────────────────────────────────────

	def plot_loss_curve(log_history: list, save_path: str = "loss_curve.png"):
	    """Plot training loss and reward side by side and save the figure.

	    Args:
	        log_history: Sequence of log-entry dicts (the script passes the
	            contents of ``training_logs.json``). Only entries containing a
	            ``"loss"`` key are plotted; a missing ``"step"`` falls back to
	            the entry's position and a missing ``"reward"`` to 0.0.
	            ``None`` is treated like an empty history.
	        save_path: Destination path of the rendered image.

	    Returns:
	        None. When no entry carries a loss, a warning is printed and no
	        file is written.
	    """
	    logs = [entry for entry in (log_history or []) if "loss" in entry]
	    if not logs:
	        print("⚠️ No training logs found — skipping loss curve")
	        return

	    # Import the submodule explicitly instead of relying on pyplot having
	    # loaded ``matplotlib.ticker`` as a side effect.
	    from matplotlib.ticker import ScalarFormatter

	    window = 10  # moving-average width, in logged entries
	    kernel = np.ones(window) / window
	    steps = [entry.get("step", i) for i, entry in enumerate(logs)]
	    losses = [entry.get("loss", 0.0) for entry in logs]
	    rewards = [entry.get("reward", 0.0) for entry in logs]

	    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 5))
	    try:
	        fig.suptitle(
	            "GRPO Training — SQL Database Engineer Agent\n"
	            "Qwen2.5-7B fine-tuned with Unsloth + TRL",
	            fontsize=13, fontweight="bold"
	        )

	        # ── Left: Loss ────────────────────────────────────────────
	        ax1.plot(steps, losses, "b-", lw=1.0, alpha=0.35, label="Raw loss")

	        # FIX 5: 10-step moving average. mode="valid" yields one value per
	        # full window, so the first point lines up with steps[window - 1].
	        if len(losses) >= window:
	            smooth = np.convolve(losses, kernel, mode="valid")
	            ax1.plot(steps[window - 1:], smooth, "b-", lw=2.5, label="10-step avg")

	        # FIX 3: scientific-notation y ticks rendered with math text
	        ax1.set_xlabel("Training Step")
	        ax1.set_ylabel("Loss")
	        ax1.set_title("Training Loss ↓ = model learning DBA pattern")
	        ax1.yaxis.set_major_formatter(ScalarFormatter(useMathText=True))
	        ax1.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
	        ax1.grid(True, alpha=0.3)
	        ax1.legend(fontsize=9)

	        # FIX 1: scientific notation for start/end annotations
	        peak = max(losses)
	        ax1.annotate(
	            f"Start: {losses[0]:.2e}",
	            xy=(steps[0], losses[0]),
	            xytext=(steps[0] + max(len(steps) // 15, 1), peak * 0.85),
	            fontsize=8, color="red",
	            arrowprops=dict(arrowstyle="->", color="red", lw=1),
	        )
	        ax1.annotate(
	            f"End: {losses[-1]:.2e}",
	            xy=(steps[-1], losses[-1]),
	            xytext=(steps[-1] - max(len(steps) // 6, 1), peak * 0.65),
	            fontsize=8, color="green",
	            arrowprops=dict(arrowstyle="->", color="green", lw=1),
	        )

	        # ── Right: Reward ─────────────────────────────────────────
	        ax2.plot(steps, rewards, "g-", lw=1.0, alpha=0.35, label="Raw reward")

	        # FIX 5: smoothed reward
	        if len(rewards) >= window:
	            smooth_r = np.convolve(rewards, kernel, mode="valid")
	            ax2.plot(steps[window - 1:], smooth_r, "g-", lw=2.5, label="10-step avg")

	        ax2.set_xlabel("Training Step")
	        ax2.set_ylabel("Avg Reward")
	        ax2.set_title("Reward During Training ↑ = improving")
	        ax2.grid(True, alpha=0.3)
	        ax2.legend(fontsize=9)

	        # Bottom summary. A relative change is meaningless when the starting
	        # reward is (numerically) zero, so report "n/a" instead of an
	        # astronomically large percentage.
	        start_r, end_r = rewards[0], rewards[-1]
	        if abs(start_r) < 1e-9:
	            change = "n/a"
	        else:
	            change = f"{(end_r - start_r) / abs(start_r) * 100:+.0f}%"
	        fig.text(
	            0.5, 0.01,
	            f"Loss: {losses[0]:.2e} → {losses[-1]:.2e} | "
	            f"Reward: {start_r:.3f} → {end_r:.3f} ({change})",
	            ha="center", fontsize=10,
	            bbox=dict(boxstyle="round", facecolor="lightyellow", alpha=0.8),
	        )

	        # Leave the bottom 7% of the canvas free for the summary box.
	        plt.tight_layout(rect=[0, 0.07, 1, 1])
	        plt.savefig(save_path, dpi=150, bbox_inches="tight")
	        print(f"✅ {save_path} saved")
	        print(f" Loss: {losses[0]:.2e} → {losses[-1]:.2e}")
	        print(f" Reward: {rewards[0]:.3f} → {rewards[-1]:.3f}")
	    finally:
	        # pyplot keeps every figure alive until it is closed explicitly.
	        plt.close(fig)


	# ─────────────────────────────────────────────
	# REWARD COMPARISON CURVE (trained vs random)
	# ─────────────────────────────────────────────

	def plot_reward_curve(save_path: str = "reward_curve.png"):
	    """Compare a random baseline with a hint-driven strategy and save the plot.

	    Scenarios are read from ``easy_scenarios.json``, ``medium_scenarios.json``
	    and ``hard_scenarios.json`` under ``ROOT/dataset``; files that do not
	    exist are reported and skipped. Each scenario is scored twice on a fresh
	    ``DatabaseSimulator``: once after creating an index on a ``phone`` column
	    of the first table ("random"), and once after creating indexes for up to
	    two ``missing_index_hints`` plus a statistics refresh ("strategic").
	    Both series are scripted policies; no trained model is loaded here.

	    Args:
	        save_path: Destination path of the rendered image.

	    Returns:
	        None. When no scenario could be loaded, a warning is printed and no
	        file is written.
	    """
	    scenarios = []
	    for fname in ("easy_scenarios.json", "medium_scenarios.json", "hard_scenarios.json"):
	        path = os.path.join(ROOT, "dataset", fname)
	        try:
	            with open(path, encoding="utf-8") as f:
	                scenarios.extend(json.load(f))
	        except FileNotFoundError:
	            print(f" ⚠️ {fname} not found")

	    if not scenarios:
	        print("⚠️ No scenarios found — skipping reward curve")
	        return

	    r_imprs, s_imprs = [], []
	    for scenario in scenarios:
	        first_table = scenario["tables"][0]["name"]
	        hints = scenario.get("missing_index_hints") or []

	        # Random: useless index on 'phone'
	        r_imprs.append(_score_gain(scenario, [
	            ("create_index", {"table": first_table, "columns": ["phone"]}),
	        ]))

	        # Strategic: hints → correct indexes + statistics
	        strategic = [
	            ("create_index", {"table": h["table"], "columns": h["columns"]})
	            for h in hints[:2]
	        ]
	        if hints:
	            # NOTE(review): the statistics refresh runs once and only when
	            # the scenario has hints — confirm this matches the intended
	            # strategy.
	            strategic.append(("analyze_statistics", {"table": first_table}))
	        s_imprs.append(_score_gain(scenario, strategic))

	    eps = list(range(1, len(scenarios) + 1))
	    avg_r = sum(r_imprs) / len(r_imprs)
	    avg_s = sum(s_imprs) / len(s_imprs)

	    # FIX 2: guard zero division. The signed format avoids "+-12%" when the
	    # strategic average is below the random one.
	    if avg_r < 0.01:
	        gain_str = "∞ (untrained baseline = 0 pts)"
	    else:
	        gain_str = f"{(avg_s - avg_r) / avg_r * 100:+.0f}%"

	    # FIX 6: detect outliers ±1.5σ
	    s_arr = np.array(s_imprs)
	    s_mean = s_arr.mean()
	    s_std = s_arr.std()
	    outlier_i = [i for i, v in enumerate(s_imprs) if abs(v - s_mean) > 1.5 * s_std]

	    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
	    try:
	        fig.suptitle(
	            "SQL Database Engineer Agent — Training Results\n"
	            "Random (untrained) vs Strategic (GRPO-trained)",
	            fontsize=13, fontweight="bold",
	        )

	        # ── Left: Bar chart ───────────────────────────────────────
	        w = 0.35
	        bars_r = ax1.bar([e - w / 2 for e in eps], r_imprs, w,
	                         color="crimson", alpha=0.75, label="Untrained (random)")
	        ax1.bar([e + w / 2 for e in eps], s_imprs, w,
	                color="seagreen", alpha=0.85, label="Trained (GRPO)")

	        # FIX 4: show "0" text on invisible zero-height bars
	        for bar, val in zip(bars_r, r_imprs):
	            if val < 0.5:
	                ax1.text(
	                    bar.get_x() + bar.get_width() / 2, 0.8,
	                    "0", ha="center", va="bottom", fontsize=6, color="crimson",
	                )

	        # FIX 6: mark outliers with ★ just above the strategic bar
	        for idx in outlier_i:
	            ax1.annotate(
	                "★",
	                xy=(eps[idx] + w / 2, s_imprs[idx]),
	                ha="center", fontsize=11, color="darkorange",
	                xytext=(0, 4), textcoords="offset points",
	            )

	        ax1.set_xlabel("Scenario #")
	        ax1.set_ylabel("DB Performance Improvement (pts)")
	        ax1.set_title("Performance Gain per Scenario\n★ = outlier (±1.5σ)")
	        ax1.set_ylim(0, 100)
	        ax1.set_xticks(eps)
	        ax1.legend(fontsize=9)
	        ax1.grid(True, alpha=0.3, axis="y")

	        # ── Right: Cumulative average ─────────────────────────────
	        # Running mean in one pass: cumulative sum divided by the count.
	        counts = np.arange(1, len(scenarios) + 1)
	        cr = np.cumsum(r_imprs) / counts
	        cs = np.cumsum(s_imprs) / counts
	        ax2.plot(eps, cr, "r-o", lw=2, ms=5, label="Untrained avg")
	        ax2.plot(eps, cs, "g-o", lw=2, ms=5, label="Trained avg")
	        ax2.fill_between(
	            eps, cr, cs,
	            where=cs >= cr,
	            alpha=0.20, color="green", label="Improvement gap",
	        )
	        ax2.set_xlabel("Scenario #")
	        ax2.set_ylabel("Cumulative Avg Improvement (pts)")
	        ax2.set_title("Cumulative Average — Trained vs Untrained")
	        ax2.set_ylim(0, 80)
	        ax2.legend(fontsize=9)
	        ax2.grid(True, alpha=0.3)

	        # FIX 2: clean bottom stats
	        fig.text(
	            0.5, 0.01,
	            f"Random avg: +{avg_r:.1f} pts | "
	            f"Trained avg: +{avg_s:.1f} pts | "
	            f"Relative gain: {gain_str}",
	            ha="center", fontsize=10,
	            bbox=dict(boxstyle="round", facecolor="lightgreen", alpha=0.5),
	        )

	        # Leave the bottom 8% of the canvas free for the stats box.
	        plt.tight_layout(rect=[0, 0.08, 1, 1])
	        plt.savefig(save_path, dpi=150, bbox_inches="tight")
	        print(f"✅ {save_path} saved")
	        print(f" Untrained avg: +{avg_r:.1f} pts")
	        print(f" Trained avg: +{avg_s:.1f} pts")
	        print(f" Gain: {gain_str}")
	        if outlier_i:
	            print(f" Outliers (★): scenarios {[eps[i] for i in outlier_i]}")
	    finally:
	        # pyplot keeps every figure alive until it is closed explicitly.
	        plt.close(fig)


	def _score_gain(scenario: dict, actions: list) -> float:
	    """Return the performance-score gain, floored at 0, from applying *actions* to a fresh simulator."""
	    sim = DatabaseSimulator(scenario)
	    baseline = sim.get_performance_score()
	    for action_name, params in actions:
	        sim.apply_action(action_name, params)
	    return max(0.0, sim.get_performance_score() - baseline)


	# ─────────────────────────────────────────────
	# MAIN
	# ─────────────────────────────────────────────

	# Script entry point: plot the loss curve from the saved training logs when
	# they exist on disk, then plot the reward comparison.
	if __name__ == "__main__":
	print("🔧 Generating clean plots...\n")

	# Load training logs saved by train_agent.py
	# (looked up as training_logs.json inside OUTPUT_DIR)
	log_path = os.path.join(OUTPUT_DIR, "training_logs.json")
	if os.path.exists(log_path):
	with open(log_path) as f:
	logs = json.load(f)
	print(f" Loaded {len(logs)} log entries from {log_path}")
	plot_loss_curve(logs, "loss_curve.png")
	else:
	# No log file: print the snippet that would create it instead of plotting.
	print(f"⚠️ {log_path} not found.")
	print(" Add this after trainer.train() in train_agent.py:")
	print(" import json")
	print(f" with open('{OUTPUT_DIR}/training_logs.json','w') as f:")
	print(" json.dump(trainer.state.log_history, f)")
	print()

	# The reward comparison takes only an output path; it does not use the logs.
	plot_reward_curve("reward_curve.png")
	print("\n✅ Done! Push both files to GitHub.")