adityanaikhpt committed on
Commit
0dfa258
·
verified ·
1 Parent(s): 44ca509

Deploy: plot_rewards.py

Browse files
Files changed (1) hide show
  1. plot_rewards.py +62 -0
plot_rewards.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import pandas as pd

# The reward log is read from, and the plots are written to, the directory
# that contains this script.
_HERE = os.path.dirname(__file__)
LOG_FILE = os.path.join(_HERE, "rewards_log.csv")
OUT_DIR = os.path.join(_HERE, "results")
os.makedirs(OUT_DIR, exist_ok=True)

# Load the log and derive the two helper columns used for plotting:
#   global_step - 0-based row position, used as the x-axis
#   rolling_avg - mean of "reward" over the trailing 10 rows; min_periods=1
#                 means the first rows average over however many rows exist
df = pd.read_csv(LOG_FILE)
row_count = len(df)
df["global_step"] = range(row_count)
reward_window = df["reward"].rolling(window=10, min_periods=1)
df["rolling_avg"] = reward_window.mean()
13
+
14
# -- Plot 1: Reward curve -------------------------------
# Faint per-step reward line, a bolder rolling-average line on top of it, and
# a dashed horizontal reference at 0.5. Written to OUT_DIR/reward_curve.png.
steps = df["global_step"]
curve_path = os.path.join(OUT_DIR, "reward_curve.png")

fig, ax = plt.subplots(figsize=(12, 5))

ax.plot(steps, df["reward"], label="raw reward",
        color="#4A90D9", alpha=0.25, linewidth=1)
ax.plot(steps, df["rolling_avg"], label="10-step rolling avg",
        color="#1a5fa8", linewidth=2.5)
ax.axhline(0.5, label="baseline (0.5)",
           color="#999999", linestyle="--", linewidth=1)

ax.set_title("CodeArena - Agent Reward Over Training", fontsize=15, fontweight="bold")
ax.set_xlabel("Training Step", fontsize=13)
ax.set_ylabel("Reward (0 - 1)", fontsize=13)
# Upper limit sits slightly above 1 so a reward of 1.0 is not drawn on the frame.
ax.set_ylim(0, 1.05)
ax.grid(axis="y", alpha=0.3)
ax.legend(fontsize=11)

plt.tight_layout()
plt.savefig(curve_path, dpi=150)
plt.close()
print("Saved: results/reward_curve.png")
33
+
34
# -- Plot 2: Reward by task -----------------------------
def _bar_color(avg_reward):
    """Return the bar color for a mean reward, using the same bands as the legend."""
    if avg_reward > 0.7:
        return "#2ecc71"  # > 0.70 (strong)
    # >= so that exactly 0.40 falls in the "0.40-0.70" band the legend advertises
    # instead of being colored as "< 0.40".
    if avg_reward >= 0.4:
        return "#f39c12"  # 0.40-0.70 (learning)
    return "#e74c3c"  # < 0.40 (struggling); NaN means also end up here


# Mean reward per task_id, highest first.
task_avg = df.groupby("task_id")["reward"].mean().sort_values(ascending=False)
# Use string labels so matplotlib treats the x-axis as categorical and keeps the
# sorted order; numeric task ids would otherwise be placed at their numeric
# positions and the ranking would be lost.
task_labels = [str(task_id) for task_id in task_avg.index]

fig, ax = plt.subplots(figsize=(8, 5))
colors = [_bar_color(v) for v in task_avg.values]
bars = ax.bar(task_labels, task_avg.values, color=colors, edgecolor="white", width=0.5)

# Print each bar's value just above its top edge, centered on the bar.
for bar, val in zip(bars, task_avg.values):
    ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.02,
            f"{val:.2f}", ha="center", fontsize=11, fontweight="bold")

ax.set_xlabel("Task Category", fontsize=13)
ax.set_ylabel("Average Reward", fontsize=13)
ax.set_title("CodeArena - Average Reward by Task Category", fontsize=15, fontweight="bold")
# Headroom above 1.0 leaves space for the value labels drawn over the bars.
ax.set_ylim(0, 1.15)
ax.grid(axis="y", alpha=0.3)

# The legend is built by hand because the bars are colored individually
# rather than plotted as one labeled series per band.
legend_patches = [
    mpatches.Patch(color="#2ecc71", label="> 0.70 (strong)"),
    mpatches.Patch(color="#f39c12", label="0.40-0.70 (learning)"),
    mpatches.Patch(color="#e74c3c", label="< 0.40 (struggling)"),
]
ax.legend(handles=legend_patches, fontsize=10)
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "reward_by_task.png"), dpi=150)
plt.close()
print("Saved: results/reward_by_task.png")
print("\nAll plots saved to results/")