Spaces:
Sleeping
Sleeping
"""
make_chart.py
Generates the before/after reward chart using known scores.
Run: python make_chart.py
"""
| import matplotlib | |
| matplotlib.use("Agg") | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
# Rule-based agent (no LLM, no training) — measured locally.
# Keys are the per-task identifiers read by make_chart(); values are scores
# plotted on a 0-1 axis.
baseline_scores = {
    "task1": 0.100,
    "task2": 0.113,
    "task3": 0.218,
    "overall": 0.144,
}
# Qwen2.5-72B via HF Inference API — from your clean run logs.
# Same key layout as the other score tables so the series line up per task.
llm_scores = {
    "task1": 0.100,
    "task2": 0.113,
    "task3": 0.262,
    "overall": 0.158,
}
# After GRPO training — update these once Colab finishes.
# If Colab is not done yet, use the llm_scores values as a placeholder
# (the numbers below currently mirror llm_scores).
grpo_scores = {
    "task1": 0.100,
    "task2": 0.113,
    "task3": 0.262,
    "overall": 0.158,
}
def _style_axes(ax, x, tick_labels):
    """Apply the shared dark theme, x tick labels and horizontal grid to *ax*."""
    ax.set_facecolor("#16213e")
    ax.set_xticks(x)
    ax.set_xticklabels(tick_labels, color="white", fontsize=10)
    ax.tick_params(colors="white")
    ax.spines[:].set_color("#2d3436")
    ax.yaxis.grid(True, alpha=0.2, color="white")
    # Keep the grid lines behind the bars.
    ax.set_axisbelow(True)


def _draw_score_panel(ax, x, tick_labels, series, width=0.25):
    """Draw grouped bars, one group per task, with a value label on each bar.

    *series* is a list of ``(legend_label, values, color)`` tuples drawn left
    to right inside each group.
    """
    centre = (len(series) - 1) / 2
    for index, (legend_label, values, color) in enumerate(series):
        # Offsets are symmetric around the tick: -width, 0, +width for 3 series.
        bars = ax.bar(x + (index - centre) * width, values, width,
                      label=legend_label, color=color, edgecolor="#2d3436")
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., height + 0.008,
                    f"{height:.2f}", ha="center", va="bottom",
                    fontsize=8.5, color="white")

    ax.set_ylabel("Score (0 - 1)", color="white", fontsize=11)
    ax.set_title("Score Comparison Across Training Stages", color="white",
                 fontsize=12, fontweight="bold", pad=10)
    # Headroom above 1.0 so labels and the legend do not collide with bars.
    ax.set_ylim(0, 1.2)
    _style_axes(ax, x, tick_labels)
    ax.legend(facecolor="#0f3460", edgecolor="#2d3436", labelcolor="white", fontsize=9)


def _draw_delta_panel(ax, x, tick_labels, deltas):
    """Draw one bar per task for the GRPO-minus-baseline delta (green up, red down)."""
    colors = ["#00b894" if d >= 0 else "#d63031" for d in deltas]
    bars = ax.bar(x, deltas, width=0.4, color=colors, edgecolor="#2d3436")
    for bar, delta in zip(bars, deltas):
        # Anchor the label on the side of the bar that faces away from zero so
        # it never overlaps the bar, whatever the y-axis scale turns out to be.
        if delta >= 0:
            ypos, valign = bar.get_height() + 0.004, "bottom"
        else:
            ypos, valign = bar.get_height() - 0.004, "top"
        ax.text(bar.get_x() + bar.get_width() / 2., ypos,
                f"{delta:+.3f}", ha="center", va=valign, fontsize=11,
                fontweight="bold", color="white")

    ax.axhline(0, color="white", linewidth=0.8, alpha=0.4)
    ax.set_ylabel("Score Delta (GRPO vs Rule-Based)", color="white", fontsize=10)
    ax.set_title("Improvement: Rule-Based → After GRPO", color="white",
                 fontsize=12, fontweight="bold", pad=10)
    _style_axes(ax, x, tick_labels)


def _print_score_table(baseline, llm, grpo):
    """Print a fixed-width per-task score table to stdout.

    Missing keys count as 0; the Delta column is GRPO minus Rule-Based.
    """
    rows = [("task1", "Task 1"), ("task2", "Task 2"), ("task3", "Task 3"), ("overall", "Overall")]
    print("\n" + "=" * 52)
    print(f"{'Task':<14} {'Rule-Based':>10} {'Qwen-72B':>10} {'GRPO':>8} {'Delta':>8}")
    print("-" * 52)
    for key, label in rows:
        base_score = baseline.get(key, 0)
        llm_score = llm.get(key, 0)
        grpo_score = grpo.get(key, 0)
        delta = grpo_score - base_score
        print(f"{label:<14} {base_score:>10.3f} {llm_score:>10.3f} {grpo_score:>8.3f} {delta:>+8.3f}")
    print("=" * 52)


def make_chart(baseline, llm, grpo, output="reward_chart.png"):
    """Render the two-panel score chart, save it, and print a summary table.

    The left panel shows grouped bars (Rule-Based / Qwen2.5-72B / After GRPO)
    for each task; the right panel shows the GRPO-minus-Rule-Based delta.

    Args:
        baseline: Mapping with keys "task1", "task2", "task3", "overall" for
            the rule-based agent. Missing keys are plotted as 0.
        llm: Same layout, scores for the untrained LLM.
        grpo: Same layout, scores after GRPO training.
        output: Path the PNG is written to.

    Returns:
        None. The function writes ``output`` and prints to stdout.
    """
    tasks = ["Task 1\n(Classify)", "Task 2\n(Action)", "Task 3\n(Full Resolve)", "Overall"]
    keys = ["task1", "task2", "task3", "overall"]

    series = [
        ("Rule-Based", [baseline.get(k, 0) for k in keys], "#636e72"),
        ("Qwen2.5-72B", [llm.get(k, 0) for k in keys], "#0984e3"),
        ("After GRPO", [grpo.get(k, 0) for k in keys], "#00b894"),
    ]
    # Rounded so the bar height and its printed label always agree.
    deltas = [round(grpo.get(k, 0) - baseline.get(k, 0), 3) for k in keys]
    x = np.arange(len(tasks))

    fig, axes = plt.subplots(1, 2, figsize=(15, 6))
    try:
        fig.patch.set_facecolor("#1a1a2e")
        _draw_score_panel(axes[0], x, tasks, series, width=0.25)
        _draw_delta_panel(axes[1], x, tasks, deltas)

        fig.suptitle(
            "Support Ticket Env — Training Results\nModel: Qwen2.5-0.5B-Instruct + GRPO | OpenEnv x Scalar Hackathon 2026",
            color="white", fontsize=11, y=1.02
        )
        fig.tight_layout()
        # bbox_inches="tight" keeps the suptitle (placed above y=1) in the image.
        fig.savefig(output, dpi=180, bbox_inches="tight", facecolor=fig.get_facecolor())
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it even if saving fails so repeated calls do not leak.
        plt.close(fig)
    print(f"Chart saved: {output}")

    _print_score_table(baseline, llm, grpo)
if __name__ == "__main__":
    # Script entry point: render the chart from the module-level score tables
    # using the default output path.
    make_chart(baseline_scores, llm_scores, grpo_scores)