| """ |
| Multi-Agent Reward Visualization Script. |
| |
| Loads training metrics from the multi-agent training run and generates: |
| - Per-agent reward curves (RM, PM, Trader on same axes) |
| - Portfolio grade and Sharpe ratio progression over training |
| - PnL curve over training |
| - Baseline comparison chart |
| |
| Saves all to plots/ as PNG with labeled axes and titles. |
| |
| Usage: |
| python training/plot_multiagent.py --input outputs/multi_agent/metrics_final.json --output plots/ |
| """ |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import json |
| import sys |
| from pathlib import Path |
|
|
| import numpy as np |
|
|
# ROOT is the directory two levels above this file, i.e. the parent of the
# folder that contains this script (presumably the repository root -- verify
# against the project layout). It is placed at the front of sys.path, once,
# so that modules located under it can be imported when the script is run
# directly.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
|
|
|
|
def smooth(values: list[float], window: int = 10) -> np.ndarray:
    """Compute an unweighted moving average of ``values``.

    Args:
        values: Sequence of numeric samples.
        window: Number of consecutive samples averaged per output point.

    Returns:
        ``np.array(values)`` unchanged when there are fewer than ``window``
        samples; otherwise an array of ``len(values) - window + 1`` means,
        one for each position where the window fits entirely inside the data.
    """
    if len(values) >= window:
        # Box filter: every sample inside the window gets weight 1/window.
        box_filter = np.ones(window) / window
        return np.convolve(values, box_filter, mode="valid")
    # Too few samples to fill even one window: hand the data back as-is.
    return np.array(values)
|
|
|
|
def plot_per_agent_rewards(metrics: dict, output_dir: Path):
    """Plot the smoothed per-agent returns on a single set of axes.

    Reads the ``episode``, ``trader_return``, ``rm_return`` and ``pm_return``
    lists from ``metrics``, smooths each return series with ``smooth`` and
    saves the figure as ``reward_curve.png`` inside ``output_dir``.

    Args:
        metrics: Mapping of metric name to a per-episode list of values.
        output_dir: Directory the PNG is written to.

    Returns:
        None. When ``metrics`` has no episode data a notice is printed and
        the function returns before any figure is created.
    """
    episodes = metrics.get("episode", [])
    if not episodes:
        # Bail out before creating a figure so nothing is left open.
        print(" No episode data found, skipping reward plot.")
        return

    # Imported lazily, and only once there is something to draw.
    import matplotlib.pyplot as plt

    # Smoothing window scales with run length (5% of episodes, at least 1).
    window = max(1, len(episodes) // 20)
    agent_series = (
        ("Trader", "trader_return", "#2ecc71"),
        ("Risk Manager", "rm_return", "#e74c3c"),
        ("Portfolio Manager", "pm_return", "#3498db"),
    )

    fig, ax = plt.subplots(figsize=(10, 6))
    for label, key, color in agent_series:
        smoothed = smooth(metrics.get(key, []), window)
        # Pair x and y by the shorter length so a series that is longer or
        # shorter than the episode list cannot trigger a shape mismatch.
        count = min(len(episodes), len(smoothed))
        ax.plot(episodes[:count], smoothed[:count],
                label=label, color=color, linewidth=2)

    ax.set_xlabel("Episode", fontsize=12)
    ax.set_ylabel("Discounted Return", fontsize=12)
    ax.set_title("QuantHive: Per-Agent Reward Curves (Multi-Agent Training)", fontsize=14)
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    path = output_dir / "reward_curve.png"
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print(f" Saved: {path}")
|
|
|
|
def plot_grade_and_sharpe(metrics: dict, output_dir: Path):
    """Draw side-by-side panels of smoothed grade and Sharpe ratio.

    Reads the ``episode``, ``grade`` and ``sharpe`` lists from ``metrics``
    and writes ``grade_progression.png`` into ``output_dir``. Prints a notice
    and returns early when the episode or grade list is empty.
    """
    import matplotlib.pyplot as plt

    episodes = metrics.get("episode", [])
    grades = metrics.get("grade", [])
    sharpes = metrics.get("sharpe", [])

    if not (episodes and grades):
        print(" No grade data found, skipping grade plot.")
        return

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    window = max(1, len(episodes) // 20)

    # One entry per panel, left to right: data, line colour, y label, title.
    panels = (
        (grades, "#9b59b6", "Grade [0, 1]", "Portfolio Grade Over Training"),
        (sharpes, "#f39c12", "Sharpe Ratio", "Sharpe Ratio Over Training"),
    )
    for axis, (series, colour, y_label, title) in zip(axes, panels):
        curve = smooth(series, window)
        axis.plot(episodes[:len(curve)], curve, color=colour, linewidth=2)
        axis.set_xlabel("Episode")
        axis.set_ylabel(y_label)
        axis.set_title(title)
        axis.grid(True, alpha=0.3)

    plt.tight_layout()
    path = output_dir / "grade_progression.png"
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print(f" Saved: {path}")
|
|
|
|
def plot_baseline_comparison(metrics: dict, output_dir: Path):
    """Plot early-training versus late-training averages as grouped bars.

    The "baseline" bars are the mean of the first 20 recorded values of each
    metric and the "trained" bars are the mean of the last 20. The metrics
    compared are ``trader_return``, ``grade``, ``max_drawdown`` and
    ``sharpe``; the chart is saved as ``baseline_comparison.png`` inside
    ``output_dir``.

    Args:
        metrics: Mapping of metric name to a per-episode list of values.
        output_dir: Directory the PNG is written to.

    Returns:
        None. With fewer than 20 episodes a notice is printed and the
        function returns before any figure is created.
    """
    span = 20  # number of episodes averaged at each end of training

    episodes = metrics.get("episode", [])
    if not episodes or len(episodes) < span:
        print(" Not enough data for baseline comparison, skipping.")
        return

    # Imported lazily, and only once there is something to draw.
    import matplotlib.pyplot as plt

    def _mean_or_zero(values):
        """Mean of ``values``; 0.0 for an empty sequence instead of NaN."""
        return float(np.mean(values)) if len(values) else 0.0

    compared = (
        ("Trader Return", "trader_return"),
        ("Grade", "grade"),
        ("Max Drawdown", "max_drawdown"),
        ("Sharpe", "sharpe"),
    )
    metrics_names = [label for label, _ in compared]

    early, late = [], []
    for _, key in compared:
        series = metrics.get(key, [])
        # Slice each series by its own ends: a missing or short series then
        # yields a 0.0 bar rather than a NaN from averaging an empty slice.
        early.append(_mean_or_zero(series[:span]))
        late.append(_mean_or_zero(series[-span:]))

    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(len(metrics_names))
    width = 0.35

    ax.bar(x - width / 2, early, width, label="Early (first 20 eps)", color="#e74c3c", alpha=0.8)
    ax.bar(x + width / 2, late, width, label="Late (last 20 eps)", color="#2ecc71", alpha=0.8)

    ax.set_ylabel("Value")
    ax.set_title("QuantHive: Baseline vs Trained Performance")
    ax.set_xticks(x)
    ax.set_xticklabels(metrics_names)
    ax.legend()
    ax.grid(True, alpha=0.3, axis="y")

    plt.tight_layout()
    path = output_dir / "baseline_comparison.png"
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print(f" Saved: {path}")
|
|
|
|
def plot_loss_curve(metrics: dict, output_dir: Path):
    """Plot the smoothed PnL percentage per episode with shaded regions.

    Reads the ``episode`` and ``pnl_pct`` lists from ``metrics``, draws the
    smoothed curve with a zero reference line, shades the area above zero in
    green and the area at or below zero in red, and writes ``loss_curve.png``
    into ``output_dir``. Prints a notice and returns early when either list
    is empty.
    """
    import matplotlib.pyplot as plt

    episodes = metrics.get("episode", [])
    pnl = metrics.get("pnl_pct", [])

    if not (episodes and pnl):
        print(" No PnL data found, skipping loss plot.")
        return

    fig, ax = plt.subplots(figsize=(10, 6))
    window = max(1, len(episodes) // 20)

    curve = smooth(pnl, window)
    xs = episodes[:len(curve)]
    curve_values = np.array(curve)

    ax.plot(xs, curve, color="#e74c3c", linewidth=2)
    ax.axhline(y=0, color="gray", linestyle="--", alpha=0.5)

    # Shade gains first, then losses (zero counts as a loss region).
    shading = (
        (curve_values > 0, "#2ecc71"),
        (curve_values <= 0, "#e74c3c"),
    )
    for mask, colour in shading:
        ax.fill_between(xs, 0, curve, where=mask, color=colour, alpha=0.2)

    ax.set_xlabel("Episode", fontsize=12)
    ax.set_ylabel("PnL %", fontsize=12)
    ax.set_title("QuantHive: PnL Over Training (Policy Convergence)", fontsize=14)
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    path = output_dir / "loss_curve.png"
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print(f" Saved: {path}")
|
|
|
|
def main():
    """Command-line entry point: load a metrics JSON file and render plots.

    Parses ``--input`` (metrics JSON path) and ``--output`` (directory for
    the PNG files), loads the metrics and calls each plotting function in
    turn.

    Exits with status 1, after printing an error, when the input file does
    not exist, is not valid JSON, or does not contain a JSON object. The
    output directory is only created once the input has been loaded
    successfully.
    """
    parser = argparse.ArgumentParser(description="Plot multi-agent training results")
    parser.add_argument("--input", type=str, default="outputs/multi_agent/metrics_final.json",
                        help="Path to training metrics JSON file")
    parser.add_argument("--output", type=str, default="plots/",
                        help="Output directory for PNG plots")
    args = parser.parse_args()

    input_path = Path(args.input)
    output_dir = Path(args.output)

    # Validate the input before touching the filesystem, so a failed run
    # does not leave an empty output directory behind.
    if not input_path.exists():
        print(f"Error: Metrics file not found: {input_path}")
        print("Run training first: python training/train_multi_agent.py")
        sys.exit(1)

    try:
        # Explicit encoding: do not depend on the platform's locale default.
        with open(input_path, "r", encoding="utf-8") as f:
            metrics = json.load(f)
    except json.JSONDecodeError as exc:
        print(f"Error: Metrics file is not valid JSON: {input_path} ({exc})")
        sys.exit(1)

    # The plotting functions look metrics up by key, so a top-level JSON
    # object is required.
    if not isinstance(metrics, dict):
        print(f"Error: Metrics file must contain a JSON object: {input_path}")
        sys.exit(1)

    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"Loaded {len(metrics.get('episode', []))} episodes from {input_path}")
    print(f"Saving plots to {output_dir}/")

    plot_per_agent_rewards(metrics, output_dir)
    plot_grade_and_sharpe(metrics, output_dir)
    plot_baseline_comparison(metrics, output_dir)
    plot_loss_curve(metrics, output_dir)

    print("\nAll plots generated successfully.")
|
|
|
|
# Run the CLI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|
|