| import os |
| import sys |
| from pathlib import Path |
| os.environ["OPENBLAS_NUM_THREADS"] = "1" |
| os.environ["MKL_NUM_THREADS"] = "1" |
|
|
| ROOT = Path(__file__).resolve().parents[1] |
| if str(ROOT) not in sys.path: |
| sys.path.insert(0, str(ROOT)) |
|
|
| import numpy as np |
| import pandas as pd |
| from env.trading_env import TradingEnv |
| from training.config import TrainingConfig |
| from training.train import run_episode, run_random_baseline |
| from agents.researcher import QuantResearcher |
| from agents.fa_agent import FundamentalAnalyst |
| from agents.risk_model import RiskModeler |
| from agents.trader import QuantTrader |
| from agents.portfolio_manager import PortfolioManager |
| from utils.judge import LLMJudge |
| from utils.visualization import ( |
| plot_reward_curve, |
| plot_grade_progression, |
| plot_comparison_table, |
| ) |
| import argparse |
|
|
|
|
| def run_benchmark(episodes=50): |
| """ |
| Compare trained multi-agent pipeline vs random baseline |
| using the REAL agent interaction loop — no faked results. |
| """ |
| config = TrainingConfig( |
| tickers=["AAPL"], |
| num_episodes=episodes, |
| fast_mode=True, |
| max_steps=200, |
| ) |
| env = TradingEnv(difficulty="hard", max_steps=200) |
|
|
| |
| researcher = QuantResearcher() |
| fa_agent = FundamentalAnalyst(fast_mode=True) |
| risk_model = RiskModeler( |
| max_drawdown_limit=config.risk_max_drawdown, |
| max_exposure=config.risk_max_exposure, |
| vol_threshold=config.risk_vol_threshold, |
| ) |
| trader = QuantTrader(aggression=config.trader_aggression) |
| portfolio_manager = PortfolioManager(fast_mode=True) |
| judge = LLMJudge() |
|
|
| trained_metrics = [] |
| print(f"Running {episodes} Trained Episodes (Multi-Agent Pipeline)...") |
| for ep in range(episodes): |
| metrics, _ = run_episode( |
| env, researcher, fa_agent, risk_model, |
| trader, portfolio_manager, judge, config=config, |
| ) |
| trained_metrics.append(metrics) |
| if (ep + 1) % 10 == 0: |
| print(f" Trained ep {ep+1}/{episodes}: grade={metrics['final_grade']:.3f}, pnl={metrics['pnl_pct']:+.2%}") |
|
|
| |
| print(f"\nRunning {episodes} Baseline Episodes (Random)...") |
| random_metrics = run_random_baseline(config, num_episodes=episodes) |
|
|
| |
| def avg(metrics, key): |
| return np.mean([m[key] for m in metrics]) |
|
|
| print(f"\n{'='*60}") |
| print("BENCHMARK RESULTS") |
| print(f"{'='*60}") |
| print(f"\n{'Metric':<20} {'Random':>12} {'Trained':>12} {'Improvement':>14}") |
| print("-" * 60) |
|
|
| for key, label in [ |
| ("total_reward", "Avg Reward"), |
| ("final_grade", "Avg Grade"), |
| ("pnl_pct", "Avg PnL %"), |
| ("max_drawdown", "Avg Max DD"), |
| ("sharpe_ratio", "Avg Sharpe"), |
| ]: |
| r = avg(random_metrics, key) |
| t = avg(trained_metrics, key) |
| imp = t - r |
| sign = "+" if imp > 0 else "" |
| print(f" {label:<18} {r:>12.4f} {t:>12.4f} {sign}{imp:>13.4f}") |
|
|
| |
| print("\nGenerating comparison plots...") |
| plot_reward_curve(trained_metrics, random_metrics) |
| plot_grade_progression(trained_metrics, random_metrics) |
| plot_comparison_table(trained_metrics, random_metrics) |
| print("Done! Plots saved to plots/") |
|
|
|
|
| if __name__ == "__main__": |
| parser = argparse.ArgumentParser() |
| parser.add_argument("--episodes", type=int, default=50) |
| args = parser.parse_args() |
| run_benchmark(episodes=args.episodes) |
|
|