import sys
import json
import random
from pathlib import Path
from typing import Dict, List

import numpy as np

# Make the repository root importable before the project-local imports below,
# so this file also works when executed directly as a script.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from env.multi_agent_env import (
    MultiAgentTradingEnv,
    RISK_MANAGER,
    PORTFOLIO_MGR,
    TRADER,
)
from training.train_multi_agent import (
    RuleRiskManagerPolicy,
    RulePortfolioManagerPolicy,
)

# NOTE(review): the wording below is unchanged, but the line breaks inside the
# literal were reconstructed at sentence boundaries -- confirm against the
# format the training data expects. The format description also shows no
# delimiter between the free-text reasoning and the JSON object; verify that
# nothing (e.g. markup tags) was lost upstream.
SYSTEM_PROMPT = """You are a trading agent in a multi-agent governance system.
The Risk Manager has set governance constraints, and the Portfolio Manager has allocated capital.
Your job: propose a trade that maximizes profit while respecting these constraints.
Respond exactly in this format:
Your reasoning about the market state, risk constraints, and trade decision.
{"direction": 0, "size": 0.0, "sl": 0, "tp": 0}
"""

# Trader observation layout: base(24) + rm_msg(3) + pm_msg(2) = 29.
_BASE_OBS_DIMS = 24
_RM_MSG_END = 27  # rm_msg occupies [24, 27): size_limit, allow_new, force_reduce
_PM_MSG_END = 29  # pm_msg occupies [27, 29): cap_alloc, override_strength


def _scenario_from_trader_obs(obs) -> Dict:
    """Decode one Trader observation into a JSON-friendly scenario dict."""
    base_obs = obs[:_BASE_OBS_DIMS].tolist()
    rm_msg = obs[_BASE_OBS_DIMS:_RM_MSG_END].tolist()
    pm_msg = obs[_RM_MSG_END:_PM_MSG_END].tolist()

    rm_size_limit = round(float(rm_msg[0]), 3)
    # Binary flags travel as floats in the message; threshold at 0.5.
    rm_allow_new = bool(rm_msg[1] > 0.5)
    rm_force_reduce = bool(rm_msg[2] > 0.5)
    pm_cap_alloc = round(float(pm_msg[0]), 3)
    pm_override = round(float(pm_msg[1]), 3)

    return {
        "state": [round(float(x), 4) for x in base_obs[:5]],
        "full_obs": [round(float(x), 4) for x in base_obs],
        "rm_size_limit": rm_size_limit,
        "rm_allow_new": rm_allow_new,
        "rm_force_reduce": rm_force_reduce,
        "pm_cap_alloc": pm_cap_alloc,
        "pm_override": pm_override,
        "signals": {
            "ta": round(float(obs[5] * 2 - 1), 3),  # RSI mapped to [-1,1]
            "fa": round(float(obs[8]), 3),  # MACD as FA proxy
            "position_limit": rm_size_limit,
            "rm_size_limit": rm_size_limit,
        },
    }


def _random_trader_action() -> Dict:
    """Sample a random Trader action (random direction/size, sl and tp at 0)."""
    return {
        "direction": random.choice([0, 1, 2]),
        "size": np.array([random.uniform(0.05, 0.3)], dtype=np.float32),
        "sl": np.array([0.0], dtype=np.float32),
        "tp": np.array([0.0], dtype=np.float32),
    }


def generate_pz_scenarios(
    n: int = 500,
    difficulty: str = "easy",
    max_env_steps: int = 100,
) -> List[Dict]:
    """Run the PZ env with rule policies to generate realistic scenarios.

    The Risk Manager and Portfolio Manager are driven by their rule-based
    policies; the Trader takes random actions purely to advance the env.
    A scenario is recorded every time the Trader is the selected agent.

    Each scenario captures:
      - "state" / "full_obs": the first 5 and all 24 base dims of the
        Trader's 29-dim observation, rounded to 4 decimals
      - the RM constraints decoded from the message
        (rm_size_limit, rm_allow_new, rm_force_reduce)
      - the PM allocation decoded from the message
        (pm_cap_alloc, pm_override)
      - "signals": values derived from obs[5], obs[8] and the RM size limit

    Args:
        n: Number of scenarios to collect.
        difficulty: Passed to ``MultiAgentTradingEnv``.
        max_env_steps: Passed to the env as ``max_steps`` and also used as
            the per-episode cap on loop iterations here.

    Returns:
        A shuffled list of at most ``n`` scenario dicts. It can be shorter
        than ``n`` if ``n * 3`` episodes do not produce enough Trader turns.
    """
    env = MultiAgentTradingEnv(difficulty=difficulty, max_steps=max_env_steps)
    rm_policy = RuleRiskManagerPolicy()
    pm_policy = RulePortfolioManagerPolicy()

    scenarios: List[Dict] = []
    max_attempts = n * 3  # episode budget so a stuck env cannot loop forever
    attempts = 0

    while len(scenarios) < n and attempts < max_attempts:
        env.reset()
        attempts += 1
        step_count = 0

        while env.agents and step_count < max_env_steps:
            agent = env.agent_selection

            if agent == RISK_MANAGER:
                env.step(rm_policy.act(env.observe(agent)))
            elif agent == PORTFOLIO_MGR:
                env.step(pm_policy.act(env.observe(agent)))
            elif agent == TRADER:
                scenarios.append(_scenario_from_trader_obs(env.observe(agent)))
                if len(scenarios) >= n:
                    break
                # Take a random trader action so the env advances.
                env.step(_random_trader_action())

            # Counted per agent turn, so the loop stays bounded even when the
            # selected agent matches none of the branches above.
            # NOTE(review): the source formatting was collapsed; confirm the
            # counter was not meant to advance on Trader turns only.
            step_count += 1

    random.shuffle(scenarios)
    return scenarios[:n]


def build_prompt_multiagent(scenario: Dict) -> str:
    """Build the prompt for the Trader, including RM and PM constraints.

    Args:
        scenario: A scenario dict as produced by ``generate_pz_scenarios``.
            ``rm_size_limit`` and ``pm_cap_alloc`` are required; the other
            keys fall back to defaults (new positions allowed, no forced
            reduction, no override, all-ones state, empty signals).

    Returns:
        ``SYSTEM_PROMPT`` followed by a one-line summary of each manager's
        constraints and the scenario serialized as compact JSON.

    Raises:
        KeyError: If ``rm_size_limit`` or ``pm_cap_alloc`` is missing.
    """
    rm_limit = scenario["rm_size_limit"]
    rm_allow_str = "allowed" if scenario.get("rm_allow_new", True) else "BLOCKED"
    rm_force_str = "yes" if scenario.get("rm_force_reduce", False) else "no"
    pm_cap = scenario["pm_cap_alloc"]
    # An override strength below 0.5 is reported as inactive.
    pm_override_str = "none" if scenario.get("pm_override", 0.0) < 0.5 else "ACTIVE"

    state = scenario.get("state", [1.0, 1.0, 1.0, 1.0, 1.0])
    signals = scenario.get("signals", {})

    governance = {
        "rm_size_limit": rm_limit,
        "rm_allow_new": rm_allow_str,
        "rm_force_reduce": rm_force_str,
        "pm_cap_alloc": pm_cap,
        "pm_override": pm_override_str,
    }
    # Compact separators keep the serialized scenario as short as possible.
    body = json.dumps(
        {"state": state, "signals": signals, "governance": governance},
        separators=(",", ":"),
    )

    return (
        f"{SYSTEM_PROMPT}\n"
        f"The Risk Manager has set the following constraints: "
        f"size_limit={rm_limit:.2f}, new_positions={rm_allow_str}, force_reduce={rm_force_str}.\n"
        f"The Portfolio Manager allocated: capital_cap={pm_cap:.2f}, override={pm_override_str}.\n\n"
        f"Scenario:\n{body}\n"
    )