| import sys |
| import json |
| import random |
| from pathlib import Path |
| from typing import Dict, List |
| import numpy as np |
|
|
# ROOT is the directory one level above the folder that contains this file.
# It is prepended to sys.path (once) so that the `env` and `training` imports
# that follow can be resolved — presumably this is the project root; confirm
# against the repository layout.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
|
|
| from env.multi_agent_env import ( |
| MultiAgentTradingEnv, |
| RISK_MANAGER, |
| PORTFOLIO_MGR, |
| TRADER, |
| ) |
| from training.train_multi_agent import ( |
| RuleRiskManagerPolicy, |
| RulePortfolioManagerPolicy, |
| ) |
|
|
# Instruction header placed at the start of every Trader prompt built by
# build_prompt_multiagent. It asks for a <thought> block followed by an
# <action> block holding a JSON object with direction/size/sl/tp keys.
SYSTEM_PROMPT = """You are a trading agent in a multi-agent governance system.
The Risk Manager has set governance constraints, and the Portfolio Manager has allocated capital.
Your job: propose a trade that maximizes profit while respecting these constraints.

Respond exactly in this format:
<thought>
Your reasoning about the market state, risk constraints, and trade decision.
</thought>
<action>
{"direction": 0, "size": 0.0, "sl": 0, "tp": 0}
</action>
"""
|
|
def generate_pz_scenarios(
    n: int = 500,
    difficulty: str = "easy",
    max_env_steps: int = 100,
) -> List[Dict]:
    """Collect Trader-turn snapshots by rolling out the multi-agent env.

    The Risk Manager and Portfolio Manager are driven by their rule-based
    policies, while the Trader takes random actions so episodes keep moving.
    Each time it is the Trader's turn, its observation is converted into a
    scenario dict (see ``_scenario_from_trader_obs``) before the Trader acts.

    Args:
        n: Number of scenarios wanted. At most ``n * 3`` episodes are started;
            if they yield fewer than ``n`` snapshots, the shorter list is
            returned.
        difficulty: Forwarded to ``MultiAgentTradingEnv``.
        max_env_steps: Forwarded to the env as ``max_steps`` and also used
            here as the cap on agent turns processed per episode.

    Returns:
        A shuffled list of at most ``n`` scenario dicts.
    """
    env = MultiAgentTradingEnv(difficulty=difficulty, max_steps=max_env_steps)
    risk_policy = RuleRiskManagerPolicy()
    portfolio_policy = RulePortfolioManagerPolicy()

    collected: List[Dict] = []
    episodes_left = n * 3  # episode budget; prevents looping forever

    while len(collected) < n and episodes_left > 0:
        episodes_left -= 1
        env.reset()

        turns = 0  # counts agent turns (not full RM -> PM -> Trader cycles)
        while env.agents and turns < max_env_steps:
            turns += 1
            current = env.agent_selection

            if current == RISK_MANAGER:
                env.step(risk_policy.act(env.observe(current)))
                continue

            if current == PORTFOLIO_MGR:
                env.step(portfolio_policy.act(env.observe(current)))
                continue

            if current == TRADER:
                collected.append(
                    _scenario_from_trader_obs(env.observe(current))
                )
                if len(collected) >= n:
                    # Quota reached: stop before drawing another random action.
                    break

                # Random Trader move; draw direction first, then size.
                direction = random.choice([0, 1, 2])
                size = random.uniform(0.05, 0.3)
                env.step({
                    "direction": direction,
                    "size": np.array([size], dtype=np.float32),
                    "sl": np.array([0.0], dtype=np.float32),
                    "tp": np.array([0.0], dtype=np.float32),
                })

    random.shuffle(collected)
    return collected[:n]


def _scenario_from_trader_obs(obs) -> Dict:
    """Convert one Trader observation vector into a scenario dict.

    Entries 0-23 are kept as the base observation, entries 24-26 are read as
    the Risk Manager message (size limit, allow-new flag, force-reduce flag)
    and entries 27-28 as the Portfolio Manager message (capital allocation,
    override). Flags are thresholded at 0.5.
    """
    base_rounded = [round(float(v), 4) for v in obs[:24].tolist()]
    rm_fields = obs[24:27].tolist()
    pm_fields = obs[27:29].tolist()

    size_limit = round(float(rm_fields[0]), 3)
    allow_new = bool(rm_fields[1] > 0.5)
    force_reduce = bool(rm_fields[2] > 0.5)
    cap_alloc = round(float(pm_fields[0]), 3)
    override = round(float(pm_fields[1]), 3)

    signals = {
        # Rescales obs[5] via x * 2 - 1 (presumably [0, 1] -> [-1, 1]; confirm
        # against the env's observation layout).
        "ta": round(float(obs[5] * 2 - 1), 3),
        "fa": round(float(obs[8]), 3),
        # Both keys intentionally carry the same RM size limit.
        "position_limit": size_limit,
        "rm_size_limit": size_limit,
    }

    return {
        "state": base_rounded[:5],
        "full_obs": base_rounded,
        "rm_size_limit": size_limit,
        "rm_allow_new": allow_new,
        "rm_force_reduce": force_reduce,
        "pm_cap_alloc": cap_alloc,
        "pm_override": override,
        "signals": signals,
    }
|
|
|
|
def build_prompt_multiagent(scenario: Dict) -> str:
    """Render the Trader prompt for one scenario.

    The prompt is ``SYSTEM_PROMPT`` followed by a one-line summary of the
    Risk Manager constraints, a one-line summary of the Portfolio Manager
    allocation, and a compact JSON dump of the state, signals and governance.

    Args:
        scenario: Scenario dict. ``rm_size_limit`` and ``pm_cap_alloc`` are
            required (a missing key raises ``KeyError``); ``rm_allow_new``,
            ``rm_force_reduce``, ``pm_override``, ``state`` and ``signals``
            fall back to defaults when absent.

    Returns:
        The full prompt text.
    """
    size_cap = scenario["rm_size_limit"]

    if scenario.get("rm_allow_new", True):
        new_positions = "allowed"
    else:
        new_positions = "BLOCKED"

    if scenario.get("rm_force_reduce", False):
        force_reduce = "yes"
    else:
        force_reduce = "no"

    capital_cap = scenario["pm_cap_alloc"]

    # An override below 0.5 is reported as inactive.
    if scenario.get("pm_override", 0.0) < 0.5:
        override = "none"
    else:
        override = "ACTIVE"

    governance = {
        "rm_size_limit": size_cap,
        "rm_allow_new": new_positions,
        "rm_force_reduce": force_reduce,
        "pm_cap_alloc": capital_cap,
        "pm_override": override,
    }
    payload = json.dumps(
        {
            "state": scenario.get("state", [1.0, 1.0, 1.0, 1.0, 1.0]),
            "signals": scenario.get("signals", {}),
            "governance": governance,
        },
        separators=(",", ":"),
    )

    pieces = [
        f"{SYSTEM_PROMPT}\n",
        f"The Risk Manager has set the following constraints: "
        f"size_limit={size_cap:.2f}, new_positions={new_positions}, force_reduce={force_reduce}.\n",
        f"The Portfolio Manager allocated: capital_cap={capital_cap:.2f}, override={override}.\n\n",
        f"Scenario:\n{payload}\n",
    ]
    return "".join(pieces)
|
|