Premchan369
/

alphaforge-quant-system

+"""Synthetic Market Simulation — Train Your Strategies Against Themselves
+Jane Street, Two Sigma, Citadel ALL run simulations.
+Why? Because you need MORE data than history provides.
+This module creates realistic synthetic markets with:
+- Agent-based modeling (informed vs noise traders)
+- Market impact propagation
+- Correlated asset dynamics
+- Regime switches
+- News shock injection
+Use this to:
+1. Train RL agents on unlimited data
+2. Stress test strategies with extreme scenarios
+3. Bootstrap confidence intervals
+4. Test strategy robustness
+"""
+import numpy as np
+import pandas as pd
+from typing import Dict, List, Tuple, Optional, Callable
+from dataclasses import dataclass
+import warnings
+warnings.filterwarnings('ignore')
@dataclass
class MarketConfig:
    """Configuration for a synthetic market.

    When ``correlation_matrix`` is left as ``None`` a random correlation
    matrix of shape ``(n_assets, n_assets)`` is drawn in ``__post_init__``
    from a Wishart distribution. A supplied matrix is converted to a float
    array and its shape is checked against ``n_assets``.

    Raises:
        ValueError: if ``n_assets`` is smaller than 1, or if a supplied
            ``correlation_matrix`` is not ``(n_assets, n_assets)``.
    """
    n_assets: int = 10
    n_informed_traders: int = 5
    n_noise_traders: int = 50
    initial_price: float = 100.0
    fundamental_volatility: float = 0.01
    noise_trader_sigma: float = 0.02
    informed_signal_quality: float = 0.7  # Probability informed trader is right
    market_impact_per_lot: float = 0.0001
    mean_reversion_speed: float = 0.05
    correlation_matrix: Optional[np.ndarray] = None

    def __post_init__(self):
        n = self.n_assets
        if n < 1:
            raise ValueError(f"n_assets must be at least 1, got {n}")

        if self.correlation_matrix is not None:
            matrix = np.asarray(self.correlation_matrix, dtype=float)
            if matrix.shape != (n, n):
                raise ValueError(
                    f"correlation_matrix must have shape ({n}, {n}), "
                    f"got {matrix.shape}"
                )
            self.correlation_matrix = matrix
            return

        # Imported lazily so scipy is only needed when a matrix is generated.
        from scipy.stats import wishart

        # Scale with unit diagonal and 0.5 off-diagonal; Wishart samples are
        # positive definite, which makes them usable as covariance matrices.
        scale = np.eye(n) * 0.5 + np.ones((n, n)) * 0.5
        sample = wishart.rvs(df=n + 2, scale=scale, size=1)
        # wishart.rvs squeezes its output, so a single-asset draw comes back
        # as a scalar; restore the (n, n) shape before taking the diagonal.
        cov = np.asarray(sample, dtype=float).reshape(n, n)

        # Normalise covariance to correlation.
        d = np.sqrt(np.diag(cov))
        self.correlation_matrix = cov / np.outer(d, d)
class FundamentalPriceProcess:
    """
    Simulate the fundamental (fair) value of each asset.

    Each step applies: dF = theta * (mu - F) * dt + drift * F * dt + sigma * dW
    Where:
    - theta = mean reversion speed (config.mean_reversion_speed)
    - mu = long-term mean (shifted when a boom or crash regime starts)
    - drift = per-regime proportional drift
    - sigma = fundamental volatility, scaled by a per-regime multiplier

    Attributes:
        prices: current fundamental price per asset.
        long_term_means: mean-reversion targets per asset.
        regime: one of 'normal', 'boom', 'crash', 'high_vol'.
        regime_duration: steps spent in the current regime.
        regime_switches: list of dicts with keys 'step', 'from', 'to'.
        step_count: number of completed calls to ``step``.
    """

    # regime -> (proportional drift per unit time, volatility multiplier)
    _REGIME_EFFECTS = {
        'normal': (0.0, 1.0),
        'boom': (0.002, 1.0),
        'crash': (-0.003, 2.0),
        'high_vol': (0.0, 3.0),
    }
    _REGIME_NAMES = ['normal', 'boom', 'crash', 'high_vol']
    _REGIME_PROBS = [0.6, 0.15, 0.15, 0.1]
    _MEAN_REGIME_LENGTH = 100

    def __init__(self, config: "MarketConfig"):
        self.config = config
        self.prices = np.ones(config.n_assets) * config.initial_price
        self.long_term_means = np.ones(config.n_assets) * config.initial_price
        self.regime = 'normal'  # normal, boom, crash, high_vol
        self.regime_duration = 0
        self.regime_switches = []
        self.step_count = 0
        # Length of the current regime, drawn once per regime. Redrawing the
        # threshold on every step would make regimes end systematically early.
        self._regime_length = np.random.poisson(self._MEAN_REGIME_LENGTH)

    def step(self, dt: float = 1.0) -> np.ndarray:
        """Evolve fundamental prices by one step of size ``dt``.

        Returns:
            A copy of the updated price vector (floored at 0.01).
        """
        cfg = self.config

        # Regime switching
        self.regime_duration += 1
        if self.regime_duration > self._regime_length:
            self._switch_regime()

        # Correlated Gaussian shocks across assets
        noise = np.random.multivariate_normal(
            np.zeros(cfg.n_assets),
            cfg.correlation_matrix * (cfg.fundamental_volatility ** 2)
        )

        # An unrecognised regime label behaves like 'normal'.
        drift, vol_mult = self._REGIME_EFFECTS.get(self.regime, (0.0, 1.0))

        # Every deterministic term is scaled by dt so that changing the step
        # size rescales the dynamics consistently.
        dprices = (
            cfg.mean_reversion_speed * (self.long_term_means - self.prices) * dt
            + drift * self.prices * dt
            + noise * vol_mult * np.sqrt(dt)
        )
        self.prices = np.maximum(self.prices + dprices, 0.01)
        self.step_count += 1
        return self.prices.copy()

    def _switch_regime(self):
        """Draw a new regime, reset its clock and log the transition."""
        old_regime = self.regime
        self.regime = str(np.random.choice(self._REGIME_NAMES, p=self._REGIME_PROBS))
        self.regime_duration = 0
        self._regime_length = np.random.poisson(self._MEAN_REGIME_LENGTH)

        # Booms and crashes also move the mean-reversion target.
        if self.regime == 'boom':
            self.long_term_means *= 1.02
        elif self.regime == 'crash':
            self.long_term_means *= 0.98

        self.regime_switches.append({
            # Number of completed steps when the switch happened, i.e. the
            # zero-based index of the step in which the new regime applies.
            'step': self.step_count,
            'from': old_regime,
            'to': self.regime
        })
class Trader:
    """Base trader agent.

    Holds cash (``capital``), a per-asset position vector and a log of the
    trades that were actually executed. Subclasses implement ``decide``.
    """

    def __init__(self, trader_id: str, capital: float = 1_000_000):
        self.trader_id = trader_id
        self.capital = capital
        self.positions = np.zeros(0)  # Sized on first use
        self.trade_history = []

    def decide(self,
               market_state: Dict,
               fundamental: np.ndarray) -> np.ndarray:
        """
        Returns trade vector: positive = buy, negative = sell.
        """
        raise NotImplementedError

    def execute(self, trade: np.ndarray, prices: np.ndarray) -> bool:
        """Execute ``trade`` at ``prices`` and update positions and cash.

        The trade is rejected (state untouched) when its gross notional,
        ``sum(|trade| * prices)``, exceeds the available capital.

        Returns:
            True if the trade was executed, False if it was rejected.
        """
        trade = np.asarray(trade, dtype=float)
        prices = np.asarray(prices, dtype=float)

        gross_notional = float(np.sum(np.abs(trade) * prices))
        if gross_notional > self.capital:
            return False

        # The base constructor leaves positions empty; size it here so the
        # in-place add below does not fail on the first trade.
        if self.positions.size == 0:
            self.positions = np.zeros_like(trade)
        self.positions += trade

        # Signed cash flow: buys consume cash, sells release it. Because the
        # gross notional passed the check above, cash cannot go negative.
        self.capital -= float(np.sum(trade * prices))

        self.trade_history.append({
            'positions': trade.copy(),
            'prices': prices.copy()
        })
        return True
class InformedTrader(Trader):
    """
    Informed trader with a private signal about the upcoming drift.

    For each asset the trader sees the true sign of ``future_drift`` with
    probability ``signal_quality`` and the opposite sign otherwise.
    """

    def __init__(self, trader_id: str, signal_quality: float, capital: float = 5_000_000):
        super().__init__(trader_id, capital)
        self.signal_quality = signal_quality
        # Drawn per trader; signal_horizon is stored but not read by decide().
        self.signal_horizon = np.random.randint(5, 30)
        self.aggression = np.random.uniform(0.3, 0.8)

    def decide(self,
               market_state: Dict,
               fundamental: np.ndarray) -> np.ndarray:
        """Generate a trade vector from the private signal.

        Args:
            market_state: mapping that may carry a 'future_drift' vector;
                a zero vector is assumed when the key is absent.
            fundamental: current fundamental price per asset.

        Returns:
            Trade per asset (positive = buy, negative = sell). Assets whose
            drift is exactly zero get a zero trade.
        """
        n_assets = len(fundamental)
        if len(self.positions) != n_assets:
            self.positions = np.zeros(n_assets)

        # Direction is right with probability signal_quality, flipped otherwise.
        future_drift = market_state.get('future_drift', np.zeros(n_assets))
        direction = np.sign(future_drift)
        correct = np.random.rand(n_assets) < self.signal_quality
        signal = np.where(correct, direction, -direction)

        # Budget a fraction of capital (aggression), converted to lots at the
        # average fundamental price and spread evenly across assets.
        max_trade = self.capital * self.aggression / np.mean(fundamental)
        return signal * max_trade / n_assets
class NoiseTrader(Trader):
    """
    Uninformed trader whose orders carry no information.

    Order sizes are half-normal draws scaled by ``sigma`` and current
    capital; the buy/sell direction of each order is chosen at random.
    """

    def __init__(self, trader_id: str, sigma: float = 0.02, capital: float = 500_000):
        super().__init__(trader_id, capital)
        self.sigma = sigma

    def decide(self,
               market_state: Dict,
               fundamental: np.ndarray) -> np.ndarray:
        """Return a random trade vector with a random sign per asset."""
        asset_count = len(fundamental)
        if len(self.positions) != asset_count:
            self.positions = np.zeros(asset_count)

        # Magnitude: |N(0, 1)| * sigma * capital, converted to lots at the
        # average fundamental price.
        draws = np.random.randn(asset_count)
        lots = np.abs(draws) * self.sigma * self.capital / np.mean(fundamental)

        # Independent buy/sell direction for every asset.
        signs = np.random.choice([-1, 1], asset_count)
        return lots * signs
class MomentumTrader(Trader):
    """
    Trend follower: buys assets that have risen over the lookback window
    and sells those that have fallen.
    """

    def __init__(self, trader_id: str, lookback: int = 10,
                 threshold: float = 0.01, capital: float = 2_000_000):
        super().__init__(trader_id, capital)
        self.lookback = lookback
        self.threshold = threshold
        self.price_history = []

    def decide(self,
               market_state: Dict,
               fundamental: np.ndarray) -> np.ndarray:
        """Return a trade vector proportional to recent returns.

        Every call appends ``fundamental`` to the trader's own history;
        until ``lookback`` observations are stored the trade is all zeros.
        """
        asset_count = len(fundamental)
        if len(self.positions) != asset_count:
            self.positions = np.zeros(asset_count)

        self.price_history.append(fundamental.copy())
        if len(self.price_history) < self.lookback:
            return np.zeros(asset_count)

        # Return over the window: newest observation versus the oldest one.
        window = self.price_history[-self.lookback:]
        window_return = (window[-1] / window[0]) - 1

        # Drop stale observations once the buffer exceeds twice the lookback.
        if len(self.price_history) > self.lookback * 2:
            self.price_history = window

        # Signal in units of the threshold, sized against 20% of capital and
        # capped at that budget per asset.
        strength = window_return / self.threshold
        budget_lots = self.capital * 0.2 / np.mean(fundamental)
        orders = strength * budget_lots / asset_count
        return np.clip(orders, -budget_lots, budget_lots)
class SyntheticMarket:
    """
    Complete synthetic market simulation.

    Simulates:
    - Fundamental prices (with regime switches)
    - Informed, noise and momentum traders
    - Linear market impact from the net order flow
    - Observation noise on the traded price

    Note: each informed trader's signal quality and each noise trader's
    sigma are drawn at random here; ``config.informed_signal_quality`` and
    ``config.noise_trader_sigma`` are not consulted.
    """

    def __init__(self, config: MarketConfig, n_momentum_traders: int = 5):
        """Build the fundamental process and the trader population.

        Args:
            config: market configuration.
            n_momentum_traders: number of momentum traders to create.
        """
        self.config = config
        self.n_momentum_traders = n_momentum_traders
        self.fundamental = FundamentalPriceProcess(config)

        # Initialize traders
        self.traders = []
        for i in range(config.n_informed_traders):
            quality = np.random.uniform(0.5, 0.9)
            self.traders.append(InformedTrader(f"informed_{i}", quality))
        for i in range(config.n_noise_traders):
            sigma = np.random.uniform(0.01, 0.03)
            self.traders.append(NoiseTrader(f"noise_{i}", sigma))
        for i in range(n_momentum_traders):
            self.traders.append(MomentumTrader(f"momentum_{i}"))

        # History, one entry per step
        self.price_history = []
        self.fundamental_history = []
        self.volume_history = []
        self.regime_history = []
        self.order_flow_history = []

    def step(self) -> Dict:
        """Simulate one market step.

        Returns:
            Dict with keys 'prices', 'fundamental', 'impact', 'volume',
            'regime' and 'order_flow' for this step.
        """
        cfg = self.config

        # Update fundamentals
        fundamental = self.fundamental.step()

        # Direction hint that informed traders read their signal from
        future_drift = np.random.randn(cfg.n_assets) * cfg.fundamental_volatility
        market_state = {
            'fundamental': fundamental,
            'future_drift': future_drift,
            'regime': self.fundamental.regime,
            'prices': self.price_history[-1] if self.price_history else fundamental
        }

        # Ask every trader exactly once. The same orders drive both the price
        # impact and the execution; calling decide() a second time would
        # produce different random orders and double-feed stateful traders.
        trades = [trader.decide(market_state, fundamental) for trader in self.traders]
        total_orders = np.zeros(cfg.n_assets)
        for trade in trades:
            total_orders += trade

        # Market impact: net order flow moves the price linearly
        impact = total_orders * cfg.market_impact_per_lot
        observed_price = fundamental + impact

        # Observation noise, then floor at a small positive price
        noise = np.random.randn(cfg.n_assets) * cfg.fundamental_volatility * 0.5
        observed_price += noise
        observed_price = np.maximum(observed_price, 0.01)

        # Execute the collected orders at the observed price
        for trader, trade in zip(self.traders, trades):
            trader.execute(trade, observed_price)

        # Record
        volume = np.sum(np.abs(total_orders))
        self.price_history.append(observed_price.copy())
        self.fundamental_history.append(fundamental.copy())
        self.volume_history.append(volume)
        self.regime_history.append(self.fundamental.regime)
        self.order_flow_history.append(total_orders.copy())

        return {
            'prices': observed_price,
            'fundamental': fundamental,
            'impact': impact,
            'volume': volume,
            'regime': self.fundamental.regime,
            'order_flow': total_orders
        }

    def run(self, n_steps: int = 1000) -> pd.DataFrame:
        """Run the simulation for ``n_steps`` steps.

        Returns:
            DataFrame with one row per step and columns 'step', 'regime',
            'volume', then 'price_i', 'fundamental_i', 'impact_i' per asset.
        """
        print(f"Running synthetic market simulation: {n_steps} steps")
        print(f"  {self.config.n_assets} assets")
        print(f"  {len(self.traders)} traders ({self.config.n_informed_traders} informed, "
              f"{self.config.n_noise_traders} noise, {self.n_momentum_traders} momentum)")

        results = []
        for step in range(n_steps):
            state = self.step()
            state['step'] = step
            results.append(state)
            if (step + 1) % 200 == 0:
                print(f"  Step {step + 1}/{n_steps} — Regime: {state['regime']}")

        # Collect all columns first and build the frame in one go, which
        # avoids inserting columns one at a time.
        columns = {
            'step': [r['step'] for r in results],
            'regime': [r['regime'] for r in results],
            'volume': [r['volume'] for r in results],
        }
        for i in range(self.config.n_assets):
            columns[f'price_{i}'] = [r['prices'][i] for r in results]
            columns[f'fundamental_{i}'] = [r['fundamental'][i] for r in results]
            columns[f'impact_{i}'] = [r['impact'][i] for r in results]
        return pd.DataFrame(columns)

    def get_price_data(self) -> pd.DataFrame:
        """Get close prices and log returns for all recorded steps.

        Returns:
            DataFrame with 'asset_i_close' and 'asset_i_return' per asset plus
            'regime' and 'volume'. The first row's return is NaN because it
            has no previous price. Empty when no step has been simulated.
        """
        if not self.price_history:
            return pd.DataFrame()

        prices = np.array(self.price_history)
        # Returns have one fewer observation than prices; pad the first row
        # with NaN so every column has the same length as the frame.
        log_returns = np.full(prices.shape, np.nan)
        log_returns[1:] = np.log(prices[1:] / prices[:-1])

        columns = {}
        for i in range(self.config.n_assets):
            columns[f'asset_{i}_close'] = prices[:, i]
            columns[f'asset_{i}_return'] = log_returns[:, i]
        columns['regime'] = list(self.regime_history)
        columns['volume'] = list(self.volume_history)
        return pd.DataFrame(columns)

    def inject_shock(self,
                     asset_idx: int = 0,
                     shock_size: float = 0.05,
                     shock_type: str = 'price'):
        """
        Inject a shock (simulates earnings surprise, news, etc.)

        Args:
            asset_idx: asset hit by a 'price' shock.
            shock_size: relative size of a 'price' shock; the long-term mean
                moves by 30% of it. Only reported, not applied, for a
                'volatility' shock.
            shock_type: 'price' or 'volatility'. A volatility shock forces
                the fundamental process into its 'high_vol' regime and
                restarts that regime's clock, so it affects all assets.

        Raises:
            ValueError: if ``shock_type`` is not recognised.
        """
        if shock_type == 'price':
            self.fundamental.prices[asset_idx] *= (1 + shock_size)
            self.fundamental.long_term_means[asset_idx] *= (1 + shock_size * 0.3)
        elif shock_type == 'volatility':
            self.fundamental.regime = 'high_vol'
            self.fundamental.regime_duration = 0
        else:
            raise ValueError(
                f"Unknown shock_type {shock_type!r}; expected 'price' or 'volatility'"
            )
        print(f"Injected {shock_type} shock: {shock_size*100:+.1f}% on asset {asset_idx}")
def generate_training_data(n_simulations: int = 100,
                           steps_per_sim: int = 500,
                           config: Optional[MarketConfig] = None) -> List[pd.DataFrame]:
    """
    Generate a synthetic training dataset from many independent simulations.

    Each simulation uses a copy of ``config`` in which the fundamental
    volatility is scaled by a random factor in [0.8, 1.2] and the market
    impact by a random factor in [0.5, 2.0]. Every other field of the
    supplied config, including its correlation matrix, is carried over
    unchanged. When ``config`` is None a fresh default ``MarketConfig`` is
    built for every simulation.

    Args:
        n_simulations: number of simulations to run.
        steps_per_sim: number of steps in each simulation.
        config: template configuration, or None for defaults.

    Returns:
        One DataFrame per simulation, as returned by ``SyntheticMarket.run``.
    """
    # Local import: the module only imports `dataclass` at the top.
    from dataclasses import replace

    datasets = []
    print(f"Generating {n_simulations} synthetic market simulations...")
    print(f"  Total data: {n_simulations * steps_per_sim:,} observations")

    for i in range(n_simulations):
        template = config if config is not None else MarketConfig()

        # Vary parameters slightly each simulation while keeping the rest of
        # the template intact.
        sim_config = replace(
            template,
            fundamental_volatility=template.fundamental_volatility * np.random.uniform(0.8, 1.2),
            market_impact_per_lot=template.market_impact_per_lot * np.random.uniform(0.5, 2.0),
        )

        market = SyntheticMarket(sim_config)
        datasets.append(market.run(steps_per_sim))

        if (i + 1) % 10 == 0:
            print(f"  Completed {i+1}/{n_simulations} simulations")

    return datasets
if __name__ == '__main__':
    # Demo driver: one simulation, a correlation report, a shock test and a
    # small batch of training simulations.
    banner = "=" * 70
    print(banner)
    print("  SYNTHETIC MARKET SIMULATION")
    print(banner)

    # Single simulation
    config = MarketConfig(n_assets=5, n_informed_traders=3, n_noise_traders=30)
    market = SyntheticMarket(config)
    df = market.run(n_steps=1000)

    regime_counts = df['regime'].value_counts().to_dict()
    asset0 = df['price_0']
    print("\nSimulation Results:")
    print(f"  Steps: {len(df)}")
    print(f"  Regimes: {regime_counts}")
    print(f"  Avg Volume: {df['volume'].mean():.0f}")
    print(f"  Price range (asset 0): ${asset0.min():.2f} - ${asset0.max():.2f}")

    # Correlation structure of one-step log returns
    price_cols = [c for c in df.columns if c.startswith('price_')]
    price_matrix = df[price_cols].values
    returns = np.log(price_matrix[1:] / price_matrix[:-1])
    corr = np.corrcoef(returns.T)
    print("\nAsset Return Correlations:")
    n_cols = len(price_cols)
    pairs = [(i, j) for i in range(n_cols) for j in range(i + 1, n_cols)]
    for i, j in pairs:
        print(f"  Asset {i} ↔ Asset {j}: {corr[i,j]:.3f}")

    # Shock test: shock asset 0, then let the market run for ten steps
    print("\nInjecting price shock on asset 0...")
    market.inject_shock(asset_idx=0, shock_size=-0.10, shock_type='price')
    for _ in range(10):
        market.step()
    latest = market.price_history[-1][0]
    reference = market.price_history[-20][0]
    print(f"  Post-shock price: ${latest:.2f}")
    print(f"  Recovery: {((latest / reference) - 1)*100:+.1f}%")

    # Massive training dataset
    print("\nGenerating training dataset...")
    datasets = generate_training_data(n_simulations=5, steps_per_sim=500, config=config)
    total_rows = sum(len(d) for d in datasets)
    print(f"  Total synthetic observations: {total_rows:,}")
    print("\n  Use this data to:")
    print("    1. Train RL execution agents (unlimited episodes)")
    print("    2. Test strategy robustness across market regimes")
    print("    3. Bootstrap confidence intervals")
    print("    4. Generate adversarial scenarios")
    print("    5. Calibrate risk models")