# File size: 20,345 Bytes

# 513693b

"""Market Making Engine — What Jane Street Actually Does

Jane Street is primarily a MARKET MAKER, not a directional trader.
They quote bid/ask on options, ETFs, bonds — make money on spread + volume.

Key challenges:
1. Adverse selection: informed traders pick off your quotes
2. Inventory risk: holding positions you don't want
3. Spread optimization: too wide = no volume, too tight = get run over
4. Regulatory constraints: Reg NMS, MiFID II

Based on:
- Avellaneda & Stoikov (2008): "High-frequency trading in a limit order book"
- Guéant et al. (2012): "Dealing with the inventory risk"
- Cartea & Jaimungal (2013): "Modelling asset prices for algorithmic and high-frequency trading"
"""
import numpy as np
import pandas as pd
from typing import Dict, List, Tuple, Optional, Callable
from dataclasses import dataclass
import warnings
warnings.filterwarnings('ignore')


@dataclass
class InventoryState:
    """Book-keeping record for the market maker's inventory and activity."""
    # Signed net inventory and the running cash balance.
    position: float = 0.0
    cash: float = 0.0
    # PnL figures kept alongside the raw position/cash numbers.
    pnl_realized: float = 0.0
    pnl_unrealized: float = 0.0
    # Activity counters.
    trades_executed: int = 0
    quotes_submitted: int = 0
    quotes_filled: int = 0

    def total_pnl(self, mark_price: float) -> float:
        """Return realized PnL plus inventory valued at ``mark_price`` plus cash."""
        inventory_value = self.position * mark_price
        return self.pnl_realized + inventory_value + self.cash


class MarketMakerQuote:
    """One side of a two-sided market: side, price and size plus simple analytics."""

    def __init__(self, side: str, price: float, quantity: int,
                 aggression: str = 'passive'):
        # side is 'bid' or 'ask'; price and quantity are stored as given.
        self.side, self.price, self.quantity = side, price, quantity
        # 'passive' means a resting quote, 'aggressive' means crossing the spread.
        self.aggression = aggression
        # Analytics start at zero and are filled in by whoever builds the quote.
        self.fill_probability = self.expected_profit = 0.0


class AvellanedaStoikovMarketMaker:
    """
    Avellaneda-Stoikov (2008) market making model.

    Key insight: Quote prices should DEPEND on current inventory.

    Reservation price (where you're indifferent to trade):
    r = s - q * γ * σ² * (T - t)

    Spread:
    δ^a + δ^b = γ * σ² * (T - t) + (2/γ) * ln(1 + γ/κ)

    Where:
    - s = mid price
    - q = inventory position
    - γ = risk aversion
    - σ = volatility
    - T-t = time remaining
    - κ = order arrival intensity

    As inventory grows positive → skew quotes DOWN (want to sell)
    As inventory grows negative → skew quotes UP (want to buy)

    Near the position limit the side that would GROW the inventory is
    switched off (bid when long, ask when short); the reducing side keeps
    quoting so the position can be worked back towards zero.
    """

    def __init__(self,
                 gamma: float = 0.1,        # Risk aversion
                 sigma: float = 0.02,       # Volatility (per period)
                 kappa: float = 1.5,        # Order arrival rate
                 max_position: float = 1000.0,  # Position limit
                 min_spread_bps: float = 1.0,   # Minimum spread in bps
                 max_spread_bps: float = 50.0,  # Maximum spread
                 inventory_skew_factor: float = 2.0):  # How much to skew
        """Store model parameters and start from an empty inventory."""
        self.gamma = gamma
        self.sigma = sigma
        self.kappa = kappa
        self.max_position = max_position
        # NOTE: despite the attribute names, these two hold FRACTIONS of the
        # mid price (bps / 10000), not basis points.
        self.min_spread_bps = min_spread_bps / 10000.0
        self.max_spread_bps = max_spread_bps / 10000.0
        self.inventory_skew_factor = inventory_skew_factor

        self.state = InventoryState()
        self.quote_history = []
        self.pnl_history = []

    def reset(self):
        """Discard inventory, fill history and PnL history."""
        self.state = InventoryState()
        self.quote_history = []
        self.pnl_history = []

    def calculate_quotes(self,
                         mid_price: float,
                         time_to_end: float = 1.0,
                         current_inventory: Optional[float] = None) -> Tuple[MarketMakerQuote, MarketMakerQuote]:
        """
        Calculate optimal bid and ask quotes.

        Args:
            mid_price: Current mid price.
            time_to_end: Remaining horizon (T - t) used in the model formulas.
            current_inventory: Inventory to quote for; defaults to the
                position held in ``self.state``.

        Returns: (bid_quote, ask_quote). A quote whose side is switched off
        is still returned, with ``quantity == 0``.
        """
        if current_inventory is None:
            current_inventory = self.state.position

        # Reservation price (inventory-adjusted mid)
        reservation_price = mid_price - current_inventory * self.gamma * (self.sigma ** 2) * time_to_end

        # Optimal spread
        optimal_spread = self.gamma * (self.sigma ** 2) * time_to_end + \
                        (2.0 / self.gamma) * np.log(1 + self.gamma / self.kappa)

        # Clamp the spread (in price units) between the configured fractions
        # of the mid price.
        spread_decimal = max(optimal_spread, self.min_spread_bps * mid_price)
        spread_decimal = min(spread_decimal, self.max_spread_bps * mid_price)

        # Inventory skewing: tanh keeps the skew strictly inside (-1, 1), so
        # both offsets below stay positive.
        # If long (q > 0): bid moves further away, ask moves closer.
        # If short (q < 0): ask moves further away, bid moves closer.
        skew = np.tanh(current_inventory / self.max_position * self.inventory_skew_factor)

        half_spread = spread_decimal / 2

        bid_offset = half_spread * (1 + skew)
        ask_offset = half_spread * (1 - skew)

        bid_price = reservation_price - bid_offset
        ask_price = reservation_price + ask_offset

        # Ensure bid < ask
        if bid_price >= ask_price:
            # Emergency: force minimum spread around the crossed midpoint
            avg = (bid_price + ask_price) / 2
            bid_price = avg - self.min_spread_bps * mid_price / 2
            ask_price = avg + self.min_spread_bps * mid_price / 2

        # Quantity sizing: larger when inventory is neutral, smaller when
        # extreme. Floored at zero so an inventory beyond the limit can
        # never produce a negative quote size.
        inventory_ratio = abs(current_inventory) / self.max_position
        qty_multiplier = max(0.0, 1.0 - 0.7 * inventory_ratio)
        base_qty = 100

        bid_qty = int(base_qty * qty_multiplier)
        ask_qty = int(base_qty * qty_multiplier)

        # Extremely long: stop BUYING (no bid); the ask stays live so the
        # position can be sold down.
        if current_inventory > self.max_position * 0.9:
            bid_qty = 0
        # Extremely short: stop SELLING (no ask); the bid stays live so the
        # position can be bought back.
        if current_inventory < -self.max_position * 0.9:
            ask_qty = 0

        bid_quote = MarketMakerQuote('bid', bid_price, bid_qty, 'passive')
        ask_quote = MarketMakerQuote('ask', ask_price, ask_qty, 'passive')

        # Expected fill probability (simplified): decays exponentially with
        # the quote's distance from the reservation price.
        bid_quote.fill_probability = np.exp(-self.kappa * bid_offset)
        ask_quote.fill_probability = np.exp(-self.kappa * ask_offset)

        # Expected profit per trade = that side's offset (simplified)
        bid_quote.expected_profit = bid_offset
        ask_quote.expected_profit = ask_offset

        return bid_quote, ask_quote

    def process_fill(self, quote: MarketMakerQuote,
                     fill_qty: int,
                     fill_price: float,
                     is_aggressive_side: bool):
        """
        Process a quote fill.

        Args:
            quote: The quote that traded; ``quote.side == 'bid'`` is booked
                as a purchase, any other side as a sale.
            fill_qty: Quantity that traded.
            fill_price: Price at which it traded.
            is_aggressive_side: True if WE were aggressive (market order),
                               False if counterparty hit our resting quote.
                               Accepted for interface compatibility; it does
                               not affect the book-keeping.
        """
        notional = fill_qty * fill_price
        if quote.side == 'bid':
            # We bought
            self.state.position += fill_qty
            self.state.cash -= notional
        else:
            # We sold
            self.state.position -= fill_qty
            self.state.cash += notional

        self.state.trades_executed += 1
        self.state.quotes_filled += 1

        # Track
        self.quote_history.append({
            'side': quote.side,
            'quote_price': quote.price,
            'fill_price': fill_price,
            'quantity': fill_qty,
            'position_after': self.state.position,
            'cash_after': self.state.cash
        })

    def update_mark_price(self, mark_price: float):
        """Re-mark the book and append a snapshot to ``pnl_history``.

        ``pnl_unrealized`` is set to inventory valued at ``mark_price`` plus
        the cash balance.
        """
        self.state.pnl_unrealized = self.state.position * mark_price + self.state.cash
        self.pnl_history.append({
            'mark_price': mark_price,
            'position': self.state.position,
            'cash': self.state.cash,
            'unrealized_pnl': self.state.pnl_unrealized
        })

    def get_summary(self) -> Dict:
        """Return a dict snapshot of position, cash, counters and PnL fields."""
        return {
            'position': self.state.position,
            'cash': self.state.cash,
            'trades': self.state.trades_executed,
            'quotes_filled': self.state.quotes_filled,
            'pnl_realized': self.state.pnl_realized,
            'pnl_unrealized': self.state.pnl_unrealized,
            'inventory_ratio': abs(self.state.position) / self.max_position
        }


class InventoryRiskManager:
    """
    Advanced inventory risk management for market making.

    When inventory exceeds limits:
    1. Hedge via correlated instruments
    2. Cross the spread (aggressive unwind)
    3. Reduce quote sizes
    4. Stop quoting on the bad side entirely
    """

    def __init__(self,
                 max_inventory: float = 1000,
                 hedge_threshold: float = 0.6,   # Hedge at 60% of max
                 stop_threshold: float = 0.9,      # Stop quoting at 90%
                 aggressive_unwind_threshold: float = 0.95):  # Market order at 95%
        """Store the inventory limit and the escalation thresholds.

        All thresholds are fractions of ``max_inventory``.
        """
        self.max_inventory = max_inventory
        self.hedge_threshold = hedge_threshold
        self.stop_threshold = stop_threshold
        self.aggressive_unwind_threshold = aggressive_unwind_threshold

    def check_inventory(self, position: float) -> Dict:
        """Determine actions needed based on inventory.

        Args:
            position: Signed inventory; only its magnitude is used.

        Returns:
            Dict with boolean flags 'hedge', 'stop_quoting_bad_side',
            'aggressive_unwind', a 'reduce_size' multiplier and a 'status'
            label ('normal', 'MODERATE', 'WARNING', 'SEVERE' or 'CRITICAL').
        """
        ratio = abs(position) / self.max_inventory

        actions = {
            'hedge': False,
            'stop_quoting_bad_side': False,
            'aggressive_unwind': False,
            'reduce_size': 1.0,  # Size multiplier
            'status': 'normal'
        }

        # Checked from most to least severe; only the first match applies.
        if ratio >= self.aggressive_unwind_threshold:
            actions['aggressive_unwind'] = True
            actions['stop_quoting_bad_side'] = True
            actions['reduce_size'] = 0.0
            actions['status'] = 'CRITICAL'

        elif ratio >= self.stop_threshold:
            actions['stop_quoting_bad_side'] = True
            actions['reduce_size'] = 0.1
            actions['status'] = 'SEVERE'

        elif ratio >= self.hedge_threshold:
            actions['hedge'] = True
            actions['reduce_size'] = 0.5
            actions['status'] = 'WARNING'

        elif ratio >= 0.5:
            actions['reduce_size'] = 0.8
            actions['status'] = 'MODERATE'

        return actions

    def hedge_recommendation(self,
                           position: float,
                           correlated_assets: Dict[str, float]) -> Optional[Dict]:
        """
        Recommend hedge position in correlated assets.

        Args:
            position: Signed inventory in the primary instrument.
            correlated_assets: {symbol: correlation_with_primary}

        Returns:
            None when the position is flat or below the hedge threshold, or
            when no asset has a non-zero correlation. Otherwise a dict with
            'hedge_symbol', 'direction' ('buy'/'sell'), 'quantity',
            'correlation' and 'expected_hedge_effectiveness' (correlation²).
        """
        # Nothing to hedge when flat, regardless of how the threshold is set.
        if position == 0 or abs(position) < self.max_inventory * self.hedge_threshold:
            return None

        # Find best hedge: highest absolute correlation (first one wins ties;
        # zero-correlation assets are never selected).
        best_hedge = None
        best_corr = 0

        for symbol, corr in correlated_assets.items():
            if abs(corr) > best_corr:
                best_corr = abs(corr)
                best_hedge = symbol

        if best_hedge is None:
            return None

        hedge_corr = correlated_assets[best_hedge]

        # The hedge must move AGAINST the primary position:
        #   positive correlation -> trade the opposite way to the position,
        #   negative correlation -> trade the same way as the position.
        is_long = position > 0
        moves_with_primary = hedge_corr > 0
        direction = 'sell' if is_long == moves_with_primary else 'buy'

        # Hedge amount: primary position scaled by correlation strength.
        hedge_size = abs(position) * abs(hedge_corr)

        return {
            'hedge_symbol': best_hedge,
            'direction': direction,
            'quantity': hedge_size,
            'correlation': hedge_corr,
            'expected_hedge_effectiveness': best_corr ** 2  # R²
        }


class AdverseSelectionDetector:
    """
    Track recent fills and estimate how often they went against us.

    Adverse selection means the counterparty was better informed: after we
    sell the price rises, after we buy the price falls. Each recorded trade
    is labelled by the sign of that post-trade move, and the share of
    adverse trades in the rolling window is used as a toxicity score.
    """

    def __init__(self,
                 lookback_window: int = 20,
                 toxicity_threshold: float = 0.6):
        self.lookback_window = lookback_window
        self.toxicity_threshold = toxicity_threshold
        self.trade_history = []
        self.toxicity_score = 0.0

    def record_trade(self,
                     side: str,           # Which side WE filled
                     our_price: float,    # Price we got
                     post_prices: List[float],  # Prices after the trade
                     quantity: int,
                     counterparty: Optional[str] = None):
        """Append one fill to the rolling window, labelled adverse or not.

        Only ``post_prices[0]`` is used. Drift is signed so that a positive
        value is bad for us: the price rose after an 'ask' fill (we sold) or
        fell after any other side (treated as a buy). With no post-trade
        prices the drift is 0 and the trade is not counted as adverse.
        """
        if not post_prices:
            drift = 0
        elif side == 'ask':
            # We sold: a higher price afterwards means we sold too cheap.
            drift = post_prices[0] - our_price
        else:
            # We bought: a lower price afterwards means we overpaid.
            drift = our_price - post_prices[0]

        entry = {
            'side': side,
            'our_price': our_price,
            'post_drift': drift,
            'quantity': quantity,
            'counterparty': counterparty,
            'adverse': drift > 0
        }
        self.trade_history.append(entry)

        # Drop the oldest entry once the window is exceeded.
        if len(self.trade_history) > self.lookback_window:
            del self.trade_history[0]

    def get_toxicity_score(self) -> float:
        """Return the fraction of windowed trades flagged adverse.

        With fewer than five trades recorded this returns 0.0 and leaves
        ``self.toxicity_score`` untouched.
        """
        window = self.trade_history
        if len(window) < 5:
            return 0.0

        flagged = [trade for trade in window if trade['adverse']]
        self.toxicity_score = len(flagged) / len(window)
        return self.toxicity_score

    def should_widen_spread(self) -> Tuple[bool, float]:
        """Return ``(widen?, factor)`` based on the current toxicity score.

        Above the threshold the factor is ``1 + 2 * excess`` capped at 3.0;
        otherwise the result is ``(False, 1.0)``.
        """
        toxicity = self.get_toxicity_score()

        if not toxicity > self.toxicity_threshold:
            return False, 1.0

        widen_factor = 1.0 + (toxicity - self.toxicity_threshold) * 2
        return True, min(widen_factor, 3.0)

    def get_recent_pnl(self) -> Dict:
        """Summarise quantity-weighted post-trade drift over the window.

        Returns an empty dict when no trades are recorded.
        """
        window = self.trade_history
        if not window:
            return {}

        def weighted_drift(trades):
            return sum(t['post_drift'] * t['quantity'] for t in trades)

        adverse_trades = [t for t in window if t['adverse']]
        good_trades = [t for t in window if not t['adverse']]

        adverse_drift = weighted_drift(adverse_trades)
        good_drift = weighted_drift(good_trades)

        n_total = len(window)
        n_adverse = len(adverse_trades)

        return {
            'total_trades': n_total,
            'adverse_trades': n_adverse,
            'adverse_pct': n_adverse / n_total * 100,
            'total_adverse_cost': adverse_drift,
            'total_good_gain': -good_drift,
            'net_selection_cost': adverse_drift + good_drift
        }


def simulate_market_making(n_steps: int = 1000,
                           price_drift: float = 0.0001,
                           volatility: float = 0.01,
                           arrival_rate: float = 0.3,
                           seed: Optional[int] = 42) -> pd.DataFrame:
    """
    Simulate a market maker in a random walk market.

    Generates synthetic tick data and lets the market maker quote and fill.

    Args:
        n_steps: Number of simulation steps.
        price_drift: Per-step drift, as a fraction of the current price.
        volatility: Per-step volatility, as a fraction of the current price;
            also passed to the market maker as its sigma.
        arrival_rate: Per-step probability that an order arrives on each side.
        seed: Seed for NumPy's global random state (default 42 reproduces a
            fixed path). Pass None for a non-deterministic run.

    Returns:
        DataFrame with one row per step: step, price, bid, ask, spread_bps,
        position, cash, inventory_ratio, unrealized_pnl, toxicity, status.
    """
    np.random.seed(seed)

    # Initialize
    mm = AvellanedaStoikovMarketMaker(
        gamma=0.1,
        sigma=volatility,
        kappa=1.5,
        max_position=1000
    )

    detector = AdverseSelectionDetector(lookback_window=20)
    risk_mgr = InventoryRiskManager()

    price = 100.0
    min_fill_qty = 10

    # Fills wait here until the NEXT step's price is known, so the detector
    # is given a genuine post-trade price rather than pre-trade history.
    # Fills made on the final step have no later price and are not recorded.
    pending_fills = []

    results = []

    for step in range(n_steps):
        # Update price
        price_change = np.random.randn() * volatility * price + price_drift * price
        price += price_change
        price = max(price, 0.01)

        # Score the previous step's fills against the new price.
        for side, fill_price, qty in pending_fills:
            detector.record_trade(side, fill_price, [price], qty)
        pending_fills = []

        # Calculate quotes
        bid_quote, ask_quote = mm.calculate_quotes(price, time_to_end=1.0)

        # Check inventory risk
        inventory_actions = risk_mgr.check_inventory(mm.state.position)

        # Check adverse selection
        widen, widen_factor = detector.should_widen_spread()
        if widen:
            # Widen spread symmetrically
            spread_adj = (widen_factor - 1.0) * (ask_quote.price - bid_quote.price) / 2
            bid_quote.price -= spread_adj
            ask_quote.price += spread_adj
            # Quotes further from the market fill less often: apply the same
            # exp(-kappa * distance) decay the quoting model uses.
            decay = np.exp(-mm.kappa * spread_adj)
            bid_quote.fill_probability *= decay
            ask_quote.fill_probability *= decay

        # Simulate order arrivals: bid side first (someone hits our bid),
        # then ask side (someone lifts our ask).
        for quote in (bid_quote, ask_quote):
            if np.random.rand() >= arrival_rate:
                continue
            # A side that is switched off (size 0) cannot trade.
            if quote.quantity <= 0:
                continue
            if np.random.rand() >= quote.fill_probability:
                continue

            # randint needs low < high, so cap the minimum fill at the
            # quoted size when the quote is smaller than min_fill_qty.
            low = min(min_fill_qty, quote.quantity)
            fill_qty = int(np.random.randint(low, quote.quantity + 1))
            mm.process_fill(quote, fill_qty, quote.price, False)
            pending_fills.append((quote.side, quote.price, fill_qty))

        # Mark to market
        mm.update_mark_price(price)

        # Record
        summary = mm.get_summary()
        results.append({
            'step': step,
            'price': price,
            'bid': bid_quote.price,
            'ask': ask_quote.price,
            'spread_bps': (ask_quote.price - bid_quote.price) / price * 10000,
            'position': summary['position'],
            'cash': summary['cash'],
            'inventory_ratio': summary['inventory_ratio'],
            'unrealized_pnl': summary['pnl_unrealized'],
            'toxicity': detector.get_toxicity_score(),
            'status': inventory_actions['status']
        })

    return pd.DataFrame(results)


if __name__ == '__main__':
    # Banner
    rule = "=" * 70
    print(rule)
    print("  MARKET MAKING ENGINE SIMULATION")
    print(rule)

    results = simulate_market_making(n_steps=5000)

    # Figures for the report: last row plus a few column aggregates.
    final = results.iloc[-1]
    position_size = results['position'].abs()
    avg_spread_bps = results['spread_bps'].mean()

    print("\nSimulation: 5000 steps, random walk market")
    print("  Initial Price: $100.00")
    print(f"  Final Price:   ${final['price']:.2f}")
    print(f"  Final Position: {final['position']:.0f}")
    print(f"  Final Cash:     ${final['cash']:.2f}")
    print(f"  Unrealized PnL: ${final['unrealized_pnl']:.2f}")
    print(f"  Avg Spread:     {avg_spread_bps:.1f} bps")
    print(f"  Avg Position:   {position_size.mean():.0f}")
    print(f"  Max Position:   {position_size.max():.0f}")
    print(f"  Avg Toxicity:   {results['toxicity'].mean():.3f}")

    # Rough attribution: average spread (as a fraction) scaled by fixed
    # constants; the remainder of the marked PnL is labelled inventory risk.
    pnl_from_spread = avg_spread_bps / 10000 * 2 * 100  # Simplified
    spread_capture = pnl_from_spread * 50

    print("\n  PnL Attribution:")
    print(f"    Spread capture: ~${spread_capture:.0f} (per 100 trades)")
    print(f"    Inventory risk: ${final['unrealized_pnl'] - spread_capture:.0f}")

    print("\n  This is how Jane Street makes money:")
    print("    1. Quote tight spreads 1000s of times per day")
    print("    2. Inventory management keeps risk bounded")
    print("    3. Adverse selection detection widens when toxic flow arrives")
    print("    4. Volume × Small spread margin = Big PnL")