""" Simulation utilities for Safe Choices prediction market trading simulations. This module contains shared functions for running Monte Carlo simulations of different trading strategies on prediction markets. """ import pandas as pd import numpy as np from datetime import datetime, timedelta from scipy import stats import matplotlib.pyplot as plt import seaborn as sns from typing import Tuple, Optional, List, Dict, Any def load_and_filter_data(csv_path: str, start_date: str = '2025-01-01') -> pd.DataFrame: """ Load the market data and filter for simulation period. Args: csv_path: Path to the CSV file containing market data start_date: Start date for simulation (markets must close after this date) Returns: Filtered DataFrame ready for simulation """ # Load data df = pd.read_csv(csv_path) # Convert dates - handle timezone awareness df['closingDate'] = pd.to_datetime(df['closingDate'], format='mixed', errors='coerce', utc=True) start_dt = pd.to_datetime(start_date, utc=True) # Filter for markets that close after start date and have complete data mask = ( (df['closingDate'] >= start_dt) & (df['outcome'].notna()) & (df['probability7d'].notna()) & (df['probability6d'].notna()) & (df['probability5d'].notna()) & (df['probability4d'].notna()) & (df['probability3d'].notna()) & (df['probability2d'].notna()) & (df['probability1d'].notna()) ) filtered_df = df[mask].copy().reset_index(drop=True) # Ensure probability columns are numeric and between 0 and 1 for days in range(7, 0, -1): col = f'probability{days}d' if col in filtered_df.columns: # Convert to float, handling any string values filtered_df[col] = pd.to_numeric(filtered_df[col], errors='coerce') # Clamp probabilities between 0 and 1 (in case of data issues) filtered_df[col] = filtered_df[col].clip(lower=0.0, upper=1.0) # Vectorized outcome conversion for speed outcome_map = {'True': 1, 'true': 1, 'FALSE': 0, 'false': 0, True: 1, False: 0} filtered_df['outcome_int'] = filtered_df['outcome'].map(outcome_map) # Fill any remaining NaN outcomes with proper conversion remaining_mask = filtered_df['outcome_int'].isna() if remaining_mask.any(): def convert_outcome(value): if pd.isna(value): return None if isinstance(value, (int, float)): return int(value) return 1 if str(value).lower() == 'true' else 0 filtered_df.loc[remaining_mask, 'outcome_int'] = filtered_df.loc[remaining_mask, 'outcome'].apply(convert_outcome) # Sort by closing date for better performance in simulations filtered_df = filtered_df.sort_values('closingDate').reset_index(drop=True) return filtered_df def check_market_eligibility(market_row: pd.Series, days_before: int, min_prob_7d: float, min_prob_current: float) -> bool: """ Check if a market meets the probability thresholds for investment. Args: market_row: Row from the DataFrame containing market data days_before: Number of days before resolution to check (1-7) min_prob_7d: Minimum probability threshold at 7 days before min_prob_current: Minimum probability threshold at current day Returns: True if market meets criteria, False otherwise """ prob_col = f'probability{days_before}d' # Check if required columns exist and have valid data if pd.isna(market_row.get('probability7d')) or pd.isna(market_row.get(prob_col)): return False # Check probability thresholds prob_7d = market_row['probability7d'] prob_current = market_row[prob_col] return prob_7d >= min_prob_7d and prob_current >= min_prob_current def calculate_days_until_resolution(current_date: datetime, closing_date: datetime) -> int: """ Calculate days until market resolution. Args: current_date: Current simulation date closing_date: Market closing date Returns: Number of days until resolution """ return max(0, (closing_date - current_date).days) def get_available_markets(df: pd.DataFrame, current_date: datetime, days_before: int, min_prob_7d: float, min_prob_current: float) -> pd.DataFrame: """ Get markets available for investment at current date. Args: df: DataFrame containing market data current_date: Current simulation date days_before: Days before resolution to invest min_prob_7d: Minimum probability at 7 days before min_prob_current: Minimum probability at current day Returns: DataFrame of available markets with their days until resolution """ # Vectorized approach - much faster than iterating prob_col = f'probability{days_before}d' # Calculate days until resolution for all markets at once days_until = (df['closingDate'] - current_date).dt.days # Create boolean mask for all conditions at once mask = ( (days_until == days_before) & # Market resolves exactly N days from now (df['probability7d'] >= min_prob_7d) & # 7d probability threshold (df[prob_col] >= min_prob_current) & # Current day probability threshold (df['probability7d'].notna()) & # Valid 7d data (df[prob_col].notna()) & # Valid current day data (df['outcome_int'].notna()) # Valid outcome data ) if not mask.any(): return pd.DataFrame() # Filter and add days_until_resolution column available_markets = df[mask].copy() available_markets['days_until_resolution'] = days_until[mask] return available_markets def select_next_market(available_markets: pd.DataFrame, random_state: np.random.RandomState, skew_factor: float = 0.1) -> Optional[pd.Series]: """ Select the next market to invest in using a left-skewed exponential distribution. Args: available_markets: DataFrame of markets available for investment random_state: Random state for reproducible results skew_factor: Controls the left skew (higher = more skew toward closer markets) Returns: Selected market as Series, or None if no markets available """ if len(available_markets) == 0: return None # Configurable market selection with adjustable skew days_until = available_markets['days_until_resolution'].values # Use exponential decay with configurable skew factor # Closer markets (lower days) get exponentially higher weights weights = np.exp(-days_until * skew_factor) # Normalize weights weights = weights / weights.sum() # Select market based on weights selected_idx = random_state.choice(len(available_markets), p=weights) return available_markets.iloc[selected_idx] def calculate_investment_return(market: pd.Series, days_before: int, capital: float) -> float: """ Calculate return from investing in a market. Args: market: Market data as Series days_before: Days before resolution when investment was made capital: Amount invested Returns: Final capital after resolution (capital / probability if win, 0 if loss) """ prob_col = f'probability{days_before}d' probability = market[prob_col] outcome = market['outcome_int'] # Safety checks if pd.isna(probability) or probability <= 0 or probability > 1: # Invalid probability - treat as loss to be safe return 0.0 if pd.isna(outcome): # Unknown outcome - treat as loss to be safe return 0.0 if outcome == 1: # Market resolved True (safe bet won) # Return is capital / probability # Ensure probability is valid (between 0 and 1, exclusive) if probability >= 1.0: # Invalid: probability should be < 1.0 for a valid bet return 0.0 return capital / probability else: # Market resolved False (safe bet lost) return 0.0 def run_single_fund_simulation_fast(df: pd.DataFrame, starting_capital: float = 10000, start_date: str = '2025-01-01', max_duration_days: int = 365, days_before: int = 1, min_prob_7d: float = 0.90, min_prob_current: float = 0.90, investment_probability: float = 0.5, target_return: Optional[float] = None, min_volume: Optional[float] = None, random_seed: Optional[int] = None) -> Dict[str, Any]: """ Optimized single fund simulation using pre-indexed markets and event-driven approach. """ random_state = np.random.RandomState(random_seed) start_dt = pd.to_datetime(start_date, utc=True) end_dt = start_dt + timedelta(days=max_duration_days) # Pre-filter and index markets by trading date for massive speedup prob_col = f'probability{days_before}d' # Filter eligible markets once upfront market_mask = ( (df['probability7d'] >= min_prob_7d) & (df[prob_col] >= min_prob_current) & (df['probability7d'].notna()) & (df[prob_col].notna()) & (df['outcome_int'].notna()) & (df['closingDate'] >= start_dt + timedelta(days=days_before)) & (df['closingDate'] <= end_dt + timedelta(days=days_before)) ) # Add volume filter if specified if min_volume is not None and 'volume' in df.columns: market_mask = market_mask & (df['volume'] >= min_volume) eligible_markets = df[market_mask].copy() if len(eligible_markets) == 0: return { 'final_capital': starting_capital, 'total_return': 0.0, 'num_trades': 0, 'went_bust': False, 'reached_target': False, 'ending_reason': 'no_markets', 'simulation_days': 0, 'trades': [], 'daily_capital': [], 'parameters': { 'starting_capital': starting_capital, 'start_date': start_date, 'max_duration_days': max_duration_days, 'days_before': days_before, 'min_prob_7d': min_prob_7d, 'min_prob_current': min_prob_current, 'investment_probability': investment_probability, 'target_return': target_return, 'min_volume': min_volume, 'random_seed': random_seed } } # Calculate trading dates for each market (days_before days before resolution) eligible_markets['trading_date'] = eligible_markets['closingDate'] - timedelta(days=days_before) # Group markets by trading date for O(1) lookup markets_by_date = {} for _, market in eligible_markets.iterrows(): trade_date = market['trading_date'] if trade_date not in markets_by_date: markets_by_date[trade_date] = [] markets_by_date[trade_date].append(market) # Get sorted trading opportunities trading_dates = sorted([d for d in markets_by_date.keys() if start_dt <= d <= end_dt]) # Initialize simulation state capital = starting_capital current_date = start_dt trades = [] daily_capital = [] # Event-driven simulation - only process days with trading opportunities for trade_date in trading_dates: if capital <= 0: break # Check if we've reached target return if target_return is not None: current_return = (capital - starting_capital) / starting_capital if current_return >= target_return: break # Skip if we're beyond simulation period if trade_date > end_dt: break # Decision: invest today? if random_state.random() >= investment_probability: continue # Select market from available options available_markets_today = markets_by_date[trade_date] if not available_markets_today: continue selected_market = available_markets_today[random_state.randint(0, len(available_markets_today))] # Calculate return probability = selected_market[prob_col] outcome = selected_market['outcome_int'] if pd.isna(probability) or probability <= 0 or probability > 1 or pd.isna(outcome): continue if outcome == 1: # Win new_capital = capital / probability else: # Loss new_capital = 0.0 # Record trade sim_day = (trade_date - start_dt).days trades.append({ 'trade_number': len(trades) + 1, 'investment_date': trade_date, 'resolution_date': selected_market['closingDate'], 'probability': probability, 'capital_invested': capital, 'outcome': outcome, 'capital_after': new_capital, 'return': (new_capital - capital) / capital if capital > 0 else 0, 'sim_day': sim_day }) capital = new_capital # Record capital history (periodically) if len(trades) % 5 == 0 or capital == 0: daily_capital.append({ 'date': trade_date, 'capital': capital, 'day': sim_day }) if capital == 0: break # Calculate final statistics total_return = (capital - starting_capital) / starting_capital if starting_capital > 0 else 0 num_trades = len(trades) went_bust = capital == 0 reached_target = target_return is not None and total_return >= target_return # Determine ending reason if went_bust: ending_reason = 'bust' elif reached_target: ending_reason = 'target_reached' else: ending_reason = 'max_duration' final_sim_day = (min(end_dt, trading_dates[-1] if trading_dates else start_dt) - start_dt).days return { 'final_capital': capital, 'total_return': total_return, 'num_trades': num_trades, 'went_bust': went_bust, 'reached_target': reached_target, 'ending_reason': ending_reason, 'simulation_days': final_sim_day, 'trades': trades, 'daily_capital': daily_capital, 'parameters': { 'starting_capital': starting_capital, 'start_date': start_date, 'max_duration_days': max_duration_days, 'days_before': days_before, 'min_prob_7d': min_prob_7d, 'min_prob_current': min_prob_current, 'investment_probability': investment_probability, 'target_return': target_return, 'min_volume': min_volume, 'random_seed': random_seed } } # Keep original function for backwards compatibility def run_single_fund_simulation(df: pd.DataFrame, starting_capital: float = 10000, start_date: str = '2025-01-01', max_duration_days: int = 365, days_before: int = 1, min_prob_7d: float = 0.90, min_prob_current: float = 0.90, investment_probability: float = 0.5, target_return: Optional[float] = None, min_volume: Optional[float] = None, random_seed: Optional[int] = None) -> Dict[str, Any]: """ Run a single fund simulation with day-by-day investment decisions. Each day (when not already invested), the trader decides to invest with probability alpha, then selects uniformly at random from available markets. Args: df: Market data DataFrame starting_capital: Initial capital start_date: Simulation start date max_duration_days: Maximum simulation duration days_before: Days before resolution to invest min_prob_7d: Minimum probability at 7 days min_prob_current: Minimum probability at investment day investment_probability: Probability of investing on any given day (alpha) target_return: Target return threshold to stop trading (None = no threshold) min_volume: Minimum market volume to consider (None = no filter) random_seed: Random seed for reproducibility Returns: Dictionary containing simulation results """ # Use the optimized fast version return run_single_fund_simulation_fast( df=df, starting_capital=starting_capital, start_date=start_date, max_duration_days=max_duration_days, days_before=days_before, min_prob_7d=min_prob_7d, min_prob_current=min_prob_current, investment_probability=investment_probability, target_return=target_return, min_volume=min_volume, random_seed=random_seed ) def calculate_kelly_fraction(true_prob: float, market_prob: float) -> float: """ Calculate the Kelly criterion bet fraction. Args: true_prob: Estimated true probability of winning (p) market_prob: Market's implied probability (price) Returns: Optimal fraction of bankroll to bet (can be negative if no edge) """ if market_prob <= 0 or market_prob >= 1: return 0.0 # Odds: profit per dollar risked if you win # If you pay P for a contract that pays $1, your profit is (1-P)/P b = (1 - market_prob) / market_prob # Kelly formula: f* = (p*b - q) / b # where q = 1 - p q = 1 - true_prob if b <= 0: return 0.0 kelly = (true_prob * b - q) / b # Kelly can be negative (meaning don't bet), cap at 0 return max(0.0, kelly) def get_historical_win_rate(df: pd.DataFrame, prob_col: str, probability: float, tolerance: float = 0.02) -> float: """ Get historical win rate for markets at a similar probability level. Args: df: Market data DataFrame prob_col: Probability column to use probability: Target probability to look up tolerance: Range around probability to include Returns: Historical win rate (defaults to market probability if insufficient data) """ mask = ( (df[prob_col] >= probability - tolerance) & (df[prob_col] <= probability + tolerance) & (df['outcome_int'].notna()) ) similar_markets = df[mask] if len(similar_markets) < 10: # Not enough data, use a small edge assumption return probability + 0.005 # Assume 0.5% edge return similar_markets['outcome_int'].mean() def run_kelly_simulation(df: pd.DataFrame, starting_capital: float = 10000, start_date: str = '2025-01-01', max_duration_days: int = 365, days_before: int = 1, min_prob_7d: float = 0.90, min_prob_current: float = 0.90, investment_probability: float = 0.5, kelly_fraction: float = 0.5, edge_estimate: str = 'historical', min_volume: Optional[float] = None, random_seed: Optional[int] = None) -> Dict[str, Any]: """ Run a simulation using Kelly criterion for position sizing. Instead of betting 100% of capital, bets are sized according to Kelly criterion based on estimated edge. Args: df: Market data DataFrame starting_capital: Initial capital start_date: Simulation start date max_duration_days: Maximum simulation duration days_before: Days before resolution to invest min_prob_7d: Minimum probability at 7 days min_prob_current: Minimum probability at investment day investment_probability: Probability of attempting to invest on any given day kelly_fraction: Fraction of Kelly to use (0.5 = half Kelly, 1.0 = full Kelly) edge_estimate: Method to estimate edge ('historical', 'fixed_edge', 'fixed_edge_2') min_volume: Minimum market volume to consider (None = no filter) random_seed: Random seed for reproducibility Returns: Dictionary containing simulation results """ random_state = np.random.RandomState(random_seed) start_dt = pd.to_datetime(start_date, utc=True) end_dt = start_dt + timedelta(days=max_duration_days) prob_col = f'probability{days_before}d' # Pre-filter eligible markets market_mask = ( (df['probability7d'] >= min_prob_7d) & (df[prob_col] >= min_prob_current) & (df['probability7d'].notna()) & (df[prob_col].notna()) & (df['outcome_int'].notna()) & (df['closingDate'] >= start_dt + timedelta(days=days_before)) & (df['closingDate'] <= end_dt + timedelta(days=days_before)) ) # Add volume filter if specified if min_volume is not None and 'volume' in df.columns: market_mask = market_mask & (df['volume'] >= min_volume) eligible_markets = df[market_mask].copy() if len(eligible_markets) == 0: return { 'final_capital': starting_capital, 'total_return': 0.0, 'num_trades': 0, 'went_bust': False, 'ending_reason': 'no_markets', 'simulation_days': 0, 'trades': [], 'daily_capital': [], 'kelly_stats': { 'avg_bet_size': 0, 'avg_edge': 0, 'bets_skipped': 0, 'total_opportunities': 0 }, 'parameters': { 'starting_capital': starting_capital, 'start_date': start_date, 'max_duration_days': max_duration_days, 'days_before': days_before, 'min_prob_7d': min_prob_7d, 'min_prob_current': min_prob_current, 'investment_probability': investment_probability, 'kelly_fraction': kelly_fraction, 'edge_estimate': edge_estimate, 'min_volume': min_volume, 'random_seed': random_seed } } # Calculate trading dates eligible_markets['trading_date'] = eligible_markets['closingDate'] - timedelta(days=days_before) # Group by trading date markets_by_date = {} for _, market in eligible_markets.iterrows(): trade_date = market['trading_date'] if trade_date not in markets_by_date: markets_by_date[trade_date] = [] markets_by_date[trade_date].append(market) trading_dates = sorted([d for d in markets_by_date.keys() if start_dt <= d <= end_dt]) # Simulation state capital = starting_capital trades = [] daily_capital = [] bet_sizes = [] edges = [] bets_skipped = 0 total_opportunities = 0 for trade_date in trading_dates: if capital <= 0: break if trade_date > end_dt: break # Decision: attempt to invest today? if random_state.random() >= investment_probability: continue available_markets_today = markets_by_date[trade_date] if not available_markets_today: continue total_opportunities += 1 # Select market selected_market = available_markets_today[random_state.randint(0, len(available_markets_today))] market_prob = selected_market[prob_col] if pd.isna(market_prob) or market_prob <= 0 or market_prob >= 1: continue # Estimate true probability based on edge_estimate method if edge_estimate == 'historical': true_prob = get_historical_win_rate(df, prob_col, market_prob) elif edge_estimate == 'fixed_edge': true_prob = market_prob + 0.01 # Assume 1% edge elif edge_estimate == 'fixed_edge_2': true_prob = market_prob + 0.02 # Assume 2% edge else: true_prob = market_prob + 0.005 # Default small edge # Cap true_prob at reasonable bounds true_prob = min(0.999, max(0.001, true_prob)) # Calculate Kelly bet size full_kelly = calculate_kelly_fraction(true_prob, market_prob) bet_fraction = full_kelly * kelly_fraction # If no edge (Kelly <= 0), skip this bet if bet_fraction <= 0.001: # Tiny threshold to avoid floating point issues bets_skipped += 1 continue # Cap bet at 100% of capital bet_fraction = min(bet_fraction, 1.0) # Calculate bet amount bet_amount = capital * bet_fraction # Resolve the bet outcome = selected_market['outcome_int'] if pd.isna(outcome): continue edge = true_prob - market_prob edges.append(edge) bet_sizes.append(bet_fraction) if outcome == 1: # Win # Profit = bet_amount * (1/market_prob - 1) = bet_amount * (1 - market_prob) / market_prob profit = bet_amount * (1 - market_prob) / market_prob new_capital = capital + profit else: # Loss new_capital = capital - bet_amount sim_day = (trade_date - start_dt).days trades.append({ 'trade_number': len(trades) + 1, 'investment_date': trade_date, 'resolution_date': selected_market['closingDate'], 'market_probability': market_prob, 'estimated_true_prob': true_prob, 'edge': edge, 'kelly_fraction': full_kelly, 'actual_bet_fraction': bet_fraction, 'capital_before': capital, 'bet_amount': bet_amount, 'outcome': outcome, 'capital_after': new_capital, 'return': (new_capital - capital) / capital if capital > 0 else 0, 'sim_day': sim_day }) capital = new_capital if len(trades) % 5 == 0 or capital <= 0: daily_capital.append({ 'date': trade_date, 'capital': capital, 'day': sim_day }) if capital <= 0: capital = 0 break # Calculate final statistics total_return = (capital - starting_capital) / starting_capital if starting_capital > 0 else 0 went_bust = capital <= 0 if went_bust: ending_reason = 'bust' else: ending_reason = 'max_duration' final_sim_day = (min(end_dt, trading_dates[-1] if trading_dates else start_dt) - start_dt).days # Kelly-specific stats avg_bet_size = np.mean(bet_sizes) if bet_sizes else 0 avg_edge = np.mean(edges) if edges else 0 return { 'final_capital': capital, 'total_return': total_return, 'num_trades': len(trades), 'went_bust': went_bust, 'ending_reason': ending_reason, 'simulation_days': final_sim_day, 'trades': trades, 'daily_capital': daily_capital, 'kelly_stats': { 'avg_bet_size': avg_bet_size, 'avg_edge': avg_edge, 'bets_skipped': bets_skipped, 'total_opportunities': total_opportunities }, 'parameters': { 'starting_capital': starting_capital, 'start_date': start_date, 'max_duration_days': max_duration_days, 'days_before': days_before, 'min_prob_7d': min_prob_7d, 'min_prob_current': min_prob_current, 'investment_probability': investment_probability, 'kelly_fraction': kelly_fraction, 'edge_estimate': edge_estimate, 'min_volume': min_volume, 'random_seed': random_seed } } def plot_simulation_results(results_list: List[Dict[str, Any]], title: str = "Simulation Results"): """ Plot results from multiple simulation runs. Args: results_list: List of simulation result dictionaries title: Plot title """ if not results_list: print("No results to plot") return # Extract data final_capitals = [r['final_capital'] for r in results_list] total_returns = [r['total_return'] for r in results_list] num_trades = [r['num_trades'] for r in results_list] bust_rate = sum(1 for r in results_list if r['went_bust']) / len(results_list) # Create plots fig, axes = plt.subplots(2, 2, figsize=(15, 12)) # Final capital distribution axes[0, 0].hist(final_capitals, bins=50, alpha=0.7, edgecolor='black') axes[0, 0].axvline(np.mean(final_capitals), color='red', linestyle='--', label=f'Mean: ${np.mean(final_capitals):,.0f}') axes[0, 0].axvline(np.median(final_capitals), color='green', linestyle='--', label=f'Median: ${np.median(final_capitals):,.0f}') axes[0, 0].set_xlabel('Final Capital ($)') axes[0, 0].set_ylabel('Frequency') axes[0, 0].set_title('Final Capital Distribution') axes[0, 0].legend() axes[0, 0].grid(True, alpha=0.3) # Return distribution return_pct = [r * 100 for r in total_returns] axes[0, 1].hist(return_pct, bins=50, alpha=0.7, edgecolor='black') axes[0, 1].axvline(np.mean(return_pct), color='red', linestyle='--', label=f'Mean: {np.mean(return_pct):.1f}%') axes[0, 1].axvline(np.median(return_pct), color='green', linestyle='--', label=f'Median: {np.median(return_pct):.1f}%') axes[0, 1].axvline(0, color='black', linestyle='-', alpha=0.5, label='Break-even') axes[0, 1].set_xlabel('Total Return (%)') axes[0, 1].set_ylabel('Frequency') axes[0, 1].set_title('Return Distribution') axes[0, 1].legend() axes[0, 1].grid(True, alpha=0.3) # Number of trades distribution axes[1, 0].hist(num_trades, bins=30, alpha=0.7, edgecolor='black') axes[1, 0].axvline(np.mean(num_trades), color='red', linestyle='--', label=f'Mean: {np.mean(num_trades):.1f}') axes[1, 0].set_xlabel('Number of Trades') axes[1, 0].set_ylabel('Frequency') axes[1, 0].set_title('Number of Trades Distribution') axes[1, 0].legend() axes[1, 0].grid(True, alpha=0.3) # Summary statistics axes[1, 1].axis('off') stats_text = f""" Summary Statistics: Total Simulations: {len(results_list):,} Bust Rate: {bust_rate:.1%} Final Capital: Mean: ${np.mean(final_capitals):,.0f} Median: ${np.median(final_capitals):,.0f} Min: ${np.min(final_capitals):,.0f} Max: ${np.max(final_capitals):,.0f} Total Return: Mean: {np.mean(total_returns):.1%} Median: {np.median(total_returns):.1%} Min: {np.min(total_returns):.1%} Max: {np.max(total_returns):.1%} Trades per Simulation: Mean: {np.mean(num_trades):.1f} Median: {np.median(num_trades):.1f} """ axes[1, 1].text(0.1, 0.9, stats_text, transform=axes[1, 1].transAxes, fontsize=11, verticalalignment='top', fontfamily='monospace') plt.suptitle(title, fontsize=16, fontweight='bold') plt.tight_layout() plt.show() def print_simulation_summary(results_list: List[Dict[str, Any]]): """ Print detailed summary statistics for simulation results. Args: results_list: List of simulation result dictionaries """ if not results_list: print("No results to summarize") return # Extract data final_capitals = np.array([r['final_capital'] for r in results_list]) total_returns = np.array([r['total_return'] for r in results_list]) num_trades = np.array([r['num_trades'] for r in results_list]) # Calculate statistics bust_count = sum(1 for r in results_list if r['went_bust']) bust_rate = bust_count / len(results_list) target_reached_count = sum(1 for r in results_list if r.get('reached_target', False)) target_reached_rate = target_reached_count / len(results_list) positive_return_count = sum(1 for r in total_returns if r > 0) positive_return_rate = positive_return_count / len(results_list) # Check if target return was used target_return = results_list[0]['parameters'].get('target_return', None) print("=" * 60) print("SIMULATION SUMMARY") print("=" * 60) print(f"Total Simulations: {len(results_list):,}") print(f"Went Bust: {bust_count:,} ({bust_rate:.1%})") if target_return is not None: print(f"Reached Target ({target_return:.1%}): {target_reached_count:,} ({target_reached_rate:.1%})") print(f"Positive Returns: {positive_return_count:,} ({positive_return_rate:.1%})") print(f"\nFINAL CAPITAL STATISTICS:") print(f"Mean: ${final_capitals.mean():,.2f}") print(f"Median: ${np.median(final_capitals):,.2f}") print(f"Std Dev: ${final_capitals.std():,.2f}") print(f"Min: ${final_capitals.min():,.2f}") print(f"Max: ${final_capitals.max():,.2f}") print(f"\nRETURN STATISTICS:") print(f"Mean: {total_returns.mean():.1%}") print(f"Median: {np.median(total_returns):.1%}") print(f"Std Dev: {total_returns.std():.1%}") print(f"Min: {total_returns.min():.1%}") print(f"Max: {total_returns.max():.1%}") print(f"\nTRADE STATISTICS:") print(f"Mean Trades: {num_trades.mean():.1f}") print(f"Median Trades: {np.median(num_trades):.1f}") print(f"Min Trades: {num_trades.min()}") print(f"Max Trades: {num_trades.max()}") # Percentiles percentiles = [5, 10, 25, 75, 90, 95] print(f"\nRETURN PERCENTILES:") for p in percentiles: value = np.percentile(total_returns, p) print(f"{p}th percentile: {value:.1%}") def run_multi_fund_simulation(df: pd.DataFrame, n_funds: int = 5, starting_capital: float = 10000, start_date: str = '2025-01-01', max_duration_days: int = 365, days_before: int = 1, min_prob_7d: float = 0.90, min_prob_current: float = 0.90, investment_probability: float = 0.5, target_return: Optional[float] = None, min_volume: Optional[float] = None, random_seed: Optional[int] = None) -> Dict[str, Any]: """ Run a multi-fund simulation where capital is divided into independent funds. Each fund operates independently with the same investment probability (alpha). Args: df: Market data DataFrame n_funds: Number of independent funds to create starting_capital: Total initial capital (divided among funds) start_date: Simulation start date max_duration_days: Maximum simulation duration days_before: Days before resolution to invest min_prob_7d: Minimum probability at 7 days min_prob_current: Minimum probability at investment day investment_probability: Probability of investing on any given day (alpha) target_return: Target return threshold per fund (None = no threshold) min_volume: Minimum market volume to consider (None = no filter) random_seed: Random seed for reproducibility Returns: Dictionary containing multi-fund simulation results """ # Set up random state random_state = np.random.RandomState(random_seed) # Calculate capital per fund capital_per_fund = starting_capital / n_funds # Run simulation for each fund independently fund_results = [] all_trades = [] for fund_id in range(n_funds): # Use different seed for each fund to ensure independence fund_seed = random_state.randint(0, 1000000) # Run single fund simulation for this fund using fast version fund_result = run_single_fund_simulation_fast( df=df, starting_capital=capital_per_fund, start_date=start_date, max_duration_days=max_duration_days, days_before=days_before, min_prob_7d=min_prob_7d, min_prob_current=min_prob_current, investment_probability=investment_probability, target_return=target_return, min_volume=min_volume, random_seed=fund_seed ) # Add fund ID to result and trades fund_result['fund_id'] = fund_id for trade in fund_result['trades']: trade['fund_id'] = fund_id all_trades.append(trade) fund_results.append(fund_result) # Calculate portfolio-level statistics surviving_funds = sum(1 for fund in fund_results if not fund['went_bust']) total_final_capital = sum(fund['final_capital'] for fund in fund_results) total_portfolio_return = (total_final_capital - starting_capital) / starting_capital if starting_capital > 0 else 0 # Calculate average metrics across surviving funds if surviving_funds > 0: avg_capital_per_surviving_fund = sum(fund['final_capital'] for fund in fund_results if not fund['went_bust']) / surviving_funds avg_return_per_surviving_fund = sum(fund['total_return'] for fund in fund_results if not fund['went_bust']) / surviving_funds else: avg_capital_per_surviving_fund = 0 avg_return_per_surviving_fund = -1 # All funds went bust # Target achievement stats funds_reached_target = sum(1 for fund in fund_results if fund.get('reached_target', False)) target_achievement_rate = funds_reached_target / n_funds # Trading activity stats total_trades = len(all_trades) avg_trades_per_fund = total_trades / n_funds # Survivorship and diversification metrics survivorship_rate = surviving_funds / n_funds bust_rate = 1 - survivorship_rate return { 'portfolio_final_capital': total_final_capital, 'portfolio_total_return': total_portfolio_return, 'n_funds': n_funds, 'surviving_funds': surviving_funds, 'survivorship_rate': survivorship_rate, 'bust_rate': bust_rate, 'avg_capital_per_surviving_fund': avg_capital_per_surviving_fund, 'avg_return_per_surviving_fund': avg_return_per_surviving_fund, 'funds_reached_target': funds_reached_target, 'target_achievement_rate': target_achievement_rate, 'total_trades': total_trades, 'avg_trades_per_fund': avg_trades_per_fund, 'fund_results': fund_results, 'all_trades': all_trades, 'parameters': { 'n_funds': n_funds, 'starting_capital': starting_capital, 'capital_per_fund': capital_per_fund, 'start_date': start_date, 'max_duration_days': max_duration_days, 'days_before': days_before, 'min_prob_7d': min_prob_7d, 'min_prob_current': min_prob_current, 'investment_probability': investment_probability, 'target_return': target_return, 'min_volume': min_volume, 'random_seed': random_seed } } def plot_multi_fund_results(results_list: List[Dict[str, Any]], title: str = "Multi-Fund Simulation Results"): """ Plot results from multiple multi-fund simulation runs. Args: results_list: List of multi-fund simulation result dictionaries title: Plot title """ if not results_list: print("No results to plot") return # Extract portfolio-level data portfolio_final_capitals = [r['portfolio_final_capital'] for r in results_list] portfolio_returns = [r['portfolio_total_return'] for r in results_list] surviving_funds = [r['surviving_funds'] for r in results_list] survivorship_rates = [r['survivorship_rate'] for r in results_list] # Create plots fig, axes = plt.subplots(2, 2, figsize=(16, 12)) # Portfolio final capital distribution axes[0, 0].hist(portfolio_final_capitals, bins=30, alpha=0.7, edgecolor='black', color='steelblue') axes[0, 0].axvline(np.mean(portfolio_final_capitals), color='red', linestyle='--', label=f'Mean: ${np.mean(portfolio_final_capitals):,.0f}') axes[0, 0].axvline(np.median(portfolio_final_capitals), color='green', linestyle='--', label=f'Median: ${np.median(portfolio_final_capitals):,.0f}') axes[0, 0].set_xlabel('Portfolio Final Capital ($)') axes[0, 0].set_ylabel('Frequency') axes[0, 0].set_title('Portfolio Final Capital Distribution') axes[0, 0].legend() axes[0, 0].grid(True, alpha=0.3) # Portfolio return distribution return_pct = [r * 100 for r in portfolio_returns] axes[0, 1].hist(return_pct, bins=30, alpha=0.7, edgecolor='black', color='green') axes[0, 1].axvline(np.mean(return_pct), color='red', linestyle='--', label=f'Mean: {np.mean(return_pct):.1f}%') axes[0, 1].axvline(0, color='black', linestyle='-', alpha=0.5, label='Break-even') axes[0, 1].set_xlabel('Portfolio Total Return (%)') axes[0, 1].set_ylabel('Frequency') axes[0, 1].set_title('Portfolio Return Distribution') axes[0, 1].legend() axes[0, 1].grid(True, alpha=0.3) # Number of surviving funds distribution n_funds = results_list[0]['n_funds'] axes[1, 0].hist(surviving_funds, bins=range(n_funds + 2), alpha=0.7, edgecolor='black', color='orange') axes[1, 0].axvline(np.mean(surviving_funds), color='red', linestyle='--', label=f'Mean: {np.mean(surviving_funds):.1f}') axes[1, 0].set_xlabel('Number of Surviving Funds') axes[1, 0].set_ylabel('Frequency') axes[1, 0].set_title('Surviving Funds Distribution') axes[1, 0].legend() axes[1, 0].grid(True, alpha=0.3) axes[1, 0].set_xticks(range(n_funds + 1)) # Summary statistics axes[1, 1].axis('off') # Calculate additional stats total_bust_rate = sum(1 for r in results_list if r['surviving_funds'] == 0) / len(results_list) avg_survivorship = np.mean(survivorship_rates) stats_text = f""" Multi-Fund Summary Statistics: Total Simulations: {len(results_list):,} Funds per Portfolio: {n_funds} Total Bust Rate: {total_bust_rate:.1%} Portfolio Capital: Mean: ${np.mean(portfolio_final_capitals):,.0f} Median: ${np.median(portfolio_final_capitals):,.0f} Min: ${np.min(portfolio_final_capitals):,.0f} Max: ${np.max(portfolio_final_capitals):,.0f} Portfolio Return: Mean: {np.mean(portfolio_returns):.1%} Median: {np.median(portfolio_returns):.1%} Fund Survivorship: Avg Surviving: {np.mean(surviving_funds):.1f} / {n_funds} Avg Survivorship: {avg_survivorship:.1%} """ axes[1, 1].text(0.1, 0.9, stats_text, transform=axes[1, 1].transAxes, fontsize=11, verticalalignment='top', fontfamily='monospace') plt.suptitle(title, fontsize=16, fontweight='bold') plt.tight_layout() plt.show() def print_multi_fund_summary(results_list: List[Dict[str, Any]]): """ Print detailed summary statistics for multi-fund simulation results. Args: results_list: List of multi-fund simulation result dictionaries """ if not results_list: print("No results to summarize") return # Extract data n_funds = results_list[0]['n_funds'] portfolio_capitals = np.array([r['portfolio_final_capital'] for r in results_list]) portfolio_returns = np.array([r['portfolio_total_return'] for r in results_list]) surviving_funds = np.array([r['surviving_funds'] for r in results_list]) survivorship_rates = np.array([r['survivorship_rate'] for r in results_list]) # Calculate portfolio-level statistics total_bust_count = sum(1 for r in results_list if r['surviving_funds'] == 0) total_bust_rate = total_bust_count / len(results_list) positive_return_count = sum(1 for r in portfolio_returns if r > 0) positive_return_rate = positive_return_count / len(results_list) # Check if target return was used target_return = results_list[0]['parameters'].get('target_return', None) print("=" * 80) print("MULTI-FUND SIMULATION SUMMARY") print("=" * 80) print(f"Total Simulations: {len(results_list):,}") print(f"Funds per Portfolio: {n_funds}") print(f"Starting Capital per Fund: ${results_list[0]['parameters']['capital_per_fund']:,.0f}") print(f"Total Starting Capital: ${results_list[0]['parameters']['starting_capital']:,.0f}") print(f"\nPORTFOLIO SURVIVORSHIP:") print(f"Total Portfolio Bust Rate: {total_bust_rate:.1%} ({total_bust_count:,} portfolios)") print(f"Average Surviving Funds: {surviving_funds.mean():.1f} / {n_funds}") print(f"Average Survivorship Rate: {survivorship_rates.mean():.1%}") print(f"Portfolios with All Funds Surviving: {sum(1 for s in surviving_funds if s == n_funds)} ({sum(1 for s in surviving_funds if s == n_funds)/len(results_list):.1%})") if target_return is not None: target_achieved_portfolios = sum(1 for r in results_list if r['funds_reached_target'] > 0) avg_funds_reaching_target = np.mean([r['funds_reached_target'] for r in results_list]) print(f"\nTARGET ACHIEVEMENT ({target_return:.1%}):") print(f"Portfolios with ≥1 Fund Reaching Target: {target_achieved_portfolios:,} ({target_achieved_portfolios/len(results_list):.1%})") print(f"Average Funds Reaching Target: {avg_funds_reaching_target:.1f} / {n_funds}") print(f"\nPORTFOLIO PERFORMANCE:") print(f"Positive Returns: {positive_return_count:,} ({positive_return_rate:.1%})") print(f"\nPORTFOLIO CAPITAL STATISTICS:") print(f"Mean: ${portfolio_capitals.mean():,.2f}") print(f"Median: ${np.median(portfolio_capitals):,.2f}") print(f"Std Dev: ${portfolio_capitals.std():,.2f}") print(f"Min: ${portfolio_capitals.min():,.2f}") print(f"Max: ${portfolio_capitals.max():,.2f}") print(f"\nPORTFOLIO RETURN STATISTICS:") print(f"Mean: {portfolio_returns.mean():.1%}") print(f"Median: {np.median(portfolio_returns):.1%}") print(f"Std Dev: {portfolio_returns.std():.1%}") print(f"Min: {portfolio_returns.min():.1%}") print(f"Max: {portfolio_returns.max():.1%}") # Compare to single fund equivalent print(f"\nDIVERSIFICATION ANALYSIS:") single_fund_equivalent = results_list[0]['parameters']['starting_capital'] avg_portfolio_capital = portfolio_capitals.mean() diversification_benefit = (avg_portfolio_capital - single_fund_equivalent) / single_fund_equivalent print(f"Diversification Benefit: {diversification_benefit:+.1%} vs single fund baseline") # Risk metrics portfolio_volatility = portfolio_returns.std() print(f"Portfolio Return Volatility: {portfolio_volatility:.1%}") # Percentiles percentiles = [5, 10, 25, 75, 90, 95] print(f"\nPORTFOLIO RETURN PERCENTILES:") for p in percentiles: value = np.percentile(portfolio_returns, p) print(f"{p}th percentile: {value:.1%}")