safeChoicesSimulation / simulation_utils.py
dhruv575
Minimum Volume Filter
09368f6
"""
Simulation utilities for Safe Choices prediction market trading simulations.
This module contains shared functions for running Monte Carlo simulations
of different trading strategies on prediction markets.
"""
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Tuple, Optional, List, Dict, Any
def load_and_filter_data(csv_path: str, start_date: str = '2025-01-01') -> pd.DataFrame:
    """
    Load the market data and filter for simulation period.

    Probability columns are coerced to numeric *before* the completeness
    filter runs, so a non-numeric entry (which coerces to NaN) removes the
    row instead of slipping through the filter and surviving as NaN.

    Args:
        csv_path: Path to the CSV file containing market data
        start_date: Start date for simulation (markets must close on or
            after this date)
    Returns:
        Filtered DataFrame sorted by 'closingDate', with probabilities
        clipped to [0, 1] and an added 'outcome_int' column (1 = resolved
        true, 0 = resolved false)
    """
    df = pd.read_csv(csv_path)

    # Convert dates - handle timezone awareness. Unparseable dates become NaT,
    # which compares False against start_dt and is therefore filtered out.
    df['closingDate'] = pd.to_datetime(df['closingDate'], format='mixed', errors='coerce', utc=True)
    start_dt = pd.to_datetime(start_date, utc=True)

    # Coerce first so that "complete data" really means "numeric data".
    prob_cols = [f'probability{days}d' for days in range(7, 0, -1)]
    for col in prob_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Keep markets that close on/after the start date and have complete data
    mask = (df['closingDate'] >= start_dt) & df['outcome'].notna()
    for col in prob_cols:
        mask &= df[col].notna()
    filtered_df = df[mask].copy().reset_index(drop=True)

    # Clamp probabilities between 0 and 1 (in case of data issues)
    for col in prob_cols:
        filtered_df[col] = filtered_df[col].clip(lower=0.0, upper=1.0)

    # Vectorized outcome conversion for the common spellings
    outcome_map = {'True': 1, 'true': 1, 'FALSE': 0, 'false': 0, True: 1, False: 0}
    filtered_df['outcome_int'] = filtered_df['outcome'].map(outcome_map)

    # Anything the map did not recognise falls back to a per-value conversion:
    # numbers are truncated to int, any other value is 1 only if its string
    # form is 'true' (case-insensitive) and 0 otherwise.
    remaining_mask = filtered_df['outcome_int'].isna()
    if remaining_mask.any():
        def convert_outcome(value):
            if pd.isna(value):
                return None
            if isinstance(value, (int, float)):
                return int(value)
            return 1 if str(value).lower() == 'true' else 0

        filtered_df.loc[remaining_mask, 'outcome_int'] = (
            filtered_df.loc[remaining_mask, 'outcome'].apply(convert_outcome)
        )

    # Sort by closing date for better performance in simulations
    filtered_df = filtered_df.sort_values('closingDate').reset_index(drop=True)
    return filtered_df
def check_market_eligibility(market_row: pd.Series, days_before: int,
                             min_prob_7d: float, min_prob_current: float) -> bool:
    """
    Decide whether a single market clears both probability thresholds.

    Args:
        market_row: Row from the DataFrame containing market data
        days_before: Which 'probability{N}d' column supplies the current price
        min_prob_7d: Minimum value required in 'probability7d'
        min_prob_current: Minimum value required in 'probability{days_before}d'
    Returns:
        False when either value is absent or NaN; otherwise whether both
        values meet their thresholds
    """
    week_out = market_row.get('probability7d')
    on_the_day = market_row.get(f'probability{days_before}d')

    # A missing key comes back as None, which pd.isna also treats as missing.
    if pd.isna(week_out) or pd.isna(on_the_day):
        return False

    return week_out >= min_prob_7d and on_the_day >= min_prob_current
def calculate_days_until_resolution(current_date: datetime, closing_date: datetime) -> int:
    """
    Return the whole days remaining until a market closes, floored at zero.

    Args:
        current_date: Current simulation date
        closing_date: Market closing date
    Returns:
        The `.days` component of (closing_date - current_date), or 0 when
        that component is not positive
    """
    remaining = (closing_date - current_date).days
    return remaining if remaining > 0 else 0
def get_available_markets(df: pd.DataFrame, current_date: datetime, days_before: int,
                          min_prob_7d: float, min_prob_current: float) -> pd.DataFrame:
    """
    Select the markets that can be bought on `current_date`.

    A market qualifies when it closes exactly `days_before` whole days after
    `current_date`, both probability thresholds are met, and its
    probabilities and outcome are present.

    Args:
        df: DataFrame containing market data
        current_date: Current simulation date
        days_before: Days before resolution to invest
        min_prob_7d: Minimum probability at 7 days before
        min_prob_current: Minimum probability at current day
    Returns:
        Copy of the qualifying rows with an added 'days_until_resolution'
        column, or a completely empty DataFrame when nothing qualifies
    """
    current_col = f'probability{days_before}d'
    days_left = (df['closingDate'] - current_date).dt.days

    conditions = [
        days_left == days_before,              # resolves exactly N days from now
        df['probability7d'] >= min_prob_7d,    # 7d probability threshold
        df[current_col] >= min_prob_current,   # current-day probability threshold
        df['probability7d'].notna(),           # valid 7d data
        df[current_col].notna(),               # valid current-day data
        df['outcome_int'].notna(),             # valid outcome data
    ]
    qualifies = conditions[0]
    for condition in conditions[1:]:
        qualifies = qualifies & condition

    if not qualifies.any():
        return pd.DataFrame()

    selection = df[qualifies].copy()
    selection['days_until_resolution'] = days_left[qualifies]
    return selection
def select_next_market(available_markets: pd.DataFrame, random_state: np.random.RandomState,
                       skew_factor: float = 0.1) -> Optional[pd.Series]:
    """
    Select the next market to invest in using exponentially decaying weights.

    Each market gets weight exp(-days_until_resolution * skew_factor), so
    markets closer to resolution are favoured when skew_factor is positive.
    The weights are normalised in log space: subtracting the largest
    log-weight before exponentiating leaves the normalised weights
    mathematically unchanged but prevents every weight from underflowing to
    zero (which would make the normalisation 0/0 and the draw fail) when
    days * skew_factor is large.

    Args:
        available_markets: DataFrame of markets available for investment;
            must contain a 'days_until_resolution' column
        random_state: Random state for reproducible results
        skew_factor: Controls the skew (higher = more weight on closer markets)
    Returns:
        Selected market as Series, or None if no markets available
    """
    if len(available_markets) == 0:
        return None

    days_until = available_markets['days_until_resolution'].to_numpy(dtype=float)

    # Work with log-weights; the largest becomes 0 so exp() yields exactly 1
    # for the best candidate and values in [0, 1] for the rest.
    log_weights = -days_until * skew_factor
    log_weights = log_weights - log_weights.max()
    weights = np.exp(log_weights)
    weights = weights / weights.sum()

    selected_idx = random_state.choice(len(available_markets), p=weights)
    return available_markets.iloc[selected_idx]
def calculate_investment_return(market: pd.Series, days_before: int, capital: float) -> float:
    """
    Compute the capital held after a market bought at its listed price resolves.

    Args:
        market: Market data as Series; reads 'probability{days_before}d'
            and 'outcome_int'
        days_before: Days before resolution when investment was made
        capital: Amount invested
    Returns:
        capital / probability when the outcome is 1 and the probability lies
        strictly between 0 and 1; 0.0 in every other case (loss, missing
        probability or outcome, or a probability outside that open interval,
        including exactly 1.0)
    """
    price = market[f'probability{days_before}d']
    resolved = market['outcome_int']

    # Missing data is deliberately treated as a total loss.
    if pd.isna(price) or pd.isna(resolved):
        return 0.0

    # Only a strictly interior price counts as a valid bet.
    pays_out = resolved == 1 and 0 < price < 1.0
    if not pays_out:
        return 0.0

    return capital / price
def run_single_fund_simulation_fast(df: pd.DataFrame,
                                    starting_capital: float = 10000,
                                    start_date: str = '2025-01-01',
                                    max_duration_days: int = 365,
                                    days_before: int = 1,
                                    min_prob_7d: float = 0.90,
                                    min_prob_current: float = 0.90,
                                    investment_probability: float = 0.5,
                                    target_return: Optional[float] = None,
                                    min_volume: Optional[float] = None,
                                    random_seed: Optional[int] = None) -> Dict[str, Any]:
    """
    Event-driven single fund simulation that stakes the whole bankroll per trade.

    Eligible markets are filtered once, grouped by their trading timestamp
    (closing time minus `days_before` days), and only those timestamps are
    visited. At each one the fund invests with probability
    `investment_probability`, picks one of that timestamp's markets
    uniformly at random, and either multiplies its capital by
    1 / probability (outcome 1) or loses everything.

    NOTE(review): the loop does not wait for a position to resolve before
    taking the next trading timestamp - confirm that is intended.

    Args:
        df: Market data with 'closingDate', 'probability7d',
            'probability{days_before}d' and 'outcome_int' columns
        starting_capital: Initial capital
        start_date: Simulation start date (interpreted as UTC)
        max_duration_days: Length of the trading window in days
        days_before: Days before resolution to invest
        min_prob_7d: Minimum probability at 7 days
        min_prob_current: Minimum probability at investment day
        investment_probability: Probability of investing at each opportunity
        target_return: Stop trading once this return is reached (None = never)
        min_volume: Minimum 'volume' to consider; ignored when None or when
            the DataFrame has no 'volume' column
        random_seed: Random seed for reproducibility
    Returns:
        Dictionary with final capital, return, trade log, periodic capital
        snapshots, the ending reason and an echo of the parameters
    """
    rng = np.random.RandomState(random_seed)
    window_open = pd.to_datetime(start_date, utc=True)
    window_close = window_open + timedelta(days=max_duration_days)
    lead_time = timedelta(days=days_before)
    price_col = f'probability{days_before}d'

    # Echo of the inputs, attached to whichever result is returned.
    run_parameters = {
        'starting_capital': starting_capital,
        'start_date': start_date,
        'max_duration_days': max_duration_days,
        'days_before': days_before,
        'min_prob_7d': min_prob_7d,
        'min_prob_current': min_prob_current,
        'investment_probability': investment_probability,
        'target_return': target_return,
        'min_volume': min_volume,
        'random_seed': random_seed
    }

    # One upfront filter: thresholds, valid data, and a closing time whose
    # trading timestamp falls inside the simulation window.
    eligible = (
        (df['probability7d'] >= min_prob_7d) &
        (df[price_col] >= min_prob_current) &
        (df['probability7d'].notna()) &
        (df[price_col].notna()) &
        (df['outcome_int'].notna()) &
        (df['closingDate'] >= window_open + lead_time) &
        (df['closingDate'] <= window_close + lead_time)
    )
    if min_volume is not None and 'volume' in df.columns:
        eligible = eligible & (df['volume'] >= min_volume)

    candidates = df[eligible].copy()
    if len(candidates) == 0:
        return {
            'final_capital': starting_capital,
            'total_return': 0.0,
            'num_trades': 0,
            'went_bust': False,
            'reached_target': False,
            'ending_reason': 'no_markets',
            'simulation_days': 0,
            'trades': [],
            'daily_capital': [],
            'parameters': run_parameters
        }

    # Bucket markets by the timestamp at which they would be bought.
    candidates['trading_date'] = candidates['closingDate'] - lead_time
    by_trade_date = {}
    for _, row in candidates.iterrows():
        by_trade_date.setdefault(row['trading_date'], []).append(row)

    schedule = sorted(stamp for stamp in by_trade_date if window_open <= stamp <= window_close)

    capital = starting_capital
    trades = []
    daily_capital = []

    for trade_date in schedule:
        if capital <= 0:
            break

        # Stop as soon as the target return has been reached.
        if target_return is not None:
            if (capital - starting_capital) / starting_capital >= target_return:
                break

        if trade_date > window_close:
            break

        # Coin flip: sit this opportunity out? (first RNG draw)
        if rng.random() >= investment_probability:
            continue

        todays_markets = by_trade_date[trade_date]
        if not todays_markets:
            continue

        # Uniform pick among this timestamp's markets (second RNG draw)
        pick = todays_markets[rng.randint(0, len(todays_markets))]
        price = pick[price_col]
        outcome = pick['outcome_int']
        if pd.isna(price) or price <= 0 or price > 1 or pd.isna(outcome):
            continue

        # All-in bet: a win pays capital / price, a loss wipes the fund out.
        capital_after = capital / price if outcome == 1 else 0.0

        day_index = (trade_date - window_open).days
        trades.append({
            'trade_number': len(trades) + 1,
            'investment_date': trade_date,
            'resolution_date': pick['closingDate'],
            'probability': price,
            'capital_invested': capital,
            'outcome': outcome,
            'capital_after': capital_after,
            'return': (capital_after - capital) / capital if capital > 0 else 0,
            'sim_day': day_index
        })
        capital = capital_after

        # Snapshot every fifth trade, and always at the moment of going bust.
        wiped_out = capital == 0
        if len(trades) % 5 == 0 or wiped_out:
            daily_capital.append({
                'date': trade_date,
                'capital': capital,
                'day': day_index
            })
        if wiped_out:
            break

    total_return = (capital - starting_capital) / starting_capital if starting_capital > 0 else 0
    went_bust = capital == 0
    reached_target = target_return is not None and total_return >= target_return

    if went_bust:
        ending_reason = 'bust'
    elif reached_target:
        ending_reason = 'target_reached'
    else:
        ending_reason = 'max_duration'

    # Reported duration is based on the last scheduled opportunity, not on
    # the timestamp at which trading actually stopped.
    last_opportunity = schedule[-1] if schedule else window_open
    final_sim_day = (min(window_close, last_opportunity) - window_open).days

    return {
        'final_capital': capital,
        'total_return': total_return,
        'num_trades': len(trades),
        'went_bust': went_bust,
        'reached_target': reached_target,
        'ending_reason': ending_reason,
        'simulation_days': final_sim_day,
        'trades': trades,
        'daily_capital': daily_capital,
        'parameters': run_parameters
    }
# Keep original function for backwards compatibility
def run_single_fund_simulation(df: pd.DataFrame,
                               starting_capital: float = 10000,
                               start_date: str = '2025-01-01',
                               max_duration_days: int = 365,
                               days_before: int = 1,
                               min_prob_7d: float = 0.90,
                               min_prob_current: float = 0.90,
                               investment_probability: float = 0.5,
                               target_return: Optional[float] = None,
                               min_volume: Optional[float] = None,
                               random_seed: Optional[int] = None) -> Dict[str, Any]:
    """
    Backwards-compatible entry point for the single fund simulation.

    Every argument is forwarded unchanged to
    run_single_fund_simulation_fast, and its result is returned as-is.

    Args:
        df: Market data DataFrame
        starting_capital: Initial capital
        start_date: Simulation start date
        max_duration_days: Maximum simulation duration
        days_before: Days before resolution to invest
        min_prob_7d: Minimum probability at 7 days
        min_prob_current: Minimum probability at investment day
        investment_probability: Probability of investing at each opportunity (alpha)
        target_return: Target return threshold to stop trading (None = no threshold)
        min_volume: Minimum market volume to consider (None = no filter)
        random_seed: Random seed for reproducibility
    Returns:
        Dictionary containing simulation results
    """
    forwarded = dict(
        df=df,
        starting_capital=starting_capital,
        start_date=start_date,
        max_duration_days=max_duration_days,
        days_before=days_before,
        min_prob_7d=min_prob_7d,
        min_prob_current=min_prob_current,
        investment_probability=investment_probability,
        target_return=target_return,
        min_volume=min_volume,
        random_seed=random_seed,
    )
    return run_single_fund_simulation_fast(**forwarded)
def calculate_kelly_fraction(true_prob: float, market_prob: float) -> float:
    """
    Calculate the Kelly criterion bet fraction for a binary contract.

    Buying at price P a contract that pays 1 gives net odds b = (1 - P) / P.
    With win probability p and q = 1 - p, Kelly's fraction is
    f* = (p * b - q) / b.

    Args:
        true_prob: Estimated true probability of winning (p)
        market_prob: Market's implied probability (price P)
    Returns:
        Fraction of bankroll to bet, floored at 0.0 (a negative Kelly value
        means "no edge" and is reported as 0.0). Also 0.0 when the price is
        not strictly between 0 and 1.
    """
    if market_prob <= 0 or market_prob >= 1:
        return 0.0

    # Net profit per unit staked if the contract pays out.
    net_odds = (1 - market_prob) / market_prob
    if net_odds <= 0:
        return 0.0

    lose_prob = 1 - true_prob
    raw_kelly = (true_prob * net_odds - lose_prob) / net_odds

    # Never recommend a negative stake.
    return raw_kelly if raw_kelly > 0.0 else 0.0
def get_historical_win_rate(df: pd.DataFrame, prob_col: str, probability: float,
                            tolerance: float = 0.02) -> float:
    """
    Estimate the win rate of markets priced close to `probability`.

    Args:
        df: Market data DataFrame
        prob_col: Probability column to use
        probability: Target probability to look up
        tolerance: Half-width of the inclusive band around `probability`
    Returns:
        Mean of 'outcome_int' over rows whose `prob_col` lies within the band
        and whose outcome is present. With fewer than 10 such rows, returns
        `probability + 0.005` (an assumed 0.5% edge) instead.
    """
    lower, upper = probability - tolerance, probability + tolerance
    in_band = df[prob_col].between(lower, upper) & df['outcome_int'].notna()
    comparable_outcomes = df.loc[in_band, 'outcome_int']

    if len(comparable_outcomes) < 10:
        # Not enough data, use a small edge assumption
        return probability + 0.005

    return comparable_outcomes.mean()
def run_kelly_simulation(df: pd.DataFrame,
                         starting_capital: float = 10000,
                         start_date: str = '2025-01-01',
                         max_duration_days: int = 365,
                         days_before: int = 1,
                         min_prob_7d: float = 0.90,
                         min_prob_current: float = 0.90,
                         investment_probability: float = 0.5,
                         kelly_fraction: float = 0.5,
                         edge_estimate: str = 'historical',
                         min_volume: Optional[float] = None,
                         random_seed: Optional[int] = None) -> Dict[str, Any]:
    """
    Simulate a fund that sizes every bet with a (fractional) Kelly stake.

    Markets are filtered and grouped by trading timestamp (closing time
    minus `days_before` days). At each timestamp the fund attempts a trade
    with probability `investment_probability`, picks a market uniformly at
    random, estimates the true win probability, and stakes
    `kelly_fraction` times the full Kelly fraction of its capital
    (capped at 100%).

    Args:
        df: Market data DataFrame
        starting_capital: Initial capital
        start_date: Simulation start date (interpreted as UTC)
        max_duration_days: Maximum simulation duration
        days_before: Days before resolution to invest
        min_prob_7d: Minimum probability at 7 days
        min_prob_current: Minimum probability at investment day
        investment_probability: Probability of attempting to invest at each opportunity
        kelly_fraction: Fraction of Kelly to use (0.5 = half Kelly, 1.0 = full Kelly)
        edge_estimate: 'historical' (look up the win rate in `df`),
            'fixed_edge' (price + 0.01), 'fixed_edge_2' (price + 0.02);
            any other value uses price + 0.005
        min_volume: Minimum market volume to consider (None = no filter)
        random_seed: Random seed for reproducibility
    Returns:
        Dictionary containing simulation results, including a 'kelly_stats'
        section with average bet size, average edge, skipped bets and the
        number of opportunities examined
    """
    rng = np.random.RandomState(random_seed)
    window_open = pd.to_datetime(start_date, utc=True)
    window_close = window_open + timedelta(days=max_duration_days)
    lead_time = timedelta(days=days_before)
    price_col = f'probability{days_before}d'

    # Echo of the inputs, attached to whichever result is returned.
    run_parameters = {
        'starting_capital': starting_capital,
        'start_date': start_date,
        'max_duration_days': max_duration_days,
        'days_before': days_before,
        'min_prob_7d': min_prob_7d,
        'min_prob_current': min_prob_current,
        'investment_probability': investment_probability,
        'kelly_fraction': kelly_fraction,
        'edge_estimate': edge_estimate,
        'min_volume': min_volume,
        'random_seed': random_seed
    }

    # Pre-filter eligible markets
    eligible = (
        (df['probability7d'] >= min_prob_7d) &
        (df[price_col] >= min_prob_current) &
        (df['probability7d'].notna()) &
        (df[price_col].notna()) &
        (df['outcome_int'].notna()) &
        (df['closingDate'] >= window_open + lead_time) &
        (df['closingDate'] <= window_close + lead_time)
    )
    if min_volume is not None and 'volume' in df.columns:
        eligible = eligible & (df['volume'] >= min_volume)

    candidates = df[eligible].copy()
    if len(candidates) == 0:
        return {
            'final_capital': starting_capital,
            'total_return': 0.0,
            'num_trades': 0,
            'went_bust': False,
            'ending_reason': 'no_markets',
            'simulation_days': 0,
            'trades': [],
            'daily_capital': [],
            'kelly_stats': {
                'avg_bet_size': 0,
                'avg_edge': 0,
                'bets_skipped': 0,
                'total_opportunities': 0
            },
            'parameters': run_parameters
        }

    # Bucket markets by the timestamp at which they would be bought.
    candidates['trading_date'] = candidates['closingDate'] - lead_time
    by_trade_date = {}
    for _, row in candidates.iterrows():
        by_trade_date.setdefault(row['trading_date'], []).append(row)

    schedule = sorted(stamp for stamp in by_trade_date if window_open <= stamp <= window_close)

    # Simulation state
    capital = starting_capital
    trades = []
    daily_capital = []
    bet_sizes = []
    edges = []
    bets_skipped = 0
    total_opportunities = 0

    for trade_date in schedule:
        if capital <= 0 or trade_date > window_close:
            break

        # Coin flip: attempt a trade at this opportunity? (first RNG draw)
        if rng.random() >= investment_probability:
            continue

        todays_markets = by_trade_date[trade_date]
        if not todays_markets:
            continue
        total_opportunities += 1

        # Uniform pick among this timestamp's markets (second RNG draw)
        pick = todays_markets[rng.randint(0, len(todays_markets))]
        market_prob = pick[price_col]
        if pd.isna(market_prob) or market_prob <= 0 or market_prob >= 1:
            continue

        # Estimate the true win probability according to edge_estimate.
        if edge_estimate == 'historical':
            true_prob = get_historical_win_rate(df, price_col, market_prob)
        elif edge_estimate == 'fixed_edge':
            true_prob = market_prob + 0.01  # Assume 1% edge
        elif edge_estimate == 'fixed_edge_2':
            true_prob = market_prob + 0.02  # Assume 2% edge
        else:
            true_prob = market_prob + 0.005  # Default small edge

        # Keep the estimate strictly inside (0, 1).
        true_prob = min(0.999, max(0.001, true_prob))

        full_kelly = calculate_kelly_fraction(true_prob, market_prob)
        bet_fraction = full_kelly * kelly_fraction

        # No meaningful edge: skip. The small threshold absorbs float noise.
        if bet_fraction <= 0.001:
            bets_skipped += 1
            continue

        # Never stake more than the whole bankroll.
        bet_fraction = min(bet_fraction, 1.0)
        bet_amount = capital * bet_fraction

        outcome = pick['outcome_int']
        if pd.isna(outcome):
            continue

        edge = true_prob - market_prob
        edges.append(edge)
        bet_sizes.append(bet_fraction)

        if outcome == 1:
            # Win: profit is stake * (1 - price) / price
            new_capital = capital + bet_amount * (1 - market_prob) / market_prob
        else:
            # Loss: the stake is forfeited
            new_capital = capital - bet_amount

        day_index = (trade_date - window_open).days
        trades.append({
            'trade_number': len(trades) + 1,
            'investment_date': trade_date,
            'resolution_date': pick['closingDate'],
            'market_probability': market_prob,
            'estimated_true_prob': true_prob,
            'edge': edge,
            'kelly_fraction': full_kelly,
            'actual_bet_fraction': bet_fraction,
            'capital_before': capital,
            'bet_amount': bet_amount,
            'outcome': outcome,
            'capital_after': new_capital,
            'return': (new_capital - capital) / capital if capital > 0 else 0,
            'sim_day': day_index
        })
        capital = new_capital

        # Snapshot every fifth trade, and always at the moment of going bust.
        if len(trades) % 5 == 0 or capital <= 0:
            daily_capital.append({
                'date': trade_date,
                'capital': capital,
                'day': day_index
            })
        if capital <= 0:
            capital = 0
            break

    # Final statistics
    total_return = (capital - starting_capital) / starting_capital if starting_capital > 0 else 0
    went_bust = capital <= 0
    ending_reason = 'bust' if went_bust else 'max_duration'

    # Reported duration is based on the last scheduled opportunity.
    last_opportunity = schedule[-1] if schedule else window_open
    final_sim_day = (min(window_close, last_opportunity) - window_open).days

    return {
        'final_capital': capital,
        'total_return': total_return,
        'num_trades': len(trades),
        'went_bust': went_bust,
        'ending_reason': ending_reason,
        'simulation_days': final_sim_day,
        'trades': trades,
        'daily_capital': daily_capital,
        'kelly_stats': {
            'avg_bet_size': np.mean(bet_sizes) if bet_sizes else 0,
            'avg_edge': np.mean(edges) if edges else 0,
            'bets_skipped': bets_skipped,
            'total_opportunities': total_opportunities
        },
        'parameters': run_parameters
    }
def plot_simulation_results(results_list: List[Dict[str, Any]], title: str = "Simulation Results"):
    """
    Draw a 2x2 summary figure for a batch of simulation runs.

    The panels are histograms of final capital, total return (in percent)
    and trade counts, plus a text panel of summary statistics. The figure
    is shown with plt.show(); nothing is returned.

    Args:
        results_list: List of simulation result dictionaries; each must
            provide 'final_capital', 'total_return', 'num_trades' and
            'went_bust'
        title: Plot title
    """
    if not results_list:
        print("No results to plot")
        return

    # Extract data
    final_capitals = [run['final_capital'] for run in results_list]
    total_returns = [run['total_return'] for run in results_list]
    num_trades = [run['num_trades'] for run in results_list]
    bust_rate = sum(1 for run in results_list if run['went_bust']) / len(results_list)

    def draw_histogram(ax, values, bins, markers, xlabel, panel_title):
        # One histogram panel: bars, vertical reference lines, then labelling.
        ax.hist(values, bins=bins, alpha=0.7, edgecolor='black')
        for position, line_style in markers:
            ax.axvline(position, **line_style)
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Frequency')
        ax.set_title(panel_title)
        ax.legend()
        ax.grid(True, alpha=0.3)

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # Final capital distribution
    capital_mean = np.mean(final_capitals)
    capital_median = np.median(final_capitals)
    draw_histogram(
        axes[0, 0], final_capitals, 50,
        [
            (capital_mean, dict(color='red', linestyle='--', label=f'Mean: ${capital_mean:,.0f}')),
            (capital_median, dict(color='green', linestyle='--', label=f'Median: ${capital_median:,.0f}')),
        ],
        'Final Capital ($)', 'Final Capital Distribution',
    )

    # Return distribution
    return_pct = [value * 100 for value in total_returns]
    pct_mean = np.mean(return_pct)
    pct_median = np.median(return_pct)
    draw_histogram(
        axes[0, 1], return_pct, 50,
        [
            (pct_mean, dict(color='red', linestyle='--', label=f'Mean: {pct_mean:.1f}%')),
            (pct_median, dict(color='green', linestyle='--', label=f'Median: {pct_median:.1f}%')),
            (0, dict(color='black', linestyle='-', alpha=0.5, label='Break-even')),
        ],
        'Total Return (%)', 'Return Distribution',
    )

    # Number of trades distribution
    trades_mean = np.mean(num_trades)
    draw_histogram(
        axes[1, 0], num_trades, 30,
        [(trades_mean, dict(color='red', linestyle='--', label=f'Mean: {trades_mean:.1f}'))],
        'Number of Trades', 'Number of Trades Distribution',
    )

    # Summary statistics (text-only panel)
    axes[1, 1].axis('off')
    summary_lines = [
        "Summary Statistics:",
        f"Total Simulations: {len(results_list):,}",
        f"Bust Rate: {bust_rate:.1%}",
        "Final Capital:",
        f"Mean: ${np.mean(final_capitals):,.0f}",
        f"Median: ${np.median(final_capitals):,.0f}",
        f"Min: ${np.min(final_capitals):,.0f}",
        f"Max: ${np.max(final_capitals):,.0f}",
        "Total Return:",
        f"Mean: {np.mean(total_returns):.1%}",
        f"Median: {np.median(total_returns):.1%}",
        f"Min: {np.min(total_returns):.1%}",
        f"Max: {np.max(total_returns):.1%}",
        "Trades per Simulation:",
        f"Mean: {np.mean(num_trades):.1f}",
        f"Median: {np.median(num_trades):.1f}",
    ]
    stats_text = "\n" + "\n".join(summary_lines) + "\n"
    axes[1, 1].text(0.1, 0.9, stats_text, transform=axes[1, 1].transAxes,
                    fontsize=11, verticalalignment='top', fontfamily='monospace')

    plt.suptitle(title, fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()
def print_simulation_summary(results_list: List[Dict[str, Any]]):
    """
    Print a text report of summary statistics for a batch of simulation runs.

    Covers bust / target / positive-return counts, final capital, return and
    trade-count statistics, and selected return percentiles. The
    "Reached Target" line appears only when the first result's parameters
    carry a non-None 'target_return'.

    Args:
        results_list: List of simulation result dictionaries
    """
    if not results_list:
        print("No results to summarize")
        return

    total_runs = len(results_list)
    final_capitals = np.array([run['final_capital'] for run in results_list])
    total_returns = np.array([run['total_return'] for run in results_list])
    num_trades = np.array([run['num_trades'] for run in results_list])

    bust_count = sum(1 for run in results_list if run['went_bust'])
    target_reached_count = sum(1 for run in results_list if run.get('reached_target', False))
    positive_return_count = sum(1 for value in total_returns if value > 0)

    # Whether a target was in use is read from the first run only.
    target_return = results_list[0]['parameters'].get('target_return', None)

    banner = "=" * 60
    report = [
        banner,
        "SIMULATION SUMMARY",
        banner,
        f"Total Simulations: {total_runs:,}",
        f"Went Bust: {bust_count:,} ({bust_count / total_runs:.1%})",
    ]
    if target_return is not None:
        report.append(
            f"Reached Target ({target_return:.1%}): {target_reached_count:,} "
            f"({target_reached_count / total_runs:.1%})"
        )
    report.append(
        f"Positive Returns: {positive_return_count:,} ({positive_return_count / total_runs:.1%})"
    )

    report += [
        "\nFINAL CAPITAL STATISTICS:",
        f"Mean: ${final_capitals.mean():,.2f}",
        f"Median: ${np.median(final_capitals):,.2f}",
        f"Std Dev: ${final_capitals.std():,.2f}",
        f"Min: ${final_capitals.min():,.2f}",
        f"Max: ${final_capitals.max():,.2f}",
        "\nRETURN STATISTICS:",
        f"Mean: {total_returns.mean():.1%}",
        f"Median: {np.median(total_returns):.1%}",
        f"Std Dev: {total_returns.std():.1%}",
        f"Min: {total_returns.min():.1%}",
        f"Max: {total_returns.max():.1%}",
        "\nTRADE STATISTICS:",
        f"Mean Trades: {num_trades.mean():.1f}",
        f"Median Trades: {np.median(num_trades):.1f}",
        f"Min Trades: {num_trades.min()}",
        f"Max Trades: {num_trades.max()}",
        "\nRETURN PERCENTILES:",
    ]
    report += [
        f"{pct}th percentile: {np.percentile(total_returns, pct):.1%}"
        for pct in (5, 10, 25, 75, 90, 95)
    ]

    for line in report:
        print(line)
def run_multi_fund_simulation(df: pd.DataFrame,
                              n_funds: int = 5,
                              starting_capital: float = 10000,
                              start_date: str = '2025-01-01',
                              max_duration_days: int = 365,
                              days_before: int = 1,
                              min_prob_7d: float = 0.90,
                              min_prob_current: float = 0.90,
                              investment_probability: float = 0.5,
                              target_return: Optional[float] = None,
                              min_volume: Optional[float] = None,
                              random_seed: Optional[int] = None) -> Dict[str, Any]:
    """
    Split the capital evenly across several funds and simulate each one.

    Each fund is run through run_single_fund_simulation_fast with an equal
    share of `starting_capital` and its own seed, drawn in fund order from a
    master RandomState seeded with `random_seed`. Fund results are then
    aggregated into portfolio-level statistics.

    Args:
        df: Market data DataFrame
        n_funds: Number of funds to create
        starting_capital: Total initial capital (divided among funds)
        start_date: Simulation start date
        max_duration_days: Maximum simulation duration
        days_before: Days before resolution to invest
        min_prob_7d: Minimum probability at 7 days
        min_prob_current: Minimum probability at investment day
        investment_probability: Probability of investing at each opportunity (alpha)
        target_return: Target return threshold per fund (None = no threshold)
        min_volume: Minimum market volume to consider (None = no filter)
        random_seed: Random seed for reproducibility
    Returns:
        Dictionary containing portfolio totals, survivorship and target
        statistics, the per-fund results, a combined trade list (each trade
        tagged with 'fund_id') and an echo of the parameters
    """
    master_rng = np.random.RandomState(random_seed)
    capital_per_fund = starting_capital / n_funds

    fund_results = []
    all_trades = []
    for fund_id in range(n_funds):
        # Draw a separate seed per fund so the funds follow different paths.
        fund_seed = master_rng.randint(0, 1000000)

        outcome = run_single_fund_simulation_fast(
            df=df,
            starting_capital=capital_per_fund,
            start_date=start_date,
            max_duration_days=max_duration_days,
            days_before=days_before,
            min_prob_7d=min_prob_7d,
            min_prob_current=min_prob_current,
            investment_probability=investment_probability,
            target_return=target_return,
            min_volume=min_volume,
            random_seed=fund_seed
        )

        # Tag the result and its trades (in place) with the owning fund.
        outcome['fund_id'] = fund_id
        for trade in outcome['trades']:
            trade['fund_id'] = fund_id
            all_trades.append(trade)
        fund_results.append(outcome)

    # Portfolio-level statistics
    survivors = [fund for fund in fund_results if not fund['went_bust']]
    surviving_funds = len(survivors)
    total_final_capital = sum(fund['final_capital'] for fund in fund_results)
    if starting_capital > 0:
        total_portfolio_return = (total_final_capital - starting_capital) / starting_capital
    else:
        total_portfolio_return = 0

    # Averages over the funds that did not go bust
    if surviving_funds > 0:
        avg_capital_per_surviving_fund = sum(fund['final_capital'] for fund in survivors) / surviving_funds
        avg_return_per_surviving_fund = sum(fund['total_return'] for fund in survivors) / surviving_funds
    else:
        avg_capital_per_surviving_fund = 0
        avg_return_per_surviving_fund = -1  # All funds went bust

    funds_reached_target = sum(1 for fund in fund_results if fund.get('reached_target', False))
    total_trades = len(all_trades)
    survivorship_rate = surviving_funds / n_funds

    return {
        'portfolio_final_capital': total_final_capital,
        'portfolio_total_return': total_portfolio_return,
        'n_funds': n_funds,
        'surviving_funds': surviving_funds,
        'survivorship_rate': survivorship_rate,
        'bust_rate': 1 - survivorship_rate,
        'avg_capital_per_surviving_fund': avg_capital_per_surviving_fund,
        'avg_return_per_surviving_fund': avg_return_per_surviving_fund,
        'funds_reached_target': funds_reached_target,
        'target_achievement_rate': funds_reached_target / n_funds,
        'total_trades': total_trades,
        'avg_trades_per_fund': total_trades / n_funds,
        'fund_results': fund_results,
        'all_trades': all_trades,
        'parameters': {
            'n_funds': n_funds,
            'starting_capital': starting_capital,
            'capital_per_fund': capital_per_fund,
            'start_date': start_date,
            'max_duration_days': max_duration_days,
            'days_before': days_before,
            'min_prob_7d': min_prob_7d,
            'min_prob_current': min_prob_current,
            'investment_probability': investment_probability,
            'target_return': target_return,
            'min_volume': min_volume,
            'random_seed': random_seed
        }
    }
def plot_multi_fund_results(results_list: List[Dict[str, Any]], title: str = "Multi-Fund Simulation Results"):
    """
    Draw a 2x2 summary figure for a batch of multi-fund simulation runs.

    The figure contains three histograms (portfolio final capital, portfolio
    total return in percent, number of surviving funds) and one text panel
    with aggregate statistics. The figure is displayed with ``plt.show()``.
    When ``results_list`` is empty a message is printed and nothing is drawn.

    Args:
        results_list: List of multi-fund simulation result dictionaries. Each
            entry is read for 'portfolio_final_capital',
            'portfolio_total_return', 'surviving_funds' and
            'survivorship_rate'; 'n_funds' is taken from the first entry only.
        title: Plot title
    """
    if not results_list:
        print("No results to plot")
        return

    def column(key):
        # One value per simulation run, in run order.
        return [run[key] for run in results_list]

    def decorate(ax, xlabel, heading):
        # Labelling shared by the three histogram panels.
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Frequency')
        ax.set_title(heading)
        ax.legend()
        ax.grid(True, alpha=0.3)

    capitals = column('portfolio_final_capital')
    returns = column('portfolio_total_return')
    survivors = column('surviving_funds')
    survival_rates = column('survivorship_rate')

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    (ax_capital, ax_return), (ax_survivors, ax_text) = axes
    mean_style = {'color': 'red', 'linestyle': '--'}

    # Top-left: distribution of portfolio final capital
    ax_capital.hist(capitals, bins=30, alpha=0.7, edgecolor='black', color='steelblue')
    capital_mean = np.mean(capitals)
    capital_median = np.median(capitals)
    ax_capital.axvline(capital_mean, label=f'Mean: ${capital_mean:,.0f}', **mean_style)
    ax_capital.axvline(capital_median, color='green', linestyle='--',
                       label=f'Median: ${capital_median:,.0f}')
    decorate(ax_capital, 'Portfolio Final Capital ($)', 'Portfolio Final Capital Distribution')

    # Top-right: distribution of portfolio return, shown in percent
    return_pct = [value * 100 for value in returns]
    ax_return.hist(return_pct, bins=30, alpha=0.7, edgecolor='black', color='green')
    return_pct_mean = np.mean(return_pct)
    ax_return.axvline(return_pct_mean, label=f'Mean: {return_pct_mean:.1f}%', **mean_style)
    ax_return.axvline(0, color='black', linestyle='-', alpha=0.5, label='Break-even')
    decorate(ax_return, 'Portfolio Total Return (%)', 'Portfolio Return Distribution')

    # Bottom-left: how many funds survived per run (unit-width bins 0..n_funds)
    n_funds = results_list[0]['n_funds']
    ax_survivors.hist(survivors, bins=range(n_funds + 2), alpha=0.7, edgecolor='black', color='orange')
    survivors_mean = np.mean(survivors)
    ax_survivors.axvline(survivors_mean, label=f'Mean: {survivors_mean:.1f}', **mean_style)
    decorate(ax_survivors, 'Number of Surviving Funds', 'Surviving Funds Distribution')
    ax_survivors.set_xticks(range(n_funds + 1))

    # Bottom-right: text-only statistics panel
    ax_text.axis('off')
    # A run counts as a total bust when none of its funds survived.
    total_bust_rate = survivors.count(0) / len(results_list)
    avg_survivorship = np.mean(survival_rates)
    stats_text = f"""
Multi-Fund Summary Statistics:
Total Simulations: {len(results_list):,}
Funds per Portfolio: {n_funds}
Total Bust Rate: {total_bust_rate:.1%}
Portfolio Capital:
Mean: ${np.mean(capitals):,.0f}
Median: ${np.median(capitals):,.0f}
Min: ${np.min(capitals):,.0f}
Max: ${np.max(capitals):,.0f}
Portfolio Return:
Mean: {np.mean(returns):.1%}
Median: {np.median(returns):.1%}
Fund Survivorship:
Avg Surviving: {np.mean(survivors):.1f} / {n_funds}
Avg Survivorship: {avg_survivorship:.1%}
"""
    ax_text.text(0.1, 0.9, stats_text, transform=ax_text.transAxes,
                 fontsize=11, verticalalignment='top', fontfamily='monospace')

    plt.suptitle(title, fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()
def print_multi_fund_summary(results_list: List[Dict[str, Any]]) -> None:
    """
    Print detailed summary statistics for multi-fund simulation results.

    Fund count, per-fund capital, total starting capital and target return
    are read from the first result only. If ``results_list`` is empty a
    message is printed and the function returns.

    Args:
        results_list: List of multi-fund simulation result dictionaries. Each
            entry is read for 'portfolio_final_capital',
            'portfolio_total_return', 'surviving_funds' and
            'survivorship_rate' (plus 'funds_reached_target' when a target
            return is set). The first entry must also provide 'n_funds' and a
            'parameters' dict with 'capital_per_fund' and 'starting_capital';
            'target_return' in that dict is optional.
    """
    if not results_list:
        print("No results to summarize")
        return

    n_simulations = len(results_list)
    first_result = results_list[0]
    n_funds = first_result['n_funds']
    parameters = first_result['parameters']

    # Extract data
    portfolio_capitals = np.array([r['portfolio_final_capital'] for r in results_list])
    portfolio_returns = np.array([r['portfolio_total_return'] for r in results_list])
    surviving_funds = np.array([r['surviving_funds'] for r in results_list])
    survivorship_rates = np.array([r['survivorship_rate'] for r in results_list])

    # Portfolio-level counts; int() keeps the ',' and plain formatting identical
    # to what a Python-level count would print.
    total_bust_count = int(np.count_nonzero(surviving_funds == 0))
    total_bust_rate = total_bust_count / n_simulations
    positive_return_count = int(np.count_nonzero(portfolio_returns > 0))
    positive_return_rate = positive_return_count / n_simulations
    all_surviving_count = int(np.count_nonzero(surviving_funds == n_funds))

    # Check if target return was used
    target_return = parameters.get('target_return')

    print("=" * 80)
    print("MULTI-FUND SIMULATION SUMMARY")
    print("=" * 80)
    print(f"Total Simulations: {n_simulations:,}")
    print(f"Funds per Portfolio: {n_funds}")
    print(f"Starting Capital per Fund: ${parameters['capital_per_fund']:,.0f}")
    print(f"Total Starting Capital: ${parameters['starting_capital']:,.0f}")

    print("\nPORTFOLIO SURVIVORSHIP:")
    print(f"Total Portfolio Bust Rate: {total_bust_rate:.1%} ({total_bust_count:,} portfolios)")
    print(f"Average Surviving Funds: {surviving_funds.mean():.1f} / {n_funds}")
    print(f"Average Survivorship Rate: {survivorship_rates.mean():.1%}")
    print(f"Portfolios with All Funds Surviving: {all_surviving_count} ({all_surviving_count / n_simulations:.1%})")

    if target_return is not None:
        funds_reached_target = np.array([r['funds_reached_target'] for r in results_list])
        target_achieved_portfolios = int(np.count_nonzero(funds_reached_target > 0))
        avg_funds_reaching_target = funds_reached_target.mean()
        print(f"\nTARGET ACHIEVEMENT ({target_return:.1%}):")
        print(f"Portfolios with ≥1 Fund Reaching Target: {target_achieved_portfolios:,} ({target_achieved_portfolios / n_simulations:.1%})")
        print(f"Average Funds Reaching Target: {avg_funds_reaching_target:.1f} / {n_funds}")

    print("\nPORTFOLIO PERFORMANCE:")
    print(f"Positive Returns: {positive_return_count:,} ({positive_return_rate:.1%})")

    print("\nPORTFOLIO CAPITAL STATISTICS:")
    print(f"Mean: ${portfolio_capitals.mean():,.2f}")
    print(f"Median: ${np.median(portfolio_capitals):,.2f}")
    print(f"Std Dev: ${portfolio_capitals.std():,.2f}")
    print(f"Min: ${portfolio_capitals.min():,.2f}")
    print(f"Max: ${portfolio_capitals.max():,.2f}")

    print("\nPORTFOLIO RETURN STATISTICS:")
    print(f"Mean: {portfolio_returns.mean():.1%}")
    print(f"Median: {np.median(portfolio_returns):.1%}")
    print(f"Std Dev: {portfolio_returns.std():.1%}")
    print(f"Min: {portfolio_returns.min():.1%}")
    print(f"Max: {portfolio_returns.max():.1%}")

    # Compare mean final portfolio capital to the total starting capital
    print("\nDIVERSIFICATION ANALYSIS:")
    single_fund_equivalent = parameters['starting_capital']
    avg_portfolio_capital = portfolio_capitals.mean()
    # Guard the division the same way the portfolio return is guarded when the
    # results are built: a non-positive starting capital reports 0 instead of
    # nan/inf (and a NumPy RuntimeWarning).
    if single_fund_equivalent > 0:
        diversification_benefit = (avg_portfolio_capital - single_fund_equivalent) / single_fund_equivalent
    else:
        diversification_benefit = 0.0
    print(f"Diversification Benefit: {diversification_benefit:+.1%} vs single fund baseline")

    # Risk metrics
    portfolio_volatility = portfolio_returns.std()
    print(f"Portfolio Return Volatility: {portfolio_volatility:.1%}")

    # Percentiles
    percentiles = [5, 10, 25, 75, 90, 95]
    print("\nPORTFOLIO RETURN PERCENTILES:")
    for p in percentiles:
        value = np.percentile(portfolio_returns, p)
        print(f"{p}th percentile: {value:.1%}")