Spaces:
Sleeping
Sleeping
| """ | |
| Simulation utilities for Safe Choices prediction market trading simulations. | |
| This module contains shared functions for running Monte Carlo simulations | |
| of different trading strategies on prediction markets. | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime, timedelta | |
| from scipy import stats | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from typing import Tuple, Optional, List, Dict, Any | |
def load_and_filter_data(csv_path: str, start_date: str = '2025-01-01') -> pd.DataFrame:
    """
    Load the market data and filter it down to the simulation period.

    A row is kept only when its market closes on or after ``start_date``,
    has a recorded outcome, and has a usable numeric probability for each of
    the seven days before resolution.

    Args:
        csv_path: Path to the CSV file containing market data
        start_date: Start date for simulation (markets must close on or after this date)

    Returns:
        Filtered DataFrame sorted by ``closingDate``, with the probability
        columns converted to floats clipped to [0, 1] and an added
        ``outcome_int`` column (1 for a true outcome, 0 otherwise)

    Raises:
        KeyError: If ``closingDate``, ``outcome`` or one of the
            ``probability{N}d`` columns is missing from the CSV
    """
    df = pd.read_csv(csv_path)

    # Parse dates as timezone-aware UTC; unparseable values become NaT and
    # are removed by the date comparison below.
    df['closingDate'] = pd.to_datetime(df['closingDate'], format='mixed', errors='coerce', utc=True)
    start_dt = pd.to_datetime(start_date, utc=True)

    # Coerce probabilities BEFORE filtering: text that is not a number turns
    # into NaN here, so the completeness check below really does guarantee
    # seven usable probabilities per surviving row.
    prob_cols = [f'probability{days}d' for days in range(7, 0, -1)]
    for col in prob_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce').clip(lower=0.0, upper=1.0)

    mask = (
        (df['closingDate'] >= start_dt) &
        df['outcome'].notna() &
        df[prob_cols].notna().all(axis=1)
    )
    filtered_df = df[mask].copy().reset_index(drop=True)

    # Fast path: map the common spellings in one vectorized pass.
    outcome_map = {'True': 1, 'true': 1, 'FALSE': 0, 'false': 0, True: 1, False: 0}
    filtered_df['outcome_int'] = filtered_df['outcome'].map(outcome_map)

    # Slow path for anything the map did not recognise (e.g. 'TRUE', 'False').
    remaining_mask = filtered_df['outcome_int'].isna()
    if remaining_mask.any():
        def convert_outcome(value):
            if pd.isna(value):
                return None
            if isinstance(value, (int, float)):
                return int(value)
            # Any string other than a case-insensitive 'true' counts as 0.
            return 1 if str(value).lower() == 'true' else 0

        filtered_df.loc[remaining_mask, 'outcome_int'] = (
            filtered_df.loc[remaining_mask, 'outcome'].apply(convert_outcome)
        )

    # Chronological order is what the simulations iterate over.
    return filtered_df.sort_values('closingDate').reset_index(drop=True)
def check_market_eligibility(market_row: pd.Series, days_before: int,
                             min_prob_7d: float, min_prob_current: float) -> bool:
    """
    Decide whether a single market clears both probability thresholds.

    Args:
        market_row: Row from the DataFrame containing market data
        days_before: Number of days before resolution to check (1-7)
        min_prob_7d: Minimum probability threshold at 7 days before
        min_prob_current: Minimum probability threshold at current day

    Returns:
        False when either probability is missing or NaN; otherwise whether
        both probabilities are at or above their thresholds
    """
    current_col = f'probability{days_before}d'

    # .get() yields None for an absent column, which pd.isna treats as missing.
    week_ahead = market_row.get('probability7d')
    on_the_day = market_row.get(current_col)
    if pd.isna(week_ahead) or pd.isna(on_the_day):
        return False

    return week_ahead >= min_prob_7d and on_the_day >= min_prob_current
def calculate_days_until_resolution(current_date: datetime, closing_date: datetime) -> int:
    """
    Count the whole days remaining before a market resolves.

    Args:
        current_date: Current simulation date
        closing_date: Market closing date

    Returns:
        Whole days from ``current_date`` to ``closing_date``, floored at 0
        when the closing date is not in the future
    """
    remaining = (closing_date - current_date).days
    # timedelta.days is negative once the closing date has passed.
    return remaining if remaining > 0 else 0
def get_available_markets(df: pd.DataFrame, current_date: datetime, days_before: int,
                          min_prob_7d: float, min_prob_current: float) -> pd.DataFrame:
    """
    Find the markets that can be bought on ``current_date``.

    A market qualifies when it closes exactly ``days_before`` whole days after
    ``current_date``, both probability thresholds are met, and its outcome is
    known.

    Args:
        df: DataFrame containing market data
        current_date: Current simulation date
        days_before: Days before resolution to invest
        min_prob_7d: Minimum probability at 7 days before
        min_prob_current: Minimum probability at current day

    Returns:
        Copy of the qualifying rows with an added ``days_until_resolution``
        column, or a completely empty DataFrame when nothing qualifies
    """
    current_col = f'probability{days_before}d'
    week_prob = df['probability7d']
    day_prob = df[current_col]

    # Whole days between now and each market's close, computed for all rows.
    remaining_days = (df['closingDate'] - current_date).dt.days

    conditions = (
        remaining_days == days_before,
        week_prob >= min_prob_7d,
        day_prob >= min_prob_current,
        week_prob.notna(),
        day_prob.notna(),
        df['outcome_int'].notna(),
    )
    qualifies = conditions[0]
    for condition in conditions[1:]:
        qualifies = qualifies & condition

    if not qualifies.any():
        return pd.DataFrame()

    chosen = df[qualifies].copy()
    chosen['days_until_resolution'] = remaining_days[qualifies]
    return chosen
def select_next_market(available_markets: pd.DataFrame, random_state: np.random.RandomState,
                       skew_factor: float = 0.1) -> Optional[pd.Series]:
    """
    Randomly pick one market, favouring those that resolve sooner.

    Each market is weighted by ``exp(-days_until_resolution * skew_factor)``
    and one row is drawn with those (normalized) weights.

    Args:
        available_markets: DataFrame of markets available for investment
        random_state: Random state for reproducible results
        skew_factor: Controls the skew (higher = stronger preference for closer markets)

    Returns:
        Selected market as Series, or None if no markets available
    """
    market_count = len(available_markets)
    if not market_count:
        return None

    horizon = available_markets['days_until_resolution'].values

    # Exponential decay: fewer days to resolution -> larger weight.
    decay = np.exp(-horizon * skew_factor)
    draw_probabilities = decay / decay.sum()

    position = random_state.choice(market_count, p=draw_probabilities)
    return available_markets.iloc[position]
def calculate_investment_return(market: pd.Series, days_before: int, capital: float) -> float:
    """
    Work out what an all-in position is worth once the market resolves.

    Args:
        market: Market data as Series
        days_before: Days before resolution when investment was made
        capital: Amount invested

    Returns:
        ``capital / probability`` when the market resolved to 1 and the
        purchase probability lies strictly between 0 and 1; 0.0 in every
        other case (loss, missing outcome, missing or out-of-range
        probability, or a probability of exactly 1.0)
    """
    price = market[f'probability{days_before}d']
    resolved = market['outcome_int']

    # Missing or out-of-range inputs are conservatively booked as a total loss.
    if pd.isna(price) or price <= 0 or price > 1:
        return 0.0
    if pd.isna(resolved):
        return 0.0

    won = resolved == 1
    if not won:
        return 0.0

    # A price of exactly 1.0 is not considered a valid bet and also pays 0.
    if price >= 1.0:
        return 0.0

    return capital / price
def run_single_fund_simulation_fast(df: pd.DataFrame,
                                    starting_capital: float = 10000,
                                    start_date: str = '2025-01-01',
                                    max_duration_days: int = 365,
                                    days_before: int = 1,
                                    min_prob_7d: float = 0.90,
                                    min_prob_current: float = 0.90,
                                    investment_probability: float = 0.5,
                                    target_return: Optional[float] = None,
                                    min_volume: Optional[float] = None,
                                    random_seed: Optional[int] = None) -> Dict[str, Any]:
    """
    Simulate one all-in fund by visiting only the moments a market can be bought.

    Eligible markets are filtered once, each is assigned a trading timestamp
    (``closingDate`` minus ``days_before`` days), and the sorted trading
    timestamps are walked in order. At each one the fund invests with
    probability ``investment_probability``, picking uniformly among the
    markets sharing that timestamp and staking its whole capital: a win turns
    capital into ``capital / probability``, a loss turns it into 0.

    Args:
        df: Market data DataFrame (needs ``closingDate``, ``probability7d``,
            ``probability{days_before}d`` and ``outcome_int``)
        starting_capital: Initial capital
        start_date: Simulation start date
        max_duration_days: Maximum simulation duration
        days_before: Days before resolution to invest
        min_prob_7d: Minimum probability at 7 days
        min_prob_current: Minimum probability at investment day
        investment_probability: Probability of investing at a trading opportunity
        target_return: Return at which trading stops (None = no threshold)
        min_volume: Minimum ``volume`` to consider; ignored when None or when
            the DataFrame has no ``volume`` column
        random_seed: Random seed for reproducibility

    Returns:
        Dictionary with final capital, total return, trade list, sparse
        capital history, ending reason and the parameters used
    """
    rng = np.random.RandomState(random_seed)
    window_start = pd.to_datetime(start_date, utc=True)
    window_end = window_start + timedelta(days=max_duration_days)
    lead_time = timedelta(days=days_before)
    price_col = f'probability{days_before}d'

    # Echoed back in every result so a run can be reproduced.
    run_parameters = {
        'starting_capital': starting_capital,
        'start_date': start_date,
        'max_duration_days': max_duration_days,
        'days_before': days_before,
        'min_prob_7d': min_prob_7d,
        'min_prob_current': min_prob_current,
        'investment_probability': investment_probability,
        'target_return': target_return,
        'min_volume': min_volume,
        'random_seed': random_seed
    }

    # One upfront filter instead of re-checking thresholds at every step.
    closes = df['closingDate']
    keep = (
        (df['probability7d'] >= min_prob_7d) &
        (df[price_col] >= min_prob_current) &
        df['probability7d'].notna() &
        df[price_col].notna() &
        df['outcome_int'].notna() &
        (closes >= window_start + lead_time) &
        (closes <= window_end + lead_time)
    )
    if min_volume is not None and 'volume' in df.columns:
        keep = keep & (df['volume'] >= min_volume)

    candidates = df[keep].copy()

    if len(candidates) == 0:
        return {
            'final_capital': starting_capital,
            'total_return': 0.0,
            'num_trades': 0,
            'went_bust': False,
            'reached_target': False,
            'ending_reason': 'no_markets',
            'simulation_days': 0,
            'trades': [],
            'daily_capital': [],
            'parameters': run_parameters
        }

    # The moment each market would be bought.
    candidates['trading_date'] = candidates['closingDate'] - lead_time

    # Bucket markets by their exact trading timestamp for direct lookup.
    by_trade_date = {}
    for _, row in candidates.iterrows():
        by_trade_date.setdefault(row['trading_date'], []).append(row)

    schedule = sorted(stamp for stamp in by_trade_date if window_start <= stamp <= window_end)

    balance = starting_capital
    trade_log = []
    capital_history = []

    for when in schedule:
        if balance <= 0:
            break

        # Stop as soon as the target return has been locked in.
        if target_return is not None:
            progress = (balance - starting_capital) / starting_capital
            if progress >= target_return:
                break

        if when > window_end:
            break

        # Coin flip: invest at this opportunity or pass.
        if rng.random() >= investment_probability:
            continue

        todays_markets = by_trade_date[when]
        if not todays_markets:
            continue

        chosen = todays_markets[rng.randint(0, len(todays_markets))]
        price = chosen[price_col]
        resolved = chosen['outcome_int']

        if pd.isna(price) or price <= 0 or price > 1 or pd.isna(resolved):
            continue

        # All-in bet: a win pays capital / price, a loss wipes the fund out.
        balance_after = balance / price if resolved == 1 else 0.0

        day_index = (when - window_start).days
        trade_log.append({
            'trade_number': len(trade_log) + 1,
            'investment_date': when,
            'resolution_date': chosen['closingDate'],
            'probability': price,
            'capital_invested': balance,
            'outcome': resolved,
            'capital_after': balance_after,
            'return': (balance_after - balance) / balance if balance > 0 else 0,
            'sim_day': day_index
        })

        balance = balance_after

        # Capital history is sampled every fifth trade and at the bust point.
        if len(trade_log) % 5 == 0 or balance == 0:
            capital_history.append({
                'date': when,
                'capital': balance,
                'day': day_index
            })

        if balance == 0:
            break

    total_return = (balance - starting_capital) / starting_capital if starting_capital > 0 else 0
    went_bust = balance == 0
    reached_target = target_return is not None and total_return >= target_return

    if went_bust:
        ending_reason = 'bust'
    elif reached_target:
        ending_reason = 'target_reached'
    else:
        ending_reason = 'max_duration'

    # Reported duration is based on the last scheduled trading timestamp.
    last_stamp = schedule[-1] if schedule else window_start
    final_sim_day = (min(window_end, last_stamp) - window_start).days

    return {
        'final_capital': balance,
        'total_return': total_return,
        'num_trades': len(trade_log),
        'went_bust': went_bust,
        'reached_target': reached_target,
        'ending_reason': ending_reason,
        'simulation_days': final_sim_day,
        'trades': trade_log,
        'daily_capital': capital_history,
        'parameters': run_parameters
    }
| # Keep original function for backwards compatibility | |
def run_single_fund_simulation(df: pd.DataFrame,
                               starting_capital: float = 10000,
                               start_date: str = '2025-01-01',
                               max_duration_days: int = 365,
                               days_before: int = 1,
                               min_prob_7d: float = 0.90,
                               min_prob_current: float = 0.90,
                               investment_probability: float = 0.5,
                               target_return: Optional[float] = None,
                               min_volume: Optional[float] = None,
                               random_seed: Optional[int] = None) -> Dict[str, Any]:
    """
    Run a single fund simulation (compatibility entry point).

    This function does no work of its own: every argument is forwarded
    unchanged to ``run_single_fund_simulation_fast`` and its result is
    returned as-is.

    Args:
        df: Market data DataFrame
        starting_capital: Initial capital
        start_date: Simulation start date
        max_duration_days: Maximum simulation duration
        days_before: Days before resolution to invest
        min_prob_7d: Minimum probability at 7 days
        min_prob_current: Minimum probability at investment day
        investment_probability: Probability of investing at an opportunity (alpha)
        target_return: Target return threshold to stop trading (None = no threshold)
        min_volume: Minimum market volume to consider (None = no filter)
        random_seed: Random seed for reproducibility

    Returns:
        Dictionary containing simulation results
    """
    forwarded = {
        'df': df,
        'starting_capital': starting_capital,
        'start_date': start_date,
        'max_duration_days': max_duration_days,
        'days_before': days_before,
        'min_prob_7d': min_prob_7d,
        'min_prob_current': min_prob_current,
        'investment_probability': investment_probability,
        'target_return': target_return,
        'min_volume': min_volume,
        'random_seed': random_seed,
    }
    return run_single_fund_simulation_fast(**forwarded)
def calculate_kelly_fraction(true_prob: float, market_prob: float) -> float:
    """
    Calculate the Kelly criterion bet fraction for buying a $1 contract.

    Args:
        true_prob: Estimated true probability of winning (p)
        market_prob: Market's implied probability (price paid per contract)

    Returns:
        Fraction of bankroll to bet. Never negative: 0.0 is returned when
        there is no edge or when ``market_prob`` is not strictly between
        0 and 1 (including NaN).
    """
    # Written as a chained comparison so that NaN prices are rejected too.
    if not (0 < market_prob < 1):
        return 0.0

    # Net odds: paying P for a contract that pays $1 earns (1 - P) / P per
    # dollar risked. With 0 < P < 1 this is always positive, so no separate
    # guard on b is needed.
    b = (1 - market_prob) / market_prob
    q = 1 - true_prob

    # Kelly formula: f* = (p*b - q) / b
    kelly = (true_prob * b - q) / b

    # A negative Kelly fraction means "do not bet"; report that as 0.
    return max(0.0, kelly)
def get_historical_win_rate(df: pd.DataFrame, prob_col: str, probability: float,
                            tolerance: float = 0.02) -> float:
    """
    Estimate the win rate of markets priced near a given probability.

    Args:
        df: Market data DataFrame
        prob_col: Probability column to use
        probability: Target probability to look up
        tolerance: Half-width of the (inclusive) band around ``probability``

    Returns:
        Mean ``outcome_int`` of the markets inside the band, or
        ``probability + 0.005`` when fewer than 10 such markets exist
    """
    lower = probability - tolerance
    upper = probability + tolerance

    in_band = df[prob_col].between(lower, upper) & df['outcome_int'].notna()
    outcomes = df.loc[in_band, 'outcome_int']

    if len(outcomes) < 10:
        # Too little history to trust: fall back to an assumed 0.5% edge.
        return probability + 0.005

    return outcomes.mean()
def run_kelly_simulation(df: pd.DataFrame,
                         starting_capital: float = 10000,
                         start_date: str = '2025-01-01',
                         max_duration_days: int = 365,
                         days_before: int = 1,
                         min_prob_7d: float = 0.90,
                         min_prob_current: float = 0.90,
                         investment_probability: float = 0.5,
                         kelly_fraction: float = 0.5,
                         edge_estimate: str = 'historical',
                         min_volume: Optional[float] = None,
                         random_seed: Optional[int] = None) -> Dict[str, Any]:
    """
    Simulate a fund that sizes each bet with a (fractional) Kelly criterion.

    Eligible markets are filtered once and visited in order of trading
    timestamp (``closingDate`` minus ``days_before`` days). At each
    opportunity the fund attempts a bet with probability
    ``investment_probability``, picks one market uniformly, estimates the
    true win probability, and stakes ``kelly_fraction`` times the full Kelly
    fraction of current capital (capped at 100%).

    Args:
        df: Market data DataFrame
        starting_capital: Initial capital
        start_date: Simulation start date
        max_duration_days: Maximum simulation duration
        days_before: Days before resolution to invest
        min_prob_7d: Minimum probability at 7 days
        min_prob_current: Minimum probability at investment day
        investment_probability: Probability of attempting to invest at an opportunity
        kelly_fraction: Fraction of Kelly to use (0.5 = half Kelly, 1.0 = full Kelly)
        edge_estimate: How the true probability is estimated: 'historical'
            (look-up via get_historical_win_rate), 'fixed_edge' (+1%),
            'fixed_edge_2' (+2%); anything else assumes +0.5%
        min_volume: Minimum market volume to consider (None = no filter)
        random_seed: Random seed for reproducibility

    Returns:
        Dictionary containing simulation results, including a ``kelly_stats``
        section with average bet size, average edge and skip counts
    """
    rng = np.random.RandomState(random_seed)
    window_start = pd.to_datetime(start_date, utc=True)
    window_end = window_start + timedelta(days=max_duration_days)
    lead_time = timedelta(days=days_before)
    price_col = f'probability{days_before}d'

    # Echoed back in every result so a run can be reproduced.
    run_parameters = {
        'starting_capital': starting_capital,
        'start_date': start_date,
        'max_duration_days': max_duration_days,
        'days_before': days_before,
        'min_prob_7d': min_prob_7d,
        'min_prob_current': min_prob_current,
        'investment_probability': investment_probability,
        'kelly_fraction': kelly_fraction,
        'edge_estimate': edge_estimate,
        'min_volume': min_volume,
        'random_seed': random_seed
    }

    closes = df['closingDate']
    keep = (
        (df['probability7d'] >= min_prob_7d) &
        (df[price_col] >= min_prob_current) &
        df['probability7d'].notna() &
        df[price_col].notna() &
        df['outcome_int'].notna() &
        (closes >= window_start + lead_time) &
        (closes <= window_end + lead_time)
    )
    if min_volume is not None and 'volume' in df.columns:
        keep = keep & (df['volume'] >= min_volume)

    candidates = df[keep].copy()

    if len(candidates) == 0:
        return {
            'final_capital': starting_capital,
            'total_return': 0.0,
            'num_trades': 0,
            'went_bust': False,
            'ending_reason': 'no_markets',
            'simulation_days': 0,
            'trades': [],
            'daily_capital': [],
            'kelly_stats': {
                'avg_bet_size': 0,
                'avg_edge': 0,
                'bets_skipped': 0,
                'total_opportunities': 0
            },
            'parameters': run_parameters
        }

    candidates['trading_date'] = candidates['closingDate'] - lead_time

    # Bucket markets by their exact trading timestamp.
    by_trade_date = {}
    for _, row in candidates.iterrows():
        by_trade_date.setdefault(row['trading_date'], []).append(row)

    schedule = sorted(stamp for stamp in by_trade_date if window_start <= stamp <= window_end)

    # Assumed edge added to the market price for the non-historical methods.
    fixed_edges = {'fixed_edge': 0.01, 'fixed_edge_2': 0.02}

    balance = starting_capital
    trade_log = []
    capital_history = []
    bet_sizes = []
    edges = []
    bets_skipped = 0
    total_opportunities = 0

    for when in schedule:
        if balance <= 0:
            break
        if when > window_end:
            break

        # Coin flip: attempt a bet at this opportunity or pass.
        if rng.random() >= investment_probability:
            continue

        todays_markets = by_trade_date[when]
        if not todays_markets:
            continue

        total_opportunities += 1

        chosen = todays_markets[rng.randint(0, len(todays_markets))]
        market_prob = chosen[price_col]

        # Prices of exactly 0 or 1 leave no defined odds to bet on.
        if pd.isna(market_prob) or market_prob <= 0 or market_prob >= 1:
            continue

        if edge_estimate == 'historical':
            true_prob = get_historical_win_rate(df, price_col, market_prob)
        else:
            true_prob = market_prob + fixed_edges.get(edge_estimate, 0.005)

        # Keep the estimate away from the degenerate values 0 and 1.
        true_prob = min(0.999, max(0.001, true_prob))

        full_kelly = calculate_kelly_fraction(true_prob, market_prob)
        bet_fraction = full_kelly * kelly_fraction

        # No meaningful edge: count the skip and move on.
        if bet_fraction <= 0.001:
            bets_skipped += 1
            continue

        # Never stake more than the whole bankroll.
        bet_fraction = min(bet_fraction, 1.0)
        bet_amount = balance * bet_fraction

        resolved = chosen['outcome_int']
        if pd.isna(resolved):
            continue

        edge = true_prob - market_prob
        edges.append(edge)
        bet_sizes.append(bet_fraction)

        if resolved == 1:
            # Winning pays (1 - P) / P per dollar staked.
            balance_after = balance + bet_amount * (1 - market_prob) / market_prob
        else:
            balance_after = balance - bet_amount

        day_index = (when - window_start).days
        trade_log.append({
            'trade_number': len(trade_log) + 1,
            'investment_date': when,
            'resolution_date': chosen['closingDate'],
            'market_probability': market_prob,
            'estimated_true_prob': true_prob,
            'edge': edge,
            'kelly_fraction': full_kelly,
            'actual_bet_fraction': bet_fraction,
            'capital_before': balance,
            'bet_amount': bet_amount,
            'outcome': resolved,
            'capital_after': balance_after,
            'return': (balance_after - balance) / balance if balance > 0 else 0,
            'sim_day': day_index
        })

        balance = balance_after

        # Capital history is sampled every fifth trade and at the bust point.
        if len(trade_log) % 5 == 0 or balance <= 0:
            capital_history.append({
                'date': when,
                'capital': balance,
                'day': day_index
            })

        if balance <= 0:
            balance = 0
            break

    total_return = (balance - starting_capital) / starting_capital if starting_capital > 0 else 0
    went_bust = balance <= 0
    ending_reason = 'bust' if went_bust else 'max_duration'

    # Reported duration is based on the last scheduled trading timestamp.
    last_stamp = schedule[-1] if schedule else window_start
    final_sim_day = (min(window_end, last_stamp) - window_start).days

    avg_bet_size = np.mean(bet_sizes) if bet_sizes else 0
    avg_edge = np.mean(edges) if edges else 0

    return {
        'final_capital': balance,
        'total_return': total_return,
        'num_trades': len(trade_log),
        'went_bust': went_bust,
        'ending_reason': ending_reason,
        'simulation_days': final_sim_day,
        'trades': trade_log,
        'daily_capital': capital_history,
        'kelly_stats': {
            'avg_bet_size': avg_bet_size,
            'avg_edge': avg_edge,
            'bets_skipped': bets_skipped,
            'total_opportunities': total_opportunities
        },
        'parameters': run_parameters
    }
def plot_simulation_results(results_list: List[Dict[str, Any]], title: str = "Simulation Results"):
    """
    Draw a 2x2 summary figure for a batch of simulation runs.

    The figure holds histograms of final capital, total return (in percent)
    and trade counts, plus a text panel of summary statistics, and is shown
    with ``plt.show()``.

    Args:
        results_list: List of simulation result dictionaries
        title: Plot title
    """
    if not results_list:
        print("No results to plot")
        return

    final_capitals = [run['final_capital'] for run in results_list]
    total_returns = [run['total_return'] for run in results_list]
    num_trades = [run['num_trades'] for run in results_list]
    bust_rate = sum(1 for run in results_list if run['went_bust']) / len(results_list)
    return_pct = [value * 100 for value in total_returns]

    def draw_histogram(ax, values, bins, markers, xlabel, panel_title):
        # Shared layout for the three histogram panels; markers are the
        # vertical reference lines (position, axvline keyword arguments).
        ax.hist(values, bins=bins, alpha=0.7, edgecolor='black')
        for position, line_kwargs in markers:
            ax.axvline(position, **line_kwargs)
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Frequency')
        ax.set_title(panel_title)
        ax.legend()
        ax.grid(True, alpha=0.3)

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    capital_mean = np.mean(final_capitals)
    capital_median = np.median(final_capitals)
    draw_histogram(
        axes[0, 0], final_capitals, 50,
        [
            (capital_mean, dict(color='red', linestyle='--',
                                label=f'Mean: ${capital_mean:,.0f}')),
            (capital_median, dict(color='green', linestyle='--',
                                  label=f'Median: ${capital_median:,.0f}')),
        ],
        'Final Capital ($)', 'Final Capital Distribution',
    )

    pct_mean = np.mean(return_pct)
    pct_median = np.median(return_pct)
    draw_histogram(
        axes[0, 1], return_pct, 50,
        [
            (pct_mean, dict(color='red', linestyle='--',
                            label=f'Mean: {pct_mean:.1f}%')),
            (pct_median, dict(color='green', linestyle='--',
                              label=f'Median: {pct_median:.1f}%')),
            (0, dict(color='black', linestyle='-', alpha=0.5, label='Break-even')),
        ],
        'Total Return (%)', 'Return Distribution',
    )

    trades_mean = np.mean(num_trades)
    draw_histogram(
        axes[1, 0], num_trades, 30,
        [
            (trades_mean, dict(color='red', linestyle='--',
                               label=f'Mean: {trades_mean:.1f}')),
        ],
        'Number of Trades', 'Number of Trades Distribution',
    )

    # Bottom-right panel carries text only, so its axes are hidden.
    axes[1, 1].axis('off')
    stat_lines = [
        "",
        "Summary Statistics:",
        f"Total Simulations: {len(results_list):,}",
        f"Bust Rate: {bust_rate:.1%}",
        "Final Capital:",
        f"Mean: ${np.mean(final_capitals):,.0f}",
        f"Median: ${np.median(final_capitals):,.0f}",
        f"Min: ${np.min(final_capitals):,.0f}",
        f"Max: ${np.max(final_capitals):,.0f}",
        "Total Return:",
        f"Mean: {np.mean(total_returns):.1%}",
        f"Median: {np.median(total_returns):.1%}",
        f"Min: {np.min(total_returns):.1%}",
        f"Max: {np.max(total_returns):.1%}",
        "Trades per Simulation:",
        f"Mean: {np.mean(num_trades):.1f}",
        f"Median: {np.median(num_trades):.1f}",
        "",
    ]
    stats_text = "\n".join(stat_lines)
    axes[1, 1].text(0.1, 0.9, stats_text, transform=axes[1, 1].transAxes,
                    fontsize=11, verticalalignment='top', fontfamily='monospace')

    plt.suptitle(title, fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()
def print_simulation_summary(results_list: List[Dict[str, Any]]):
    """
    Print a text report of summary statistics for a batch of simulation runs.

    The report covers bust / target / positive-return counts, final capital
    and return statistics, trade counts and selected return percentiles. The
    target line is printed only when the first result's parameters carry a
    non-None ``target_return``.

    Args:
        results_list: List of simulation result dictionaries
    """
    if not results_list:
        print("No results to summarize")
        return

    capitals = np.array([run['final_capital'] for run in results_list])
    returns = np.array([run['total_return'] for run in results_list])
    trade_counts = np.array([run['num_trades'] for run in results_list])
    run_count = len(results_list)

    bust_count = sum(1 for run in results_list if run['went_bust'])
    target_hits = sum(1 for run in results_list if run.get('reached_target', False))
    winners = sum(1 for value in returns if value > 0)

    # The first run's parameters decide whether a target was in play.
    target_return = results_list[0]['parameters'].get('target_return', None)

    banner = "=" * 60
    print(banner)
    print("SIMULATION SUMMARY")
    print(banner)
    print(f"Total Simulations: {run_count:,}")
    print(f"Went Bust: {bust_count:,} ({bust_count / run_count:.1%})")
    if target_return is not None:
        print(f"Reached Target ({target_return:.1%}): {target_hits:,} ({target_hits / run_count:.1%})")
    print(f"Positive Returns: {winners:,} ({winners / run_count:.1%})")

    print("\nFINAL CAPITAL STATISTICS:")
    capital_rows = (
        ("Mean", capitals.mean()),
        ("Median", np.median(capitals)),
        ("Std Dev", capitals.std()),
        ("Min", capitals.min()),
        ("Max", capitals.max()),
    )
    for label, amount in capital_rows:
        print(f"{label}: ${amount:,.2f}")

    print("\nRETURN STATISTICS:")
    return_rows = (
        ("Mean", returns.mean()),
        ("Median", np.median(returns)),
        ("Std Dev", returns.std()),
        ("Min", returns.min()),
        ("Max", returns.max()),
    )
    for label, ratio in return_rows:
        print(f"{label}: {ratio:.1%}")

    print("\nTRADE STATISTICS:")
    print(f"Mean Trades: {trade_counts.mean():.1f}")
    print(f"Median Trades: {np.median(trade_counts):.1f}")
    print(f"Min Trades: {trade_counts.min()}")
    print(f"Max Trades: {trade_counts.max()}")

    print("\nRETURN PERCENTILES:")
    for pct in (5, 10, 25, 75, 90, 95):
        print(f"{pct}th percentile: {np.percentile(returns, pct):.1%}")
def run_multi_fund_simulation(df: pd.DataFrame,
                              n_funds: int = 5,
                              starting_capital: float = 10000,
                              start_date: str = '2025-01-01',
                              max_duration_days: int = 365,
                              days_before: int = 1,
                              min_prob_7d: float = 0.90,
                              min_prob_current: float = 0.90,
                              investment_probability: float = 0.5,
                              target_return: Optional[float] = None,
                              min_volume: Optional[float] = None,
                              random_seed: Optional[int] = None) -> Dict[str, Any]:
    """
    Run a multi-fund simulation where capital is divided into independent funds.

    The starting capital is split evenly across ``n_funds`` funds. Each fund is
    simulated with ``run_single_fund_simulation_fast`` using the same strategy
    parameters but its own random seed, and the per-fund results are then
    aggregated into portfolio-level statistics.

    Args:
        df: Market data DataFrame
        n_funds: Number of independent funds to create (must be >= 1)
        starting_capital: Total initial capital (divided evenly among funds)
        start_date: Simulation start date
        max_duration_days: Maximum simulation duration
        days_before: Days before resolution to invest
        min_prob_7d: Minimum probability at 7 days
        min_prob_current: Minimum probability at investment day
        investment_probability: Probability of investing on any given day (alpha)
        target_return: Target return threshold per fund (None = no threshold)
        min_volume: Minimum market volume to consider (None = no filter)
        random_seed: Random seed for reproducibility

    Returns:
        Dictionary containing multi-fund simulation results:
        portfolio totals ('portfolio_final_capital', 'portfolio_total_return'),
        survivorship figures ('surviving_funds', 'survivorship_rate',
        'bust_rate'), averages over surviving funds, target-achievement and
        trade counts, the raw per-fund results ('fund_results'), every trade
        tagged with its 'fund_id' ('all_trades'), and the input 'parameters'.

    Raises:
        ValueError: If ``n_funds`` is less than 1.
    """
    # Reject a non-positive fund count up front: zero would divide by zero
    # below, and a negative count would run no funds yet still report
    # meaningless (negative) per-fund capital and rates.
    if n_funds < 1:
        raise ValueError(f"n_funds must be at least 1, got {n_funds!r}")

    # Master random state; only used to derive one seed per fund
    random_state = np.random.RandomState(random_seed)

    # Capital is split evenly across funds
    capital_per_fund = starting_capital / n_funds

    # Run simulation for each fund independently
    fund_results = []
    all_trades = []
    for fund_id in range(n_funds):
        # Draw a separate seed per fund so each fund gets its own random stream
        # while the whole run stays reproducible from random_seed
        fund_seed = random_state.randint(0, 1000000)

        # Run single fund simulation for this fund using fast version
        fund_result = run_single_fund_simulation_fast(
            df=df,
            starting_capital=capital_per_fund,
            start_date=start_date,
            max_duration_days=max_duration_days,
            days_before=days_before,
            min_prob_7d=min_prob_7d,
            min_prob_current=min_prob_current,
            investment_probability=investment_probability,
            target_return=target_return,
            min_volume=min_volume,
            random_seed=fund_seed
        )

        # Tag the result and each of its trades with the owning fund.
        # The trade dicts are annotated in place, so 'all_trades' and the
        # fund's own 'trades' list share the same objects.
        fund_result['fund_id'] = fund_id
        for trade in fund_result['trades']:
            trade['fund_id'] = fund_id
        all_trades.extend(fund_result['trades'])
        fund_results.append(fund_result)

    # Portfolio-level statistics
    survivors = [fund for fund in fund_results if not fund['went_bust']]
    surviving_funds = len(survivors)
    total_final_capital = sum(fund['final_capital'] for fund in fund_results)
    if starting_capital > 0:
        total_portfolio_return = (total_final_capital - starting_capital) / starting_capital
    else:
        total_portfolio_return = 0

    # Average metrics across surviving funds only
    if survivors:
        avg_capital_per_surviving_fund = sum(fund['final_capital'] for fund in survivors) / surviving_funds
        avg_return_per_surviving_fund = sum(fund['total_return'] for fund in survivors) / surviving_funds
    else:
        avg_capital_per_surviving_fund = 0
        avg_return_per_surviving_fund = -1  # All funds went bust

    # Target achievement stats
    funds_reached_target = sum(1 for fund in fund_results if fund.get('reached_target', False))
    target_achievement_rate = funds_reached_target / n_funds

    # Trading activity stats
    total_trades = len(all_trades)
    avg_trades_per_fund = total_trades / n_funds

    # Survivorship and diversification metrics
    survivorship_rate = surviving_funds / n_funds
    bust_rate = 1 - survivorship_rate

    return {
        'portfolio_final_capital': total_final_capital,
        'portfolio_total_return': total_portfolio_return,
        'n_funds': n_funds,
        'surviving_funds': surviving_funds,
        'survivorship_rate': survivorship_rate,
        'bust_rate': bust_rate,
        'avg_capital_per_surviving_fund': avg_capital_per_surviving_fund,
        'avg_return_per_surviving_fund': avg_return_per_surviving_fund,
        'funds_reached_target': funds_reached_target,
        'target_achievement_rate': target_achievement_rate,
        'total_trades': total_trades,
        'avg_trades_per_fund': avg_trades_per_fund,
        'fund_results': fund_results,
        'all_trades': all_trades,
        'parameters': {
            'n_funds': n_funds,
            'starting_capital': starting_capital,
            'capital_per_fund': capital_per_fund,
            'start_date': start_date,
            'max_duration_days': max_duration_days,
            'days_before': days_before,
            'min_prob_7d': min_prob_7d,
            'min_prob_current': min_prob_current,
            'investment_probability': investment_probability,
            'target_return': target_return,
            'min_volume': min_volume,
            'random_seed': random_seed
        }
    }
def plot_multi_fund_results(results_list: List[Dict[str, Any]], title: str = "Multi-Fund Simulation Results"):
    """
    Draw a 2x2 overview figure for a batch of multi-fund simulation runs.

    The panels show the distribution of portfolio final capital, the
    distribution of portfolio returns (in percent), a histogram of how many
    funds survived per run, and a text box of summary statistics.

    Args:
        results_list: List of multi-fund simulation result dictionaries
        title: Figure-level title

    Returns:
        None. Prints a message and returns early when results_list is empty;
        otherwise displays the figure via plt.show().
    """
    if not results_list:
        print("No results to plot")
        return

    # Pull the portfolio-level series out of each run
    capitals = [run['portfolio_final_capital'] for run in results_list]
    returns = [run['portfolio_total_return'] for run in results_list]
    survivors = [run['surviving_funds'] for run in results_list]
    survival_rates = [run['survivorship_rate'] for run in results_list]

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    capital_ax = axes[0, 0]
    return_ax = axes[0, 1]
    survivor_ax = axes[1, 0]
    stats_ax = axes[1, 1]

    # Top-left: portfolio final capital
    mean_capital = np.mean(capitals)
    median_capital = np.median(capitals)
    capital_ax.hist(capitals, bins=30, alpha=0.7, edgecolor='black', color='steelblue')
    capital_ax.axvline(mean_capital, color='red', linestyle='--',
                       label=f'Mean: ${mean_capital:,.0f}')
    capital_ax.axvline(median_capital, color='green', linestyle='--',
                       label=f'Median: ${median_capital:,.0f}')
    capital_ax.set_xlabel('Portfolio Final Capital ($)')
    capital_ax.set_ylabel('Frequency')
    capital_ax.set_title('Portfolio Final Capital Distribution')
    capital_ax.legend()
    capital_ax.grid(True, alpha=0.3)

    # Top-right: portfolio return, expressed in percent
    returns_in_pct = [value * 100 for value in returns]
    mean_return_pct = np.mean(returns_in_pct)
    return_ax.hist(returns_in_pct, bins=30, alpha=0.7, edgecolor='black', color='green')
    return_ax.axvline(mean_return_pct, color='red', linestyle='--',
                      label=f'Mean: {mean_return_pct:.1f}%')
    return_ax.axvline(0, color='black', linestyle='-', alpha=0.5, label='Break-even')
    return_ax.set_xlabel('Portfolio Total Return (%)')
    return_ax.set_ylabel('Frequency')
    return_ax.set_title('Portfolio Return Distribution')
    return_ax.legend()
    return_ax.grid(True, alpha=0.3)

    # Bottom-left: surviving funds per run; the fund count is taken from the first run
    n_funds = results_list[0]['n_funds']
    mean_survivors = np.mean(survivors)
    survivor_ax.hist(survivors, bins=range(n_funds + 2), alpha=0.7, edgecolor='black', color='orange')
    survivor_ax.axvline(mean_survivors, color='red', linestyle='--',
                        label=f'Mean: {mean_survivors:.1f}')
    survivor_ax.set_xlabel('Number of Surviving Funds')
    survivor_ax.set_ylabel('Frequency')
    survivor_ax.set_title('Surviving Funds Distribution')
    survivor_ax.legend()
    survivor_ax.grid(True, alpha=0.3)
    survivor_ax.set_xticks(range(n_funds + 1))

    # Bottom-right: text-only summary panel
    stats_ax.axis('off')

    # A run counts as a total bust when no fund survived
    wiped_out_runs = sum(1 for alive in survivors if alive == 0)
    total_bust_rate = wiped_out_runs / len(results_list)
    avg_survivorship = np.mean(survival_rates)

    stats_text = f"""
Multi-Fund Summary Statistics:
Total Simulations: {len(results_list):,}
Funds per Portfolio: {n_funds}
Total Bust Rate: {total_bust_rate:.1%}
Portfolio Capital:
Mean: ${np.mean(capitals):,.0f}
Median: ${np.median(capitals):,.0f}
Min: ${np.min(capitals):,.0f}
Max: ${np.max(capitals):,.0f}
Portfolio Return:
Mean: {np.mean(returns):.1%}
Median: {np.median(returns):.1%}
Fund Survivorship:
Avg Surviving: {np.mean(survivors):.1f} / {n_funds}
Avg Survivorship: {avg_survivorship:.1%}
"""
    stats_ax.text(0.1, 0.9, stats_text, transform=stats_ax.transAxes,
                  fontsize=11, verticalalignment='top', fontfamily='monospace')

    plt.suptitle(title, fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()
def print_multi_fund_summary(results_list: List[Dict[str, Any]]):
    """
    Print a text report summarizing a batch of multi-fund simulation runs.

    The report covers survivorship, optional target achievement (only when the
    first run's parameters carry a non-None 'target_return'), portfolio capital
    and return statistics, a diversification figure, and return percentiles.
    The fund count and starting capital are read from the first run.

    Args:
        results_list: List of multi-fund simulation result dictionaries

    Returns:
        None. Prints a message and returns early when results_list is empty.
    """
    if not results_list:
        print("No results to summarize")
        return

    n_runs = len(results_list)
    first_run = results_list[0]

    # Gather per-run series as arrays
    n_funds = first_run['n_funds']
    capitals = np.array([run['portfolio_final_capital'] for run in results_list])
    returns = np.array([run['portfolio_total_return'] for run in results_list])
    survivors = np.array([run['surviving_funds'] for run in results_list])
    survival_rates = np.array([run['survivorship_rate'] for run in results_list])

    # Portfolio-level counts: a run is a total bust when no fund survived
    total_bust_count = int(np.count_nonzero(survivors == 0))
    total_bust_rate = total_bust_count / n_runs
    positive_return_count = int(np.count_nonzero(returns > 0))
    positive_return_rate = positive_return_count / n_runs

    # The target section is only printed when a target return was configured
    params = first_run['parameters']
    target_return = params.get('target_return', None)

    banner = "=" * 80
    print(banner)
    print("MULTI-FUND SIMULATION SUMMARY")
    print(banner)
    print(f"Total Simulations: {n_runs:,}")
    print(f"Funds per Portfolio: {n_funds}")
    print(f"Starting Capital per Fund: ${params['capital_per_fund']:,.0f}")
    print(f"Total Starting Capital: ${params['starting_capital']:,.0f}")

    print(f"\nPORTFOLIO SURVIVORSHIP:")
    print(f"Total Portfolio Bust Rate: {total_bust_rate:.1%} ({total_bust_count:,} portfolios)")
    print(f"Average Surviving Funds: {survivors.mean():.1f} / {n_funds}")
    print(f"Average Survivorship Rate: {survival_rates.mean():.1%}")
    fully_intact = int(np.count_nonzero(survivors == n_funds))
    print(f"Portfolios with All Funds Surviving: {fully_intact} ({fully_intact/n_runs:.1%})")

    if target_return is not None:
        target_hits = [run['funds_reached_target'] for run in results_list]
        target_achieved_portfolios = sum(1 for hits in target_hits if hits > 0)
        avg_funds_reaching_target = np.mean(target_hits)
        print(f"\nTARGET ACHIEVEMENT ({target_return:.1%}):")
        print(f"Portfolios with ≥1 Fund Reaching Target: {target_achieved_portfolios:,} ({target_achieved_portfolios/n_runs:.1%})")
        print(f"Average Funds Reaching Target: {avg_funds_reaching_target:.1f} / {n_funds}")

    print(f"\nPORTFOLIO PERFORMANCE:")
    print(f"Positive Returns: {positive_return_count:,} ({positive_return_rate:.1%})")

    # Same five descriptive statistics for capital and for return
    summary_stats = (
        ("Mean", np.mean),
        ("Median", np.median),
        ("Std Dev", np.std),
        ("Min", np.min),
        ("Max", np.max),
    )

    print(f"\nPORTFOLIO CAPITAL STATISTICS:")
    for label, stat in summary_stats:
        print(f"{label}: ${stat(capitals):,.2f}")

    print(f"\nPORTFOLIO RETURN STATISTICS:")
    for label, stat in summary_stats:
        print(f"{label}: {stat(returns):.1%}")

    # Mean final capital relative to the total starting capital
    print(f"\nDIVERSIFICATION ANALYSIS:")
    baseline_capital = params['starting_capital']
    diversification_benefit = (capitals.mean() - baseline_capital) / baseline_capital
    print(f"Diversification Benefit: {diversification_benefit:+.1%} vs single fund baseline")

    # Risk metric: standard deviation of portfolio returns across runs
    print(f"Portfolio Return Volatility: {returns.std():.1%}")

    # Percentiles
    print(f"\nPORTFOLIO RETURN PERCENTILES:")
    for pct in (5, 10, 25, 75, 90, 95):
        print(f"{pct}th percentile: {np.percentile(returns, pct):.1%}")