# Spaces: droov / opt (Sleeping)
# File size: 22,615 Bytes

import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import statsmodels.api as sm
from tqdm import tqdm

# Small positive constant that keeps divisors away from zero: it is either
# added to a denominator or used as the lower clamp bound before dividing.
eps = 1e-6

# --- Objective function components ---
def calculate_sortino(
        returns: torch.Tensor,
        min_acceptable_return: torch.Tensor = None
    ):
    """Calculates the Sortino ratio along dim 0.

    The ratio is ``mean(excess) / downside_deviation`` where the downside
    deviation is the root-mean-square shortfall below the target, taken over
    ALL observations (returns at or above the target contribute zero).

    Args:
        returns: Tensor of periodic returns; statistics are reduced over dim 0.
        min_acceptable_return: Target return subtracted from ``returns``
            (broadcast by PyTorch). ``None`` means a target of 0.

    Returns:
        Tensor holding the Sortino ratio, clipped to [-100, 100].
    """
    if min_acceptable_return is not None:
        excess_returns = returns - min_acceptable_return
    else:
        # If no MAR provided, treat 0 as the target
        excess_returns = returns

    # Shortfall below the target; anything at or above the target becomes 0.
    shortfall = torch.clamp(excess_returns, max=0.0)

    # Target semi-variance is measured around the target itself, not around
    # the mean of the shortfalls. A standard deviation here would report ~0
    # risk for a window that loses the same amount every period.
    downside_variance = torch.mean(shortfall ** 2, dim=0)

    # Floor the variance BEFORE the square root: this bounds the deviation
    # below by eps and keeps the sqrt gradient finite when there is no
    # shortfall at all.
    downside_deviation = torch.sqrt(torch.clamp(downside_variance, min=eps ** 2))

    sortino = torch.mean(excess_returns, dim=0) / downside_deviation

    # Clip extreme values to prevent propagation of extreme gradients
    return torch.clamp(sortino, min=-100.0, max=100.0)

def calculate_max_drawdown(
        returns: torch.Tensor
    ):
    """Return the maximum drawdown of the compounded return path.

    NaN returns are treated as 0. The wealth path is the cumulative product
    of ``1 + return`` along dim 0, and the drawdown at each step is the
    relative drop from the running peak of that path. The result is reported
    as a non-negative number clipped to [0, 1]; an empty input yields 0.
    """
    # Nothing to compound: report zero drawdown on the input's device.
    if returns.numel() == 0:
        return torch.tensor(0.0, device=returns.device)

    wealth = torch.nan_to_num(returns, nan=0.0).add(1).cumprod(dim=0)
    running_peak = wealth.cummax(dim=0).values

    # The divisor is floored at eps so a collapsed peak cannot divide by zero.
    relative_drop = (running_peak - wealth) / running_peak.clamp(min=eps)

    return relative_drop.max().clamp(min=0.0, max=1.0)

def calculate_turnover(
        new_weights: torch.Tensor,
        prev_weights: torch.Tensor
    ):
    """Return one-way turnover between two weight vectors.

    Turnover is half the sum of absolute weight differences. NaN entries in
    either vector are replaced by ``1 / size(0)`` of that vector before the
    comparison, and the result is clipped to [0, 1].
    """
    def _fill_nan_uniform(w: torch.Tensor) -> torch.Tensor:
        # A NaN weight is replaced by the equal-weight share for that vector.
        return torch.nan_to_num(w, nan=1.0 / w.size(0))

    traded = (_fill_nan_uniform(new_weights) - _fill_nan_uniform(prev_weights)).abs().sum()

    return (traded / 2.0).clamp(min=0.0, max=1.0)

def calculate_hhi(weights: torch.Tensor):
    """Return the Herfindahl-Hirschman Index: the sum of squared weights.

    Larger values mean the weight is concentrated in fewer entries.
    """
    squared = weights.pow(2)
    return squared.sum()

def concentration_penalty(
        weights: torch.Tensor,
        enp_min: float = 5.0,
        enp_max: float = 20.0
    ):
    """Penalize portfolios whose effective number of positions (ENP) is out of band.

    ENP is computed as ``1 / (HHI + eps)``. The penalty is zero while ENP lies
    in ``[enp_min, enp_max]`` and grows linearly with the distance outside it.
    """
    effective_positions = 1.0 / (calculate_hhi(weights) + eps)

    # Each term is a one-sided hinge, so at most one is non-zero
    # when enp_min <= enp_max.
    too_concentrated = torch.relu(enp_min - effective_positions)
    too_diffuse = torch.relu(effective_positions - enp_max)
    return too_concentrated + too_diffuse

def calculate_objective_func(
        returns: torch.Tensor,
        risk_free_rate: torch.Tensor,
        new_weights: torch.Tensor,
        prev_weights: torch.Tensor,
        alphas = None,  # [Sortino, MaxDrawdown, Turnover, Concentration]; None -> [1.0, 1.0, 0.1, 0.25]
        enp_min: float = 5.0,
        enp_max: float = 20.0
    ):
    """Calculates the weighted objective function to be MINIMIZED.

    Sortino is maximized (so it enters negated); drawdown, turnover and the
    concentration penalty are minimized.

    Args:
        returns: Portfolio returns over the lookback window.
        risk_free_rate: Target return passed to the Sortino calculation.
        new_weights: Candidate portfolio weights.
        prev_weights: Previous weights, used for the turnover term.
        alphas: Four coefficients for [Sortino, MaxDrawdown, Turnover,
            Concentration]. ``None`` selects ``[1.0, 1.0, 0.1, 0.25]``.
        enp_min: Lower bound of the target effective-number-of-positions band.
        enp_max: Upper bound of the target effective-number-of-positions band.

    Returns:
        The objective tensor. If the computed objective contains NaN, a
        warning is printed and a fresh zero scalar is returned instead; that
        scalar is detached from the inputs, so back-propagating through it
        yields no gradient for the weights.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if alphas is None:
        alphas = (1.0, 1.0, 0.1, 0.25)

    sortino = calculate_sortino(returns, risk_free_rate)
    max_drawdown = calculate_max_drawdown(returns)
    turnover = calculate_turnover(new_weights, prev_weights)
    conc_penalty = concentration_penalty(new_weights, enp_min, enp_max)

    # Bound each component so that no single term can dominate the sum.
    sortino_scaled = torch.clamp(sortino, min=-10.0, max=10.0)
    max_drawdown_scaled = torch.clamp(max_drawdown, min=0.0, max=1.0)
    turnover_scaled = torch.clamp(turnover, min=0.0, max=1.0)
    conc_penalty_scaled = torch.clamp(conc_penalty, min=0.0, max=10.0)

    # We negate Sortino because the optimizer minimizes the objective.
    objective = (
        -alphas[0] * sortino_scaled +
        alphas[1] * max_drawdown_scaled +
        alphas[2] * turnover_scaled +
        alphas[3] * conc_penalty_scaled
    )

    # `.any()` keeps the check valid even if the objective is not a scalar.
    if torch.isnan(objective).any():
        print("Warning: NaN objective detected, using default value")
        # Keep the fallback on the objective's own dtype/device so callers
        # never mix devices when they inspect or back-propagate it.
        objective = torch.tensor(
            0.0,
            dtype=objective.dtype,
            device=objective.device,
            requires_grad=True
        )

    return objective

# --- Main OGD Optimization Function ---
def run_ogd(
        data_df: pd.DataFrame,
        window_size: int = 20,
        learning_rate: float = 0.01,
        alphas: list[float] = None,  # None -> [1.0, 1.0, 0.1, 0.25]
        enp_min: float = 5.0,
        enp_max: float = 20.0,
        use_tqdm: bool = True,
        factor_data: pd.DataFrame = None
    ) -> tuple[pd.DataFrame, pd.Series]:
    """Runs the Online Gradient Descent (OGD) portfolio optimization.

    For every day the current logits are mapped to weights with a clamped
    softmax, the day's portfolio return is logged, and - once more than
    ``window_size`` returns have been seen - one Adam step is taken on the
    objective evaluated over the trailing window.

    Args:
        data_df (pd.DataFrame): DataFrame with dates as index, ticker returns as columns,
                                and a column named 'rf' for the risk-free rate.
        window_size (int): Lookback window for objective calculation.
        learning_rate (float): Learning rate for the Adam optimizer.
        alphas (list[float]): Weights for [Sortino, MaxDrawdown, Turnover, Concentration]
                              in the objective. None selects [1.0, 1.0, 0.1, 0.25].
        enp_min (float): Minimum effective number of positions target.
        enp_max (float): Maximum effective number of positions target.
        use_tqdm (bool): Whether to use tqdm progress bar. Only affects progress
                         and log output, never the optimization itself.
        factor_data (pd.DataFrame, optional): Accepted for interface compatibility;
                                              not used by this function.

    Returns:
        tuple[pd.DataFrame, pd.Series]:
            - weights_df: DataFrame of daily portfolio weights (dates index, tickers columns).
            - returns_series: Series of daily portfolio returns (dates index).
            Both are empty when ``data_df`` has no more than ``window_size`` rows.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if alphas is None:
        alphas = [1.0, 1.0, 0.1, 0.25]

    if data_df.empty or len(data_df) <= window_size:
        print("Warning: Dataframe too small for OGD with the given window size.")
        return pd.DataFrame(), pd.Series(dtype=float)

    # --- Data validation ---
    num_nan_values = data_df.isna().sum().sum()
    if num_nan_values > 0:
        print(f"WARNING: Input data contains {num_nan_values} NaN values. Filling with 0.")
        data_df = data_df.fillna(0)

    # --- Diagnostic info ---
    print(f"Data shape: {data_df.shape}")
    print("Sample data (first few rows):")
    print(data_df.iloc[:3, :5])  # Show first 3 rows, first 5 columns

    zero_cols = (data_df == 0).all()
    if zero_cols.any():
        zero_count = zero_cols.sum()
        print(f"WARNING: {zero_count} columns contain all zeros.")

    # Separate stock returns and risk-free rate
    returns = data_df.drop(columns=['rf'])
    rf = data_df['rf']
    tickers = returns.columns.tolist()
    num_assets = len(tickers)
    num_days = len(data_df)

    returns_tensor = torch.tensor(returns.fillna(0).values, dtype=torch.float32)
    rf_tensor = torch.tensor(rf.fillna(0).values, dtype=torch.float32)

    # Defensive: the frame was already NaN-filled, so this should never fire.
    if torch.isnan(returns_tensor).any():
        print("WARNING: returns_tensor contains NaN values after conversion. Replacing with zeros.")
        returns_tensor = torch.nan_to_num(returns_tensor, nan=0.0)

    # Weights are stored as logits; all-zero logits give equal weights after softmax.
    weights = torch.zeros((num_assets,), requires_grad=True)
    optimizer = torch.optim.Adam([weights], lr=learning_rate)

    # Logging structures
    weights_log = torch.zeros((num_days, num_assets), dtype=torch.float32)
    portfolio_returns_log = torch.zeros((num_days,), dtype=torch.float32)
    rolling_portfolio_returns = []  # Detached recent portfolio returns for the objective

    print(f"Starting OGD optimization for {num_days} days, {num_assets} assets...")

    # Equal-weight fallback used whenever the softmax output is unusable
    initial_weights = torch.full((num_assets,), 1.0/num_assets)

    day_iterator = tqdm(range(num_days)) if use_tqdm else range(num_days)
    log_interval = 50

    for i in day_iterator:
        # Recover from NaN logits by resetting them to zeros (uniform after softmax).
        if torch.isnan(weights).any():
            print(f"WARNING: NaN detected in weights at day {i}, resetting to uniform weights")
            with torch.no_grad():
                weights.zero_()

        # Today's allocation is only logged, so no autograd graph is needed here.
        with torch.no_grad():
            normalized_weights = torch.nn.functional.softmax(
                torch.clamp(weights, min=-5, max=5), dim=0
            )

        if torch.isnan(normalized_weights).any() or torch.sum(normalized_weights) < 0.99:
            print(f"WARNING: Invalid normalized weights at day {i}, using uniform weights")
            normalized_weights = initial_weights.clone()

        daily_asset_returns = returns_tensor[i, :]
        daily_portfolio_return = torch.dot(normalized_weights, daily_asset_returns)

        if torch.isnan(daily_portfolio_return):
            print(f"WARNING: NaN detected in portfolio return at day {i}, using zero")
            daily_portfolio_return = torch.tensor(0.0)

            # Print sample weights and returns for the first days, then occasionally
            if i < 5 or i % 50 == 0:
                print(f"  Debug info for day {i}:")
                print(f"  Sample weights: {normalized_weights[:5].tolist()}")
                print(f"  Sample returns: {daily_asset_returns[:5].tolist()}")
                print(f"  Sum of weights: {torch.sum(normalized_weights).item()}")
                nan_count = torch.isnan(daily_asset_returns).sum().item()
                print(f"  NaN count in returns: {nan_count}/{len(daily_asset_returns)}")

        weights_log[i, :] = normalized_weights.detach()
        portfolio_returns_log[i] = daily_portfolio_return.detach()
        rolling_portfolio_returns.append(daily_portfolio_return.detach())

        # --- Objective Calculation and Optimization Step ---
        # Wait until we have enough data for the lookback window
        if len(rolling_portfolio_returns) <= window_size:
            continue
        rolling_portfolio_returns.pop(0)  # Remove oldest return

        if all(r.item() == 0 for r in rolling_portfolio_returns):
            print(f"WARNING: All portfolio returns are zero at day {i}, skipping optimization")
            continue

        # Risk-free rates aligned with the returns window (ends on day i)
        start_idx = max(0, i - window_size + 1)
        past_rf = rf_tensor[start_idx : i + 1]

        # Previous day's weights for turnover calculation
        prev_weights = weights_log[i-1, :] if i > 0 else normalized_weights.detach()

        optimizer.zero_grad()

        try:
            # Recompute the softmax with autograd enabled so that today's
            # return is connected to the logits.
            current_norm_weights = torch.nn.functional.softmax(
                torch.clamp(weights, min=-5, max=5), dim=0
            )
            current_return = torch.dot(current_norm_weights, daily_asset_returns)

            # Detached history + today's gradient-connected return
            past_portfolio_returns = torch.stack(
                rolling_portfolio_returns[:-1] + [current_return]
            )

            objective = calculate_objective_func(
                past_portfolio_returns,
                past_rf,
                current_norm_weights,
                prev_weights,
                alphas,
                enp_min,
                enp_max
            )

            if torch.isnan(objective):
                if not use_tqdm:
                    print(f"  WARNING: NaN objective at day {i}, skipping gradient update")
                continue

            # An exactly-zero objective is skipped, except on every 50th day.
            if objective.item() == 0.0 and i % 50 != 0:
                if not use_tqdm:
                    print(f"  WARNING: Zero objective at day {i}, skipping gradient update")
                continue

            objective.backward()

            # Validate the gradient BEFORE stepping: a NaN/inf gradient would
            # contaminate both the logits and Adam's running moment estimates.
            grad = weights.grad
            if grad is not None and not torch.isfinite(grad).all():
                print(f"  WARNING: Non-finite gradient detected at day {i}, skipping update")
                optimizer.zero_grad()
                continue

            # Logging is purely cosmetic; the step below runs on every day
            # regardless of use_tqdm.
            should_log = (not use_tqdm) and (
                (i + 1) % log_interval == 0 or num_days - (i + 1) < 5
            )

            if should_log:
                print(f"\n--- Step {i+1}/{num_days} Log ---")
                print(f"  Objective: {objective.item():.6f}")
                if grad is not None:
                    avg_grad = torch.mean(torch.abs(grad)).item()
                    print(f"  Average Gradient Magnitude: {avg_grad:.6f}")
                weights_before = weights.detach().clone()

            optimizer.step()

            if should_log:
                weight_change = torch.sum(torch.abs(weights.detach() - weights_before)).item()
                print(f"  Weight Change (Sum Abs): {weight_change:.6f}")
                # These are the weights used today, i.e. before the update.
                print(f"  Sample Normalized Weights: {[f'{w:.4f}' for w in normalized_weights[:5].tolist()]}")

        except Exception as e:
            # Best effort: report the failure and skip this day's update
            # instead of aborting the whole run.
            print(f"  Optimization error at day {i}: {e}")

    print("OGD optimization finished.")

    # Final check for validity of results
    if torch.isnan(weights_log).any():
        print("WARNING: Final weights contain NaN values")
        weights_log = torch.nan_to_num(weights_log, nan=1.0/num_assets)

    if torch.isnan(portfolio_returns_log).any():
        print("WARNING: Final portfolio returns contain NaN values")
        portfolio_returns_log = torch.nan_to_num(portfolio_returns_log, nan=0.0)

    # Convert logs back to pandas objects with the original index
    weights_df = pd.DataFrame(weights_log.numpy(), index=data_df.index, columns=tickers)
    returns_series = pd.Series(portfolio_returns_log.numpy(), index=data_df.index, name="PortfolioReturn")

    return weights_df, returns_series

# --- Analysis Functions ---
def compute_sharpe(returns_series, rf_series, annualization_factor=252):
    """Return the annualized Sharpe ratio of returns in excess of ``rf_series``.

    The mean excess return is scaled by ``annualization_factor`` and its
    standard deviation by the square root of that factor; ``eps`` is added to
    the denominator so zero volatility does not divide by zero.
    """
    excess_returns = returns_series - rf_series
    annualized_mean = np.mean(excess_returns) * annualization_factor
    annualized_vol = np.std(excess_returns) * np.sqrt(annualization_factor)
    sharpe = annualized_mean / (annualized_vol + eps)
    return sharpe

def compute_max_drawdown(returns_series):
    """Return the largest relative drop of the compounded return path from its running peak."""
    wealth_curve = np.cumprod(returns_series + 1)
    high_water_mark = np.maximum.accumulate(wealth_curve)
    # eps in the denominator guards against a zero peak.
    drawdowns = (high_water_mark - wealth_curve) / (high_water_mark + eps)
    return np.max(drawdowns)

def compute_alpha(returns_series, rf_series, factor_data, model="CAPM"):
    """Estimate alpha by regressing excess returns on factor returns with OLS.

    Args:
        returns_series: Portfolio returns series
        rf_series: Risk-free rate series
        factor_data: DataFrame with factor returns; the 'mktrf' column is read
                    for CAPM, and 'mktrf', 'smb', 'hml' for FF3
        model: 'CAPM' or 'FF3'

    Returns:
        tuple: (alpha, regression_result), where alpha is the first fitted
        parameter of the regression.

    Raises:
        ValueError: If ``model`` is neither 'CAPM' nor 'FF3'.
    """
    excess_returns = np.asarray(returns_series - rf_series)

    # Pick the regressor columns for the requested model.
    if model == "CAPM":
        factor_columns = ["mktrf"]
    elif model == "FF3":
        factor_columns = ["mktrf", "smb", "hml"]
    else:
        raise ValueError("Model must be 'CAPM' or 'FF3'")

    design_matrix = sm.add_constant(np.asarray(factor_data[factor_columns]))
    fitted = sm.OLS(excess_returns, design_matrix).fit()
    return fitted.params[0], fitted

# --- Visualization Functions ---
def _format_year_axis(ax):
    """Put one major x-axis tick per year on *ax*, labelled with the four-digit year."""
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))


def plot_optimization_results(
        opt_returns_series, 
        weights_df, 
        benchmark_returns=None, 
        top_n=5, 
        title_suffix=""
    ):
    """Plot optimization results with comparison to benchmarks.

    Three figures are shown: (1) return histogram and cumulative returns,
    (2) weight paths over time and the last day's weight histogram, and
    (3) the effective number of positions (1 / sum of squared weights) over time.

    Args:
        opt_returns_series: Series of optimized portfolio returns
        weights_df: DataFrame of weights over time
        benchmark_returns: Dict of benchmark return series {name: series};
            each series is plotted against the index of ``opt_returns_series``
        top_n: Number of assets, ranked by last-day weight, to highlight in the
            weights plot. Values <= 0 highlight nothing.
        title_suffix: Additional text to add to plot titles

    Returns:
        None. When either input is empty a warning is printed and nothing is plotted.
    """
    # The weight plots index the last row, which does not exist for empty input.
    if weights_df.empty or len(opt_returns_series) == 0:
        print("Warning: No optimization results to plot.")
        return None

    dates = opt_returns_series.index
    opt_returns = opt_returns_series.values
    weights_np = weights_df.values

    # --- Figure 1: return distribution and cumulative returns ---
    fig, axes = plt.subplots(2, 1, figsize=(12, 10))
    axes[0].hist(opt_returns, bins=50, alpha=0.5, label='Optimized', color='red')
    axes[1].plot(dates, np.cumprod(opt_returns + 1), label='Optimized', color='red')

    if benchmark_returns:
        for name, b_returns in benchmark_returns.items():
            axes[0].hist(b_returns, bins=50, alpha=0.5, label=name)
            axes[1].plot(dates, np.cumprod(b_returns + 1), label=name)

    axes[0].set_title('Return Distribution')
    axes[0].legend()

    axes[1].set_title('Cumulative Returns')
    axes[1].legend()
    _format_year_axis(axes[1])

    fig.suptitle(f"Performance Comparison {title_suffix}", fontsize=16)
    plt.tight_layout()
    plt.show()

    # --- Figure 2: weight evolution and final-day distribution ---
    fig, axes = plt.subplots(2, 1, figsize=(12, 10))

    # `ranked[-top_n:]` would select EVERY asset for top_n == 0 (it is
    # `ranked[0:]`) and a wrong slice for negative values, so guard explicitly.
    ranked = np.argsort(weights_np[-1])
    highlighted = set(ranked[-top_n:].tolist()) if top_n > 0 else set()

    for i in range(weights_np.shape[1]):
        is_top = i in highlighted
        axes[0].plot(
            dates,
            weights_np[:, i],
            label=weights_df.columns[i] if is_top else None,
            linewidth=2 if is_top else 0.3,
            alpha=0.8 if is_top else 0.3
        )

    _format_year_axis(axes[0])
    axes[0].set_title("Weights Over Time")
    # Skip the legend when no line carries a label.
    if highlighted:
        axes[0].legend()

    axes[1].hist(weights_np[-1], bins=100, log=True, color='blue', alpha=0.7)
    axes[1].set_title("Final Day Weight Distribution")

    plt.tight_layout()
    plt.show()

    # --- Figure 3: effective number of positions over time ---
    enp_series = 1.0 / np.sum(weights_np ** 2, axis=1)
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(dates, enp_series)
    ax.set_title("Effective Number of Positions Over Time")
    ax.set_ylabel("ENP")
    _format_year_axis(ax)
    plt.tight_layout()
    plt.show()

    return None