| | import torch |
| | import pandas as pd |
| | import numpy as np |
| | import matplotlib.pyplot as plt |
| | import matplotlib.dates as mdates |
| | import seaborn as sns |
| | import statsmodels.api as sm |
| | from tqdm import tqdm |
| |
|
| | |
# Numerical-stability floor used module-wide to keep divisions and clamps away from zero.
eps = 1e-6
| |
|
| | |
def calculate_sortino(
    returns: torch.Tensor,
    min_acceptable_return: torch.Tensor = None
):
    """Calculate the Sortino ratio of a return series.

    Args:
        returns: 1-D tensor of periodic returns.
        min_acceptable_return: Scalar (or broadcastable) minimum acceptable
            return (MAR). If None, a MAR of 0 is assumed.

    Returns:
        torch.Tensor: Sortino ratio, clamped to [-100, 100] for stability.
    """
    if min_acceptable_return is not None:
        excess_returns = returns - min_acceptable_return
    else:
        excess_returns = returns

    # Downside deviation is the RMS of the below-MAR excess returns over the
    # whole window (non-negative slots contribute zero). Using the RMS instead
    # of torch.std fixes two defects of the original: the unbiased std is NaN
    # for a single-element window (and NaN survives the clamp below), and
    # std-about-the-mean is not the textbook Sortino downside deviation.
    downside = torch.clamp(excess_returns, max=0.0)
    downside_deviation = torch.sqrt(torch.mean(downside ** 2, dim=0))

    # Floor (module eps) so a window with no losses cannot divide by zero.
    downside_deviation = torch.clamp(downside_deviation, min=1e-6)

    sortino = torch.mean(excess_returns, dim=0) / downside_deviation

    # Bound the ratio so a near-zero denominator cannot explode the objective.
    return torch.clamp(sortino, min=-100.0, max=100.0)
| |
|
def calculate_max_drawdown(
    returns: torch.Tensor
):
    """Calculates max drawdown for the duration of the returns passed.
    Max drawdown is defined to be positive, takes the range [0, \\infty).
    """
    # An empty series has, by definition, no drawdown.
    if returns.numel() == 0:
        return torch.tensor(0.0, device=returns.device)

    # Treat missing observations as flat (zero-return) periods.
    sanitized = torch.nan_to_num(returns, nan=0.0)

    # Wealth curve and its running maximum.
    wealth = torch.cumprod(sanitized + 1, dim=0)
    running_peak = torch.cummax(wealth, dim=0).values

    # Floor the peak (module eps) so the division below can never blow up.
    denom = torch.clamp(running_peak, min=1e-6)

    drawdowns = (running_peak - wealth) / denom
    worst = drawdowns.max()

    # A drawdown is a fraction of the peak, so it must lie in [0, 1].
    return torch.clamp(worst, min=0.0, max=1.0)
| |
|
def calculate_turnover(
    new_weights: torch.Tensor,
    prev_weights: torch.Tensor
):
    """Turnover is defined as the sum of absolute differences
    between new and previous weights, divided by 2.
    Takes the range [0, \\infty).
    """
    # Replace any NaN weight with the uniform allocation 1/N so a single
    # corrupt entry cannot poison the whole turnover figure.
    cleaned_new = torch.nan_to_num(new_weights, nan=1.0 / new_weights.size(0))
    cleaned_prev = torch.nan_to_num(prev_weights, nan=1.0 / prev_weights.size(0))

    # Half the L1 distance between consecutive weight vectors.
    half_l1 = torch.abs(cleaned_new - cleaned_prev).sum() / 2.0

    # For fully-invested long-only portfolios turnover cannot exceed 1.
    return torch.clamp(half_l1, min=0.0, max=1.0)
| |
|
def calculate_hhi(weights: torch.Tensor):
    """Calculate Herfindahl-Hirschman Index, a measure of concentration.
    Higher values indicate more concentration (less diversification).
    """
    # HHI is simply the sum of squared portfolio weights.
    return (weights * weights).sum()
| |
|
def concentration_penalty(
    weights: torch.Tensor,
    enp_min: float = 5.0,
    enp_max: float = 20.0
):
    """Calculate concentration penalty based on effective number of positions (ENP).
    ENP is the inverse of HHI. This encourages having between enp_min and enp_max
    effective positions.
    """
    # Inlined HHI (sum of squared weights); the 1e-6 (module eps) in the
    # denominator guards the inverse against an all-zero weight vector.
    herfindahl = (weights * weights).sum()
    effective_positions = 1.0 / (herfindahl + 1e-6)

    # Hinge penalties: linear cost for being below enp_min or above enp_max,
    # zero anywhere inside the target band.
    below_band = torch.relu(enp_min - effective_positions)
    above_band = torch.relu(effective_positions - enp_max)
    return below_band + above_band
| |
|
def calculate_objective_func(
    returns: torch.Tensor,
    risk_free_rate: torch.Tensor,
    new_weights: torch.Tensor,
    prev_weights: torch.Tensor,
    alphas = (1.0, 1.0, 0.1, 0.25),
    enp_min: float = 5.0,
    enp_max: float = 20.0
):
    """Calculates the weighted objective function to be MINIMIZED.
    Note: Sortino is maximized, drawdown, turnover, and concentration are minimized.

    Args:
        returns: Window of portfolio returns.
        risk_free_rate: Risk-free rates used as the MAR for the Sortino term.
        new_weights: Candidate portfolio weights for the current step.
        prev_weights: Previous step's weights (for the turnover term).
        alphas: Weights for [Sortino, MaxDrawdown, Turnover, Concentration].
            Default is an immutable tuple (was a mutable list — a shared
            default list is a classic Python pitfall); indexing is unchanged.
        enp_min: Lower effective-number-of-positions target.
        enp_max: Upper effective-number-of-positions target.

    Returns:
        torch.Tensor: Scalar objective (lower is better).
    """
    sortino = calculate_sortino(returns, risk_free_rate)
    max_drawdown = calculate_max_drawdown(returns)
    turnover = calculate_turnover(new_weights, prev_weights)
    conc_penalty = concentration_penalty(new_weights, enp_min, enp_max)

    # Clamp each term onto a comparable scale so no single component
    # dominates the weighted sum.
    sortino_scaled = torch.clamp(sortino, min=-10.0, max=10.0)
    max_drawdown_scaled = torch.clamp(max_drawdown, min=0.0, max=1.0)
    turnover_scaled = torch.clamp(turnover, min=0.0, max=1.0)
    conc_penalty_scaled = torch.clamp(conc_penalty, min=0.0, max=10.0)

    # Sortino is a "good" quantity, so it enters with a negative sign.
    objective = (
        -alphas[0] * sortino_scaled +
        alphas[1] * max_drawdown_scaled +
        alphas[2] * turnover_scaled +
        alphas[3] * conc_penalty_scaled
    )

    # Last-resort guard: fall back to a zero (still differentiable) objective
    # on the same device as the inputs rather than propagating NaN. (The
    # original fallback always built a CPU tensor, dropping the device.)
    if torch.isnan(objective):
        print("Warning: NaN objective detected, using default value")
        objective = torch.zeros((), device=returns.device, requires_grad=True)

    return objective
| |
|
| | |
def run_ogd(
    data_df: pd.DataFrame,
    window_size: int = 20,
    learning_rate: float = 0.01,
    alphas: list[float] = [1.0, 1.0, 0.1, 0.25],  # NOTE(review): mutable default; never mutated here, but a tuple would be safer
    enp_min: float = 5.0,
    enp_max: float = 20.0,
    use_tqdm: bool = True,
    factor_data: pd.DataFrame = None  # NOTE(review): accepted but never used in this function
) -> tuple[pd.DataFrame, pd.Series]:
    """Runs the Online Gradient Descent (OGD) portfolio optimization.

    Args:
        data_df (pd.DataFrame): DataFrame with dates as index, ticker returns as columns,
            and a final column named 'rf' for the risk-free rate.
        window_size (int): Lookback window for objective calculation.
        learning_rate (float): Learning rate for the Adam optimizer.
        alphas (list[float]): Weights for [Sortino, MaxDrawdown, Turnover, Concentration] in the objective.
        enp_min (float): Minimum effective number of positions target.
        enp_max (float): Maximum effective number of positions target.
        use_tqdm (bool): Whether to use tqdm progress bar.
        factor_data (pd.DataFrame, optional): DataFrame with factors for CAPM/FF3 analysis
            (currently unused by this function).

    Returns:
        tuple[pd.DataFrame, pd.Series]:
            - weights_df: DataFrame of daily portfolio weights (dates index, tickers columns).
            - returns_series: Series of daily portfolio returns (dates index).
    """
    # Need strictly more rows than the lookback window to do anything useful.
    if data_df.empty or len(data_df) <= window_size:
        print("Warning: Dataframe too small for OGD with the given window size.")
        return pd.DataFrame(), pd.Series(dtype=float)

    # Sanitize input: missing returns are treated as flat (zero-return) days.
    num_nan_values = data_df.isna().sum().sum()
    if num_nan_values > 0:
        print(f"WARNING: Input data contains {num_nan_values} NaN values. Filling with 0.")
        data_df = data_df.fillna(0)

    print(f"Data shape: {data_df.shape}")
    print(f"Sample data (first few rows):")
    print(data_df.iloc[:3, :5])

    # Flag all-zero columns (likely dead tickers or join artifacts).
    zero_cols = (data_df == 0).all()
    if zero_cols.any():
        zero_count = zero_cols.sum()
        print(f"WARNING: {zero_count} columns contain all zeros.")

    # Split asset returns from the risk-free column.
    returns = data_df.drop(columns=['rf'])
    rf = data_df['rf']
    tickers = returns.columns.tolist()
    num_assets = len(tickers)
    num_days = len(data_df)

    # Convert to float32 tensors for the optimization loop.
    returns_tensor = torch.tensor(returns.fillna(0).values, dtype=torch.float32)
    rf_tensor = torch.tensor(rf.fillna(0).values, dtype=torch.float32)

    # Belt-and-braces: fillna above should already have removed NaNs.
    if torch.isnan(returns_tensor).any():
        print("WARNING: returns_tensor contains NaN values after conversion. Replacing with zeros.")
        returns_tensor = torch.nan_to_num(returns_tensor, nan=0.0)

    # Unconstrained logits; softmax below maps them to portfolio weights.
    # All-zero logits give the uniform portfolio on day 0.
    weights = torch.zeros((num_assets,), requires_grad=True)

    optimizer = torch.optim.Adam([weights], lr=learning_rate)

    # Per-day logs and the rolling window of realized portfolio returns.
    weights_log = torch.zeros((num_days, num_assets), dtype=torch.float32)
    portfolio_returns_log = torch.zeros((num_days,), dtype=torch.float32)
    rolling_portfolio_returns = []

    print(f"Starting OGD optimization for {num_days} days, {num_assets} assets...")

    # Uniform fallback used whenever the softmax output looks invalid.
    initial_weights = torch.full((num_assets,), 1.0/num_assets)

    day_iterator = tqdm(range(num_days)) if use_tqdm else range(num_days)

    for i in day_iterator:
        # Recover from NaN logits; zeros softmax to uniform weights.
        if torch.isnan(weights).any():
            print(f"WARNING: NaN detected in weights at day {i}, resetting to uniform weights")
            with torch.no_grad():
                weights.copy_(torch.zeros((num_assets,)))

        # Clamp logits before softmax to keep exp() well-conditioned.
        clamped_weights = torch.clamp(weights, min=-5, max=5)
        normalized_weights = torch.nn.functional.softmax(clamped_weights, dim=0)

        # Softmax should sum to ~1; anything else means numerical trouble.
        if torch.isnan(normalized_weights).any() or torch.sum(normalized_weights) < 0.99:
            print(f"WARNING: Invalid normalized weights at day {i}, using uniform weights")
            normalized_weights = initial_weights.clone()

        # Today's cross-section of asset returns.
        daily_asset_returns = returns_tensor[i, :]
        if torch.isnan(daily_asset_returns).any():
            print(f"WARNING: NaN detected in asset returns at day {i}, replacing with zeros")
            daily_asset_returns = torch.nan_to_num(daily_asset_returns, nan=0.0)

        # Realized portfolio return for the day under the current weights.
        daily_portfolio_return = torch.dot(normalized_weights, daily_asset_returns)

        if torch.isnan(daily_portfolio_return):
            print(f"WARNING: NaN detected in portfolio return at day {i}, using zero")
            daily_portfolio_return = torch.tensor(0.0)

        # Periodic debug dump (first 5 days, then every 50th day).
        if i < 5 or i % 50 == 0:
            print(f" Debug info for day {i}:")
            print(f" Sample weights: {normalized_weights[:5].tolist()}")
            print(f" Sample returns: {daily_asset_returns[:5].tolist()}")
            print(f" Sum of weights: {torch.sum(normalized_weights).item()}")
            nan_count = torch.isnan(daily_asset_returns).sum().item()
            print(f" NaN count in returns: {nan_count}/{len(daily_asset_returns)}")

        # Record today's state before any gradient update.
        weights_log[i, :] = normalized_weights.detach()
        portfolio_returns_log[i] = daily_portfolio_return.detach()

        # Maintain the rolling window (detached: history must not carry grads).
        rolling_portfolio_returns.append(daily_portfolio_return.detach())

        if len(rolling_portfolio_returns) > window_size:
            rolling_portfolio_returns.pop(0)

        # No signal to learn from until some non-zero return appears.
        all_zeros = all(r.item() == 0 for r in rolling_portfolio_returns)
        if all_zeros:
            print(f"WARNING: All portfolio returns are zero at day {i}, skipping optimization")
            continue

        # NOTE(review): this value is recomputed (with grad attached) inside
        # the try block below; this first assignment is effectively unused.
        past_portfolio_returns = torch.stack(rolling_portfolio_returns[:-1] + [daily_portfolio_return])

        # Risk-free slice aligned with the rolling window.
        start_idx = max(0, i - window_size + 1)
        past_rf = rf_tensor[start_idx : i + 1]

        # Yesterday's weights for the turnover term (today's on day 0).
        prev_weights = weights_log[i-1, :] if i > 0 else normalized_weights.detach()

        optimizer.zero_grad()

        try:
            # Rebuild weights and today's return ON the graph so the
            # objective is differentiable w.r.t. the logits.
            clamped_weights = torch.clamp(weights, min=-5, max=5)
            current_norm_weights = torch.nn.functional.softmax(clamped_weights, dim=0)

            current_return = torch.dot(current_norm_weights, daily_asset_returns)

            # Window = detached history + today's grad-carrying return.
            historical_returns = rolling_portfolio_returns[:-1]
            new_returns_list = historical_returns + [current_return]
            past_portfolio_returns = torch.stack(new_returns_list)

            objective = calculate_objective_func(
                past_portfolio_returns,
                past_rf,
                current_norm_weights,
                prev_weights,
                alphas,
                enp_min,
                enp_max
            )

            if not torch.isnan(objective):
                # Skip exact-zero objectives (except every 50th day) — they
                # typically indicate the NaN-fallback constant objective.
                if objective.item() != 0.0 or i % 50 == 0:
                    objective.backward()

                    # Verbose logging branch: same optimizer.step(), plus
                    # gradient/weight-change diagnostics around it.
                    log_interval = 50
                    if (i + 1) % log_interval == 0 or num_days - (i + 1) < 5:
                        if not use_tqdm:
                            print(f"\n--- Step {i+1}/{num_days} Log ---")
                        print(f" Objective: {objective.item():.6f}")

                        if weights.grad is not None:
                            avg_grad = torch.mean(torch.abs(weights.grad)).item()
                            print(f" Average Gradient Magnitude: {avg_grad:.6f}")

                        weights_before = weights.detach().clone()

                        optimizer.step()

                        weights_after = weights.detach().clone()
                        weight_change = torch.sum(torch.abs(weights_after - weights_before)).item()
                        print(f" Weight Change (Sum Abs): {weight_change:.6f}")

                        print(f" Sample Normalized Weights: {[f'{w:.4f}' for w in normalized_weights[:5].tolist()]}")
                    else:
                        # Quiet path: just take the gradient step.
                        optimizer.step()

                    # NOTE(review): this zeroes NaN gradients AFTER the step
                    # has already consumed them — presumably meant to protect
                    # the next iteration; confirm intent.
                    with torch.no_grad():
                        if weights.grad is not None and torch.isnan(weights.grad).any():
                            print(f" WARNING: NaN gradient detected at day {i}, zeroing gradients")
                            weights.grad.zero_()
                else:
                    if not use_tqdm:
                        print(f" WARNING: Zero objective at day {i}, skipping gradient update")
            else:
                if not use_tqdm:
                    print(f" WARNING: NaN objective at day {i}, skipping gradient update")

        except Exception as e:
            # Best-effort loop: one bad day must not abort the whole backtest.
            print(f" Optimization error at day {i}: {e}")

    print("OGD optimization finished.")

    # Final sanitation before handing results back as pandas objects.
    if torch.isnan(weights_log).any():
        print("WARNING: Final weights contain NaN values")
        weights_log = torch.nan_to_num(weights_log, nan=1.0/num_assets)

    if torch.isnan(portfolio_returns_log).any():
        print("WARNING: Final portfolio returns contain NaN values")
        portfolio_returns_log = torch.nan_to_num(portfolio_returns_log, nan=0.0)

    weights_df = pd.DataFrame(weights_log.numpy(), index=data_df.index, columns=tickers)
    returns_series = pd.Series(portfolio_returns_log.numpy(), index=data_df.index, name="PortfolioReturn")

    return weights_df, returns_series
| |
|
| | |
def compute_sharpe(returns_series, rf_series, annualization_factor=252):
    """Compute annualized Sharpe ratio.

    Args:
        returns_series: Periodic portfolio returns.
        rf_series: Periodic risk-free rate, aligned with returns_series.
        annualization_factor: Periods per year (252 trading days by default).

    Returns:
        float: Annualized Sharpe ratio; 0.0 for an empty input series.
    """
    excess = np.asarray(returns_series - rf_series)
    # Guard the empty case: np.mean/np.std on an empty array emit warnings and
    # return NaN. Mirrors the empty-input handling in calculate_max_drawdown.
    if excess.size == 0:
        return 0.0
    annual_excess_return = np.mean(excess) * annualization_factor
    annual_volatility = np.std(excess) * np.sqrt(annualization_factor)
    # 1e-6 floor (module eps) prevents division by zero for a flat series.
    return annual_excess_return / (annual_volatility + 1e-6)
| |
|
def compute_max_drawdown(returns_series):
    """Compute maximum drawdown as a positive fraction of the running peak.

    Args:
        returns_series: Periodic portfolio returns (array-like).

    Returns:
        float: Max drawdown in [0, 1]; 0.0 for an empty series (consistent
        with calculate_max_drawdown — previously np.max raised ValueError).
    """
    arr = np.asarray(returns_series)
    if arr.size == 0:
        return 0.0
    # Wealth curve, its running maximum, and the worst fractional decline.
    cr = np.cumprod(arr + 1)
    peak = np.maximum.accumulate(cr)
    # 1e-6 (module eps) in the denominator guards against a zero peak.
    return np.max((peak - cr) / (peak + 1e-6))
| |
|
def compute_alpha(returns_series, rf_series, factor_data, model="CAPM"):
    """Compute alpha using either CAPM or Fama-French 3-factor model.

    Args:
        returns_series: Portfolio returns series
        rf_series: Risk-free rate series
        factor_data: DataFrame with factor returns (must include 'mktrf' for CAPM,
            and 'smb', 'hml' for FF3)
        model: 'CAPM' or 'FF3'

    Returns:
        tuple: (alpha, regression_result)
    """
    # Dependent variable: excess portfolio returns.
    excess = np.asarray(returns_series - rf_series)

    # Pick the factor set for the requested model.
    if model == "CAPM":
        regressors = np.asarray(factor_data[["mktrf"]])
    elif model == "FF3":
        regressors = np.asarray(factor_data[["mktrf", "smb", "hml"]])
    else:
        raise ValueError("Model must be 'CAPM' or 'FF3'")

    # OLS with an intercept; the fitted constant is the (per-period) alpha.
    design = sm.add_constant(regressors)
    fit = sm.OLS(excess, design).fit()
    return fit.params[0], fit
| |
|
| | |
def plot_optimization_results(
    opt_returns_series,
    weights_df,
    benchmark_returns=None,
    top_n=5,
    title_suffix=""
):
    """Plot optimization results with comparison to benchmarks.

    Produces three figures (each shown immediately): return distribution +
    cumulative returns vs. benchmarks, weight trajectories + final-day weight
    histogram, and effective number of positions (ENP) over time.

    Args:
        opt_returns_series: Series of optimized portfolio returns
        weights_df: DataFrame of weights over time
        benchmark_returns: Dict of benchmark return series {name: series}
            (assumed aligned with opt_returns_series' index — TODO confirm)
        top_n: Number of top assets to highlight in weights plot
        title_suffix: Additional text to add to plot titles
    """
    # Unpack plotting inputs once.
    dates = opt_returns_series.index
    opt_returns = opt_returns_series.values
    weights_np = weights_df.values

    # --- Figure 1: return distribution (top) and cumulative returns (bottom).
    fig, axes = plt.subplots(2, 1, figsize=(12, 10))

    axes[0].hist(opt_returns, bins=50, alpha=0.5, label='Optimized', color='red')

    axes[1].plot(dates, np.cumprod(opt_returns + 1), label='Optimized', color='red')

    # Overlay each benchmark on both panels.
    if benchmark_returns:
        for name, b_returns in benchmark_returns.items():
            axes[0].hist(b_returns, bins=50, alpha=0.5, label=name)
            axes[1].plot(dates, np.cumprod(b_returns + 1), label=name)

    axes[0].set_title('Return Distribution')
    axes[0].legend()

    axes[1].set_title('Cumulative Returns')
    axes[1].legend()
    axes[1].xaxis.set_major_locator(mdates.YearLocator())
    axes[1].xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

    fig.suptitle(f"Performance Comparison {title_suffix}", fontsize=16)
    plt.tight_layout()
    plt.show()

    # --- Figure 2: weight trajectories (top) and final-day histogram (bottom).
    fig, axes = plt.subplots(2, 1, figsize=(12, 10))

    # Highlight the top_n assets by FINAL-day weight; de-emphasize the rest.
    top_assets_idx = np.argsort(weights_np[-1])[-top_n:]
    for i in range(weights_np.shape[1]):
        label = weights_df.columns[i] if i in top_assets_idx else None
        lw = 2 if i in top_assets_idx else 0.3
        alpha = 0.8 if i in top_assets_idx else 0.3
        axes[0].plot(dates, weights_np[:, i], label=label, linewidth=lw, alpha=alpha)

    axes[0].xaxis.set_major_locator(mdates.YearLocator())
    axes[0].xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    axes[0].set_title("Weights Over Time")
    axes[0].legend()

    # Log-scaled counts: weights are typically concentrated near zero.
    axes[1].hist(weights_np[-1], bins=100, log=True, color='blue', alpha=0.7)
    axes[1].set_title("Final Day Weight Distribution")

    plt.tight_layout()
    plt.show()

    # --- Figure 3: ENP = 1 / HHI per day (diversification over time).
    enp_series = 1.0 / np.sum(weights_np ** 2, axis=1)
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(dates, enp_series)
    ax.set_title("Effective Number of Positions Over Time")
    ax.set_ylabel("ENP")
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    plt.tight_layout()
    plt.show()

    return None