# portfolio-optimizer / src/optimization.py
# abnsol's picture
# Update src/optimization.py
# 87dac5a verified
import os
import pandas as pd
import numpy as np
import yfinance as yf
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models, expected_returns, plotting
import matplotlib.pyplot as plt
# Number of trading days per year; used below to annualize daily mean
# returns (by compounding) and the daily covariance matrix (by scaling).
TRADING_DAYS = 252
def load_processed_prices(proc_dir: str, tickers: list[str]) -> dict[str, pd.DataFrame]:
    """
    Read the processed price CSV of every requested ticker.

    Each file is expected at ``<proc_dir>/<ticker>_processed.csv`` and must
    contain a ``Date`` column, which is parsed as dates and used as the index.

    Args:
        proc_dir (str): Directory holding the processed CSV files.
        tickers (list[str]): Ticker symbols to load.
    Returns:
        dict: {ticker: DataFrame indexed by 'Date', sorted by index}.
    """
    return {
        ticker: pd.read_csv(
            os.path.join(proc_dir, f"{ticker}_processed.csv"),
            parse_dates=["Date"],
            index_col="Date",
        ).sort_index()
        for ticker in tickers
    }
def compute_mu_and_cov(data: dict[str, pd.DataFrame],
                       tsla_annual_return: float,
                       price_col: str = "Close") -> tuple[pd.Series, pd.DataFrame]:
    """
    Build the expected-return vector and annualized covariance matrix.

    Daily simple returns are derived from ``price_col`` for every ticker.
    The expected return of each ticker is its mean daily return compounded
    over TRADING_DAYS, except for "TSLA", whose expected return is taken
    directly from ``tsla_annual_return``.

    Args:
        data (dict): {ticker: DataFrame containing the ``price_col`` column}.
        tsla_annual_return (float): Forecast-based expected annual return for TSLA.
        price_col (str): Price column used to compute returns, default 'Close'.
    Returns:
        mu (pd.Series): Expected annual return per ticker.
        cov (pd.DataFrame): Daily covariance scaled by TRADING_DAYS, with rows
            and columns ordered like ``mu``.
    """
    # Per-ticker daily simple returns (the leading NaN from pct_change is dropped).
    daily_returns = {
        ticker: frame[price_col].pct_change().dropna()
        for ticker, frame in data.items()
    }

    # TSLA uses the externally supplied forecast; every other ticker uses its
    # historical mean daily return compounded to an annual figure.
    expected = {
        ticker: (
            tsla_annual_return
            if ticker == "TSLA"
            else (1 + series.mean()) ** TRADING_DAYS - 1
        )
        for ticker, series in daily_returns.items()
    }
    mu_series = pd.Series(expected)

    # Covariance only uses dates on which every ticker has a return.
    aligned_returns = pd.DataFrame(daily_returns).dropna()
    annual_cov = aligned_returns.cov() * TRADING_DAYS

    # Keep the covariance axes in the same order as the return vector.
    annual_cov = annual_cov.reindex(index=mu_series.index, columns=mu_series.index)
    return mu_series, annual_cov
def optimize_portfolio(mu: pd.Series,
                       cov: pd.DataFrame,
                       rf: float = 0.0,
                       bounds: tuple = (0, 1)) -> dict:
    """
    Compute the Max Sharpe and Min Volatility portfolios.

    Each portfolio is solved on its own EfficientFrontier instance, and the
    performance of both is evaluated with the same risk-free rate ``rf``.

    Args:
        mu (pd.Series): Expected annual returns.
        cov (pd.DataFrame): Annualized covariance matrix.
        rf (float): Risk-free rate used for the Sharpe optimization and for
            the reported Sharpe ratios.
        bounds (tuple): Weight bounds, default (0,1) for long-only.
    Returns:
        dict with:
        - 'weights_sharpe': cleaned weights of the Max Sharpe portfolio
        - 'perf_sharpe': {'return', 'volatility', 'sharpe'}
        - 'weights_minvol': cleaned weights of the Min Volatility portfolio
        - 'perf_minvol': {'return', 'volatility', 'sharpe'}
    """
    def _as_metrics(perf) -> dict:
        # portfolio_performance yields (return, volatility, sharpe) in that order.
        return {
            "return": perf[0],
            "volatility": perf[1],
            "sharpe": perf[2],
        }

    # Max Sharpe portfolio.
    sharpe_frontier = EfficientFrontier(mu, cov, weight_bounds=bounds)
    sharpe_frontier.max_sharpe(risk_free_rate=rf)
    sharpe_allocation = sharpe_frontier.clean_weights()
    sharpe_metrics = sharpe_frontier.portfolio_performance(
        verbose=False, risk_free_rate=rf
    )

    # Min Volatility portfolio, solved on a fresh frontier object.
    minvol_frontier = EfficientFrontier(mu, cov, weight_bounds=bounds)
    minvol_frontier.min_volatility()
    minvol_allocation = minvol_frontier.clean_weights()
    minvol_metrics = minvol_frontier.portfolio_performance(
        verbose=False, risk_free_rate=rf
    )

    return {
        "weights_sharpe": sharpe_allocation,
        "perf_sharpe": _as_metrics(sharpe_metrics),
        "weights_minvol": minvol_allocation,
        "perf_minvol": _as_metrics(minvol_metrics),
    }
def _target_failure(message: str) -> dict:
    """Build the failure payload shared by every error path of optimize_for_target."""
    # The message is exposed both at the top level and under "performance" so
    # callers written against either of the historical error shapes keep working.
    return {
        "error": message,
        "weights": {},
        "performance": {"error": message},
    }


def optimize_for_target(mu: pd.Series,
                        cov: pd.DataFrame,
                        target_type: str,
                        target_value: float,
                        bounds: tuple = (0, 1)) -> dict:
    """
    Optimize for a specific target return or volatility.

    Args:
        mu (pd.Series): Expected annual returns.
        cov (pd.DataFrame): Annualized covariance matrix.
        target_type (str): 'return' to call ``efficient_return`` or
            'volatility' to call ``efficient_risk`` with ``target_value``.
        target_value (float): The target return or target volatility.
        bounds (tuple): Weight bounds, default (0,1) for long-only.
    Returns:
        On success, a dict with:
        - 'weights': cleaned portfolio weights
        - 'performance': {'return', 'volatility', 'sharpe'}
        On failure (invalid ``target_type`` or an error raised while
        optimizing), a dict with:
        - 'error': the error message
        - 'weights': {}
        - 'performance': {'error': <same message>}
    """
    # Validate before building the optimizer so a bad target_type is reported
    # the same way no matter what mu/cov contain.
    if target_type not in ("return", "volatility"):
        return _target_failure("Invalid target_type. Use 'return' or 'volatility'.")

    ef = EfficientFrontier(mu, cov, weight_bounds=bounds)
    try:
        if target_type == "return":
            ef.efficient_return(target_value)
        else:
            ef.efficient_risk(target_value)
        cleaned_weights = ef.clean_weights()
        # No risk-free rate is passed, so the Sharpe ratio uses the library default.
        perf = ef.portfolio_performance(verbose=False)
    except Exception as e:
        # Deliberate best-effort boundary: infeasible targets and solver
        # failures are reported to the caller instead of being raised.
        return _target_failure(str(e))

    return {
        "weights": cleaned_weights,
        "performance": {
            "return": perf[0],
            "volatility": perf[1],
            "sharpe": perf[2],
        },
    }
def plot_efficient_frontier(mu, cov, results: dict):
    """
    Plot the efficient frontier with key portfolios marked.

    Args:
        mu: Expected returns, passed to EfficientFrontier.
        cov: Covariance matrix, passed to EfficientFrontier.
        results (dict): Must contain 'perf_sharpe' and 'perf_minvol', each a
            dict with 'volatility' and 'return' entries (the shape returned
            by optimize_portfolio).
    Returns:
        The matplotlib figure holding the frontier and the two markers.
    Raises:
        KeyError: If ``results`` lacks one of the required entries.
    """
    # Read everything needed from `results` before any figure exists, so a
    # malformed dict fails fast without leaving an orphaned pyplot figure.
    sharpe_perf = results["perf_sharpe"]
    minvol_perf = results["perf_minvol"]
    sharpe_point = (sharpe_perf["volatility"], sharpe_perf["return"])
    minvol_point = (minvol_perf["volatility"], minvol_perf["return"])

    ef = EfficientFrontier(mu, cov)
    fig, ax = plt.subplots(figsize=(8, 6))

    # Plot the frontier
    plotting.plot_efficient_frontier(ef, ax=ax, show_assets=False)

    # Plot markers for the two reference portfolios
    ax.scatter(*sharpe_point, marker="*", color="r", s=250, label="Max Sharpe")
    ax.scatter(*minvol_point, marker="X", color="g", s=200, label="Min Volatility")

    ax.set_title("Efficient Frontier")
    ax.legend()
    return fig