import os
from typing import Optional

import pandas as pd
import numpy as np
import yfinance as yf
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models, expected_returns, plotting
import matplotlib.pyplot as plt

# Number of periods used to annualize daily return statistics.
TRADING_DAYS = 252


def load_processed_prices(proc_dir: str, tickers: list[str]) -> dict[str, pd.DataFrame]:
    """Load the processed CSV for each ticker.

    For every ticker the file ``{proc_dir}/{ticker}_processed.csv`` is read
    with its ``Date`` column parsed as dates and used as the index, and the
    rows are sorted by that index.

    Args:
        proc_dir (str): Directory containing the processed CSV files.
        tickers (list[str]): Ticker symbols to load.

    Returns:
        dict: {ticker: DataFrame indexed by 'Date', sorted ascending}.

    Raises:
        Any error raised by ``pandas.read_csv`` (for example when a file is
        missing) propagates unchanged.
    """
    data = {}
    for t in tickers:
        fn = os.path.join(proc_dir, f"{t}_processed.csv")
        df = pd.read_csv(fn, parse_dates=["Date"], index_col="Date")
        data[t] = df.sort_index()
    return data


def compute_mu_and_cov(
    data: dict[str, pd.DataFrame],
    tsla_annual_return: float,
    price_col: str = "Close",
) -> tuple[pd.Series, pd.DataFrame]:
    """
    Compute expected returns (mu) and covariance matrix (Sigma).
    Replaces TSLA's expected return with forecast-based value.

    Daily returns are the percentage change of ``price_col``. For every
    ticker other than "TSLA" the expected annual return is the mean daily
    return compounded over TRADING_DAYS periods. The covariance is computed
    from the daily returns of all tickers restricted to dates on which every
    ticker has a return, then scaled by TRADING_DAYS.

    Args:
        data (dict): {ticker: DataFrame with at least 'Close' column}.
        tsla_annual_return (float): Forecast-based expected annual return for TSLA.
        price_col (str): Column to use for returns, default = 'Close'.

    Returns:
        mu (pd.Series): Expected annual returns for each asset.
        cov (pd.DataFrame): Annualized covariance matrix.
    """
    returns = {}
    mu = {}

    for t, df in data.items():
        daily_ret = df[price_col].pct_change().dropna()
        returns[t] = daily_ret

        if t == "TSLA":
            # The historical mean is ignored for TSLA; the caller-supplied
            # forecast is used as-is.
            mu[t] = tsla_annual_return
        else:
            # Note: the mean uses this ticker's full history, which may be
            # longer than the date-aligned window used for the covariance.
            avg_daily = daily_ret.mean()
            mu[t] = (1 + avg_daily) ** TRADING_DAYS - 1

    # Keep only the dates on which every ticker has a return.
    returns_df = pd.DataFrame(returns).dropna()
    cov_daily = returns_df.cov()
    cov_annual = cov_daily * TRADING_DAYS

    mu_series = pd.Series(mu)
    # Guarantee that rows/columns of the covariance follow the order of mu.
    cov_annual = cov_annual.reindex(index=mu_series.index, columns=mu_series.index)

    return mu_series, cov_annual


def optimize_portfolio(
    mu: pd.Series,
    cov: pd.DataFrame,
    rf: float = 0.0,
    bounds: tuple = (0, 1),
) -> dict:
    """
    Run portfolio optimization (Max Sharpe & Min Volatility).

    Args:
        mu (pd.Series): Expected annual returns.
        cov (pd.DataFrame): Annualized covariance matrix.
        rf (float): Risk-free rate for Sharpe calculation.
        bounds (tuple): Weight bounds, default (0,1) for long-only.

    Returns:
        dict with:
            - 'weights_sharpe': cleaned weights of the max-Sharpe portfolio
            - 'perf_sharpe': {'return', 'volatility', 'sharpe'}
            - 'weights_minvol': cleaned weights of the min-volatility portfolio
            - 'perf_minvol': {'return', 'volatility', 'sharpe'}
    """
    # Max-Sharpe portfolio. The solver's return value is not needed: the
    # cleaned weights and performance are read back from the optimizer.
    ef = EfficientFrontier(mu, cov, weight_bounds=bounds)
    ef.max_sharpe(risk_free_rate=rf)
    cleaned_weights_sharpe = ef.clean_weights()
    perf_sharpe = ef.portfolio_performance(verbose=False, risk_free_rate=rf)

    # Min-volatility portfolio, solved on its own optimizer instance.
    ef_minvol = EfficientFrontier(mu, cov, weight_bounds=bounds)
    ef_minvol.min_volatility()
    cleaned_weights_minvol = ef_minvol.clean_weights()
    perf_minvol = ef_minvol.portfolio_performance(verbose=False, risk_free_rate=rf)

    return {
        "weights_sharpe": cleaned_weights_sharpe,
        "perf_sharpe": {
            "return": perf_sharpe[0],
            "volatility": perf_sharpe[1],
            "sharpe": perf_sharpe[2],
        },
        "weights_minvol": cleaned_weights_minvol,
        "perf_minvol": {
            "return": perf_minvol[0],
            "volatility": perf_minvol[1],
            "sharpe": perf_minvol[2],
        },
    }


def optimize_for_target(
    mu: pd.Series,
    cov: pd.DataFrame,
    target_type: str,
    target_value: float,
    bounds: tuple = (0, 1),
    rf: Optional[float] = None,
) -> dict:
    """
    Optimize for a specific target return or volatility.

    Args:
        mu (pd.Series): Expected annual returns.
        cov (pd.DataFrame): Annualized covariance matrix.
        target_type (str): 'return' or 'volatility'.
        target_value (float): Target passed to the matching optimizer method.
        bounds (tuple): Weight bounds, default (0,1) for long-only.
        rf (float | None): Risk-free rate for the reported Sharpe ratio.
            When None, the optimizer library's own default is used.

    Returns:
        dict with 'weights' and 'performance'. On success 'performance'
        holds 'return', 'volatility' and 'sharpe'. On failure 'weights' is
        empty and 'performance' holds a single 'error' message. For an
        invalid ``target_type`` the message is additionally available under
        the top-level 'error' key.
    """
    ef = EfficientFrontier(mu, cov, weight_bounds=bounds)

    if target_type not in ("return", "volatility"):
        msg = "Invalid target_type. Use 'return' or 'volatility'."
        # Same shape as the solver-failure result below; the top-level
        # "error" key is kept for callers that already check for it.
        return {"error": msg, "weights": {}, "performance": {"error": msg}}

    try:
        if target_type == "return":
            ef.efficient_return(target_value)
        else:
            ef.efficient_risk(target_value)

        cleaned_weights = ef.clean_weights()
        if rf is None:
            perf = ef.portfolio_performance(verbose=False)
        else:
            perf = ef.portfolio_performance(verbose=False, risk_free_rate=rf)
    except Exception as e:
        # Deliberately broad: any optimizer failure (e.g. an unreachable
        # target) is reported to the caller as data instead of raising.
        return {"weights": {}, "performance": {"error": str(e)}}

    return {
        "weights": cleaned_weights,
        "performance": {
            "return": perf[0],
            "volatility": perf[1],
            "sharpe": perf[2],
        },
    }


def plot_efficient_frontier(mu, cov, results: dict):
    """
    Plot the efficient frontier with key portfolios marked.

    Args:
        mu: Expected annual returns.
        cov: Annualized covariance matrix.
        results (dict): Must provide 'perf_sharpe' and 'perf_minvol', each a
            dict with 'return' and 'volatility' (the shape produced by
            ``optimize_portfolio``).

    Returns:
        The matplotlib Figure containing the plot.
    """
    # The frontier is drawn with the optimizer's default weight bounds.
    ef = EfficientFrontier(mu, cov)
    fig, ax = plt.subplots(figsize=(8, 6))

    # Plot the frontier
    plotting.plot_efficient_frontier(ef, ax=ax, show_assets=False)

    # Risk/return coordinates of the two highlighted portfolios
    sharpe_perf = results["perf_sharpe"]
    minvol_perf = results["perf_minvol"]

    # Plot markers
    ax.scatter(sharpe_perf["volatility"], sharpe_perf["return"],
               marker="*", color="r", s=250, label="Max Sharpe")
    ax.scatter(minvol_perf["volatility"], minvol_perf["return"],
               marker="X", color="g", s=200, label="Min Volatility")

    ax.set_title("Efficient Frontier")
    ax.legend()
    return fig