import logging
from typing import Dict, List, Optional, Tuple

import cvxpy as cp
import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)

# Threshold below which a sum of weights/scores is treated as zero.
_EPS = 1e-12


def _group_indices_by_sector(sectors: List[str]) -> Dict[str, List[int]]:
    """Map each sector label to the positions holding it, sorted by label."""
    groups: Dict[str, List[int]] = {}
    for position, sector in enumerate(sectors):
        groups.setdefault(sector, []).append(position)
    return dict(sorted(groups.items()))


def _cap_and_normalize(scores: np.ndarray, cap: float) -> np.ndarray:
    """Turn non-negative scores into weights summing to 1 with each <= cap.

    Weights start proportional to ``scores`` (equal if all scores are zero).
    Any weight above ``cap`` is pinned at ``cap`` and the excess is
    redistributed over the remaining names, proportionally to their scores
    (or equally when those scores are all zero).  If the cap cannot be met
    while staying fully invested (``cap * n < 1``), equal weights are
    returned, since they minimise the largest position.
    """
    n = scores.size
    equal = np.full(n, 1.0 / n)

    total = float(scores.sum())
    weights = scores / total if total >= _EPS else equal.copy()

    if cap * n < 1.0 - 1e-9:
        return equal

    base = weights.copy()
    capped = np.zeros(n, dtype=bool)
    # Each pass pins at least one additional name, so this runs <= n times.
    while True:
        over = (weights > cap + _EPS) & ~capped
        if not over.any():
            break
        capped |= over
        weights[capped] = cap
        free = ~capped
        if not free.any():
            break
        residual = 1.0 - cap * int(capped.sum())
        free_total = float(base[free].sum())
        if free_total < _EPS:
            weights[free] = residual / int(free.sum())
        else:
            weights[free] = base[free] * (residual / free_total)
    return weights


def _solve_mean_variance(
    mu: np.ndarray,
    alpha_score: np.ndarray,
    beta: np.ndarray,
    cov: np.ndarray,
    sector_groups: Dict[str, List[int]],
    risk_aversion: float,
    max_weight: float,
    sector_limit: float,
    beta_limit: float,
) -> Optional[np.ndarray]:
    """Solve the constrained mean-variance problem; return None on failure."""
    n = mu.size
    w = cp.Variable(n)
    objective = cp.Maximize(
        mu @ w
        + 0.0025 * (alpha_score @ w)
        - risk_aversion * cp.quad_form(w, cov)
    )
    constraints = [
        cp.sum(w) == 1,
        w >= 0,
        w <= max_weight,
        beta @ w <= beta_limit,
    ]
    constraints.extend(
        cp.sum(w[idx]) <= sector_limit for idx in sector_groups.values()
    )
    problem = cp.Problem(objective, constraints)

    try:
        problem.solve(solver=cp.SCS, verbose=False)
    except Exception:
        # Deliberate best-effort: any solver failure triggers the heuristic
        # fallback in the caller, but the reason is no longer hidden.
        logger.warning(
            "Portfolio solve raised; using heuristic weights", exc_info=True
        )
        return None

    if w.value is None or problem.status not in {"optimal", "optimal_inaccurate"}:
        logger.warning(
            "Portfolio solve ended with status %r; using heuristic weights",
            problem.status,
        )
        return None

    solution = np.asarray(w.value, dtype=float).ravel()
    if not np.all(np.isfinite(solution)):
        logger.warning(
            "Portfolio solve returned non-finite weights; using heuristic weights"
        )
        return None
    return solution


def optimize_portfolio(
    latest_predictions: pd.DataFrame,
    feature_df: pd.DataFrame,
    risk_aversion: float = 8.0,
    max_weight: float = 0.35,
    sector_limit: float = 0.70,
    beta_limit: float = 1.20,
) -> Tuple[pd.DataFrame, List[Dict[str, float]], Dict[str, float]]:
    """Build long-only, fully invested portfolio weights.

    Maximises ``mu.w + 0.0025 * alpha.w - risk_aversion * w' cov w`` subject
    to ``sum(w) == 1``, ``0 <= w <= max_weight``, a per-sector weight cap and
    a cap on the beta exposure.  If the solver fails or does not report an
    optimal status, weights fall back to positive ``alpha_score`` values,
    normalised and capped at ``max_weight``; that fallback does not enforce
    the sector or beta limits.

    Args:
        latest_predictions: One row per ticker with columns ``ticker``,
            ``expected_return``, ``alpha_score``, ``ret_20d`` and ``sector``.
            Missing numeric values are treated as 0; missing sectors are
            grouped under ``"unknown"``.
        feature_df: Long-format frame with columns ``date``, ``ticker`` and
            ``ret_1d``, used to estimate the covariance matrix.
        risk_aversion: Multiplier on the variance penalty.
        max_weight: Upper bound on any single weight.
        sector_limit: Upper bound on the summed weight of each sector.
        beta_limit: Upper bound on ``beta @ w`` where beta is computed here
            as ``1 + 4 * ret_20d``.

    Returns:
        A tuple ``(weight_df, exposures, aux)``:
        ``weight_df`` has columns ``ticker`` and ``weight``;
        ``exposures`` is a list of dicts with keys ``factor`` (a string
        label), ``exposure`` and ``limit``;
        ``aux`` holds ``exp_return_daily`` and ``vol_daily``.

    Raises:
        ValueError: If ``latest_predictions`` has no rows.
        KeyError: If a required column is missing, or a ticker in
            ``latest_predictions`` has no column in the pivoted returns.
    """
    tickers = latest_predictions["ticker"].tolist()
    n = len(tickers)
    if n == 0:
        raise ValueError("latest_predictions contains no tickers to optimize")

    # Select the optimisation universe BEFORE dropping incomplete rows so
    # that gaps in unrelated tickers do not discard usable observations.
    returns_wide = (
        feature_df.pivot(index="date", columns="ticker", values="ret_1d")
        .loc[:, tickers]
        .dropna()
    )

    # Shrink the sample covariance toward its diagonal and add a small ridge
    # so the quadratic form stays numerically positive definite.
    sample_cov = returns_wide.cov().fillna(0.0).values
    diag_cov = np.diag(np.diag(sample_cov))
    cov = 0.75 * sample_cov + 0.25 * diag_cov + np.eye(n) * 1e-6

    mu = latest_predictions["expected_return"].fillna(0.0).values
    alpha_score = latest_predictions["alpha_score"].fillna(0.0).values
    beta = latest_predictions["ret_20d"].fillna(0.0).values * 4.0 + 1.0

    # Normalise sector labels to strings so sorting and label formatting
    # cannot fail on missing or non-string entries.
    sectors = [
        "unknown" if pd.isna(raw) else str(raw)
        for raw in latest_predictions["sector"].tolist()
    ]
    sector_groups = _group_indices_by_sector(sectors)

    solution = _solve_mean_variance(
        mu,
        alpha_score,
        beta,
        cov,
        sector_groups,
        risk_aversion,
        max_weight,
        sector_limit,
        beta_limit,
    )

    if solution is None:
        weights = _cap_and_normalize(np.maximum(alpha_score, 0.0), max_weight)
    else:
        # Clip tiny negative solver noise, then renormalise to sum to 1.
        weights = np.maximum(solution, 0.0)
        total = float(weights.sum())
        weights = np.repeat(1.0 / n, n) if total < _EPS else weights / total

    weight_df = pd.DataFrame({"ticker": tickers, "weight": weights})

    exposures = [
        {"factor": "beta", "exposure": float(beta @ weights), "limit": beta_limit},
        {
            "factor": "alpha",
            "exposure": float(alpha_score @ weights),
            "limit": 999.0,
        },
    ]
    exposures.extend(
        {
            "factor": f"sector_{sec.lower().replace(' ', '_')}",
            "exposure": float(weights[idx].sum()),
            "limit": sector_limit,
        }
        for sec, idx in sector_groups.items()
    )

    aux = {
        "exp_return_daily": float(mu @ weights),
        "vol_daily": float(np.sqrt(max(weights.T @ cov @ weights, 1e-12))),
    }
    return weight_df, exposures, aux