"""factor_risk_model.py — Barra-Style Multi-Factor Risk Model Decomposes portfolio risk into factor (systematic) and specific (idiosyncratic) components. Models factor covariance using PCA + exponential weighting, and estimates specific risk from residuals. Essential for risk budgeting and attribution. References: - Grinold & Kahn 2000: "Active Portfolio Management" (Barra model) - Menchero et al. 2010: "The Barra US Equity Model (USE4)" - Connor et al. 2010: "The Structure of Factor Risk Premiums" """ import numpy as np, pandas as pd from scipy.linalg import eigh class FactorRiskModel: """Barra-style factor risk model.""" def __init__(self, n_factors=20, halflife=126): self.n_factors = n_factors self.halflife = halflife self.factor_cov = None self.factor_loadings = None self.specific_var = None self.factor_names = None def _exp_weights(self, n): lambda_ = 0.5 ** (1.0 / self.halflife) w = np.array([lambda_ ** (n - 1 - i) for i in range(n)]) return w / w.sum() def fit(self, returns): """Fit factor model via PCA with exponential weighting.""" r = returns.dropna() T, N = r.shape w = self._exp_weights(T) # Weighted covariance rw = r.values * np.sqrt(w[:, None]) cov = (rw.T @ rw) / (1 - lambda_ ** T) # normalize # PCA eigvals, eigvecs = eigh(cov) idx = np.argsort(eigvals)[::-1] eigvals = eigvals[idx]; eigvecs = eigvecs[:, idx] self.factor_loadings = eigvecs[:, :self.n_factors] self.factor_cov = np.diag(eigvals[:self.n_factors]) self.factor_names = [f"PC{i+1}" for i in range(self.n_factors)] # Specific risk from residuals factor_rets = r.values @ self.factor_loadings explained = factor_rets @ self.factor_loadings.T residuals = r.values - explained self.specific_var = np.var(residuals, axis=0) return self def portfolio_risk(self, weights): """Decompose portfolio risk into factor + specific.""" w = np.array(weights).reshape(-1) # Factor risk factor_exposure = w @ self.factor_loadings factor_var = factor_exposure @ self.factor_cov @ factor_exposure # Specific risk specific_var = np.sum((w ** 2) * self.specific_var) total_var = factor_var + specific_var return { 'total_vol': float(np.sqrt(total_var)), 'factor_vol': float(np.sqrt(factor_var)), 'specific_vol': float(np.sqrt(specific_var)), 'factor_pct': float(factor_var / (total_var + 1e-10) * 100), 'specific_pct': float(specific_var / (total_var + 1e-10) * 100), 'factor_exposures': dict(zip(self.factor_names, factor_exposure.tolist())) } def marginal_risk_contrib(self, weights): """Marginal risk contribution per asset.""" w = np.array(weights).reshape(-1) total_var = self.portfolio_risk(weights) sigma = total_var['total_vol'] # Gradient of variance w.r.t weights cov_total = (self.factor_loadings @ self.factor_cov @ self.factor_loadings.T + np.diag(self.specific_var)) grad = cov_total @ w mrc = w * grad / (sigma + 1e-10) return pd.Series(mrc, index=[f'Asset_{i}' for i in range(len(w))]) def risk_budget(self, weights, target_risk=None): """Risk budgeting: find weights such that each asset contributes equally.""" n = len(weights) w0 = np.ones(n) / n def risk_parity_objective(w): mrc = self.marginal_risk_contrib(w) target = mrc.sum() / n return np.sum((mrc - target) ** 2) # Simple iterative approach for _ in range(100): mrc = self.marginal_risk_contrib(w0) w0 = w0 * (1.0 / (mrc.values + 1e-10)) w0 = w0 / w0.sum() if target_risk: vol = self.portfolio_risk(w0)['total_vol'] w0 = w0 * (target_risk / (vol + 1e-10)) return w0 def risk_report(self, weights): """Human-readable risk decomposition.""" risk = self.portfolio_risk(weights) mrc = self.marginal_risk_contrib(weights) report = f"""## Factor Risk Decomposition | Risk Component | Volatility | % of Total | |----------------|-----------|------------| | Total | {risk['total_vol']*100:.2f}% | 100% | | Factor (Systematic) | {risk['factor_vol']*100:.2f}% | {risk['factor_pct']:.1f}% | | Specific (Idiosyncratic) | {risk['specific_vol']*100:.2f}% | {risk['specific_pct']:.1f}% | **Top Factor Exposures:** """ top = sorted(risk['factor_exposures'].items(), key=lambda x: abs(x[1]), reverse=True)[:5] for name, exp in top: report += f"- {name}: {exp:.3f}\n" report += f"\n**Top Risk Contributors:**\n" top_mrc = mrc.sort_values(ascending=False).head(5) for asset, contrib in top_mrc.items(): report += f"- {asset}: {contrib*100:.2f}%\n" return report if __name__ == '__main__': np.random.seed(42) returns = pd.DataFrame(np.random.normal(0.0003, 0.015, (500, 10)), columns=[f'Stock_{i}' for i in range(10)], index=pd.date_range('2022-01-01', periods=500, freq='B')) model = FactorRiskModel(n_factors=5).fit(returns) weights = np.array([0.1]*10) print(model.risk_report(weights))