Add cross-sectional factors - Fama-French, momentum, quality, low-vol style factors
Browse files- cross_sectional_factors.py +125 -0
cross_sectional_factors.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""cross_sectional_factors.py — Cross-Sectional Factor Construction
|
| 2 |
+
|
| 3 |
+
Implements classic and modern equity style factors: Fama-French 5-factor,
|
| 4 |
+
momentum (Carhart), quality (profitability + low investment), low volatility,
|
| 5 |
+
value (book-to-market, earnings yield), size (SMB), and liquidity.
|
| 6 |
+
|
| 7 |
+
References:
|
| 8 |
+
- Fama & French 2015: "A Five-Factor Asset Pricing Model"
|
| 9 |
+
- Carhart 1997: "On Persistence in Mutual Fund Performance"
|
| 10 |
+
- Asness & Frazzini 2013: "The Devil in HML's Details" (value/HML construction)
- Asness, Frazzini & Pedersen 2019: "Quality Minus Junk" (quality factor)
|
| 11 |
+
- Blitz & van Vliet 2007: "The Volatility Effect"
|
| 12 |
+
"""
|
| 13 |
+
import numpy as np, pandas as pd
|
| 14 |
+
|
| 15 |
+
class CrossSectionalFactorModel:
    """Constructs and scores equity style factors cross-sectionally.

    Every ``*_factor`` method builds a score per asset and hands it to
    ``_long_short_rank``, which turns the scores into dollar-neutral
    long/short weights (top quantile vs. bottom quantile).

    Scores may be either a Series (one value per label) or a DataFrame
    (rows are observations, columns are assets). For a DataFrame the most
    recent row is ranked, which yields asset-indexed weights of the shape
    ``factor_returns`` consumes.
    """

    FACTORS = ['MKT', 'SMB', 'HML', 'RMW', 'CMA', 'MOM', 'QUAL', 'BAB', 'LIQ']

    def __init__(self, lookback=252, n_quantiles=10):
        """Store the configuration.

        Args:
            lookback: Default lookback length; stored as ``self.lookback``.
            n_quantiles: Number of quantile buckets used when forming the
                long and short legs; stored as ``self.n_q``.
        """
        self.lookback = lookback
        self.n_q = n_quantiles

    def value_factor(self, prices, book_values):
        """HML: long high book-to-market, short low book-to-market.

        Args:
            prices: Prices; their index defines the alignment target.
            book_values: Book values, forward-filled onto ``prices.index``.

        Returns:
            Long/short weights from ``_long_short_rank``.
        """
        bv = book_values.reindex(prices.index, method='ffill')
        btm = bv / prices
        return self._long_short_rank(btm, 'high')

    def size_factor(self, prices, market_caps):
        """SMB: long small caps, short big caps.

        Args:
            prices: Prices; only their index is used, for alignment.
            market_caps: Market caps, forward-filled onto ``prices.index``.

        Returns:
            Long/short weights from ``_long_short_rank``.
        """
        mc = market_caps.reindex(prices.index, method='ffill')
        return self._long_short_rank(mc, 'low')

    def momentum_factor(self, prices, window=252, skip=21):
        """MOM: 12-1 month momentum (skip the most recent month).

        The score at time t is the return from ``t - window`` to
        ``t - skip``, so with the defaults it spans roughly months
        t-12 .. t-1 and excludes the latest month.

        Args:
            prices: Price history (rows are observations).
            window: Total lookback in rows.
            skip: Most recent rows to exclude; must be smaller than
                ``window``.

        Returns:
            Long/short weights from ``_long_short_rank``.

        Raises:
            ValueError: If ``window`` is not greater than ``skip``.
        """
        if window <= skip:
            raise ValueError("window must be greater than skip")
        # Measure over (window - skip) rows, then lag by `skip`, so the
        # measurement interval is [t - window, t - skip].
        mom = prices.pct_change(window - skip).shift(skip)
        return self._long_short_rank(mom, 'high')

    def quality_factor(self, prices, roe, accruals, leverage):
        """QUAL: profitability + low accruals + low leverage.

        Each input is standardised with ``_zscore``; accruals and leverage
        enter with a negative sign, and the three z-scores are averaged.

        Args:
            prices: Unused; kept for a uniform method signature.
            roe: Return on equity per asset.
            accruals: Accruals per asset (lower is better).
            leverage: Leverage per asset (lower is better).

        Returns:
            Long/short weights from ``_long_short_rank``.
        """
        roe_s = self._zscore(roe)
        acc_s = -self._zscore(accruals)  # Low accruals = good
        lev_s = -self._zscore(leverage)  # Low leverage = good
        qual = (roe_s + acc_s + lev_s) / 3.0
        return self._long_short_rank(qual, 'high')

    def low_vol_factor(self, prices, window=63):
        """BAB-style low-volatility factor: long low vol, short high vol.

        The score is the rolling standard deviation of simple returns,
        annualised with sqrt(252); no beta is estimated.

        Args:
            prices: Price history (rows are observations).
            window: Rolling window length in rows.

        Returns:
            Long/short weights from ``_long_short_rank``.
        """
        vol = prices.pct_change().rolling(window).std() * np.sqrt(252)
        return self._long_short_rank(vol, 'low')

    def liquidity_factor(self, prices, volumes, window=63):
        """LIQ: Amihud illiquidity, long liquid and short illiquid.

        Illiquidity is the rolling mean of ``|return| / dollar volume``,
        with dollar volume computed as ``volumes * prices``.

        Args:
            prices: Price history (rows are observations).
            volumes: Traded share volumes, aligned to ``prices.index``.
            window: Rolling window length in rows.

        Returns:
            Long/short weights from ``_long_short_rank``.
        """
        abs_ret = prices.pct_change().abs()
        dollar_volume = volumes.reindex(prices.index) * prices
        # Zero-volume rows would produce +/-inf; treat them as missing so
        # they cannot dominate the rolling mean.
        daily = (abs_ret / dollar_volume).replace([np.inf, -np.inf], np.nan)
        illiq = daily.rolling(window).mean()
        return self._long_short_rank(illiq, 'low')  # Long liquid, short illiquid

    def _zscore(self, x):
        """Standardise ``x`` with its own mean and standard deviation.

        A small epsilon in the denominator avoids division by zero for
        constant input. For a DataFrame, pandas computes the mean and std
        per column.
        """
        return (x - x.mean()) / (x.std() + 1e-10)

    def _long_short_rank(self, scores, direction='high'):
        """Form a long-short portfolio from cross-sectional scores.

        Valid scores are cut into ``self.n_q`` quantile buckets. The long
        leg is equal-weighted to sum to +1 and the short leg to sum to -1.

        Args:
            scores: Series of scores, or a DataFrame whose last row is
                used as the cross-section.
            direction: ``'high'`` goes long the top bucket and short the
                bottom one; any other value does the opposite.

        Returns:
            Series of weights indexed like the scores (zero for labels in
            neither leg). An empty float Series if there are no valid
            scores; all zeros if the scores cannot be split into two
            distinct buckets.
        """
        if isinstance(scores, pd.DataFrame):
            if scores.empty:
                return pd.Series(dtype=float)
            scores = scores.iloc[-1]

        valid = scores.replace([np.inf, -np.inf], np.nan).dropna()
        if valid.empty:
            return pd.Series(dtype=float)

        weights = pd.Series(0.0, index=scores.index)
        # With fewer than two distinct values there is no ranking, hence
        # no well-defined long or short leg.
        if valid.nunique() < 2:
            return weights
        try:
            buckets = pd.qcut(valid, self.n_q, labels=False, duplicates='drop')
        except ValueError:
            return weights
        buckets = buckets.dropna()
        if buckets.empty:
            return weights

        top, bottom = buckets.max(), buckets.min()
        if top == bottom:
            # One bucket only: the same names would be both long and short.
            return weights

        top_names = buckets[buckets == top].index
        bottom_names = buckets[buckets == bottom].index
        if direction == 'high':
            long_names, short_names = top_names, bottom_names
        else:
            long_names, short_names = bottom_names, top_names

        weights.loc[long_names] = 1.0 / len(long_names)
        weights.loc[short_names] = -1.0 / len(short_names)
        return weights

    def factor_returns(self, prices, factors_dict):
        """Compute factor returns from prices and factor portfolios.

        Weights are paired with the *next* row's return, so the value at
        row t is what a portfolio held at t earns over t -> t+1.

        Args:
            prices: DataFrame of prices, one column per asset.
            factors_dict: Mapping of factor name to a Series of weights
                indexed by asset; assets without a weight get 0.

        Returns:
            DataFrame of factor returns, one column per factor. Rows with
            no return data at all (such as the last row, which has no
            t+1 observation) are NaN rather than 0.
        """
        ret = prices.pct_change().shift(-1)  # t+1 return
        factor_rets = {}
        for name, weights in factors_dict.items():
            w = weights.reindex(ret.columns, fill_value=0)
            # min_count=1 keeps all-missing rows as NaN instead of a
            # misleading 0.0 return.
            factor_rets[name] = ret.mul(w, axis=1).sum(axis=1, min_count=1)
        return pd.DataFrame(factor_rets, index=ret.index)

    def factor_exposures(self, returns, factor_returns, min_obs=30):
        """Estimate factor betas with a full-sample OLS regression.

        Each column of ``returns`` is regressed on an intercept plus all
        columns of ``factor_returns`` over the rows where both are
        available.

        Args:
            returns: DataFrame of asset returns, one column per asset.
            factor_returns: DataFrame of factor returns.
            min_obs: Minimum number of overlapping observations; assets
                with fewer are skipped.

        Returns:
            DataFrame with one row per fitted asset and columns
            ``alpha`` followed by the factor names.
        """
        names = ['alpha'] + list(factor_returns.columns)
        betas = {}
        for col in returns.columns:
            y = returns[col].dropna()
            X = factor_returns.reindex(y.index).dropna()
            if len(X) < min_obs:
                continue
            yc = y.loc[X.index].values
            Xc = np.column_stack([np.ones(len(X)), X.values])
            beta = np.linalg.lstsq(Xc, yc, rcond=None)[0]
            betas[col] = dict(zip(names, beta))
        return pd.DataFrame(betas).T

    def factor_report(self, prices, book=None, mc=None, volumes=None):
        """Generate a summary report for a single asset.

        Args:
            prices: Series of the asset's prices over time.
            book: Optional Series of book values; the last value is used.
            mc: Optional Series of market caps; the last value is reported
                and ranked into deciles within ``mc`` itself.
            volumes: Optional Series of traded volumes.

        Returns:
            Dict of float statistics. ``size_decile`` is an int in 1..10,
            or None when the decile is undefined.

        Raises:
            ValueError: If ``prices`` is empty.
        """
        if len(prices) == 0:
            raise ValueError("prices must not be empty")

        ret = prices.pct_change().dropna()
        last_year = ret.tail(252)

        # Drawdown on price levels, so a fall from the very first
        # observation is counted as well.
        levels = prices.dropna()
        running_peak = levels.cummax()
        drawdown = (running_peak - levels) / running_peak

        last_price = prices.iloc[-1]
        start_price = prices.iloc[-min(252, len(prices))]

        report = {
            "momentum_12m": float(last_price / start_price - 1),
            "volatility_3m": float(ret.tail(63).std() * np.sqrt(252)),
            "sharpe_1y": float(
                last_year.mean() * 252
                / (last_year.std() * np.sqrt(252) + 1e-10)
            ),
            "max_drawdown": float(drawdown.max()),
            "skewness": float(ret.skew()),
            "kurtosis": float(ret.kurtosis()),
        }
        if book is not None:
            report["book_to_market"] = (
                float(book.iloc[-1] / last_price) if last_price > 0 else 0.0
            )
        if mc is not None:
            report["market_cap"] = float(mc.iloc[-1])
            decile = pd.qcut(mc, 10, labels=False, duplicates='drop').iloc[-1]
            # qcut yields NaN when the value cannot be bucketed (e.g. a
            # constant series); int(NaN) would raise.
            report["size_decile"] = None if pd.isna(decile) else int(decile) + 1
        if volumes is not None:
            avg_volume = volumes.tail(20).mean()
            report["avg_volume"] = float(avg_volume)
            report["dollar_volume"] = float(avg_volume * prices.tail(20).mean())
        return report
|
| 119 |
+
|
| 120 |
+
if __name__ == '__main__':
    # Demo: simulate one asset's price path from seeded Gaussian daily
    # returns and print its factor report.
    np.random.seed(42)
    n_days = 500
    daily_returns = np.random.normal(0.0005, 0.015, n_days)
    business_days = pd.date_range('2022-01-01', periods=n_days, freq='B')
    prices = pd.Series(np.cumprod(1 + daily_returns), index=business_days)
    model = CrossSectionalFactorModel()
    print(model.factor_report(prices))
|