Upload stat_arb_features.py
Browse files- stat_arb_features.py +49 -0
stat_arb_features.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Statistical Arbitrage Features - Cointegration, spread, relative value"""
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from statsmodels.tsa.stattools import coint
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class StatArbFeatures:
    """Pairs trading and cointegration features.

    All methods are static and take pandas objects; each returns a new
    object indexed like its first argument and does not modify its inputs.
    """

    @staticmethod
    def _rolling_zscore(values, window):
        """Return the rolling z-score of ``values`` over ``window`` rows.

        A rolling standard deviation of exactly 0 is replaced by 1 so that a
        flat window does not cause a division by zero.
        """
        rolling = values.rolling(window)
        return (values - rolling.mean()) / rolling.std().replace(0, 1)

    @staticmethod
    def cointegration_spread(price_a, price_b, window=126):
        """Build hedge-ratio, spread and spread z-score features for a pair.

        Args:
            price_a: Price series of the first leg.
            price_b: Price series of the second leg.
            window: Number of rows used for every rolling statistic.

        Returns:
            DataFrame indexed like ``price_a`` with columns

            * ``coint_hedge``  - rolling cov(ret_a, ret_b) / var(ret_b) of the
              simple returns; NaN where the rolling variance is 0.
            * ``coint_spread`` - ``log(price_a) - coint_hedge * log(price_b)``.
            * ``coint_zscore`` - rolling z-score of ``coint_spread``.
        """
        features = pd.DataFrame(index=price_a.index)

        ret_a = price_a.pct_change()
        ret_b = price_b.pct_change()

        # Rolling regression slope of ret_a on ret_b; a zero variance becomes
        # NaN so the ratio is undefined instead of infinite.
        rolling_cov = ret_a.rolling(window).cov(ret_b)
        rolling_var = ret_b.rolling(window).var()
        hedge = rolling_cov / rolling_var.replace(0, np.nan)

        spread = np.log(price_a) - hedge * np.log(price_b)

        features['coint_hedge'] = hedge
        features['coint_spread'] = spread
        features['coint_zscore'] = StatArbFeatures._rolling_zscore(spread, window)
        return features

    @staticmethod
    def relative_value(close, sector_close, window=21):
        """Compare a price series with the row-wise mean of a sector frame.

        Args:
            close: Series for the instrument of interest.
            sector_close: DataFrame whose row-wise mean (``mean(axis=1)``) is
                used as the sector level.
            window: Number of rows used for the rolling z-score.

        Returns:
            DataFrame indexed like ``close`` with columns ``rv_ratio``
            (``close`` divided by the sector mean) and ``rv_zscore`` (rolling
            z-score of ``rv_ratio``).
        """
        features = pd.DataFrame(index=close.index)
        sector_avg = sector_close.mean(axis=1)
        features['rv_ratio'] = close / sector_avg
        features['rv_zscore'] = StatArbFeatures._rolling_zscore(
            features['rv_ratio'], window
        )
        return features

    @staticmethod
    def half_life(series, window=60):
        """Compute rolling half-life of mean reversion.

        For each position ``i >= window`` the ``window`` values *before*
        ``i`` are used to regress the one-step change ``s[t] - s[t-1]`` on
        the lagged level ``s[t-1]`` (ordinary least squares with intercept).
        The half-life is ``-ln(2) / slope``.

        Args:
            series: Numeric Series (typically a spread).
            window: Number of trailing rows used for each estimate.

        Returns:
            Float Series indexed like ``series``. The first ``window`` rows
            are NaN; every other value lies in ``[1, 252]``:

            * ``21.0`` when fewer than 10 usable (lag, change) pairs exist or
              the slope is exactly 0 / undefined (flat window);
            * ``252`` when the slope is positive, i.e. the window shows no
              mean reversion, so the half-life is effectively unbounded;
            * ``-ln(2) / slope`` clipped to ``[1, 252]`` otherwise.
        """
        default_half_life = 21.0
        min_half_life, max_half_life = 1, 252
        min_pairs = 10

        values = series.to_numpy(dtype=float)
        estimates = np.full(len(values), np.nan)

        for i in range(window, len(values)):
            chunk = values[i - window:i]
            lagged = chunk[:-1]
            change = chunk[1:] - lagged

            # Drop a pair when either side is NaN/inf so that the remaining
            # lagged levels and changes stay aligned with each other.
            usable = np.isfinite(lagged) & np.isfinite(change)
            if usable.sum() < min_pairs:
                estimates[i] = default_half_life
                continue

            x = lagged[usable]
            y = change[usable]
            x_centered = x - x.mean()
            sxx = np.dot(x_centered, x_centered)
            if sxx == 0:
                # Flat lagged levels: the slope is undefined.
                estimates[i] = default_half_life
                continue

            # Closed-form OLS slope of y on x with an intercept.
            slope = np.dot(x_centered, y - y.mean()) / sxx
            if slope < 0:
                estimates[i] = -np.log(2) / slope
            elif slope > 0:
                # A positive slope means the series is diverging; a negative
                # "half-life" would otherwise be clipped to the minimum and
                # look like very fast mean reversion.
                estimates[i] = max_half_life
            else:
                estimates[i] = default_half_life

        result = pd.Series(estimates, index=series.index, dtype=float)
        return result.clip(min_half_life, max_half_life)
|