Spaces:
Paused
Paused
Update backtest_engine.py
Browse files- backtest_engine.py +45 -40
backtest_engine.py
CHANGED
|
@@ -104,88 +104,93 @@ def _transform_window_for_pattern(df_window):
|
|
| 104 |
|
| 105 |
def calculate_sniper_features_exact(df):
|
| 106 |
"""
|
| 107 |
-
Sniper Features Calculation -
|
| 108 |
-
Converts
|
| 109 |
"""
|
| 110 |
# 1. Standard Features
|
| 111 |
d = df.copy()
|
| 112 |
c = d['close']; h = d['high']; l = d['low']; v = d['volume']; o = d['open']
|
| 113 |
|
| 114 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
d['return_1m'] = c.pct_change(1).fillna(0)
|
| 116 |
d['return_3m'] = c.pct_change(3).fillna(0)
|
| 117 |
d['return_5m'] = c.pct_change(5).fillna(0)
|
| 118 |
d['return_15m'] = c.pct_change(15).fillna(0)
|
| 119 |
|
| 120 |
-
# Technicals
|
| 121 |
d['rsi_14'] = ta.rsi(c, length=14).fillna(50)
|
| 122 |
|
| 123 |
ema_9 = ta.ema(c, length=9).fillna(c)
|
| 124 |
ema_21 = ta.ema(c, length=21).fillna(c)
|
| 125 |
|
|
|
|
| 126 |
d['ema_9_slope'] = ((ema_9 - ema_9.shift(1)) / ema_9.shift(1).replace(0, np.nan)).fillna(0)
|
| 127 |
d['ema_21_dist'] = ((c - ema_21) / ema_21.replace(0, np.nan)).fillna(0)
|
| 128 |
|
| 129 |
-
# ---
|
| 130 |
-
#
|
| 131 |
atr_raw = ta.atr(h, l, c, length=100).fillna(0)
|
| 132 |
-
d['atr'] = (atr_raw
|
| 133 |
-
|
| 134 |
-
# Volume Z-Score (Already normalized by z-score logic)
|
| 135 |
-
def _z_roll(x, w=50):
|
| 136 |
-
r = x.rolling(w).mean()
|
| 137 |
-
s = x.rolling(w).std().replace(0, np.nan)
|
| 138 |
-
return ((x - r) / s).fillna(0)
|
| 139 |
|
|
|
|
| 140 |
d['vol_zscore_50'] = _z_roll(v, 50)
|
| 141 |
|
| 142 |
-
# Candle Geometry
|
| 143 |
-
rng = (h - l)
|
| 144 |
-
d['candle_range'] = (rng
|
| 145 |
-
d['close_pos_in_range'] = ((c - l) / rng
|
| 146 |
|
| 147 |
-
# 2
|
| 148 |
-
#
|
|
|
|
|
|
|
| 149 |
d['dollar_vol'] = c * v
|
| 150 |
-
|
|
|
|
| 151 |
|
| 152 |
-
# Roll Spread
|
| 153 |
dp = c.diff()
|
| 154 |
roll_cov = dp.rolling(64).cov(dp.shift(1)).fillna(0)
|
| 155 |
roll_spread_raw = (2 * np.sqrt(np.maximum(0, -roll_cov)))
|
| 156 |
-
d['roll_spread'] = (roll_spread_raw
|
| 157 |
|
| 158 |
-
# OFI (
|
| 159 |
-
# Usually OFI is used as a signal direction, kept raw or z-scored.
|
| 160 |
-
# Let's keep raw here as it's volume-based, not price-based.
|
| 161 |
sign = np.sign(c.diff()).fillna(0)
|
| 162 |
d['signed_vol'] = sign * v
|
| 163 |
-
|
|
|
|
| 164 |
|
| 165 |
-
# VPIN
|
| 166 |
buy_vol = (sign > 0) * v
|
| 167 |
sell_vol = (sign < 0) * v
|
| 168 |
imb = (buy_vol.rolling(60).sum() - sell_vol.rolling(60).sum()).abs()
|
| 169 |
tot = v.rolling(60).sum().replace(0, np.nan)
|
| 170 |
-
d['vpin'] = (imb / tot).fillna(0)
|
| 171 |
|
| 172 |
-
# Garman-Klass
|
| 173 |
-
|
| 174 |
-
d['rv_gk'] =
|
| 175 |
|
| 176 |
-
# VWAP Deviation
|
| 177 |
vwap_win = 20
|
| 178 |
vwap = (d['dollar_vol'].rolling(vwap_win).sum() / v.rolling(vwap_win).sum().replace(0, np.nan)).fillna(c)
|
| 179 |
-
d['vwap_dev'] = ((c - vwap)
|
| 180 |
|
| 181 |
-
# Liquidity Score (Composite - Z-scores
|
|
|
|
| 182 |
d['L_score'] = (
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
).fillna(0)
|
| 190 |
|
| 191 |
return sanitize_features(d)
|
|
|
|
| 104 |
|
| 105 |
def calculate_sniper_features_exact(df):
    """
    Sniper Features Calculation - TRUE UNIVERSAL MODE (Z-SCORE).

    Builds the feature columns on a copy of ``df`` (the input frame is not
    modified) and returns the result of ``sanitize_features`` on that copy.
    Scale-dependent features (ATR, candle range, Amihud, Roll spread, OFI,
    Garman-Klass, VWAP deviation) are converted to rolling Z-scores to bypass
    scale issues; returns, RSI, EMA ratios, VPIN and close position stay raw.

    Args:
        df: DataFrame with 'open', 'high', 'low', 'close' and 'volume' columns.

    Returns:
        The output of ``sanitize_features`` applied to the augmented copy.
    """
    # 1. Standard Features
    d = df.copy()
    c = d['close']
    h = d['high']
    lo = d['low']
    v = d['volume']
    o = d['open']

    # --- Helper: Rolling Z-Score ---
    def _z_roll(x, w=200):
        """Rolling Z-score of ``x`` over ``w`` rows.

        Rows without a full window (rolling mean/std are NaN) and rows where
        the rolling std is 0 are returned as 0.
        """
        r = x.rolling(w).mean()
        s = x.rolling(w).std().replace(0, np.nan)  # avoid division by zero
        return ((x - r) / s).fillna(0)

    # Basic Returns (Keep as percentages, trees handle these well)
    d['return_1m'] = c.pct_change(1).fillna(0)
    d['return_3m'] = c.pct_change(3).fillna(0)
    d['return_5m'] = c.pct_change(5).fillna(0)
    d['return_15m'] = c.pct_change(15).fillna(0)

    # Technicals (RSI is bounded 0-100, usually safe)
    # NOTE(review): `ta` is presumably pandas_ta, whose indicators may return
    # None for inputs shorter than `length` - confirm callers pass enough rows.
    d['rsi_14'] = ta.rsi(c, length=14).fillna(50)

    ema_9 = ta.ema(c, length=9).fillna(c)
    ema_21 = ta.ema(c, length=21).fillna(c)

    # Slopes/Distances -> Normalized (relative to the EMA level itself)
    d['ema_9_slope'] = ((ema_9 - ema_9.shift(1)) / ema_9.shift(1).replace(0, np.nan)).fillna(0)
    d['ema_21_dist'] = ((c - ema_21) / ema_21.replace(0, np.nan)).fillna(0)

    # --- TRANSFORM 1: ATR (Vol) -> Z-Score ---
    # Instead of raw value or %, check if Volatility is spiking relative to history
    atr_raw = ta.atr(h, lo, c, length=100).fillna(0)
    d['atr'] = _z_roll(atr_raw, 500)

    # Volume Z-Score
    d['vol_zscore_50'] = _z_roll(v, 50)

    # Candle Geometry
    raw_rng = h - lo
    rng = raw_rng.replace(0, 1e-9)
    d['candle_range'] = _z_roll(rng, 500)  # Is this candle huge relative to history?
    # Divide by a NaN-masked range so a flat candle (high == low) gets the
    # neutral 0.5 default; dividing by the 1e-9 substitute would yield 0.0
    # and the fillna(0.5) would never apply to that case.
    d['close_pos_in_range'] = ((c - lo) / raw_rng.replace(0, np.nan)).fillna(0.5)

    # --- TRANSFORM 2: Liquidity Proxies -> Z-Score ---
    # This fixes the Amihud 1e-8 issue completely.

    # Amihud: |1-bar return| per unit of dollar volume
    d['dollar_vol'] = c * v
    amihud_raw = (d['return_1m'].abs() / d['dollar_vol'].replace(0, np.nan)).fillna(0)
    d['amihud'] = _z_roll(amihud_raw, 500)

    # Roll Spread: 2 * sqrt(-cov(dp_t, dp_{t-1})), clipped at 0 when cov > 0
    dp = c.diff()
    roll_cov = dp.rolling(64).cov(dp.shift(1)).fillna(0)
    roll_spread_raw = (2 * np.sqrt(np.maximum(0, -roll_cov)))
    d['roll_spread'] = _z_roll(roll_spread_raw, 500)

    # OFI (Order Flow): 30-row sum of volume signed by the close-to-close move
    sign = np.sign(c.diff()).fillna(0)
    d['signed_vol'] = sign * v
    ofi_raw = d['signed_vol'].rolling(30).sum().fillna(0)
    d['ofi'] = _z_roll(ofi_raw, 500)

    # VPIN: |buy volume - sell volume| / total volume over 60 rows
    buy_vol = (sign > 0) * v
    sell_vol = (sign < 0) * v
    imb = (buy_vol.rolling(60).sum() - sell_vol.rolling(60).sum()).abs()
    tot = v.rolling(60).sum().replace(0, np.nan)
    d['vpin'] = (imb / tot).fillna(0)  # VPIN is a ratio (0-1), kept raw

    # Garman-Klass Volatility
    rv_gk_raw = ((np.log(h / lo)**2) / 2) - ((2 * np.log(2) - 1) * (np.log(c / o)**2))
    # log() of a zero price yields +/-inf, which fillna alone would let through
    # into the rolling mean/std; treat it as missing first.
    rv_gk_raw = rv_gk_raw.replace([np.inf, -np.inf], np.nan).fillna(0)
    d['rv_gk'] = _z_roll(rv_gk_raw, 500)

    # VWAP Deviation (20-row rolling VWAP; falls back to close when undefined)
    vwap_win = 20
    vwap = (d['dollar_vol'].rolling(vwap_win).sum() / v.rolling(vwap_win).sum().replace(0, np.nan)).fillna(c)
    d['vwap_dev'] = _z_roll((c - vwap), 500)

    # Liquidity Score (Composite)
    # Note: We use the already Z-scored columns now where possible
    d['L_score'] = (
        d['vol_zscore_50'] +
        (-d['amihud']) +  # Low illiquidity is good
        (-d['roll_spread']) +
        (-d['rv_gk'].abs()) +
        (-d['vwap_dev'].abs()) +
        d['ofi']
    ).fillna(0)

    return sanitize_features(d)
|