Riy777 committed on
Commit
5e177d4
·
verified ·
1 Parent(s): 5bdb2c0

Update backtest_engine.py

Browse files
Files changed (1) hide show
  1. backtest_engine.py +45 -40
backtest_engine.py CHANGED
@@ -104,88 +104,93 @@ def _transform_window_for_pattern(df_window):
104
 
105
  def calculate_sniper_features_exact(df):
106
  """
107
- Sniper Features Calculation - With AUTO-NORMALIZATION Patch.
108
- Converts price-dependent features to percentages to fix Scale Mismatch.
109
  """
110
  # 1. Standard Features
111
  d = df.copy()
112
  c = d['close']; h = d['high']; l = d['low']; v = d['volume']; o = d['open']
113
 
114
- # Basic Returns
 
 
 
 
 
 
115
  d['return_1m'] = c.pct_change(1).fillna(0)
116
  d['return_3m'] = c.pct_change(3).fillna(0)
117
  d['return_5m'] = c.pct_change(5).fillna(0)
118
  d['return_15m'] = c.pct_change(15).fillna(0)
119
 
120
- # Technicals
121
  d['rsi_14'] = ta.rsi(c, length=14).fillna(50)
122
 
123
  ema_9 = ta.ema(c, length=9).fillna(c)
124
  ema_21 = ta.ema(c, length=21).fillna(c)
125
 
 
126
  d['ema_9_slope'] = ((ema_9 - ema_9.shift(1)) / ema_9.shift(1).replace(0, np.nan)).fillna(0)
127
  d['ema_21_dist'] = ((c - ema_21) / ema_21.replace(0, np.nan)).fillna(0)
128
 
129
- # --- GEM-ARCHITECT PATCH: Normalize Price-Dependent Features ---
130
- # ATR (Normalized by Close)
131
  atr_raw = ta.atr(h, l, c, length=100).fillna(0)
132
- d['atr'] = (atr_raw / c).fillna(0) # Now it's a percentage (e.g., 0.002 instead of 0.25)
133
-
134
- # Volume Z-Score (Already normalized by z-score logic)
135
- def _z_roll(x, w=50):
136
- r = x.rolling(w).mean()
137
- s = x.rolling(w).std().replace(0, np.nan)
138
- return ((x - r) / s).fillna(0)
139
 
 
140
  d['vol_zscore_50'] = _z_roll(v, 50)
141
 
142
- # Candle Geometry (Normalized)
143
- rng = (h - l)
144
- d['candle_range'] = (rng / c).fillna(0) # Normalized
145
- d['close_pos_in_range'] = ((c - l) / rng.replace(0, 1e-9)).fillna(0.5)
146
 
147
- # 2. Liquidity Proxies
148
- # Amihud (Ratio of %Ret to $Vol -> inherently somewhat scale-invariant but sensitive)
 
 
149
  d['dollar_vol'] = c * v
150
- d['amihud'] = (d['return_1m'].abs() / d['dollar_vol'].replace(0, np.nan)).fillna(0)
 
151
 
152
- # Roll Spread (Normalized)
153
  dp = c.diff()
154
  roll_cov = dp.rolling(64).cov(dp.shift(1)).fillna(0)
155
  roll_spread_raw = (2 * np.sqrt(np.maximum(0, -roll_cov)))
156
- d['roll_spread'] = (roll_spread_raw / c).fillna(0) # Normalized
157
 
158
- # OFI (Volume based -> Normalized by Z-Score later or used raw if model expects raw volume flow)
159
- # Usually OFI is used as a signal direction, kept raw or z-scored.
160
- # Let's keep raw here as it's volume-based, not price-based.
161
  sign = np.sign(c.diff()).fillna(0)
162
  d['signed_vol'] = sign * v
163
- d['ofi'] = d['signed_vol'].rolling(30).sum().fillna(0)
 
164
 
165
- # VPIN (Ratio -> Unitless -> Safe)
166
  buy_vol = (sign > 0) * v
167
  sell_vol = (sign < 0) * v
168
  imb = (buy_vol.rolling(60).sum() - sell_vol.rolling(60).sum()).abs()
169
  tot = v.rolling(60).sum().replace(0, np.nan)
170
- d['vpin'] = (imb / tot).fillna(0)
171
 
172
- # Garman-Klass (Log returns -> Unitless -> Safe)
173
- d['rv_gk'] = ((np.log(h / l)**2) / 2) - ((2 * np.log(2) - 1) * (np.log(c / o)**2))
174
- d['rv_gk'] = d['rv_gk'].fillna(0)
175
 
176
- # VWAP Deviation (Normalized)
177
  vwap_win = 20
178
  vwap = (d['dollar_vol'].rolling(vwap_win).sum() / v.rolling(vwap_win).sum().replace(0, np.nan)).fillna(c)
179
- d['vwap_dev'] = ((c - vwap) / c).fillna(0) # Normalized
180
 
181
- # Liquidity Score (Composite - Z-scores handle scaling automatically)
 
182
  d['L_score'] = (
183
- _z_roll(v, 500) +
184
- _z_roll(1 / (d['amihud'] + 1e-12), 500) +
185
- _z_roll(-d['roll_spread'], 500) +
186
- _z_roll(-d['rv_gk'].abs(), 500) +
187
- _z_roll(-d['vwap_dev'].abs(), 500) +
188
- _z_roll(d['ofi'], 500)
189
  ).fillna(0)
190
 
191
  return sanitize_features(d)
 
104
 
105
  def calculate_sniper_features_exact(df):
106
  """
107
+ Sniper Features Calculation - TRUE UNIVERSAL MODE (Z-SCORE).
108
+ Converts structural features to Z-Scores to bypass scale issues.
109
  """
110
  # 1. Standard Features
111
  d = df.copy()
112
  c = d['close']; h = d['high']; l = d['low']; v = d['volume']; o = d['open']
113
 
114
+ # --- Helper: Rolling Z-Score ---
115
+ def _z_roll(x, w=200): # Window 200 is standard for regime detection
116
+ r = x.rolling(w).mean()
117
+ s = x.rolling(w).std().replace(0, np.nan)
118
+ return ((x - r) / s).fillna(0)
119
+
120
+ # Basic Returns (Keep as percentages, trees handle these well)
121
  d['return_1m'] = c.pct_change(1).fillna(0)
122
  d['return_3m'] = c.pct_change(3).fillna(0)
123
  d['return_5m'] = c.pct_change(5).fillna(0)
124
  d['return_15m'] = c.pct_change(15).fillna(0)
125
 
126
+ # Technicals (RSI is bounded 0-100, usually safe)
127
  d['rsi_14'] = ta.rsi(c, length=14).fillna(50)
128
 
129
  ema_9 = ta.ema(c, length=9).fillna(c)
130
  ema_21 = ta.ema(c, length=21).fillna(c)
131
 
132
+ # Slopes/Distances -> Normalized
133
  d['ema_9_slope'] = ((ema_9 - ema_9.shift(1)) / ema_9.shift(1).replace(0, np.nan)).fillna(0)
134
  d['ema_21_dist'] = ((c - ema_21) / ema_21.replace(0, np.nan)).fillna(0)
135
 
136
+ # --- TRANSFORM 1: ATR (Vol) -> Z-Score ---
137
+ # Instead of raw value or %, check if Volatility is spiking relative to history
138
  atr_raw = ta.atr(h, l, c, length=100).fillna(0)
139
+ d['atr'] = _z_roll(atr_raw, 500)
 
 
 
 
 
 
140
 
141
+ # Volume Z-Score
142
  d['vol_zscore_50'] = _z_roll(v, 50)
143
 
144
+ # Candle Geometry
145
+ rng = (h - l).replace(0, 1e-9)
146
+ d['candle_range'] = _z_roll(rng, 500) # Is this candle huge relative to history?
147
+ d['close_pos_in_range'] = ((c - l) / rng).fillna(0.5)
148
 
149
+ # --- TRANSFORM 2: Liquidity Proxies -> Z-Score ---
150
+ # This fixes the Amihud 1e-8 issue completely.
151
+
152
+ # Amihud
153
  d['dollar_vol'] = c * v
154
+ amihud_raw = (d['return_1m'].abs() / d['dollar_vol'].replace(0, np.nan)).fillna(0)
155
+ d['amihud'] = _z_roll(amihud_raw, 500)
156
 
157
+ # Roll Spread
158
  dp = c.diff()
159
  roll_cov = dp.rolling(64).cov(dp.shift(1)).fillna(0)
160
  roll_spread_raw = (2 * np.sqrt(np.maximum(0, -roll_cov)))
161
+ d['roll_spread'] = _z_roll(roll_spread_raw, 500)
162
 
163
+ # OFI (Order Flow)
 
 
164
  sign = np.sign(c.diff()).fillna(0)
165
  d['signed_vol'] = sign * v
166
+ ofi_raw = d['signed_vol'].rolling(30).sum().fillna(0)
167
+ d['ofi'] = _z_roll(ofi_raw, 500)
168
 
169
+ # VPIN
170
  buy_vol = (sign > 0) * v
171
  sell_vol = (sign < 0) * v
172
  imb = (buy_vol.rolling(60).sum() - sell_vol.rolling(60).sum()).abs()
173
  tot = v.rolling(60).sum().replace(0, np.nan)
174
+ d['vpin'] = (imb / tot).fillna(0) # VPIN is a ratio (0-1), usually safe without Z-score, but let's keep it raw
175
 
176
+ # Garman-Klass Volatility
177
+ rv_gk_raw = ((np.log(h / l)**2) / 2) - ((2 * np.log(2) - 1) * (np.log(c / o)**2))
178
+ d['rv_gk'] = _z_roll(rv_gk_raw.fillna(0), 500)
179
 
180
+ # VWAP Deviation
181
  vwap_win = 20
182
  vwap = (d['dollar_vol'].rolling(vwap_win).sum() / v.rolling(vwap_win).sum().replace(0, np.nan)).fillna(c)
183
+ d['vwap_dev'] = _z_roll((c - vwap), 500)
184
 
185
+ # Liquidity Score (Composite - Already using internal Z-scores in logic, but let's re-calc)
186
+ # Note: We use the already Z-scored columns now where possible
187
  d['L_score'] = (
188
+ d['vol_zscore_50'] +
189
+ (-d['amihud']) + # Low illiquidity is good
190
+ (-d['roll_spread']) +
191
+ (-d['rv_gk'].abs()) +
192
+ (-d['vwap_dev'].abs()) +
193
+ d['ofi']
194
  ).fillna(0)
195
 
196
  return sanitize_features(d)