Riy777 committed on
Commit
bcb4fc4
·
verified ·
1 Parent(s): 1245491

Update backtest_engine.py

Browse files
Files changed (1) hide show
  1. backtest_engine.py +77 -43
backtest_engine.py CHANGED
@@ -1,5 +1,5 @@
1
  # ============================================================
2
- # 🧪 backtest_engine.py (V135.1 - GEM-Architect: EMA20 Fix)
3
  # ============================================================
4
 
5
  import asyncio
@@ -17,7 +17,7 @@ import traceback
17
  from numpy.lib.stride_tricks import sliding_window_view
18
  from datetime import datetime, timezone
19
  from typing import Dict, Any, List
20
- from scipy.special import expit # Sigmoid
21
 
22
  try:
23
  from ml_engine.processor import MLProcessor, SystemLimits
@@ -38,9 +38,11 @@ CACHE_DIR = "backtest_real_scores"
38
  # ============================================================
39
  def sanitize_features(df):
40
  if df is None or df.empty: return df
41
- return df.replace([np.inf, -np.inf], np.nan).fillna(0.0)
 
42
 
43
  def _z_roll(x, w=500):
 
44
  r = x.rolling(w).mean()
45
  s = x.rolling(w).std().replace(0, np.nan)
46
  return ((x - r) / s).fillna(0)
@@ -49,12 +51,27 @@ def _revive_score_distribution(scores):
49
  scores = np.array(scores, dtype=np.float32)
50
  if len(scores) < 10: return scores
51
  std = np.std(scores)
 
52
  if std < 0.05:
53
  mean = np.mean(scores)
54
  z = (scores - mean) / (std + 1e-9)
55
  return expit(z)
56
  return scores
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  # ============================================================
59
  # 🧪 THE BACKTESTER CLASS
60
  # ============================================================
@@ -83,7 +100,7 @@ class HeavyDutyBacktester:
83
  self.force_end_date = None
84
 
85
  if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
86
- print(f"🧪 [Backtest V135.1] Feature Parity + Full Diagnostics + Speed.")
87
 
88
  def set_date_range(self, start_str, end_str):
89
  self.force_start_date = start_str
@@ -129,58 +146,67 @@ class HeavyDutyBacktester:
129
  return df.values.tolist()
130
 
131
  # ==============================================================
132
- # 🏎️ VECTORIZED INDICATORS (EXACT MATCH TO LIVE SYSTEM)
133
  # ==============================================================
134
  def _calculate_indicators_vectorized(self, df, timeframe='1m'):
135
  # 1. Clean Types
136
  cols = ['close', 'high', 'low', 'volume', 'open']
137
  for c in cols: df[c] = df[c].astype(np.float64)
138
 
 
 
 
 
139
  # ---------------------------------------------------------
140
  # 🧠 PART 1: TITAN FEATURES
141
  # ---------------------------------------------------------
142
- df['RSI'] = ta.rsi(df['close'], length=14).fillna(50)
143
 
 
144
  macd = ta.macd(df['close'])
145
  if macd is not None:
146
- df['MACD'] = macd.iloc[:, 0].fillna(0)
147
- df['MACD_h'] = macd.iloc[:, 1].fillna(0)
148
  else:
149
  df['MACD'] = 0.0; df['MACD_h'] = 0.0
150
 
151
- df['CCI'] = ta.cci(df['high'], df['low'], df['close'], length=20).fillna(0)
152
 
153
  adx = ta.adx(df['high'], df['low'], df['close'], length=14)
154
- if adx is not None: df['ADX'] = adx.iloc[:, 0].fillna(0)
155
  else: df['ADX'] = 0.0
156
 
157
- # Titan uses 9, 21, 50, 200
158
  for p in [9, 21, 50, 200]:
159
- ema = ta.ema(df['close'], length=p)
160
- df[f'EMA_{p}_dist'] = ((df['close'] / ema) - 1).fillna(0)
 
161
  df[f'ema{p}'] = ema
162
 
163
- # [GEM-FIX] Explicitly calculate EMA20 for Legacy models
164
- df['ema20'] = ta.ema(df['close'], length=20).fillna(df['close'])
165
 
166
  bb = ta.bbands(df['close'], length=20, std=2.0)
167
  if bb is not None:
168
- df['BB_w'] = ((bb.iloc[:, 2] - bb.iloc[:, 0]) / bb.iloc[:, 1]).fillna(0)
169
- df['BB_p'] = ((df['close'] - bb.iloc[:, 0]) / (bb.iloc[:, 2] - bb.iloc[:, 0])).fillna(0)
170
- df['bb_width'] = df['BB_w']
 
 
 
 
 
171
 
172
- df['MFI'] = ta.mfi(df['high'], df['low'], df['close'], df['volume'], length=14).fillna(50)
173
 
174
  vwap = ta.vwap(df['high'], df['low'], df['close'], df['volume'])
175
  if vwap is not None:
176
- df['VWAP_dist'] = ((df['close'] / vwap) - 1).fillna(0)
177
  df['vwap'] = vwap
178
  else:
179
  df['VWAP_dist'] = 0.0
180
  df['vwap'] = df['close']
181
 
182
- df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14).fillna(0)
183
- df['atr_pct'] = df['atr'] / df['close']
184
 
185
  # ---------------------------------------------------------
186
  # 🎯 PART 2: SNIPER FEATURES (1m Only)
@@ -192,10 +218,12 @@ class HeavyDutyBacktester:
192
  df['return_15m'] = df['close'].pct_change(15).fillna(0)
193
 
194
  df['rsi_14'] = df['RSI']
195
- df['ema_9_slope'] = ((df['ema9'] - df['ema9'].shift(1)) / df['ema9'].shift(1)).fillna(0)
 
 
196
  df['ema_21_dist'] = df['EMA_21_dist']
197
 
198
- atr_100 = ta.atr(df['high'], df['low'], df['close'], length=100).fillna(0)
199
  df['atr_z'] = _z_roll(atr_100)
200
 
201
  df['vol_zscore_50'] = _z_roll(df['volume'], 50)
@@ -210,19 +238,19 @@ class HeavyDutyBacktester:
210
 
211
  dp = df['close'].diff()
212
  roll_cov = dp.rolling(64).cov(dp.shift(1))
213
- roll_spread_raw = (2 * np.sqrt(np.maximum(0, -roll_cov)))
214
  df['roll_spread'] = _z_roll(roll_spread_raw)
215
 
216
  sign = np.sign(df['close'].diff()).fillna(0)
217
  signed_vol = sign * df['volume']
218
- ofi_raw = signed_vol.rolling(30).sum()
219
  df['ofi'] = _z_roll(ofi_raw)
220
 
221
  buy_vol = (sign > 0) * df['volume']
222
  sell_vol = (sign < 0) * df['volume']
223
  imb = (buy_vol.rolling(60).sum() - sell_vol.rolling(60).sum()).abs()
224
- tot = df['volume'].rolling(60).sum()
225
- df['vpin'] = (imb / tot.replace(0, np.nan)).fillna(0)
226
 
227
  vwap_win = 20
228
  v_short = (df['dollar_vol'].rolling(vwap_win).sum() / df['volume'].rolling(vwap_win).sum().replace(0, np.nan)).fillna(df['close'])
@@ -236,29 +264,32 @@ class HeavyDutyBacktester:
236
  # ---------------------------------------------------------
237
  # 🧠 PART 3: ORACLE / HYDRA / LEGACY EXTRAS
238
  # ---------------------------------------------------------
239
- df['slope'] = ta.slope(df['close'], length=7).fillna(0)
240
  vol_mean = df['volume'].rolling(20).mean()
241
- vol_std = df['volume'].rolling(20).std()
242
- df['vol_z'] = ((df['volume'] - vol_mean) / (vol_std + 1e-9)).fillna(0)
243
 
244
  df['rel_vol'] = df['volume'] / (df['volume'].rolling(50).mean() + 1e-9)
245
 
246
- df['log_ret'] = np.log(df['close'] / df['close'].shift(1)).fillna(0)
247
  roll_max = df['high'].rolling(50).max()
248
  roll_min = df['low'].rolling(50).min()
249
  diff = (roll_max - roll_min).replace(0, 1e-9)
250
  df['fib_pos'] = ((df['close'] - roll_min) / diff).fillna(0.5)
251
 
252
- # Now 'ema20' exists!
253
- df['trend_slope'] = ((df['ema20'] - df['ema20'].shift(5)) / df['ema20'].shift(5)).fillna(0)
254
- df['volatility'] = (df['atr'] / df['close']).fillna(0)
255
 
256
  fib618 = roll_max - (diff * 0.382)
257
- df['dist_fib618'] = ((df['close'] - fib618) / df['close']).fillna(0)
 
 
 
258
 
259
- df['dist_ema50'] = (df['close'] - df['ema50']) / df['close']
260
- df['ema200'] = ta.ema(df['close'], length=200)
261
- df['dist_ema200'] = (df['close'] - df['ema200']) / df['close']
262
 
263
  if timeframe == '1m':
264
  for lag in [1, 2, 3, 5, 10, 20]:
@@ -267,11 +298,12 @@ class HeavyDutyBacktester:
267
  df[f'fib_pos_lag_{lag}'] = df['fib_pos'].shift(lag).fillna(0.5)
268
  df[f'volatility_lag_{lag}'] = df['volatility'].shift(lag).fillna(0)
269
 
 
270
  df.fillna(0, inplace=True)
271
  return df
272
 
273
  # ==============================================================
274
- # 🧠 CPU PROCESSING (GLOBAL INFERENCE + FULL FEATURE PARITY)
275
  # ==============================================================
276
  async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
277
  safe_sym = sym.replace('/', '_')
@@ -282,7 +314,7 @@ class HeavyDutyBacktester:
282
  print(f" 📂 [{sym}] Data Exists -> Skipping.")
283
  return
284
 
285
- print(f" ⚙️ [CPU] Analyzing {sym} (Full Stack / High Fidelity)...", flush=True)
286
  t0 = time.time()
287
 
288
  df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
@@ -307,7 +339,7 @@ class HeavyDutyBacktester:
307
  frames[tf_str] = resampled
308
  numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}
309
 
310
- # 3. Global Index Maps
311
  arr_ts_1m = fast_1m['timestamp']
312
  map_5m = np.clip(np.searchsorted(numpy_htf['5m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['5m']['timestamp']) - 1)
313
  map_15m = np.clip(np.searchsorted(numpy_htf['15m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['15m']['timestamp']) - 1)
@@ -344,7 +376,10 @@ class HeavyDutyBacktester:
344
  else:
345
  t_vecs.append(np.zeros(len(arr_ts_1m)))
346
 
 
347
  X_TITAN = np.column_stack(t_vecs)
 
 
348
  preds_t = titan_model.predict(xgb.DMatrix(X_TITAN))
349
  global_titan_scores = _revive_score_distribution(preds_t)
350
  except Exception as e: print(f"Titan Error: {e}")
@@ -485,7 +520,6 @@ class HeavyDutyBacktester:
485
  l2_arr = np.full(240, 0.7)
486
  tgt_arr = np.full(240, 3.0)
487
 
488
- # [rsi1, rsi5, rsi15, bb, vol, dist_ema, atr_p, norm, max, dists, time, entry, oracle, l2, target]
489
  X_H = np.column_stack([
490
  sl_st[:,0], sl_st[:,1], sl_st[:,2], sl_st[:,3], sl_st[:,4],
491
  zeros, atr_pct, norm_pnl, max_pnl_r,
 
1
  # ============================================================
2
+ # 🧪 backtest_engine.py (V136.0 - GEM-Architect: Data Integrity Fixed)
3
  # ============================================================
4
 
5
  import asyncio
 
17
  from numpy.lib.stride_tricks import sliding_window_view
18
  from datetime import datetime, timezone
19
  from typing import Dict, Any, List
20
+ from scipy.special import expit
21
 
22
  try:
23
  from ml_engine.processor import MLProcessor, SystemLimits
 
38
  # ============================================================
39
def sanitize_features(df):
    """Return *df* with infinities and NaNs replaced by finite values.

    Infinities become NaN, gaps are forward/backward filled so indicator
    warmup rows inherit the nearest real value instead of a flat zero,
    and any still-empty cells (all-NaN columns) become 0.0.
    None and empty frames are returned unchanged.
    """
    if df is None or df.empty:
        return df
    # NOTE(review): bfill pulls later values backwards in time. That is fine
    # for the leading warmup rows, but confirm it cannot leak future data
    # into mid-series gaps of a backtest feature frame.
    return df.replace([np.inf, -np.inf], np.nan).ffill().bfill().fillna(0.0)
43
 
44
  def _z_roll(x, w=500):
45
+ if not isinstance(x, pd.Series): x = pd.Series(x)
46
  r = x.rolling(w).mean()
47
  s = x.rolling(w).std().replace(0, np.nan)
48
  return ((x - r) / s).fillna(0)
 
51
  scores = np.array(scores, dtype=np.float32)
52
  if len(scores) < 10: return scores
53
  std = np.std(scores)
54
+ # If standard deviation is extremely low, it means model is outputting constant 'dead' values
55
  if std < 0.05:
56
  mean = np.mean(scores)
57
  z = (scores - mean) / (std + 1e-9)
58
  return expit(z)
59
  return scores
60
 
61
+ # ✅ [GEM-FIX] Smart Indicator Wrapper (No more Zeros)
62
# ✅ [GEM-FIX] Smart Indicator Wrapper (No more Zeros)
def safe_ta(ind_output, index, fill_method='smart'):
    """Coerce a pandas-ta indicator result into a gap-free float64 Series.

    Parameters
    ----------
    ind_output : pd.Series | array-like | None
        Raw indicator output; ``None`` (indicator failed) yields all zeros.
    index : pd.Index
        Index used to align array-like output (and the all-zero fallback).
    fill_method : str, optional
        ``'smart'`` (default): backfill the warmup gap, then forward-fill
        later gaps. Any other value skips the directional fills and relies
        on the final zero-fill only. (Previously this parameter was
        accepted but ignored; the default keeps the old behavior.)
    """
    if ind_output is None:
        return pd.Series(0.0, index=index, dtype='float64')

    if isinstance(ind_output, pd.Series):
        s = ind_output
    else:
        s = pd.Series(ind_output, index=index)

    if fill_method == 'smart':
        # Backfill first so the indicator warmup period inherits the first
        # real value instead of a flat 0, then forward-fill trailing gaps.
        s = s.bfill().ffill()
    return s.fillna(0.0).astype('float64')
74
+
75
  # ============================================================
76
  # 🧪 THE BACKTESTER CLASS
77
  # ============================================================
 
100
  self.force_end_date = None
101
 
102
  if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
103
+ print(f"🧪 [Backtest V136.0] Data Integrity Edition (Smart-Fill Active).")
104
 
105
  def set_date_range(self, start_str, end_str):
106
  self.force_start_date = start_str
 
146
  return df.values.tolist()
147
 
148
  # ==============================================================
149
+ # 🏎️ VECTORIZED INDICATORS (SMART FILL)
150
  # ==============================================================
151
  def _calculate_indicators_vectorized(self, df, timeframe='1m'):
152
  # 1. Clean Types
153
  cols = ['close', 'high', 'low', 'volume', 'open']
154
  for c in cols: df[c] = df[c].astype(np.float64)
155
 
156
+ # Ensure no gaps in price before calc
157
+ df[cols] = df[cols].ffill().bfill()
158
+ idx = df.index
159
+
160
  # ---------------------------------------------------------
161
  # 🧠 PART 1: TITAN FEATURES
162
  # ---------------------------------------------------------
163
+ df['RSI'] = safe_ta(ta.rsi(df['close'], length=14), idx)
164
 
165
+ # MACD
166
  macd = ta.macd(df['close'])
167
  if macd is not None:
168
+ df['MACD'] = safe_ta(macd.iloc[:, 0], idx)
169
+ df['MACD_h'] = safe_ta(macd.iloc[:, 1], idx)
170
  else:
171
  df['MACD'] = 0.0; df['MACD_h'] = 0.0
172
 
173
+ df['CCI'] = safe_ta(ta.cci(df['high'], df['low'], df['close'], length=20), idx)
174
 
175
  adx = ta.adx(df['high'], df['low'], df['close'], length=14)
176
+ if adx is not None: df['ADX'] = safe_ta(adx.iloc[:, 0], idx)
177
  else: df['ADX'] = 0.0
178
 
 
179
  for p in [9, 21, 50, 200]:
180
+ ema = safe_ta(ta.ema(df['close'], length=p), idx)
181
+ # Use replace(0, np.nan) to avoid Infinity
182
+ df[f'EMA_{p}_dist'] = ((df['close'] / ema.replace(0, np.nan)) - 1).fillna(0)
183
  df[f'ema{p}'] = ema
184
 
185
+ df['ema20'] = safe_ta(ta.ema(df['close'], length=20), idx)
 
186
 
187
  bb = ta.bbands(df['close'], length=20, std=2.0)
188
  if bb is not None:
189
+ # Width
190
+ w = ((bb.iloc[:, 2] - bb.iloc[:, 0]) / bb.iloc[:, 1].replace(0, np.nan)).fillna(0)
191
+ # %B
192
+ p = ((df['close'] - bb.iloc[:, 0]) / (bb.iloc[:, 2] - bb.iloc[:, 0]).replace(0, np.nan)).fillna(0)
193
+ df['BB_w'] = w; df['BB_p'] = p
194
+ df['bb_width'] = w
195
+ else:
196
+ df['BB_w'] = 0; df['BB_p'] = 0; df['bb_width'] = 0
197
 
198
+ df['MFI'] = safe_ta(ta.mfi(df['high'], df['low'], df['close'], df['volume'], length=14), idx)
199
 
200
  vwap = ta.vwap(df['high'], df['low'], df['close'], df['volume'])
201
  if vwap is not None:
202
+ df['VWAP_dist'] = ((df['close'] / vwap.replace(0, np.nan)) - 1).fillna(0)
203
  df['vwap'] = vwap
204
  else:
205
  df['VWAP_dist'] = 0.0
206
  df['vwap'] = df['close']
207
 
208
+ df['atr'] = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=14), idx)
209
+ df['atr_pct'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
210
 
211
  # ---------------------------------------------------------
212
  # 🎯 PART 2: SNIPER FEATURES (1m Only)
 
218
  df['return_15m'] = df['close'].pct_change(15).fillna(0)
219
 
220
  df['rsi_14'] = df['RSI']
221
+
222
+ e9 = df['ema9'].replace(0, np.nan)
223
+ df['ema_9_slope'] = ((df['ema9'] - df['ema9'].shift(1)) / e9.shift(1)).fillna(0)
224
  df['ema_21_dist'] = df['EMA_21_dist']
225
 
226
+ atr_100 = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=100), idx)
227
  df['atr_z'] = _z_roll(atr_100)
228
 
229
  df['vol_zscore_50'] = _z_roll(df['volume'], 50)
 
238
 
239
  dp = df['close'].diff()
240
  roll_cov = dp.rolling(64).cov(dp.shift(1))
241
+ roll_spread_raw = (2 * np.sqrt(np.maximum(0, -roll_cov))).fillna(0)
242
  df['roll_spread'] = _z_roll(roll_spread_raw)
243
 
244
  sign = np.sign(df['close'].diff()).fillna(0)
245
  signed_vol = sign * df['volume']
246
+ ofi_raw = signed_vol.rolling(30).sum().fillna(0)
247
  df['ofi'] = _z_roll(ofi_raw)
248
 
249
  buy_vol = (sign > 0) * df['volume']
250
  sell_vol = (sign < 0) * df['volume']
251
  imb = (buy_vol.rolling(60).sum() - sell_vol.rolling(60).sum()).abs()
252
+ tot = df['volume'].rolling(60).sum().replace(0, np.nan)
253
+ df['vpin'] = (imb / tot).fillna(0)
254
 
255
  vwap_win = 20
256
  v_short = (df['dollar_vol'].rolling(vwap_win).sum() / df['volume'].rolling(vwap_win).sum().replace(0, np.nan)).fillna(df['close'])
 
264
  # ---------------------------------------------------------
265
  # 🧠 PART 3: ORACLE / HYDRA / LEGACY EXTRAS
266
  # ---------------------------------------------------------
267
+ df['slope'] = safe_ta(ta.slope(df['close'], length=7), idx)
268
  vol_mean = df['volume'].rolling(20).mean()
269
+ vol_std = df['volume'].rolling(20).std().replace(0, np.nan)
270
+ df['vol_z'] = ((df['volume'] - vol_mean) / vol_std).fillna(0)
271
 
272
  df['rel_vol'] = df['volume'] / (df['volume'].rolling(50).mean() + 1e-9)
273
 
274
+ df['log_ret'] = np.log(df['close'] / df['close'].shift(1).replace(0, np.nan)).fillna(0)
275
  roll_max = df['high'].rolling(50).max()
276
  roll_min = df['low'].rolling(50).min()
277
  diff = (roll_max - roll_min).replace(0, 1e-9)
278
  df['fib_pos'] = ((df['close'] - roll_min) / diff).fillna(0.5)
279
 
280
+ e20_s = df['ema20'].shift(5).replace(0, np.nan)
281
+ df['trend_slope'] = ((df['ema20'] - df['ema20'].shift(5)) / e20_s).fillna(0)
282
+ df['volatility'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
283
 
284
  fib618 = roll_max - (diff * 0.382)
285
+ df['dist_fib618'] = ((df['close'] - fib618) / df['close'].replace(0, np.nan)).fillna(0)
286
+
287
+ e50 = df['ema50'].replace(0, np.nan)
288
+ df['dist_ema50'] = ((df['close'] - df['ema50']) / e50).fillna(0)
289
 
290
+ e200 = safe_ta(ta.ema(df['close'], length=200), idx) # Safe Fill
291
+ df['ema200'] = e200
292
+ df['dist_ema200'] = ((df['close'] - e200) / e200.replace(0, np.nan)).fillna(0)
293
 
294
  if timeframe == '1m':
295
  for lag in [1, 2, 3, 5, 10, 20]:
 
298
  df[f'fib_pos_lag_{lag}'] = df['fib_pos'].shift(lag).fillna(0.5)
299
  df[f'volatility_lag_{lag}'] = df['volatility'].shift(lag).fillna(0)
300
 
301
+ # FINAL SANITIZATION
302
  df.fillna(0, inplace=True)
303
  return df
304
 
305
  # ==============================================================
306
+ # 🧠 CPU PROCESSING
307
  # ==============================================================
308
  async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
309
  safe_sym = sym.replace('/', '_')
 
314
  print(f" 📂 [{sym}] Data Exists -> Skipping.")
315
  return
316
 
317
+ print(f" ⚙️ [CPU] Analyzing {sym}...", flush=True)
318
  t0 = time.time()
319
 
320
  df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
 
339
  frames[tf_str] = resampled
340
  numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}
341
 
342
+ # 3. Global Maps
343
  arr_ts_1m = fast_1m['timestamp']
344
  map_5m = np.clip(np.searchsorted(numpy_htf['5m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['5m']['timestamp']) - 1)
345
  map_15m = np.clip(np.searchsorted(numpy_htf['15m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['15m']['timestamp']) - 1)
 
376
  else:
377
  t_vecs.append(np.zeros(len(arr_ts_1m)))
378
 
379
+ # Check mean to ensure data isn't all zeros
380
  X_TITAN = np.column_stack(t_vecs)
381
+ # print(f" [DEBUG] Titan Input Mean: {np.mean(X_TITAN):.4f}")
382
+
383
  preds_t = titan_model.predict(xgb.DMatrix(X_TITAN))
384
  global_titan_scores = _revive_score_distribution(preds_t)
385
  except Exception as e: print(f"Titan Error: {e}")
 
520
  l2_arr = np.full(240, 0.7)
521
  tgt_arr = np.full(240, 3.0)
522
 
 
523
  X_H = np.column_stack([
524
  sl_st[:,0], sl_st[:,1], sl_st[:,2], sl_st[:,3], sl_st[:,4],
525
  zeros, atr_pct, norm_pnl, max_pnl_r,