Update backtest_engine.py

backtest_engine.py  CHANGED  (+69 -90)
@@ -1,5 +1,5 @@
 # ============================================================
-# 🧪 backtest_engine.py (V118.
+# 🧪 backtest_engine.py (V118.9 - GEM-Architect: Dimension Safe)
 # ============================================================

 import asyncio
@@ -36,19 +36,11 @@ class HeavyDutyBacktester:
     def __init__(self, data_manager, processor):
         self.dm = data_manager
         self.proc = processor
-
-        # 🎛️ Search grid density
         self.GRID_DENSITY = 3
-
-        # Portfolio settings
         self.INITIAL_CAPITAL = 10.0
         self.TRADING_FEES = 0.001
         self.MAX_SLOTS = 4
-
-        self.TARGET_COINS = [
-            'SOL/USDT', 'XRP/USDT', 'DOGE/USDT'
-        ]
-
+        self.TARGET_COINS = ['SOL/USDT', 'XRP/USDT', 'DOGE/USDT']
         self.force_start_date = None
         self.force_end_date = None

@@ -62,7 +54,7 @@
         else:
             os.makedirs(CACHE_DIR)

-        print(f"🧪 [Backtest V118.
+        print(f"🧪 [Backtest V118.9] Dimension Safe Mode. Models: {self._check_models_status()}")

     def _check_models_status(self):
         status = []
@@ -132,7 +124,6 @@
         df['ema50'] = ta.ema(df['close'], length=50)
         df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14)

-        # ✅ Global Calc for rel_vol
         df['vol_ma50'] = df['volume'].rolling(50).mean()
         df['rel_vol'] = df['volume'] / (df['vol_ma50'] + 1e-9)

@@ -146,7 +137,7 @@
         df['vol_z'] = (df['volume'] - vol_mean) / (vol_std + 1e-9)
         df['atr_pct'] = df['atr'] / df['close']

-        #
+        # L1 Score
         rsi_penalty = np.where(df['rsi'] > 70, (df['rsi'] - 70) * 2, 0)
         l1_score_raw = (df['rel_vol'] * 10) + (df['atr_pct'] * 1000) - rsi_penalty
         df['l1_score'] = l1_score_raw.fillna(0)
@@ -197,15 +188,13 @@
         df_1m = df_1m.sort_index()

         frames = {}
-        agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
-
         frames['1m'] = self._calculate_indicators_vectorized(df_1m.copy(), timeframe='1m')
         frames['1m']['timestamp'] = frames['1m'].index.floor('1min').astype(np.int64) // 10**6
         fast_1m = {col: frames['1m'][col].values for col in frames['1m'].columns}

         numpy_htf = {}
         for tf_str, tf_code in [('5m', '5T'), ('15m', '15T'), ('1h', '1h'), ('4h', '4h'), ('1d', '1D')]:
-            resampled = df_1m.resample(tf_code).agg(
+            resampled = df_1m.resample(tf_code).agg({'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}).dropna()
             resampled = self._calculate_indicators_vectorized(resampled, timeframe=tf_str)
             resampled['timestamp'] = resampled.index.astype(np.int64) // 10**6
             frames[tf_str] = resampled
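For reference, a minimal standalone sketch of the inline resample/aggregation used in this hunk. The aggregation rules and frequency strings come from the diff; the random OHLCV frame is purely illustrative, and newer pandas releases deprecate the 'T' minute alias in favour of 'min'.

import numpy as np
import pandas as pd

# Illustrative 1-minute OHLCV frame (random data, not from the repository).
idx = pd.date_range("2024-01-01", periods=60, freq="1min")
df_1m = pd.DataFrame({
    "open": np.random.rand(60), "high": np.random.rand(60),
    "low": np.random.rand(60), "close": np.random.rand(60),
    "volume": np.random.rand(60),
}, index=idx)

# Same aggregation rules as the diff: first open, max high, min low, last close, summed volume.
ohlcv_agg = {"open": "first", "high": "max", "low": "min", "close": "last", "volume": "sum"}
df_5m = df_1m.resample("5min").agg(ohlcv_agg).dropna()  # "5min" is the non-deprecated spelling of "5T"
print(df_5m.shape)  # (12, 5)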
@@ -227,16 +216,12 @@
         global_v2_probs = np.zeros(len(fast_1m['close']))
         if legacy_v2:
             try:
-                l_log = fast_1m['log_ret']
-                l_rsi = fast_1m['rsi'] / 100.0
-                l_fib = fast_1m['fib_pos']
-                l_vol = fast_1m['volatility']
-
+                l_log = fast_1m['log_ret']; l_rsi = fast_1m['rsi'] / 100.0
+                l_fib = fast_1m['fib_pos']; l_vol = fast_1m['volatility']
                 l5_log = numpy_htf['5m']['log_ret'][map_1m_to_5m]
                 l5_rsi = numpy_htf['5m']['rsi'][map_1m_to_5m] / 100.0
                 l5_fib = numpy_htf['5m']['fib_pos'][map_1m_to_5m]
                 l5_trd = numpy_htf['5m']['trend_slope'][map_1m_to_5m]
-
                 l15_log = numpy_htf['15m']['log_ret'][map_1m_to_15m]
                 l15_rsi = numpy_htf['15m']['rsi'][map_1m_to_15m] / 100.0
                 l15_fib618 = numpy_htf['15m']['dist_fib618'][map_1m_to_15m]
@@ -244,10 +229,7 @@

                 lag_cols = []
                 for lag in [1, 2, 3, 5, 10, 20]:
-                    lag_cols.extend([
-                        fast_1m[f'log_ret_lag_{lag}'], fast_1m[f'rsi_lag_{lag}'],
-                        fast_1m[f'fib_pos_lag_{lag}'], fast_1m[f'volatility_lag_{lag}']
-                    ])
+                    lag_cols.extend([fast_1m[f'log_ret_lag_{lag}'], fast_1m[f'rsi_lag_{lag}'], fast_1m[f'fib_pos_lag_{lag}'], fast_1m[f'volatility_lag_{lag}']])

                 X_GLOBAL_V2 = np.column_stack([l_log, l_rsi, l_fib, l_vol, l5_log, l5_rsi, l5_fib, l5_trd, l15_log, l15_rsi, l15_fib618, l15_trd, *lag_cols])
                 global_v2_probs = legacy_v2.predict(xgb.DMatrix(X_GLOBAL_V2))
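As a shape sanity check for the X_GLOBAL_V2 assembly above, here is a small illustrative sketch of stacking per-candle feature vectors (including lagged copies) into a 2-D matrix with np.column_stack; the arrays and lag values are made up and only stand in for the fast_1m columns.

import numpy as np

n = 1000
close = 100.0 + np.random.rand(n)
log_ret = np.diff(np.log(close), prepend=np.log(close[0]))

# Illustrative lagged features: each lag shifts the series back by `lag` candles.
lag_cols = []
for lag in (1, 2, 3):
    lagged = np.roll(log_ret, lag)
    lagged[:lag] = 0.0          # zero out the wrap-around introduced by np.roll
    lag_cols.append(lagged)

X = np.column_stack([log_ret, *lag_cols])
print(X.shape)  # (1000, 4): one row per candle, one column per feature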
@@ -258,13 +240,9 @@
         global_hydra_static = None
         if hydra_models:
             try:
-                h_rsi_1m = fast_1m['rsi']
-                h_rsi_5m = numpy_htf['5m']['rsi'][map_1m_to_5m]
-                h_rsi_15m = numpy_htf['15m']['rsi'][map_1m_to_15m]
-                h_bb = fast_1m['bb_width']
-                h_vol = fast_1m['rel_vol']
-                h_atr = fast_1m['atr']
-                h_close = fast_1m['close']
+                h_rsi_1m = fast_1m['rsi']; h_rsi_5m = numpy_htf['5m']['rsi'][map_1m_to_5m]
+                h_rsi_15m = numpy_htf['15m']['rsi'][map_1m_to_15m]; h_bb = fast_1m['bb_width']
+                h_vol = fast_1m['rel_vol']; h_atr = fast_1m['atr']; h_close = fast_1m['close']
                 global_hydra_static = np.column_stack([h_rsi_1m, h_rsi_5m, h_rsi_15m, h_bb, h_vol, h_atr, h_close])
             except: pass

@@ -289,20 +267,16 @@
             idx_1h = map_1m_to_1h[final_valid_indices]
             idx_15m = map_1m_to_15m[final_valid_indices]
             idx_4h = map_1m_to_4h[final_valid_indices]
-
             titan_scores = np.clip(fast_1m['l1_score'][final_valid_indices] / 40.0, 0.1, 0.95)

             oracle_features = []
             for col in getattr(self.proc.oracle, 'feature_cols', []):
                 if col.startswith('1h_'):
-                    c = col[3:]
-                    oracle_features.append(numpy_htf['1h'][c][idx_1h] if c in numpy_htf['1h'] else np.zeros(num_candidates))
+                    c = col[3:]; oracle_features.append(numpy_htf['1h'][c][idx_1h] if c in numpy_htf['1h'] else np.zeros(num_candidates))
                 elif col.startswith('15m_'):
-                    c = col[4:]
-                    oracle_features.append(numpy_htf['15m'][c][idx_15m] if c in numpy_htf['15m'] else np.zeros(num_candidates))
+                    c = col[4:]; oracle_features.append(numpy_htf['15m'][c][idx_15m] if c in numpy_htf['15m'] else np.zeros(num_candidates))
                 elif col.startswith('4h_'):
-                    c = col[3:]
-                    oracle_features.append(numpy_htf['4h'][c][idx_4h] if c in numpy_htf['4h'] else np.zeros(num_candidates))
+                    c = col[3:]; oracle_features.append(numpy_htf['4h'][c][idx_4h] if c in numpy_htf['4h'] else np.zeros(num_candidates))
                 elif col == 'sim_titan_score': oracle_features.append(titan_scores)
                 elif col == 'sim_mc_score': oracle_features.append(np.full(num_candidates, 0.5))
                 elif col == 'sim_pattern_score': oracle_features.append(np.full(num_candidates, 0.5))
@@ -310,10 +284,8 @@

             X_oracle_big = np.column_stack(oracle_features)
             preds = oracle_dir_model.predict(X_oracle_big)
-            if len(preds.shape) > 1 and preds.shape[1] > 1:
-                oracle_preds = preds[:, 1]
-            else:
-                oracle_preds = preds.flatten()
+            if len(preds.shape) > 1 and preds.shape[1] > 1: oracle_preds = preds[:, 1]
+            else: oracle_preds = preds.flatten()
         except Exception as e: print(f"Oracle Error: {e}")

         # --- B. SNIPER MATRIX CONSTRUCTION ---
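A small self-contained illustration of the dimension guard above: some predictors return a flat score vector while others return an (n, 2) class-probability matrix, and both shapes need to collapse to one 1-D vector. The helper name to_flat_probs is hypothetical, introduced only for this sketch.

import numpy as np

def to_flat_probs(preds: np.ndarray) -> np.ndarray:
    """Return a 1-D positive-class score vector from either output shape."""
    if preds.ndim > 1 and preds.shape[1] > 1:
        return preds[:, 1]      # (n, 2) probability matrix -> column of class 1
    return preds.flatten()      # already (n,) or (n, 1)

print(to_flat_probs(np.array([[0.7, 0.3], [0.2, 0.8]])))  # [0.3 0.8]
print(to_flat_probs(np.array([[0.9], [0.1]])))            # [0.9 0.1]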
@@ -327,7 +299,8 @@
                 else: sniper_features.append(np.zeros(num_candidates))

             X_sniper_big = np.column_stack(sniper_features)
-            preds_list = [m.predict(X_sniper_big) for m in sniper_models]
+            # ✅ FIX: SQUEEZE PREDICTIONS
+            preds_list = [np.squeeze(m.predict(X_sniper_big)) for m in sniper_models]
             sniper_preds = np.mean(preds_list, axis=0)
         except Exception as e: print(f"Sniper Error: {e}")

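The squeeze fix matters because averaging an ensemble whose members return (n,) and (n, 1) arrays with np.mean(..., axis=0) fails or mis-broadcasts; squeezing first normalises every output to (n,). A minimal sketch with stand-in "models" (plain lambdas, not the project's sniper models):

import numpy as np

X = np.random.rand(5, 3)

# Stand-in models: one returns a flat vector, the other a column vector.
models = [lambda X: np.random.rand(len(X)),
          lambda X: np.random.rand(len(X), 1)]

preds_list = [np.squeeze(m(X)) for m in models]   # both become shape (5,)
ensemble = np.mean(preds_list, axis=0)
print(ensemble.shape)  # (5,)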
@@ -339,27 +312,18 @@
         chunk_size = 5000
         for i in range(0, num_candidates, chunk_size):
             chunk_indices = final_valid_indices[i : i + chunk_size]
-            batch_X = []
-            valid_batch_indices = []
+            batch_X = []; valid_batch_indices = []

             for k, idx in enumerate(chunk_indices):
-                start = idx + 1
-                end = start + 240
+                start = idx + 1; end = start + 240
                 sl_static = global_hydra_static[start:end]
-
                 entry_p = fast_1m['close'][idx]
-                sl_close = sl_static[:, 6]
-                sl_atr = sl_static[:, 5]
-
+                sl_close = sl_static[:, 6]; sl_atr = sl_static[:, 5]
                 sl_dist = np.maximum(1.5 * sl_atr, entry_p * 0.015)
-                sl_pnl = sl_close - entry_p
-                sl_norm_pnl = sl_pnl / sl_dist
-
-                sl_cum_max = np.maximum.accumulate(sl_close)
-                sl_cum_max = np.maximum(sl_cum_max, entry_p)
+                sl_pnl = sl_close - entry_p; sl_norm_pnl = sl_pnl / sl_dist
+                sl_cum_max = np.maximum.accumulate(sl_close); sl_cum_max = np.maximum(sl_cum_max, entry_p)
                 sl_max_pnl_r = (sl_cum_max - entry_p) / sl_dist
                 sl_atr_pct = sl_atr / sl_close
-
                 zeros = np.zeros(240); ones = np.ones(240)

                 row = np.column_stack([
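For the look-ahead window maths in this hunk, np.maximum.accumulate gives the best close reached up to each future minute, and clamping with the entry price keeps the running maximum from starting below entry. A tiny sketch with made-up numbers (entry price, ATR and the 240-candle horizon shortened to 5 steps):

import numpy as np

entry_p = 100.0
future_close = np.array([99.0, 101.0, 100.5, 103.0, 102.0])

run_max = np.maximum.accumulate(future_close)   # best close seen up to each step
run_max = np.maximum(run_max, entry_p)          # never below the entry price
sl_dist = max(1.5 * 0.8, entry_p * 0.015)       # illustrative ATR of 0.8 -> stop distance 1.5
max_pnl_r = (run_max - entry_p) / sl_dist       # open profit in R-multiples at each step
print(run_max)                                  # [100. 101. 101. 103. 103.]
print(np.round(max_pnl_r, 2))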
@@ -378,12 +342,10 @@
             big_X_flat = big_X.reshape(-1, big_X.shape[-1])
             preds_flat = hydra_models['crash'].predict_proba(big_X_flat)[:, 1]
             preds_batch = preds_flat.reshape(len(batch_X), 240)
-
             batch_max_risk = np.max(preds_batch, axis=1)
             over_thresh = preds_batch > 0.6
             has_crash = over_thresh.any(axis=1)
             crash_times_rel = np.argmax(over_thresh, axis=1)
-
             for j, glob_idx in enumerate(valid_batch_indices):
                 hydra_risk_preds[glob_idx] = batch_max_risk[j]
                 if has_crash[j]:
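One detail worth spelling out: np.argmax over a boolean matrix returns the index of the first True in each row, which is how the first candle whose crash probability exceeds 0.6 is located; the any(axis=1) mask is required because argmax also returns 0 for rows with no True at all. Minimal sketch with toy probabilities:

import numpy as np

preds_batch = np.array([[0.2, 0.7, 0.9],    # crosses 0.6 at index 1
                        [0.1, 0.2, 0.3]])   # never crosses
over = preds_batch > 0.6
has_crash = over.any(axis=1)                 # [ True False]
first_cross = np.argmax(over, axis=1)        # [1 0]  <- the 0 is meaningless for row 2
print([int(t) if h else None for h, t in zip(has_crash, first_cross)])  # [1, None]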
@@ -395,7 +357,6 @@
         # --- D. LEGACY V2 MAPPING ---
         legacy_risk_preds = np.zeros(num_candidates)
         legacy_time_preds = np.zeros(num_candidates, dtype=int)
-
         if legacy_v2:
             for k, idx in enumerate(final_valid_indices):
                 start = idx + 1
@@ -403,31 +364,51 @@
                 window = global_v2_probs[start : start + 240]
                 legacy_risk_preds[k] = np.max(window)

-        # --- E. FINAL DATAFRAME ---
+        # --- E. FINAL DATAFRAME CONSTRUCTION (Safe Mode) ---
+        try:
+            # 1. Gather Arrays
+            arr_ts = fast_1m['timestamp'][final_valid_indices]
+            arr_close = fast_1m['close'][final_valid_indices]
+            arr_l1 = fast_1m['l1_score'][final_valid_indices]
+            arr_titan = np.clip(arr_l1 / 40.0, 0.1, 0.95)
+
+            # 2. Check Lengths
+            arrays = {
+                'timestamp': arr_ts,
+                'close': arr_close,
+                'real_titan': arr_titan,
+                'oracle_conf': oracle_preds,
+                'sniper_score': sniper_preds,
+                'l1_score': arr_l1,
+                'risk_hydra_crash': hydra_risk_preds,
+                'time_hydra_crash': hydra_time_preds,
+                'risk_legacy_v2': legacy_risk_preds,
+                'time_legacy_panic': legacy_time_preds
+            }
+
+            # 3. Explicitly Flatten & Verify
+            clean_arrays = {}
+            for k, v in arrays.items():
+                flat_v = np.array(v).flatten()
+                if len(flat_v) != num_candidates:
+                    print(f"❌ SIZE MISMATCH in {k}: Expected {num_candidates}, got {len(flat_v)}")
+                    # Fix by truncating or padding (Emergency Fix)
+                    if len(flat_v) > num_candidates: flat_v = flat_v[:num_candidates]
+                    else: flat_v = np.pad(flat_v, (0, num_candidates - len(flat_v)))
+                clean_arrays[k] = flat_v
+
+            # 4. Create DF
+            clean_arrays['symbol'] = sym
+            ai_df = pd.DataFrame(clean_arrays)
+
+            dt = time.time() - t0
+            if not ai_df.empty:
+                ai_df.to_pickle(scores_file)
+                print(f" ✅ [{sym}] Completed {len(ai_df)} signals in {dt:.2f} seconds.", flush=True)
+
+        except Exception as e:
+            print(f"❌ DataFrame Construction Error: {e}")
+            traceback.print_exc()

         del frames, fast_1m, numpy_htf, global_v2_probs, global_hydra_static
         gc.collect()
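The "Dimension Safe" block above exists because pd.DataFrame raises "All arrays must be of the same length" when its column arrays disagree; flattening each array and padding or truncating it to num_candidates guarantees construction succeeds. A compact standalone sketch of the same guard (column names and lengths are invented for illustration):

import numpy as np
import pandas as pd

num_candidates = 4
arrays = {
    'close': np.arange(4, dtype=float),
    'oracle_conf': np.arange(5, dtype=float),             # too long -> truncated
    'sniper_score': np.arange(3, dtype=float)[:, None],   # 2-D and too short -> flattened, padded
}

clean = {}
for name, values in arrays.items():
    flat = np.asarray(values).flatten()
    if len(flat) != num_candidates:
        if len(flat) > num_candidates:
            flat = flat[:num_candidates]
        else:
            flat = np.pad(flat, (0, num_candidates - len(flat)))
    clean[name] = flat

df = pd.DataFrame(clean)
print(df.shape)  # (4, 3) -- every column forced to the same length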
@@ -503,7 +484,6 @@
             sym_id = arr_sym_int[i]
             price = arr_close[i]

-            # Exits
             if sym_id in positions:
                 pos = positions[sym_id]
                 entry = pos[0]; h_risk = pos[2]; h_time = pos[3]
@@ -521,7 +501,6 @@
                 dd = (peak_bal - tot) / peak_bal
                 if dd > max_dd: max_dd = dd

-            # Entries
             if len(positions) < max_slots:
                 if mask_buy[i]:
                     if sym_id not in positions:
@@ -604,7 +583,7 @@
         return best['config'], best

 async def run_strategic_optimization_task():
-    print("\n🧪 [STRATEGIC BACKTEST]
+    print("\n🧪 [STRATEGIC BACKTEST] Dimension Safe Mode...")
     r2 = R2Service()
     dm = DataManager(None, None, r2)
     proc = MLProcessor(dm)