Tradtesting

Paused

App Files Files Community

Riy777 committed on Dec 8, 2025

Commit

c383866

verified ·

1 Parent(s): 445dda3

Update backtest_engine.py

Browse files

Files changed (1) hide show

backtest_engine.py +208 -188

backtest_engine.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ============================================================
-# 🧪 backtest_engine.py (V111.0 - GEM-Architect: Full Regime Loop Restored)
 # ============================================================
 import asyncio
@@ -55,7 +55,7 @@ class HeavyDutyBacktester:
         self.force_end_date = None
         if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
-        print(f"🧪 [Backtest V111.0] Full Stack + Multi-Regime Strategic Loop.")
     def set_date_range(self, start_str, end_str):
         self.force_start_date = start_str
@@ -104,23 +104,21 @@ class HeavyDutyBacktester:
         return unique_candles
     # ==============================================================
-    # 🏎️ VECTORIZED INDICATORS (ALL LAYERS)
     # ==============================================================
     def _calculate_indicators_vectorized(self, df, timeframe='1m'):
-        # 1. Basic Setup
         df['close'] = df['close'].astype(float)
         df['high'] = df['high'].astype(float)
         df['low'] = df['low'].astype(float)
         df['volume'] = df['volume'].astype(float)
         df['open'] = df['open'].astype(float)
-        # 2. Standard Indicators
         df['rsi'] = ta.rsi(df['close'], length=14)
         df['ema20'] = ta.ema(df['close'], length=20)
         df['ema50'] = ta.ema(df['close'], length=50)
         df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14)
-        # 3. Hydra
         if timeframe == '1m':
             sma20 = df['close'].rolling(20).mean()
             std20 = df['close'].rolling(20).std()
@@ -128,47 +126,40 @@ class HeavyDutyBacktester:
             df['vol_ma50'] = df['volume'].rolling(50).mean()
             df['rel_vol'] = df['volume'] / (df['vol_ma50'] + 1e-9)
-        # 4. Oracle
         df['slope'] = ta.slope(df['close'], length=7)
         vol_mean = df['volume'].rolling(20).mean()
         vol_std = df['volume'].rolling(20).std()
         df['vol_z'] = (df['volume'] - vol_mean) / (vol_std + 1e-9)
         df['atr_pct'] = df['atr'] / df['close']
-        # 5. Sniper (1m Only)
         if timeframe == '1m':
             df['ret'] = df['close'].pct_change()
             df['dollar_vol'] = df['close'] * df['volume']
             df['amihud'] = (df['ret'].abs() / df['dollar_vol'].replace(0, np.nan)).fillna(0)
             dp = df['close'].diff()
             roll_cov = dp.rolling(64).cov(dp.shift(1))
             df['roll_spread'] = (2 * np.sqrt(np.maximum(0, -roll_cov))).fillna(0)
             sign = np.sign(df['close'].diff()).fillna(0)
             df['signed_vol'] = sign * df['volume']
             df['ofi'] = df['signed_vol'].rolling(30).sum().fillna(0)
             buy_vol = (sign > 0) * df['volume']
             sell_vol = (sign < 0) * df['volume']
             imb = (buy_vol.rolling(60).sum() - sell_vol.rolling(60).sum()).abs()
             tot = df['volume'].rolling(60).sum()
             df['vpin'] = (imb / tot.replace(0, np.nan)).fillna(0)
             vwap = (df['close'] * df['volume']).rolling(20).sum() / df['volume'].rolling(20).sum()
             df['vwap_dev'] = (df['close'] - vwap).fillna(0)
             df['rv_gk'] = (np.log(df['high'] / df['low'])**2) / 2 - (2 * np.log(2) - 1) * (np.log(df['close'] / df['open'])**2)
             df['return_1m'] = df['ret']
             df['return_5m'] = df['close'].pct_change(5)
             df['return_15m'] = df['close'].pct_change(15)
             r = df['volume'].rolling(500).mean()
             s = df['volume'].rolling(500).std()
             df['vol_zscore_50'] = ((df['volume'] - r) / s).fillna(0)
-        # 6. Legacy
         df['log_ret'] = np.log(df['close'] / df['close'].shift(1))
         roll_max = df['high'].rolling(50).max()
         roll_min = df['low'].rolling(50).min()
@@ -182,11 +173,19 @@ class HeavyDutyBacktester:
         df['ema200'] = ta.ema(df['close'], length=200)
         df['dist_ema200'] = (df['close'] - df['ema200']) / df['close']
         df.fillna(0, inplace=True)
         return df
     # ==============================================================
-    # 🧠 CPU PROCESSING (Full Stack Injection + Logs)
     # ==============================================================
     async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
         safe_sym = sym.replace('/', '_')
@@ -197,7 +196,7 @@ class HeavyDutyBacktester:
              print(f"   📂 [{sym}] Data Exists -> Skipping.")
              return
-        print(f"   ⚙️ [CPU] Analyzing {sym} (Full Stack: Titan+Oracle+Sniper+Hydra)...", flush=True)
         t0 = time.time()
         df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
@@ -208,12 +207,12 @@ class HeavyDutyBacktester:
         frames = {}
         agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
-        # --- 1. Vectorized Calculations ---
         frames['1m'] = self._calculate_indicators_vectorized(df_1m.copy(), timeframe='1m')
         frames['1m']['timestamp'] = frames['1m'].index.floor('1min').astype(np.int64) // 10**6
         fast_1m = {col: frames['1m'][col].values for col in frames['1m'].columns}
         numpy_htf = {}
         for tf_str, tf_code in [('5m', '5T'), ('15m', '15T'), ('1h', '1h'), ('4h', '4h'), ('1d', '1D')]:
             resampled = df_1m.resample(tf_code).agg(agg_dict).dropna()
@@ -222,77 +221,74 @@ class HeavyDutyBacktester:
             frames[tf_str] = resampled
             numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}
-        # --- 2. L1 Filter ---
         df_1h = frames['1h'].reindex(frames['5m'].index, method='ffill')
         df_5m = frames['5m'].copy()
         is_valid = (df_1h['rsi'] <= 70)
         valid_indices = df_5m[is_valid].index
         start_dt = df_1m.index[0] + pd.Timedelta(minutes=500)
         final_valid_indices = [t for t in valid_indices if t >= start_dt]
         total_signals = len(final_valid_indices)
-        print(f"     🎯 Candidates Found: {total_signals}. Running Models...", flush=True)
-        # --- 3. Model Loading ---
-        hydra_models = {}
-        hydra_cols = []
-        if self.proc.guardian_hydra:
-            hydra_models = self.proc.guardian_hydra.models
-            hydra_cols = self.proc.guardian_hydra.feature_cols
         legacy_v2 = getattr(self.proc.guardian_legacy, 'model_v2', None)
         legacy_v3 = getattr(self.proc.guardian_legacy, 'model_v3', None)
         v3_feat_names = getattr(self.proc.guardian_legacy, 'v3_feature_names', [])
         oracle_dir_model = getattr(self.proc.oracle, 'model_direction', None)
         oracle_cols = getattr(self.proc.oracle, 'feature_cols', [])
         sniper_models = getattr(self.proc.sniper, 'models', [])
         sniper_cols = getattr(self.proc.sniper, 'feature_names', [])
         ai_results = []
-        # --- 4. Main Simulation Loop ---
         for i, current_time in enumerate(final_valid_indices):
             if i > 0 and i % 1000 == 0:
-                percent = (i / total_signals) * 100
-                print(f"     ⏳ [{sym}] Processing... {i}/{total_signals} ({percent:.1f}%)", flush=True)
             ts_val = int(current_time.timestamp() * 1000)
-            # Sync Indices
             idx_1m = np.searchsorted(fast_1m['timestamp'], ts_val)
-            idx_1h = np.searchsorted(numpy_htf['1h']['timestamp'], ts_val)
-            idx_5m = np.searchsorted(numpy_htf['5m']['timestamp'], ts_val) # ✅ FIXED
-            idx_15m = np.searchsorted(numpy_htf['15m']['timestamp'], ts_val)
-            idx_4h = np.searchsorted(numpy_htf['4h']['timestamp'], ts_val)
-            if idx_1m < 500 or idx_1m >= len(fast_1m['close']) - 240: continue
-            if idx_1h >= len(numpy_htf['1h']['close']): idx_1h = len(numpy_htf['1h']['close']) - 1
-            if idx_5m >= len(numpy_htf['5m']['close']): idx_5m = len(numpy_htf['5m']['close']) - 1
-            if idx_15m >= len(numpy_htf['15m']['close']): idx_15m = len(numpy_htf['15m']['close']) - 1
             if idx_4h >= len(numpy_htf['4h']['close']): idx_4h = len(numpy_htf['4h']['close']) - 1
-            # === LAYER 2: Titan ===
-            titan_score = 0.6
-            # === LAYER 3: Oracle Injection ===
             oracle_conf = 0.5
-            if oracle_dir_model and oracle_cols:
                 o_vec = []
                 for col in oracle_cols:
                     val = 0.0
-                    if col.startswith('1h_'):
-                        raw = col.replace('1h_', '')
-                        val = numpy_htf['1h'].get(raw, [0])[idx_1h]
-                    elif col.startswith('15m_'):
-                        raw = col.replace('15m_', '')
-                        val = numpy_htf['15m'].get(raw, [0])[idx_15m]
-                    elif col.startswith('4h_'):
-                        raw = col.replace('4h_', '')
-                        val = numpy_htf['4h'].get(raw, [0])[idx_4h]
-                    elif col == 'sim_titan_score': val = titan_score
                     elif col == 'sim_mc_score': val = 0.5
                     elif col == 'sim_pattern_score': val = 0.5
                     o_vec.append(val)
@@ -302,119 +298,154 @@ class HeavyDutyBacktester:
                     if oracle_conf < 0.5: oracle_conf = 1 - oracle_conf
                 except: pass
-            # === LAYER 4: Sniper Injection ===
             sniper_score = 0.5
-            if sniper_models and sniper_cols:
                 s_vec = []
                 for col in sniper_cols:
-                    if col in fast_1m:
-                        s_vec.append(fast_1m[col][idx_1m])
-                    elif col == 'L_score':
-                        l_val = fast_1m.get('vol_zscore_50', [0])[idx_1m]
-                        s_vec.append(l_val)
-                    else:
-                        s_vec.append(0.0)
                 try:
                     s_preds = [m.predict(np.array(s_vec).reshape(1, -1))[0] for m in sniper_models]
                     sniper_score = np.mean(s_preds)
                 except: pass
-            # === RISK SIMULATION ===
             entry_price = fast_1m['close'][idx_1m]
-            highest_price = entry_price
-            max_hydra_crash = 0.0; max_hydra_giveback = 0.0; hydra_crash_time = 0
-            max_legacy_v2 = 0.0; max_legacy_v3 = 0.0; legacy_panic_time = 0
-            end_idx = min(idx_1m + 240, len(fast_1m['close']) - 1)
-            for c_idx in range(idx_1m + 1, end_idx + 1):
-                curr_price = fast_1m['close'][c_idx]
-                curr_ts = int(fast_1m['timestamp'][c_idx])
-                if curr_price > highest_price: highest_price = curr_price
-                # A. Hydra
-                if hydra_models:
-                    atr_val = fast_1m['atr'][c_idx]
-                    sl_dist = 1.5 * atr_val if atr_val > 0 else entry_price * 0.015
-                    pnl_r = (curr_price - entry_price) / sl_dist
-                    max_pnl_r = (highest_price - entry_price) / sl_dist
-                    row_dict = {
-                        'rsi_1m': fast_1m['rsi'][c_idx],
-                        'rsi_5m': numpy_htf['5m']['rsi'][idx_1h],
-                        'rsi_15m': numpy_htf['15m']['rsi'][idx_15m],
-                        'bb_width': fast_1m['bb_width'][c_idx],
-                        'rel_vol': fast_1m['rel_vol'][c_idx],
-                        'atr_pct': atr_val / curr_price,
-                        'norm_pnl_r': pnl_r, 'max_pnl_r': max_pnl_r,
-                        'time_in_trade': (c_idx - idx_1m),
-                        'entry_type': 0.0, 'oracle_conf': oracle_conf, 'l2_score': 0.7, 'target_class': 3.0
-                    }
-                    vec = np.array([row_dict.get(c, 0.0) for c in hydra_cols]).reshape(1, -1)
-                    try:
-                        pc = hydra_models['crash'].predict_proba(vec)[0][1]
-                        if pc > max_hydra_crash:
-                            max_hydra_crash = pc
-                            if pc > 0.6 and hydra_crash_time == 0: hydra_crash_time = curr_ts
-                        pg = hydra_models['giveback'].predict_proba(vec)[0][1]
-                        if pg > max_hydra_giveback: max_hydra_giveback = pg
-                    except: pass
-                # B. Legacy (Full Logic)
-                if legacy_v2 or legacy_v3:
-                    c_5m_idx = idx_5m + (c_idx - idx_1m) // 5
-                    if c_5m_idx >= len(numpy_htf['5m']['rsi']): c_5m_idx = len(numpy_htf['5m']['rsi']) - 1
-                    c_15m_idx = idx_15m + (c_idx - idx_1m) // 15
-                    if c_15m_idx >= len(numpy_htf['15m']['rsi']): c_15m_idx = len(numpy_htf['15m']['rsi']) - 1
-                    if legacy_v2:
-                        f1 = [fast_1m['log_ret'][c_idx], fast_1m['rsi'][c_idx]/100.0, fast_1m['fib_pos'][c_idx], fast_1m['volatility'][c_idx]]
-                        f5 = [numpy_htf['5m']['log_ret'][c_5m_idx], numpy_htf['5m']['rsi'][c_5m_idx]/100.0, numpy_htf['5m']['fib_pos'][c_5m_idx], numpy_htf['5m']['trend_slope'][c_5m_idx]]
-                        f15 = [numpy_htf['15m']['log_ret'][c_15m_idx], numpy_htf['15m']['rsi'][c_15m_idx]/100.0, numpy_htf['15m']['dist_fib618'][c_15m_idx], numpy_htf['15m']['trend_slope'][c_15m_idx]]
-                        vec_v2 = f1 + f5 + f15
-                        lags = [1, 2, 3, 5, 10, 20]
-                        for lag in lags:
-                            l_idx = c_idx - lag
-                            if l_idx >= 0:
-                                vec_v2.extend([fast_1m['log_ret'][l_idx], fast_1m['rsi'][l_idx]/100.0, fast_1m['fib_pos'][l_idx], fast_1m['volatility'][l_idx]])
-                            else:
-                                vec_v2.extend([0.0, 0.5, 0.5, 0.0])
-                        try:
-                            dm_v2 = xgb.DMatrix(np.array(vec_v2).reshape(1, -1))
-                            pred_v2 = legacy_v2.predict(dm_v2)
-                            p_v2 = float(pred_v2[0][2]) if len(pred_v2.shape)>1 else float(pred_v2[0])
-                            if p_v2 > max_legacy_v2:
-                                max_legacy_v2 = p_v2
-                                if p_v2 > 0.8 and legacy_panic_time == 0: legacy_panic_time = curr_ts
-                        except: pass
-                    if legacy_v3 and v3_feat_names:
-                        v3_dict = {}
-                        v3_dict['rsi'] = fast_1m['rsi'][c_idx]
-                        v3_dict['dist_ema50'] = fast_1m['dist_ema50'][c_idx]
-                        v3_dict['dist_ema200'] = fast_1m['dist_ema200'][c_idx]
-                        v3_dict['log_ret'] = fast_1m['log_ret'][c_idx]
-                        v3_dict['rsi_5m'] = numpy_htf['5m']['rsi'][c_5m_idx]
-                        v3_dict['dist_ema50_5m'] = numpy_htf['5m']['dist_ema50'][c_5m_idx]
-                        v3_dict['dist_ema200_5m'] = numpy_htf['5m']['dist_ema200'][c_5m_idx]
-                        v3_dict['log_ret_5m'] = numpy_htf['5m']['log_ret'][c_5m_idx]
-                        v3_dict['rsi_15m'] = numpy_htf['15m']['rsi'][c_15m_idx]
-                        v3_dict['dist_ema50_15m'] = numpy_htf['15m']['dist_ema50'][c_15m_idx]
-                        v3_dict['dist_ema200_15m'] = numpy_htf['15m']['dist_ema200'][c_15m_idx]
-                        v3_dict['log_ret_15m'] = numpy_htf['15m']['log_ret'][c_15m_idx]
-                        try:
-                            df_v3 = pd.DataFrame(columns=v3_feat_names)
-                            df_v3.loc[0] = [v3_dict.get(n, 0.0) for n in v3_feat_names]
-                            df_v3 = df_v3.astype(float)
-                            pred_v3 = legacy_v3.predict(xgb.DMatrix(df_v3))
-                            p_v3 = float(pred_v3[0])
-                            if p_v3 > max_legacy_v3: max_legacy_v3 = p_v3
-                        except: pass
             ai_results.append({
                 'timestamp': ts_val, 'symbol': sym, 'close': entry_price,
-                'real_titan': titan_score,
                 'oracle_conf': oracle_conf,
                 'sniper_score': sniper_score,
                 'risk_hydra_crash': max_hydra_crash,
@@ -428,7 +459,7 @@ class HeavyDutyBacktester:
         dt = time.time() - t0
         if ai_results:
             pd.DataFrame(ai_results).to_pickle(scores_file)
-            print(f"   ✅ [{sym}] Finished {len(ai_results)} signals in {dt:.2f} seconds.", flush=True)
         else:
             print(f"   ⚠️ [{sym}] No valid signals. Time: {dt:.2f}s", flush=True)
@@ -436,7 +467,7 @@ class HeavyDutyBacktester:
         gc.collect()
     # ==============================================================
-    # PHASE 1: Main Loop
     # ==============================================================
     async def generate_truth_data(self):
         if self.force_start_date and self.force_end_date:
@@ -444,21 +475,16 @@ class HeavyDutyBacktester:
             dt_end = datetime.strptime(self.force_end_date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
             start_time_ms = int(dt_start.timestamp() * 1000)
             end_time_ms = int(dt_end.timestamp() * 1000)
-            print(f"\n🚜 [Phase 1] Processing Forced Era: {self.force_start_date} -> {self.force_end_date}")
-            for sym in self.TARGET_COINS:
-                try:
-                    candles = await self._fetch_all_data_fast(sym, start_time_ms, end_time_ms)
-                    if candles: await self._process_data_in_memory(sym, candles, start_time_ms, end_time_ms)
-                except Exception as e: print(f"   ❌ SKIP {sym}: {e}", flush=True)
-                gc.collect()
-        else:
-            # If no forced date, we might rely on the Scenario Loop in run_strategic_optimization_task calling this.
-            # But the Scenario Loop sets force_start_date.
-            pass
-    # ==============================================================
-    # PHASE 2: Optimization (Grid Search)
-    # ==============================================================
     @staticmethod
     def _worker_optimize(combinations_batch, scores_files, initial_capital, fees_pct, max_slots):
         results = []
@@ -475,15 +501,11 @@ class HeavyDutyBacktester:
         for config in combinations_batch:
             wallet = { "balance": initial_capital, "allocated": 0.0, "positions": {}, "trades_history": [] }
             w_titan = config['w_titan']; oracle_thresh = config.get('oracle_thresh', 0.6)
             sniper_thresh = config.get('sniper_thresh', 0.4); hydra_thresh = config['hydra_thresh']
-            peak_balance = initial_capital
-            max_drawdown = 0.0
             for ts, group in grouped_by_time:
-                # EXIT
                 active = list(wallet["positions"].keys())
                 current_prices = {row['symbol']: row['close'] for _, row in group.iterrows()}
@@ -503,13 +525,11 @@ class HeavyDutyBacktester:
                             del wallet['positions'][sym]
                             wallet['trades_history'].append({'pnl': pnl})
-                # Stats Update
                 total_eq = wallet['balance'] + wallet['allocated']
                 if total_eq > peak_balance: peak_balance = total_eq
                 dd = (peak_balance - total_eq) / peak_balance
                 if dd > max_drawdown: max_drawdown = dd
-                # ENTRY
                 if len(wallet['positions']) < max_slots:
                     for _, row in group.iterrows():
                         if row['symbol'] in wallet['positions']: continue
@@ -526,7 +546,6 @@ class HeavyDutyBacktester:
                             wallet['balance'] -= size
                             wallet['allocated'] += size
-            # Stats
             final_bal = wallet['balance'] + wallet['allocated']
             net_profit = final_bal - initial_capital
             trades = wallet['trades_history']
@@ -537,18 +556,26 @@ class HeavyDutyBacktester:
             max_win = max([t['pnl'] for t in trades]) if trades else 0
             max_loss = min([t['pnl'] for t in trades]) if trades else 0
             results.append({
                 'config': config, 'final_balance': final_bal, 'net_profit': net_profit,
                 'total_trades': total_t, 'win_count': win_count, 'loss_count': loss_count,
                 'win_rate': win_rate, 'max_single_win': max_win, 'max_single_loss': max_loss,
                 'max_drawdown': max_drawdown * 100
             })
         return results
     async def run_optimization(self, target_regime="RANGE"):
-        # Note: generate_truth_data is called by the Strategy Loop wrapper now
-        # so we process data for the specific era set in set_date_range
         await self.generate_truth_data()
         oracle_range = [0.5, 0.6, 0.7]
@@ -591,8 +618,6 @@ class HeavyDutyBacktester:
         print("-" * 60)
         print(f"   ⚙️ Oracle={best['config']['oracle_thresh']} | Sniper={best['config']['sniper_thresh']} | Hydra={best['config']['hydra_thresh']}")
         print("="*60)
-        return best['config'], best
 async def run_strategic_optimization_task():
     print("\n🧪 [STRATEGIC BACKTEST] Full Stack Mode...")
@@ -606,7 +631,6 @@ async def run_strategic_optimization_task():
         hub = AdaptiveHub(r2); await hub.initialize()
         optimizer = HeavyDutyBacktester(dm, proc)
-        # ✅ RESTORED: The Multi-Regime Strategic Loop
         scenarios = [
             {"regime": "BULL", "start": "2024-01-01", "end": "2024-03-30"},
             {"regime": "BEAR", "start": "2023-08-01", "end": "2023-09-15"},
@@ -617,11 +641,7 @@ async def run_strategic_optimization_task():
         for scen in scenarios:
             target = scen["regime"]
             optimizer.set_date_range(scen["start"], scen["end"])
-            # Run opt
             best_cfg, best_stats = await optimizer.run_optimization(target_regime=target)
-            # Save
             if best_cfg:
                 hub.submit_challenger(target, best_cfg, best_stats)

 # ============================================================
+# 🧪 backtest_engine.py (V112.0 - GEM-Architect: Matrix Batch Speed)
 # ============================================================
 import asyncio
         self.force_end_date = None
         if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
+        print(f"🧪 [Backtest V112.0] Matrix-Batch Speed (No Loops Inside Signals).")
     def set_date_range(self, start_str, end_str):
         self.force_start_date = start_str
         return unique_candles
     # ==============================================================
+    # 🏎️ VECTORIZED INDICATORS (ALL LAYERS + LAGS)
     # ==============================================================
     def _calculate_indicators_vectorized(self, df, timeframe='1m'):
         df['close'] = df['close'].astype(float)
         df['high'] = df['high'].astype(float)
         df['low'] = df['low'].astype(float)
         df['volume'] = df['volume'].astype(float)
         df['open'] = df['open'].astype(float)
         df['rsi'] = ta.rsi(df['close'], length=14)
         df['ema20'] = ta.ema(df['close'], length=20)
         df['ema50'] = ta.ema(df['close'], length=50)
         df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14)
+        # Hydra
         if timeframe == '1m':
             sma20 = df['close'].rolling(20).mean()
             std20 = df['close'].rolling(20).std()
             df['vol_ma50'] = df['volume'].rolling(50).mean()
             df['rel_vol'] = df['volume'] / (df['vol_ma50'] + 1e-9)
+        # Oracle
         df['slope'] = ta.slope(df['close'], length=7)
         vol_mean = df['volume'].rolling(20).mean()
         vol_std = df['volume'].rolling(20).std()
         df['vol_z'] = (df['volume'] - vol_mean) / (vol_std + 1e-9)
         df['atr_pct'] = df['atr'] / df['close']
+        # Sniper
         if timeframe == '1m':
             df['ret'] = df['close'].pct_change()
             df['dollar_vol'] = df['close'] * df['volume']
             df['amihud'] = (df['ret'].abs() / df['dollar_vol'].replace(0, np.nan)).fillna(0)
             dp = df['close'].diff()
             roll_cov = dp.rolling(64).cov(dp.shift(1))
             df['roll_spread'] = (2 * np.sqrt(np.maximum(0, -roll_cov))).fillna(0)
             sign = np.sign(df['close'].diff()).fillna(0)
             df['signed_vol'] = sign * df['volume']
             df['ofi'] = df['signed_vol'].rolling(30).sum().fillna(0)
             buy_vol = (sign > 0) * df['volume']
             sell_vol = (sign < 0) * df['volume']
             imb = (buy_vol.rolling(60).sum() - sell_vol.rolling(60).sum()).abs()
             tot = df['volume'].rolling(60).sum()
             df['vpin'] = (imb / tot.replace(0, np.nan)).fillna(0)
             vwap = (df['close'] * df['volume']).rolling(20).sum() / df['volume'].rolling(20).sum()
             df['vwap_dev'] = (df['close'] - vwap).fillna(0)
             df['rv_gk'] = (np.log(df['high'] / df['low'])**2) / 2 - (2 * np.log(2) - 1) * (np.log(df['close'] / df['open'])**2)
             df['return_1m'] = df['ret']
             df['return_5m'] = df['close'].pct_change(5)
             df['return_15m'] = df['close'].pct_change(15)
             r = df['volume'].rolling(500).mean()
             s = df['volume'].rolling(500).std()
             df['vol_zscore_50'] = ((df['volume'] - r) / s).fillna(0)
+        # Legacy Structure
         df['log_ret'] = np.log(df['close'] / df['close'].shift(1))
         roll_max = df['high'].rolling(50).max()
         roll_min = df['low'].rolling(50).min()
         df['ema200'] = ta.ema(df['close'], length=200)
         df['dist_ema200'] = (df['close'] - df['ema200']) / df['close']
+        # ✅ PRE-CALCULATE LAGS FOR V2 (This enables Batch Processing)
+        if timeframe == '1m':
+            for lag in [1, 2, 3, 5, 10, 20]:
+                df[f'log_ret_lag_{lag}'] = df['log_ret'].shift(lag).fillna(0)
+                df[f'rsi_lag_{lag}'] = (df['rsi'].shift(lag).fillna(50) / 100.0)
+                df[f'fib_pos_lag_{lag}'] = df['fib_pos'].shift(lag).fillna(0.5)
+                df[f'volatility_lag_{lag}'] = df['volatility'].shift(lag).fillna(0)
         df.fillna(0, inplace=True)
         return df
     # ==============================================================
+    # 🧠 CPU PROCESSING (Matrix Batch Mode)
     # ==============================================================
     async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
         safe_sym = sym.replace('/', '_')
              print(f"   📂 [{sym}] Data Exists -> Skipping.")
              return
+        print(f"   ⚙️ [CPU] Analyzing {sym} (Matrix Batch Mode)...", flush=True)
         t0 = time.time()
         df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
         frames = {}
         agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
+        # 1. Calc 1m with Lags
         frames['1m'] = self._calculate_indicators_vectorized(df_1m.copy(), timeframe='1m')
         frames['1m']['timestamp'] = frames['1m'].index.floor('1min').astype(np.int64) // 10**6
         fast_1m = {col: frames['1m'][col].values for col in frames['1m'].columns}
+        # 2. Calc HTF
         numpy_htf = {}
         for tf_str, tf_code in [('5m', '5T'), ('15m', '15T'), ('1h', '1h'), ('4h', '4h'), ('1d', '1D')]:
             resampled = df_1m.resample(tf_code).agg(agg_dict).dropna()
             frames[tf_str] = resampled
             numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}
+        # 3. Create Global Index Maps (The Magic Step for Speed)
+        # Allows instant mapping from 1m index -> 5m/15m index without searching inside loop
+        # Using searchsorted on the whole array once
+        map_1m_to_1h = np.searchsorted(numpy_htf['1h']['timestamp'], fast_1m['timestamp'])
+        map_1m_to_5m = np.searchsorted(numpy_htf['5m']['timestamp'], fast_1m['timestamp'])
+        map_1m_to_15m = np.searchsorted(numpy_htf['15m']['timestamp'], fast_1m['timestamp'])
+        # Clamp indices to valid range
+        map_1m_to_1h = np.clip(map_1m_to_1h, 0, len(numpy_htf['1h']['timestamp']) - 1)
+        map_1m_to_5m = np.clip(map_1m_to_5m, 0, len(numpy_htf['5m']['timestamp']) - 1)
+        map_1m_to_15m = np.clip(map_1m_to_15m, 0, len(numpy_htf['15m']['timestamp']) - 1)
+        # 4. L1 Filter
         df_1h = frames['1h'].reindex(frames['5m'].index, method='ffill')
         df_5m = frames['5m'].copy()
         is_valid = (df_1h['rsi'] <= 70)
         valid_indices = df_5m[is_valid].index
         start_dt = df_1m.index[0] + pd.Timedelta(minutes=500)
         final_valid_indices = [t for t in valid_indices if t >= start_dt]
         total_signals = len(final_valid_indices)
+        print(f"     🎯 Candidates: {total_signals}. Running Matrix Models...", flush=True)
+        # 5. Load Models
+        hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
+        hydra_cols = getattr(self.proc.guardian_hydra, 'feature_cols', []) if self.proc.guardian_hydra else []
         legacy_v2 = getattr(self.proc.guardian_legacy, 'model_v2', None)
         legacy_v3 = getattr(self.proc.guardian_legacy, 'model_v3', None)
         v3_feat_names = getattr(self.proc.guardian_legacy, 'v3_feature_names', [])
         oracle_dir_model = getattr(self.proc.oracle, 'model_direction', None)
         oracle_cols = getattr(self.proc.oracle, 'feature_cols', [])
         sniper_models = getattr(self.proc.sniper, 'models', [])
         sniper_cols = getattr(self.proc.sniper, 'feature_names', [])
         ai_results = []
+        # --- 6. Main Simulation Loop (BATCH MODE) ---
         for i, current_time in enumerate(final_valid_indices):
             if i > 0 and i % 1000 == 0:
+                print(f"     ⏳ [{sym}] Processing... {i}/{total_signals}", flush=True)
             ts_val = int(current_time.timestamp() * 1000)
+            # Find Entry Index
             idx_1m = np.searchsorted(fast_1m['timestamp'], ts_val)
+            # Safety Check
+            if idx_1m < 500 or idx_1m >= len(fast_1m['close']) - 245: continue
+            # Determine Indices
+            idx_1h = map_1m_to_1h[idx_1m]
+            idx_5m = map_1m_to_5m[idx_1m]
+            idx_15m = map_1m_to_15m[idx_1m]
+            idx_4h = np.searchsorted(numpy_htf['4h']['timestamp'], ts_val) # Do this once per signal (rare)
             if idx_4h >= len(numpy_htf['4h']['close']): idx_4h = len(numpy_htf['4h']['close']) - 1
+            # === Oracle (Single Call) ===
             oracle_conf = 0.5
+            if oracle_dir_model:
                 o_vec = []
                 for col in oracle_cols:
                     val = 0.0
+                    if col.startswith('1h_'): val = numpy_htf['1h'].get(col[3:], [0])[idx_1h]
+                    elif col.startswith('15m_'): val = numpy_htf['15m'].get(col[4:], [0])[idx_15m]
+                    elif col.startswith('4h_'): val = numpy_htf['4h'].get(col[3:], [0])[idx_4h]
+                    elif col == 'sim_titan_score': val = 0.6
                     elif col == 'sim_mc_score': val = 0.5
                     elif col == 'sim_pattern_score': val = 0.5
                     o_vec.append(val)
                     if oracle_conf < 0.5: oracle_conf = 1 - oracle_conf
                 except: pass
+            # === Sniper (Single Call) ===
             sniper_score = 0.5
+            if sniper_models:
                 s_vec = []
                 for col in sniper_cols:
+                    if col in fast_1m: s_vec.append(fast_1m[col][idx_1m])
+                    elif col == 'L_score': s_vec.append(fast_1m.get('vol_zscore_50', [0])[idx_1m])
+                    else: s_vec.append(0.0)
                 try:
                     s_preds = [m.predict(np.array(s_vec).reshape(1, -1))[0] for m in sniper_models]
                     sniper_score = np.mean(s_preds)
                 except: pass
+            # === RISK SIMULATION (MATRIX BATCH) ===
+            # We construct a matrix of 240 rows (4 hours) at once and predict ONCE.
+            # This replaces the minute-by-minute loop.
+            future_len = 240
+            start_idx = idx_1m + 1
+            end_idx = start_idx + future_len
+            # Slices for 1m data
+            sl_close = fast_1m['close'][start_idx:end_idx]
+            sl_ts = fast_1m['timestamp'][start_idx:end_idx]
             entry_price = fast_1m['close'][idx_1m]
+            # Mapped Indices for HTF slices
+            sl_map_5m = map_1m_to_5m[start_idx:end_idx]
+            sl_map_15m = map_1m_to_15m[start_idx:end_idx]
+            max_hydra_crash = 0.0; hydra_crash_time = 0
+            max_legacy_v2 = 0.0; legacy_panic_time = 0
+            # --- A. Hydra Batch ---
+            if hydra_models:
+                sl_atr = fast_1m['atr'][start_idx:end_idx]
+                sl_rsi_1m = fast_1m['rsi'][start_idx:end_idx]
+                sl_bb = fast_1m['bb_width'][start_idx:end_idx]
+                sl_vol = fast_1m['rel_vol'][start_idx:end_idx]
+                # HTF Lookups (Using integer array indexing - Fast)
+                sl_rsi_5m = numpy_htf['5m']['rsi'][sl_map_5m]
+                sl_rsi_15m = numpy_htf['15m']['rsi'][sl_map_15m]
+                # Calc Features
+                sl_dist = 1.5 * sl_atr
+                sl_dist = np.where(sl_dist > 0, sl_dist, entry_price * 0.015)
+                # PnL & Max PnL
+                sl_pnl = sl_close - entry_price
+                sl_norm_pnl = sl_pnl / sl_dist
+                # Max PnL needs cumulative max (rolling max from start of trade)
+                sl_cum_max = np.maximum.accumulate(sl_close)
+                # Correction: cum max of trade needs to start from entry price
+                sl_cum_max = np.maximum(sl_cum_max, entry_price)
+                sl_max_pnl_r = (sl_cum_max - entry_price) / sl_dist
+                sl_atr_pct = sl_atr / sl_close
+                sl_time = np.arange(1, future_len + 1)
+                # Stack Matrix: (240, N_Features)
+                # Feature Order Must Match hydra_cols
+                # Map cols manually to speed up
+                # Create dict of vectors
+                feat_vecs = {
+                    'rsi_1m': sl_rsi_1m, 'rsi_5m': sl_rsi_5m, 'rsi_15m': sl_rsi_15m,
+                    'bb_width': sl_bb, 'rel_vol': sl_vol,
+                    'dist_ema20_1h': np.zeros(future_len),
+                    'atr_pct': sl_atr_pct, 'norm_pnl_r': sl_norm_pnl, 'max_pnl_r': sl_max_pnl_r,
+                    'dist_tp_atr': np.zeros(future_len), 'dist_sl_atr': np.zeros(future_len),
+                    'time_in_trade': sl_time,
+                    'entry_type': np.zeros(future_len), 'oracle_conf': np.full(future_len, oracle_conf),
+                    'l2_score': np.full(future_len, 0.7), 'target_class': np.full(future_len, 3.0)
+                }
+                # Stack
+                X_hydra = np.column_stack([feat_vecs.get(c, np.zeros(future_len)) for c in hydra_cols])
+                try:
+                    # ONE PREDICTION FOR 240 ROWS
+                    probs_crash = hydra_models['crash'].predict_proba(X_hydra)[:, 1]
+                    # Find Max
+                    max_hydra_crash = np.max(probs_crash)
+                    # Find Time
+                    crash_indices = np.where(probs_crash > 0.6)[0]
+                    if len(crash_indices) > 0:
+                        hydra_crash_time = int(sl_ts[crash_indices[0]])
+                except: pass
+            # --- B. Legacy V2 Batch ---
+            if legacy_v2:
+                # 1m Feats
+                l_log = fast_1m['log_ret'][start_idx:end_idx]
+                l_rsi = fast_1m['rsi'][start_idx:end_idx] / 100.0
+                l_fib = fast_1m['fib_pos'][start_idx:end_idx]
+                l_vol = fast_1m['volatility'][start_idx:end_idx]
+                # 5m Feats (Mapped)
+                l5_log = numpy_htf['5m']['log_ret'][sl_map_5m]
+                l5_rsi = numpy_htf['5m']['rsi'][sl_map_5m] / 100.0
+                l5_fib = numpy_htf['5m']['fib_pos'][sl_map_5m]
+                l5_trd = numpy_htf['5m']['trend_slope'][sl_map_5m]
+                # 15m Feats (Mapped)
+                l15_log = numpy_htf['15m']['log_ret'][sl_map_15m]
+                l15_rsi = numpy_htf['15m']['rsi'][sl_map_15m] / 100.0
+                l15_fib618 = numpy_htf['15m']['dist_fib618'][sl_map_15m]
+                l15_trd = numpy_htf['15m']['trend_slope'][sl_map_15m]
+                # Lags (Pre-calculated in _calculate_indicators_vectorized)
+                # We just pull them from fast_1m
+                lag_cols = []
+                for lag in [1, 2, 3, 5, 10, 20]:
+                    lag_cols.append(fast_1m[f'log_ret_lag_{lag}'][start_idx:end_idx])
+                    lag_cols.append(fast_1m[f'rsi_lag_{lag}'][start_idx:end_idx])
+                    lag_cols.append(fast_1m[f'fib_pos_lag_{lag}'][start_idx:end_idx])
+                    lag_cols.append(fast_1m[f'volatility_lag_{lag}'][start_idx:end_idx])
+                # Stack All
+                X_v2 = np.column_stack([
+                    l_log, l_rsi, l_fib, l_vol,
+                    l5_log, l5_rsi, l5_fib, l5_trd,
+                    l15_log, l15_rsi, l15_fib618, l15_trd,
+                    *lag_cols
+                ])
+                try:
+                    # PREDICT BATCH
+                    dm_v2 = xgb.DMatrix(X_v2)
+                    preds_v2 = legacy_v2.predict(dm_v2)
+                    # Handle Multiclass
+                    probs_v2 = preds_v2[:, 2] if len(preds_v2.shape) > 1 else preds_v2
+                    max_legacy_v2 = np.max(probs_v2)
+                    panic_idx = np.where(probs_v2 > 0.8)[0]
+                    if len(panic_idx) > 0 and legacy_panic_time == 0:
+                        legacy_panic_time = int(sl_ts[panic_idx[0]])
+                except: pass
+            # --- Store Result ---
             ai_results.append({
                 'timestamp': ts_val, 'symbol': sym, 'close': entry_price,
+                'real_titan': 0.6,
                 'oracle_conf': oracle_conf,
                 'sniper_score': sniper_score,
                 'risk_hydra_crash': max_hydra_crash,
         dt = time.time() - t0
         if ai_results:
             pd.DataFrame(ai_results).to_pickle(scores_file)
+            print(f"   ✅ [{sym}] Batch-Processed {len(ai_results)} signals in {dt:.2f} seconds.", flush=True)
         else:
             print(f"   ⚠️ [{sym}] No valid signals. Time: {dt:.2f}s", flush=True)
         gc.collect()
     # ==============================================================
+    # PHASE 1 & 2 (Unchanged - Standard Optimization Logic)
     # ==============================================================
     async def generate_truth_data(self):
         # Phase 1 driver: when both force_start_date and force_end_date are set, fetch
         # candles for every symbol in self.TARGET_COINS over that window and hand them
         # to _process_data_in_memory. Otherwise returns None without doing any work.
         if self.force_start_date and self.force_end_date:
             # NOTE(review): dt_start is read below, but its assignment is not visible in
             # this excerpt - presumably parsed from self.force_start_date like dt_end; confirm.
             dt_end = datetime.strptime(self.force_end_date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
             # Window bounds converted to integer epoch milliseconds.
             start_time_ms = int(dt_start.timestamp() * 1000)
             end_time_ms = int(dt_end.timestamp() * 1000)
+            print(f"\n🚜 [Phase 1] Processing Era: {self.force_start_date} -> {self.force_end_date}")
+        else: return
+        for sym in self.TARGET_COINS:
+            try:
+                candles = await self._fetch_all_data_fast(sym, start_time_ms, end_time_ms)
                 # A falsy fetch result skips processing for this symbol.
+                if candles: await self._process_data_in_memory(sym, candles, start_time_ms, end_time_ms)
             # A failure for one symbol is printed and the loop continues with the next symbol.
+            except Exception as e: print(f"   ❌ SKIP {sym}: {e}", flush=True)
             # Runs after every symbol, whether or not it was processed successfully.
+            gc.collect()
     @staticmethod
     def _worker_optimize(combinations_batch, scores_files, initial_capital, fees_pct, max_slots):
         # Simulates each config in combinations_batch against a fresh wallet that starts
         # with initial_capital, and returns a list with one stats dict per config.
         # NOTE(review): several names used below (grouped_by_time, size, pnl, sym, total_t,
         # win_count, loss_count, win_rate) are defined on lines not visible in this excerpt.
         results = []
         for config in combinations_batch:
             wallet = { "balance": initial_capital, "allocated": 0.0, "positions": {}, "trades_history": [] }
             # 'w_titan' and 'hydra_thresh' are required config keys; the oracle and sniper
             # thresholds fall back to 0.6 and 0.4 when absent.
             w_titan = config['w_titan']; oracle_thresh = config.get('oracle_thresh', 0.6)
             sniper_thresh = config.get('sniper_thresh', 0.4); hydra_thresh = config['hydra_thresh']
+            peak_balance = initial_capital; max_drawdown = 0.0
             for ts, group in grouped_by_time:
                 active = list(wallet["positions"].keys())
                 # Symbol -> close price for the rows in this group.
                 current_prices = {row['symbol']: row['close'] for _, row in group.iterrows()}
                             del wallet['positions'][sym]
                             wallet['trades_history'].append({'pnl': pnl})
                 # Equity is free balance plus allocated capital; drawdown is the fractional
                 # drop from the highest equity seen so far.
                 total_eq = wallet['balance'] + wallet['allocated']
                 if total_eq > peak_balance: peak_balance = total_eq
                 dd = (peak_balance - total_eq) / peak_balance
                 if dd > max_drawdown: max_drawdown = dd
                 # New positions are only considered while fewer than max_slots are open.
                 if len(wallet['positions']) < max_slots:
                     for _, row in group.iterrows():
                         # Skip symbols that already have an open position.
                         if row['symbol'] in wallet['positions']: continue
                             wallet['balance'] -= size
                             wallet['allocated'] += size
             final_bal = wallet['balance'] + wallet['allocated']
             net_profit = final_bal - initial_capital
             trades = wallet['trades_history']
             # Best and worst single-trade pnl; both are 0 when no trades were recorded.
             max_win = max([t['pnl'] for t in trades]) if trades else 0
             max_loss = min([t['pnl'] for t in trades]) if trades else 0
             # Longest consecutive winning and losing runs, in trade-history order.
             # A trade with pnl <= 0 counts toward the losing streak.
+            max_win_streak = 0; max_loss_streak = 0; curr_w = 0; curr_l = 0
+            for t in trades:
+                if t['pnl'] > 0:
+                    curr_w += 1; curr_l = 0
+                    if curr_w > max_win_streak: max_win_streak = curr_w
+                else:
+                    curr_l += 1; curr_w = 0
+                    if curr_l > max_loss_streak: max_loss_streak = curr_l
             results.append({
                 'config': config, 'final_balance': final_bal, 'net_profit': net_profit,
                 'total_trades': total_t, 'win_count': win_count, 'loss_count': loss_count,
                 'win_rate': win_rate, 'max_single_win': max_win, 'max_single_loss': max_loss,
+                'max_win_streak': max_win_streak, 'max_loss_streak': max_loss_streak,
                 # max_drawdown is tracked as a fraction and reported here multiplied by 100.
                 'max_drawdown': max_drawdown * 100
             })
         return results
     async def run_optimization(self, target_regime="RANGE"):
         # Runs generate_truth_data() first, then performs the parameter search.
         # NOTE(review): the lines that build the search grid, evaluate it and define
         # `best` are not visible in this excerpt.
         await self.generate_truth_data()
         # Presumably the candidate values for the 'oracle_thresh' config key - confirm.
         oracle_range = [0.5, 0.6, 0.7]
         print("-" * 60)
         # Prints the oracle, sniper and hydra thresholds stored in best['config'].
         print(f"   ⚙️ Oracle={best['config']['oracle_thresh']} | Sniper={best['config']['sniper_thresh']} | Hydra={best['config']['hydra_thresh']}")
         print("="*60)
 async def run_strategic_optimization_task():
     print("\n🧪 [STRATEGIC BACKTEST] Full Stack Mode...")
         hub = AdaptiveHub(r2); await hub.initialize()
         optimizer = HeavyDutyBacktester(dm, proc)
         scenarios = [
             {"regime": "BULL", "start": "2024-01-01", "end": "2024-03-30"},
             {"regime": "BEAR", "start": "2023-08-01", "end": "2023-09-15"},
         for scen in scenarios:
             target = scen["regime"]
             optimizer.set_date_range(scen["start"], scen["end"])
             best_cfg, best_stats = await optimizer.run_optimization(target_regime=target)
             if best_cfg:
                 hub.submit_challenger(target, best_cfg, best_stats)