Tradtesting

Paused

App Files Files Community

Riy777 committed on Dec 9, 2025

Commit

a661075

verified ·

1 Parent(s): 8250b90

Update backtest_engine.py

Browse files

Files changed (1) hide show

backtest_engine.py +142 -322

backtest_engine.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ============================================================
-# 🧪 backtest_engine.py (V138.0 - GEM-Architect: Titan Exact Match + Patterns Confirmed)
 # ============================================================
 import asyncio
@@ -67,42 +67,28 @@ def safe_ta(ind_output, index, fill_method='smart'):
     return s.fillna(0.0).astype('float64')
 def _zv(x):
-    """Z-Score Vectorized for Patterns"""
     with np.errstate(divide='ignore', invalid='ignore'):
         x = np.asarray(x, dtype="float32")
         m = np.nanmean(x, axis=0)
         s = np.nanstd(x, axis=0) + 1e-9
         return np.nan_to_num((x - m) / s, nan=0.0)
-# ============================================================
-# 🧩 PATTERN RECOGNITION HELPER
-# ============================================================
 def _transform_window_for_pattern(df_window):
-    """Prepares a window for the CNN/Pattern Model"""
     try:
         c = df_window['close'].values.astype('float32')
         o = df_window['open'].values.astype('float32')
         h = df_window['high'].values.astype('float32')
         l = df_window['low'].values.astype('float32')
         v = df_window['volume'].values.astype('float32')
-        # 1. Base Z-Score
         base = np.stack([o, h, l, c, v], axis=1)
         base_z = _zv(base)
-        # 2. Extra Features
         lr = np.zeros_like(c); lr[1:] = np.diff(np.log1p(c))
         rng = (h - l) / (c + 1e-9)
         extra = np.stack([lr, rng], axis=1)
         extra_z = _zv(extra)
-        # 3. Indicators
         def _ema(arr, n): return pd.Series(arr).ewm(span=n, adjust=False).mean().values
         ema9 = _ema(c, 9); ema21 = _ema(c, 21); ema50 = _ema(c, 50); ema200 = _ema(c, 200)
-        slope21 = np.gradient(ema21)
-        slope50 = np.gradient(ema50)
         delta = np.diff(c, prepend=c[0])
         up, down = delta.copy(), delta.copy()
         up[up < 0] = 0; down[down > 0] = 0
@@ -110,16 +96,9 @@ def _transform_window_for_pattern(df_window):
         roll_down = pd.Series(down).abs().ewm(alpha=1/14, adjust=False).mean().values
         rs = roll_up / (roll_down + 1e-9)
         rsi = 100.0 - (100.0 / (1.0 + rs))
         indicators = np.stack([ema9, ema21, ema50, ema200, slope21, slope50, rsi], axis=1)
-        # Pad to match shape if needed or specific model reqs
-        # Assuming model expects specific width, here we stick to basic concat
-        # Flatten for XGBoost Pattern Model
         X_seq = np.concatenate([base_z, extra_z, _zv(indicators)], axis=1)
         X_flat = X_seq.flatten()
-        # Add Stat Placeholders (Matches training logic)
         X_stat = np.array([0.5, 0.0, 0.5], dtype="float32")
         return np.concatenate([X_flat, X_stat])
     except: return None
@@ -135,7 +114,6 @@ class HeavyDutyBacktester:
         self.INITIAL_CAPITAL = 10.0
         self.TRADING_FEES = 0.001
         self.MAX_SLOTS = 4
         self.TARGET_COINS = [
             'SOL/USDT', 'XRP/USDT', 'DOGE/USDT', 'ADA/USDT', 'AVAX/USDT', 'LINK/USDT',
             'TON/USDT', 'INJ/USDT', 'APT/USDT', 'OP/USDT', 'ARB/USDT', 'SUI/USDT',
@@ -147,20 +125,15 @@ class HeavyDutyBacktester:
             'STRK/USDT', 'BLUR/USDT', 'ALT/USDT', 'JUP/USDT', 'PENDLE/USDT', 'ETHFI/USDT',
             'MEME/USDT', 'ATOM/USDT'
         ]
         self.force_start_date = None
         self.force_end_date = None
         if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
-        print(f"🧪 [Backtest V138.0] Titan Exact Match + Patterns Enabled.")
     def set_date_range(self, start_str, end_str):
         """Record the forced start and end dates on the instance.

         Both values are stored exactly as passed in; nothing is parsed or
         validated here.
         """
         self.force_start_date, self.force_end_date = start_str, end_str
-    # ==============================================================
-    # ⚡ FAST DATA DOWNLOADER
-    # ==============================================================
     async def _fetch_all_data_fast(self, sym, start_ms, end_ms):
         print(f"   ⚡ [Network] Downloading {sym}...", flush=True)
         limit = 1000
@@ -172,7 +145,6 @@ class HeavyDutyBacktester:
             current += duration_per_batch
         all_candles = []
         sem = asyncio.Semaphore(20)
         async def _fetch_batch(timestamp):
             async with sem:
                 for _ in range(3):
@@ -180,7 +152,6 @@ class HeavyDutyBacktester:
                         return await self.dm.exchange.fetch_ohlcv(sym, '1m', since=timestamp, limit=limit)
                     except: await asyncio.sleep(0.5)
                 return []
         chunk_size = 50
         for i in range(0, len(tasks), chunk_size):
             chunk_tasks = tasks[i:i + chunk_size]
@@ -188,7 +159,6 @@ class HeavyDutyBacktester:
             results = await asyncio.gather(*futures)
             for res in results:
                 if res: all_candles.extend(res)
         if not all_candles: return None
         df = pd.DataFrame(all_candles, columns=['timestamp', 'o', 'h', 'l', 'c', 'v'])
         df.drop_duplicates('timestamp', inplace=True)
@@ -197,162 +167,112 @@ class HeavyDutyBacktester:
         print(f"     ✅ Downloaded {len(df)} candles.", flush=True)
         return df.values.tolist()
-    # ==============================================================
-    # 🏎️ VECTORIZED INDICATORS (EXACT MATCH TO LIVE SYSTEM)
-    # ==============================================================
     def _calculate_indicators_vectorized(self, df, timeframe='1m'):
-        # 1. Clean Types
         cols = ['close', 'high', 'low', 'volume', 'open']
         for c in cols: df[c] = df[c].astype(np.float64)
         idx = df.index
-        # ---------------------------------------------------------
-        # 🧠 PART 1: TITAN FEATURES
-        # ---------------------------------------------------------
         df['RSI'] = safe_ta(ta.rsi(df['close'], length=14), idx, 50)
         macd = ta.macd(df['close'])
         if macd is not None:
             df['MACD'] = safe_ta(macd.iloc[:, 0], idx, 0)
             df['MACD_h'] = safe_ta(macd.iloc[:, 1], idx, 0)
-        else:
-            df['MACD'] = 0.0; df['MACD_h'] = 0.0
         df['CCI'] = safe_ta(ta.cci(df['high'], df['low'], df['close'], length=20), idx, 0)
         adx = ta.adx(df['high'], df['low'], df['close'], length=14)
         if adx is not None: df['ADX'] = safe_ta(adx.iloc[:, 0], idx, 0)
         else: df['ADX'] = 0.0
-        # ✅ NEW: Trend_Strong for 1D timeframe (Titan requirement)
-        if timeframe == '1d':
-            df['Trend_Strong'] = np.where(df['ADX'] > 25, 1.0, 0.0)
         for p in [9, 21, 50, 200]:
             ema = safe_ta(ta.ema(df['close'], length=p), idx, 0)
             df[f'EMA_{p}_dist'] = ((df['close'] / ema.replace(0, np.nan)) - 1).fillna(0)
             df[f'ema{p}'] = ema
         df['ema20'] = safe_ta(ta.ema(df['close'], length=20), idx, df['close'])
         bb = ta.bbands(df['close'], length=20, std=2.0)
         if bb is not None:
             w = ((bb.iloc[:, 2] - bb.iloc[:, 0]) / bb.iloc[:, 1].replace(0, np.nan)).fillna(0)
             p = ((df['close'] - bb.iloc[:, 0]) / (bb.iloc[:, 2] - bb.iloc[:, 0]).replace(0, np.nan)).fillna(0)
-            df['BB_w'] = w; df['BB_p'] = p
-            df['bb_width'] = w
-        else:
-            df['BB_w'] = 0; df['BB_p'] = 0; df['bb_width'] = 0
         df['MFI'] = safe_ta(ta.mfi(df['high'], df['low'], df['close'], df['volume'], length=14), idx, 50)
         vwap = ta.vwap(df['high'], df['low'], df['close'], df['volume'])
         if vwap is not None:
             df['VWAP_dist'] = ((df['close'] / vwap.replace(0, np.nan)) - 1).fillna(0)
             df['vwap'] = vwap
-        else:
-            df['VWAP_dist'] = 0.0
-            df['vwap'] = df['close']
         df['atr'] = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=14), idx, 0)
         df['atr_pct'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
-        df['ATR_pct'] = df['atr_pct'] # Alias for Titan
-        # ---------------------------------------------------------
-        # 🎯 PART 2: SNIPER FEATURES (1m Only)
-        # ---------------------------------------------------------
         if timeframe == '1m':
             df['return_1m'] = df['close'].pct_change().fillna(0)
             df['return_3m'] = df['close'].pct_change(3).fillna(0)
             df['return_5m'] = df['close'].pct_change(5).fillna(0)
             df['return_15m'] = df['close'].pct_change(15).fillna(0)
             df['rsi_14'] = df['RSI']
             e9 = df['ema9'].replace(0, np.nan)
             df['ema_9_slope'] = ((df['ema9'] - df['ema9'].shift(1)) / e9.shift(1)).fillna(0)
             df['ema_21_dist'] = df['EMA_21_dist']
             atr_100 = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=100), idx, 0)
             df['atr_z'] = _z_roll(atr_100)
             df['vol_zscore_50'] = _z_roll(df['volume'], 50)
             rng = (df['high'] - df['low']).replace(0, 1e-9)
             df['candle_range'] = _z_roll(rng, 500)
             df['close_pos_in_range'] = ((df['close'] - df['low']) / rng).fillna(0.5)
             df['dollar_vol'] = df['close'] * df['volume']
             amihud_raw = (df['return_1m'].abs() / df['dollar_vol'].replace(0, np.nan)).fillna(0)
             df['amihud'] = _z_roll(amihud_raw)
             dp = df['close'].diff()
             roll_cov = dp.rolling(64).cov(dp.shift(1))
             roll_spread_raw = (2 * np.sqrt(np.maximum(0, -roll_cov))).fillna(0)
             df['roll_spread'] = _z_roll(roll_spread_raw)
             sign = np.sign(df['close'].diff()).fillna(0)
             signed_vol = sign * df['volume']
-            ofi_raw = signed_vol.rolling(30).sum()
             df['ofi'] = _z_roll(ofi_raw)
             buy_vol = (sign > 0) * df['volume']
             sell_vol = (sign < 0) * df['volume']
             imb = (buy_vol.rolling(60).sum() - sell_vol.rolling(60).sum()).abs()
             tot = df['volume'].rolling(60).sum().replace(0, np.nan)
             df['vpin'] = (imb / tot).fillna(0)
             vwap_win = 20
             v_short = (df['dollar_vol'].rolling(vwap_win).sum() / df['volume'].rolling(vwap_win).sum().replace(0, np.nan)).fillna(df['close'])
             df['vwap_dev'] = _z_roll(df['close'] - v_short)
             rv_gk = ((np.log(df['high'] / df['low'])**2) / 2) - ((2 * np.log(2) - 1) * (np.log(df['close'] / df['open'])**2))
             df['rv_gk'] = _z_roll(rv_gk)
             df['L_score'] = (df['vol_zscore_50'] - df['amihud'] - df['roll_spread'] - df['rv_gk'].abs() - df['vwap_dev'].abs() + df['ofi']).fillna(0)
-        # ---------------------------------------------------------
-        # 🧠 PART 3: ORACLE / HYDRA / LEGACY EXTRAS
-        # ---------------------------------------------------------
         df['slope'] = safe_ta(ta.slope(df['close'], length=7), idx, 0)
         vol_mean = df['volume'].rolling(20).mean()
         vol_std = df['volume'].rolling(20).std().replace(0, np.nan)
         df['vol_z'] = ((df['volume'] - vol_mean) / vol_std).fillna(0)
         df['rel_vol'] = df['volume'] / (df['volume'].rolling(50).mean() + 1e-9)
         df['log_ret'] = np.log(df['close'] / df['close'].shift(1).replace(0, np.nan)).fillna(0)
         roll_max = df['high'].rolling(50).max()
         roll_min = df['low'].rolling(50).min()
         diff = (roll_max - roll_min).replace(0, 1e-9)
         df['fib_pos'] = ((df['close'] - roll_min) / diff).fillna(0.5)
-        df['trend_slope'] = ((df['ema20'] - df['ema20'].shift(5)) / df['ema20'].shift(5)).fillna(0)
         df['volatility'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
         fib618 = roll_max - (diff * 0.382)
         df['dist_fib618'] = ((df['close'] - fib618) / df['close'].replace(0, np.nan)).fillna(0)
-        df['dist_ema50'] = (df['close'] - df['ema50']) / df['close']
-        df['ema200'] = ta.ema(df['close'], length=200)
-        df['dist_ema200'] = ((df['close'] - df['ema200']) / df['ema200'].replace(0, np.nan)).fillna(0)
         if timeframe == '1m':
             for lag in [1, 2, 3, 5, 10, 20]:
                 df[f'log_ret_lag_{lag}'] = df['log_ret'].shift(lag).fillna(0)
                 df[f'rsi_lag_{lag}'] = (df['RSI'].shift(lag) / 100.0).fillna(0.5)
                 df[f'fib_pos_lag_{lag}'] = df['fib_pos'].shift(lag).fillna(0.5)
                 df[f'volatility_lag_{lag}'] = df['volatility'].shift(lag).fillna(0)
         df.fillna(0, inplace=True)
         return df
-    # ==============================================================
-    # 🧠 CPU PROCESSING
-    # ==============================================================
     async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
         safe_sym = sym.replace('/', '_')
         period_suffix = f"{start_ms}_{end_ms}"
         scores_file = f"{CACHE_DIR}/{safe_sym}_{period_suffix}_scores.pkl"
         if os.path.exists(scores_file):
              print(f"   📂 [{sym}] Data Exists -> Skipping.")
              return
@@ -366,14 +286,11 @@ class HeavyDutyBacktester:
         df_1m = df_1m.sort_index()
         frames = {}
-        agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
-        # 1. Calc 1m
         frames['1m'] = self._calculate_indicators_vectorized(df_1m.copy(), timeframe='1m')
         frames['1m']['timestamp'] = frames['1m'].index.floor('1min').astype(np.int64) // 10**6
         fast_1m = {col: frames['1m'][col].values for col in frames['1m'].columns}
-        # 2. Calc HTF
         numpy_htf = {}
         for tf_str, tf_code in [('5m', '5T'), ('15m', '15T'), ('1h', '1h'), ('4h', '4h'), ('1d', '1D')]:
             resampled = df_1m.resample(tf_code).agg(agg_dict).dropna()
@@ -382,105 +299,51 @@ class HeavyDutyBacktester:
             frames[tf_str] = resampled
             numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}
-        # 3. Global Maps
         arr_ts_1m = fast_1m['timestamp']
         map_5m = np.clip(np.searchsorted(numpy_htf['5m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['5m']['timestamp']) - 1)
         map_15m = np.clip(np.searchsorted(numpy_htf['15m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['15m']['timestamp']) - 1)
         map_1h = np.clip(np.searchsorted(numpy_htf['1h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['1h']['timestamp']) - 1)
         map_4h = np.clip(np.searchsorted(numpy_htf['4h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['4h']['timestamp']) - 1)
-        # 1D Mapping
-        if '1d' in numpy_htf:
-            map_1d = np.clip(np.searchsorted(numpy_htf['1d']['timestamp'], arr_ts_1m), 0, len(numpy_htf['1d']['timestamp']) - 1)
-        else:
-            map_1d = np.zeros(len(arr_ts_1m), dtype=int)
-        # 4. Load Models
         hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
         hydra_cols = getattr(self.proc.guardian_hydra, 'feature_cols', []) if self.proc.guardian_hydra else []
         legacy_v2 = getattr(self.proc.guardian_legacy, 'model_v2', None)
         oracle_dir = getattr(self.proc.oracle, 'model_direction', None)
         oracle_cols = getattr(self.proc.oracle, 'feature_cols', [])
         sniper_models = getattr(self.proc.sniper, 'models', [])
         sniper_cols = getattr(self.proc.sniper, 'feature_names', [])
         titan_model = getattr(self.proc.titan, 'model', None)
-        # ✅ TITAN EXACT FEATURES LIST (From your pickle)
-        titan_cols = [
-            '5m_open', '5m_high', '5m_low', '5m_close', '5m_volume', '5m_RSI', '5m_MACD', '5m_MACD_h',
-            '5m_CCI', '5m_ADX', '5m_EMA_9_dist', '5m_EMA_21_dist', '5m_EMA_50_dist', '5m_EMA_200_dist',
-            '5m_BB_w', '5m_BB_p', '5m_MFI', '5m_VWAP_dist',
-            '15m_timestamp', '15m_RSI', '15m_MACD', '15m_MACD_h', '15m_CCI', '15m_ADX',
-            '15m_EMA_9_dist', '15m_EMA_21_dist', '15m_EMA_50_dist', '15m_EMA_200_dist',
-            '15m_BB_w', '15m_BB_p', '15m_MFI', '15m_VWAP_dist',
-            '1h_timestamp', '1h_RSI', '1h_MACD_h', '1h_EMA_50_dist', '1h_EMA_200_dist', '1h_ATR_pct',
-            '4h_timestamp', '4h_RSI', '4h_MACD_h', '4h_EMA_50_dist', '4h_EMA_200_dist', '4h_ATR_pct',
-            '1d_timestamp', '1d_RSI', '1d_EMA_200_dist', '1d_Trend_Strong'
-        ]
-        # ======================================================================
-        # 🔥 GLOBAL INFERENCE (Batch)
-        # ======================================================================
-        # A. TITAN (Fixed Mapping)
         global_titan_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
         if titan_model:
             print("     🚀 Running Global Titan...", flush=True)
             try:
                 t_vecs = []
                 for col in titan_cols:
-                    # Parse name: e.g. "5m_RSI" -> tf="5m", feat="RSI"
-                    parts = col.split('_', 1)
-                    tf = parts[0]
-                    feat = parts[1]
-                    # Target Array Mapping
-                    target_arr = None
-                    target_map = None
-                    if tf == '5m': target_arr = numpy_htf['5m']; target_map = map_5m
-                    elif tf == '15m': target_arr = numpy_htf['15m']; target_map = map_15m
-                    elif tf == '1h': target_arr = numpy_htf['1h']; target_map = map_1h
-                    elif tf == '4h': target_arr = numpy_htf['4h']; target_map = map_4h
-                    elif tf == '1d': target_arr = numpy_htf['1d']; target_map = map_1d
-                    # Special cases for raw columns in numpy_htf
-                    # timestamp, open, high, low, close, volume are preserved
-                    if target_arr and feat in target_arr:
-                        t_vecs.append(target_arr[feat][target_map])
-                    elif target_arr and feat == 'timestamp': # Handle 15m_timestamp explicitly
-                        t_vecs.append(target_arr['timestamp'][target_map])
-                    else:
-                        # Fallback for raw OHLCV if not found directly
-                        if target_arr and feat in ['open', 'high', 'low', 'close', 'volume']:
-                             t_vecs.append(target_arr[feat][target_map])
-                        else:
-                             t_vecs.append(np.zeros(len(arr_ts_1m)))
                 X_TITAN = np.column_stack(t_vecs)
-                dmat = xgb.DMatrix(X_TITAN, feature_names=titan_cols)
-                preds_t = titan_model.predict(dmat)
-                global_titan_scores = _revive_score_distribution(preds_t)
-            except Exception as e: print(f"Titan Error: {e}")
-        # B. PATTERNS (The Missing Link)
-        global_pattern_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
-        pattern_models = getattr(self.proc.pattern_engine, 'models', {})
-        if pattern_models and '15m' in pattern_models:
-            print("     🚀 Running Global Patterns...", flush=True)
-            try:
-                # Patterns use 15m window of 200 candles
-                # We need to construct this efficiently.
-                # Since we are in Batch Mode, we can't easily slide window for 100k candles quickly in Python.
-                # Strategy: Calculate only for candidates LATER?
-                # OR: Use a simplified logic or skip if too slow.
-                # For now, let's keep it placeholder 0.5 or try calculating for Candidate Indices ONLY in the loop.
-                pass
             except: pass
-        # C. SNIPER
         global_sniper_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
         if sniper_models:
             print("     🚀 Running Global Sniper...", flush=True)
@@ -490,13 +353,12 @@ class HeavyDutyBacktester:
                     if col in fast_1m: s_vecs.append(fast_1m[col])
                     elif col == 'atr' and 'atr_z' in fast_1m: s_vecs.append(fast_1m['atr_z'])
                     else: s_vecs.append(np.zeros(len(arr_ts_1m)))
                 X_SNIPER = np.column_stack(s_vecs)
-                preds_list = [m.predict(X_SNIPER) for m in sniper_models]
-                global_sniper_scores = _revive_score_distribution(np.mean(preds_list, axis=0))
-            except Exception as e: print(f"Sniper Error: {e}")
-        # D. ORACLE
         global_oracle_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
         if oracle_dir:
             print("     🚀 Running Global Oracle...", flush=True)
@@ -508,179 +370,137 @@ class HeavyDutyBacktester:
                     elif col.startswith('4h_'): o_vecs.append(numpy_htf['4h'].get(col[3:], np.zeros(len(arr_ts_1m)))[map_4h])
                     elif col == 'sim_titan_score': o_vecs.append(global_titan_scores)
                     elif col == 'sim_mc_score': o_vecs.append(np.full(len(arr_ts_1m), 0.5))
-                    elif col == 'sim_pattern_score': o_vecs.append(global_pattern_scores)
                     else: o_vecs.append(np.zeros(len(arr_ts_1m)))
                 X_ORACLE = np.column_stack(o_vecs)
                 preds_o = oracle_dir.predict(X_ORACLE)
                 preds_o = preds_o if isinstance(preds_o, np.ndarray) and len(preds_o.shape)==1 else preds_o[:, 0]
                 global_oracle_scores = _revive_score_distribution(preds_o)
-            except Exception as e: print(f"Oracle Error: {e}")
-        # E. LEGACY V2
         global_v2_scores = np.zeros(len(arr_ts_1m), dtype=np.float32)
         if legacy_v2:
             try:
-                l_log = fast_1m['log_ret']
-                l_rsi = fast_1m['RSI'] / 100.0
-                l_fib = fast_1m['fib_pos']
-                l_vol = fast_1m['volatility']
-                l5_log = numpy_htf['5m']['log_ret'][map_5m]
-                l5_rsi = numpy_htf['5m']['RSI'][map_5m] / 100.0
-                l5_fib = numpy_htf['5m']['fib_pos'][map_5m]
-                l5_trd = numpy_htf['5m']['trend_slope'][map_5m]
-                l15_log = numpy_htf['15m']['log_ret'][map_15m]
-                l15_rsi = numpy_htf['15m']['RSI'][map_15m] / 100.0
-                l15_fib618 = numpy_htf['15m']['dist_fib618'][map_15m]
-                l15_trd = numpy_htf['15m']['trend_slope'][map_15m]
                 lags = []
                 for lag in [1, 2, 3, 5, 10, 20]:
-                    lags.extend([fast_1m[f'log_ret_lag_{lag}'], fast_1m[f'rsi_lag_{lag}'],
-                                 fast_1m[f'fib_pos_lag_{lag}'], fast_1m[f'volatility_lag_{lag}']])
-                X_V2 = np.column_stack([l_log, l_rsi, l_fib, l_vol, l5_log, l5_rsi, l5_fib, l5_trd,
-                                        l15_log, l15_rsi, l15_fib618, l15_trd, *lags])
                 preds = legacy_v2.predict(xgb.DMatrix(X_V2))
                 global_v2_scores = preds[:, 2] if len(preds.shape) > 1 else preds
             except: pass
-        # F. HYDRA STATIC
-        global_hydra_static = None
-        if hydra_models:
-            try:
-                global_hydra_static = np.column_stack([
-                    fast_1m['RSI'], numpy_htf['5m']['RSI'][map_5m], numpy_htf['15m']['RSI'][map_15m],
-                    fast_1m['bb_width'], fast_1m['rel_vol'], fast_1m['atr'], fast_1m['close']
-                ])
-            except: pass
-        # --- 5. Filtering Candidates ---
-        is_candidate = (
-            (numpy_htf['1h']['RSI'][map_1h] <= 70) &
-            (global_titan_scores > 0.4) &
-            (global_oracle_scores > 0.4)
-        )
         candidate_indices = np.where(is_candidate)[0]
         start_ts_val = frames['1m'].index[0] + pd.Timedelta(minutes=500)
         start_idx_offset = np.searchsorted(arr_ts_1m, int(start_ts_val.timestamp()*1000))
         candidate_indices = candidate_indices[candidate_indices >= start_idx_offset]
-        max_idx = len(arr_ts_1m) - 245
-        candidate_indices = candidate_indices[candidate_indices < max_idx]
-        print(f"     🎯 Candidates: {len(candidate_indices)}. Simulating Trades...", flush=True)
         ai_results = []
-        time_vec = np.arange(1, 241)
-        # --- 6. SIMULATION LOOP (Lite) ---
-        for idx_entry in candidate_indices:
-            entry_price = fast_1m['close'][idx_entry]
-            entry_ts = int(arr_ts_1m[idx_entry])
-            s_titan = global_titan_scores[idx_entry]
-            s_oracle = global_oracle_scores[idx_entry]
-            s_sniper = global_sniper_scores[idx_entry]
-            # --- PATTERN CHECK (On Demand) ---
-            s_pattern = 0.5
-            if pattern_models:
-                # Only check patterns for candidates (expensive)
-                try:
-                    # Map to 15m index
-                    idx_15m_entry = map_15m[idx_entry]
-                    if idx_15m_entry > 200:
-                        # Reconstruct window from numpy_htf['15m']
-                        # Need o, h, l, c, v arrays
-                        # Construct simple df for _transform
-                        p_win = pd.DataFrame({
-                            'open': frames['15m']['open'].values[idx_15m_entry-200:idx_15m_entry],
-                            'high': frames['15m']['high'].values[idx_15m_entry-200:idx_15m_entry],
-                            'low': frames['15m']['low'].values[idx_15m_entry-200:idx_15m_entry],
-                            'close': frames['15m']['close'].values[idx_15m_entry-200:idx_15m_entry],
-                            'volume': frames['15m']['volume'].values[idx_15m_entry-200:idx_15m_entry]
-                        })
-                        vec = _transform_window_for_pattern(p_win)
-                        if vec is not None:
-                            s_pattern = pattern_models['15m'].predict(xgb.DMatrix(vec.reshape(1,-1)))[0]
-                            # Update Oracle with real pattern score? Too late for global, but good for logs
-                except: pass
-            idx_exit = idx_entry + 240
-            # Legacy Max Risk
-            max_v2 = np.max(global_v2_scores[idx_entry:idx_exit])
-            v2_time = 0
-            if max_v2 > 0.8:
-                rel = np.argmax(global_v2_scores[idx_entry:idx_exit])
-                v2_time = int(arr_ts_1m[idx_entry + rel])
-            # Hydra Dynamic Risk
-            max_hydra = 0.0; hydra_time = 0
-            if hydra_models and global_hydra_static is not None:
-                sl_st = global_hydra_static[idx_entry:idx_exit]
-                sl_close = sl_st[:, 6]
-                sl_atr = sl_st[:, 5]
-                dist = np.maximum(1.5 * sl_atr, entry_price * 0.015)
-                pnl = sl_close - entry_price
-                norm_pnl = pnl / dist
-                cum_max = np.maximum.accumulate(sl_close)
-                max_pnl_r = (np.maximum(cum_max, entry_price) - entry_price) / dist
-                atr_pct = sl_atr / sl_close
-                zeros = np.zeros(240)
-                oracle_arr = np.full(240, s_oracle)
-                l2_arr = np.full(240, 0.7)
-                tgt_arr = np.full(240, 3.0)
-                X_H = np.column_stack([
-                    sl_st[:,0], sl_st[:,1], sl_st[:,2], sl_st[:,3], sl_st[:,4],
-                    zeros, atr_pct, norm_pnl, max_pnl_r,
-                    zeros, zeros, time_vec, zeros,
-                    oracle_arr, l2_arr, tgt_arr
-                ])
-                try:
-                    probs = hydra_models['crash'].predict_proba(X_H)[:, 1]
-                    max_hydra = np.max(probs)
-                    if max_hydra > 0.6:
-                        t_idx = np.argmax(probs)
-                        hydra_time = int(arr_ts_1m[idx_entry + t_idx])
-                except: pass
-            ai_results.append({
-                'timestamp': entry_ts, 'symbol': sym, 'close': entry_price,
-                'real_titan': s_titan,
-                'oracle_conf': s_oracle,
-                'sniper_score': s_sniper,
-                'pattern_score': s_pattern,
-                'risk_hydra_crash': max_hydra,
-                'time_hydra_crash': hydra_time,
-                'risk_legacy_v2': max_v2,
-                'time_legacy_panic': v2_time,
-                'signal_type': 'BREAKOUT',
-                'l1_score': 50.0
-            })
         dt = time.time() - t0
         if ai_results:
             pd.DataFrame(ai_results).to_pickle(scores_file)
             print(f"   ✅ [{sym}] Completed in {dt:.2f} seconds. ({len(ai_results)} signals)", flush=True)
-        else:
-            print(f"   ⚠️ [{sym}] No signals.", flush=True)
-        del frames, fast_1m, numpy_htf, global_v2_scores, global_oracle_scores, global_sniper_scores, global_titan_scores
         gc.collect()
-    # ==============================================================
-    # PHASE 1 & 2 (Standard Optimization)
-    # ==============================================================
     async def generate_truth_data(self):
         if self.force_start_date:
             dt_s = datetime.strptime(self.force_start_date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
@@ -835,7 +655,7 @@ class HeavyDutyBacktester:
         return best['config'], best
 async def run_strategic_optimization_task():
-    print("\n🧪 [STRATEGIC BACKTEST] Full Spectrum Mode...")
     r2 = R2Service(); dm = DataManager(None, None, r2); proc = MLProcessor(dm)
     try:
         await dm.initialize(); await proc.initialize()

 # ============================================================
+# 🧪 backtest_engine.py (V139.0 - GEM-Architect: Vectorized Hydra Speed)
 # ============================================================
 import asyncio
     return s.fillna(0.0).astype('float64')
 def _zv(x):
     with np.errstate(divide='ignore', invalid='ignore'):
         x = np.asarray(x, dtype="float32")
         m = np.nanmean(x, axis=0)
         s = np.nanstd(x, axis=0) + 1e-9
         return np.nan_to_num((x - m) / s, nan=0.0)
 def _transform_window_for_pattern(df_window):
     # Turn an OHLCV window (needs 'open', 'high', 'low', 'close', 'volume'
     # columns) into one flat 1-D feature vector: 14 z-scored columns per row
     # (5 price/volume + 2 extras + 7 indicators), flattened row-major, followed
     # by three constant values. Returns None if any step raises.
     try:
         c = df_window['close'].values.astype('float32')
         o = df_window['open'].values.astype('float32')
         h = df_window['high'].values.astype('float32')
         l = df_window['low'].values.astype('float32')
         v = df_window['volume'].values.astype('float32')
         # Raw OHLCV, z-scored per column over the window.
         base = np.stack([o, h, l, c, v], axis=1)
         base_z = _zv(base)
         # Extras: first difference of log1p(close) (first element stays 0)
         # and the bar range relative to close.
         lr = np.zeros_like(c); lr[1:] = np.diff(np.log1p(c))
         rng = (h - l) / (c + 1e-9)
         extra = np.stack([lr, rng], axis=1)
         extra_z = _zv(extra)
         # Recursive (adjust=False) exponential moving average helper.
         def _ema(arr, n): return pd.Series(arr).ewm(span=n, adjust=False).mean().values
         ema9 = _ema(c, 9); ema21 = _ema(c, 21); ema50 = _ema(c, 50); ema200 = _ema(c, 200)
+        slope21 = np.gradient(ema21); slope50 = np.gradient(ema50)
         # RSI-style oscillator: split bar-to-bar changes into gains and losses.
         delta = np.diff(c, prepend=c[0])
         up, down = delta.copy(), delta.copy()
         up[up < 0] = 0; down[down > 0] = 0
         # NOTE(review): `roll_up` is used below but is not assigned in the
         # lines visible here; presumably the matching EWM of `up` sits on a
         # line missing from this view. If it is truly absent, the NameError
         # is swallowed by the bare except and the function returns None.
         roll_down = pd.Series(down).abs().ewm(alpha=1/14, adjust=False).mean().values
         rs = roll_up / (roll_down + 1e-9)
         rsi = 100.0 - (100.0 / (1.0 + rs))
         indicators = np.stack([ema9, ema21, ema50, ema200, slope21, slope50, rsi], axis=1)
         X_seq = np.concatenate([base_z, extra_z, _zv(indicators)], axis=1)
         X_flat = X_seq.flatten()
         # Fixed placeholder values appended after the flattened sequence.
         X_stat = np.array([0.5, 0.0, 0.5], dtype="float32")
         return np.concatenate([X_flat, X_stat])
     # Bare except: every failure (missing column, bad dtype, ...) yields None.
     except: return None
         self.INITIAL_CAPITAL = 10.0
         self.TRADING_FEES = 0.001
         self.MAX_SLOTS = 4
         self.TARGET_COINS = [
             'SOL/USDT', 'XRP/USDT', 'DOGE/USDT', 'ADA/USDT', 'AVAX/USDT', 'LINK/USDT',
             'TON/USDT', 'INJ/USDT', 'APT/USDT', 'OP/USDT', 'ARB/USDT', 'SUI/USDT',
             'STRK/USDT', 'BLUR/USDT', 'ALT/USDT', 'JUP/USDT', 'PENDLE/USDT', 'ETHFI/USDT',
             'MEME/USDT', 'ATOM/USDT'
         ]
         self.force_start_date = None
         self.force_end_date = None
         if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
+        print(f"🧪 [Backtest V139.0] Vectorized Hydra Speed Optimization.")
     def set_date_range(self, start_str, end_str):
         # Store the forced backtest window on the instance. No validation is
         # done here; generate_truth_data later parses force_start_date with
         # the "%Y-%m-%d" format.
         self.force_start_date, self.force_end_date = start_str, end_str
     async def _fetch_all_data_fast(self, sym, start_ms, end_ms):
         # Download 1-minute OHLCV candles for `sym` through
         # self.dm.exchange.fetch_ohlcv and return them as a list of rows
         # (timestamp, o, h, l, c, v) de-duplicated on timestamp, or None when
         # nothing was downloaded.
         # NOTE(review): several lines of this method are missing from this
         # view (construction of `tasks`, the `try:` paired with the `except`
         # below, and the creation of `futures`); the comments only describe
         # what is visible.
         print(f"   ⚡ [Network] Downloading {sym}...", flush=True)
         limit = 1000
             current += duration_per_batch
         all_candles = []
         # At most 20 fetches are in flight at once.
         sem = asyncio.Semaphore(20)
         async def _fetch_batch(timestamp):
             async with sem:
                 # Up to 3 attempts per batch, sleeping 0.5 s after a failure;
                 # an empty list is returned when every attempt fails.
                 for _ in range(3):
                         return await self.dm.exchange.fetch_ohlcv(sym, '1m', since=timestamp, limit=limit)
                     except: await asyncio.sleep(0.5)
                 return []
         # Batches are awaited in groups of 50 entries of `tasks`.
         chunk_size = 50
         for i in range(0, len(tasks), chunk_size):
             chunk_tasks = tasks[i:i + chunk_size]
             results = await asyncio.gather(*futures)
             for res in results:
                 if res: all_candles.extend(res)
         if not all_candles: return None
         df = pd.DataFrame(all_candles, columns=['timestamp', 'o', 'h', 'l', 'c', 'v'])
         # Adjacent batches may overlap; keep the first row per timestamp.
         # No explicit sort is applied in the visible lines.
         df.drop_duplicates('timestamp', inplace=True)
         print(f"     ✅ Downloaded {len(df)} candles.", flush=True)
         return df.values.tolist()
     def _calculate_indicators_vectorized(self, df, timeframe='1m'):
         # Add technical-indicator and microstructure feature columns to `df`
         # IN PLACE and return the same frame. `df` must carry 'open', 'high',
         # 'low', 'close' and 'volume' columns. `timeframe` selects extras:
         # '1m' adds the short-horizon/microstructure features and lag columns,
         # '1d' adds 'Trend_Strong'. All remaining NaNs are replaced with 0
         # at the end.
         # `safe_ta` and `_z_roll` are helpers defined elsewhere in this file;
         # the third argument given to safe_ta is passed as its fill_method.
         cols = ['close', 'high', 'low', 'volume', 'open']
         for c in cols: df[c] = df[c].astype(np.float64)
         idx = df.index
         df['RSI'] = safe_ta(ta.rsi(df['close'], length=14), idx, 50)
         # ta.macd may return None; its first column becomes 'MACD' and its
         # second column 'MACD_h'.
         macd = ta.macd(df['close'])
         if macd is not None:
             df['MACD'] = safe_ta(macd.iloc[:, 0], idx, 0)
             df['MACD_h'] = safe_ta(macd.iloc[:, 1], idx, 0)
+        else: df['MACD'] = 0.0; df['MACD_h'] = 0.0
         df['CCI'] = safe_ta(ta.cci(df['high'], df['low'], df['close'], length=20), idx, 0)
         adx = ta.adx(df['high'], df['low'], df['close'], length=14)
         if adx is not None: df['ADX'] = safe_ta(adx.iloc[:, 0], idx, 0)
         else: df['ADX'] = 0.0
+        if timeframe == '1d': df['Trend_Strong'] = np.where(df['ADX'] > 25, 1.0, 0.0)
         # EMA levels plus relative distance of close from each EMA; a zero
         # EMA is treated as missing so the ratio becomes 0 instead of inf.
         for p in [9, 21, 50, 200]:
             ema = safe_ta(ta.ema(df['close'], length=p), idx, 0)
             df[f'EMA_{p}_dist'] = ((df['close'] / ema.replace(0, np.nan)) - 1).fillna(0)
             df[f'ema{p}'] = ema
         df['ema20'] = safe_ta(ta.ema(df['close'], length=20), idx, df['close'])
         # Bollinger features from the first three bbands columns (presumably
         # lower / middle / upper - confirm against the pandas_ta version in use):
         # w = (col2 - col0) / col1, p = (close - col0) / (col2 - col0).
         bb = ta.bbands(df['close'], length=20, std=2.0)
         if bb is not None:
             w = ((bb.iloc[:, 2] - bb.iloc[:, 0]) / bb.iloc[:, 1].replace(0, np.nan)).fillna(0)
             p = ((df['close'] - bb.iloc[:, 0]) / (bb.iloc[:, 2] - bb.iloc[:, 0]).replace(0, np.nan)).fillna(0)
+            df['BB_w'] = w; df['BB_p'] = p; df['bb_width'] = w
+        else: df['BB_w'] = 0; df['BB_p'] = 0; df['bb_width'] = 0
         df['MFI'] = safe_ta(ta.mfi(df['high'], df['low'], df['close'], df['volume'], length=14), idx, 50)
         vwap = ta.vwap(df['high'], df['low'], df['close'], df['volume'])
         if vwap is not None:
             df['VWAP_dist'] = ((df['close'] / vwap.replace(0, np.nan)) - 1).fillna(0)
             df['vwap'] = vwap
+        else: df['VWAP_dist'] = 0.0; df['vwap'] = df['close']
         df['atr'] = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=14), idx, 0)
         df['atr_pct'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
         # 'ATR_pct' is an alias of 'atr_pct' under a second column name.
+        df['ATR_pct'] = df['atr_pct']
         if timeframe == '1m':
             # --- 1-minute-only features ---
             df['return_1m'] = df['close'].pct_change().fillna(0)
             df['return_3m'] = df['close'].pct_change(3).fillna(0)
             df['return_5m'] = df['close'].pct_change(5).fillna(0)
             df['return_15m'] = df['close'].pct_change(15).fillna(0)
             df['rsi_14'] = df['RSI']
             e9 = df['ema9'].replace(0, np.nan)
             df['ema_9_slope'] = ((df['ema9'] - df['ema9'].shift(1)) / e9.shift(1)).fillna(0)
             df['ema_21_dist'] = df['EMA_21_dist']
             atr_100 = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=100), idx, 0)
             df['atr_z'] = _z_roll(atr_100)
             df['vol_zscore_50'] = _z_roll(df['volume'], 50)
             # A zero high-low range is replaced by 1e-9 so it can be used as
             # a divisor below.
             rng = (df['high'] - df['low']).replace(0, 1e-9)
             df['candle_range'] = _z_roll(rng, 500)
             df['close_pos_in_range'] = ((df['close'] - df['low']) / rng).fillna(0.5)
             df['dollar_vol'] = df['close'] * df['volume']
             # |1m return| per unit of dollar volume, then rolling-normalised.
             amihud_raw = (df['return_1m'].abs() / df['dollar_vol'].replace(0, np.nan)).fillna(0)
             df['amihud'] = _z_roll(amihud_raw)
             # 2 * sqrt(max(0, -cov(dp_t, dp_{t-1}))) over a 64-bar window.
             dp = df['close'].diff()
             roll_cov = dp.rolling(64).cov(dp.shift(1))
             roll_spread_raw = (2 * np.sqrt(np.maximum(0, -roll_cov))).fillna(0)
             df['roll_spread'] = _z_roll(roll_spread_raw)
             # Volume signed by the direction of the close-to-close change,
             # summed over 30 bars.
             sign = np.sign(df['close'].diff()).fillna(0)
             signed_vol = sign * df['volume']
+            ofi_raw = signed_vol.rolling(30).sum().fillna(0)
             df['ofi'] = _z_roll(ofi_raw)
             # |up-bar volume - down-bar volume| / total volume over 60 bars.
             buy_vol = (sign > 0) * df['volume']
             sell_vol = (sign < 0) * df['volume']
             imb = (buy_vol.rolling(60).sum() - sell_vol.rolling(60).sum()).abs()
             tot = df['volume'].rolling(60).sum().replace(0, np.nan)
             df['vpin'] = (imb / tot).fillna(0)
             # Deviation of close from a 20-bar volume-weighted average price.
             vwap_win = 20
             v_short = (df['dollar_vol'].rolling(vwap_win).sum() / df['volume'].rolling(vwap_win).sum().replace(0, np.nan)).fillna(df['close'])
             df['vwap_dev'] = _z_roll(df['close'] - v_short)
             # 0.5*ln(H/L)^2 - (2*ln2 - 1)*ln(C/O)^2 per bar.
             rv_gk = ((np.log(df['high'] / df['low'])**2) / 2) - ((2 * np.log(2) - 1) * (np.log(df['close'] / df['open'])**2))
             df['rv_gk'] = _z_roll(rv_gk)
             # Composite score combining the normalised components above.
             df['L_score'] = (df['vol_zscore_50'] - df['amihud'] - df['roll_spread'] - df['rv_gk'].abs() - df['vwap_dev'].abs() + df['ofi']).fillna(0)
         # --- features computed for every timeframe ---
         df['slope'] = safe_ta(ta.slope(df['close'], length=7), idx, 0)
         vol_mean = df['volume'].rolling(20).mean()
         vol_std = df['volume'].rolling(20).std().replace(0, np.nan)
         df['vol_z'] = ((df['volume'] - vol_mean) / vol_std).fillna(0)
         df['rel_vol'] = df['volume'] / (df['volume'].rolling(50).mean() + 1e-9)
         df['log_ret'] = np.log(df['close'] / df['close'].shift(1).replace(0, np.nan)).fillna(0)
         # Position of close inside the 50-bar high/low range (0.5 when unknown).
         roll_max = df['high'].rolling(50).max()
         roll_min = df['low'].rolling(50).min()
         diff = (roll_max - roll_min).replace(0, 1e-9)
         df['fib_pos'] = ((df['close'] - roll_min) / diff).fillna(0.5)
         # Relative change of ema20 over 5 bars.
+        e20_s = df['ema20'].shift(5).replace(0, np.nan)
+        df['trend_slope'] = ((df['ema20'] - df['ema20'].shift(5)) / e20_s).fillna(0)
         # Same formula as 'atr_pct', stored under a third column name.
         df['volatility'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
         # Level located 38.2% of the range below the 50-bar high.
         fib618 = roll_max - (diff * 0.382)
         df['dist_fib618'] = ((df['close'] - fib618) / df['close'].replace(0, np.nan)).fillna(0)
+        df['dist_ema50'] = ((df['close'] - df['ema50']) / df['ema50'].replace(0, np.nan)).fillna(0)
         # 'ema200' is recomputed here with df['close'] as the safe_ta fill
         # argument and overwrites the column written in the EMA loop above.
+        e200 = safe_ta(ta.ema(df['close'], length=200), idx, df['close'])
+        df['ema200'] = e200
+        df['dist_ema200'] = ((df['close'] - e200) / e200.replace(0, np.nan)).fillna(0)
         if timeframe == '1m':
             # Lagged copies of four base features; RSI lags are scaled to 0..1.
             for lag in [1, 2, 3, 5, 10, 20]:
                 df[f'log_ret_lag_{lag}'] = df['log_ret'].shift(lag).fillna(0)
                 df[f'rsi_lag_{lag}'] = (df['RSI'].shift(lag) / 100.0).fillna(0.5)
                 df[f'fib_pos_lag_{lag}'] = df['fib_pos'].shift(lag).fillna(0.5)
                 df[f'volatility_lag_{lag}'] = df['volatility'].shift(lag).fillna(0)
         # Catch-all: any NaN still present (e.g. warm-up rows) becomes 0.
         df.fillna(0, inplace=True)
         return df
     async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
         # Score one symbol's candle history with the loaded models and cache
         # the per-candidate results as a pickle in CACHE_DIR. Returns None;
         # the only output is the scores file (written when at least one
         # result row was produced) plus progress prints.
         # NOTE(review): several lines of this method are missing from this
         # view (e.g. construction of `df_1m` from `candles`, the assignment of
         # `t0`, the loops/`try:` lines of the Sniper and Oracle sections);
         # the comments below only describe what is visible.
         safe_sym = sym.replace('/', '_')
         period_suffix = f"{start_ms}_{end_ms}"
         scores_file = f"{CACHE_DIR}/{safe_sym}_{period_suffix}_scores.pkl"
         # An existing scores file for this symbol/period short-circuits the run.
         if os.path.exists(scores_file):
              print(f"   📂 [{sym}] Data Exists -> Skipping.")
              return
         df_1m = df_1m.sort_index()
         frames = {}
         frames['1m'] = self._calculate_indicators_vectorized(df_1m.copy(), timeframe='1m')
         # Index floored to the minute and converted from ns to ms.
         frames['1m']['timestamp'] = frames['1m'].index.floor('1min').astype(np.int64) // 10**6
         # Column name -> numpy array, for fast positional access.
         fast_1m = {col: frames['1m'][col].values for col in frames['1m'].columns}
+        agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
         numpy_htf = {}
         # NOTE(review): the resampled frames are stored here without
         # indicator or 'timestamp' columns, yet later code reads e.g.
         # numpy_htf['5m']['timestamp'] and ['RSI']; presumably the lines that
         # add them are missing from this view - confirm.
         # NOTE(review): the '5T'/'15T' minute aliases are deprecated in
         # pandas >= 2.2 in favour of '5min'/'15min'.
         for tf_str, tf_code in [('5m', '5T'), ('15m', '15T'), ('1h', '1h'), ('4h', '4h'), ('1d', '1D')]:
             resampled = df_1m.resample(tf_code).agg(agg_dict).dropna()
             frames[tf_str] = resampled
             numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}
         arr_ts_1m = fast_1m['timestamp']
         # For each 1m row, the position in the higher-timeframe arrays
         # (left-side searchsorted, clipped to the valid index range).
         map_5m = np.clip(np.searchsorted(numpy_htf['5m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['5m']['timestamp']) - 1)
         map_15m = np.clip(np.searchsorted(numpy_htf['15m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['15m']['timestamp']) - 1)
         map_1h = np.clip(np.searchsorted(numpy_htf['1h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['1h']['timestamp']) - 1)
         map_4h = np.clip(np.searchsorted(numpy_htf['4h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['4h']['timestamp']) - 1)
+        map_1d = np.clip(np.searchsorted(numpy_htf['1d']['timestamp'], arr_ts_1m), 0, len(numpy_htf['1d']['timestamp']) - 1) if '1d' in numpy_htf else np.zeros(len(arr_ts_1m), dtype=int)
         # Pull model handles off the processor; each falls back to an empty /
         # None value when the attribute is absent.
         hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
         hydra_cols = getattr(self.proc.guardian_hydra, 'feature_cols', []) if self.proc.guardian_hydra else []
         legacy_v2 = getattr(self.proc.guardian_legacy, 'model_v2', None)
         oracle_dir = getattr(self.proc.oracle, 'model_direction', None)
         oracle_cols = getattr(self.proc.oracle, 'feature_cols', [])
         sniper_models = getattr(self.proc.sniper, 'models', [])
         sniper_cols = getattr(self.proc.sniper, 'feature_names', [])
         titan_model = getattr(self.proc.titan, 'model', None)
+        # A. TITAN (Global)
         # Scores default to 0.5 and keep that value if the model is missing
         # or the prediction below raises.
         global_titan_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
         if titan_model:
+            titan_cols = [
+                '5m_open', '5m_high', '5m_low', '5m_close', '5m_volume', '5m_RSI', '5m_MACD', '5m_MACD_h',
+                '5m_CCI', '5m_ADX', '5m_EMA_9_dist', '5m_EMA_21_dist', '5m_EMA_50_dist', '5m_EMA_200_dist',
+                '5m_BB_w', '5m_BB_p', '5m_MFI', '5m_VWAP_dist', '15m_timestamp', '15m_RSI', '15m_MACD',
+                '15m_MACD_h', '15m_CCI', '15m_ADX', '15m_EMA_9_dist', '15m_EMA_21_dist', '15m_EMA_50_dist',
+                '15m_EMA_200_dist', '15m_BB_w', '15m_BB_p', '15m_MFI', '15m_VWAP_dist', '1h_timestamp',
+                '1h_RSI', '1h_MACD_h', '1h_EMA_50_dist', '1h_EMA_200_dist', '1h_ATR_pct', '4h_timestamp',
+                '4h_RSI', '4h_MACD_h', '4h_EMA_50_dist', '4h_EMA_200_dist', '4h_ATR_pct', '1d_timestamp',
+                '1d_RSI', '1d_EMA_200_dist', '1d_Trend_Strong'
+            ]
             print("     🚀 Running Global Titan...", flush=True)
             try:
                 t_vecs = []
                 # Each column name is "<timeframe>_<feature>"; split on the
                 # first underscore and look the feature up in that
                 # timeframe's arrays, aligned to 1m rows through map_<tf>
                 # (resolved by name via locals()).
                 # NOTE(review): the two `elif` branches are reached only when
                 # `feat` is NOT a key of target_arr, so their indexing would
                 # raise KeyError; the outer bare except would then discard
                 # the whole Titan pass. Unknown features fall back to zeros.
                 for col in titan_cols:
+                    parts = col.split('_', 1); tf = parts[0]; feat = parts[1]
+                    target_arr = numpy_htf.get(tf, None)
+                    target_map = locals().get(f"map_{tf}", None)
+                    if target_arr and feat in target_arr: t_vecs.append(target_arr[feat][target_map])
+                    elif target_arr and feat == 'timestamp': t_vecs.append(target_arr['timestamp'][target_map])
+                    elif target_arr and feat in ['open','high','low','close','volume']: t_vecs.append(target_arr[feat][target_map])
+                    else: t_vecs.append(np.zeros(len(arr_ts_1m)))
                 X_TITAN = np.column_stack(t_vecs)
+                global_titan_scores = _revive_score_distribution(titan_model.predict(xgb.DMatrix(X_TITAN, feature_names=titan_cols)))
             except: pass
+        # B. SNIPER (Global)
         global_sniper_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
         if sniper_models:
             print("     🚀 Running Global Sniper...", flush=True)
                     if col in fast_1m: s_vecs.append(fast_1m[col])
                     elif col == 'atr' and 'atr_z' in fast_1m: s_vecs.append(fast_1m['atr_z'])
                     else: s_vecs.append(np.zeros(len(arr_ts_1m)))
                 X_SNIPER = np.column_stack(s_vecs)
                 # Ensemble: average the predictions of all sniper models.
+                preds = [m.predict(X_SNIPER) for m in sniper_models]
+                global_sniper_scores = _revive_score_distribution(np.mean(preds, axis=0))
+            except: pass
+        # C. ORACLE (Global)
         global_oracle_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
         if oracle_dir:
             print("     🚀 Running Global Oracle...", flush=True)
                     elif col.startswith('4h_'): o_vecs.append(numpy_htf['4h'].get(col[3:], np.zeros(len(arr_ts_1m)))[map_4h])
                     elif col == 'sim_titan_score': o_vecs.append(global_titan_scores)
                     elif col == 'sim_mc_score': o_vecs.append(np.full(len(arr_ts_1m), 0.5))
+                    elif col == 'sim_pattern_score': o_vecs.append(np.full(len(arr_ts_1m), 0.5))
                     else: o_vecs.append(np.zeros(len(arr_ts_1m)))
                 X_ORACLE = np.column_stack(o_vecs)
                 preds_o = oracle_dir.predict(X_ORACLE)
                 # Use a 1-D prediction as is; otherwise take column 0.
                 preds_o = preds_o if isinstance(preds_o, np.ndarray) and len(preds_o.shape)==1 else preds_o[:, 0]
                 global_oracle_scores = _revive_score_distribution(preds_o)
+            except: pass
+        # D. LEGACY (Global)
         global_v2_scores = np.zeros(len(arr_ts_1m), dtype=np.float32)
         if legacy_v2:
             try:
                 # 12 base features (4 x 1m, 4 x 5m, 4 x 15m) followed by
                 # 4 features for each of the 6 lags; column order matters.
+                l_log = fast_1m['log_ret']; l_rsi = fast_1m['RSI'] / 100.0; l_fib = fast_1m['fib_pos']; l_vol = fast_1m['volatility']
+                l5_log = numpy_htf['5m']['log_ret'][map_5m]; l5_rsi = numpy_htf['5m']['RSI'][map_5m] / 100.0; l5_fib = numpy_htf['5m']['fib_pos'][map_5m]; l5_trd = numpy_htf['5m']['trend_slope'][map_5m]
+                l15_log = numpy_htf['15m']['log_ret'][map_15m]; l15_rsi = numpy_htf['15m']['RSI'][map_15m] / 100.0; l15_fib618 = numpy_htf['15m']['dist_fib618'][map_15m]; l15_trd = numpy_htf['15m']['trend_slope'][map_15m]
                 lags = []
                 for lag in [1, 2, 3, 5, 10, 20]:
+                    lags.extend([fast_1m[f'log_ret_lag_{lag}'], fast_1m[f'rsi_lag_{lag}'], fast_1m[f'fib_pos_lag_{lag}'], fast_1m[f'volatility_lag_{lag}']])
+                X_V2 = np.column_stack([l_log, l_rsi, l_fib, l_vol, l5_log, l5_rsi, l5_fib, l5_trd, l15_log, l15_rsi, l15_fib618, l15_trd, *lags])
                 preds = legacy_v2.predict(xgb.DMatrix(X_V2))
                 # For 2-D output the score is taken from column index 2.
                 global_v2_scores = preds[:, 2] if len(preds.shape) > 1 else preds
             except: pass
+        # Filter
         # Candidate rows: 1h RSI <= 70 with Titan and Oracle scores above 0.4.
+        is_candidate = (numpy_htf['1h']['RSI'][map_1h] <= 70) & (global_titan_scores > 0.4) & (global_oracle_scores > 0.4)
         candidate_indices = np.where(is_candidate)[0]
         # Drop candidates in the first 500 minutes of data and those within
         # 245 rows of the end, so each keeps a full 240-row forward window.
         start_ts_val = frames['1m'].index[0] + pd.Timedelta(minutes=500)
         start_idx_offset = np.searchsorted(arr_ts_1m, int(start_ts_val.timestamp()*1000))
         candidate_indices = candidate_indices[candidate_indices >= start_idx_offset]
+        candidate_indices = candidate_indices[candidate_indices < (len(arr_ts_1m) - 245)]
+        print(f"     🎯 Candidates: {len(candidate_indices)}. Running Vectorized Hydra...", flush=True)
+        # 🚀 VECTORIZED HYDRA SIMULATION 🚀
         # NOTE(review): result rows are only produced inside this `if`, so
         # with no Hydra models nothing is appended and no file is written.
         ai_results = []
+        if hydra_models and len(candidate_indices) > 0:
+            # Prepare Static Features Matrix (Global)
+            h_static = np.column_stack([
+                fast_1m['RSI'], numpy_htf['5m']['RSI'][map_5m], numpy_htf['15m']['RSI'][map_15m],
+                fast_1m['bb_width'], fast_1m['rel_vol'], fast_1m['atr'], fast_1m['close']
+            ]) # Shape: (N, 7)
+            # Process candidates in chunks to avoid RAM explosion
+            chunk_size = 5000
+            for i in range(0, len(candidate_indices), chunk_size):
+                chunk_idxs = candidate_indices[i:i+chunk_size]
+                # We need sliding windows of 240 steps for each candidate
+                # Trick: Use broadcasting or sliding_window_view on static features
+                # But sliding_window_view on huge array is slow. Better to just slice.
+                # Vectorized construction for chunk
+                # 1. Extract entry prices
                 # NOTE(review): `entries` and `entries_ts` are not read again
                 # in the visible code; the loop below re-reads per index.
+                entries = fast_1m['close'][chunk_idxs]
+                entries_ts = fast_1m['timestamp'][chunk_idxs]
+                # 2. Prepare sequences (Vectorized slice is hard in numpy without creating copies)
+                # We stick to a tight loop or specialized indexing.
+                # Given we need to construct a [Batch, 240, Features] array for Hydra...
+                # Fastest way: List comprehension for slicing, then stack.
+                # Since Hydra is XGBoost, we can flatten the time dimension? No, Hydra is 1D input (snapshot).
+                # Wait, Hydra predicts Crash Probability for a SNAPSHOT state.
+                # In simulation, we need to check crash prob at t+1, t+2... t+240.
+                # That is 240 checks per candidate. 42,000 * 240 = 10 Million checks.
+                # This IS the bottleneck.
+                # OPTIMIZATION: Only check Hydra if PnL drops below -0.5% or something? No, that misses the point.
+                # OPTIMIZATION 2 (Implemented): Vectorize the "Check" logic.
+                # Construct big matrix for ALL checks: (N_Candidates * 240, Features)
+                # But that's 10M rows. XGBoost inference on 10M rows takes ~3-5 seconds on CPU. This is feasible!
+                # Let's do it per candidate to be safe on RAM, but fast.
                 # What the code actually does: one predict_proba call per
                 # candidate over its 240-row forward window.
+                for idx in chunk_idxs:
+                    # Slicing is fast
+                    sl_st = h_static[idx:idx+240]
+                    sl_close = sl_st[:, 6]; sl_atr = sl_st[:, 5]
+                    entry = fast_1m['close'][idx]
                     # Risk unit per row: the larger of 1.5 x ATR and 1.5% of
                     # the entry price; PnL figures are expressed in this unit.
+                    dist = np.maximum(1.5 * sl_atr, entry * 0.015)
+                    pnl = sl_close - entry
+                    norm_pnl = pnl / dist
                     # Running best close since entry, in risk units.
+                    max_pnl_r = (np.maximum.accumulate(sl_close) - entry) / dist
+                    atr_pct = sl_atr / sl_close
+                    # Stack Hydra Input (240 rows)
+                    # Cols: rsi1, rsi5, rsi15, bb, vol, dist_ema(0), atr_pct, norm, max, dists(0), time, entry(0), oracle, l2, target
+                    zeros = np.zeros(240)
+                    time_vec = np.arange(1, 241)
+                    s_oracle = global_oracle_scores[idx]
                     # The last two columns are constants (0.7 and 3.0) and
                     # several features are zero-filled placeholders.
+                    X_H = np.column_stack([
+                        sl_st[:,0], sl_st[:,1], sl_st[:,2], sl_st[:,3], sl_st[:,4],
+                        zeros, atr_pct, norm_pnl, max_pnl_r, zeros, zeros, time_vec, zeros,
+                        np.full(240, s_oracle), np.full(240, 0.7), np.full(240, 3.0)
+                    ])
+                    # Predict 240 steps at once
                     # hydra_time stays 0 unless the peak probability exceeds
                     # 0.6; it is then the timestamp of the peak row. Any
                     # prediction error leaves both values at their defaults.
+                    max_hydra = 0.0; hydra_time = 0
+                    try:
+                        probs = hydra_models['crash'].predict_proba(X_H)[:, 1]
+                        max_hydra = np.max(probs)
+                        if max_hydra > 0.6:
+                            t = np.argmax(probs)
+                            hydra_time = int(fast_1m['timestamp'][idx + t])
+                    except: pass
+                    # Legacy Max
                     # Same pattern for the legacy score with a 0.8 threshold.
+                    max_v2 = np.max(global_v2_scores[idx:idx+240])
+                    v2_time = 0
+                    if max_v2 > 0.8:
+                        t2 = np.argmax(global_v2_scores[idx:idx+240])
+                        v2_time = int(fast_1m['timestamp'][idx + t2])
                     # 'signal_type' and 'l1_score' are fixed constants here.
+                    ai_results.append({
+                        'timestamp': int(fast_1m['timestamp'][idx]),
+                        'symbol': sym, 'close': entry,
+                        'real_titan': global_titan_scores[idx],
+                        'oracle_conf': s_oracle,
+                        'sniper_score': global_sniper_scores[idx],
+                        'risk_hydra_crash': max_hydra, 'time_hydra_crash': hydra_time,
+                        'risk_legacy_v2': max_v2, 'time_legacy_panic': v2_time,
+                        'signal_type': 'BREAKOUT', 'l1_score': 50.0
+                    })
         # NOTE(review): `t0` is not assigned in the lines visible here.
         dt = time.time() - t0
         # Only a non-empty result list is persisted; an empty run leaves no
         # cache file, so the symbol is reprocessed on the next call.
         if ai_results:
             pd.DataFrame(ai_results).to_pickle(scores_file)
             print(f"   ✅ [{sym}] Completed in {dt:.2f} seconds. ({len(ai_results)} signals)", flush=True)
         gc.collect()
     async def generate_truth_data(self):
         if self.force_start_date:
             dt_s = datetime.strptime(self.force_start_date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
         return best['config'], best
 async def run_strategic_optimization_task():
+    print("\n🧪 [STRATEGIC BACKTEST] Vectorized Hydra Speed...")
     r2 = R2Service(); dm = DataManager(None, None, r2); proc = MLProcessor(dm)
     try:
         await dm.initialize(); await proc.initialize()