Tradtesting

Paused

App Files Files Community

Riy777 committed on Dec 8, 2025

Commit

bf66206

verified ·

1 Parent(s): d01877f

Update backtest_engine.py

Browse files

Files changed (1) hide show

backtest_engine.py +145 -181

backtest_engine.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ============================================================
-# 🧪 backtest_engine.py (V118.10 - GEM-Architect: Multi-Class Fix)
 # ============================================================
 import asyncio
@@ -17,7 +17,7 @@ import traceback
 from datetime import datetime, timezone
 from typing import Dict, Any, List
-# ✅ استيراد المحركات الأساسية
 try:
     from ml_engine.processor import MLProcessor, SystemLimits
     from ml_engine.data_manager import DataManager
@@ -44,7 +44,6 @@ class HeavyDutyBacktester:
         self.force_start_date = None
         self.force_end_date = None
-        # 🔥 تنظيف الكاش 🔥
         if os.path.exists(CACHE_DIR):
             files = glob.glob(os.path.join(CACHE_DIR, "*"))
             print(f"🧹 [System] Flushing Cache: Deleting {len(files)} old files...", flush=True)
@@ -54,7 +53,7 @@ class HeavyDutyBacktester:
         else:
             os.makedirs(CACHE_DIR)
-        print(f"🧪 [Backtest V118.10] Multi-Class Fix Mode. Models: {self._check_models_status()}")
     def _check_models_status(self):
         status = []
@@ -68,9 +67,20 @@ class HeavyDutyBacktester:
         self.force_start_date = start_str
         self.force_end_date = end_str
-    # ==============================================================
-    # ⚡ FAST DATA DOWNLOADER
-    # ==============================================================
     async def _fetch_all_data_fast(self, sym, start_ms, end_ms):
         print(f"   ⚡ [Network] Downloading {sym}...", flush=True)
         limit = 1000
@@ -101,7 +111,6 @@ class HeavyDutyBacktester:
                 if res: all_candles.extend(res)
         if not all_candles: return None
         filtered = [c for c in all_candles if c[0] >= start_ms and c[0] <= end_ms]
         seen = set(); unique_candles = []
         for c in filtered:
@@ -112,18 +121,13 @@ class HeavyDutyBacktester:
         print(f"     ✅ Downloaded {len(unique_candles)} candles.", flush=True)
         return unique_candles
-    # ==============================================================
-    # 🏎️ VECTORIZED INDICATORS (Robust)
-    # ==============================================================
     def _calculate_indicators_vectorized(self, df, timeframe='1m'):
-        for col in ['close', 'high', 'low', 'volume', 'open']:
-            df[col] = df[col].astype(float)
         df['rsi'] = ta.rsi(df['close'], length=14)
         df['ema20'] = ta.ema(df['close'], length=20)
         df['ema50'] = ta.ema(df['close'], length=50)
         df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14)
         df['vol_ma50'] = df['volume'].rolling(50).mean()
         df['rel_vol'] = df['volume'] / (df['vol_ma50'] + 1e-9)
@@ -132,11 +136,8 @@ class HeavyDutyBacktester:
             std20 = df['close'].rolling(20).std()
             df['bb_width'] = ((sma20 + 2*std20) - (sma20 - 2*std20)) / sma20
-        vol_mean = df['volume'].rolling(20).mean()
-        vol_std = df['volume'].rolling(20).std()
-        df['vol_z'] = (df['volume'] - vol_mean) / (vol_std + 1e-9)
         df['atr_pct'] = df['atr'] / df['close']
         # L1 Score
         rsi_penalty = np.where(df['rsi'] > 70, (df['rsi'] - 70) * 2, 0)
         l1_score_raw = (df['rel_vol'] * 10) + (df['atr_pct'] * 1000) - rsi_penalty
@@ -145,11 +146,8 @@ class HeavyDutyBacktester:
         if timeframe == '1m':
             df['log_ret'] = np.log(df['close'] / df['close'].shift(1))
             df['ret'] = df['close'].pct_change()
-            roll_max = df['high'].rolling(50).max()
             roll_min = df['low'].rolling(50).min()
-            diff = (roll_max - roll_min).replace(0, 1e-9)
-            df['fib_pos'] = (df['close'] - roll_min) / diff
             df['volatility'] = df['atr'] / df['close']
             df['trend_slope'] = (df['ema20'] - df['ema20'].shift(5)) / df['ema20'].shift(5)
@@ -166,9 +164,6 @@ class HeavyDutyBacktester:
         df.fillna(0, inplace=True)
         return df
-    # ==============================================================
-    # 🧠 CPU PROCESSING (HYPER-VECTORIZED)
-    # ==============================================================
     async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
         safe_sym = sym.replace('/', '_')
         period_suffix = f"{start_ms}_{end_ms}"
@@ -178,164 +173,141 @@ class HeavyDutyBacktester:
              print(f"   📂 [{sym}] Data Exists -> Skipping.")
              return
-        print(f"   ⚙️ [CPU] Analyzing {sym} (Hyper-Vectorized Mode)...", flush=True)
         t0 = time.time()
-        # 1. Data Prep
         df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
         df_1m['datetime'] = pd.to_datetime(df_1m['timestamp'], unit='ms')
         df_1m.set_index('datetime', inplace=True)
         df_1m = df_1m.sort_index()
         frames = {}
-        agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
         frames['1m'] = self._calculate_indicators_vectorized(df_1m.copy(), timeframe='1m')
         frames['1m']['timestamp'] = frames['1m'].index.floor('1min').astype(np.int64) // 10**6
         fast_1m = {col: frames['1m'][col].values for col in frames['1m'].columns}
         numpy_htf = {}
         for tf_str, tf_code in [('5m', '5T'), ('15m', '15T'), ('1h', '1h'), ('4h', '4h'), ('1d', '1D')]:
-            resampled = df_1m.resample(tf_code).agg(agg_dict).dropna()
             resampled = self._calculate_indicators_vectorized(resampled, timeframe=tf_str)
             resampled['timestamp'] = resampled.index.astype(np.int64) // 10**6
             frames[tf_str] = resampled
             numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}
-        # 2. Time Alignment
         map_1m_to_1h = np.clip(np.searchsorted(numpy_htf['1h']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['1h']['timestamp'])-1)
         map_1m_to_5m = np.clip(np.searchsorted(numpy_htf['5m']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['5m']['timestamp'])-1)
         map_1m_to_15m = np.clip(np.searchsorted(numpy_htf['15m']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['15m']['timestamp'])-1)
         map_1m_to_4h = np.clip(np.searchsorted(numpy_htf['4h']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['4h']['timestamp'])-1)
-        # 3. Model Access
         oracle_dir_model = getattr(self.proc.oracle, 'model_direction', None)
         sniper_models = getattr(self.proc.sniper, 'models', [])
         hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
         legacy_v2 = getattr(self.proc.guardian_legacy, 'model_v2', None)
-        # 4. 🔥 Pre-Calc Legacy V2 🔥
         global_v2_probs = np.zeros(len(fast_1m['close']))
         if legacy_v2:
             try:
-                l_log = fast_1m['log_ret']; l_rsi = fast_1m['rsi'] / 100.0
-                l_fib = fast_1m['fib_pos']; l_vol = fast_1m['volatility']
-                l5_log = numpy_htf['5m']['log_ret'][map_1m_to_5m]
-                l5_rsi = numpy_htf['5m']['rsi'][map_1m_to_5m] / 100.0
-                l5_fib = numpy_htf['5m']['fib_pos'][map_1m_to_5m]
-                l5_trd = numpy_htf['5m']['trend_slope'][map_1m_to_5m]
-                l15_log = numpy_htf['15m']['log_ret'][map_1m_to_15m]
-                l15_rsi = numpy_htf['15m']['rsi'][map_1m_to_15m] / 100.0
-                l15_fib618 = numpy_htf['15m']['dist_fib618'][map_1m_to_15m]
-                l15_trd = numpy_htf['15m']['trend_slope'][map_1m_to_15m]
-                lag_cols = []
-                for lag in [1, 2, 3, 5, 10, 20]:
-                    lag_cols.extend([fast_1m[f'log_ret_lag_{lag}'], fast_1m[f'rsi_lag_{lag}'], fast_1m[f'fib_pos_lag_{lag}'], fast_1m[f'volatility_lag_{lag}']])
-                X_GLOBAL_V2 = np.column_stack([l_log, l_rsi, l_fib, l_vol, l5_log, l5_rsi, l5_fib, l5_trd, l15_log, l15_rsi, l15_fib618, l15_trd, *lag_cols])
-                global_v2_probs = legacy_v2.predict(xgb.DMatrix(X_GLOBAL_V2))
-                # ✅ FIX: Handle Multi-Class output if exists
-                if len(global_v2_probs.shape) > 1:
-                    # Assuming last column is Panic/Crash prob (or index 2)
-                    global_v2_probs = global_v2_probs[:, -1]
             except: pass
-        # 5. 🔥 Pre-Assemble Hydra Static 🔥
         global_hydra_static = None
         if hydra_models:
             try:
-                h_rsi_1m = fast_1m['rsi']; h_rsi_5m = numpy_htf['5m']['rsi'][map_1m_to_5m]
-                h_rsi_15m = numpy_htf['15m']['rsi'][map_1m_to_15m]; h_bb = fast_1m['bb_width']
-                h_vol = fast_1m['rel_vol']; h_atr = fast_1m['atr']; h_close = fast_1m['close']
-                global_hydra_static = np.column_stack([h_rsi_1m, h_rsi_5m, h_rsi_15m, h_bb, h_vol, h_atr, h_close])
             except: pass
-        # 6. Candidate Filtering
         valid_indices_mask = fast_1m['l1_score'] >= 5.0
         valid_indices = np.where(valid_indices_mask)[0]
         mask_bounds = (valid_indices > 500) & (valid_indices < len(fast_1m['close']) - 245)
         final_valid_indices = valid_indices[mask_bounds]
-        print(f"     🎯 Raw Candidates (Score > 5): {len(final_valid_indices)}. Vectorized Scoring...", flush=True)
         num_candidates = len(final_valid_indices)
         if num_candidates == 0: return
-        # ✅ TIME VECTOR DEFINED
         time_vec = np.arange(1, 241)
-        # --- A. ORACLE MATRIX CONSTRUCTION ---
-        oracle_preds = np.full(num_candidates, 0.5)
         if oracle_dir_model:
             try:
                 idx_1h = map_1m_to_1h[final_valid_indices]
                 idx_15m = map_1m_to_15m[final_valid_indices]
                 idx_4h = map_1m_to_4h[final_valid_indices]
                 titan_scores = np.clip(fast_1m['l1_score'][final_valid_indices] / 40.0, 0.1, 0.95)
-                oracle_features = []
                 for col in getattr(self.proc.oracle, 'feature_cols', []):
-                    if col.startswith('1h_'):
-                        c = col[3:]; oracle_features.append(numpy_htf['1h'][c][idx_1h] if c in numpy_htf['1h'] else np.zeros(num_candidates))
-                    elif col.startswith('15m_'):
-                        c = col[4:]; oracle_features.append(numpy_htf['15m'][c][idx_15m] if c in numpy_htf['15m'] else np.zeros(num_candidates))
-                    elif col.startswith('4h_'):
-                        c = col[3:]; oracle_features.append(numpy_htf['4h'][c][idx_4h] if c in numpy_htf['4h'] else np.zeros(num_candidates))
-                    elif col == 'sim_titan_score': oracle_features.append(titan_scores)
-                    elif col == 'sim_mc_score': oracle_features.append(np.full(num_candidates, 0.5))
-                    elif col == 'sim_pattern_score': oracle_features.append(np.full(num_candidates, 0.5))
-                    else: oracle_features.append(np.zeros(num_candidates))
-                X_oracle_big = np.column_stack(oracle_features)
-                preds = oracle_dir_model.predict(X_oracle_big)
-                # ✅ FIX: Handle Multi-Class (take last column usually)
-                if len(preds.shape) > 1 and preds.shape[1] > 1: oracle_preds = preds[:, -1]
-                else: oracle_preds = preds.flatten()
             except Exception as e: print(f"Oracle Error: {e}")
-        # --- B. SNIPER MATRIX CONSTRUCTION (FIXED) ---
-        sniper_preds = np.full(num_candidates, 0.5)
         if sniper_models:
             try:
-                sniper_features = []
                 for col in getattr(self.proc.sniper, 'feature_names', []):
-                    if col in fast_1m: sniper_features.append(fast_1m[col][final_valid_indices])
-                    elif col == 'L_score': sniper_features.append(fast_1m.get('vol_zscore_50', np.zeros(len(fast_1m['close'])))[final_valid_indices])
-                    else: sniper_features.append(np.zeros(num_candidates))
-                X_sniper_big = np.column_stack(sniper_features)
-                # ✅ FIX: Extract Positive Class Prob if Multi-Class
-                batch_preds = []
                 for m in sniper_models:
-                    raw_p = m.predict(X_sniper_big)
-                    if len(raw_p.shape) > 1 and raw_p.shape[1] > 1:
-                        # Assuming index 2 is Buy (0=Sell, 1=Hold, 2=Buy) or index 1 if binary
-                        # Safest: Take last column
-                        batch_preds.append(raw_p[:, -1])
-                    else:
-                        batch_preds.append(raw_p)
-                sniper_preds = np.mean(batch_preds, axis=0)
             except Exception as e: print(f"Sniper Error: {e}")
-        # --- C. HYDRA MATRIX CONSTRUCTION ---
-        hydra_risk_preds = np.zeros(num_candidates)
-        hydra_time_preds = np.zeros(num_candidates, dtype=int)
         if hydra_models and global_hydra_static is not None:
             chunk_size = 5000
             for i in range(0, num_candidates, chunk_size):
-                chunk_indices = final_valid_indices[i : i + chunk_size]
-                batch_X = []; valid_batch_indices = []
-                for k, idx in enumerate(chunk_indices):
                     start = idx + 1; end = start + 240
                     sl_static = global_hydra_static[start:end]
                     entry_p = fast_1m['close'][idx]
                     sl_close = sl_static[:, 6]; sl_atr = sl_static[:, 5]
                     sl_dist = np.maximum(1.5 * sl_atr, entry_p * 0.015)
@@ -344,87 +316,78 @@ class HeavyDutyBacktester:
                     sl_max_pnl_r = (sl_cum_max - entry_p) / sl_dist
                     sl_atr_pct = sl_atr / sl_close
                     zeros = np.zeros(240); ones = np.ones(240)
-                    row = np.column_stack([
-                        sl_static[:, 0], sl_static[:, 1], sl_static[:, 2],
-                        sl_static[:, 3], sl_static[:, 4],
-                        zeros, sl_atr_pct, sl_norm_pnl, sl_max_pnl_r,
-                        zeros, zeros, time_vec,
-                        zeros, ones*0.6, ones*0.7, ones*3.0
-                    ])
-                    batch_X.append(row)
-                    valid_batch_indices.append(i + k)
                 if batch_X:
                     try:
-                        big_X = np.array(batch_X)
                         big_X_flat = big_X.reshape(-1, big_X.shape[-1])
                         preds_flat = hydra_models['crash'].predict_proba(big_X_flat)[:, 1]
                         preds_batch = preds_flat.reshape(len(batch_X), 240)
-                        batch_max_risk = np.max(preds_batch, axis=1)
                         over_thresh = preds_batch > 0.6
                         has_crash = over_thresh.any(axis=1)
                         crash_times_rel = np.argmax(over_thresh, axis=1)
-                        for j, glob_idx in enumerate(valid_batch_indices):
-                            hydra_risk_preds[glob_idx] = batch_max_risk[j]
-                            if has_crash[j]:
-                                start_t_idx = final_valid_indices[glob_idx] + 1
-                                abs_time = fast_1m['timestamp'][start_t_idx + crash_times_rel[j]]
-                                hydra_time_preds[glob_idx] = abs_time
                     except Exception: pass
-        # --- D. LEGACY V2 MAPPING ---
-        legacy_risk_preds = np.zeros(num_candidates)
-        legacy_time_preds = np.zeros(num_candidates, dtype=int)
         if legacy_v2:
-            for k, idx in enumerate(final_valid_indices):
                 start = idx + 1
                 if start + 240 < len(global_v2_probs):
-                    window = global_v2_probs[start : start + 240]
-                    legacy_risk_preds[k] = np.max(window)
-        # --- E. FINAL DATAFRAME CONSTRUCTION (Safe Mode) ---
-        try:
-            # 1. Gather Arrays
-            arr_ts = fast_1m['timestamp'][final_valid_indices]
-            arr_close = fast_1m['close'][final_valid_indices]
-            arr_l1 = fast_1m['l1_score'][final_valid_indices]
-            arr_titan = np.clip(arr_l1 / 40.0, 0.1, 0.95)
-            # 2. Check Lengths & Flatten
-            arrays = {
-                'timestamp': arr_ts,
-                'close': arr_close,
-                'real_titan': arr_titan,
-                'oracle_conf': oracle_preds,
-                'sniper_score': sniper_preds,
-                'l1_score': arr_l1,
-                'risk_hydra_crash': hydra_risk_preds,
-                'time_hydra_crash': hydra_time_preds,
-                'risk_legacy_v2': legacy_risk_preds,
-                'time_legacy_panic': legacy_time_preds
-            }
-            clean_arrays = {}
-            for k, v in arrays.items():
-                flat_v = np.array(v).flatten()
-                # Safety Truncate
-                if len(flat_v) > num_candidates: flat_v = flat_v[:num_candidates]
-                elif len(flat_v) < num_candidates:
-                    print(f"⚠️ PADDING {k}: {len(flat_v)} -> {num_candidates}")
-                    flat_v = np.pad(flat_v, (0, num_candidates - len(flat_v)))
-                clean_arrays[k] = flat_v
-            clean_arrays['symbol'] = sym
-            ai_df = pd.DataFrame(clean_arrays)
-            dt = time.time() - t0
-            if not ai_df.empty:
-                ai_df.to_pickle(scores_file)
-                print(f"   ✅ [{sym}] Completed {len(ai_df)} signals in {dt:.2f} seconds.", flush=True)
-        except Exception as e:
-            print(f"❌ DataFrame Construction Error: {e}")
-            traceback.print_exc()
         del frames, fast_1m, numpy_htf, global_v2_probs, global_hydra_static
         gc.collect()
@@ -459,7 +422,7 @@ class HeavyDutyBacktester:
         global_df = pd.concat(all_data)
         global_df.sort_values('timestamp', inplace=True)
-        # 🚀 Numpy Conversion 🚀
         arr_ts = global_df['timestamp'].values
         arr_close = global_df['close'].values.astype(np.float64)
         arr_symbol = global_df['symbol'].values
@@ -559,8 +522,9 @@ class HeavyDutyBacktester:
         await self.generate_truth_data()
         d = self.GRID_DENSITY
-        oracle_range = np.linspace(0.45, 0.8, d).tolist()
-        sniper_range = np.linspace(0.35, 0.7, d).tolist()
         hydra_range = np.linspace(0.70, 0.95, d).tolist()
         l1_range = [10.0, 15.0, 20.0, 25.0]
         titan_range = [0.4, 0.6]
@@ -601,7 +565,7 @@ class HeavyDutyBacktester:
         return best['config'], best
 async def run_strategic_optimization_task():
-    print("\n🧪 [STRATEGIC BACKTEST] Dimension Safe Mode...")
     r2 = R2Service()
     dm = DataManager(None, None, r2)
     proc = MLProcessor(dm)

 # ============================================================
+# 🧪 backtest_engine.py (V119.0 - GEM-Architect: The Synchronizer)
 # ============================================================
 import asyncio
 from datetime import datetime, timezone
 from typing import Dict, Any, List
+# ✅ استيراد المحركات
 try:
     from ml_engine.processor import MLProcessor, SystemLimits
     from ml_engine.data_manager import DataManager
         self.force_start_date = None
         self.force_end_date = None
         if os.path.exists(CACHE_DIR):
             files = glob.glob(os.path.join(CACHE_DIR, "*"))
             print(f"🧹 [System] Flushing Cache: Deleting {len(files)} old files...", flush=True)
         else:
             os.makedirs(CACHE_DIR)
+        print(f"🧪 [Backtest V119.0] Synchronized Integrity Mode. Models: {self._check_models_status()}")
     def _check_models_status(self):
         status = []
         self.force_start_date = start_str
         self.force_end_date = end_str
+    # --- Helper: Robust Probability Extraction ---
+    def _extract_probs(self, raw_preds):
+        """Reduce a model's raw prediction output to a 1-D array.
+
+        A list is first converted with ``np.array``; any other input must
+        already expose ``ndim``/``shape``/``flatten`` (no other conversion is
+        attempted). A 1-D array is returned unchanged. For a 2-D array: a
+        single column is flattened, two columns yield column 1, and three or
+        more columns yield the last column. Anything else (0-D, 3-D or
+        higher, or a 2-D array with zero columns) is flattened.
+
+        NOTE(review): treating column 1 / the last column as the "positive"
+        (Buy) class assumes a particular class ordering in the models --
+        confirm against how each model was trained.
+        """
+        if isinstance(raw_preds, list): raw_preds = np.array(raw_preds)
+        if raw_preds.ndim == 1:
+            return raw_preds # already 1-D: returned as-is, not copied
+        elif raw_preds.ndim == 2:
+            cols = raw_preds.shape[1]
+            if cols == 1: return raw_preds.flatten()
+            if cols == 2: return raw_preds[:, 1] # two columns: take column 1 (assumed [Neg, Pos] order)
+            if cols >= 3: return raw_preds[:, -1] # 3+ columns: take last column (assumed [Sell, Hold, Buy] order)
+        # Fallback for 0-D, 3-D+ or zero-column 2-D input.
+        return raw_preds.flatten()
     async def _fetch_all_data_fast(self, sym, start_ms, end_ms):
         print(f"   ⚡ [Network] Downloading {sym}...", flush=True)
         limit = 1000
                 if res: all_candles.extend(res)
         if not all_candles: return None
         filtered = [c for c in all_candles if c[0] >= start_ms and c[0] <= end_ms]
         seen = set(); unique_candles = []
         for c in filtered:
         print(f"     ✅ Downloaded {len(unique_candles)} candles.", flush=True)
         return unique_candles
     def _calculate_indicators_vectorized(self, df, timeframe='1m'):
+        for col in ['close', 'high', 'low', 'volume', 'open']: df[col] = df[col].astype(float)
         df['rsi'] = ta.rsi(df['close'], length=14)
         df['ema20'] = ta.ema(df['close'], length=20)
         df['ema50'] = ta.ema(df['close'], length=50)
         df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14)
         df['vol_ma50'] = df['volume'].rolling(50).mean()
         df['rel_vol'] = df['volume'] / (df['vol_ma50'] + 1e-9)
             std20 = df['close'].rolling(20).std()
             df['bb_width'] = ((sma20 + 2*std20) - (sma20 - 2*std20)) / sma20
         df['atr_pct'] = df['atr'] / df['close']
         # L1 Score
         rsi_penalty = np.where(df['rsi'] > 70, (df['rsi'] - 70) * 2, 0)
         l1_score_raw = (df['rel_vol'] * 10) + (df['atr_pct'] * 1000) - rsi_penalty
         if timeframe == '1m':
             df['log_ret'] = np.log(df['close'] / df['close'].shift(1))
             df['ret'] = df['close'].pct_change()
             roll_min = df['low'].rolling(50).min()
+            df['fib_pos'] = (df['close'] - roll_min) / (df['high'].rolling(50).max() - roll_min + 1e-9)
             df['volatility'] = df['atr'] / df['close']
             df['trend_slope'] = (df['ema20'] - df['ema20'].shift(5)) / df['ema20'].shift(5)
         df.fillna(0, inplace=True)
         return df
     async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
         safe_sym = sym.replace('/', '_')
         period_suffix = f"{start_ms}_{end_ms}"
              print(f"   📂 [{sym}] Data Exists -> Skipping.")
              return
+        print(f"   ⚙️ [CPU] Analyzing {sym} (Synchronized Mode)...", flush=True)
         t0 = time.time()
         df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
         df_1m['datetime'] = pd.to_datetime(df_1m['timestamp'], unit='ms')
         df_1m.set_index('datetime', inplace=True)
         df_1m = df_1m.sort_index()
         frames = {}
         frames['1m'] = self._calculate_indicators_vectorized(df_1m.copy(), timeframe='1m')
         frames['1m']['timestamp'] = frames['1m'].index.floor('1min').astype(np.int64) // 10**6
         fast_1m = {col: frames['1m'][col].values for col in frames['1m'].columns}
         numpy_htf = {}
         for tf_str, tf_code in [('5m', '5T'), ('15m', '15T'), ('1h', '1h'), ('4h', '4h'), ('1d', '1D')]:
+            resampled = df_1m.resample(tf_code).agg({'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}).dropna()
             resampled = self._calculate_indicators_vectorized(resampled, timeframe=tf_str)
             resampled['timestamp'] = resampled.index.astype(np.int64) // 10**6
             frames[tf_str] = resampled
             numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}
+        # Time Alignment
         map_1m_to_1h = np.clip(np.searchsorted(numpy_htf['1h']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['1h']['timestamp'])-1)
         map_1m_to_5m = np.clip(np.searchsorted(numpy_htf['5m']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['5m']['timestamp'])-1)
         map_1m_to_15m = np.clip(np.searchsorted(numpy_htf['15m']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['15m']['timestamp'])-1)
         map_1m_to_4h = np.clip(np.searchsorted(numpy_htf['4h']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['4h']['timestamp'])-1)
+        # Model Access
         oracle_dir_model = getattr(self.proc.oracle, 'model_direction', None)
         sniper_models = getattr(self.proc.sniper, 'models', [])
         hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
         legacy_v2 = getattr(self.proc.guardian_legacy, 'model_v2', None)
+        # Pre-Calc Legacy V2 (Global)
         global_v2_probs = np.zeros(len(fast_1m['close']))
         if legacy_v2:
             try:
+                # Optimized construction
+                X_GLOBAL_V2 = np.column_stack([
+                    fast_1m['log_ret'], fast_1m['rsi']/100.0, fast_1m['fib_pos'], fast_1m['volatility'],
+                    numpy_htf['5m']['log_ret'][map_1m_to_5m], numpy_htf['5m']['rsi'][map_1m_to_5m]/100.0, numpy_htf['5m']['fib_pos'][map_1m_to_5m], numpy_htf['5m']['trend_slope'][map_1m_to_5m],
+                    numpy_htf['15m']['log_ret'][map_1m_to_15m], numpy_htf['15m']['rsi'][map_1m_to_15m]/100.0, numpy_htf['15m']['dist_fib618'][map_1m_to_15m], numpy_htf['15m']['trend_slope'][map_1m_to_15m],
+                    *[fast_1m[f'log_ret_lag_{l}'] for l in [1,2,3,5,10,20]],
+                    *[fast_1m[f'rsi_lag_{l}'] for l in [1,2,3,5,10,20]],
+                    *[fast_1m[f'fib_pos_lag_{l}'] for l in [1,2,3,5,10,20]],
+                    *[fast_1m[f'volatility_lag_{l}'] for l in [1,2,3,5,10,20]]
+                ])
+                raw = legacy_v2.predict(xgb.DMatrix(X_GLOBAL_V2))
+                global_v2_probs = self._extract_probs(raw)
             except: pass
+        # Pre-Assemble Hydra Static
         global_hydra_static = None
         if hydra_models:
             try:
+                global_hydra_static = np.column_stack([
+                    fast_1m['rsi'], numpy_htf['5m']['rsi'][map_1m_to_5m], numpy_htf['15m']['rsi'][map_1m_to_15m],
+                    fast_1m['bb_width'], fast_1m['rel_vol'], fast_1m['atr'], fast_1m['close']
+                ])
             except: pass
+        # 🎯 CANDIDATE SELECTION
+        # L1 score >= 5.0 (loose pre-filter)
         valid_indices_mask = fast_1m['l1_score'] >= 5.0
         valid_indices = np.where(valid_indices_mask)[0]
         mask_bounds = (valid_indices > 500) & (valid_indices < len(fast_1m['close']) - 245)
         final_valid_indices = valid_indices[mask_bounds]
         num_candidates = len(final_valid_indices)
+        print(f"     🎯 Raw Candidates (Score > 5): {num_candidates}. Calculating Scores...", flush=True)
         if num_candidates == 0: return
+        # 🚀 PRE-ALLOCATE ARRAYS (STRICT ALIGNMENT)
+        # Using arrays of exact size N guarantees no shifting
+        res_oracle = np.full(num_candidates, 0.5, dtype=np.float32)
+        res_sniper = np.full(num_candidates, 0.5, dtype=np.float32)
+        res_hydra_risk = np.zeros(num_candidates, dtype=np.float32)
+        res_hydra_time = np.zeros(num_candidates, dtype=np.int64)
+        res_legacy_risk = np.zeros(num_candidates, dtype=np.float32)
         time_vec = np.arange(1, 241)
+        # --- A. ORACLE BATCHING ---
         if oracle_dir_model:
             try:
                 idx_1h = map_1m_to_1h[final_valid_indices]
                 idx_15m = map_1m_to_15m[final_valid_indices]
                 idx_4h = map_1m_to_4h[final_valid_indices]
                 titan_scores = np.clip(fast_1m['l1_score'][final_valid_indices] / 40.0, 0.1, 0.95)
+                features = []
                 for col in getattr(self.proc.oracle, 'feature_cols', []):
+                    if col.startswith('1h_'): features.append(numpy_htf['1h'].get(col[3:], np.zeros(len(idx_1h)))[idx_1h])
+                    elif col.startswith('15m_'): features.append(numpy_htf['15m'].get(col[4:], np.zeros(len(idx_15m)))[idx_15m])
+                    elif col.startswith('4h_'): features.append(numpy_htf['4h'].get(col[3:], np.zeros(len(idx_4h)))[idx_4h])
+                    elif col == 'sim_titan_score': features.append(titan_scores)
+                    elif col == 'sim_mc_score': features.append(np.full(num_candidates, 0.5))
+                    elif col == 'sim_pattern_score': features.append(np.full(num_candidates, 0.5))
+                    else: features.append(np.zeros(num_candidates))
+                X_oracle = np.column_stack(features)
+                preds = oracle_dir_model.predict(X_oracle)
+                res_oracle = self._extract_probs(preds)
             except Exception as e: print(f"Oracle Error: {e}")
+        # --- B. SNIPER BATCHING ---
         if sniper_models:
             try:
+                features = []
                 for col in getattr(self.proc.sniper, 'feature_names', []):
+                    if col in fast_1m: features.append(fast_1m[col][final_valid_indices])
+                    elif col == 'L_score': features.append(fast_1m.get('vol_zscore_50', np.zeros(len(fast_1m['close'])))[final_valid_indices])
+                    else: features.append(np.zeros(num_candidates))
+                X_sniper = np.column_stack(features)
+                preds_list = []
                 for m in sniper_models:
+                    raw = m.predict(X_sniper)
+                    preds_list.append(self._extract_probs(raw))
+                res_sniper = np.mean(preds_list, axis=0)
             except Exception as e: print(f"Sniper Error: {e}")
+        # --- C. HYDRA BATCHING (Optimized Loop) ---
         if hydra_models and global_hydra_static is not None:
             chunk_size = 5000
             for i in range(0, num_candidates, chunk_size):
+                # Indices inside 'final_valid_indices'
+                chunk_range = range(i, min(i + chunk_size, num_candidates))
+                global_indices = final_valid_indices[chunk_range]
+                batch_X = []
+                for idx in global_indices:
                     start = idx + 1; end = start + 240
                     sl_static = global_hydra_static[start:end]
                     entry_p = fast_1m['close'][idx]
                     sl_close = sl_static[:, 6]; sl_atr = sl_static[:, 5]
                     sl_dist = np.maximum(1.5 * sl_atr, entry_p * 0.015)
                     sl_max_pnl_r = (sl_cum_max - entry_p) / sl_dist
                     sl_atr_pct = sl_atr / sl_close
                     zeros = np.zeros(240); ones = np.ones(240)
+                    # 16 Features exact
+                    batch_X.append(np.column_stack([
+                        sl_static[:, 0], sl_static[:, 1], sl_static[:, 2], # 3 RSIs
+                        sl_static[:, 3], sl_static[:, 4], # BB, Vol
+                        zeros, sl_atr_pct, sl_norm_pnl, sl_max_pnl_r, # 4 dynamics
+                        zeros, zeros, time_vec, # 3 static
+                        zeros, ones*0.6, ones*0.7, ones*3.0 # 4 placeholders
+                    ]))
                 if batch_X:
                     try:
+                        big_X = np.array(batch_X) # (B, 240, 16)
+                        # Flatten for model if needed (Assuming Hydra takes 2D)
+                        # NOTE: Verify if Hydra takes 3D or 2D. Assuming 2D stacked:
                         big_X_flat = big_X.reshape(-1, big_X.shape[-1])
                         preds_flat = hydra_models['crash'].predict_proba(big_X_flat)[:, 1]
                         preds_batch = preds_flat.reshape(len(batch_X), 240)
+                        max_risks = np.max(preds_batch, axis=1)
                         over_thresh = preds_batch > 0.6
                         has_crash = over_thresh.any(axis=1)
                         crash_times_rel = np.argmax(over_thresh, axis=1)
+                        # Direct Assignment by Slice
+                        res_hydra_risk[chunk_range] = max_risks
+                        # Calculate absolute times
+                        crash_abs_times = np.zeros(len(batch_X), dtype=np.int64)
+                        for j, has in enumerate(has_crash):
+                            if has:
+                                t_idx = global_indices[j] + 1 + crash_times_rel[j]
+                                crash_abs_times[j] = fast_1m['timestamp'][t_idx]
+                        res_hydra_time[chunk_range] = crash_abs_times
                     except Exception: pass
+        # --- D. LEGACY MAPPING ---
         if legacy_v2:
+            # Vectorized Look-ahead max? Hard. Loop is safest for correctness.
+            # Optimized scalar loop
+            for i, idx in enumerate(final_valid_indices):
                 start = idx + 1
                 if start + 240 < len(global_v2_probs):
+                    # We can't vector slice variable windows efficiently in numpy without stride tricks
+                    # Simple loop is fine for 1D array
+                    res_legacy_risk[i] = np.max(global_v2_probs[start : start + 240])
+        # 📊 MANDATORY DIAGNOSTICS
+        print(f"     📊 [Stats] Oracle: Min={res_oracle.min():.2f} Max={res_oracle.max():.2f} Mean={res_oracle.mean():.2f}")
+        print(f"     📊 [Stats] Sniper: Min={res_sniper.min():.2f} Max={res_sniper.max():.2f} Mean={res_sniper.mean():.2f}")
+        print(f"     📊 [Stats] L1 Score: Min={fast_1m['l1_score'][final_valid_indices].min():.1f} Max={fast_1m['l1_score'][final_valid_indices].max():.1f}")
+        # --- E. CONSTRUCT DF ---
+        ai_df = pd.DataFrame({
+            'timestamp': fast_1m['timestamp'][final_valid_indices],
+            'symbol': sym,
+            'close': fast_1m['close'][final_valid_indices],
+            'real_titan': np.clip(fast_1m['l1_score'][final_valid_indices] / 40.0, 0.1, 0.95),
+            'oracle_conf': res_oracle,
+            'sniper_score': res_sniper,
+            'l1_score': fast_1m['l1_score'][final_valid_indices],
+            'risk_hydra_crash': res_hydra_risk,
+            'time_hydra_crash': res_hydra_time,
+            'risk_legacy_v2': res_legacy_risk,
+            'time_legacy_panic': np.zeros(num_candidates, dtype=int) # Placeholder
+        })
+        dt = time.time() - t0
+        if not ai_df.empty:
+            ai_df.to_pickle(scores_file)
+            print(f"   ✅ [{sym}] Completed {len(ai_df)} signals in {dt:.2f} seconds.", flush=True)
         del frames, fast_1m, numpy_htf, global_v2_probs, global_hydra_static
         gc.collect()
         global_df = pd.concat(all_data)
         global_df.sort_values('timestamp', inplace=True)
+        # Arrays
         arr_ts = global_df['timestamp'].values
         arr_close = global_df['close'].values.astype(np.float64)
         arr_symbol = global_df['symbol'].values
         await self.generate_truth_data()
         d = self.GRID_DENSITY
+        # Lowered Floors to Catch Signals
+        oracle_range = np.linspace(0.40, 0.8, d).tolist() # Lowered floor to 0.40
+        sniper_range = np.linspace(0.30, 0.7, d).tolist() # Lowered floor to 0.30
         hydra_range = np.linspace(0.70, 0.95, d).tolist()
         l1_range = [10.0, 15.0, 20.0, 25.0]
         titan_range = [0.4, 0.6]
         return best['config'], best
 async def run_strategic_optimization_task():
+    print("\n🧪 [STRATEGIC BACKTEST] Synchronized Integrity Mode...")
     r2 = R2Service()
     dm = DataManager(None, None, r2)
     proc = MLProcessor(dm)