Tradtesting

Paused

App Files Files Community

Riy777 committed on Dec 9, 2025

Commit

6649776

verified ·

1 Parent(s): eab27d3

Update backtest_engine.py

Browse files

Files changed (1) hide show

backtest_engine.py +178 -172

backtest_engine.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ============================================================
-# 🧪 backtest_engine.py (V134.0 - GEM-Architect: Feature Parity Edition)
 # ============================================================
 import asyncio
@@ -10,12 +10,14 @@ import time
 import logging
 import itertools
 import os
 import gc
 import sys
 import traceback
 from datetime import datetime, timezone
 from typing import Dict, Any, List
-from numpy.lib.stride_tricks import sliding_window_view
 try:
     from ml_engine.processor import MLProcessor, SystemLimits
@@ -31,6 +33,32 @@ except ImportError:
 logging.getLogger('ml_engine').setLevel(logging.WARNING)
 CACHE_DIR = "backtest_real_scores"
 class HeavyDutyBacktester:
     def __init__(self, data_manager, processor):
         self.dm = data_manager
@@ -56,7 +84,7 @@ class HeavyDutyBacktester:
         self.force_end_date = None
         if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
-        print(f"🧪 [Backtest V134.0] Feature Parity Mode (Exact Live System Logic).")
     def set_date_range(self, start_str, end_str):
         self.force_start_date = start_str
@@ -75,7 +103,7 @@ class HeavyDutyBacktester:
             tasks.append(current)
             current += duration_per_batch
         all_candles = []
-        sem = asyncio.Semaphore(15)
         async def _fetch_batch(timestamp):
             async with sem:
@@ -85,7 +113,7 @@ class HeavyDutyBacktester:
                     except: await asyncio.sleep(0.5)
                 return []
-        chunk_size = 25
         for i in range(0, len(tasks), chunk_size):
             chunk_tasks = tasks[i:i + chunk_size]
             futures = [_fetch_batch(ts) for ts in chunk_tasks]
@@ -101,29 +129,19 @@ class HeavyDutyBacktester:
         print(f"     ✅ Downloaded {len(df)} candles.", flush=True)
         return df.values.tolist()
-    # ==============================================================
-    # 🏎️ HELPER: Rolling Z-Score (For Sniper)
-    # ==============================================================
-    def _z_roll(self, x, w=500):
-        r = x.rolling(w).mean()
-        s = x.rolling(w).std().replace(0, np.nan)
-        return ((x - r) / s).fillna(0)
     # ==============================================================
     # 🏎️ VECTORIZED INDICATORS (EXACT MATCH TO LIVE SYSTEM)
     # ==============================================================
     def _calculate_indicators_vectorized(self, df, timeframe='1m'):
         # 1. Clean Types
         cols = ['close', 'high', 'low', 'volume', 'open']
-        for c in cols: df[c] = df[c].astype(np.float64) # Use float64 for precision match
         # ---------------------------------------------------------
-        # 🧠 PART 1: TITAN FEATURES (Exact Replica of TitanEngine)
         # ---------------------------------------------------------
-        # RSI
         df['RSI'] = ta.rsi(df['close'], length=14).fillna(50)
-        # MACD
         macd = ta.macd(df['close'])
         if macd is not None:
             df['MACD'] = macd.iloc[:, 0].fillna(0)
@@ -131,35 +149,25 @@ class HeavyDutyBacktester:
         else:
             df['MACD'] = 0.0; df['MACD_h'] = 0.0
-        # CCI
         df['CCI'] = ta.cci(df['high'], df['low'], df['close'], length=20).fillna(0)
-        # ADX
         adx = ta.adx(df['high'], df['low'], df['close'], length=14)
         if adx is not None: df['ADX'] = adx.iloc[:, 0].fillna(0)
         else: df['ADX'] = 0.0
-        # EMAs & Distances
         for p in [9, 21, 50, 200]:
             ema = ta.ema(df['close'], length=p)
             df[f'EMA_{p}_dist'] = ((df['close'] / ema) - 1).fillna(0)
-            df[f'ema{p}'] = ema # Keep raw for others
-        # Bollinger Bands (Width & %B)
         bb = ta.bbands(df['close'], length=20, std=2.0)
         if bb is not None:
-            # Width = (Upper - Lower) / Middle
             df['BB_w'] = ((bb.iloc[:, 2] - bb.iloc[:, 0]) / bb.iloc[:, 1]).fillna(0)
-            # %B = (Price - Lower) / (Upper - Lower)
             df['BB_p'] = ((df['close'] - bb.iloc[:, 0]) / (bb.iloc[:, 2] - bb.iloc[:, 0])).fillna(0)
-            # Helper for Hydra
-            df['bb_width'] = df['BB_w'] # Alias
-        # MFI
         df['MFI'] = ta.mfi(df['high'], df['low'], df['close'], df['volume'], length=14).fillna(50)
-        # VWAP
         vwap = ta.vwap(df['high'], df['low'], df['close'], df['volume'])
         if vwap is not None:
             df['VWAP_dist'] = ((df['close'] / vwap) - 1).fillna(0)
@@ -168,7 +176,6 @@ class HeavyDutyBacktester:
             df['VWAP_dist'] = 0.0
             df['vwap'] = df['close']
-        # ATR (for others)
         df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14).fillna(0)
         df['atr_pct'] = df['atr'] / df['close']
@@ -181,36 +188,32 @@ class HeavyDutyBacktester:
             df['return_5m'] = df['close'].pct_change(5).fillna(0)
             df['return_15m'] = df['close'].pct_change(15).fillna(0)
-            df['rsi_14'] = df['RSI'] # Alias
-            # Sniper specific derivations
             df['ema_9_slope'] = ((df['ema9'] - df['ema9'].shift(1)) / df['ema9'].shift(1)).fillna(0)
-            df['ema_21_dist'] = df['EMA_21_dist'] # Reuse
-            # Z-Scores for Sniper
             atr_100 = ta.atr(df['high'], df['low'], df['close'], length=100).fillna(0)
-            df['atr_z'] = self._z_roll(atr_100) # Mapped later
-            df['vol_zscore_50'] = self._z_roll(df['volume'], 50)
             rng = (df['high'] - df['low']).replace(0, 1e-9)
-            df['candle_range'] = self._z_roll(rng, 500)
             df['close_pos_in_range'] = ((df['close'] - df['low']) / rng).fillna(0.5)
-            # Liquidity Proxies
             df['dollar_vol'] = df['close'] * df['volume']
             amihud_raw = (df['return_1m'].abs() / df['dollar_vol'].replace(0, np.nan)).fillna(0)
-            df['amihud'] = self._z_roll(amihud_raw)
             dp = df['close'].diff()
             roll_cov = dp.rolling(64).cov(dp.shift(1))
             roll_spread_raw = (2 * np.sqrt(np.maximum(0, -roll_cov)))
-            df['roll_spread'] = self._z_roll(roll_spread_raw)
             sign = np.sign(df['close'].diff()).fillna(0)
             signed_vol = sign * df['volume']
             ofi_raw = signed_vol.rolling(30).sum()
-            df['ofi'] = self._z_roll(ofi_raw)
             buy_vol = (sign > 0) * df['volume']
             sell_vol = (sign < 0) * df['volume']
@@ -220,10 +223,10 @@ class HeavyDutyBacktester:
             vwap_win = 20
             v_short = (df['dollar_vol'].rolling(vwap_win).sum() / df['volume'].rolling(vwap_win).sum().replace(0, np.nan)).fillna(df['close'])
-            df['vwap_dev'] = self._z_roll(df['close'] - v_short)
             rv_gk = ((np.log(df['high'] / df['low'])**2) / 2) - ((2 * np.log(2) - 1) * (np.log(df['close'] / df['open'])**2))
-            df['rv_gk'] = self._z_roll(rv_gk)
             # L_Score approximation
             df['L_score'] = (df['vol_zscore_50'] - df['amihud'] - df['roll_spread'] - df['rv_gk'].abs() - df['vwap_dev'].abs() + df['ofi']).fillna(0)
@@ -249,7 +252,10 @@ class HeavyDutyBacktester:
         fib618 = roll_max - (diff * 0.382)
         df['dist_fib618'] = ((df['close'] - fib618) / df['close']).fillna(0)
-        # Legacy Lags
         if timeframe == '1m':
             for lag in [1, 2, 3, 5, 10, 20]:
                 df[f'log_ret_lag_{lag}'] = df['log_ret'].shift(lag).fillna(0)
@@ -261,7 +267,7 @@ class HeavyDutyBacktester:
         return df
     # ==============================================================
-    # 🧠 CPU PROCESSING (GLOBAL INFERENCE)
     # ==============================================================
     async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
         safe_sym = sym.replace('/', '_')
@@ -272,7 +278,7 @@ class HeavyDutyBacktester:
              print(f"   📂 [{sym}] Data Exists -> Skipping.")
              return
-        print(f"   ⚙️ [CPU] Analyzing {sym} (Global Inference)...", flush=True)
         t0 = time.time()
         df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
@@ -283,7 +289,7 @@ class HeavyDutyBacktester:
         frames = {}
         agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
-        # 1. Calc 1m (Base)
         frames['1m'] = self._calculate_indicators_vectorized(df_1m.copy(), timeframe='1m')
         frames['1m']['timestamp'] = frames['1m'].index.floor('1min').astype(np.int64) // 10**6
         fast_1m = {col: frames['1m'][col].values for col in frames['1m'].columns}
@@ -299,15 +305,10 @@ class HeavyDutyBacktester:
         # 3. Global Index Maps
         arr_ts_1m = fast_1m['timestamp']
-        map_5m = np.searchsorted(numpy_htf['5m']['timestamp'], arr_ts_1m)
-        map_15m = np.searchsorted(numpy_htf['15m']['timestamp'], arr_ts_1m)
-        map_1h = np.searchsorted(numpy_htf['1h']['timestamp'], arr_ts_1m)
-        map_4h = np.searchsorted(numpy_htf['4h']['timestamp'], arr_ts_1m)
-        map_5m = np.clip(map_5m, 0, len(numpy_htf['5m']['timestamp']) - 1)
-        map_15m = np.clip(map_15m, 0, len(numpy_htf['15m']['timestamp']) - 1)
-        map_1h = np.clip(map_1h, 0, len(numpy_htf['1h']['timestamp']) - 1)
-        map_4h = np.clip(map_4h, 0, len(numpy_htf['4h']['timestamp']) - 1)
         # 4. Load Models
         hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
@@ -332,17 +333,8 @@ class HeavyDutyBacktester:
         if titan_model and titan_cols:
             print("     🚀 Running Global Titan...", flush=True)
             try:
-                # Titan needs 5m features aligned to 1m
-                # Build feature matrix from numpy_htf['5m'] using map_5m
                 t_vecs = []
                 for col in titan_cols:
-                    # Titan features usually have no prefix in the pickle list,
-                    # but in htf dict we have raw names.
-                    # Need to verify if titan_cols expects "RSI" or "5m_RSI"??
-                    # Usually Titan is trained on ONE timeframe (5m).
-                    # So we just pull the raw column from numpy_htf['5m'].
-                    # Fix: Clean name (e.g. if trained as 'RSI', grab 'RSI')
                     if col in numpy_htf['5m']:
                         t_vecs.append(numpy_htf['5m'][col][map_5m])
                     else:
@@ -350,7 +342,7 @@ class HeavyDutyBacktester:
                 X_TITAN = np.column_stack(t_vecs)
                 preds_t = titan_model.predict(xgb.DMatrix(X_TITAN))
-                global_titan_scores = preds_t
             except Exception as e: print(f"Titan Error: {e}")
         # B. SNIPER (1m Direct)
@@ -361,13 +353,12 @@ class HeavyDutyBacktester:
                 s_vecs = []
                 for col in sniper_cols:
                     if col in fast_1m: s_vecs.append(fast_1m[col])
-                    # Fix mapping for 'atr' -> 'atr_z' if needed
                     elif col == 'atr' and 'atr_z' in fast_1m: s_vecs.append(fast_1m['atr_z'])
                     else: s_vecs.append(np.zeros(len(arr_ts_1m)))
                 X_SNIPER = np.column_stack(s_vecs)
                 preds_list = [m.predict(X_SNIPER) for m in sniper_models]
-                global_sniper_scores = np.mean(preds_list, axis=0)
             except Exception as e: print(f"Sniper Error: {e}")
         # C. ORACLE (HTF Mix)
@@ -387,9 +378,8 @@ class HeavyDutyBacktester:
                 X_ORACLE = np.column_stack(o_vecs)
                 preds_o = oracle_dir.predict(X_ORACLE)
-                global_oracle_scores = preds_o if isinstance(preds_o, np.ndarray) and len(preds_o.shape)==1 else preds_o[:, 0]
-                # Adjust if binary (assuming 0=Long, 1=Short or vice versa, check training)
-                # Usually we want Confidence > 0.6. Assuming output is Long Prob.
             except Exception as e: print(f"Oracle Error: {e}")
         # D. LEGACY V2 (Global)
@@ -434,11 +424,6 @@ class HeavyDutyBacktester:
             except: pass
         # --- 5. Filtering Candidates ---
-        # Using Oracle and Sniper to filter BEFORE loop
-        # This saves simulating trades that would never be entered
-        # Valid: (Titan > 0.5) & (Oracle > 0.5) & (Sniper > 0.3) & (RSI < 70)
-        # This reduces loop count drastically
         is_candidate = (
             (numpy_htf['1h']['RSI'][map_1h] <= 70) &
             (global_titan_scores > 0.4) &
@@ -447,7 +432,6 @@ class HeavyDutyBacktester:
         candidate_indices = np.where(is_candidate)[0]
-        # Date Filter
         start_ts_val = frames['1m'].index[0] + pd.Timedelta(minutes=500)
         start_idx_offset = np.searchsorted(arr_ts_1m, int(start_ts_val.timestamp()*1000))
         candidate_indices = candidate_indices[candidate_indices >= start_idx_offset]
@@ -497,7 +481,6 @@ class HeavyDutyBacktester:
                 l2_arr = np.full(240, 0.7)
                 tgt_arr = np.full(240, 3.0)
-                # [rsi1, rsi5, rsi15, bb, vol, dist_ema, atr_p, norm, max, dists, time, entry, oracle, l2, target]
                 X_H = np.column_stack([
                     sl_st[:,0], sl_st[:,1], sl_st[:,2], sl_st[:,3], sl_st[:,4],
                     zeros, atr_pct, norm_pnl, max_pnl_r,
@@ -537,120 +520,126 @@ class HeavyDutyBacktester:
         gc.collect()
     # ==============================================================
-    # PHASE 1 & 2 (Standard Optimization)
     # ==============================================================
     async def generate_truth_data(self):
-        if self.force_start_date and self.force_end_date:
-            dt_start = datetime.strptime(self.force_start_date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
-            dt_end = datetime.strptime(self.force_end_date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
-            start_time_ms = int(dt_start.timestamp() * 1000)
-            end_time_ms = int(dt_end.timestamp() * 1000)
             print(f"\n🚜 [Phase 1] Processing Era: {self.force_start_date} -> {self.force_end_date}")
-        else: return
-        for sym in self.TARGET_COINS:
-            try:
-                candles = await self._fetch_all_data_fast(sym, start_time_ms, end_time_ms)
-                if candles: await self._process_data_in_memory(sym, candles, start_time_ms, end_time_ms)
-            except Exception as e: print(f"   ❌ SKIP {sym}: {e}", flush=True)
-            gc.collect()
     @staticmethod
     def _worker_optimize(combinations_batch, scores_files, initial_capital, fees_pct, max_slots):
-        results = []
-        all_data = []
-        for fp in scores_files:
-            try:
-                df = pd.read_pickle(fp)
-                if not df.empty: all_data.append(df)
             except: pass
-        if not all_data: return []
-        global_df = pd.concat(all_data)
-        global_df.sort_values('timestamp', inplace=True)
-        grouped_by_time = global_df.groupby('timestamp')
-        for config in combinations_batch:
-            wallet = { "balance": initial_capital, "allocated": 0.0, "positions": {}, "trades_history": [] }
-            w_titan = config['w_titan']; oracle_thresh = config.get('oracle_thresh', 0.6)
-            sniper_thresh = config.get('sniper_thresh', 0.4); hydra_thresh = config['hydra_thresh']
-            peak_balance = initial_capital; max_drawdown = 0.0
-            for ts, group in grouped_by_time:
-                active = list(wallet["positions"].keys())
-                current_prices = {row['symbol']: row['close'] for _, row in group.iterrows()}
-                for sym in active:
-                    if sym in current_prices:
-                        curr = current_prices[sym]
-                        pos = wallet["positions"][sym]
-                        h_risk = pos.get('risk_hydra_crash', 0)
-                        h_time = pos.get('time_hydra_crash', 0)
-                        is_crash = (h_risk > hydra_thresh) and (h_time > 0) and (ts >= h_time)
-                        pnl = (curr - pos['entry']) / pos['entry']
-                        if is_crash or pnl > 0.04 or pnl < -0.02:
-                            wallet['balance'] += pos['size'] * (1 + pnl - (fees_pct*2))
-                            wallet['allocated'] -= pos['size']
-                            del wallet['positions'][sym]
-                            wallet['trades_history'].append({'pnl': pnl})
-                total_eq = wallet['balance'] + wallet['allocated']
-                if total_eq > peak_balance: peak_balance = total_eq
-                dd = (peak_balance - total_eq) / peak_balance
-                if dd > max_drawdown: max_drawdown = dd
-                if len(wallet['positions']) < max_slots:
-                    for _, row in group.iterrows():
-                        if row['symbol'] in wallet['positions']: continue
-                        if row['oracle_conf'] < oracle_thresh: continue
-                        if row['sniper_score'] < sniper_thresh: continue
-                        if row['real_titan'] < w_titan: continue # Titan Check
-                        size = 10.0
-                        if wallet['balance'] >= size:
-                            wallet['positions'][row['symbol']] = {
-                                'entry': row['close'], 'size': size,
-                                'risk_hydra_crash': row['risk_hydra_crash'],
-                                'time_hydra_crash': row['time_hydra_crash']
-                            }
-                            wallet['balance'] -= size
-                            wallet['allocated'] += size
-            final_bal = wallet['balance'] + wallet['allocated']
-            net_profit = final_bal - initial_capital
-            trades = wallet['trades_history']
-            total_t = len(trades)
-            win_count = len([t for t in trades if t['pnl'] > 0])
-            loss_count = len([t for t in trades if t['pnl'] <= 0])
-            win_rate = (win_count / total_t * 100) if total_t > 0 else 0
-            max_win = max([t['pnl'] for t in trades]) if trades else 0
-            max_loss = min([t['pnl'] for t in trades]) if trades else 0
-            max_win_streak = 0; max_loss_streak = 0; curr_w = 0; curr_l = 0
-            for t in trades:
                 if t['pnl'] > 0:
                     curr_w += 1; curr_l = 0
-                    if curr_w > max_win_streak: max_win_streak = curr_w
                 else:
                     curr_l += 1; curr_w = 0
-                    if curr_l > max_loss_streak: max_loss_streak = curr_l
-            results.append({
-                'config': config, 'final_balance': final_bal, 'net_profit': net_profit,
-                'total_trades': total_t, 'win_count': win_count, 'loss_count': loss_count,
-                'win_rate': win_rate, 'max_single_win': max_win, 'max_single_loss': max_loss,
-                'max_drawdown': max_drawdown * 100
             })
-        return results
     async def run_optimization(self, target_regime="RANGE"):
         await self.generate_truth_data()
         oracle_r = np.linspace(0.4, 0.7, 3); sniper_r = np.linspace(0.4, 0.7, 3)
-        hydra_r = [0.85, 0.95]
         combos = []
-        for o, s, h in itertools.product(oracle_r, sniper_r, hydra_r):
             combos.append({
-                'w_titan': 0.5, 'w_struct': 0.3, 'thresh': 0.5, 'l1_thresh': 50.0,
                 'oracle_thresh': o, 'sniper_thresh': s, 'hydra_thresh': h, 'legacy_thresh': 0.95
             })
@@ -661,6 +650,14 @@ class HeavyDutyBacktester:
         results_list.sort(key=lambda x: x['net_profit'], reverse=True)
         best = results_list[0]
         print("\n" + "="*60)
         print(f"🏆 CHAMPION REPORT [{target_regime}]:")
         print(f"   💰 Final Balance:   ${best['final_balance']:,.2f}")
@@ -668,13 +665,22 @@ class HeavyDutyBacktester:
         print("-" * 60)
         print(f"   📊 Total Trades:    {best['total_trades']}")
         print(f"   📈 Win Rate:        {best['win_rate']:.1f}%")
         print("-" * 60)
         print(f"   ⚙️ Oracle={best['config']['oracle_thresh']:.2f} | Sniper={best['config']['sniper_thresh']:.2f} | Hydra={best['config']['hydra_thresh']:.2f}")
         print("="*60)
         return best['config'], best
 async def run_strategic_optimization_task():
-    print("\n🧪 [STRATEGIC BACKTEST] Feature Parity Mode...")
     r2 = R2Service(); dm = DataManager(None, None, r2); proc = MLProcessor(dm)
     try:
         await dm.initialize(); await proc.initialize()

 # ============================================================
+# 🧪 backtest_engine.py (V135.0 - GEM-Architect: Feature Parity + Full Diagnostics)
 # ============================================================
 import asyncio
 import logging
 import itertools
 import os
+import glob
 import gc
 import sys
 import traceback
+from numpy.lib.stride_tricks import sliding_window_view
 from datetime import datetime, timezone
 from typing import Dict, Any, List
+from scipy.special import expit # Sigmoid
 try:
     from ml_engine.processor import MLProcessor, SystemLimits
 logging.getLogger('ml_engine').setLevel(logging.WARNING)
 CACHE_DIR = "backtest_real_scores"
+# ============================================================
+# 🛡️ GLOBAL HELPERS
+# ============================================================
def sanitize_features(df):
    """Return *df* with infinities and NaNs replaced by 0.0.

    ``None`` and empty inputs are handed back untouched. Otherwise
    +inf / -inf are first mapped to NaN and every NaN is then filled
    with 0.0; the result is a new object, the input is not modified.
    """
    if df is None or df.empty:
        return df
    # Fold infinities into NaN so a single fill covers both cases.
    without_inf = df.replace([np.inf, -np.inf], np.nan)
    return without_inf.fillna(0.0)
+def _z_roll(x, w=500):
+    r = x.rolling(w).mean()
+    s = x.rolling(w).std().replace(0, np.nan)
+    return ((x - r) / s).fillna(0)
+def _revive_score_distribution(scores):
+    """Normalize flattened scores to 0-1 range if they are compressed"""
+    scores = np.array(scores, dtype=np.float32)
+    if len(scores) < 10: return scores
+    std = np.std(scores)
+    if std < 0.05:
+        mean = np.mean(scores)
+        z = (scores - mean) / (std + 1e-9)
+        return expit(z)
+    return scores
+# ============================================================
+# 🧪 THE BACKTESTER CLASS
+# ============================================================
 class HeavyDutyBacktester:
     def __init__(self, data_manager, processor):
         self.dm = data_manager
         self.force_end_date = None
         if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
+        print(f"🧪 [Backtest V135.0] Feature Parity + Full Diagnostics + Speed.")
     def set_date_range(self, start_str, end_str):
         self.force_start_date = start_str
             tasks.append(current)
             current += duration_per_batch
         all_candles = []
+        sem = asyncio.Semaphore(20)
         async def _fetch_batch(timestamp):
             async with sem:
                     except: await asyncio.sleep(0.5)
                 return []
+        chunk_size = 50
         for i in range(0, len(tasks), chunk_size):
             chunk_tasks = tasks[i:i + chunk_size]
             futures = [_fetch_batch(ts) for ts in chunk_tasks]
         print(f"     ✅ Downloaded {len(df)} candles.", flush=True)
         return df.values.tolist()
     # ==============================================================
     # 🏎️ VECTORIZED INDICATORS (EXACT MATCH TO LIVE SYSTEM)
     # ==============================================================
     def _calculate_indicators_vectorized(self, df, timeframe='1m'):
         # 1. Clean Types
         cols = ['close', 'high', 'low', 'volume', 'open']
+        for c in cols: df[c] = df[c].astype(np.float64)
         # ---------------------------------------------------------
+        # 🧠 PART 1: TITAN FEATURES
         # ---------------------------------------------------------
         df['RSI'] = ta.rsi(df['close'], length=14).fillna(50)
         macd = ta.macd(df['close'])
         if macd is not None:
             df['MACD'] = macd.iloc[:, 0].fillna(0)
         else:
             df['MACD'] = 0.0; df['MACD_h'] = 0.0
         df['CCI'] = ta.cci(df['high'], df['low'], df['close'], length=20).fillna(0)
         adx = ta.adx(df['high'], df['low'], df['close'], length=14)
         if adx is not None: df['ADX'] = adx.iloc[:, 0].fillna(0)
         else: df['ADX'] = 0.0
         for p in [9, 21, 50, 200]:
             ema = ta.ema(df['close'], length=p)
             df[f'EMA_{p}_dist'] = ((df['close'] / ema) - 1).fillna(0)
+            df[f'ema{p}'] = ema
         bb = ta.bbands(df['close'], length=20, std=2.0)
         if bb is not None:
             df['BB_w'] = ((bb.iloc[:, 2] - bb.iloc[:, 0]) / bb.iloc[:, 1]).fillna(0)
             df['BB_p'] = ((df['close'] - bb.iloc[:, 0]) / (bb.iloc[:, 2] - bb.iloc[:, 0])).fillna(0)
+            df['bb_width'] = df['BB_w']
         df['MFI'] = ta.mfi(df['high'], df['low'], df['close'], df['volume'], length=14).fillna(50)
         vwap = ta.vwap(df['high'], df['low'], df['close'], df['volume'])
         if vwap is not None:
             df['VWAP_dist'] = ((df['close'] / vwap) - 1).fillna(0)
             df['VWAP_dist'] = 0.0
             df['vwap'] = df['close']
         df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14).fillna(0)
         df['atr_pct'] = df['atr'] / df['close']
             df['return_5m'] = df['close'].pct_change(5).fillna(0)
             df['return_15m'] = df['close'].pct_change(15).fillna(0)
+            df['rsi_14'] = df['RSI']
             df['ema_9_slope'] = ((df['ema9'] - df['ema9'].shift(1)) / df['ema9'].shift(1)).fillna(0)
+            df['ema_21_dist'] = df['EMA_21_dist']
             atr_100 = ta.atr(df['high'], df['low'], df['close'], length=100).fillna(0)
+            df['atr_z'] = _z_roll(atr_100)
+            df['vol_zscore_50'] = _z_roll(df['volume'], 50)
             rng = (df['high'] - df['low']).replace(0, 1e-9)
+            df['candle_range'] = _z_roll(rng, 500)
             df['close_pos_in_range'] = ((df['close'] - df['low']) / rng).fillna(0.5)
             df['dollar_vol'] = df['close'] * df['volume']
             amihud_raw = (df['return_1m'].abs() / df['dollar_vol'].replace(0, np.nan)).fillna(0)
+            df['amihud'] = _z_roll(amihud_raw)
             dp = df['close'].diff()
             roll_cov = dp.rolling(64).cov(dp.shift(1))
             roll_spread_raw = (2 * np.sqrt(np.maximum(0, -roll_cov)))
+            df['roll_spread'] = _z_roll(roll_spread_raw)
             sign = np.sign(df['close'].diff()).fillna(0)
             signed_vol = sign * df['volume']
             ofi_raw = signed_vol.rolling(30).sum()
+            df['ofi'] = _z_roll(ofi_raw)
             buy_vol = (sign > 0) * df['volume']
             sell_vol = (sign < 0) * df['volume']
             vwap_win = 20
             v_short = (df['dollar_vol'].rolling(vwap_win).sum() / df['volume'].rolling(vwap_win).sum().replace(0, np.nan)).fillna(df['close'])
+            df['vwap_dev'] = _z_roll(df['close'] - v_short)
             rv_gk = ((np.log(df['high'] / df['low'])**2) / 2) - ((2 * np.log(2) - 1) * (np.log(df['close'] / df['open'])**2))
+            df['rv_gk'] = _z_roll(rv_gk)
             # L_Score approximation
             df['L_score'] = (df['vol_zscore_50'] - df['amihud'] - df['roll_spread'] - df['rv_gk'].abs() - df['vwap_dev'].abs() + df['ofi']).fillna(0)
         fib618 = roll_max - (diff * 0.382)
         df['dist_fib618'] = ((df['close'] - fib618) / df['close']).fillna(0)
+        df['dist_ema50'] = (df['close'] - df['ema50']) / df['close']
+        df['ema200'] = ta.ema(df['close'], length=200)
+        df['dist_ema200'] = (df['close'] - df['ema200']) / df['close']
         if timeframe == '1m':
             for lag in [1, 2, 3, 5, 10, 20]:
                 df[f'log_ret_lag_{lag}'] = df['log_ret'].shift(lag).fillna(0)
         return df
     # ==============================================================
+    # 🧠 CPU PROCESSING (GLOBAL INFERENCE + FULL FEATURE PARITY)
     # ==============================================================
     async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
         safe_sym = sym.replace('/', '_')
              print(f"   📂 [{sym}] Data Exists -> Skipping.")
              return
+        print(f"   ⚙️ [CPU] Analyzing {sym} (Full Stack / High Fidelity)...", flush=True)
         t0 = time.time()
         df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
         frames = {}
         agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
+        # 1. Calc 1m
         frames['1m'] = self._calculate_indicators_vectorized(df_1m.copy(), timeframe='1m')
         frames['1m']['timestamp'] = frames['1m'].index.floor('1min').astype(np.int64) // 10**6
         fast_1m = {col: frames['1m'][col].values for col in frames['1m'].columns}
         # 3. Global Index Maps
         arr_ts_1m = fast_1m['timestamp']
+        map_5m = np.clip(np.searchsorted(numpy_htf['5m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['5m']['timestamp']) - 1)
+        map_15m = np.clip(np.searchsorted(numpy_htf['15m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['15m']['timestamp']) - 1)
+        map_1h = np.clip(np.searchsorted(numpy_htf['1h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['1h']['timestamp']) - 1)
+        map_4h = np.clip(np.searchsorted(numpy_htf['4h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['4h']['timestamp']) - 1)
         # 4. Load Models
         hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
         if titan_model and titan_cols:
             print("     🚀 Running Global Titan...", flush=True)
             try:
                 t_vecs = []
                 for col in titan_cols:
                     if col in numpy_htf['5m']:
                         t_vecs.append(numpy_htf['5m'][col][map_5m])
                     else:
                 X_TITAN = np.column_stack(t_vecs)
                 preds_t = titan_model.predict(xgb.DMatrix(X_TITAN))
+                global_titan_scores = _revive_score_distribution(preds_t)
             except Exception as e: print(f"Titan Error: {e}")
         # B. SNIPER (1m Direct)
                 s_vecs = []
                 for col in sniper_cols:
                     if col in fast_1m: s_vecs.append(fast_1m[col])
                     elif col == 'atr' and 'atr_z' in fast_1m: s_vecs.append(fast_1m['atr_z'])
                     else: s_vecs.append(np.zeros(len(arr_ts_1m)))
                 X_SNIPER = np.column_stack(s_vecs)
                 preds_list = [m.predict(X_SNIPER) for m in sniper_models]
+                global_sniper_scores = _revive_score_distribution(np.mean(preds_list, axis=0))
             except Exception as e: print(f"Sniper Error: {e}")
         # C. ORACLE (HTF Mix)
                 X_ORACLE = np.column_stack(o_vecs)
                 preds_o = oracle_dir.predict(X_ORACLE)
+                preds_o = preds_o if isinstance(preds_o, np.ndarray) and len(preds_o.shape)==1 else preds_o[:, 0]
+                global_oracle_scores = _revive_score_distribution(preds_o)
             except Exception as e: print(f"Oracle Error: {e}")
         # D. LEGACY V2 (Global)
             except: pass
         # --- 5. Filtering Candidates ---
         is_candidate = (
             (numpy_htf['1h']['RSI'][map_1h] <= 70) &
             (global_titan_scores > 0.4) &
         candidate_indices = np.where(is_candidate)[0]
         start_ts_val = frames['1m'].index[0] + pd.Timedelta(minutes=500)
         start_idx_offset = np.searchsorted(arr_ts_1m, int(start_ts_val.timestamp()*1000))
         candidate_indices = candidate_indices[candidate_indices >= start_idx_offset]
                 l2_arr = np.full(240, 0.7)
                 tgt_arr = np.full(240, 3.0)
                 X_H = np.column_stack([
                     sl_st[:,0], sl_st[:,1], sl_st[:,2], sl_st[:,3], sl_st[:,4],
                     zeros, atr_pct, norm_pnl, max_pnl_r,
         gc.collect()
     # ==============================================================
+    # PHASE 1: Truth Data
     # ==============================================================
     async def generate_truth_data(self):
         """Run Phase 1 for every target coin over the forced date range.

         Nothing happens unless ``self.force_start_date`` is set. Otherwise both
         forced dates are parsed as ``YYYY-MM-DD`` at midnight UTC and converted
         to epoch milliseconds, then each symbol in ``self.TARGET_COINS`` is
         fetched and, when the fetch result is truthy, handed to the in-memory
         processing step together with the same millisecond bounds.
         """
         if not self.force_start_date:
             return

         def _utc_day_to_ms(day_str):
             # Treat the calendar day as 00:00 UTC and express it in epoch ms.
             moment = datetime.strptime(day_str, "%Y-%m-%d").replace(tzinfo=timezone.utc)
             return int(moment.timestamp()*1000)

         start_ms = _utc_day_to_ms(self.force_start_date)
         end_ms = _utc_day_to_ms(self.force_end_date)
         print(f"\n🚜 [Phase 1] Processing Era: {self.force_start_date} -> {self.force_end_date}")

         for symbol in self.TARGET_COINS:
             candles = await self._fetch_all_data_fast(symbol, start_ms, end_ms)
             if not candles:
                 # Nothing came back for this symbol; move on to the next one.
                 continue
             await self._process_data_in_memory(symbol, candles, start_ms, end_ms)
+    # ==============================================================
+    # PHASE 2: Optimization (Detailed Stats)
+    # ==============================================================
     @staticmethod
     def _worker_optimize(combinations_batch, scores_files, initial_capital, fees_pct, max_slots):
+        print(f"     ⏳ [System] Loading {len(scores_files)} datasets...", flush=True)
+        data = []
+        for f in scores_files:
+            try: data.append(pd.read_pickle(f))
             except: pass
+        if not data: return []
+        df = pd.concat(data).sort_values('timestamp')
+        ts = df['timestamp'].values; close = df['close'].values.astype(float)
+        sym = df['symbol'].values; sym_map = {s:i for i,s in enumerate(np.unique(sym))}
+        sym_id = np.array([sym_map[s] for s in sym])
+        oracle = df['oracle_conf'].values; sniper = df['sniper_score'].values
+        hydra = df['risk_hydra_crash'].values; titan = df['real_titan'].values
+        l1 = df['l1_score'].values
+        legacy_v2 = df['risk_legacy_v2'].values
+        N = len(ts)
+        print(f"     🚀 [System] Testing {len(combinations_batch)} configs on {N} candles...", flush=True)
+        res = []
+        for cfg in combinations_batch:
+            pos = {}; log = []
+            bal = initial_capital; alloc = 0.0
+            mask = (l1 >= cfg['l1_thresh']) & (oracle >= cfg['oracle_thresh']) & (sniper >= cfg['sniper_thresh']) & (titan >= 0.55)
+            for i in range(N):
+                s = sym_id[i]; p = close[i]
+                if s in pos:
+                    entry = pos[s][0]; h_r = pos[s][1]; titan_entry = pos[s][3]
+                    crash_hydra = (h_r > cfg['hydra_thresh'])
+                    panic_legacy = (legacy_v2[i] > cfg['legacy_thresh'])
+                    pnl = (p - entry)/entry
+                    if crash_hydra or panic_legacy or pnl > 0.04 or pnl < -0.02:
+                        realized = pnl - fees_pct*2
+                        bal += pos[s][2] * (1 + realized)
+                        alloc -= pos[s][2]
+                        is_consensus = (titan_entry > 0.55)
+                        log.append({'pnl': realized, 'consensus': is_consensus})
+                        del pos[s]
+                if len(pos) < max_slots and mask[i]:
+                    if s not in pos and bal >= 5.0:
+                        size = min(10.0, bal * 0.98)
+                        pos[s] = (p, hydra[i], size, titan[i])
+                        bal -= size; alloc += size
+            final_bal = bal + alloc
+            profit = final_bal - initial_capital
+            # Detailed Stats
+            tot = len(log)
+            winning = [x for x in log if x['pnl'] > 0]
+            losing = [x for x in log if x['pnl'] <= 0]
+            win_count = len(winning); loss_count = len(losing)
+            win_rate = (win_count/tot*100) if tot else 0
+            avg_win = np.mean([x['pnl'] for x in winning]) if winning else 0
+            avg_loss = np.mean([x['pnl'] for x in losing]) if losing else 0
+            gross_p = sum([x['pnl'] for x in winning])
+            gross_l = abs(sum([x['pnl'] for x in losing]))
+            profit_factor = (gross_p / gross_l) if gross_l > 0 else 99.9
+            max_win_s = 0; max_loss_s = 0; curr_w = 0; curr_l = 0
+            for t in log:
                 if t['pnl'] > 0:
                     curr_w += 1; curr_l = 0
+                    if curr_w > max_win_s: max_win_s = curr_w
                 else:
                     curr_l += 1; curr_w = 0
+                    if curr_l > max_loss_s: max_loss_s = curr_l
+            cons_trades = [x for x in log if x['consensus']]
+            n_cons = len(cons_trades)
+            agree_rate = (n_cons/tot*100) if tot else 0
+            cons_win_rate = (sum(1 for x in cons_trades if x['pnl']>0)/n_cons*100) if n_cons else 0
+            cons_avg_pnl = (sum(x['pnl'] for x in cons_trades)/n_cons*100) if n_cons else 0
+            res.append({
+                'config': cfg, 'final_balance': final_bal, 'net_profit': profit,
+                'total_trades': tot, 'win_rate': win_rate, 'max_drawdown': 0,
+                'win_count': win_count, 'loss_count': loss_count,
+                'avg_win': avg_win, 'avg_loss': avg_loss,
+                'max_win_streak': max_win_s, 'max_loss_streak': max_loss_s,
+                'profit_factor': profit_factor,
+                'consensus_agreement_rate': agree_rate,
+                'high_consensus_win_rate': cons_win_rate,
+                'high_consensus_avg_pnl': cons_avg_pnl
             })
+        return res
     async def run_optimization(self, target_regime="RANGE"):
         await self.generate_truth_data()
         oracle_r = np.linspace(0.4, 0.7, 3); sniper_r = np.linspace(0.4, 0.7, 3)
+        hydra_r = [0.85, 0.95]; l1_r = [10.0]
         combos = []
+        for o, s, h, l1 in itertools.product(oracle_r, sniper_r, hydra_r, l1_r):
             combos.append({
+                'w_titan': 0.4, 'w_struct': 0.3, 'thresh': l1, 'l1_thresh': l1,
                 'oracle_thresh': o, 'sniper_thresh': s, 'hydra_thresh': h, 'legacy_thresh': 0.95
             })
         results_list.sort(key=lambda x: x['net_profit'], reverse=True)
         best = results_list[0]
+        # Auto-Diagnosis
+        diag = []
+        if best['total_trades'] > 2000 and best['net_profit'] < 10: diag.append("⚠️ Overtrading")
+        if best['win_rate'] > 55 and best['net_profit'] < 0: diag.append("⚠️ Fee Burn")
+        if abs(best['avg_loss']) > best['avg_win']: diag.append("⚠️ Risk/Reward Inversion")
+        if best['max_loss_streak'] > 10: diag.append("⚠️ Consecutive Loss Risk")
+        if not diag: diag.append("✅ System Healthy")
         print("\n" + "="*60)
         print(f"🏆 CHAMPION REPORT [{target_regime}]:")
         print(f"   💰 Final Balance:   ${best['final_balance']:,.2f}")
         print("-" * 60)
         print(f"   📊 Total Trades:    {best['total_trades']}")
         print(f"   📈 Win Rate:        {best['win_rate']:.1f}%")
+        print(f"   ✅ Winning Trades:  {best['win_count']} (Avg: {best['avg_win']*100:.2f}%)")
+        print(f"   ❌ Losing Trades:   {best['loss_count']} (Avg: {best['avg_loss']*100:.2f}%)")
+        print(f"   🌊 Max Streaks:     Win {best['max_win_streak']} | Loss {best['max_loss_streak']}")
+        print(f"   ⚖️ Profit Factor:   {best['profit_factor']:.2f}")
+        print("-" * 60)
+        print(f"   🧠 CONSENSUS ANALYTICS:")
+        print(f"   🤝 Model Agreement Rate:     {best['consensus_agreement_rate']:.1f}%")
+        print(f"   🌟 High-Consensus Win Rate:  {best['high_consensus_win_rate']:.1f}%")
         print("-" * 60)
+        print(f"   🩺 DIAGNOSIS: {' '.join(diag)}")
         print(f"   ⚙️ Oracle={best['config']['oracle_thresh']:.2f} | Sniper={best['config']['sniper_thresh']:.2f} | Hydra={best['config']['hydra_thresh']:.2f}")
         print("="*60)
         return best['config'], best
 async def run_strategic_optimization_task():
+    print("\n🧪 [STRATEGIC BACKTEST] Full Spectrum Mode...")
     r2 = R2Service(); dm = DataManager(None, None, r2); proc = MLProcessor(dm)
     try:
         await dm.initialize(); await proc.initialize()