Riy777 committed on
Commit
3060cdb
·
verified ·
1 Parent(s): a661075

Update backtest_engine.py

Browse files
Files changed (1) hide show
  1. backtest_engine.py +89 -97
backtest_engine.py CHANGED
@@ -1,5 +1,5 @@
1
  # ============================================================
2
- # 🧪 backtest_engine.py (V139.0 - GEM-Architect: Vectorized Hydra Speed)
3
  # ============================================================
4
 
5
  import asyncio
@@ -115,20 +115,12 @@ class HeavyDutyBacktester:
115
  self.TRADING_FEES = 0.001
116
  self.MAX_SLOTS = 4
117
  self.TARGET_COINS = [
118
- 'SOL/USDT', 'XRP/USDT', 'DOGE/USDT', 'ADA/USDT', 'AVAX/USDT', 'LINK/USDT',
119
- 'TON/USDT', 'INJ/USDT', 'APT/USDT', 'OP/USDT', 'ARB/USDT', 'SUI/USDT',
120
- 'SEI/USDT', 'TIA/USDT', 'MATIC/USDT', 'NEAR/USDT', 'RUNE/USDT', 'PYTH/USDT',
121
- 'WIF/USDT', 'PEPE/USDT', 'SHIB/USDT', 'TRX/USDT', 'DOT/USDT', 'UNI/USDT',
122
- 'ONDO/USDT', 'ENA/USDT', 'HBAR/USDT', 'XLM/USDT', 'TAO/USDT', 'ZK/USDT',
123
- 'ZRO/USDT', 'KCS/USDT', 'ICP/USDT', 'SAND/USDT', 'AXS/USDT', 'APE/USDT',
124
- 'GMT/USDT', 'CHZ/USDT', 'CFX/USDT', 'LDO/USDT', 'FET/USDT', 'JTO/USDT',
125
- 'STRK/USDT', 'BLUR/USDT', 'ALT/USDT', 'JUP/USDT', 'PENDLE/USDT', 'ETHFI/USDT',
126
- 'MEME/USDT', 'ATOM/USDT'
127
  ]
128
  self.force_start_date = None
129
  self.force_end_date = None
130
  if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
131
- print(f"🧪 [Backtest V139.0] Vectorized Hydra Speed Optimization.")
132
 
133
  def set_date_range(self, start_str, end_str):
134
  self.force_start_date = start_str
@@ -402,72 +394,31 @@ class HeavyDutyBacktester:
402
  candidate_indices = candidate_indices[candidate_indices < (len(arr_ts_1m) - 245)]
403
  print(f" 🎯 Candidates: {len(candidate_indices)}. Running Vectorized Hydra...", flush=True)
404
 
405
- # 🚀 VECTORIZED HYDRA SIMULATION 🚀
406
  ai_results = []
407
  if hydra_models and len(candidate_indices) > 0:
408
- # Prepare Static Features Matrix (Global)
409
  h_static = np.column_stack([
410
  fast_1m['RSI'], numpy_htf['5m']['RSI'][map_5m], numpy_htf['15m']['RSI'][map_15m],
411
  fast_1m['bb_width'], fast_1m['rel_vol'], fast_1m['atr'], fast_1m['close']
412
- ]) # Shape: (N, 7)
413
-
414
- # Process candidates in chunks to avoid RAM explosion
415
  chunk_size = 5000
416
  for i in range(0, len(candidate_indices), chunk_size):
417
  chunk_idxs = candidate_indices[i:i+chunk_size]
418
-
419
- # We need sliding windows of 240 steps for each candidate
420
- # Trick: Use broadcasting or sliding_window_view on static features
421
- # But sliding_window_view on huge array is slow. Better to just slice.
422
-
423
- # Vectorized construction for chunk
424
- # 1. Extract entry prices
425
- entries = fast_1m['close'][chunk_idxs]
426
- entries_ts = fast_1m['timestamp'][chunk_idxs]
427
-
428
- # 2. Prepare sequences (Vectorized slice is hard in numpy without creating copies)
429
- # We stick to a tight loop or specialized indexing.
430
- # Given we need to construct a [Batch, 240, Features] array for Hydra...
431
-
432
- # Fastest way: List comprehension for slicing, then stack.
433
- # Since Hydra is XGBoost, we can flatten the time dimension? No, Hydra is 1D input (snapshot).
434
- # Wait, Hydra predicts Crash Probability for a SNAPSHOT state.
435
- # In simulation, we need to check crash prob at t+1, t+2... t+240.
436
- # That is 240 checks per candidate. 42,000 * 240 = 10 Million checks.
437
- # This IS the bottleneck.
438
-
439
- # OPTIMIZATION: Only check Hydra if PnL drops below -0.5% or something? No, that misses the point.
440
- # OPTIMIZATION 2 (Implemented): Vectorize the "Check" logic.
441
-
442
- # Construct big matrix for ALL checks: (N_Candidates * 240, Features)
443
- # But that's 10M rows. XGBoost inference on 10M rows takes ~3-5 seconds on CPU. This is feasible!
444
-
445
- # Let's do it per candidate to be safe on RAM, but fast.
446
  for idx in chunk_idxs:
447
- # Slicing is fast
448
  sl_st = h_static[idx:idx+240]
449
  sl_close = sl_st[:, 6]; sl_atr = sl_st[:, 5]
450
  entry = fast_1m['close'][idx]
451
-
452
  dist = np.maximum(1.5 * sl_atr, entry * 0.015)
453
  pnl = sl_close - entry
454
  norm_pnl = pnl / dist
455
  max_pnl_r = (np.maximum.accumulate(sl_close) - entry) / dist
456
  atr_pct = sl_atr / sl_close
457
-
458
- # Stack Hydra Input (240 rows)
459
- # Cols: rsi1, rsi5, rsi15, bb, vol, dist_ema(0), atr_pct, norm, max, dists(0), time, entry(0), oracle, l2, target
460
- zeros = np.zeros(240)
461
- time_vec = np.arange(1, 241)
462
- s_oracle = global_oracle_scores[idx]
463
-
464
  X_H = np.column_stack([
465
  sl_st[:,0], sl_st[:,1], sl_st[:,2], sl_st[:,3], sl_st[:,4],
466
  zeros, atr_pct, norm_pnl, max_pnl_r, zeros, zeros, time_vec, zeros,
467
  np.full(240, s_oracle), np.full(240, 0.7), np.full(240, 3.0)
468
  ])
469
-
470
- # Predict 240 steps at once
471
  max_hydra = 0.0; hydra_time = 0
472
  try:
473
  probs = hydra_models['crash'].predict_proba(X_H)[:, 1]
@@ -476,14 +427,11 @@ class HeavyDutyBacktester:
476
  t = np.argmax(probs)
477
  hydra_time = int(fast_1m['timestamp'][idx + t])
478
  except: pass
479
-
480
- # Legacy Max
481
  max_v2 = np.max(global_v2_scores[idx:idx+240])
482
  v2_time = 0
483
  if max_v2 > 0.8:
484
  t2 = np.argmax(global_v2_scores[idx:idx+240])
485
  v2_time = int(fast_1m['timestamp'][idx + t2])
486
-
487
  ai_results.append({
488
  'timestamp': int(fast_1m['timestamp'][idx]),
489
  'symbol': sym, 'close': entry,
@@ -519,65 +467,115 @@ class HeavyDutyBacktester:
519
  try: data.append(pd.read_pickle(f))
520
  except: pass
521
  if not data: return []
522
- df = pd.concat(data).sort_values('timestamp')
523
 
524
- ts = df['timestamp'].values; close = df['close'].values.astype(float)
525
- sym = df['symbol'].values; sym_map = {s:i for i,s in enumerate(np.unique(sym))}
 
 
 
 
 
 
 
 
526
  sym_id = np.array([sym_map[s] for s in sym])
527
 
528
- oracle = df['oracle_conf'].values; sniper = df['sniper_score'].values
529
- hydra = df['risk_hydra_crash'].values; titan = df['real_titan'].values
530
- l1 = df['l1_score'].values
531
- legacy_v2 = df['risk_legacy_v2'].values
 
 
 
 
 
 
 
 
532
 
533
  N = len(ts)
534
  print(f" 🚀 [System] Testing {len(combinations_batch)} configs on {N} candles...", flush=True)
535
 
536
  res = []
537
  for cfg in combinations_batch:
538
- pos = {}; log = []
539
- bal = initial_capital; alloc = 0.0
540
- mask = (l1 >= cfg['l1_thresh']) & (oracle >= cfg['oracle_thresh']) & (sniper >= cfg['sniper_thresh']) & (titan >= 0.55)
 
 
 
 
 
 
 
541
 
 
542
  for i in range(N):
543
- s = sym_id[i]; p = close[i]
 
 
 
 
544
  if s in pos:
545
- entry = pos[s][0]; h_r = pos[s][1]; titan_entry = pos[s][3]
546
- crash_hydra = (h_r > cfg['hydra_thresh'])
547
- panic_legacy = (legacy_v2[i] > cfg['legacy_thresh'])
548
- pnl = (p - entry)/entry
 
 
 
 
 
549
 
550
- if crash_hydra or panic_legacy or pnl > 0.04 or pnl < -0.02:
551
- realized = pnl - fees_pct*2
552
- bal += pos[s][2] * (1 + realized)
553
- alloc -= pos[s][2]
554
- is_consensus = (titan_entry > 0.55)
555
- log.append({'pnl': realized, 'consensus': is_consensus})
 
 
 
 
 
 
 
 
 
 
556
  del pos[s]
557
-
558
- if len(pos) < max_slots and mask[i]:
 
 
 
559
  if s not in pos and bal >= 5.0:
560
  size = min(10.0, bal * 0.98)
561
- pos[s] = (p, hydra[i], size, titan[i])
562
- bal -= size; alloc += size
 
 
563
 
564
  final_bal = bal + alloc
565
  profit = final_bal - initial_capital
566
 
 
567
  tot = len(log)
568
  winning = [x for x in log if x['pnl'] > 0]
569
  losing = [x for x in log if x['pnl'] <= 0]
570
 
571
- win_count = len(winning); loss_count = len(losing)
572
- win_rate = (win_count/tot*100) if tot else 0
 
573
 
574
- avg_win = np.mean([x['pnl'] for x in winning]) if winning else 0
575
- avg_loss = np.mean([x['pnl'] for x in losing]) if losing else 0
576
 
577
  gross_p = sum([x['pnl'] for x in winning])
578
  gross_l = abs(sum([x['pnl'] for x in losing]))
579
  profit_factor = (gross_p / gross_l) if gross_l > 0 else 99.9
580
 
 
581
  max_win_s = 0; max_loss_s = 0; curr_w = 0; curr_l = 0
582
  for t in log:
583
  if t['pnl'] > 0:
@@ -587,12 +585,6 @@ class HeavyDutyBacktester:
587
  curr_l += 1; curr_w = 0
588
  if curr_l > max_loss_s: max_loss_s = curr_l
589
 
590
- cons_trades = [x for x in log if x['consensus']]
591
- n_cons = len(cons_trades)
592
- agree_rate = (n_cons/tot*100) if tot else 0
593
- cons_win_rate = (sum(1 for x in cons_trades if x['pnl']>0)/n_cons*100) if n_cons else 0
594
- cons_avg_pnl = (sum(x['pnl'] for x in cons_trades)/n_cons*100) if n_cons else 0
595
-
596
  res.append({
597
  'config': cfg, 'final_balance': final_bal, 'net_profit': profit,
598
  'total_trades': tot, 'win_rate': win_rate, 'max_drawdown': 0,
@@ -600,9 +592,9 @@ class HeavyDutyBacktester:
600
  'avg_win': avg_win, 'avg_loss': avg_loss,
601
  'max_win_streak': max_win_s, 'max_loss_streak': max_loss_s,
602
  'profit_factor': profit_factor,
603
- 'consensus_agreement_rate': agree_rate,
604
- 'high_consensus_win_rate': cons_win_rate,
605
- 'high_consensus_avg_pnl': cons_avg_pnl
606
  })
607
  return res
608
 
@@ -655,7 +647,7 @@ class HeavyDutyBacktester:
655
  return best['config'], best
656
 
657
  async def run_strategic_optimization_task():
658
- print("\n🧪 [STRATEGIC BACKTEST] Vectorized Hydra Speed...")
659
  r2 = R2Service(); dm = DataManager(None, None, r2); proc = MLProcessor(dm)
660
  try:
661
  await dm.initialize(); await proc.initialize()
 
1
  # ============================================================
2
+ # 🧪 backtest_engine.py (V140.0 - GEM-Architect: Bulletproof Logic)
3
  # ============================================================
4
 
5
  import asyncio
 
115
  self.TRADING_FEES = 0.001
116
  self.MAX_SLOTS = 4
117
  self.TARGET_COINS = [
118
+ 'SOL/USDT'
 
 
 
 
 
 
 
 
119
  ]
120
  self.force_start_date = None
121
  self.force_end_date = None
122
  if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
123
+ print(f"🧪 [Backtest V140.0] Bulletproof Scalar Logic.")
124
 
125
  def set_date_range(self, start_str, end_str):
126
  self.force_start_date = start_str
 
394
  candidate_indices = candidate_indices[candidate_indices < (len(arr_ts_1m) - 245)]
395
  print(f" 🎯 Candidates: {len(candidate_indices)}. Running Vectorized Hydra...", flush=True)
396
 
397
+ # 🚀 VECTORIZED HYDRA SIMULATION
398
  ai_results = []
399
  if hydra_models and len(candidate_indices) > 0:
 
400
  h_static = np.column_stack([
401
  fast_1m['RSI'], numpy_htf['5m']['RSI'][map_5m], numpy_htf['15m']['RSI'][map_15m],
402
  fast_1m['bb_width'], fast_1m['rel_vol'], fast_1m['atr'], fast_1m['close']
403
+ ])
 
 
404
  chunk_size = 5000
405
  for i in range(0, len(candidate_indices), chunk_size):
406
  chunk_idxs = candidate_indices[i:i+chunk_size]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
  for idx in chunk_idxs:
 
408
  sl_st = h_static[idx:idx+240]
409
  sl_close = sl_st[:, 6]; sl_atr = sl_st[:, 5]
410
  entry = fast_1m['close'][idx]
 
411
  dist = np.maximum(1.5 * sl_atr, entry * 0.015)
412
  pnl = sl_close - entry
413
  norm_pnl = pnl / dist
414
  max_pnl_r = (np.maximum.accumulate(sl_close) - entry) / dist
415
  atr_pct = sl_atr / sl_close
416
+ zeros = np.zeros(240); time_vec = np.arange(1, 241); s_oracle = global_oracle_scores[idx]
 
 
 
 
 
 
417
  X_H = np.column_stack([
418
  sl_st[:,0], sl_st[:,1], sl_st[:,2], sl_st[:,3], sl_st[:,4],
419
  zeros, atr_pct, norm_pnl, max_pnl_r, zeros, zeros, time_vec, zeros,
420
  np.full(240, s_oracle), np.full(240, 0.7), np.full(240, 3.0)
421
  ])
 
 
422
  max_hydra = 0.0; hydra_time = 0
423
  try:
424
  probs = hydra_models['crash'].predict_proba(X_H)[:, 1]
 
427
  t = np.argmax(probs)
428
  hydra_time = int(fast_1m['timestamp'][idx + t])
429
  except: pass
 
 
430
  max_v2 = np.max(global_v2_scores[idx:idx+240])
431
  v2_time = 0
432
  if max_v2 > 0.8:
433
  t2 = np.argmax(global_v2_scores[idx:idx+240])
434
  v2_time = int(fast_1m['timestamp'][idx + t2])
 
435
  ai_results.append({
436
  'timestamp': int(fast_1m['timestamp'][idx]),
437
  'symbol': sym, 'close': entry,
 
467
  try: data.append(pd.read_pickle(f))
468
  except: pass
469
  if not data: return []
 
470
 
471
+ # [GEM-FIX] Reset Index to avoid 'Truth value' error
472
+ df = pd.concat(data).sort_values('timestamp').reset_index(drop=True)
473
+
474
+ ts = df['timestamp'].values
475
+ close = df['close'].values.astype(float)
476
+ sym = df['symbol'].values
477
+
478
+ # Map symbols to integers
479
+ u_syms = np.unique(sym)
480
+ sym_map = {s: i for i, s in enumerate(u_syms)}
481
  sym_id = np.array([sym_map[s] for s in sym])
482
 
483
+ # Extract features as pure numpy arrays (scalar safety)
484
+ oracle = df['oracle_conf'].values.astype(float)
485
+ sniper = df['sniper_score'].values.astype(float)
486
+ hydra = df['risk_hydra_crash'].values.astype(float)
487
+ titan = df['real_titan'].values.astype(float)
488
+ l1 = df['l1_score'].values.astype(float)
489
+
490
+ # Handle Legacy (fill 0 if missing)
491
+ legacy_v2 = df['risk_legacy_v2'].values.astype(float) if 'risk_legacy_v2' in df else np.zeros(len(df))
492
+
493
+ # Extra: Hydra Time (for expiry check)
494
+ h_times = df['time_hydra_crash'].values.astype(int)
495
 
496
  N = len(ts)
497
  print(f" 🚀 [System] Testing {len(combinations_batch)} configs on {N} candles...", flush=True)
498
 
499
  res = []
500
  for cfg in combinations_batch:
501
+ pos = {}
502
+ log = []
503
+ bal = float(initial_capital)
504
+ alloc = 0.0
505
+
506
+ # Pre-calc mask (Boolean Array)
507
+ mask = (l1 >= cfg['l1_thresh']) & \
508
+ (oracle >= cfg['oracle_thresh']) & \
509
+ (sniper >= cfg['sniper_thresh']) & \
510
+ (titan >= 0.55)
511
 
512
+ # Loop
513
  for i in range(N):
514
+ s = sym_id[i]
515
+ p = float(close[i])
516
+ curr_t = ts[i]
517
+
518
+ # 1. Exit Logic
519
  if s in pos:
520
+ entry_p, h_risk_val, size_val, h_time_val = pos[s]
521
+
522
+ # Explicit Scalar bools
523
+ crash_hydra = bool(h_risk_val > cfg['hydra_thresh'])
524
+
525
+ # Logic: If current time > crash time prediction, signal is stale?
526
+ # Or if prediction was for a future time?
527
+ # Assuming h_time_val is the timestamp of predicted crash
528
+ time_match = bool(h_time_val > 0 and curr_t >= h_time_val)
529
 
530
+ # Legacy Logic (Global array check)
531
+ # Note: Legacy array corresponds to candle index, but here we iterate sorted time
532
+ # We need to trust the backtest signal 'risk_legacy_v2' is aligned.
533
+ # Yes, it comes from df row 'i'.
534
+ panic_legacy = bool(legacy_v2[i] > cfg['legacy_thresh'])
535
+
536
+ pnl = (p - entry_p) / entry_p
537
+
538
+ # Combined Exit
539
+ # Exit if: Hydra Crash AND Time Match OR Legacy Panic OR TP/SL
540
+ should_exit = (crash_hydra and time_match) or panic_legacy or (pnl > 0.04) or (pnl < -0.02)
541
+
542
+ if should_exit:
543
+ realized = pnl - (fees_pct * 2)
544
+ bal += size_val * (1.0 + realized)
545
+ alloc -= size_val
546
  del pos[s]
547
+ log.append({'pnl': realized})
548
+
549
+ # 2. Entry Logic
550
+ # Use scalar boolean from mask
551
+ if len(pos) < max_slots and bool(mask[i]):
552
  if s not in pos and bal >= 5.0:
553
  size = min(10.0, bal * 0.98)
554
+ # Store: Entry, HydraRisk, Size, HydraTime
555
+ pos[s] = (p, hydra[i], size, h_times[i])
556
+ bal -= size
557
+ alloc += size
558
 
559
  final_bal = bal + alloc
560
  profit = final_bal - initial_capital
561
 
562
+ # Stats
563
  tot = len(log)
564
  winning = [x for x in log if x['pnl'] > 0]
565
  losing = [x for x in log if x['pnl'] <= 0]
566
 
567
+ win_count = len(winning)
568
+ loss_count = len(losing)
569
+ win_rate = (win_count/tot*100) if tot > 0 else 0.0
570
 
571
+ avg_win = np.mean([x['pnl'] for x in winning]) if winning else 0.0
572
+ avg_loss = np.mean([x['pnl'] for x in losing]) if losing else 0.0
573
 
574
  gross_p = sum([x['pnl'] for x in winning])
575
  gross_l = abs(sum([x['pnl'] for x in losing]))
576
  profit_factor = (gross_p / gross_l) if gross_l > 0 else 99.9
577
 
578
+ # Streaks
579
  max_win_s = 0; max_loss_s = 0; curr_w = 0; curr_l = 0
580
  for t in log:
581
  if t['pnl'] > 0:
 
585
  curr_l += 1; curr_w = 0
586
  if curr_l > max_loss_s: max_loss_s = curr_l
587
 
 
 
 
 
 
 
588
  res.append({
589
  'config': cfg, 'final_balance': final_bal, 'net_profit': profit,
590
  'total_trades': tot, 'win_rate': win_rate, 'max_drawdown': 0,
 
592
  'avg_win': avg_win, 'avg_loss': avg_loss,
593
  'max_win_streak': max_win_s, 'max_loss_streak': max_loss_s,
594
  'profit_factor': profit_factor,
595
+ 'consensus_agreement_rate': 0.0,
596
+ 'high_consensus_win_rate': 0.0,
597
+ 'high_consensus_avg_pnl': 0.0
598
  })
599
  return res
600
 
 
647
  return best['config'], best
648
 
649
  async def run_strategic_optimization_task():
650
+ print("\n🧪 [STRATEGIC BACKTEST] Full Spectrum Mode...")
651
  r2 = R2Service(); dm = DataManager(None, None, r2); proc = MLProcessor(dm)
652
  try:
653
  await dm.initialize(); await proc.initialize()