Riy777 committed on
Commit
3060cdb
·
verified ·
1 Parent(s): a661075

Update backtest_engine.py

Browse files
Files changed (1) hide show
  1. backtest_engine.py +89 -97
backtest_engine.py CHANGED
@@ -1,5 +1,5 @@
1
  # ============================================================
2
- # 🧪 backtest_engine.py (V139.0 - GEM-Architect: Vectorized Hydra Speed)
3
  # ============================================================
4
 
5
  import asyncio
@@ -115,20 +115,12 @@ class HeavyDutyBacktester:
115
  self.TRADING_FEES = 0.001
116
  self.MAX_SLOTS = 4
117
  self.TARGET_COINS = [
118
- 'SOL/USDT', 'XRP/USDT', 'DOGE/USDT', 'ADA/USDT', 'AVAX/USDT', 'LINK/USDT',
119
- 'TON/USDT', 'INJ/USDT', 'APT/USDT', 'OP/USDT', 'ARB/USDT', 'SUI/USDT',
120
- 'SEI/USDT', 'TIA/USDT', 'MATIC/USDT', 'NEAR/USDT', 'RUNE/USDT', 'PYTH/USDT',
121
- 'WIF/USDT', 'PEPE/USDT', 'SHIB/USDT', 'TRX/USDT', 'DOT/USDT', 'UNI/USDT',
122
- 'ONDO/USDT', 'ENA/USDT', 'HBAR/USDT', 'XLM/USDT', 'TAO/USDT', 'ZK/USDT',
123
- 'ZRO/USDT', 'KCS/USDT', 'ICP/USDT', 'SAND/USDT', 'AXS/USDT', 'APE/USDT',
124
- 'GMT/USDT', 'CHZ/USDT', 'CFX/USDT', 'LDO/USDT', 'FET/USDT', 'JTO/USDT',
125
- 'STRK/USDT', 'BLUR/USDT', 'ALT/USDT', 'JUP/USDT', 'PENDLE/USDT', 'ETHFI/USDT',
126
- 'MEME/USDT', 'ATOM/USDT'
127
  ]
128
  self.force_start_date = None
129
  self.force_end_date = None
130
  if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
131
- print(f"🧪 [Backtest V139.0] Vectorized Hydra Speed Optimization.")
132
 
133
  def set_date_range(self, start_str, end_str):
134
  self.force_start_date = start_str
@@ -402,72 +394,31 @@ class HeavyDutyBacktester:
402
  candidate_indices = candidate_indices[candidate_indices < (len(arr_ts_1m) - 245)]
403
  print(f" 🎯 Candidates: {len(candidate_indices)}. Running Vectorized Hydra...", flush=True)
404
 
405
- # 🚀 VECTORIZED HYDRA SIMULATION 🚀
406
  ai_results = []
407
  if hydra_models and len(candidate_indices) > 0:
408
- # Prepare Static Features Matrix (Global)
409
  h_static = np.column_stack([
410
  fast_1m['RSI'], numpy_htf['5m']['RSI'][map_5m], numpy_htf['15m']['RSI'][map_15m],
411
  fast_1m['bb_width'], fast_1m['rel_vol'], fast_1m['atr'], fast_1m['close']
412
- ]) # Shape: (N, 7)
413
-
414
- # Process candidates in chunks to avoid RAM explosion
415
  chunk_size = 5000
416
  for i in range(0, len(candidate_indices), chunk_size):
417
  chunk_idxs = candidate_indices[i:i+chunk_size]
418
-
419
- # We need sliding windows of 240 steps for each candidate
420
- # Trick: Use broadcasting or sliding_window_view on static features
421
- # But sliding_window_view on huge array is slow. Better to just slice.
422
-
423
- # Vectorized construction for chunk
424
- # 1. Extract entry prices
425
- entries = fast_1m['close'][chunk_idxs]
426
- entries_ts = fast_1m['timestamp'][chunk_idxs]
427
-
428
- # 2. Prepare sequences (Vectorized slice is hard in numpy without creating copies)
429
- # We stick to a tight loop or specialized indexing.
430
- # Given we need to construct a [Batch, 240, Features] array for Hydra...
431
-
432
- # Fastest way: List comprehension for slicing, then stack.
433
- # Since Hydra is XGBoost, we can flatten the time dimension? No, Hydra is 1D input (snapshot).
434
- # Wait, Hydra predicts Crash Probability for a SNAPSHOT state.
435
- # In simulation, we need to check crash prob at t+1, t+2... t+240.
436
- # That is 240 checks per candidate. 42,000 * 240 = 10 Million checks.
437
- # This IS the bottleneck.
438
-
439
- # OPTIMIZATION: Only check Hydra if PnL drops below -0.5% or something? No, that misses the point.
440
- # OPTIMIZATION 2 (Implemented): Vectorize the "Check" logic.
441
-
442
- # Construct big matrix for ALL checks: (N_Candidates * 240, Features)
443
- # But that's 10M rows. XGBoost inference on 10M rows takes ~3-5 seconds on CPU. This is feasible!
444
-
445
- # Let's do it per candidate to be safe on RAM, but fast.
446
  for idx in chunk_idxs:
447
- # Slicing is fast
448
  sl_st = h_static[idx:idx+240]
449
  sl_close = sl_st[:, 6]; sl_atr = sl_st[:, 5]
450
  entry = fast_1m['close'][idx]
451
-
452
  dist = np.maximum(1.5 * sl_atr, entry * 0.015)
453
  pnl = sl_close - entry
454
  norm_pnl = pnl / dist
455
  max_pnl_r = (np.maximum.accumulate(sl_close) - entry) / dist
456
  atr_pct = sl_atr / sl_close
457
-
458
- # Stack Hydra Input (240 rows)
459
- # Cols: rsi1, rsi5, rsi15, bb, vol, dist_ema(0), atr_pct, norm, max, dists(0), time, entry(0), oracle, l2, target
460
- zeros = np.zeros(240)
461
- time_vec = np.arange(1, 241)
462
- s_oracle = global_oracle_scores[idx]
463
-
464
  X_H = np.column_stack([
465
  sl_st[:,0], sl_st[:,1], sl_st[:,2], sl_st[:,3], sl_st[:,4],
466
  zeros, atr_pct, norm_pnl, max_pnl_r, zeros, zeros, time_vec, zeros,
467
  np.full(240, s_oracle), np.full(240, 0.7), np.full(240, 3.0)
468
  ])
469
-
470
- # Predict 240 steps at once
471
  max_hydra = 0.0; hydra_time = 0
472
  try:
473
  probs = hydra_models['crash'].predict_proba(X_H)[:, 1]
@@ -476,14 +427,11 @@ class HeavyDutyBacktester:
476
  t = np.argmax(probs)
477
  hydra_time = int(fast_1m['timestamp'][idx + t])
478
  except: pass
479
-
480
- # Legacy Max
481
  max_v2 = np.max(global_v2_scores[idx:idx+240])
482
  v2_time = 0
483
  if max_v2 > 0.8:
484
  t2 = np.argmax(global_v2_scores[idx:idx+240])
485
  v2_time = int(fast_1m['timestamp'][idx + t2])
486
-
487
  ai_results.append({
488
  'timestamp': int(fast_1m['timestamp'][idx]),
489
  'symbol': sym, 'close': entry,
@@ -519,65 +467,115 @@ class HeavyDutyBacktester:
519
  try: data.append(pd.read_pickle(f))
520
  except: pass
521
  if not data: return []
522
- df = pd.concat(data).sort_values('timestamp')
523
 
524
- ts = df['timestamp'].values; close = df['close'].values.astype(float)
525
- sym = df['symbol'].values; sym_map = {s:i for i,s in enumerate(np.unique(sym))}
 
 
 
 
 
 
 
 
526
  sym_id = np.array([sym_map[s] for s in sym])
527
 
528
- oracle = df['oracle_conf'].values; sniper = df['sniper_score'].values
529
- hydra = df['risk_hydra_crash'].values; titan = df['real_titan'].values
530
- l1 = df['l1_score'].values
531
- legacy_v2 = df['risk_legacy_v2'].values
 
 
 
 
 
 
 
 
532
 
533
  N = len(ts)
534
  print(f" 🚀 [System] Testing {len(combinations_batch)} configs on {N} candles...", flush=True)
535
 
536
  res = []
537
  for cfg in combinations_batch:
538
- pos = {}; log = []
539
- bal = initial_capital; alloc = 0.0
540
- mask = (l1 >= cfg['l1_thresh']) & (oracle >= cfg['oracle_thresh']) & (sniper >= cfg['sniper_thresh']) & (titan >= 0.55)
 
 
 
 
 
 
 
541
 
 
542
  for i in range(N):
543
- s = sym_id[i]; p = close[i]
 
 
 
 
544
  if s in pos:
545
- entry = pos[s][0]; h_r = pos[s][1]; titan_entry = pos[s][3]
546
- crash_hydra = (h_r > cfg['hydra_thresh'])
547
- panic_legacy = (legacy_v2[i] > cfg['legacy_thresh'])
548
- pnl = (p - entry)/entry
 
 
 
 
 
549
 
550
- if crash_hydra or panic_legacy or pnl > 0.04 or pnl < -0.02:
551
- realized = pnl - fees_pct*2
552
- bal += pos[s][2] * (1 + realized)
553
- alloc -= pos[s][2]
554
- is_consensus = (titan_entry > 0.55)
555
- log.append({'pnl': realized, 'consensus': is_consensus})
 
 
 
 
 
 
 
 
 
 
556
  del pos[s]
557
-
558
- if len(pos) < max_slots and mask[i]:
 
 
 
559
  if s not in pos and bal >= 5.0:
560
  size = min(10.0, bal * 0.98)
561
- pos[s] = (p, hydra[i], size, titan[i])
562
- bal -= size; alloc += size
 
 
563
 
564
  final_bal = bal + alloc
565
  profit = final_bal - initial_capital
566
 
 
567
  tot = len(log)
568
  winning = [x for x in log if x['pnl'] > 0]
569
  losing = [x for x in log if x['pnl'] <= 0]
570
 
571
- win_count = len(winning); loss_count = len(losing)
572
- win_rate = (win_count/tot*100) if tot else 0
 
573
 
574
- avg_win = np.mean([x['pnl'] for x in winning]) if winning else 0
575
- avg_loss = np.mean([x['pnl'] for x in losing]) if losing else 0
576
 
577
  gross_p = sum([x['pnl'] for x in winning])
578
  gross_l = abs(sum([x['pnl'] for x in losing]))
579
  profit_factor = (gross_p / gross_l) if gross_l > 0 else 99.9
580
 
 
581
  max_win_s = 0; max_loss_s = 0; curr_w = 0; curr_l = 0
582
  for t in log:
583
  if t['pnl'] > 0:
@@ -587,12 +585,6 @@ class HeavyDutyBacktester:
587
  curr_l += 1; curr_w = 0
588
  if curr_l > max_loss_s: max_loss_s = curr_l
589
 
590
- cons_trades = [x for x in log if x['consensus']]
591
- n_cons = len(cons_trades)
592
- agree_rate = (n_cons/tot*100) if tot else 0
593
- cons_win_rate = (sum(1 for x in cons_trades if x['pnl']>0)/n_cons*100) if n_cons else 0
594
- cons_avg_pnl = (sum(x['pnl'] for x in cons_trades)/n_cons*100) if n_cons else 0
595
-
596
  res.append({
597
  'config': cfg, 'final_balance': final_bal, 'net_profit': profit,
598
  'total_trades': tot, 'win_rate': win_rate, 'max_drawdown': 0,
@@ -600,9 +592,9 @@ class HeavyDutyBacktester:
600
  'avg_win': avg_win, 'avg_loss': avg_loss,
601
  'max_win_streak': max_win_s, 'max_loss_streak': max_loss_s,
602
  'profit_factor': profit_factor,
603
- 'consensus_agreement_rate': agree_rate,
604
- 'high_consensus_win_rate': cons_win_rate,
605
- 'high_consensus_avg_pnl': cons_avg_pnl
606
  })
607
  return res
608
 
@@ -655,7 +647,7 @@ class HeavyDutyBacktester:
655
  return best['config'], best
656
 
657
  async def run_strategic_optimization_task():
658
- print("\n🧪 [STRATEGIC BACKTEST] Vectorized Hydra Speed...")
659
  r2 = R2Service(); dm = DataManager(None, None, r2); proc = MLProcessor(dm)
660
  try:
661
  await dm.initialize(); await proc.initialize()
 
1
  # ============================================================
2
+ # 🧪 backtest_engine.py (V140.0 - GEM-Architect: Bulletproof Logic)
3
  # ============================================================
4
 
5
  import asyncio
 
115
  self.TRADING_FEES = 0.001
116
  self.MAX_SLOTS = 4
117
  self.TARGET_COINS = [
118
+ 'SOL/USDT'
 
 
 
 
 
 
 
 
119
  ]
120
  self.force_start_date = None
121
  self.force_end_date = None
122
  if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
123
+ print(f"🧪 [Backtest V140.0] Bulletproof Scalar Logic.")
124
 
125
  def set_date_range(self, start_str, end_str):
126
  self.force_start_date = start_str
 
394
  candidate_indices = candidate_indices[candidate_indices < (len(arr_ts_1m) - 245)]
395
  print(f" 🎯 Candidates: {len(candidate_indices)}. Running Vectorized Hydra...", flush=True)
396
 
397
+ # 🚀 VECTORIZED HYDRA SIMULATION
398
  ai_results = []
399
  if hydra_models and len(candidate_indices) > 0:
 
400
  h_static = np.column_stack([
401
  fast_1m['RSI'], numpy_htf['5m']['RSI'][map_5m], numpy_htf['15m']['RSI'][map_15m],
402
  fast_1m['bb_width'], fast_1m['rel_vol'], fast_1m['atr'], fast_1m['close']
403
+ ])
 
 
404
  chunk_size = 5000
405
  for i in range(0, len(candidate_indices), chunk_size):
406
  chunk_idxs = candidate_indices[i:i+chunk_size]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
  for idx in chunk_idxs:
 
408
  sl_st = h_static[idx:idx+240]
409
  sl_close = sl_st[:, 6]; sl_atr = sl_st[:, 5]
410
  entry = fast_1m['close'][idx]
 
411
  dist = np.maximum(1.5 * sl_atr, entry * 0.015)
412
  pnl = sl_close - entry
413
  norm_pnl = pnl / dist
414
  max_pnl_r = (np.maximum.accumulate(sl_close) - entry) / dist
415
  atr_pct = sl_atr / sl_close
416
+ zeros = np.zeros(240); time_vec = np.arange(1, 241); s_oracle = global_oracle_scores[idx]
 
 
 
 
 
 
417
  X_H = np.column_stack([
418
  sl_st[:,0], sl_st[:,1], sl_st[:,2], sl_st[:,3], sl_st[:,4],
419
  zeros, atr_pct, norm_pnl, max_pnl_r, zeros, zeros, time_vec, zeros,
420
  np.full(240, s_oracle), np.full(240, 0.7), np.full(240, 3.0)
421
  ])
 
 
422
  max_hydra = 0.0; hydra_time = 0
423
  try:
424
  probs = hydra_models['crash'].predict_proba(X_H)[:, 1]
 
427
  t = np.argmax(probs)
428
  hydra_time = int(fast_1m['timestamp'][idx + t])
429
  except: pass
 
 
430
  max_v2 = np.max(global_v2_scores[idx:idx+240])
431
  v2_time = 0
432
  if max_v2 > 0.8:
433
  t2 = np.argmax(global_v2_scores[idx:idx+240])
434
  v2_time = int(fast_1m['timestamp'][idx + t2])
 
435
  ai_results.append({
436
  'timestamp': int(fast_1m['timestamp'][idx]),
437
  'symbol': sym, 'close': entry,
 
467
  try: data.append(pd.read_pickle(f))
468
  except: pass
469
  if not data: return []
 
470
 
471
+ # [GEM-FIX] Reset Index to avoid 'Truth value' error
472
+ df = pd.concat(data).sort_values('timestamp').reset_index(drop=True)
473
+
474
+ ts = df['timestamp'].values
475
+ close = df['close'].values.astype(float)
476
+ sym = df['symbol'].values
477
+
478
+ # Map symbols to integers
479
+ u_syms = np.unique(sym)
480
+ sym_map = {s: i for i, s in enumerate(u_syms)}
481
  sym_id = np.array([sym_map[s] for s in sym])
482
 
483
+ # Extract features as pure numpy arrays (scalar safety)
484
+ oracle = df['oracle_conf'].values.astype(float)
485
+ sniper = df['sniper_score'].values.astype(float)
486
+ hydra = df['risk_hydra_crash'].values.astype(float)
487
+ titan = df['real_titan'].values.astype(float)
488
+ l1 = df['l1_score'].values.astype(float)
489
+
490
+ # Handle Legacy (fill 0 if missing)
491
+ legacy_v2 = df['risk_legacy_v2'].values.astype(float) if 'risk_legacy_v2' in df else np.zeros(len(df))
492
+
493
+ # Extra: Hydra Time (for expiry check)
494
+ h_times = df['time_hydra_crash'].values.astype(int)
495
 
496
  N = len(ts)
497
  print(f" 🚀 [System] Testing {len(combinations_batch)} configs on {N} candles...", flush=True)
498
 
499
  res = []
500
  for cfg in combinations_batch:
501
+ pos = {}
502
+ log = []
503
+ bal = float(initial_capital)
504
+ alloc = 0.0
505
+
506
+ # Pre-calc mask (Boolean Array)
507
+ mask = (l1 >= cfg['l1_thresh']) & \
508
+ (oracle >= cfg['oracle_thresh']) & \
509
+ (sniper >= cfg['sniper_thresh']) & \
510
+ (titan >= 0.55)
511
 
512
+ # Loop
513
  for i in range(N):
514
+ s = sym_id[i]
515
+ p = float(close[i])
516
+ curr_t = ts[i]
517
+
518
+ # 1. Exit Logic
519
  if s in pos:
520
+ entry_p, h_risk_val, size_val, h_time_val = pos[s]
521
+
522
+ # Explicit Scalar bools
523
+ crash_hydra = bool(h_risk_val > cfg['hydra_thresh'])
524
+
525
+ # Logic: If current time > crash time prediction, signal is stale?
526
+ # Or if prediction was for a future time?
527
+ # Assuming h_time_val is the timestamp of predicted crash
528
+ time_match = bool(h_time_val > 0 and curr_t >= h_time_val)
529
 
530
+ # Legacy Logic (Global array check)
531
+ # Note: Legacy array corresponds to candle index, but here we iterate sorted time
532
+ # We need to trust the backtest signal 'risk_legacy_v2' is aligned.
533
+ # Yes, it comes from df row 'i'.
534
+ panic_legacy = bool(legacy_v2[i] > cfg['legacy_thresh'])
535
+
536
+ pnl = (p - entry_p) / entry_p
537
+
538
+ # Combined Exit
539
+ # Exit if: Hydra Crash AND Time Match OR Legacy Panic OR TP/SL
540
+ should_exit = (crash_hydra and time_match) or panic_legacy or (pnl > 0.04) or (pnl < -0.02)
541
+
542
+ if should_exit:
543
+ realized = pnl - (fees_pct * 2)
544
+ bal += size_val * (1.0 + realized)
545
+ alloc -= size_val
546
  del pos[s]
547
+ log.append({'pnl': realized})
548
+
549
+ # 2. Entry Logic
550
+ # Use scalar boolean from mask
551
+ if len(pos) < max_slots and bool(mask[i]):
552
  if s not in pos and bal >= 5.0:
553
  size = min(10.0, bal * 0.98)
554
+ # Store: Entry, HydraRisk, Size, HydraTime
555
+ pos[s] = (p, hydra[i], size, h_times[i])
556
+ bal -= size
557
+ alloc += size
558
 
559
  final_bal = bal + alloc
560
  profit = final_bal - initial_capital
561
 
562
+ # Stats
563
  tot = len(log)
564
  winning = [x for x in log if x['pnl'] > 0]
565
  losing = [x for x in log if x['pnl'] <= 0]
566
 
567
+ win_count = len(winning)
568
+ loss_count = len(losing)
569
+ win_rate = (win_count/tot*100) if tot > 0 else 0.0
570
 
571
+ avg_win = np.mean([x['pnl'] for x in winning]) if winning else 0.0
572
+ avg_loss = np.mean([x['pnl'] for x in losing]) if losing else 0.0
573
 
574
  gross_p = sum([x['pnl'] for x in winning])
575
  gross_l = abs(sum([x['pnl'] for x in losing]))
576
  profit_factor = (gross_p / gross_l) if gross_l > 0 else 99.9
577
 
578
+ # Streaks
579
  max_win_s = 0; max_loss_s = 0; curr_w = 0; curr_l = 0
580
  for t in log:
581
  if t['pnl'] > 0:
 
585
  curr_l += 1; curr_w = 0
586
  if curr_l > max_loss_s: max_loss_s = curr_l
587
 
 
 
 
 
 
 
588
  res.append({
589
  'config': cfg, 'final_balance': final_bal, 'net_profit': profit,
590
  'total_trades': tot, 'win_rate': win_rate, 'max_drawdown': 0,
 
592
  'avg_win': avg_win, 'avg_loss': avg_loss,
593
  'max_win_streak': max_win_s, 'max_loss_streak': max_loss_s,
594
  'profit_factor': profit_factor,
595
+ 'consensus_agreement_rate': 0.0,
596
+ 'high_consensus_win_rate': 0.0,
597
+ 'high_consensus_avg_pnl': 0.0
598
  })
599
  return res
600
 
 
647
  return best['config'], best
648
 
649
  async def run_strategic_optimization_task():
650
+ print("\n🧪 [STRATEGIC BACKTEST] Full Spectrum Mode...")
651
  r2 = R2Service(); dm = DataManager(None, None, r2); proc = MLProcessor(dm)
652
  try:
653
  await dm.initialize(); await proc.initialize()