Riy777 committed (verified) · Commit 7dfab99 · Parent(s): 6f6725f

Update backtest_engine.py

Files changed (1): backtest_engine.py (+530 −367)

backtest_engine.py CHANGED
@@ -1,5 +1,5 @@
 # ============================================================
-# 🧪 backtest_engine.py (V223.0 - GEM-Architect: The Immutable Truth) [PATCHED]
 # ============================================================

 import asyncio
@@ -23,23 +23,20 @@ from typing import Dict, Any, List
 try:
     import pandas_ta as ta
     import xgboost as xgb
-    import lightgbm as lgb
 except ImportError as e:
     raise ImportError(f"🔴 CRITICAL: Missing mandatory dependency for Truth Mode: {e}")

-# ------------------------------------------------------------
-# 2️⃣ Internal Project Modules (Hard Requirement for this runner)
-# ------------------------------------------------------------
 try:
     from ml_engine.processor import MLProcessor
     from ml_engine.data_manager import DataManager
     from learning_hub.adaptive_hub import AdaptiveHub
     from r2 import R2Service
     from governance_engine import GovernanceEngine
-except ImportError as e:
-    raise ImportError(f"🔴 CRITICAL: Missing internal project module: {e}")

-logging.getLogger('ml_engine').setLevel(logging.WARNING)

 CACHE_DIR = "backtest_v223_immutable"
 GOV_CACHE_DIR = os.path.join(CACHE_DIR, "gov_cache")
@@ -52,31 +49,36 @@ for d in [CACHE_DIR, GOV_CACHE_DIR, PATTERN_CACHE_DIR]:
 # ============================================================
 # 🛠️ HELPER FUNCTIONS (Strict Math & Logic)
 # ============================================================
-def optimize_dataframe_memory(df: pd.DataFrame) -> pd.DataFrame:
-    float_cols = df.select_dtypes(include=['float64']).columns
-    df[float_cols] = df[float_cols].astype('float32')
-
-    int_cols = df.select_dtypes(include=['int64', 'int32']).columns
     for col in int_cols:
         c_min = df[col].min()
         c_max = df[col].max()
         if c_min > -128 and c_max < 127:
-            df[col] = df[col].astype('int8')
         elif c_min > -32768 and c_max < 32767:
-            df[col] = df[col].astype('int16')
         else:
-            df[col] = df[col].astype('int32')
     return df


 def tf_to_offset(tf: str):
-    if tf.endswith('m') and tf[:-1].isdigit():
         return f"{int(tf[:-1])}T"
-    if tf.endswith('h') and tf[:-1].isdigit():
         return f"{int(tf[:-1])}h"
-    if tf.endswith('d') and tf[:-1].isdigit():
         return f"{int(tf[:-1])}D"
     return None
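Note: the "T" minute alias built here for resample offsets is deprecated in pandas 2.2+ in favor of "min". A minimal compatibility sketch (hypothetical helper name, not part of this commit):

    # Sketch assuming pandas >= 2.2, where "5T" raises a FutureWarning; "5min" does not.
    def tf_to_offset_compat(tf: str):
        if tf.endswith('m') and tf[:-1].isdigit():
            return f"{int(tf[:-1])}min"  # e.g. '5m' -> '5min'
        if tf.endswith('h') and tf[:-1].isdigit():
            return f"{int(tf[:-1])}h"
        if tf.endswith('d') and tf[:-1].isdigit():
            return f"{int(tf[:-1])}D"
        return None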

 def calc_max_drawdown(equity_curve):
     if not equity_curve:
         return 0.0
@@ -85,6 +87,7 @@ def calc_max_drawdown(equity_curve):
     dd = (eq - peak) / (peak + 1e-9)
     return float(dd.min()) * 100


 def calc_profit_factor(wins, losses):
     gross_win = np.sum(wins)
     gross_loss = abs(np.sum(losses))
@@ -92,6 +95,7 @@ def calc_profit_factor(wins, losses):
         return 99.0
     return float(gross_win / gross_loss)

 def calc_ulcer_index(equity_curve):
     """Ulcer Index (drawdown severity)"""
     if not equity_curve:
@@ -99,7 +103,8 @@ def calc_ulcer_index(equity_curve):
     eq = np.asarray(equity_curve, dtype=np.float64)
     peak = np.maximum.accumulate(eq)
     dd_pct = (eq - peak) / (peak + 1e-12) * 100.0
-    return float(np.sqrt(np.mean(dd_pct ** 2)))


 def calc_sharpe(returns, eps=1e-12):
     """Sharpe on per-trade returns (risk-free assumed 0)"""
@@ -112,6 +117,7 @@ def calc_sharpe(returns, eps=1e-12):
         return 0.0
     return float(mu / sd * np.sqrt(len(r)))


 def calc_sortino(returns, eps=1e-12):
     """Sortino on per-trade returns (downside deviation)"""
     if returns is None or len(returns) < 2:
@@ -126,6 +132,7 @@ def calc_sortino(returns, eps=1e-12):
         return 0.0
     return float(mu / dd * np.sqrt(len(r)))


 def calc_cagr(initial_capital, final_balance, start_ms, end_ms):
     """CAGR using timeline duration (ms). If too short => 0."""
     if initial_capital <= 0 or final_balance <= 0 or end_ms <= start_ms:
@@ -138,6 +145,7 @@ def calc_cagr(initial_capital, final_balance, start_ms, end_ms):
     except:
         return 0.0


 def calc_calmar(cagr, max_drawdown_pct):
     """Calmar = CAGR / |MaxDD| (MaxDD is negative %)"""
     dd = abs(max_drawdown_pct)
@@ -145,6 +153,7 @@ def calc_calmar(cagr, max_drawdown_pct):
         return 99.0
     return float((cagr * 100.0) / dd)


 def calc_consecutive_streaks(pnls):
     """Return (max_consec_wins, max_consec_losses) based on pnl sign."""
     max_w = max_l = 0
@@ -160,6 +169,7 @@ def calc_consecutive_streaks(pnls):
         max_l = max(max_l, cur_l)
     return int(max_w), int(max_l)
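Note: together these helpers form the report's statistics layer. A quick usage sketch on made-up trade PnLs (illustrative values only; assumes the helpers above are in scope):

    import numpy as np
    pnls = [12.0, -5.0, 8.5, -3.2, 20.1]
    returns = [p / 100.0 for p in pnls]            # assume $100 of cost per trade
    equity = np.cumsum([1000.0] + pnls).tolist()   # $1000 starting equity
    wins = [p for p in pnls if p > 0]
    losses = [p for p in pnls if p <= 0]
    print(calc_profit_factor(wins, losses))        # gross wins / gross losses
    print(calc_max_drawdown(equity))               # worst peak-to-trough move, in %
    print(calc_sharpe(returns), calc_sortino(returns))
    print(calc_consecutive_streaks(pnls))          # (max win streak, max loss streak)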

 # ============================================================
 # 🧪 THE VALIDATED TRUTH BACKTESTER
 # ============================================================
@@ -182,24 +192,64 @@ class HeavyDutyBacktester:
         self.MAX_SLOTS = 4

         self.GRID_RANGES = {
-            'TITAN': np.linspace(0.40, 0.70, self.GRID_DENSITY),
-            'ORACLE': np.linspace(0.55, 0.80, self.GRID_DENSITY),
-            'SNIPER': np.linspace(0.30, 0.65, self.GRID_DENSITY),
-            'PATTERN': np.linspace(0.30, 0.70, self.GRID_DENSITY),
-            'GOV_SCORE': np.linspace(50.0, 80.0, self.GRID_DENSITY),
-            'HYDRA_THRESH': np.linspace(0.60, 0.90, self.GRID_DENSITY),
-            'LEGACY_THRESH': np.linspace(0.85, 0.98, self.GRID_DENSITY)
         }

         self.TARGET_COINS = [
-            'SOL/USDT', 'XRP/USDT', 'DOGE/USDT', 'ADA/USDT', 'AVAX/USDT', 'LINK/USDT',
-            'TON/USDT', 'INJ/USDT', 'APT/USDT', 'OP/USDT', 'ARB/USDT', 'SUI/USDT',
-            'SEI/USDT', 'MINA/USDT', 'MATIC/USDT', 'NEAR/USDT', 'RUNE/USDT', 'API3/USDT',
-            'FLOKI/USDT', 'BABYDOGE/USDT', 'SHIB/USDT', 'TRX/USDT', 'DOT/USDT', 'UNI/USDT',
-            'ONDO/USDT', 'SNX/USDT', 'HBAR/USDT', 'XLM/USDT', 'AGIX/USDT', 'IMX/USDT',
-            'LRC/USDT', 'KCS/USDT', 'ICP/USDT', 'SAND/USDT', 'AXS/USDT', 'APE/USDT',
-            'GMT/USDT', 'CHZ/USDT', 'CFX/USDT', 'LDO/USDT', 'FET/USDT', 'RPL/USDT',
-            'MNT/USDT', 'RAY/USDT', 'CAKE/USDT', 'SRM/USDT', 'PENDLE/USDT', 'ATOM/USDT'
         ]

         self.USE_FIXED_DATES = False
@@ -208,28 +258,30 @@ class HeavyDutyBacktester:
         self.force_end_date = "2024-02-01"

         self.required_timeframes = self._determine_required_timeframes()
-        print(f"🧪 [Backtest V223.0] IMMUTABLE TRUTH. TFs: {self.required_timeframes}")

     def _verify_system_integrity(self):
         errors = []
-        if not getattr(self.proc, 'titan', None) or not getattr(self.proc.titan, 'model', None):
             errors.append("❌ Titan Engine missing")
-        if not getattr(self.proc, 'oracle', None) or not getattr(self.proc.oracle, 'model_direction', None):
             errors.append("❌ Oracle Engine missing")
-        if not getattr(self.proc, 'pattern_engine', None):
             errors.append("❌ Pattern Engine missing")
-        if not getattr(self.proc, 'sniper', None):
             errors.append("❌ Sniper Engine missing")

-        if not getattr(self.proc, 'guardian_hydra', None) or not getattr(self.proc.guardian_hydra, 'models', None):
             errors.append("❌ Hydra Guardian missing/models not loaded")
         else:
             m = self.proc.guardian_hydra.models
-            if 'crash' not in m or 'giveback' not in m:
                 errors.append("❌ Hydra missing crash/giveback heads")
             try:
-                _ = m['crash'].predict_proba
-                _ = m['giveback'].predict_proba
             except:
                 errors.append("❌ Hydra heads must implement predict_proba()")
@@ -237,21 +289,21 @@ class HeavyDutyBacktester:
             raise RuntimeError(f"CRITICAL INTEGRITY FAILURE: {errors}")

     def _determine_required_timeframes(self):
-        tfs = set(['5m', '15m', '1h', '4h'])

         def maybe_add(prefix: str):
             if tf_to_offset(prefix):
                 tfs.add(prefix)

-        if hasattr(self.proc.titan.model, 'feature_names'):
             for f in self.proc.titan.model.feature_names:
-                if '_' in f:
-                    maybe_add(f.split('_', 1)[0])

-        if hasattr(self.proc.oracle, 'feature_cols'):
             for f in self.proc.oracle.feature_cols:
-                if '_' in f:
-                    maybe_add(f.split('_', 1)[0])

         return list(tfs)
@@ -275,14 +327,14 @@ class HeavyDutyBacktester:
             async with sem:
                 for _ in range(3):
                     try:
-                        return await self.dm.exchange.fetch_ohlcv(sym, '1m', since=timestamp, limit=limit)
                     except:
                         await asyncio.sleep(0.5)
                 return []

         chunk_size = 50
         for i in range(0, len(tasks), chunk_size):
-            res = await asyncio.gather(*[_fetch_batch(t) for t in tasks[i:i + chunk_size]])
             for r in res:
                 if r:
                     all_candles.extend(r)
@@ -290,70 +342,133 @@ class HeavyDutyBacktester:
         if not all_candles:
             return None

-        df = pd.DataFrame(all_candles, columns=['timestamp', 'o', 'h', 'l', 'c', 'v'])
-        df.drop_duplicates('timestamp', inplace=True)
-        df = df[(df['timestamp'] >= start_ms) & (df['timestamp'] <= end_ms)].sort_values('timestamp')
         return df.values.tolist()
-    def _calculate_all_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
-        cols = ['open', 'high', 'low', 'close', 'volume']
         for c in cols:
-            df[c] = df[c].astype(np.float64)

-        c = df['close']
-        h = df['high']
-        l = df['low']
-        v = df['volume']

         for span in [9, 20, 21, 50, 200]:
-            df[f'ema{span}'] = c.ewm(span=span, adjust=False).mean()
-
-        bb = ta.bbands(c, length=20, std=2.0)
-        if bb is not None:
-            df['lower_bb'] = bb.iloc[:, 0]
-            df['upper_bb'] = bb.iloc[:, 2]
-            df['bb_width'] = bb.iloc[:, 3]
-            df['bb_pct'] = bb.iloc[:, 4]
         else:
-            raise ValueError("BB Fail")

         macd = ta.macd(c)
-        if macd is not None:
-            df['MACD'] = macd.iloc[:, 0]
-            df['MACD_h'] = macd.iloc[:, 1]
-            df['MACD_s'] = macd.iloc[:, 2]
         else:
-            raise ValueError("MACD Fail")

-        df['RSI'] = ta.rsi(c, length=14).fillna(50)
-        df['ATR'] = ta.atr(h, l, c, length=14).fillna(0)
-        df['ADX'] = ta.adx(h, l, c, length=14).iloc[:, 0].fillna(0)

         try:
-            df['CHOP'] = ta.chop(h, l, c, length=14).fillna(50)
         except:
-            df['CHOP'] = 50

         try:
-            df['vwap'] = ta.vwap(h, l, c, v).fillna(c)
         except:
-            df['vwap'] = c

-        df['EMA_9_dist'] = (c / df['ema9']) - 1
-        df['EMA_21_dist'] = (c / df['ema21']) - 1
-        df['EMA_50_dist'] = (c / df['ema50']) - 1
-        df['EMA_200_dist'] = (c / df['ema200']) - 1
-        df['VWAP_dist'] = (c / df['vwap']) - 1
-        df['ATR_pct'] = (df['ATR'] / c) * 100

         mean_vol = v.rolling(50).mean() + 1e-9
-        df['rel_vol'] = v / mean_vol
-        df['log_ret'] = np.concatenate([[0], np.diff(np.log(c + 1e-9))])

         return df.fillna(0)
     async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
-        safe_sym = sym.replace('/', '_')
         period_suffix = f"{start_ms}_{end_ms}"
         scores_file = f"{CACHE_DIR}/{safe_sym}_{period_suffix}_processed.pkl"
@@ -364,73 +479,74 @@ class HeavyDutyBacktester:
         print(f" ⚙️ [CPU] Processing {sym} (Truth Mode)...", flush=True)
         t0 = time.time()

-        df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
-        df_1m['datetime'] = pd.to_datetime(df_1m['timestamp'] + 60000, unit='ms', utc=True)
-        df_1m.set_index('datetime', inplace=True)
         df_1m = df_1m.sort_index()

         df_1m = self._calculate_all_indicators(df_1m)

         arr_ts_1m = (df_1m.index.astype(np.int64) // 10**6).values
-        fast_1m_close = df_1m['close'].values.astype(np.float32)

         numpy_htf = {}
-        agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
-
         for tf_str in self.required_timeframes:
             offset = tf_to_offset(tf_str)
             if not offset:
                 continue
-            resampled = df_1m.resample(offset, label='right', closed='right').agg(agg_dict).dropna()
             resampled = self._calculate_all_indicators(resampled)
-            resampled['timestamp'] = resampled.index.astype(np.int64) // 10**6
             numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}

-        if '1h' not in numpy_htf:
             raise RuntimeError(f"CRITICAL: '1h' missing for {sym}.")

         def get_safe_map(tf):
-            if tf not in numpy_htf or len(numpy_htf[tf]['timestamp']) == 0:
                 return np.full(len(arr_ts_1m), -1, dtype=np.int32)
-            htf_ts = numpy_htf[tf]['timestamp']
-            idx = np.searchsorted(htf_ts, arr_ts_1m, side='right') - 1
             return idx.astype(np.int32)

         maps = {tf: get_safe_map(tf) for tf in self.required_timeframes}
-
         validity_mask = np.ones(len(arr_ts_1m), dtype=bool)
         for tf in maps:
             validity_mask &= (maps[tf] >= 0)
         validity_mask[:200] = False
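Note: get_safe_map is an as-of (backward) join — for each 1m timestamp it picks the index of the latest higher-timeframe bar that closed at or before it, so no HTF information leaks from the future. A minimal sketch of the same idea (illustrative timestamps):

    import numpy as np
    htf_close_ts = np.array([1000, 2000, 3000])      # HTF bar close times (ms)
    one_min_ts = np.array([999, 1000, 1500, 2999])   # 1m timestamps (ms)
    idx = np.searchsorted(htf_close_ts, one_min_ts, side='right') - 1
    # idx == [-1, 0, 0, 1]; -1 means "no completed HTF bar yet" and is masked out above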

-        # 1) Pattern (Cached)
         global_pattern_scores = np.zeros(len(arr_ts_1m), dtype=np.float32)
         pat_cache_file = os.path.join(PATTERN_CACHE_DIR, f"{safe_sym}_{period_suffix}_pat.pkl")
         pattern_results_map = {}

         if os.path.exists(pat_cache_file):
-            with open(pat_cache_file, 'rb') as f:
                 pattern_results_map = pickle.load(f)
-        elif '15m' in numpy_htf:
-            ts_15m = numpy_htf['15m']['timestamp']
-            cols = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
-            df_15m_source = pd.DataFrame({c: numpy_htf['15m'][c] for c in cols})
-
             for i in range(200, len(df_15m_source)):
-                window = df_15m_source.iloc[i - 200:i + 1]
-                ohlcv_input = {'15m': window.values.tolist()}
                 try:
                     res = await self.proc.pattern_engine.detect_chart_patterns(ohlcv_input)
-                    pattern_results_map[ts_15m[i]] = res.get('pattern_confidence', 0.0)
                 except:
                     pass
-
-            with open(pat_cache_file, 'wb') as f:
                 pickle.dump(pattern_results_map, f)

-        if '15m' in maps:
-            map_15 = maps['15m']
-            ts_15_arr = numpy_htf['15m']['timestamp']
             for i in range(len(arr_ts_1m)):
                 if not validity_mask[i]:
                     continue
@@ -438,46 +554,44 @@ class HeavyDutyBacktester:
                 if idx >= 0:
                     global_pattern_scores[i] = pattern_results_map.get(ts_15_arr[idx], 0.0)

-        # 2) Governance (Cached)
         gov_scores_final = np.zeros(len(arr_ts_1m), dtype=np.float32)
         gov_cache_file = os.path.join(GOV_CACHE_DIR, f"{safe_sym}_{period_suffix}_gov.pkl")
         gov_results_map = {}

         if os.path.exists(gov_cache_file):
-            with open(gov_cache_file, 'rb') as f:
                 gov_results_map = pickle.load(f)
-        elif '15m' in numpy_htf:
-            cols = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
-            df_15m_g = pd.DataFrame({c: numpy_htf['15m'][c] for c in cols})
-            ts_15m = numpy_htf['15m']['timestamp']
-
-            has_1h = '1h' in numpy_htf
-            df_1h_g = pd.DataFrame({c: numpy_htf['1h'][c] for c in cols}) if has_1h else None
-            ts_1h = numpy_htf['1h']['timestamp'] if has_1h else None

             for i in range(200, len(df_15m_g)):
                 curr_ts = ts_15m[i]
-                win_15 = df_15m_g.iloc[i - 120:i + 1]
-                ohlcv_input = {'15m': win_15.values.tolist()}

                 if has_1h:
-                    idx_1h = np.searchsorted(ts_1h, curr_ts, side='right') - 1
                     if idx_1h >= 50:
-                        ohlcv_input['1h'] = df_1h_g.iloc[idx_1h - 60:idx_1h + 1].values.tolist()
-
                 try:
                     res = await self.gov_engine.evaluate_trade(sym, ohlcv_input, {}, "NORMAL", False, has_1h)
-                    score = res.get('governance_score', 0.0) if res.get('grade') != 'REJECT' else 0.0
                     gov_results_map[curr_ts] = score
                 except:
                     pass

-            with open(gov_cache_file, 'wb') as f:
                 pickle.dump(gov_results_map, f)

-        if '15m' in maps:
-            map_15 = maps['15m']
-            ts_15_arr = numpy_htf['15m']['timestamp']
             for i in range(len(arr_ts_1m)):
                 if not validity_mask[i]:
                     continue
@@ -485,28 +599,27 @@ class HeavyDutyBacktester:
                 if idx >= 0:
                     gov_scores_final[i] = gov_results_map.get(ts_15_arr[idx], 0.0)

-        # 3) Market State
-        map_1h = maps['1h']
         valid_1h = map_1h >= 0
         idx_1h = map_1h[valid_1h]

-        h1_chop = numpy_htf['1h']['CHOP'][idx_1h]
-        h1_adx = numpy_htf['1h']['ADX'][idx_1h]
-        h1_atr_pct = numpy_htf['1h']['ATR_pct'][idx_1h]

         market_ok = np.ones(len(arr_ts_1m), dtype=bool)
         market_ok[valid_1h] = ~((h1_chop > 61.8) | ((h1_atr_pct < 0.3) & (h1_adx < 20)))

         coin_state = np.zeros(len(arr_ts_1m), dtype=np.int8)
-
-        h1_rsi = numpy_htf['1h']['RSI'][idx_1h]
-        h1_bbw = numpy_htf['1h']['bb_width'][idx_1h]
-        h1_upper = numpy_htf['1h']['upper_bb'][idx_1h]
-        h1_ema20 = numpy_htf['1h']['ema20'][idx_1h]
-        h1_ema50 = numpy_htf['1h']['ema50'][idx_1h]
-        h1_ema200 = numpy_htf['1h']['ema200'][idx_1h]
-        h1_close = numpy_htf['1h']['close'][idx_1h]
-        h1_rel_vol = numpy_htf['1h']['rel_vol'][idx_1h]

         mask_acc = (h1_bbw < 0.20) & (h1_rsi >= 35) & (h1_rsi <= 65)
         mask_safe = (h1_adx > 25) & (h1_ema20 > h1_ema50) & (h1_ema50 > h1_ema200) & (h1_rsi > 50) & (h1_rsi < 75)
@@ -521,55 +634,49 @@ class HeavyDutyBacktester:
         coin_state[~validity_mask] = 0
         coin_state[~market_ok] = 0

-        # 4) Titan & Oracle
         titan_cols = self.proc.titan.model.feature_names
         t_vecs = []
-
         for col in titan_cols:
-            parts = col.split('_', 1)
             if len(parts) < 2:
                 raise ValueError(f"Titan Feature Format Error: {col}")
-            tf, raw_feat = parts[0], parts[1]
-
-            lookup_key = 'bb_pct' if raw_feat in ['BB_p', 'BB_pct'] else ('bb_width' if raw_feat == 'BB_w' else raw_feat)

             if tf not in numpy_htf:
                 raise ValueError(f"Titan requires TF not built: {tf} (feature: {col})")
-            if lookup_key not in numpy_htf[tf] and lookup_key != 'timestamp':
                 raise ValueError(f"Missing Titan Feature: {col}")

             idx = maps[tf]
             vals = np.zeros(len(arr_ts_1m), dtype=np.float32)
             valid = idx >= 0

-            if lookup_key == 'timestamp':
-                vals[valid] = numpy_htf[tf]['timestamp'][idx[valid]]
             else:
                 vals[valid] = numpy_htf[tf][lookup_key][idx[valid]]
-
             t_vecs.append(vals)

         X_TITAN = np.column_stack(t_vecs)
-        global_titan_scores = self.proc.titan.model.predict(
-            xgb.DMatrix(X_TITAN, feature_names=titan_cols)
-        )

         oracle_cols = self.proc.oracle.feature_cols
         o_vecs = []
-
         for col in oracle_cols:
-            if col == 'sim_titan_score':
                 o_vecs.append(global_titan_scores)
-            elif col in ['sim_pattern_score', 'pattern_score']:
                 o_vecs.append(global_pattern_scores)
-            elif col == 'sim_mc_score':
-                o_vecs.append(np.zeros(len(arr_ts_1m)))
             else:
-                parts = col.split('_', 1)
                 if len(parts) != 2:
                     raise ValueError(f"Oracle Feature Error: {col}")
                 tf, key = parts
-
                 if tf not in numpy_htf:
                     raise ValueError(f"Oracle requires TF not built: {tf} (feature: {col})")
                 if key not in numpy_htf[tf]:
@@ -585,55 +692,58 @@ class HeavyDutyBacktester:
         preds_o = self.proc.oracle.model_direction.predict(X_ORACLE)
         if isinstance(preds_o, np.ndarray) and len(preds_o.shape) > 1:
             preds_o = preds_o[:, 0]
-        global_oracle_scores = preds_o

-        # 5) Sniper
         df_sniper_feats = self.proc.sniper._calculate_features_live(df_1m)
         X_sniper = df_sniper_feats[self.proc.sniper.feature_names].fillna(0)
         preds_accum = np.zeros(len(X_sniper), dtype=np.float32)
         for model in self.proc.sniper.models:
-            preds_accum += model.predict(X_sniper)
-        global_sniper_scores = (preds_accum / len(self.proc.sniper.models)).astype(np.float32)

-        # 6) Hydra Static
-        map_5 = maps['5m']
-        map_15 = maps['15m']
-        map_1 = maps.get('1h', map_15)

-        f_rsi_1m = df_1m['RSI'].values.astype(np.float32)

         f_rsi_5m = np.zeros(len(arr_ts_1m), dtype=np.float32)
         v5 = map_5 >= 0
-        f_rsi_5m[v5] = numpy_htf['5m']['RSI'][map_5[v5]]

         f_rsi_15m = np.zeros(len(arr_ts_1m), dtype=np.float32)
         v15 = map_15 >= 0
-        f_rsi_15m[v15] = numpy_htf['15m']['RSI'][map_15[v15]]

         f_dist_1h = np.zeros(len(arr_ts_1m), dtype=np.float32)
         v1 = map_1 >= 0
-        ema20_1h = numpy_htf['1h']['ema20'][map_1[v1]]
-        close_1h = numpy_htf['1h']['close'][map_1[v1]]
         f_dist_1h[v1] = (close_1h - ema20_1h) / (close_1h + 1e-12)

-        hydra_static = np.column_stack([
-            f_rsi_1m,
-            f_rsi_5m,
-            f_rsi_15m,
-            df_1m['bb_width'].values.astype(np.float32),
-            df_1m['rel_vol'].values.astype(np.float32),
-            f_dist_1h,
-            (df_1m['ATR_pct'].values.astype(np.float32) / 100.0),
-        ]).astype(np.float32)
-
-        # SAVE
-        min_gov = self.GRID_RANGES['GOV_SCORE'][0]
-        min_oracle = self.GRID_RANGES['ORACLE'][0]
-        min_titan = self.GRID_RANGES['TITAN'][0]
-        min_sniper = self.GRID_RANGES['SNIPER'][0]
-        min_pattern = self.GRID_RANGES['PATTERN'][0]
-
-        # PATCH: Remove "\" line continuation completely (syntax-safe)
         filter_mask = (
             validity_mask
             & (coin_state > 0)
@@ -646,30 +756,32 @@ class HeavyDutyBacktester:

         valid_idxs = np.where(filter_mask)[0]

-        signals_df = pd.DataFrame({
-            'timestamp': arr_ts_1m[valid_idxs],
-            'symbol': sym,
-            'close': fast_1m_close[valid_idxs],
-            'coin_state': coin_state[valid_idxs],
-            'gov_score': gov_scores_final[valid_idxs],
-            'titan_score': global_titan_scores[valid_idxs],
-            'oracle_conf': global_oracle_scores[valid_idxs],
-            'sniper_score': global_sniper_scores[valid_idxs],
-            'pattern_score': global_pattern_scores[valid_idxs]
-        })

         sim_data = {
-            'timestamp': arr_ts_1m.astype(np.int64),
-            'close': fast_1m_close,
-            'high': df_1m['high'].values.astype(np.float32),
-            'low': df_1m['low'].values.astype(np.float32),
-            'atr': df_1m['ATR'].values.astype(np.float32),
-            'hydra_static': hydra_static,
-            'oracle_conf': global_oracle_scores.astype(np.float32),
-            'titan_score': global_titan_scores.astype(np.float32)
         }

-        pd.to_pickle({'signals': signals_df, 'sim_data': sim_data}, scores_file)
         dt = time.time() - t0
         print(f" ✅ [{sym}] Processed in {dt:.2f}s. Signals: {len(signals_df)}")
         gc.collect()
@@ -687,24 +799,42 @@ class HeavyDutyBacktester:
         ms_e = int(dt_e.timestamp() * 1000)

         for sym in self.TARGET_COINS:
-            c = await self._fetch_all_data_fast(sym, ms_s, ms_e)
-            if c:
-                await self._process_data_in_memory(sym, c, ms_s, ms_e)

     def _flush_position_interval(
-        self, cfg, open_sym, pos, curr_ts, sim_env, crash_model, giveback_model, fees_pct,
-        trade_pnls, trade_returns, trade_durations, equity_curve, cash_bal, wins_losses,
-        last_update_map, end_idx_override=None
     ):
         c_data = sim_env[open_sym]
-        full_ts = c_data['timestamp']

         start_idx = int(last_update_map.get(open_sym, 0))
         if start_idx < 0:
             start_idx = 0

         if end_idx_override is None:
-            end_idx = int(np.searchsorted(full_ts, curr_ts, side='right'))
         else:
             end_idx = int(end_idx_override)
@@ -712,46 +842,43 @@ class HeavyDutyBacktester:
         if end_idx <= start_idx:
             return cash_bal, False

-        interval_high = c_data['high'][start_idx:end_idx]
-        interval_low = c_data['low'][start_idx:end_idx]
-        interval_close = c_data['close'][start_idx:end_idx]
-        interval_atr = c_data['atr'][start_idx:end_idx]

-        h_static = c_data['hydra_static'][start_idx:end_idx]
-        h_oracle = c_data['oracle_conf'][start_idx:end_idx]
-        h_titan = c_data['titan_score'][start_idx:end_idx]

-        entry_p = float(pos['entry_p'])
-        entry_time = int(pos['entry_ts'])

-        prev_high = float(pos.get('highest_price', entry_p))
         current_highs = np.maximum.accumulate(np.concatenate([[prev_high], interval_high]))[1:]
-        pos['highest_price'] = float(current_highs[-1])

         durations = (full_ts[start_idx:end_idx] - entry_time) / 60000.0

-        # ✅ PATCH: Safe ATR math (no div-by-zero)
         sl_dist = np.maximum(1.5 * interval_atr, 1e-8)
-
         pnl = interval_close - entry_p
         norm_pnl = pnl / sl_dist
         max_pnl = (current_highs - entry_p) / sl_dist

         zeros = np.zeros(len(interval_close), dtype=np.float32)
         h_dynamic = np.column_stack([norm_pnl, max_pnl, zeros, zeros, durations]).astype(np.float32)
-
         threes = np.full(len(interval_close), 3.0, dtype=np.float32)
         h_context = np.column_stack([zeros, h_oracle, h_titan, threes]).astype(np.float32)
-
         X_H = np.column_stack([h_static, h_dynamic, h_context]).astype(np.float32)

         crash_probs = crash_model.predict_proba(X_H)[:, 1]
         give_probs = giveback_model.predict_proba(X_H)[:, 1]

-        sl_hit = interval_low < pos['sl_p']
-        tp_hit = interval_high > pos['tp_p']
-        hydra_hit = (crash_probs > cfg['HYDRA_THRESH']) | (give_probs > cfg['HYDRA_THRESH'])
-        legacy_hit = (crash_probs > cfg['LEGACY_THRESH']) | (give_probs > cfg['LEGACY_THRESH'])

         any_exit = sl_hit | tp_hit | legacy_hit | hydra_hit
         last_update_map[open_sym] = end_idx
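Note: the boolean exit flags are then scanned for the earliest bar that triggers any exit. A sketch of the standard "first hit wins" idiom over such flags (the context lines that locate the first True are elided in this hunk, so this is illustrative, not the literal code):

    import numpy as np
    any_exit = np.array([False, False, True, False, True])
    if any_exit.any():
        idx = int(np.argmax(any_exit))  # index of the FIRST True = earliest exit bar
        # price/PnL is realized at this bar; later flags are ignored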
@@ -763,79 +890,73 @@ class HeavyDutyBacktester:
         exit_ts = int(full_ts[start_idx + idx])

         if sl_hit[idx]:
-            exit_p = float(pos['sl_p']) * (1 - self.SLIPPAGE_PCT)
         elif tp_hit[idx]:
-            exit_p = float(pos['tp_p']) * (1 - self.SLIPPAGE_PCT)
         elif legacy_hit[idx]:
             exit_p = float(interval_close[idx]) * (1 - (self.SLIPPAGE_PCT * 2.0))
         else:
             exit_p = float(interval_close[idx]) * (1 - self.SLIPPAGE_PCT)

-        net = (pos['qty'] * exit_p) * (1 - fees_pct)
         cash_bal += net
-        pnl_real = float(net - pos['cost'])

         trade_pnls.append(pnl_real)
-        trade_returns.append(pnl_real / (float(pos['cost']) + 1e-12))
         trade_durations.append((exit_ts - entry_time) / 60000.0)
         equity_curve.append(float(cash_bal))

         if pnl_real > 0:
-            wins_losses['wins'] += 1
         else:
-            wins_losses['losses'] += 1

         return cash_bal, True

     def _worker_optimize(self, combinations_batch, scores_files, initial_capital, fees_pct, max_slots, target_state):
         all_signals = []
         sim_env = {}
-        crash_model = self.proc.guardian_hydra.models['crash']
-        giveback_model = self.proc.guardian_hydra.models['giveback']

         for f in scores_files:
             try:
                 data = pd.read_pickle(f)
-                sig = optimize_dataframe_memory(data['signals'])
                 if sig is None or len(sig) == 0:
                     continue
                 all_signals.append(sig)
-                sym = str(sig['symbol'].iloc[0])
-                sim_env[sym] = data['sim_data']
             except:
                 pass

         if not all_signals:
             return []

-        timeline_df = pd.concat(all_signals).sort_values('timestamp').reset_index(drop=True)
-
-        t_ts = timeline_df['timestamp'].values.astype(np.int64)
-        t_sym = timeline_df['symbol'].values
-        t_close = timeline_df['close'].values.astype(np.float64)
-        t_state = timeline_df['coin_state'].values
-        t_gov = timeline_df['gov_score'].values.astype(np.float64)
-        t_oracle = timeline_df['oracle_conf'].values.astype(np.float64)
-        t_titan = timeline_df['titan_score'].values.astype(np.float64)
-        t_sniper = timeline_df['sniper_score'].values.astype(np.float64)
-        t_pattern = timeline_df['pattern_score'].values.astype(np.float64)
-
         del all_signals, timeline_df
         gc.collect()

-        # PATCH: Use true sim_env timeline (not just signals timeline)
-        all_starts = [int(v['timestamp'][0]) for v in sim_env.values() if len(v['timestamp']) > 0]
-        all_ends = [int(v['timestamp'][-1]) for v in sim_env.values() if len(v['timestamp']) > 0]
-        start_ms = min(all_starts) if all_starts else (int(t_ts[0]) if len(t_ts) else 0)
-        end_ms = max(all_ends) if all_ends else (int(t_ts[-1]) if len(t_ts) else 0)

         res = []
         BATCH_SIZE = 300
         USE_MARK_TO_MARKET_EQUITY = True

         for i in range(0, len(combinations_batch), BATCH_SIZE):
-            batch = combinations_batch[i:i + BATCH_SIZE]
-
             for cfg in batch:
                 cash_bal = float(initial_capital)
                 active_positions = {}
@@ -845,11 +966,10 @@ class HeavyDutyBacktester:
                 trade_pnls = []
                 trade_returns = []
                 trade_durations = []
-
                 equity_curve = [float(initial_capital)]
                 equity_ts = [start_ms]

-                wins_losses = {'wins': 0, 'losses': 0}
                 exposure_steps = 0

                 def mark_to_market_equity(curr_ts):
@@ -861,7 +981,7 @@ class HeavyDutyBacktester:
                         if px is None:
                             continue
                         has_open = True
-                        open_val += (pos['qty'] * px * (1 - self.SLIPPAGE_PCT)) * (1 - fees_pct)
                     if has_open:
                         exposure_steps += 1
                     eq = cash_bal + open_val
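Note: mark-to-market equity counts cash plus the liquidation value of open positions (net of slippage and fees), so drawdown and exposure statistics see unrealized losses rather than only realized ones. A small worked example with illustrative numbers:

    cash_bal = 900.0
    qty, px = 2.0, 55.0
    SLIPPAGE_PCT, fees_pct = 0.001, 0.001
    open_val = (qty * px * (1 - SLIPPAGE_PCT)) * (1 - fees_pct)
    equity = cash_bal + open_val  # ≈ 1009.78, counted toward drawdown while the trade is open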
@@ -874,12 +994,25 @@ class HeavyDutyBacktester:
                     sym = str(sym)
                     last_price[sym] = float(p)

-                    # 1) Update Positions
                     to_close = []
                     for open_sym, pos in list(active_positions.items()):
                         cash_bal, closed = self._flush_position_interval(
-                            cfg, open_sym, pos, curr_ts, sim_env, crash_model, giveback_model, fees_pct,
-                            trade_pnls, trade_returns, trade_durations, equity_curve, cash_bal, wins_losses, last_update_map
                         )
                         if closed:
                             to_close.append(open_sym)
@@ -890,77 +1023,85 @@ class HeavyDutyBacktester:
                     if USE_MARK_TO_MARKET_EQUITY:
                         mark_to_market_equity(curr_ts)

-                    # 2) Entry (✅ PATCH: remove "\" continuation)
                     is_valid = (
                         (int(c_state) == int(target_state))
-                        and (float(gov) >= float(cfg['GOV_SCORE']))
-                        and (float(oracle) >= float(cfg['ORACLE']))
-                        and (float(titan) >= float(cfg['TITAN']))
-                        and (float(sniper) >= float(cfg['SNIPER']))
-                        and (float(pattern) >= float(cfg['PATTERN']))
                     )

                     if is_valid and sym not in active_positions:
                         slots = 1 if cash_bal < self.MIN_CAPITAL_FOR_SPLIT else int(max_slots)
-
                         if len(active_positions) < slots and cash_bal >= 5.0:
                             size = (cash_bal * 0.95) if cash_bal < self.MIN_CAPITAL_FOR_SPLIT else (cash_bal / max_slots)
-
                             if size >= 5.0:
                                 ep = float(p) * (1 + self.SLIPPAGE_PCT)
                                 fee = float(size) * fees_pct
                                 cost = float(size)
                                 qty = (cost - fee) / (ep + 1e-12)

-                                sym_ts = sim_env[sym]['timestamp']
-                                idx = int(np.searchsorted(sym_ts, curr_ts, side='right') - 1)
                                 idx = max(0, min(idx, len(sym_ts) - 1))
-                                atr_val = float(sim_env[sym]['atr'][idx])

                                 active_positions[sym] = {
-                                    'qty': float(qty),
-                                    'entry_p': float(ep),
-                                    'cost': float(cost),
-                                    'entry_ts': int(curr_ts),
-                                    'sl_p': float(ep - 1.5 * atr_val),
-                                    'tp_p': float(ep + 2.5 * atr_val),
-                                    'highest_price': float(ep)
                                 }
-
                                 cash_bal -= float(cost)
                                 last_update_map[sym] = min(idx + 1, len(sym_ts))

-                # Tail Flush
                 for open_sym, pos in list(active_positions.items()):
                     cash_bal, closed = self._flush_position_interval(
-                        cfg, open_sym, pos, int(sim_env[open_sym]['timestamp'][-1]), sim_env,
-                        crash_model, giveback_model, fees_pct,
-                        trade_pnls, trade_returns, trade_durations, equity_curve, cash_bal,
-                        wins_losses, last_update_map, end_idx_override=len(sim_env[open_sym]['timestamp'])
                     )

                     if closed:
                         del active_positions[open_sym]
                     else:
                         c_data = sim_env[open_sym]
-                        last_p = float(c_data['close'][-1])
-                        net = (pos['qty'] * last_p * (1 - self.SLIPPAGE_PCT)) * (1 - fees_pct)
-                        pnl_val = float(net - pos['cost'])
                         cash_bal += float(net)

                         trade_pnls.append(pnl_val)
-                        trade_returns.append(pnl_val / (float(pos['cost']) + 1e-12))
-
-                        entry_ts = int(pos['entry_ts'])
-                        last_ts = int(c_data['timestamp'][-1])
                         trade_durations.append((last_ts - entry_ts) / 60000.0)
-
                         equity_curve.append(float(cash_bal))

                         if pnl_val > 0:
-                            wins_losses['wins'] += 1
                         else:
-                            wins_losses['losses'] += 1

                         del active_positions[open_sym]
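Note on the entry branch above: position size follows a slot scheme — small accounts go nearly all-in on a single slot, larger ones split capital across the slot count. A sketch with illustrative numbers (MIN_CAPITAL_FOR_SPLIT's actual value is set elsewhere in the class; these are assumptions):

    MIN_CAPITAL_FOR_SPLIT, MAX_SLOTS = 100.0, 4   # assumed values for illustration
    cash_bal = 80.0
    slots = 1 if cash_bal < MIN_CAPITAL_FOR_SPLIT else MAX_SLOTS
    size = cash_bal * 0.95 if cash_bal < MIN_CAPITAL_FOR_SPLIT else cash_bal / MAX_SLOTS
    # here: slots == 1, size == 76.0 → one concentrated position instead of four splits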
@@ -969,15 +1110,13 @@ class HeavyDutyBacktester:

                 max_dd = calc_max_drawdown(equity_curve)
                 ulcer = calc_ulcer_index(equity_curve)
-
                 wins_list = [p for p in trade_pnls if p > 0]
                 loss_list = [p for p in trade_pnls if p <= 0]
-
                 prof_fac = calc_profit_factor(wins_list, loss_list)
-                mean_pnl = float(np.mean(trade_pnls))

-                sd = float(np.std(trade_pnls))
-                sqn = float((mean_pnl / sd) * np.sqrt(len(trade_pnls))) if sd > 0 else 0.0

                 sharpe = calc_sharpe(trade_returns)
                 sortino = calc_sortino(trade_returns)
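Note: the SQN line above is Van Tharp's System Quality Number — per-trade expectancy over its dispersion, scaled by √N. A standalone sketch mirroring the inline computation (hypothetical helper, not part of this commit):

    import numpy as np
    def sqn(trade_pnls):
        pnls = np.asarray(trade_pnls, dtype=np.float64)
        sd = float(pnls.std())
        return float(pnls.mean() / sd * np.sqrt(len(pnls))) if sd > 0 else 0.0
    # The sqrt(N) scaling rewards trade count, so compare configs at similar N.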
@@ -991,30 +1130,36 @@ class HeavyDutyBacktester:
                 med_dur = float(np.median(trade_durations)) if trade_durations else 0.0
                 max_w_streak, max_l_streak = calc_consecutive_streaks(trade_pnls)

-                payoff = float(np.mean(wins_list) / max(abs(np.mean(loss_list)), 1e-12)) if (len(wins_list) > 0 and len(loss_list) > 0) else 99.0
-
-                res.append({
-                    'config': cfg,
-                    'net_profit': float(cash_bal - initial_capital),
-                    'total_trades': int(len(trade_pnls)),
-                    'final_balance': float(cash_bal),
-                    'win_rate': float((wins_losses['wins'] / len(trade_pnls)) * 100.0),
-                    'sqn': sqn,
-                    'max_drawdown': float(max_dd),
-                    'ulcer_index': ulcer,
-                    'profit_factor': prof_fac,
-                    'payoff_ratio': payoff,
-                    'sharpe': sharpe,
-                    'sortino': sortino,
-                    'cagr': cagr,
-                    'calmar': calmar,
-                    'expectancy': mean_pnl,
-                    'exposure_pct': exposure_pct,
-                    'avg_trade_duration_min': avg_dur,
-                    'median_trade_duration_min': med_dur,
-                    'max_consec_wins': max_w_streak,
-                    'max_consec_losses': max_l_streak
-                })

             gc.collect()
@@ -1039,15 +1184,11 @@ class HeavyDutyBacktester:

         for state_name, state_id in [("ACCUMULATION", 1), ("SAFE_TREND", 2), ("EXPLOSIVE", 3)]:
             print(f"\n🌀 Optimizing [{state_name}]...")
-            t0 = time.time()
-
-            results = self._worker_optimize(
-                combos, files, self.INITIAL_CAPITAL, self.TRADING_FEES, self.MAX_SLOTS, state_id
-            )
             if not results:
                 continue

-            results.sort(key=lambda x: (x['calmar'], x['sqn']), reverse=True)
             best = results[0]

             print(f"🏆 BEST [{state_name}]:")
@@ -1056,26 +1197,48 @@ class HeavyDutyBacktester:
             print(f" 🎲 SQN: {best['sqn']:.2f} | PF: {best['profit_factor']:.2f} | Payoff: {best['payoff_ratio']:.2f}")
             print(f" 📉 MaxDD: {best['max_drawdown']:.2f}% | Ulcer: {best['ulcer_index']:.2f}")
             print(f" 📈 Sharpe/Sortino: {best['sharpe']:.2f} / {best['sortino']:.2f}")
-            print(f" 🧮 CAGR/Calmar: {(best['cagr'] * 100):.2f}% / {best['calmar']:.2f}")
             print(f" ⏱️ Exposure: {best['exposure_pct']:.1f}% | Dur(avg/med): {best['avg_trade_duration_min']:.1f}/{best['median_trade_duration_min']:.1f} min")
             print(f" 🔁 Streaks W/L: {best['max_consec_wins']} / {best['max_consec_losses']}")
             print(f" ⚙️ Config: {best['config']}")

 async def run_strategic_optimization_task():
     r2 = R2Service()
     dm = DataManager(None, None, r2)
     proc = MLProcessor(dm)

-    await dm.initialize()
-    await proc.initialize()

-    if getattr(proc, 'guardian_hydra', None):
-        proc.guardian_hydra.set_silent_mode(True)

-    opt = HeavyDutyBacktester(dm, proc)
-    await opt.run_optimization()

-    await dm.close()

 if __name__ == "__main__":
     asyncio.run(run_strategic_optimization_task())
 
 # ============================================================
+# 🧪 backtest_engine.py (V223.1 - GEM-Architect: The Immutable Truth - Hardened)
 # ============================================================

 import asyncio
 
 try:
     import pandas_ta as ta
     import xgboost as xgb
+    import lightgbm as lgb  # (kept for environment parity, may be used elsewhere)
 except ImportError as e:
     raise ImportError(f"🔴 CRITICAL: Missing mandatory dependency for Truth Mode: {e}")

 try:
     from ml_engine.processor import MLProcessor
     from ml_engine.data_manager import DataManager
     from learning_hub.adaptive_hub import AdaptiveHub
     from r2 import R2Service
     from governance_engine import GovernanceEngine
+except ImportError:
+    pass

+logging.getLogger("ml_engine").setLevel(logging.WARNING)

 CACHE_DIR = "backtest_v223_immutable"
 GOV_CACHE_DIR = os.path.join(CACHE_DIR, "gov_cache")
 
 # ============================================================
 # 🛠️ HELPER FUNCTIONS (Strict Math & Logic)
 # ============================================================
+def optimize_dataframe_memory(df: pd.DataFrame):
+    if df is None or len(df) == 0:
+        return df
+    float_cols = df.select_dtypes(include=["float64"]).columns
+    if len(float_cols) > 0:
+        df[float_cols] = df[float_cols].astype("float32")
+
+    int_cols = df.select_dtypes(include=["int64", "int32"]).columns
     for col in int_cols:
         c_min = df[col].min()
         c_max = df[col].max()
         if c_min > -128 and c_max < 127:
+            df[col] = df[col].astype("int8")
         elif c_min > -32768 and c_max < 32767:
+            df[col] = df[col].astype("int16")
         else:
+            df[col] = df[col].astype("int32")
     return df
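Note: the strict bounds above exclude the int8 edge values (-128 and 127 themselves fall through to int16), which is conservative and safe. The same checks could be expressed via numpy's dtype introspection — a sketch, not part of this commit:

    import numpy as np
    assert np.iinfo(np.int8).min == -128 and np.iinfo(np.int8).max == 127
    # e.g. a column fits int8 iff np.iinfo(np.int8).min <= c_min and c_max <= np.iinfo(np.int8).max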

+
 def tf_to_offset(tf: str):
+    if tf.endswith("m") and tf[:-1].isdigit():
         return f"{int(tf[:-1])}T"
+    if tf.endswith("h") and tf[:-1].isdigit():
         return f"{int(tf[:-1])}h"
+    if tf.endswith("d") and tf[:-1].isdigit():
         return f"{int(tf[:-1])}D"
     return None

+
 def calc_max_drawdown(equity_curve):
     if not equity_curve:
         return 0.0

     dd = (eq - peak) / (peak + 1e-9)
     return float(dd.min()) * 100

+
 def calc_profit_factor(wins, losses):
     gross_win = np.sum(wins)
     gross_loss = abs(np.sum(losses))

         return 99.0
     return float(gross_win / gross_loss)

+
 def calc_ulcer_index(equity_curve):
     """Ulcer Index (drawdown severity)"""
     if not equity_curve:

     eq = np.asarray(equity_curve, dtype=np.float64)
     peak = np.maximum.accumulate(eq)
     dd_pct = (eq - peak) / (peak + 1e-12) * 100.0
+    return float(np.sqrt(np.mean(dd_pct**2)))
+

 def calc_sharpe(returns, eps=1e-12):
     """Sharpe on per-trade returns (risk-free assumed 0)"""

         return 0.0
     return float(mu / sd * np.sqrt(len(r)))

+
 def calc_sortino(returns, eps=1e-12):
     """Sortino on per-trade returns (downside deviation)"""
     if returns is None or len(returns) < 2:

         return 0.0
     return float(mu / dd * np.sqrt(len(r)))

+
 def calc_cagr(initial_capital, final_balance, start_ms, end_ms):
     """CAGR using timeline duration (ms). If too short => 0."""
     if initial_capital <= 0 or final_balance <= 0 or end_ms <= start_ms:

     except:
         return 0.0

+
 def calc_calmar(cagr, max_drawdown_pct):
     """Calmar = CAGR / |MaxDD| (MaxDD is negative %)"""
     dd = abs(max_drawdown_pct)

         return 99.0
     return float((cagr * 100.0) / dd)

+
 def calc_consecutive_streaks(pnls):
     """Return (max_consec_wins, max_consec_losses) based on pnl sign."""
     max_w = max_l = 0

         max_l = max(max_l, cur_l)
     return int(max_w), int(max_l)

+
 # ============================================================
 # 🧪 THE VALIDATED TRUTH BACKTESTER
 # ============================================================

         self.MAX_SLOTS = 4

         self.GRID_RANGES = {
+            "TITAN": np.linspace(0.40, 0.70, self.GRID_DENSITY),
+            "ORACLE": np.linspace(0.55, 0.80, self.GRID_DENSITY),
+            "SNIPER": np.linspace(0.30, 0.65, self.GRID_DENSITY),
+            "PATTERN": np.linspace(0.30, 0.70, self.GRID_DENSITY),
+            "GOV_SCORE": np.linspace(50.0, 80.0, self.GRID_DENSITY),
+            "HYDRA_THRESH": np.linspace(0.60, 0.90, self.GRID_DENSITY),
+            "LEGACY_THRESH": np.linspace(0.85, 0.98, self.GRID_DENSITY),
         }

         self.TARGET_COINS = [
+            "SOL/USDT",
+            "XRP/USDT",
+            "DOGE/USDT",
+            "ADA/USDT",
+            "AVAX/USDT",
+            "LINK/USDT",
+            "TON/USDT",
+            "INJ/USDT",
+            "APT/USDT",
+            "OP/USDT",
+            "ARB/USDT",
+            "SUI/USDT",
+            "SEI/USDT",
+            "MINA/USDT",
+            "MATIC/USDT",
+            "NEAR/USDT",
+            "RUNE/USDT",
+            "API3/USDT",
+            "FLOKI/USDT",
+            "BABYDOGE/USDT",
+            "SHIB/USDT",
+            "TRX/USDT",
+            "DOT/USDT",
+            "UNI/USDT",
+            "ONDO/USDT",
+            "SNX/USDT",
+            "HBAR/USDT",
+            "XLM/USDT",
+            "AGIX/USDT",
+            "IMX/USDT",
+            "LRC/USDT",
+            "KCS/USDT",
+            "ICP/USDT",
+            "SAND/USDT",
+            "AXS/USDT",
+            "APE/USDT",
+            "GMT/USDT",
+            "CHZ/USDT",
+            "CFX/USDT",
+            "LDO/USDT",
+            "FET/USDT",
+            "RPL/USDT",
+            "MNT/USDT",
+            "RAY/USDT",
+            "CAKE/USDT",
+            "SRM/USDT",
+            "PENDLE/USDT",
+            "ATOM/USDT",
         ]

         self.USE_FIXED_DATES = False
         self.force_end_date = "2024-02-01"

         self.required_timeframes = self._determine_required_timeframes()
+        print(f"🧪 [Backtest V223.1] IMMUTABLE TRUTH. TFs: {self.required_timeframes}")

     def _verify_system_integrity(self):
         errors = []
+        if not getattr(self.proc, "titan", None) or not getattr(self.proc.titan, "model", None):
             errors.append("❌ Titan Engine missing")
+        if not getattr(self.proc, "oracle", None) or not getattr(self.proc.oracle, "model_direction", None):
             errors.append("❌ Oracle Engine missing")
+        if not getattr(self.proc, "pattern_engine", None):
             errors.append("❌ Pattern Engine missing")
+        if not getattr(self.proc, "sniper", None):
             errors.append("❌ Sniper Engine missing")

+        # Hydra integrity (models + heads)
+        if not getattr(self.proc, "guardian_hydra", None) or not getattr(self.proc.guardian_hydra, "models", None):
             errors.append("❌ Hydra Guardian missing/models not loaded")
         else:
             m = self.proc.guardian_hydra.models
+            if "crash" not in m or "giveback" not in m:
                 errors.append("❌ Hydra missing crash/giveback heads")
+
             try:
+                _ = m["crash"].predict_proba
+                _ = m["giveback"].predict_proba
             except:
                 errors.append("❌ Hydra heads must implement predict_proba()")

             raise RuntimeError(f"CRITICAL INTEGRITY FAILURE: {errors}")

     def _determine_required_timeframes(self):
+        tfs = set(["5m", "15m", "1h", "4h"])

         def maybe_add(prefix: str):
             if tf_to_offset(prefix):
                 tfs.add(prefix)

+        if hasattr(self.proc.titan.model, "feature_names"):
             for f in self.proc.titan.model.feature_names:
+                if "_" in f:
+                    maybe_add(f.split("_", 1)[0])

+        if hasattr(self.proc.oracle, "feature_cols"):
             for f in self.proc.oracle.feature_cols:
+                if "_" in f:
+                    maybe_add(f.split("_", 1)[0])

         return list(tfs)
             async with sem:
                 for _ in range(3):
                     try:
+                        return await self.dm.exchange.fetch_ohlcv(sym, "1m", since=timestamp, limit=limit)
                     except:
                         await asyncio.sleep(0.5)
                 return []

         chunk_size = 50
         for i in range(0, len(tasks), chunk_size):
+            res = await asyncio.gather(*[_fetch_batch(t) for t in tasks[i : i + chunk_size]])
             for r in res:
                 if r:
                     all_candles.extend(r)

         if not all_candles:
             return None

+        df = pd.DataFrame(all_candles, columns=["timestamp", "o", "h", "l", "c", "v"])
+        df.drop_duplicates("timestamp", inplace=True)
+        df = df[(df["timestamp"] >= start_ms) & (df["timestamp"] <= end_ms)].sort_values("timestamp")
         return df.values.tolist()
+    # --------------------------
+    # Indicator Hardening Layer
+    # --------------------------
+    @staticmethod
+    def _safe_bbands(close: pd.Series, length=20, std=2.0):
+        basis = close.rolling(length).mean()
+        dev = close.rolling(length).std(ddof=0)
+        upper = basis + std * dev
+        lower = basis - std * dev
+        width = (upper - lower) / (basis.abs() + 1e-12)
+        pct = (close - lower) / ((upper - lower) + 1e-12)
+        return lower, upper, width, pct
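Note: this fallback recomputes Bollinger bands from first principles when pandas_ta returns nothing usable — width is the band span normalized by the basis, and pct is %B (0 at the lower band, 1 at the upper). A usage sketch (illustrative series):

    import pandas as pd
    close = pd.Series([10.0, 10.2, 10.1, 10.4, 10.3] * 6)
    lower, upper, width, pct = HeavyDutyBacktester._safe_bbands(close, length=20, std=2.0)
    # the first length-1 values are NaN until the rolling window fills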
+
+    def _calculate_all_indicators(self, df: pd.DataFrame):
+        cols = ["open", "high", "low", "close", "volume"]
         for c in cols:
+            df[c] = pd.to_numeric(df[c], errors="coerce")

+        df[cols] = df[cols].replace([np.inf, -np.inf], np.nan)
+        df.dropna(subset=["close"], inplace=True)

+        c = df["close"].astype(np.float64)
+        h = df["high"].astype(np.float64)
+        l = df["low"].astype(np.float64)
+        v = df["volume"].astype(np.float64)
+
+        # EMAs
         for span in [9, 20, 21, 50, 200]:
+            df[f"ema{span}"] = c.ewm(span=span, adjust=False).mean()
+
+        # BBANDS (hardened)
+        if len(df) < 30:
+            df["lower_bb"] = c
+            df["upper_bb"] = c
+            df["bb_width"] = 0.0
+            df["bb_pct"] = 0.5
         else:
+            bb = ta.bbands(c, length=20, std=2.0)
+            if bb is None or (isinstance(bb, pd.DataFrame) and bb.shape[1] < 3):
+                lower, upper, width, pct = self._safe_bbands(c, 20, 2.0)
+                df["lower_bb"] = lower
+                df["upper_bb"] = upper
+                df["bb_width"] = width
+                df["bb_pct"] = pct
+            else:
+                if isinstance(bb, pd.DataFrame):
+                    col_lower = [x for x in bb.columns if "BBL" in x]
+                    col_upper = [x for x in bb.columns if "BBU" in x]
+                    col_width = [x for x in bb.columns if "BBB" in x]  # bandwidth
+                    col_pct = [x for x in bb.columns if "BBP" in x]  # %B
+
+                    if col_lower and col_upper:
+                        df["lower_bb"] = bb[col_lower[0]]
+                        df["upper_bb"] = bb[col_upper[0]]
+                    else:
+                        lower, upper, width, pct = self._safe_bbands(c, 20, 2.0)
+                        df["lower_bb"] = lower
+                        df["upper_bb"] = upper
+
+                    if col_width:
+                        df["bb_width"] = bb[col_width[0]]
+                    else:
+                        df["bb_width"] = (df["upper_bb"] - df["lower_bb"]) / (c.abs() + 1e-12)

+                    if col_pct:
+                        df["bb_pct"] = bb[col_pct[0]]
+                    else:
+                        df["bb_pct"] = (c - df["lower_bb"]) / ((df["upper_bb"] - df["lower_bb"]) + 1e-12)
+                else:
+                    lower, upper, width, pct = self._safe_bbands(c, 20, 2.0)
+                    df["lower_bb"] = lower
+                    df["upper_bb"] = upper
+                    df["bb_width"] = width
+                    df["bb_pct"] = pct
+
+        # MACD (hardened)
         macd = ta.macd(c)
+        if macd is not None and isinstance(macd, pd.DataFrame) and macd.shape[1] >= 3:
+            df["MACD"] = macd.iloc[:, 0]
+            df["MACD_h"] = macd.iloc[:, 1]
+            df["MACD_s"] = macd.iloc[:, 2]
         else:
+            df["MACD"] = 0.0
+            df["MACD_h"] = 0.0
+            df["MACD_s"] = 0.0

+        # RSI/ATR/ADX
+        df["RSI"] = ta.rsi(c, length=14).fillna(50)
+        df["ATR"] = ta.atr(h, l, c, length=14).fillna(0)
+
+        adx_df = ta.adx(h, l, c, length=14)
+        if adx_df is not None and isinstance(adx_df, pd.DataFrame) and adx_df.shape[1] >= 1:
+            df["ADX"] = adx_df.iloc[:, 0].fillna(0)
+        else:
+            df["ADX"] = 0.0

         try:
+            df["CHOP"] = ta.chop(h, l, c, length=14).fillna(50)
         except:
+            df["CHOP"] = 50

         try:
+            df["vwap"] = ta.vwap(h, l, c, v).fillna(c)
         except:
+            df["vwap"] = c

+        # Distances / derived
+        df["EMA_9_dist"] = (c / (df["ema9"] + 1e-12)) - 1
+        df["EMA_21_dist"] = (c / (df["ema21"] + 1e-12)) - 1
+        df["EMA_50_dist"] = (c / (df["ema50"] + 1e-12)) - 1
+        df["EMA_200_dist"] = (c / (df["ema200"] + 1e-12)) - 1
+        df["VWAP_dist"] = (c / (df["vwap"] + 1e-12)) - 1
+        df["ATR_pct"] = (df["ATR"] / (c + 1e-12)) * 100

         mean_vol = v.rolling(50).mean() + 1e-9
+        df["rel_vol"] = v / mean_vol
+        df["log_ret"] = np.concatenate([[0], np.diff(np.log(c + 1e-9))])

         return df.fillna(0)
     async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
+        safe_sym = sym.replace("/", "_")
         period_suffix = f"{start_ms}_{end_ms}"
         scores_file = f"{CACHE_DIR}/{safe_sym}_{period_suffix}_processed.pkl"

         print(f" ⚙️ [CPU] Processing {sym} (Truth Mode)...", flush=True)
         t0 = time.time()

+        df_1m = pd.DataFrame(candles, columns=["timestamp", "open", "high", "low", "close", "volume"])
+        df_1m["datetime"] = pd.to_datetime(df_1m["timestamp"] + 60000, unit="ms", utc=True)
+        df_1m.set_index("datetime", inplace=True)
         df_1m = df_1m.sort_index()

         df_1m = self._calculate_all_indicators(df_1m)

+        if len(df_1m) < 300:
+            raise RuntimeError(f"{sym} has too few valid candles after cleaning: {len(df_1m)}")
+
         arr_ts_1m = (df_1m.index.astype(np.int64) // 10**6).values
+        fast_1m_close = df_1m["close"].values.astype(np.float32)

         numpy_htf = {}
+        agg_dict = {"open": "first", "high": "max", "low": "min", "close": "last", "volume": "sum"}
         for tf_str in self.required_timeframes:
             offset = tf_to_offset(tf_str)
             if not offset:
                 continue
+            resampled = df_1m.resample(offset, label="right", closed="right").agg(agg_dict).dropna()
             resampled = self._calculate_all_indicators(resampled)
+            if len(resampled) == 0:
+                continue
+            resampled["timestamp"] = resampled.index.astype(np.int64) // 10**6
             numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}

+        if "1h" not in numpy_htf:
             raise RuntimeError(f"CRITICAL: '1h' missing for {sym}.")

         def get_safe_map(tf):
+            if tf not in numpy_htf or len(numpy_htf[tf]["timestamp"]) == 0:
                 return np.full(len(arr_ts_1m), -1, dtype=np.int32)
+            htf_ts = numpy_htf[tf]["timestamp"]
+            idx = np.searchsorted(htf_ts, arr_ts_1m, side="right") - 1
             return idx.astype(np.int32)

         maps = {tf: get_safe_map(tf) for tf in self.required_timeframes}
  validity_mask = np.ones(len(arr_ts_1m), dtype=bool)
520
  for tf in maps:
521
  validity_mask &= (maps[tf] >= 0)
522
  validity_mask[:200] = False
523
 
524
+ # 1. Pattern (Cached)
525
  global_pattern_scores = np.zeros(len(arr_ts_1m), dtype=np.float32)
526
  pat_cache_file = os.path.join(PATTERN_CACHE_DIR, f"{safe_sym}_{period_suffix}_pat.pkl")
527
  pattern_results_map = {}
528
 
529
  if os.path.exists(pat_cache_file):
530
+ with open(pat_cache_file, "rb") as f:
531
  pattern_results_map = pickle.load(f)
532
+ elif "15m" in numpy_htf:
533
+ ts_15m = numpy_htf["15m"]["timestamp"]
534
+ cols = ["timestamp", "open", "high", "low", "close", "volume"]
535
+ df_15m_source = pd.DataFrame({c: numpy_htf["15m"][c] for c in cols})
 
536
  for i in range(200, len(df_15m_source)):
537
+ window = df_15m_source.iloc[i - 200 : i + 1]
538
+ ohlcv_input = {"15m": window.values.tolist()}
539
  try:
540
  res = await self.proc.pattern_engine.detect_chart_patterns(ohlcv_input)
541
+ pattern_results_map[ts_15m[i]] = res.get("pattern_confidence", 0.0)
542
  except:
543
  pass
544
+ with open(pat_cache_file, "wb") as f:
 
545
  pickle.dump(pattern_results_map, f)
546
 
547
+ if "15m" in maps and "15m" in numpy_htf:
548
+ map_15 = maps["15m"]
549
+ ts_15_arr = numpy_htf["15m"]["timestamp"]
550
  for i in range(len(arr_ts_1m)):
551
  if not validity_mask[i]:
552
  continue
 
554
  if idx >= 0:
555
  global_pattern_scores[i] = pattern_results_map.get(ts_15_arr[idx], 0.0)
556
 
+        # 2. Governance (Cached)
         gov_scores_final = np.zeros(len(arr_ts_1m), dtype=np.float32)
         gov_cache_file = os.path.join(GOV_CACHE_DIR, f"{safe_sym}_{period_suffix}_gov.pkl")
         gov_results_map = {}

         if os.path.exists(gov_cache_file):
+            with open(gov_cache_file, "rb") as f:
                 gov_results_map = pickle.load(f)
+        elif "15m" in numpy_htf:
+            cols = ["timestamp", "open", "high", "low", "close", "volume"]
+            df_15m_g = pd.DataFrame({c: numpy_htf["15m"][c] for c in cols})
+            ts_15m = numpy_htf["15m"]["timestamp"]
+            has_1h = "1h" in numpy_htf
+            df_1h_g = pd.DataFrame({c: numpy_htf["1h"][c] for c in cols}) if has_1h else None
+            ts_1h = numpy_htf["1h"]["timestamp"] if has_1h else None

             for i in range(200, len(df_15m_g)):
                 curr_ts = ts_15m[i]
+                win_15 = df_15m_g.iloc[i - 120 : i + 1]
+                ohlcv_input = {"15m": win_15.values.tolist()}

                 if has_1h:
+                    idx_1h = np.searchsorted(ts_1h, curr_ts, side="right") - 1
                     if idx_1h >= 50:
+                        ohlcv_input["1h"] = df_1h_g.iloc[idx_1h - 60 : idx_1h + 1].values.tolist()

                 try:
                     res = await self.gov_engine.evaluate_trade(sym, ohlcv_input, {}, "NORMAL", False, has_1h)
+                    score = res.get("governance_score", 0.0) if res.get("grade") != "REJECT" else 0.0
                     gov_results_map[curr_ts] = score
                 except:
                     pass

+            with open(gov_cache_file, "wb") as f:
                 pickle.dump(gov_results_map, f)

+        if "15m" in maps and "15m" in numpy_htf:
+            map_15 = maps["15m"]
+            ts_15_arr = numpy_htf["15m"]["timestamp"]
             for i in range(len(arr_ts_1m)):
                 if not validity_mask[i]:
                     continue

                 if idx >= 0:
                     gov_scores_final[i] = gov_results_map.get(ts_15_arr[idx], 0.0)
+        # 3. Market State
+        map_1h = maps["1h"]
         valid_1h = map_1h >= 0
         idx_1h = map_1h[valid_1h]

+        h1_chop = numpy_htf["1h"]["CHOP"][idx_1h]
+        h1_adx = numpy_htf["1h"]["ADX"][idx_1h]
+        h1_atr_pct = numpy_htf["1h"]["ATR_pct"][idx_1h]

         market_ok = np.ones(len(arr_ts_1m), dtype=bool)
         market_ok[valid_1h] = ~((h1_chop > 61.8) | ((h1_atr_pct < 0.3) & (h1_adx < 20)))

         coin_state = np.zeros(len(arr_ts_1m), dtype=np.int8)
+        h1_rsi = numpy_htf["1h"]["RSI"][idx_1h]
+        h1_bbw = numpy_htf["1h"]["bb_width"][idx_1h]
+        h1_upper = numpy_htf["1h"]["upper_bb"][idx_1h]
+        h1_ema20 = numpy_htf["1h"]["ema20"][idx_1h]
+        h1_ema50 = numpy_htf["1h"]["ema50"][idx_1h]
+        h1_ema200 = numpy_htf["1h"]["ema200"][idx_1h]
+        h1_close = numpy_htf["1h"]["close"][idx_1h]
+        h1_rel_vol = numpy_htf["1h"]["rel_vol"][idx_1h]

         mask_acc = (h1_bbw < 0.20) & (h1_rsi >= 35) & (h1_rsi <= 65)
         mask_safe = (h1_adx > 25) & (h1_ema20 > h1_ema50) & (h1_ema50 > h1_ema200) & (h1_rsi > 50) & (h1_rsi < 75)

         coin_state[~validity_mask] = 0
         coin_state[~market_ok] = 0
+ # 4. Titan & Oracle
638
  titan_cols = self.proc.titan.model.feature_names
639
  t_vecs = []
 
640
  for col in titan_cols:
641
+ parts = col.split("_", 1)
642
  if len(parts) < 2:
643
  raise ValueError(f"Titan Feature Format Error: {col}")
644
+ tf = parts[0]
645
+ raw_feat = parts[1]
646
+ lookup_key = "bb_pct" if raw_feat in ["BB_p", "BB_pct"] else ("bb_width" if raw_feat == "BB_w" else raw_feat)
647
 
648
  if tf not in numpy_htf:
649
  raise ValueError(f"Titan requires TF not built: {tf} (feature: {col})")
650
+ if lookup_key not in numpy_htf[tf] and lookup_key != "timestamp":
651
  raise ValueError(f"Missing Titan Feature: {col}")
652
 
653
  idx = maps[tf]
654
  vals = np.zeros(len(arr_ts_1m), dtype=np.float32)
655
  valid = idx >= 0
656
 
657
+ if lookup_key == "timestamp":
658
+ vals[valid] = numpy_htf[tf]["timestamp"][idx[valid]]
659
  else:
660
  vals[valid] = numpy_htf[tf][lookup_key][idx[valid]]
 
661
  t_vecs.append(vals)
662
 
663
  X_TITAN = np.column_stack(t_vecs)
664
+ global_titan_scores = self.proc.titan.model.predict(xgb.DMatrix(X_TITAN, feature_names=titan_cols))
 
 
665
 
666
  oracle_cols = self.proc.oracle.feature_cols
667
  o_vecs = []
 
668
  for col in oracle_cols:
669
+ if col == "sim_titan_score":
670
  o_vecs.append(global_titan_scores)
671
+ elif col in ["sim_pattern_score", "pattern_score"]:
672
  o_vecs.append(global_pattern_scores)
673
+ elif col == "sim_mc_score":
674
+ o_vecs.append(np.zeros(len(arr_ts_1m), dtype=np.float32))
675
  else:
676
+ parts = col.split("_", 1)
677
  if len(parts) != 2:
678
  raise ValueError(f"Oracle Feature Error: {col}")
679
  tf, key = parts
 
680
  if tf not in numpy_htf:
681
  raise ValueError(f"Oracle requires TF not built: {tf} (feature: {col})")
682
  if key not in numpy_htf[tf]:
 
692
  preds_o = self.proc.oracle.model_direction.predict(X_ORACLE)
693
  if isinstance(preds_o, np.ndarray) and len(preds_o.shape) > 1:
694
  preds_o = preds_o[:, 0]
695
+ global_oracle_scores = preds_o.astype(np.float32)

+        # 5. Sniper
         df_sniper_feats = self.proc.sniper._calculate_features_live(df_1m)
         X_sniper = df_sniper_feats[self.proc.sniper.feature_names].fillna(0)
         preds_accum = np.zeros(len(X_sniper), dtype=np.float32)
         for model in self.proc.sniper.models:
+            preds_accum += model.predict(X_sniper).astype(np.float32)
+        global_sniper_scores = (preds_accum / max(1, len(self.proc.sniper.models))).astype(np.float32)
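+        # Plain ensemble mean over the sniper models; max(1, n) guards the division when the model list is empty.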

+        # 6. Hydra Static
+        map_5 = maps["5m"]
+        map_15 = maps["15m"]
+        map_1 = maps.get("1h", map_15)

+        f_rsi_1m = df_1m["RSI"].values.astype(np.float32)

         f_rsi_5m = np.zeros(len(arr_ts_1m), dtype=np.float32)
         v5 = map_5 >= 0
+        if "5m" in numpy_htf:
+            f_rsi_5m[v5] = numpy_htf["5m"]["RSI"][map_5[v5]].astype(np.float32)

         f_rsi_15m = np.zeros(len(arr_ts_1m), dtype=np.float32)
         v15 = map_15 >= 0
+        if "15m" in numpy_htf:
+            f_rsi_15m[v15] = numpy_htf["15m"]["RSI"][map_15[v15]].astype(np.float32)

         f_dist_1h = np.zeros(len(arr_ts_1m), dtype=np.float32)
         v1 = map_1 >= 0
+        ema20_1h = numpy_htf["1h"]["ema20"][map_1[v1]].astype(np.float32)
+        close_1h = numpy_htf["1h"]["close"][map_1[v1]].astype(np.float32)
         f_dist_1h[v1] = (close_1h - ema20_1h) / (close_1h + 1e-12)

+        hydra_static = np.column_stack(
+            [
+                f_rsi_1m,
+                f_rsi_5m,
+                f_rsi_15m,
+                df_1m["bb_width"].values.astype(np.float32),
+                df_1m["rel_vol"].values.astype(np.float32),
+                f_dist_1h,
+                (df_1m["ATR_pct"].values.astype(np.float32) / 100.0),
+            ]
+        ).astype(np.float32)
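+        # Hydra static layout (7 cols): RSI 1m/5m/15m, 1m bb_width, 1m rel_vol, distance to 1h EMA20, 1m ATR fraction.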
+
+        # SAVE (signals filter)
+        min_gov = float(self.GRID_RANGES["GOV_SCORE"][0])
+        min_oracle = float(self.GRID_RANGES["ORACLE"][0])
+        min_titan = float(self.GRID_RANGES["TITAN"][0])
+        min_sniper = float(self.GRID_RANGES["SNIPER"][0])
+        min_pattern = float(self.GRID_RANGES["PATTERN"][0])
+
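+        # min_* are the lowest values in each grid range: rows that cannot pass even the loosest config are dropped up front.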
         filter_mask = (
             validity_mask
             & (coin_state > 0)
 

         valid_idxs = np.where(filter_mask)[0]

+        signals_df = pd.DataFrame(
+            {
+                "timestamp": arr_ts_1m[valid_idxs],
+                "symbol": sym,
+                "close": fast_1m_close[valid_idxs],
+                "coin_state": coin_state[valid_idxs],
+                "gov_score": gov_scores_final[valid_idxs],
+                "titan_score": global_titan_scores[valid_idxs].astype(np.float32),
+                "oracle_conf": global_oracle_scores[valid_idxs].astype(np.float32),
+                "sniper_score": global_sniper_scores[valid_idxs].astype(np.float32),
+                "pattern_score": global_pattern_scores[valid_idxs].astype(np.float32),
+            }
+        )

         sim_data = {
+            "timestamp": arr_ts_1m.astype(np.int64),
+            "close": fast_1m_close,
+            "high": df_1m["high"].values.astype(np.float32),
+            "low": df_1m["low"].values.astype(np.float32),
+            "atr": df_1m["ATR"].values.astype(np.float32),
+            "hydra_static": hydra_static,
+            "oracle_conf": global_oracle_scores.astype(np.float32),
+            "titan_score": global_titan_scores.astype(np.float32),
         }

+        pd.to_pickle({"signals": signals_df, "sim_data": sim_data}, scores_file)
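+        # Per-symbol cache: the grid search replays these arrays directly and never recomputes features.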
         dt = time.time() - t0
         print(f" ✅ [{sym}] Processed in {dt:.2f}s. Signals: {len(signals_df)}")
         gc.collect()

         ms_e = int(dt_e.timestamp() * 1000)

         for sym in self.TARGET_COINS:
+            try:
+                c = await self._fetch_all_data_fast(sym, ms_s, ms_e)
+                if c:
+                    await self._process_data_in_memory(sym, c, ms_s, ms_e)
+            except Exception as e:
+                print(f"[WARN] {sym} skipped due to error: {e}")
+                traceback.print_exc()
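+                # Per-symbol isolation: one bad fetch/process logs a warning instead of aborting the whole run.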
 
     def _flush_position_interval(
+        self,
+        cfg,
+        open_sym,
+        pos,
+        curr_ts,
+        sim_env,
+        crash_model,
+        giveback_model,
+        fees_pct,
+        trade_pnls,
+        trade_returns,
+        trade_durations,
+        equity_curve,
+        cash_bal,
+        wins_losses,
+        last_update_map,
+        end_idx_override=None,
     ):
         c_data = sim_env[open_sym]
+        full_ts = c_data["timestamp"]

         start_idx = int(last_update_map.get(open_sym, 0))
         if start_idx < 0:
             start_idx = 0

         if end_idx_override is None:
+            end_idx = int(np.searchsorted(full_ts, curr_ts, side="right"))
         else:
             end_idx = int(end_idx_override)

         if end_idx <= start_idx:
             return cash_bal, False

+        interval_high = c_data["high"][start_idx:end_idx]
+        interval_low = c_data["low"][start_idx:end_idx]
+        interval_close = c_data["close"][start_idx:end_idx]
+        interval_atr = c_data["atr"][start_idx:end_idx]

+        h_static = c_data["hydra_static"][start_idx:end_idx]
+        h_oracle = c_data["oracle_conf"][start_idx:end_idx]
+        h_titan = c_data["titan_score"][start_idx:end_idx]

+        entry_p = float(pos["entry_p"])
+        entry_time = int(pos["entry_ts"])

+        prev_high = float(pos.get("highest_price", entry_p))
         current_highs = np.maximum.accumulate(np.concatenate([[prev_high], interval_high]))[1:]
+        pos["highest_price"] = float(current_highs[-1])

         durations = (full_ts[start_idx:end_idx] - entry_time) / 60000.0

+        # ✅ Safe ATR math
         sl_dist = np.maximum(1.5 * interval_atr, 1e-8)
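+        # The 1e-8 floor keeps norm_pnl / max_pnl finite when ATR collapses to zero on flat bars.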
 
         pnl = interval_close - entry_p
         norm_pnl = pnl / sl_dist
         max_pnl = (current_highs - entry_p) / sl_dist

         zeros = np.zeros(len(interval_close), dtype=np.float32)
         h_dynamic = np.column_stack([norm_pnl, max_pnl, zeros, zeros, durations]).astype(np.float32)

         threes = np.full(len(interval_close), 3.0, dtype=np.float32)
         h_context = np.column_stack([zeros, h_oracle, h_titan, threes]).astype(np.float32)
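+        # X_H layout: 7 static + 5 dynamic (norm_pnl, max_pnl, two zero pads, duration) + 4 context (pad, oracle, titan, const 3.0) = 16 features.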
 
         X_H = np.column_stack([h_static, h_dynamic, h_context]).astype(np.float32)

         crash_probs = crash_model.predict_proba(X_H)[:, 1]
         give_probs = giveback_model.predict_proba(X_H)[:, 1]

+        sl_hit = interval_low < pos["sl_p"]
+        tp_hit = interval_high > pos["tp_p"]
+        hydra_hit = (crash_probs > cfg["HYDRA_THRESH"]) | (give_probs > cfg["HYDRA_THRESH"])
+        legacy_hit = (crash_probs > cfg["LEGACY_THRESH"]) | (give_probs > cfg["LEGACY_THRESH"])
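+        # Four vectorized exit triggers per bar: hard SL, hard TP, and two model-probability thresholds (hydra / legacy).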

         any_exit = sl_hit | tp_hit | legacy_hit | hydra_hit
         last_update_map[open_sym] = end_idx

         exit_ts = int(full_ts[start_idx + idx])

         if sl_hit[idx]:
+            exit_p = float(pos["sl_p"]) * (1 - self.SLIPPAGE_PCT)
         elif tp_hit[idx]:
+            exit_p = float(pos["tp_p"]) * (1 - self.SLIPPAGE_PCT)
         elif legacy_hit[idx]:
             exit_p = float(interval_close[idx]) * (1 - (self.SLIPPAGE_PCT * 2.0))
         else:
             exit_p = float(interval_close[idx]) * (1 - self.SLIPPAGE_PCT)

+        net = (pos["qty"] * exit_p) * (1 - fees_pct)
         cash_bal += net
+        pnl_real = float(net - pos["cost"])

         trade_pnls.append(pnl_real)
+        trade_returns.append(pnl_real / (float(pos["cost"]) + 1e-12))
         trade_durations.append((exit_ts - entry_time) / 60000.0)
         equity_curve.append(float(cash_bal))

         if pnl_real > 0:
+            wins_losses["wins"] += 1
         else:
+            wins_losses["losses"] += 1

         return cash_bal, True
 
     def _worker_optimize(self, combinations_batch, scores_files, initial_capital, fees_pct, max_slots, target_state):
         all_signals = []
         sim_env = {}
+        crash_model = self.proc.guardian_hydra.models["crash"]
+        giveback_model = self.proc.guardian_hydra.models["giveback"]

         for f in scores_files:
             try:
                 data = pd.read_pickle(f)
+                sig = data.get("signals", None)
                 if sig is None or len(sig) == 0:
                     continue
+                sig = optimize_dataframe_memory(sig)
                 all_signals.append(sig)
+                sym = str(sig["symbol"].iloc[0])
+                sim_env[sym] = data["sim_data"]
             except Exception:
                 pass

         if not all_signals:
             return []

+        timeline_df = pd.concat(all_signals).sort_values("timestamp").reset_index(drop=True)
+        t_ts = timeline_df["timestamp"].values.astype(np.int64)
+        t_sym = timeline_df["symbol"].values
+        t_close = timeline_df["close"].values.astype(np.float64)
+        t_state = timeline_df["coin_state"].values
+        t_gov = timeline_df["gov_score"].values.astype(np.float64)
+        t_oracle = timeline_df["oracle_conf"].values.astype(np.float64)
+        t_titan = timeline_df["titan_score"].values.astype(np.float64)
+        t_sniper = timeline_df["sniper_score"].values.astype(np.float64)
+        t_pattern = timeline_df["pattern_score"].values.astype(np.float64)
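+        # Merge all symbols into one time-ordered event stream; the portfolio loop walks it once per config.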
 
 
         del all_signals, timeline_df
         gc.collect()

+        start_ms = int(t_ts[0]) if len(t_ts) else 0
+        end_ms = int(t_ts[-1]) if len(t_ts) else 0
 
 
 
         res = []
         BATCH_SIZE = 300
         USE_MARK_TO_MARKET_EQUITY = True

         for i in range(0, len(combinations_batch), BATCH_SIZE):
+            batch = combinations_batch[i : i + BATCH_SIZE]

             for cfg in batch:
                 cash_bal = float(initial_capital)
                 active_positions = {}

                 trade_pnls = []
                 trade_returns = []
                 trade_durations = []

                 equity_curve = [float(initial_capital)]
                 equity_ts = [start_ms]

+                wins_losses = {"wins": 0, "losses": 0}
                 exposure_steps = 0
                 def mark_to_market_equity(curr_ts):

                         if px is None:
                             continue
                         has_open = True
+                        open_val += (pos["qty"] * px * (1 - self.SLIPPAGE_PCT)) * (1 - fees_pct)
                     if has_open:
                         exposure_steps += 1
                     eq = cash_bal + open_val
 
                     sym = str(sym)
                     last_price[sym] = float(p)

+                    # 1) Update positions
                     to_close = []
                     for open_sym, pos in list(active_positions.items()):
                         cash_bal, closed = self._flush_position_interval(
+                            cfg,
+                            open_sym,
+                            pos,
+                            curr_ts,
+                            sim_env,
+                            crash_model,
+                            giveback_model,
+                            fees_pct,
+                            trade_pnls,
+                            trade_returns,
+                            trade_durations,
+                            equity_curve,
+                            cash_bal,
+                            wins_losses,
+                            last_update_map,
                         )
                         if closed:
                             to_close.append(open_sym)
1023
  if USE_MARK_TO_MARKET_EQUITY:
1024
  mark_to_market_equity(curr_ts)
1025
 
1026
+ # 2) Entry
1027
  is_valid = (
1028
  (int(c_state) == int(target_state))
1029
+ and (float(gov) >= float(cfg["GOV_SCORE"]))
1030
+ and (float(oracle) >= float(cfg["ORACLE"]))
1031
+ and (float(titan) >= float(cfg["TITAN"]))
1032
+ and (float(sniper) >= float(cfg["SNIPER"]))
1033
+ and (float(pattern) >= float(cfg["PATTERN"]))
1034
  )
1035
 
1036
  if is_valid and sym not in active_positions:
1037
  slots = 1 if cash_bal < self.MIN_CAPITAL_FOR_SPLIT else int(max_slots)
 
1038
  if len(active_positions) < slots and cash_bal >= 5.0:
1039
  size = (cash_bal * 0.95) if cash_bal < self.MIN_CAPITAL_FOR_SPLIT else (cash_bal / max_slots)
 
1040
  if size >= 5.0:
1041
  ep = float(p) * (1 + self.SLIPPAGE_PCT)
1042
  fee = float(size) * fees_pct
1043
  cost = float(size)
1044
  qty = (cost - fee) / (ep + 1e-12)
1045
 
1046
+ sym_ts = sim_env[sym]["timestamp"]
1047
+ idx = int(np.searchsorted(sym_ts, curr_ts, side="right") - 1)
1048
  idx = max(0, min(idx, len(sym_ts) - 1))
1049
+ atr_val = float(sim_env[sym]["atr"][idx])
1050
 
1051
  active_positions[sym] = {
1052
+ "qty": float(qty),
1053
+ "entry_p": float(ep),
1054
+ "cost": float(cost),
1055
+ "entry_ts": int(curr_ts),
1056
+ "sl_p": float(ep - 1.5 * atr_val),
1057
+ "tp_p": float(ep + 2.5 * atr_val),
1058
+ "highest_price": float(ep),
1059
  }
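+                                # Bracket from the entry bar's ATR: stop 1.5×ATR below, target 2.5×ATR above (≈1.67 reward/risk).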
 
                                 cash_bal -= float(cost)
                                 last_update_map[sym] = min(idx + 1, len(sym_ts))

+                # Tail flush
                 for open_sym, pos in list(active_positions.items()):
                     cash_bal, closed = self._flush_position_interval(
+                        cfg,
+                        open_sym,
+                        pos,
+                        int(sim_env[open_sym]["timestamp"][-1]),
+                        sim_env,
+                        crash_model,
+                        giveback_model,
+                        fees_pct,
+                        trade_pnls,
+                        trade_returns,
+                        trade_durations,
+                        equity_curve,
+                        cash_bal,
+                        wins_losses,
+                        last_update_map,
+                        end_idx_override=len(sim_env[open_sym]["timestamp"]),
                     )
 
                     if closed:
                         del active_positions[open_sym]
                     else:
+                        # Force Close (Conservative)
                         c_data = sim_env[open_sym]
+                        last_p = float(c_data["close"][-1])
+                        net = (pos["qty"] * last_p * (1 - self.SLIPPAGE_PCT)) * (1 - fees_pct)
+                        pnl_val = float(net - pos["cost"])
                         cash_bal += float(net)

                         trade_pnls.append(pnl_val)
+                        trade_returns.append(pnl_val / (float(pos["cost"]) + 1e-12))
+                        entry_ts = int(pos["entry_ts"])
+                        last_ts = int(c_data["timestamp"][-1])
                         trade_durations.append((last_ts - entry_ts) / 60000.0)
                         equity_curve.append(float(cash_bal))

                         if pnl_val > 0:
+                            wins_losses["wins"] += 1
                         else:
+                            wins_losses["losses"] += 1

                         del active_positions[open_sym]
 
 
1110
 
1111
  max_dd = calc_max_drawdown(equity_curve)
1112
  ulcer = calc_ulcer_index(equity_curve)
 
1113
  wins_list = [p for p in trade_pnls if p > 0]
1114
  loss_list = [p for p in trade_pnls if p <= 0]
 
1115
  prof_fac = calc_profit_factor(wins_list, loss_list)
 
1116
 
1117
+ mean_pnl = float(np.mean(trade_pnls))
1118
+ std_pnl = float(np.std(trade_pnls))
1119
+ sqn = float((mean_pnl / std_pnl) * np.sqrt(len(trade_pnls))) if std_pnl > 0 else 0.0
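+                # SQN (System Quality Number): (mean PnL / std PnL) * sqrt(N), a t-statistic-style quality score.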

                 sharpe = calc_sharpe(trade_returns)
                 sortino = calc_sortino(trade_returns)

                 med_dur = float(np.median(trade_durations)) if trade_durations else 0.0
                 max_w_streak, max_l_streak = calc_consecutive_streaks(trade_pnls)

+                payoff = (
+                    float(np.mean(wins_list) / max(abs(np.mean(loss_list)), 1e-12))
+                    if (len(wins_list) > 0 and len(loss_list) > 0)
+                    else 99.0
+                )
+
+                res.append(
+                    {
+                        "config": cfg,
+                        "net_profit": float(cash_bal - initial_capital),
+                        "total_trades": int(len(trade_pnls)),
+                        "final_balance": float(cash_bal),
+                        "win_rate": float((wins_losses["wins"] / len(trade_pnls)) * 100.0),
+                        "sqn": sqn,
+                        "max_drawdown": float(max_dd),
+                        "ulcer_index": ulcer,
+                        "profit_factor": prof_fac,
+                        "payoff_ratio": payoff,
+                        "sharpe": sharpe,
+                        "sortino": sortino,
+                        "cagr": cagr,
+                        "calmar": calmar,
+                        "expectancy": mean_pnl,
+                        "exposure_pct": exposure_pct,
+                        "avg_trade_duration_min": avg_dur,
+                        "median_trade_duration_min": med_dur,
+                        "max_consec_wins": max_w_streak,
+                        "max_consec_losses": max_l_streak,
+                    }
+                )

             gc.collect()
 

         for state_name, state_id in [("ACCUMULATION", 1), ("SAFE_TREND", 2), ("EXPLOSIVE", 3)]:
             print(f"\n🌀 Optimizing [{state_name}]...")
+            results = self._worker_optimize(combos, files, self.INITIAL_CAPITAL, self.TRADING_FEES, self.MAX_SLOTS, state_id)

             if not results:
                 continue

+            results.sort(key=lambda x: (x["calmar"], x["sqn"]), reverse=True)
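+            # Rank by Calmar first (return vs. drawdown), SQN as tie-breaker — risk-adjusted rather than raw profit.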
             best = results[0]

             print(f"🏆 BEST [{state_name}]:")

             print(f" 🎲 SQN: {best['sqn']:.2f} | PF: {best['profit_factor']:.2f} | Payoff: {best['payoff_ratio']:.2f}")
             print(f" 📉 MaxDD: {best['max_drawdown']:.2f}% | Ulcer: {best['ulcer_index']:.2f}")
             print(f" 📈 Sharpe/Sortino: {best['sharpe']:.2f} / {best['sortino']:.2f}")
+            print(f" 🧮 CAGR/Calmar: {(best['cagr']*100):.2f}% / {best['calmar']:.2f}")
             print(f" ⏱️ Exposure: {best['exposure_pct']:.1f}% | Dur(avg/med): {best['avg_trade_duration_min']:.1f}/{best['median_trade_duration_min']:.1f} min")
             print(f" 🔁 Streaks W/L: {best['max_consec_wins']} / {best['max_consec_losses']}")
             print(f" ⚙️ Config: {best['config']}")
 
+
+# ============================================================
+# Runner (Guaranteed cleanup: exchange.close())
+# ============================================================
 async def run_strategic_optimization_task():
     r2 = R2Service()
     dm = DataManager(None, None, r2)
     proc = MLProcessor(dm)

+    try:
+        await dm.initialize()
+        await proc.initialize()
+
+        if getattr(proc, "guardian_hydra", None):
+            proc.guardian_hydra.set_silent_mode(True)

+        opt = HeavyDutyBacktester(dm, proc)
+        await opt.run_optimization()

+    except Exception as e:
+        print(f"[ERROR] ❌ Backtest Failed: {e}")
+        traceback.print_exc()
+
+    finally:
+        # Explicit close for KuCoin/aiohttp
+        try:
+            ex = getattr(dm, "exchange", None)
+            if ex is not None:
+                await ex.close()
+        except Exception:
+            pass
+
+        try:
+            await dm.close()
+        except Exception:
+            pass


 if __name__ == "__main__":
     asyncio.run(run_strategic_optimization_task())