Riy777 commited on
Commit
bbf82d6
·
verified ·
1 Parent(s): a98e9c9

Update backtest_engine.py

Browse files
Files changed (1) hide show
  1. backtest_engine.py +232 -196
backtest_engine.py CHANGED
@@ -1,5 +1,5 @@
1
  # ============================================================
2
- # 🧪 backtest_engine.py (V118.4 - GEM-Architect: Bulletproof)
3
  # ============================================================
4
 
5
  import asyncio
@@ -17,7 +17,7 @@ import traceback
17
  from datetime import datetime, timezone
18
  from typing import Dict, Any, List
19
 
20
- # ✅ استيراد المحركات
21
  try:
22
  from ml_engine.processor import MLProcessor, SystemLimits
23
  from ml_engine.data_manager import DataManager
@@ -36,11 +36,19 @@ class HeavyDutyBacktester:
36
  def __init__(self, data_manager, processor):
37
  self.dm = data_manager
38
  self.proc = processor
 
 
39
  self.GRID_DENSITY = 3
 
 
40
  self.INITIAL_CAPITAL = 10.0
41
  self.TRADING_FEES = 0.001
42
  self.MAX_SLOTS = 4
43
- self.TARGET_COINS = ['SOL/USDT', 'XRP/USDT', 'DOGE/USDT']
 
 
 
 
44
  self.force_start_date = None
45
  self.force_end_date = None
46
 
@@ -54,7 +62,7 @@ class HeavyDutyBacktester:
54
  else:
55
  os.makedirs(CACHE_DIR)
56
 
57
- print(f"🧪 [Backtest V118.4] Bulletproof Mode. Models: {self._check_models_status()}")
58
 
59
  def _check_models_status(self):
60
  status = []
@@ -124,7 +132,7 @@ class HeavyDutyBacktester:
124
  df['ema50'] = ta.ema(df['close'], length=50)
125
  df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14)
126
 
127
- # FIX: rel_vol calculated globally to avoid KeyError
128
  df['vol_ma50'] = df['volume'].rolling(50).mean()
129
  df['rel_vol'] = df['volume'] / (df['vol_ma50'] + 1e-9)
130
 
@@ -138,7 +146,7 @@ class HeavyDutyBacktester:
138
  df['vol_z'] = (df['volume'] - vol_mean) / (vol_std + 1e-9)
139
  df['atr_pct'] = df['atr'] / df['close']
140
 
141
- # 🔥 L1 Score 🔥
142
  rsi_penalty = np.where(df['rsi'] > 70, (df['rsi'] - 70) * 2, 0)
143
  l1_score_raw = (df['rel_vol'] * 10) + (df['atr_pct'] * 1000) - rsi_penalty
144
  df['l1_score'] = l1_score_raw.fillna(0)
@@ -168,7 +176,7 @@ class HeavyDutyBacktester:
168
  return df
169
 
170
  # ==============================================================
171
- # 🧠 CPU PROCESSING (FIXED SAFE LOOKUP)
172
  # ==============================================================
173
  async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
174
  safe_sym = sym.replace('/', '_')
@@ -179,9 +187,10 @@ class HeavyDutyBacktester:
179
  print(f" 📂 [{sym}] Data Exists -> Skipping.")
180
  return
181
 
182
- print(f" ⚙️ [CPU] Analyzing {sym} (Real Models Active)...", flush=True)
183
  t0 = time.time()
184
 
 
185
  df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
186
  df_1m['datetime'] = pd.to_datetime(df_1m['timestamp'], unit='ms')
187
  df_1m.set_index('datetime', inplace=True)
@@ -202,40 +211,51 @@ class HeavyDutyBacktester:
202
  frames[tf_str] = resampled
203
  numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}
204
 
 
205
  map_1m_to_1h = np.clip(np.searchsorted(numpy_htf['1h']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['1h']['timestamp'])-1)
206
  map_1m_to_5m = np.clip(np.searchsorted(numpy_htf['5m']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['5m']['timestamp'])-1)
207
  map_1m_to_15m = np.clip(np.searchsorted(numpy_htf['15m']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['15m']['timestamp'])-1)
 
208
 
209
- # Load Models
210
- titan_engine = self.proc.titan
211
  oracle_dir_model = getattr(self.proc.oracle, 'model_direction', None)
212
  sniper_models = getattr(self.proc.sniper, 'models', [])
213
  hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
214
  legacy_v2 = getattr(self.proc.guardian_legacy, 'model_v2', None)
215
 
216
- # Pre-Calc Legacy V2
217
  global_v2_probs = np.zeros(len(fast_1m['close']))
218
  if legacy_v2:
219
  try:
220
- l_log = fast_1m['log_ret']; l_rsi = fast_1m['rsi'] / 100.0
221
- l_fib = fast_1m['fib_pos']; l_vol = fast_1m['volatility']
 
 
 
 
222
  l5_log = numpy_htf['5m']['log_ret'][map_1m_to_5m]
223
  l5_rsi = numpy_htf['5m']['rsi'][map_1m_to_5m] / 100.0
224
  l5_fib = numpy_htf['5m']['fib_pos'][map_1m_to_5m]
225
  l5_trd = numpy_htf['5m']['trend_slope'][map_1m_to_5m]
 
226
  l15_log = numpy_htf['15m']['log_ret'][map_1m_to_15m]
227
  l15_rsi = numpy_htf['15m']['rsi'][map_1m_to_15m] / 100.0
228
  l15_fib618 = numpy_htf['15m']['dist_fib618'][map_1m_to_15m]
229
  l15_trd = numpy_htf['15m']['trend_slope'][map_1m_to_15m]
 
230
  lag_cols = []
231
  for lag in [1, 2, 3, 5, 10, 20]:
232
- lag_cols.extend([fast_1m[f'log_ret_lag_{lag}'], fast_1m[f'rsi_lag_{lag}'], fast_1m[f'fib_pos_lag_{lag}'], fast_1m[f'volatility_lag_{lag}']])
 
 
 
 
233
  X_GLOBAL_V2 = np.column_stack([l_log, l_rsi, l_fib, l_vol, l5_log, l5_rsi, l5_fib, l5_trd, l15_log, l15_rsi, l15_fib618, l15_trd, *lag_cols])
234
- gp = legacy_v2.predict(xgb.DMatrix(X_GLOBAL_V2))
235
- if len(gp.shape) > 1: global_v2_probs = gp[:, 2]
236
  except: pass
237
 
238
- # Pre-Assemble Hydra Static
239
  global_hydra_static = None
240
  if hydra_models:
241
  try:
@@ -249,187 +269,203 @@ class HeavyDutyBacktester:
249
  global_hydra_static = np.column_stack([h_rsi_1m, h_rsi_5m, h_rsi_15m, h_bb, h_vol, h_atr, h_close])
250
  except: pass
251
 
 
252
  valid_indices_mask = fast_1m['l1_score'] >= 5.0
253
  valid_indices = np.where(valid_indices_mask)[0]
254
- final_valid_indices = [idx for idx in valid_indices if idx > 500 and idx < len(fast_1m['close']) - 245]
 
 
255
 
256
- print(f" 🎯 Raw Candidates (Score > 5): {len(final_valid_indices)}. Calculating Model Scores...", flush=True)
257
 
258
- ai_results = []
259
- time_vec = np.arange(1, 241)
 
260
 
261
- oracle_batch_X, sniper_batch_X, hydra_batch_X, hydra_batch_indices = [], [], [], []
262
- temp_oracle_results, temp_sniper_results, temp_hydra_results, temp_titan_results = {}, {}, {}, {}
263
- BATCH_SIZE = 5000
264
- current_batch_count = 0 # ✅ Independent Batch Counter
265
-
266
- for i_idx in final_valid_indices:
267
- ts_val = fast_1m['timestamp'][i_idx]
268
- current_res_idx = len(ai_results)
269
-
270
- idx_1h = map_1m_to_1h[i_idx]
271
- idx_15m = map_1m_to_15m[i_idx]
272
- idx_4h = np.clip(np.searchsorted(numpy_htf['4h']['timestamp'], ts_val), 0, len(numpy_htf['4h']['timestamp'])-1)
273
-
274
- # 1. Titan
275
- titan_score_est = min(0.95, max(0.1, fast_1m['l1_score'][i_idx] / 40.0))
276
- temp_titan_results[current_res_idx] = titan_score_est
277
 
278
- # 2. Oracle (SAFE LOOKUP FIX)
279
- if oracle_dir_model:
280
- o_vec = []
 
 
 
 
 
 
 
 
 
281
  for col in getattr(self.proc.oracle, 'feature_cols', []):
282
- val = 0.0
283
- if col.startswith('1h_'):
284
- c_name = col[3:]
285
- if c_name in numpy_htf['1h']: val = numpy_htf['1h'][c_name][idx_1h]
286
- elif col.startswith('15m_'):
287
- c_name = col[4:]
288
- if c_name in numpy_htf['15m']: val = numpy_htf['15m'][c_name][idx_15m]
289
- elif col.startswith('4h_'):
290
- c_name = col[3:]
291
- if c_name in numpy_htf['4h']: val = numpy_htf['4h'][c_name][idx_4h]
292
- elif col == 'sim_titan_score': val = titan_score_est
293
- elif col == 'sim_mc_score': val = 0.5
294
- elif col == 'sim_pattern_score': val = 0.5
295
- o_vec.append(val)
296
- oracle_batch_X.append(o_vec)
297
- else:
298
- temp_oracle_results[current_res_idx] = 0.5
299
-
300
- # 3. Sniper
301
- if sniper_models:
302
- s_vec = []
 
 
 
 
 
 
 
 
 
303
  for col in getattr(self.proc.sniper, 'feature_names', []):
304
- if col in fast_1m: s_vec.append(fast_1m[col][i_idx])
305
- elif col == 'L_score': s_vec.append(fast_1m.get('vol_zscore_50', [0])[i_idx])
306
- else: s_vec.append(0.0)
307
- sniper_batch_X.append(s_vec)
308
- else:
309
- temp_sniper_results[current_res_idx] = 0.5
310
-
311
- # 4. Hydra
312
- if hydra_models and global_hydra_static is not None:
313
- start_idx = i_idx + 1; end_idx = start_idx + 240
314
- sl_static = global_hydra_static[start_idx:end_idx]
315
- entry_price = fast_1m['close'][i_idx]
316
- sl_close = sl_static[:, 6]; sl_atr = sl_static[:, 5]
317
- sl_dist = np.maximum(1.5 * sl_atr, entry_price * 0.015)
318
- sl_pnl = sl_close - entry_price; sl_norm_pnl = sl_pnl / sl_dist
319
- sl_cum_max = np.maximum.accumulate(sl_close); sl_cum_max = np.maximum(sl_cum_max, entry_price)
320
- sl_max_pnl_r = (sl_cum_max - entry_price) / sl_dist
321
- sl_atr_pct = sl_atr / sl_close
322
- zeros = np.zeros(240); ones = np.full(240, 1.0)
323
- X_cand = np.column_stack([
324
- sl_static[:, 0], sl_static[:, 1], sl_static[:, 2],
325
- sl_static[:, 3], sl_static[:, 4],
326
- zeros, sl_atr_pct, sl_norm_pnl, sl_max_pnl_r,
327
- zeros, zeros, time_vec,
328
- zeros, ones * 0.6, ones * 0.7, ones * 3.0
329
- ])
330
- hydra_batch_X.append(X_cand)
331
- hydra_batch_indices.append(current_res_idx)
332
-
333
- ai_results.append({
334
- 'timestamp': ts_val, 'symbol': sym, 'close': fast_1m['close'][i_idx],
335
- 'real_titan': titan_score_est, 'oracle_conf': 0.5, 'sniper_score': 0.5,
336
- 'l1_score': fast_1m['l1_score'][i_idx],
337
- 'risk_hydra_crash': 0.0, 'time_hydra_crash': 0, 'risk_legacy_v2': 0.0
338
- })
339
-
340
- current_batch_count += 1
341
-
342
- # ✅ FIX: Trigger based on count, not just Hydra list size
343
- if current_batch_count >= BATCH_SIZE:
344
- if oracle_batch_X:
345
- try:
346
- preds = oracle_dir_model.predict(np.array(oracle_batch_X))
347
- start_i = current_res_idx - len(oracle_batch_X) + 1
348
- for i, p in enumerate(preds):
349
- val = float(p[0]) if hasattr(p, '__iter__') else float(p)
350
- if val < 0.5: val = 1 - val
351
- temp_oracle_results[start_i + i] = val
352
- except: pass
353
- oracle_batch_X = []
354
 
355
- if sniper_batch_X:
356
- try:
357
- s_X = np.array(sniper_batch_X)
358
- preds = np.mean([m.predict(s_X) for m in sniper_models], axis=0)
359
- start_i = current_res_idx - len(sniper_batch_X) + 1
360
- for i, p in enumerate(preds): temp_sniper_results[start_i + i] = float(p)
361
- except: pass
362
- sniper_batch_X = []
363
-
364
- if hydra_batch_X:
365
- try:
366
- big_X = np.vstack(hydra_batch_X)
367
- preds = hydra_models['crash'].predict_proba(big_X)[:, 1]
368
- for b_i, res_idx in enumerate(hydra_batch_indices):
369
- p_slice = preds[b_i*240 : (b_i+1)*240]
370
- max_p = np.max(p_slice)
371
- c_idx = np.where(p_slice > 0.6)[0]
372
- c_time = int(fast_1m['timestamp'][fast_1m['timestamp'].searchsorted(ai_results[res_idx]['timestamp']) + 1 + c_idx[0]]) if len(c_idx) > 0 else 0
373
- temp_hydra_results[res_idx] = (max_p, c_time)
374
- except: pass
375
- hydra_batch_X = []
376
- hydra_batch_indices = []
377
 
378
- current_batch_count = 0
379
-
380
- # Process Leftovers (Same logic)
381
- if oracle_batch_X:
382
- try:
383
- preds = oracle_dir_model.predict(np.array(oracle_batch_X))
384
- start_i = len(ai_results) - len(oracle_batch_X)
385
- for i, p in enumerate(preds):
386
- val = float(p[0]) if hasattr(p, '__iter__') else float(p)
387
- if val < 0.5: val = 1 - val
388
- temp_oracle_results[start_i + i] = val
389
- except: pass
390
- if sniper_batch_X:
391
- try:
392
- s_X = np.array(sniper_batch_X)
393
- preds = np.mean([m.predict(s_X) for m in sniper_models], axis=0)
394
- start_i = len(ai_results) - len(sniper_batch_X)
395
- for i, p in enumerate(preds): temp_sniper_results[start_i + i] = float(p)
396
- except: pass
397
- if hydra_batch_X:
398
- try:
399
- big_X = np.vstack(hydra_batch_X)
400
- preds = hydra_models['crash'].predict_proba(big_X)[:, 1]
401
- for b_i, res_idx in enumerate(hydra_batch_indices):
402
- p_slice = preds[b_i*240 : (b_i+1)*240]
403
- max_p = np.max(p_slice)
404
- c_idx = np.where(p_slice > 0.6)[0]
405
- c_time = int(fast_1m['timestamp'][fast_1m['timestamp'].searchsorted(ai_results[res_idx]['timestamp']) + 1 + c_idx[0]]) if len(c_idx) > 0 else 0
406
- temp_hydra_results[res_idx] = (max_p, c_time)
407
- except: pass
408
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
  if legacy_v2:
410
- for idx, res in enumerate(ai_results):
411
- ts = res['timestamp']
412
- idx_1m = np.searchsorted(fast_1m['timestamp'], ts)
413
- start = idx_1m + 1
414
- if start < len(global_v2_probs) - 240:
415
- probs_slice = global_v2_probs[start:start+240]
416
- max_p = np.max(probs_slice)
417
- p_idx = np.where(probs_slice > 0.8)[0]
418
- p_time = int(fast_1m['timestamp'][start + p_idx[0]]) if len(p_idx) > 0 else 0
419
- ai_results[idx]['risk_legacy_v2'] = max_p
420
- ai_results[idx]['time_legacy_panic'] = p_time
421
-
422
- for i in range(len(ai_results)):
423
- if i in temp_oracle_results: ai_results[i]['oracle_conf'] = temp_oracle_results[i]
424
- if i in temp_sniper_results: ai_results[i]['sniper_score'] = temp_sniper_results[i]
425
- if i in temp_hydra_results:
426
- ai_results[i]['risk_hydra_crash'] = temp_hydra_results[i][0]
427
- ai_results[i]['time_hydra_crash'] = temp_hydra_results[i][1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
 
429
  dt = time.time() - t0
430
- if ai_results:
431
- pd.DataFrame(ai_results).to_pickle(scores_file)
432
- print(f" ✅ [{sym}] Completed {len(ai_results)} signals in {dt:.2f} seconds.", flush=True)
433
 
434
  del frames, fast_1m, numpy_htf, global_v2_probs, global_hydra_static
435
  gc.collect()
@@ -464,6 +500,7 @@ class HeavyDutyBacktester:
464
  global_df = pd.concat(all_data)
465
  global_df.sort_values('timestamp', inplace=True)
466
 
 
467
  arr_ts = global_df['timestamp'].values
468
  arr_close = global_df['close'].values.astype(np.float64)
469
  arr_symbol = global_df['symbol'].values
@@ -479,19 +516,12 @@ class HeavyDutyBacktester:
479
  arr_sym_int = np.array([sym_map[s] for s in arr_symbol], dtype=np.int32)
480
 
481
  total_len = len(arr_ts)
482
- total_combos = len(combinations_batch)
483
- print(f" 🚀 [System] Starting Optimized Grid Search on {total_combos} combos...", flush=True)
484
 
485
  results = []
486
- start_time = time.time()
487
 
488
  for idx, config in enumerate(combinations_batch):
489
- if idx > 0 and idx % 10 == 0:
490
- elapsed = time.time() - start_time
491
- avg_time = elapsed / idx
492
- rem_time = avg_time * (total_combos - idx)
493
- sys.stdout.write(f"\r ⚙️ Progress: {idx}/{total_combos} ({idx/total_combos:.1%}) | ETA: {rem_time:.0f}s")
494
- sys.stdout.flush()
495
 
496
  wallet_bal = initial_capital
497
  wallet_alloc = 0.0
@@ -513,6 +543,7 @@ class HeavyDutyBacktester:
513
  sym_id = arr_sym_int[i]
514
  price = arr_close[i]
515
 
 
516
  if sym_id in positions:
517
  pos = positions[sym_id]
518
  entry = pos[0]; h_risk = pos[2]; h_time = pos[3]
@@ -530,6 +561,7 @@ class HeavyDutyBacktester:
530
  dd = (peak_bal - tot) / peak_bal
531
  if dd > max_dd: max_dd = dd
532
 
 
533
  if len(positions) < max_slots:
534
  if mask_buy[i]:
535
  if sym_id not in positions:
@@ -540,6 +572,7 @@ class HeavyDutyBacktester:
540
  wallet_bal -= size
541
  wallet_alloc += size
542
 
 
543
  final_bal = wallet_bal + wallet_alloc
544
  net_profit = final_bal - initial_capital
545
  total_t = len(trades_log)
@@ -553,6 +586,9 @@ class HeavyDutyBacktester:
553
  hc_avg_pnl = (sum(p for p, s in trades_log if s > 0.65)/hc_count*100) if hc_count > 0 else 0.0
554
  agree_rate = (hc_count / total_t * 100) if total_t > 0 else 0.0
555
 
 
 
 
556
  results.append({
557
  'config': config, 'final_balance': final_bal, 'net_profit': net_profit,
558
  'total_trades': total_t, 'win_count': win_count, 'loss_count': loss_count,
@@ -609,9 +645,9 @@ class HeavyDutyBacktester:
609
  print(f" ⚖️ Weights: Titan={best['config']['w_titan']:.2f} | Patterns={best['config']['w_struct']:.2f} | L1={best['config']['l1_thresh']}")
610
  print("="*60)
611
  return best['config'], best
612
-
613
  async def run_strategic_optimization_task():
614
- print("\n🧪 [STRATEGIC BACKTEST] Full System Mirror Mode...")
615
  r2 = R2Service()
616
  dm = DataManager(None, None, r2)
617
  proc = MLProcessor(dm)
 
1
  # ============================================================
2
+ # 🧪 backtest_engine.py (V118.5 - GEM-Architect: Hyper-Vectorized)
3
  # ============================================================
4
 
5
  import asyncio
 
17
  from datetime import datetime, timezone
18
  from typing import Dict, Any, List
19
 
20
+ # ✅ استيراد المحركات الأساسية
21
  try:
22
  from ml_engine.processor import MLProcessor, SystemLimits
23
  from ml_engine.data_manager import DataManager
 
36
  def __init__(self, data_manager, processor):
37
  self.dm = data_manager
38
  self.proc = processor
39
+
40
+ # 🎛️ كثافة شبكة البحث
41
  self.GRID_DENSITY = 3
42
+
43
+ # إعدادات المحفظة
44
  self.INITIAL_CAPITAL = 10.0
45
  self.TRADING_FEES = 0.001
46
  self.MAX_SLOTS = 4
47
+
48
+ self.TARGET_COINS = [
49
+ 'SOL/USDT', 'XRP/USDT', 'DOGE/USDT'
50
+ ]
51
+
52
  self.force_start_date = None
53
  self.force_end_date = None
54
 
 
62
  else:
63
  os.makedirs(CACHE_DIR)
64
 
65
+ print(f"🧪 [Backtest V118.5] Hyper-Vectorized Mode. Models: {self._check_models_status()}")
66
 
67
  def _check_models_status(self):
68
  status = []
 
132
  df['ema50'] = ta.ema(df['close'], length=50)
133
  df['atr'] = ta.atr(df['high'], df['low'], df['close'], length=14)
134
 
135
+ # Global calc
136
  df['vol_ma50'] = df['volume'].rolling(50).mean()
137
  df['rel_vol'] = df['volume'] / (df['vol_ma50'] + 1e-9)
138
 
 
146
  df['vol_z'] = (df['volume'] - vol_mean) / (vol_std + 1e-9)
147
  df['atr_pct'] = df['atr'] / df['close']
148
 
149
+ # L1 Score
150
  rsi_penalty = np.where(df['rsi'] > 70, (df['rsi'] - 70) * 2, 0)
151
  l1_score_raw = (df['rel_vol'] * 10) + (df['atr_pct'] * 1000) - rsi_penalty
152
  df['l1_score'] = l1_score_raw.fillna(0)
 
176
  return df
177
 
178
  # ==============================================================
179
+ # 🧠 CPU PROCESSING (HYPER-VECTORIZED)
180
  # ==============================================================
181
  async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
182
  safe_sym = sym.replace('/', '_')
 
187
  print(f" 📂 [{sym}] Data Exists -> Skipping.")
188
  return
189
 
190
+ print(f" ⚙️ [CPU] Analyzing {sym} (Hyper-Vectorized Mode)...", flush=True)
191
  t0 = time.time()
192
 
193
+ # 1. Data Prep
194
  df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
195
  df_1m['datetime'] = pd.to_datetime(df_1m['timestamp'], unit='ms')
196
  df_1m.set_index('datetime', inplace=True)
 
211
  frames[tf_str] = resampled
212
  numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}
213
 
214
+ # 2. Time Alignment (Vectorized)
215
  map_1m_to_1h = np.clip(np.searchsorted(numpy_htf['1h']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['1h']['timestamp'])-1)
216
  map_1m_to_5m = np.clip(np.searchsorted(numpy_htf['5m']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['5m']['timestamp'])-1)
217
  map_1m_to_15m = np.clip(np.searchsorted(numpy_htf['15m']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['15m']['timestamp'])-1)
218
+ map_1m_to_4h = np.clip(np.searchsorted(numpy_htf['4h']['timestamp'], fast_1m['timestamp']), 0, len(numpy_htf['4h']['timestamp'])-1)
219
 
220
+ # 3. Model Access
 
221
  oracle_dir_model = getattr(self.proc.oracle, 'model_direction', None)
222
  sniper_models = getattr(self.proc.sniper, 'models', [])
223
  hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
224
  legacy_v2 = getattr(self.proc.guardian_legacy, 'model_v2', None)
225
 
226
+ # 4. 🔥 Pre-Calc Legacy V2 (Vectorized) 🔥
227
  global_v2_probs = np.zeros(len(fast_1m['close']))
228
  if legacy_v2:
229
  try:
230
+ # Direct array construction
231
+ l_log = fast_1m['log_ret']
232
+ l_rsi = fast_1m['rsi'] / 100.0
233
+ l_fib = fast_1m['fib_pos']
234
+ l_vol = fast_1m['volatility']
235
+
236
  l5_log = numpy_htf['5m']['log_ret'][map_1m_to_5m]
237
  l5_rsi = numpy_htf['5m']['rsi'][map_1m_to_5m] / 100.0
238
  l5_fib = numpy_htf['5m']['fib_pos'][map_1m_to_5m]
239
  l5_trd = numpy_htf['5m']['trend_slope'][map_1m_to_5m]
240
+
241
  l15_log = numpy_htf['15m']['log_ret'][map_1m_to_15m]
242
  l15_rsi = numpy_htf['15m']['rsi'][map_1m_to_15m] / 100.0
243
  l15_fib618 = numpy_htf['15m']['dist_fib618'][map_1m_to_15m]
244
  l15_trd = numpy_htf['15m']['trend_slope'][map_1m_to_15m]
245
+
246
  lag_cols = []
247
  for lag in [1, 2, 3, 5, 10, 20]:
248
+ lag_cols.extend([
249
+ fast_1m[f'log_ret_lag_{lag}'], fast_1m[f'rsi_lag_{lag}'],
250
+ fast_1m[f'fib_pos_lag_{lag}'], fast_1m[f'volatility_lag_{lag}']
251
+ ])
252
+
253
  X_GLOBAL_V2 = np.column_stack([l_log, l_rsi, l_fib, l_vol, l5_log, l5_rsi, l5_fib, l5_trd, l15_log, l15_rsi, l15_fib618, l15_trd, *lag_cols])
254
+ global_v2_probs = legacy_v2.predict(xgb.DMatrix(X_GLOBAL_V2))
255
+ if len(global_v2_probs.shape) > 1: global_v2_probs = global_v2_probs[:, 2]
256
  except: pass
257
 
258
+ # 5. 🔥 Pre-Assemble Hydra Static 🔥
259
  global_hydra_static = None
260
  if hydra_models:
261
  try:
 
269
  global_hydra_static = np.column_stack([h_rsi_1m, h_rsi_5m, h_rsi_15m, h_bb, h_vol, h_atr, h_close])
270
  except: pass
271
 
272
+ # 6. Candidate Filtering
273
  valid_indices_mask = fast_1m['l1_score'] >= 5.0
274
  valid_indices = np.where(valid_indices_mask)[0]
275
+ # Skip warmup and tail
276
+ mask_bounds = (valid_indices > 500) & (valid_indices < len(fast_1m['close']) - 245)
277
+ final_valid_indices = valid_indices[mask_bounds]
278
 
279
+ print(f" 🎯 Raw Candidates (Score > 5): {len(final_valid_indices)}. Vectorized Scoring...", flush=True)
280
 
281
+ # 🚀 HYPER-VECTORIZATION START 🚀
282
+ # Instead of looping, we construct the BIG matrices for all candidates at once.
283
+ # This brings speed back to ~60s
284
 
285
+ num_candidates = len(final_valid_indices)
286
+ if num_candidates == 0: return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
+ # --- A. ORACLE MATRIX CONSTRUCTION ---
289
+ oracle_preds = np.full(num_candidates, 0.5)
290
+ if oracle_dir_model:
291
+ try:
292
+ # Mapped Indices for all candidates
293
+ idx_1h = map_1m_to_1h[final_valid_indices]
294
+ idx_15m = map_1m_to_15m[final_valid_indices]
295
+ idx_4h = map_1m_to_4h[final_valid_indices]
296
+
297
+ titan_scores = np.clip(fast_1m['l1_score'][final_valid_indices] / 40.0, 0.1, 0.95)
298
+
299
+ oracle_features = []
300
  for col in getattr(self.proc.oracle, 'feature_cols', []):
301
+ if col.startswith('1h_'):
302
+ c = col[3:]
303
+ oracle_features.append(numpy_htf['1h'][c][idx_1h] if c in numpy_htf['1h'] else np.zeros(num_candidates))
304
+ elif col.startswith('15m_'):
305
+ c = col[4:]
306
+ oracle_features.append(numpy_htf['15m'][c][idx_15m] if c in numpy_htf['15m'] else np.zeros(num_candidates))
307
+ elif col.startswith('4h_'):
308
+ c = col[3:]
309
+ oracle_features.append(numpy_htf['4h'][c][idx_4h] if c in numpy_htf['4h'] else np.zeros(num_candidates))
310
+ elif col == 'sim_titan_score': oracle_features.append(titan_scores)
311
+ elif col == 'sim_mc_score': oracle_features.append(np.full(num_candidates, 0.5))
312
+ elif col == 'sim_pattern_score': oracle_features.append(np.full(num_candidates, 0.5))
313
+ else: oracle_features.append(np.zeros(num_candidates))
314
+
315
+ X_oracle_big = np.column_stack(oracle_features)
316
+ preds = oracle_dir_model.predict(X_oracle_big)
317
+ # Handle output shape
318
+ if len(preds.shape) > 1 and preds.shape[1] > 1:
319
+ oracle_preds = preds[:, 1] # Prob of Class 1
320
+ else:
321
+ oracle_preds = preds.flatten()
322
+ # If model outputs 0/1 class, we might need proba. Assuming predict gives prob or class.
323
+ # Adjust if simple XGB classifier gives 0/1. For backtest, assume regression or proba.
324
+ except Exception as e: print(f"Oracle Error: {e}")
325
+
326
+ # --- B. SNIPER MATRIX CONSTRUCTION ---
327
+ sniper_preds = np.full(num_candidates, 0.5)
328
+ if sniper_models:
329
+ try:
330
+ sniper_features = []
331
  for col in getattr(self.proc.sniper, 'feature_names', []):
332
+ if col in fast_1m: sniper_features.append(fast_1m[col][final_valid_indices])
333
+ elif col == 'L_score': sniper_features.append(fast_1m.get('vol_zscore_50', np.zeros(len(fast_1m['close'])))[final_valid_indices])
334
+ else: sniper_features.append(np.zeros(num_candidates))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
 
336
+ X_sniper_big = np.column_stack(sniper_features)
337
+ # Ensemble Average
338
+ preds_list = [m.predict(X_sniper_big) for m in sniper_models]
339
+ sniper_preds = np.mean(preds_list, axis=0)
340
+ except Exception as e: print(f"Sniper Error: {e}")
341
+
342
+ # --- C. HYDRA MATRIX CONSTRUCTION (The Heavy One) ---
343
+ hydra_risk_preds = np.zeros(num_candidates)
344
+ hydra_time_preds = np.zeros(num_candidates, dtype=int)
345
+
346
+ # Hydra is sequence-based (window of 240). Vectorizing this is tricky without exploding memory.
347
+ # We will iterate but ONLY for prediction input construction, which is lighter than full logic.
348
+ # Actually, for 95k candidates, a (95000, 240, features) array is huge.
349
+ # We MUST batch Hydra. But efficiently.
350
+
351
+ if hydra_models and global_hydra_static is not None:
352
+ # We process in chunks of 5000 to keep memory sane
353
+ chunk_size = 5000
354
+ for i in range(0, num_candidates, chunk_size):
355
+ chunk_indices = final_valid_indices[i : i + chunk_size]
 
 
356
 
357
+ # Build batch X
358
+ batch_X = []
359
+ valid_batch_indices = [] # Map back to chunk index
360
+
361
+ for k, idx in enumerate(chunk_indices):
362
+ start = idx + 1
363
+ end = start + 240
364
+ # Quick slice
365
+ sl_static = global_hydra_static[start:end]
366
+
367
+ entry_p = fast_1m['close'][idx]
368
+ sl_close = sl_static[:, 6]
369
+ sl_atr = sl_static[:, 5]
370
+
371
+ sl_dist = np.maximum(1.5 * sl_atr, entry_p * 0.015)
372
+ sl_pnl = sl_close - entry_p
373
+ sl_norm_pnl = sl_pnl / sl_dist
374
+
375
+ # Accumulate max - vectorized for the window
376
+ sl_cum_max = np.maximum.accumulate(sl_close)
377
+ sl_cum_max = np.maximum(sl_cum_max, entry_p)
378
+ sl_max_pnl_r = (sl_cum_max - entry_p) / sl_dist
379
+
380
+ sl_atr_pct = sl_atr / sl_close
381
+
382
+ # Static cols
383
+ zeros = np.zeros(240); ones = np.ones(240)
384
+
385
+ row = np.column_stack([
386
+ sl_static[:, 0], sl_static[:, 1], sl_static[:, 2],
387
+ sl_static[:, 3], sl_static[:, 4],
388
+ zeros, sl_atr_pct, sl_norm_pnl, sl_max_pnl_r,
389
+ zeros, zeros, time_vec,
390
+ zeros, ones*0.6, ones*0.7, ones*3.0
391
+ ])
392
+ batch_X.append(row)
393
+ valid_batch_indices.append(i + k) # Global index in final_valid_indices
394
+
395
+ if batch_X:
396
+ try:
397
+ big_X = np.array(batch_X) # Shape: (Batch, 240, Feats)
398
+ # Flatten for 2D model if needed, or keeping 3D depending on Hydra.
399
+ # Assuming Hydra uses 2D input (stacking windows):
400
+ big_X_flat = big_X.reshape(-1, big_X.shape[-1])
401
+
402
+ preds_flat = hydra_models['crash'].predict_proba(big_X_flat)[:, 1]
403
+
404
+ # Reshape back to (Batch, 240)
405
+ preds_batch = preds_flat.reshape(len(batch_X), 240)
406
+
407
+ # Extract Max Risk & Time
408
+ batch_max_risk = np.max(preds_batch, axis=1)
409
+
410
+ # Find first index > thresh (0.6) for time
411
+ over_thresh = preds_batch > 0.6
412
+ # argmax gives first True index
413
+ has_crash = over_thresh.any(axis=1)
414
+ crash_times_rel = np.argmax(over_thresh, axis=1)
415
+
416
+ # Map back to global results
417
+ for j, glob_idx in enumerate(valid_batch_indices):
418
+ hydra_risk_preds[glob_idx] = batch_max_risk[j]
419
+ if has_crash[j]:
420
+ # Calc absolute timestamp
421
+ start_t_idx = final_valid_indices[glob_idx] + 1
422
+ abs_time = fast_1m['timestamp'][start_t_idx + crash_times_rel[j]]
423
+ hydra_time_preds[glob_idx] = abs_time
424
+
425
+ except Exception: pass
426
+
427
+ # --- D. LEGACY V2 MAPPING ---
428
+ legacy_risk_preds = np.zeros(num_candidates)
429
+ legacy_time_preds = np.zeros(num_candidates, dtype=int)
430
+
431
  if legacy_v2:
432
+ # Vectorized mapping logic
433
+ # For each candidate at idx, scan global_v2_probs[idx+1 : idx+241]
434
+ # This is a sliding window max. Can be slow if looped.
435
+ # Fast approx: Check max just for the entry? No, need lookahead.
436
+ # We loop simply because it's fast scalar lookups.
437
+ for k, idx in enumerate(final_valid_indices):
438
+ start = idx + 1
439
+ if start + 240 < len(global_v2_probs):
440
+ window = global_v2_probs[start : start + 240]
441
+ legacy_risk_preds[k] = np.max(window)
442
+ # Time logic can be added if needed, sticking to max risk for now
443
+
444
+ # --- E. CONSTRUCT FINAL DATAFRAME ---
445
+ # Titan Proxy
446
+ titan_scores_final = np.clip(fast_1m['l1_score'][final_valid_indices] / 40.0, 0.1, 0.95)
447
+ l1_scores_final = fast_1m['l1_score'][final_valid_indices]
448
+ timestamps_final = fast_1m['timestamp'][final_valid_indices]
449
+ closes_final = fast_1m['close'][final_valid_indices]
450
+
451
+ ai_df = pd.DataFrame({
452
+ 'timestamp': timestamps_final,
453
+ 'symbol': sym,
454
+ 'close': closes_final,
455
+ 'real_titan': titan_scores_final,
456
+ 'oracle_conf': oracle_preds,
457
+ 'sniper_score': sniper_preds,
458
+ 'l1_score': l1_scores_final,
459
+ 'risk_hydra_crash': hydra_risk_preds,
460
+ 'time_hydra_crash': hydra_time_preds,
461
+ 'risk_legacy_v2': legacy_risk_preds,
462
+ 'time_legacy_panic': legacy_time_preds
463
+ })
464
 
465
  dt = time.time() - t0
466
+ if not ai_df.empty:
467
+ ai_df.to_pickle(scores_file)
468
+ print(f" ✅ [{sym}] Completed {len(ai_df)} signals in {dt:.2f} seconds.", flush=True)
469
 
470
  del frames, fast_1m, numpy_htf, global_v2_probs, global_hydra_static
471
  gc.collect()
 
500
  global_df = pd.concat(all_data)
501
  global_df.sort_values('timestamp', inplace=True)
502
 
503
+ # 🚀 Numpy Conversion 🚀
504
  arr_ts = global_df['timestamp'].values
505
  arr_close = global_df['close'].values.astype(np.float64)
506
  arr_symbol = global_df['symbol'].values
 
516
  arr_sym_int = np.array([sym_map[s] for s in arr_symbol], dtype=np.int32)
517
 
518
  total_len = len(arr_ts)
519
+ print(f" 🚀 [System] Starting Optimized Grid Search on {len(combinations_batch)} combos...", flush=True)
 
520
 
521
  results = []
 
522
 
523
  for idx, config in enumerate(combinations_batch):
524
+ # No Annoying Progress Logs
 
 
 
 
 
525
 
526
  wallet_bal = initial_capital
527
  wallet_alloc = 0.0
 
543
  sym_id = arr_sym_int[i]
544
  price = arr_close[i]
545
 
546
+ # Exits
547
  if sym_id in positions:
548
  pos = positions[sym_id]
549
  entry = pos[0]; h_risk = pos[2]; h_time = pos[3]
 
561
  dd = (peak_bal - tot) / peak_bal
562
  if dd > max_dd: max_dd = dd
563
 
564
+ # Entries
565
  if len(positions) < max_slots:
566
  if mask_buy[i]:
567
  if sym_id not in positions:
 
572
  wallet_bal -= size
573
  wallet_alloc += size
574
 
575
+ # Stats
576
  final_bal = wallet_bal + wallet_alloc
577
  net_profit = final_bal - initial_capital
578
  total_t = len(trades_log)
 
586
  hc_avg_pnl = (sum(p for p, s in trades_log if s > 0.65)/hc_count*100) if hc_count > 0 else 0.0
587
  agree_rate = (hc_count / total_t * 100) if total_t > 0 else 0.0
588
 
589
+ # ✅ FIX: Ensure 'thresh' key exists for AdaptiveHub compatibility
590
+ config['thresh'] = l1_thresh
591
+
592
  results.append({
593
  'config': config, 'final_balance': final_bal, 'net_profit': net_profit,
594
  'total_trades': total_t, 'win_count': win_count, 'loss_count': loss_count,
 
645
  print(f" ⚖️ Weights: Titan={best['config']['w_titan']:.2f} | Patterns={best['config']['w_struct']:.2f} | L1={best['config']['l1_thresh']}")
646
  print("="*60)
647
  return best['config'], best
648
+
649
  async def run_strategic_optimization_task():
650
+ print("\n🧪 [STRATEGIC BACKTEST] Hyper-Vectorized Mode...")
651
  r2 = R2Service()
652
  dm = DataManager(None, None, r2)
653
  proc = MLProcessor(dm)