Riy777 committed on
Commit
e19039e
·
verified ·
1 Parent(s): d7f2f64

Upload backtest_engine (19).py

Browse files
Files changed (1) hide show
  1. backtest_engine (19).py +618 -0
backtest_engine (19).py ADDED
@@ -0,0 +1,618 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================
2
+ # 🧪 backtest_engine.py (V159.0 - GEM-Architect: Hyper-Speed Jump Logic)
3
+ # ============================================================
4
+
5
+ import asyncio
6
+ import pandas as pd
7
+ import numpy as np
8
+ import time
9
+ import logging
10
+ import itertools
11
+ import os
12
+ import glob
13
+ import gc
14
+ import sys
15
+ import traceback
16
+ from datetime import datetime, timezone
17
+ from typing import Dict, Any, List
18
+
19
+ # محاولة استيراد المكتبات
20
+ try:
21
+ import pandas_ta as ta
22
+ except ImportError:
23
+ ta = None
24
+
25
+ try:
26
+ from ml_engine.processor import MLProcessor
27
+ from ml_engine.data_manager import DataManager
28
+ from learning_hub.adaptive_hub import AdaptiveHub
29
+ from r2 import R2Service
30
+ import xgboost as xgb
31
+ except ImportError:
32
+ pass
33
+
34
+ logging.getLogger('ml_engine').setLevel(logging.WARNING)
35
+ CACHE_DIR = "backtest_real_scores"
36
+
37
+ # ============================================================
38
+ # ⚡ VECTORIZED HELPERS
39
+ # ============================================================
40
+ def _z_roll_np(arr, w=500):
41
+ if len(arr) < w: return np.zeros_like(arr)
42
+ mean = pd.Series(arr).rolling(w).mean().fillna(0).values
43
+ std = pd.Series(arr).rolling(w).std().fillna(1).values
44
+ return np.nan_to_num((arr - mean) / (std + 1e-9))
45
+
46
+ def _revive_score_distribution(scores):
47
+ scores = np.array(scores, dtype=np.float32).flatten()
48
+ s_min, s_max = np.min(scores), np.max(scores)
49
+ if (s_max - s_min) < 1e-6: return scores
50
+ if s_max < 0.8 or s_min > 0.2:
51
+ return (scores - s_min) / (s_max - s_min)
52
+ return scores
53
+
54
+ # ============================================================
55
+ # 🧪 THE BACKTESTER CLASS
56
+ # ============================================================
57
class HeavyDutyBacktester:
    """Grid-search backtester driven by pre-computed model scores.

    The workflow has two phases:

    1. ``generate_truth_data`` downloads 1-minute candles for every target
       coin, derives indicators on several timeframes, runs the models exposed
       by ``processor`` in batch and caches one score file per coin and era in
       ``CACHE_DIR``.
    2. ``run_optimization`` replays a simple trading simulation over the
       cached candidates for every threshold combination in ``GRID_RANGES``
       and reports the most profitable one.
    """

    # Higher timeframes derived from the 1m candles: (label, pandas resample rule).
    _HTF_RULES = (
        ('5m', '5min'),
        ('15m', '15min'),
        ('1h', '1h'),
        ('4h', '4h'),
        ('1d', '1D'),
    )

    def __init__(self, data_manager, processor):
        """Store collaborators, set up the search grid and ensure the cache dir.

        Args:
            data_manager: Object exposing ``exchange.fetch_ohlcv`` (awaited).
            processor: Object exposing the model wrappers read in
                ``_process_data_in_memory`` (``titan``, ``oracle``, ``sniper``,
                ``guardian_hydra``, ``guardian_legacy``).
        """
        self.dm = data_manager
        self.proc = processor

        # Density: number of steps sampled inside each threshold range.
        self.GRID_DENSITY = 3  # 3 is enough for quick checks, 5 for deep dive

        self.INITIAL_CAPITAL = 10.0
        self.TRADING_FEES = 0.001
        self.MAX_SLOTS = 4

        # Control panel: threshold ranges explored by the grid search.
        self.GRID_RANGES = {
            'TITAN': np.linspace(0.10, 0.50, self.GRID_DENSITY),
            'ORACLE': np.linspace(0.40, 0.80, self.GRID_DENSITY),
            'SNIPER': np.linspace(0.30, 0.70, self.GRID_DENSITY),
            'PATTERN': np.linspace(0.10, 0.50, self.GRID_DENSITY),
            'L1_SCORE': [10.0],
            # Guardians
            'HYDRA_CRASH': np.linspace(0.60, 0.85, self.GRID_DENSITY),
            'HYDRA_GIVEBACK': np.linspace(0.60, 0.85, self.GRID_DENSITY),
            'LEGACY_V2': np.linspace(0.85, 0.98, self.GRID_DENSITY),
        }

        self.TARGET_COINS = [
            'SOL/USDT', 'XRP/USDT', 'DOGE/USDT', 'ADA/USDT', 'AVAX/USDT', 'LINK/USDT',
            'TON/USDT', 'INJ/USDT', 'APT/USDT', 'OP/USDT', 'ARB/USDT', 'SUI/USDT',
            'SEI/USDT', 'MINA/USDT', 'MATIC/USDT', 'NEAR/USDT', 'RUNE/USDT', 'API3/USDT',
            'FLOKI/USDT', 'BABYDOGE/USDT', 'SHIB/USDT', 'TRX/USDT', 'DOT/USDT', 'UNI/USDT',
            'ONDO/USDT', 'SNX/USDT', 'HBAR/USDT', 'XLM/USDT', 'AGIX/USDT', 'IMX/USDT',
            'LRC/USDT', 'KCS/USDT', 'ICP/USDT', 'SAND/USDT', 'AXS/USDT', 'APE/USDT',
            'GMT/USDT', 'CHZ/USDT', 'CFX/USDT', 'LDO/USDT', 'FET/USDT', 'RPL/USDT',
            'MNT/USDT', 'RAY/USDT', 'CAKE/USDT', 'SRM/USDT', 'PENDLE/USDT', 'ATOM/USDT'
        ]
        self.force_start_date = None
        self.force_end_date = None

        # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
        os.makedirs(CACHE_DIR, exist_ok=True)
        print("🧪 [Backtest V159.0] Hyper-Speed Jump Engine (CPU Optimized).")

    def set_date_range(self, start_str, end_str):
        """Select the era to process; both dates use the ``YYYY-MM-DD`` format."""
        self.force_start_date = start_str
        self.force_end_date = end_str

    def _era_bounds_ms(self):
        """Return ``(start_ms, end_ms)`` for the selected era, or ``None`` if unset.

        Both dates are interpreted as midnight UTC.
        """
        if not self.force_start_date:
            return None
        fmt = "%Y-%m-%d"
        dt_s = datetime.strptime(self.force_start_date, fmt).replace(tzinfo=timezone.utc)
        dt_e = datetime.strptime(self.force_end_date, fmt).replace(tzinfo=timezone.utc)
        return int(dt_s.timestamp() * 1000), int(dt_e.timestamp() * 1000)

    @staticmethod
    def _scores_path(sym, start_ms, end_ms):
        """Return the cache file path holding the scores of ``sym`` for one era."""
        safe_sym = sym.replace('/', '_')
        return f"{CACHE_DIR}/{safe_sym}_{start_ms}_{end_ms}_scores.pkl"

    async def _fetch_all_data_fast(self, sym, start_ms, end_ms):
        """Download all 1m candles of ``sym`` between two epoch-ms bounds.

        Requests are issued in batches of 1000 candles, at most 20 in flight,
        each retried up to three times. A batch that keeps failing is reported
        and skipped, which leaves a gap in the returned series.

        Returns:
            A list of ``[timestamp, open, high, low, close, volume]`` rows
            sorted by timestamp, or ``None`` when nothing was downloaded.
        """
        print(f" ⚡ [Network] Downloading {sym}...", flush=True)
        limit = 1000
        batch_starts = list(range(start_ms, end_ms, limit * 60 * 1000))

        all_candles = []
        sem = asyncio.Semaphore(20)

        async def _fetch_batch(timestamp):
            async with sem:
                last_error = None
                for _ in range(3):
                    try:
                        return await self.dm.exchange.fetch_ohlcv(sym, '1m', since=timestamp, limit=limit)
                    except Exception as exc:
                        # Exception (not a bare except) lets task cancellation propagate.
                        last_error = exc
                        await asyncio.sleep(0.5)
                print(f" ⚠️ [Network] {sym}: batch at {timestamp} failed after 3 attempts ({last_error}).", flush=True)
                return []

        chunk_size = 50
        for i in range(0, len(batch_starts), chunk_size):
            res = await asyncio.gather(*[_fetch_batch(t) for t in batch_starts[i:i + chunk_size]])
            for r in res:
                if r:
                    all_candles.extend(r)

        if not all_candles:
            return None
        df = pd.DataFrame(all_candles, columns=['timestamp', 'o', 'h', 'l', 'c', 'v'])
        df.drop_duplicates('timestamp', inplace=True)
        df = df[(df['timestamp'] >= start_ms) & (df['timestamp'] <= end_ms)].sort_values('timestamp')
        print(f" ✅ Downloaded {len(df)} candles.", flush=True)
        return df.values.tolist()

    # ----------------------------------------------------------------------
    # 🏎️ VECTORIZED INDICATORS
    # ----------------------------------------------------------------------
    def _calculate_indicators_vectorized(self, df, timeframe='1m'):
        """Add indicator and feature columns to an OHLCV frame, in place.

        Args:
            df: Frame with ``open``/``high``/``low``/``close``/``volume``
                columns. It is mutated and also returned.
            timeframe: Label of the frame's timeframe. ``'1d'`` adds
                ``Trend_Strong``; ``'1m'`` adds the short-horizon and lagged
                features.

        Returns:
            The same frame with the added columns and remaining NaNs set to 0.

        Raises:
            RuntimeError: If ``pandas_ta`` could not be imported; most columns
                computed here depend on it.
        """
        if df.empty:
            return df
        if not ta:
            # Previously this surfaced later as an opaque KeyError: 'vwap'.
            raise RuntimeError("pandas_ta is required to compute backtest indicators")

        for col in ('close', 'high', 'low', 'volume', 'open'):
            df[col] = df[col].astype(np.float64)

        def _filled(result, default, column=None):
            # pandas_ta returns None when it cannot compute an indicator
            # (e.g. too few rows); fall back to the neutral fill value.
            if result is None:
                return default
            if column is not None:
                result = result.iloc[:, column]
            return result.fillna(default)

        # EMAs
        for span in (9, 20, 21, 50, 200):
            df[f'ema{span}'] = df['close'].ewm(span=span, adjust=False).mean()

        df['RSI'] = _filled(ta.rsi(df['close'], length=14), 50.0)
        df['ATR'] = _filled(ta.atr(df['high'], df['low'], df['close'], length=14), 0.0)
        df['bb_width'] = _filled(ta.bbands(df['close'], length=20, std=2.0), 0.0, column=3)
        macd = ta.macd(df['close'])
        df['MACD'] = _filled(macd, 0.0, column=0)
        df['MACD_h'] = _filled(macd, 0.0, column=1)
        df['ADX'] = _filled(ta.adx(df['high'], df['low'], df['close'], length=14), 0.0, column=0)
        df['CCI'] = _filled(ta.cci(df['high'], df['low'], df['close'], length=20), 0.0)
        df['MFI'] = _filled(ta.mfi(df['high'], df['low'], df['close'], df['volume'], length=14), 50.0)
        df['slope'] = _filled(ta.slope(df['close'], length=7), 0.0)
        df['vwap'] = _filled(ta.vwap(df['high'], df['low'], df['close'], df['volume']), df['close'])

        c = df['close'].values
        df['EMA_9_dist'] = (c / df['ema9'].values) - 1
        df['EMA_21_dist'] = (c / df['ema21'].values) - 1
        df['EMA_50_dist'] = (c / df['ema50'].values) - 1
        df['EMA_200_dist'] = (c / df['ema200'].values) - 1
        df['VWAP_dist'] = (c / df['vwap'].values) - 1
        df['ATR_pct'] = df['ATR'] / (c + 1e-9)

        if timeframe == '1d':
            df['Trend_Strong'] = np.where(df['ADX'] > 25, 1.0, 0.0)

        df['vol_z'] = _z_roll_np(df['volume'].values, 20)
        df['rel_vol'] = df['volume'] / (df['volume'].rolling(50).mean() + 1e-9)
        df['log_ret'] = np.concatenate([[0], np.diff(np.log(c + 1e-9))])

        roll_min = df['low'].rolling(50).min()
        roll_max = df['high'].rolling(50).max()
        df['fib_pos'] = (c - roll_min) / (roll_max - roll_min + 1e-9)
        df['volatility'] = df['ATR_pct']

        # Relative change of EMA20 versus 5 bars earlier; the first 5 bars
        # compare against the first EMA value instead of wrapping around.
        e20 = df['ema20'].values
        e20_s = np.roll(e20, 5)
        e20_s[:5] = e20[0]
        df['trend_slope'] = (e20 - e20_s) / (e20_s + 1e-9)

        fib618 = roll_max - ((roll_max - roll_min) * 0.382)
        df['dist_fib618'] = (c - fib618) / (c + 1e-9)
        df['dist_ema50'] = df['EMA_50_dist']
        df['dist_ema200'] = df['EMA_200_dist']

        if timeframe == '1m':
            # NOTE(review): the extent of this 1m-only branch is assumed to run
            # through the lag loop below (the only readers visible in this
            # class use these columns from the 1m frame) — confirm against the
            # original file.
            df['return_1m'] = df['log_ret']
            df['rsi_14'] = df['RSI']
            e9 = df['ema9'].values
            e9_s = np.roll(e9, 1)
            e9_s[0] = e9[0]
            df['ema_9_slope'] = (e9 - e9_s) / (e9_s + 1e-9)
            df['ema_21_dist'] = df['EMA_21_dist']

            df['atr_z'] = _z_roll_np(df['ATR'].values, 100)
            df['vol_zscore_50'] = _z_roll_np(df['volume'].values, 50)
            rng = df['high'].values - df['low'].values
            df['candle_range'] = _z_roll_np(rng, 500)
            df['close_pos_in_range'] = (c - df['low'].values) / (rng + 1e-9)

            dollar_vol = c * df['volume'].values
            amihud = np.abs(df['log_ret']) / (dollar_vol + 1e-9)
            df['amihud'] = _z_roll_np(amihud, 500)

            # Signed volume: volume carries the sign of the close-to-close move.
            sign = np.sign(np.diff(c, prepend=c[0]))
            signed_vol = sign * df['volume'].values
            ofi = pd.Series(signed_vol).rolling(30).sum().fillna(0).values
            df['ofi'] = _z_roll_np(ofi, 500)
            df['vwap_dev'] = _z_roll_np(c - df['vwap'].values, 500)

            for lag in [1, 2, 3, 5, 10, 20]:
                df[f'log_ret_lag_{lag}'] = df['log_ret'].shift(lag).fillna(0)
                df[f'rsi_lag_{lag}'] = df['RSI'].shift(lag).fillna(50) / 100.0
                df[f'fib_pos_lag_{lag}'] = df['fib_pos'].shift(lag).fillna(0.5)
                df[f'volatility_lag_{lag}'] = df['volatility'].shift(lag).fillna(0)

        df.fillna(0, inplace=True)
        return df

    async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
        """Score every 1m candle of ``sym`` with the models and cache candidates.

        Builds indicator frames for 1m and the higher timeframes, runs each
        available model once over the whole series, keeps the candles that
        pass a coarse promise filter and pickles them to the era's score file.
        Nothing is recomputed when that file already exists. A model that
        fails is reported and its scores keep their neutral default.

        Args:
            sym: Market symbol such as ``'SOL/USDT'``.
            candles: Rows of ``[timestamp_ms, open, high, low, close, volume]``.
            start_ms: Era start (epoch ms), used only in the cache file name.
            end_ms: Era end (epoch ms), used only in the cache file name.
        """
        scores_file = self._scores_path(sym, start_ms, end_ms)
        if os.path.exists(scores_file):
            print(f" 📂 [{sym}] Data Exists -> Skipping.")
            return

        print(f" ⚙️ [CPU] Analyzing {sym}...", flush=True)
        t0 = time.time()

        df_1m = pd.DataFrame(candles, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
        df_1m['datetime'] = pd.to_datetime(df_1m['timestamp'], unit='ms')
        df_1m.set_index('datetime', inplace=True)
        df_1m = df_1m.sort_index()

        feat_1m = self._calculate_indicators_vectorized(df_1m.copy(), timeframe='1m')
        feat_1m['timestamp'] = feat_1m.index.floor('1min').astype(np.int64) // 10**6
        fast_1m = {col: feat_1m[col].values for col in feat_1m.columns}

        agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
        numpy_htf = {}
        for tf_str, rule in self._HTF_RULES:
            resampled = df_1m.resample(rule).agg(agg_dict).dropna()
            if resampled.empty:
                numpy_htf[tf_str] = {}
                continue
            resampled = self._calculate_indicators_vectorized(resampled, timeframe=tf_str)
            resampled['timestamp'] = resampled.index.astype(np.int64) // 10**6
            numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}

        arr_ts_1m = fast_1m['timestamp']
        n_rows = len(arr_ts_1m)
        zero_map = np.zeros(n_rows, dtype=int)

        def get_map(tf):
            # NOTE(review): searchsorted with the default side='left' picks the
            # first higher-timeframe bar whose open time is >= the candle time,
            # i.e. a bar that may still be forming. Confirm this alignment
            # matches how the models were trained before changing it.
            htf_ts = numpy_htf.get(tf, {}).get('timestamp')
            if htf_ts is None:
                return zero_map
            return np.clip(np.searchsorted(htf_ts, arr_ts_1m), 0, len(htf_ts) - 1)

        # One index map per timeframe, including '1d'. The previous
        # locals()-based lookup had no daily map, so every candle read daily bar 0.
        tf_maps = {tf_str: get_map(tf_str) for tf_str, _ in self._HTF_RULES}
        map_5m = tf_maps['5m']
        map_15m = tf_maps['15m']
        map_1h = tf_maps['1h']
        map_4h = tf_maps['4h']

        titan_model = getattr(self.proc.titan, 'model', None)
        oracle_dir = getattr(self.proc.oracle, 'model_direction', None)
        oracle_cols = getattr(self.proc.oracle, 'feature_cols', [])
        sniper_models = getattr(self.proc.sniper, 'models', [])
        sniper_cols = getattr(self.proc.sniper, 'feature_names', [])
        hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
        legacy_v2 = getattr(self.proc.guardian_legacy, 'model_v2', None)

        # --- BATCH PREDICTIONS ---
        global_titan_scores = np.full(n_rows, 0.5, dtype=np.float32)
        if titan_model:
            # NOTE(review): no 'BB_w' / 'BB_p' column is produced by
            # _calculate_indicators_vectorized, so those features are fed as zeros.
            titan_cols = [
                '5m_open', '5m_high', '5m_low', '5m_close', '5m_volume', '5m_RSI', '5m_MACD', '5m_MACD_h',
                '5m_CCI', '5m_ADX', '5m_EMA_9_dist', '5m_EMA_21_dist', '5m_EMA_50_dist', '5m_EMA_200_dist',
                '5m_BB_w', '5m_BB_p', '5m_MFI', '5m_VWAP_dist', '15m_timestamp', '15m_RSI', '15m_MACD',
                '15m_MACD_h', '15m_CCI', '15m_ADX', '15m_EMA_9_dist', '15m_EMA_21_dist', '15m_EMA_50_dist',
                '15m_EMA_200_dist', '15m_BB_w', '15m_BB_p', '15m_MFI', '15m_VWAP_dist', '1h_timestamp',
                '1h_RSI', '1h_MACD_h', '1h_EMA_50_dist', '1h_EMA_200_dist', '1h_ATR_pct', '4h_timestamp',
                '4h_RSI', '4h_MACD_h', '4h_EMA_50_dist', '4h_EMA_200_dist', '4h_ATR_pct', '1d_timestamp',
                '1d_RSI', '1d_EMA_200_dist', '1d_Trend_Strong'
            ]
            try:
                t_vecs = []
                for col in titan_cols:
                    tf, feat = col.split('_', 1)
                    source = numpy_htf.get(tf, {})
                    if feat in source:
                        t_vecs.append(source[feat][tf_maps.get(tf, zero_map)])
                    else:
                        t_vecs.append(np.zeros(n_rows))
                X_TITAN = np.column_stack(t_vecs)
                global_titan_scores = _revive_score_distribution(
                    titan_model.predict(xgb.DMatrix(X_TITAN, feature_names=titan_cols))
                )
            except Exception as exc:
                print(f" ⚠️ [{sym}] Titan scoring failed, using neutral scores: {exc}", flush=True)

        global_oracle_scores = np.full(n_rows, 0.5, dtype=np.float32)
        if oracle_dir:
            try:
                o_vecs = []
                for col in oracle_cols:
                    if col.startswith('1h_'):
                        o_vecs.append(numpy_htf['1h'].get(col[3:], np.zeros(n_rows))[map_1h])
                    elif col.startswith('15m_'):
                        o_vecs.append(numpy_htf['15m'].get(col[4:], np.zeros(n_rows))[map_15m])
                    elif col.startswith('4h_'):
                        o_vecs.append(numpy_htf['4h'].get(col[3:], np.zeros(n_rows))[map_4h])
                    elif col == 'sim_titan_score':
                        o_vecs.append(global_titan_scores)
                    elif col in ('sim_mc_score', 'sim_pattern_score'):
                        o_vecs.append(np.full(n_rows, 0.5))
                    else:
                        o_vecs.append(np.zeros(n_rows))
                X_ORACLE = np.column_stack(o_vecs)
                preds_o = oracle_dir.predict(X_ORACLE)
                if not (isinstance(preds_o, np.ndarray) and len(preds_o.shape) == 1):
                    preds_o = preds_o[:, 0]
                global_oracle_scores = _revive_score_distribution(preds_o)
            except Exception as exc:
                print(f" ⚠️ [{sym}] Oracle scoring failed, using neutral scores: {exc}", flush=True)

        global_sniper_scores = np.full(n_rows, 0.5, dtype=np.float32)
        if sniper_models:
            try:
                s_vecs = []
                for col in sniper_cols:
                    if col in fast_1m:
                        s_vecs.append(fast_1m[col])
                    elif col == 'atr' and 'atr_z' in fast_1m:
                        s_vecs.append(fast_1m['atr_z'])
                    else:
                        s_vecs.append(np.zeros(n_rows))
                X_SNIPER = np.column_stack(s_vecs)
                preds = [m.predict(X_SNIPER) for m in sniper_models]
                global_sniper_scores = _revive_score_distribution(np.mean(preds, axis=0))
            except Exception as exc:
                print(f" ⚠️ [{sym}] Sniper scoring failed, using neutral scores: {exc}", flush=True)

        global_v2_scores = np.zeros(n_rows, dtype=np.float32)
        if legacy_v2:
            try:
                htf_5m = numpy_htf['5m']
                htf_15m = numpy_htf['15m']
                base_cols = [
                    fast_1m['log_ret'], fast_1m['RSI'] / 100.0, fast_1m['fib_pos'], fast_1m['volatility'],
                    htf_5m['log_ret'][map_5m], htf_5m['RSI'][map_5m] / 100.0,
                    htf_5m['fib_pos'][map_5m], htf_5m['trend_slope'][map_5m],
                    htf_15m['log_ret'][map_15m], htf_15m['RSI'][map_15m] / 100.0,
                    htf_15m['dist_fib618'][map_15m], htf_15m['trend_slope'][map_15m],
                ]
                lags = []
                for lag in [1, 2, 3, 5, 10, 20]:
                    lags.extend([
                        fast_1m[f'log_ret_lag_{lag}'], fast_1m[f'rsi_lag_{lag}'],
                        fast_1m[f'fib_pos_lag_{lag}'], fast_1m[f'volatility_lag_{lag}'],
                    ])
                X_V2 = np.column_stack([*base_cols, *lags])
                preds = legacy_v2.predict(xgb.DMatrix(X_V2))
                # Multi-column output: column 2 is used as the risk score.
                global_v2_scores = preds[:, 2] if len(preds.shape) > 1 else preds
                global_v2_scores = global_v2_scores.flatten()
            except Exception as exc:
                print(f" ⚠️ [{sym}] Legacy V2 scoring failed, using zero risk: {exc}", flush=True)

        global_hydra_crash = np.zeros(n_rows, dtype=np.float32)
        global_hydra_give = np.zeros(n_rows, dtype=np.float32)
        if hydra_models:
            try:
                zeros = np.zeros(n_rows)
                # The old intermediate matrix also read fast_1m['atr'], a column
                # that is never created (the ATR column is 'ATR'). The KeyError
                # was swallowed, so Hydra never produced scores. Only these 16
                # columns were ever passed to the models.
                X_H = np.column_stack([
                    fast_1m['RSI'], numpy_htf['5m']['RSI'][map_5m], numpy_htf['15m']['RSI'][map_15m],
                    fast_1m['bb_width'], fast_1m['rel_vol'],
                    zeros, fast_1m['ATR_pct'], zeros, zeros, zeros, zeros, zeros, zeros,
                    global_oracle_scores, np.full(n_rows, 0.7), np.full(n_rows, 3.0)
                ])

                probs_c = hydra_models['crash'].predict_proba(X_H)[:, 1]
                global_hydra_crash = probs_c.astype(np.float32)

                probs_g = hydra_models['giveback'].predict_proba(X_H)[:, 1]
                global_hydra_give = probs_g.astype(np.float32)
            except Exception as exc:
                print(f" ⚠️ [{sym}] Hydra scoring failed, using zero risk: {exc}", flush=True)

        # Filter: keep candles where at least minimal promise exists (reduces size).
        rsi_1h = numpy_htf['1h'].get('RSI', np.zeros(n_rows))[map_1h]
        is_candidate_mask = (rsi_1h <= 70) & (global_titan_scores > 0.3) & (global_oracle_scores > 0.3)
        candidate_indices = np.where(is_candidate_mask)[0]
        # Drop the last 60 candles and the first 500 (the longest rolling
        # window used for the features is 500 samples).
        end_limit = n_rows - 60
        candidate_indices = candidate_indices[candidate_indices < end_limit]
        candidate_indices = candidate_indices[candidate_indices >= 500]

        print(f" 🌪️ Final List: {len(candidate_indices)} candidates ready for testing.", flush=True)

        ai_results = pd.DataFrame({
            'timestamp': arr_ts_1m[candidate_indices],
            'symbol': sym,
            'close': fast_1m['close'][candidate_indices],
            'real_titan': global_titan_scores[candidate_indices],
            'oracle_conf': global_oracle_scores[candidate_indices],
            'sniper_score': global_sniper_scores[candidate_indices],
            'pattern_score': np.full(len(candidate_indices), 0.5),
            'risk_hydra_crash': global_hydra_crash[candidate_indices],
            'risk_hydra_giveback': global_hydra_give[candidate_indices],
            'risk_legacy_v2': global_v2_scores[candidate_indices],
            'time_hydra_crash': np.zeros(len(candidate_indices), dtype=int),
            'l1_score': 50.0
        })

        dt = time.time() - t0
        if not ai_results.empty:
            ai_results.to_pickle(scores_file)
            print(f" ✅ [{sym}] Completed in {dt:.2f} seconds. ({len(ai_results)} signals)", flush=True)
        gc.collect()

    async def generate_truth_data(self):
        """Download and score every target coin for the selected era.

        Does nothing when no date range has been set. Coins whose score file
        already exists are skipped before any network request is made.
        """
        bounds = self._era_bounds_ms()
        if bounds is None:
            return
        ms_s, ms_e = bounds
        print(f"\n🚜 [Phase 1] Processing Era: {self.force_start_date} -> {self.force_end_date}")
        for sym in self.TARGET_COINS:
            if os.path.exists(self._scores_path(sym, ms_s, ms_e)):
                # Check the cache first so a cached coin costs no download.
                print(f" 📂 [{sym}] Data Exists -> Skipping.")
                continue
            candles = await self._fetch_all_data_fast(sym, ms_s, ms_e)
            if candles:
                await self._process_data_in_memory(sym, candles, ms_s, ms_e)

    @staticmethod
    def _worker_optimize(combinations_batch, scores_files, initial_capital, fees_pct, max_slots):
        """Replay the trading simulation for every threshold combination.

        Only the cached candidate rows are iterated, so exits are evaluated
        only on candles that survived the candidate filter — an approximation
        accepted for speed.

        Args:
            combinations_batch: Iterable of dicts with the keys ``L1_SCORE``,
                ``ORACLE``, ``SNIPER``, ``TITAN``, ``HYDRA_CRASH``,
                ``HYDRA_GIVEBACK``, ``LEGACY_V2`` and optionally ``PATTERN``.
            scores_files: Paths of pickled score frames written by
                ``_process_data_in_memory``. Unreadable files are skipped.
            initial_capital: Starting balance.
            fees_pct: Fee fraction charged on each side of a trade.
            max_slots: Maximum number of simultaneously open positions.

        Returns:
            One stats dict per combination, or ``[]`` when no data was loaded.
        """
        print(f" ⏳ [System] Loading {len(scores_files)} datasets...", flush=True)
        data = []
        for f in scores_files:
            try:
                data.append(pd.read_pickle(f))
            except Exception as exc:
                print(f" ⚠️ [System] Skipping unreadable dataset {f}: {exc}", flush=True)
        if not data:
            return []
        df = pd.concat(data).sort_values('timestamp').reset_index(drop=True)

        # Pre-load plain arrays; the replay loop below is pure Python, so the
        # per-row values are converted to lists once instead of being indexed
        # out of numpy arrays on every iteration.
        closes = df['close'].values.astype(float).tolist()
        sym_ids = pd.factorize(df['symbol'])[0].tolist()

        oracle = df['oracle_conf'].values
        sniper = df['sniper_score'].values
        titan = df['real_titan'].values
        pattern = df['pattern_score'].values
        l1 = df['l1_score'].values
        hydra = df['risk_hydra_crash'].values
        hydra_give = df['risk_hydra_giveback'].values
        legacy = df['risk_legacy_v2'].values

        N = len(closes)
        print(f" 🚀 [System] Testing {len(combinations_batch)} configs on {N} candidates...", flush=True)

        res = []
        for cfg in combinations_batch:
            # Entry and guardian conditions are evaluated for all rows at once.
            entry_flags = (
                (l1 >= cfg['L1_SCORE'])
                & (oracle >= cfg['ORACLE'])
                & (sniper >= cfg['SNIPER'])
                & (titan >= cfg['TITAN'])
                & (pattern >= cfg.get('PATTERN', 0.10))
            ).tolist()
            guard_flags = (
                (hydra > cfg['HYDRA_CRASH'])
                | (hydra_give > cfg['HYDRA_GIVEBACK'])
                | (legacy > cfg['LEGACY_V2'])
            ).tolist()

            # Simulation state
            pos = {}  # symbol id -> (entry_price, size)
            bal = float(initial_capital)
            alloc = 0.0
            trade_pnls = []

            for s, p, can_enter, guarded in zip(sym_ids, closes, entry_flags, guard_flags):
                # A. Check exits (only when this symbol is held).
                held = pos.get(s)
                if held is not None:
                    entry_p, size_val = held
                    pnl = (p - entry_p) / entry_p

                    # A guardian alarm only closes the trade once price
                    # confirms it; +4% / -2% are the fixed take-profit / stop.
                    confirmed = guarded and (pnl < -0.0015)
                    if confirmed or (pnl > 0.04) or (pnl < -0.02):
                        realized = pnl - (fees_pct * 2)
                        bal += size_val * (1.0 + realized)
                        alloc -= size_val
                        del pos[s]
                        trade_pnls.append(realized)
                        continue  # Can't buy on the same candle we sold

                # B. Check entries.
                if can_enter and len(pos) < max_slots and s not in pos and bal >= 5.0:
                    size = min(10.0, bal * 0.98)
                    pos[s] = (p, size)
                    bal -= size
                    alloc += size

            # Stats. Positions still open at the end are valued at cost.
            final_bal = bal + alloc
            profit = final_bal - initial_capital
            tot = len(trade_pnls)
            winning = [x for x in trade_pnls if x > 0]
            losing = [x for x in trade_pnls if x <= 0]

            win_rate = (len(winning) / tot * 100) if tot > 0 else 0.0
            avg_win = float(np.mean(winning)) if winning else 0.0
            avg_loss = float(np.mean(losing)) if losing else 0.0
            gross_p = sum(winning)
            gross_l = abs(sum(losing))
            profit_factor = (gross_p / gross_l) if gross_l > 0 else 99.9

            # Longest runs of consecutive wins / losses.
            max_win_s = 0
            max_loss_s = 0
            curr_w = 0
            curr_l = 0
            for x in trade_pnls:
                if x > 0:
                    curr_w += 1
                    curr_l = 0
                    max_win_s = max(max_win_s, curr_w)
                else:
                    curr_l += 1
                    curr_w = 0
                    max_loss_s = max(max_loss_s, curr_l)

            res.append({
                'config': cfg, 'final_balance': final_bal, 'net_profit': profit,
                'total_trades': tot, 'win_rate': win_rate, 'profit_factor': profit_factor,
                'win_count': len(winning), 'loss_count': len(losing),
                'avg_win': avg_win, 'avg_loss': avg_loss,
                'max_win_streak': max_win_s, 'max_loss_streak': max_loss_s,
                'consensus_agreement_rate': 0.0, 'high_consensus_win_rate': 0.0
            })
        return res

    async def run_optimization(self, target_regime="RANGE"):
        """Build the score cache, grid-search the thresholds and print a report.

        Args:
            target_regime: Label printed in the report header.

        Returns:
            ``(mapped_config, best_stats)`` for the most profitable
            combination, or ``(None, {'net_profit': 0.0, 'win_rate': 0.0})``
            when no cached data could be loaded.
        """
        await self.generate_truth_data()

        keys = list(self.GRID_RANGES.keys())
        values = list(self.GRID_RANGES.values())
        combos = [dict(zip(keys, c)) for c in itertools.product(*values)]

        # Only load the score files of the selected era; globbing every pickle
        # would mix the candidates of previously processed eras into this run.
        bounds = self._era_bounds_ms()
        file_pattern = "*.pkl" if bounds is None else f"*_{bounds[0]}_{bounds[1]}_scores.pkl"
        files = sorted(glob.glob(os.path.join(CACHE_DIR, file_pattern)))

        results_list = self._worker_optimize(combos, files, self.INITIAL_CAPITAL, self.TRADING_FEES, self.MAX_SLOTS)
        if not results_list:
            return None, {'net_profit': 0.0, 'win_rate': 0.0}

        best = max(results_list, key=lambda x: x['net_profit'])

        mapped_config = {
            'w_titan': best['config']['TITAN'],
            'w_struct': best['config']['PATTERN'],
            'thresh': best['config']['L1_SCORE'],
            'oracle_thresh': best['config']['ORACLE'],
            'sniper_thresh': best['config']['SNIPER'],
            'hydra_thresh': best['config']['HYDRA_CRASH'],
            'legacy_thresh': best['config']['LEGACY_V2']
        }

        # Diagnosis
        diag = []
        if best['total_trades'] > 2000 and best['net_profit'] < 10:
            diag.append("⚠️ Overtrading")
        if best['win_rate'] > 55 and best['net_profit'] < 0:
            diag.append("⚠️ Fee Burn")
        if abs(best['avg_loss']) > best['avg_win'] and best['win_count'] > 0:
            diag.append("⚠️ Risk/Reward Inversion")
        if best['max_loss_streak'] > 10:
            diag.append("⚠️ Consecutive Loss Risk")
        if not diag:
            diag.append("✅ System Healthy")

        print("\n" + "=" * 60)
        print(f"🏆 CHAMPION REPORT [{target_regime}]:")
        print(f" 💰 Final Balance: ${best['final_balance']:,.2f}")
        print(f" 🚀 Net PnL: ${best['net_profit']:,.2f}")
        print("-" * 60)
        print(f" 📊 Total Trades: {best['total_trades']}")
        print(f" 📈 Win Rate: {best['win_rate']:.1f}%")
        print(f" ✅ Winning Trades: {best['win_count']} (Avg: {best['avg_win']*100:.2f}%)")
        print(f" ❌ Losing Trades: {best['loss_count']} (Avg: {best['avg_loss']*100:.2f}%)")
        print(f" 🌊 Max Streaks: Win {best['max_win_streak']} | Loss {best['max_loss_streak']}")
        print(f" ⚖️ Profit Factor: {best['profit_factor']:.2f}")
        print("-" * 60)
        print(" 🧠 CONSENSUS ANALYTICS:")
        print(f" 🤝 Model Agreement Rate: {best.get('consensus_agreement_rate', 0.0):.1f}%")
        print(f" 🌟 High-Consensus Win Rate: {best.get('high_consensus_win_rate', 0.0):.1f}%")
        print("-" * 60)
        print(f" 🩺 DIAGNOSIS: {' '.join(diag)}")

        p_str = "".join(
            f"{k}={v:.2f} | " if isinstance(v, float) else f"{k}={v} | "
            for k, v in mapped_config.items()
        )
        print(f" ⚙️ Config: {p_str}")
        print("=" * 60)

        return mapped_config, best
592
+
593
async def run_strategic_optimization_task():
    """Optimise thresholds for each market-regime scenario and persist them.

    Builds the storage, data and model services, runs one optimisation per
    scenario and submits every non-empty winning config to the adaptive hub.
    The hub state is saved once after all scenarios, and the data manager is
    closed even when a step fails.
    """
    print("\n🧪 [STRATEGIC BACKTEST] Hyper-Speed Jump Engine...")
    r2 = R2Service()
    dm = DataManager(None, None, r2)
    proc = MLProcessor(dm)
    try:
        await dm.initialize()
        await proc.initialize()
        if proc.guardian_hydra:
            proc.guardian_hydra.set_silent_mode(True)

        hub = AdaptiveHub(r2)
        await hub.initialize()
        backtester = HeavyDutyBacktester(dm, proc)

        # (regime label, era start, era end)
        eras = (
            ("DEAD", "2023-06-01", "2023-08-01"),
            ("RANGE", "2024-07-01", "2024-09-30"),
            ("BULL", "2024-01-01", "2024-03-30"),
            ("BEAR", "2023-08-01", "2023-09-15"),
        )
        for regime, era_start, era_end in eras:
            backtester.set_date_range(era_start, era_end)
            best_cfg, best_stats = await backtester.run_optimization(regime)
            if best_cfg:
                hub.submit_challenger(regime, best_cfg, best_stats)

        await hub._save_state_to_r2()
        print("✅ [System] DNA Updated.")
    finally:
        print("🔌 [System] Closing connections...")
        await dm.close()


if __name__ == "__main__":
    asyncio.run(run_strategic_optimization_task())