Spaces:
Paused
Paused
Update backtest_engine.py
Browse files- backtest_engine.py +142 -322
backtest_engine.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# ============================================================
|
| 2 |
-
# 🧪 backtest_engine.py (
|
| 3 |
# ============================================================
|
| 4 |
|
| 5 |
import asyncio
|
|
@@ -67,42 +67,28 @@ def safe_ta(ind_output, index, fill_method='smart'):
|
|
| 67 |
return s.fillna(0.0).astype('float64')
|
| 68 |
|
| 69 |
def _zv(x):
|
| 70 |
-
"""Z-Score Vectorized for Patterns"""
|
| 71 |
with np.errstate(divide='ignore', invalid='ignore'):
|
| 72 |
x = np.asarray(x, dtype="float32")
|
| 73 |
m = np.nanmean(x, axis=0)
|
| 74 |
s = np.nanstd(x, axis=0) + 1e-9
|
| 75 |
return np.nan_to_num((x - m) / s, nan=0.0)
|
| 76 |
|
| 77 |
-
# ============================================================
|
| 78 |
-
# 🧩 PATTERN RECOGNITION HELPER
|
| 79 |
-
# ============================================================
|
| 80 |
def _transform_window_for_pattern(df_window):
|
| 81 |
-
"""Prepares a window for the CNN/Pattern Model"""
|
| 82 |
try:
|
| 83 |
c = df_window['close'].values.astype('float32')
|
| 84 |
o = df_window['open'].values.astype('float32')
|
| 85 |
h = df_window['high'].values.astype('float32')
|
| 86 |
l = df_window['low'].values.astype('float32')
|
| 87 |
v = df_window['volume'].values.astype('float32')
|
| 88 |
-
|
| 89 |
-
# 1. Base Z-Score
|
| 90 |
base = np.stack([o, h, l, c, v], axis=1)
|
| 91 |
base_z = _zv(base)
|
| 92 |
-
|
| 93 |
-
# 2. Extra Features
|
| 94 |
lr = np.zeros_like(c); lr[1:] = np.diff(np.log1p(c))
|
| 95 |
rng = (h - l) / (c + 1e-9)
|
| 96 |
extra = np.stack([lr, rng], axis=1)
|
| 97 |
extra_z = _zv(extra)
|
| 98 |
-
|
| 99 |
-
# 3. Indicators
|
| 100 |
def _ema(arr, n): return pd.Series(arr).ewm(span=n, adjust=False).mean().values
|
| 101 |
ema9 = _ema(c, 9); ema21 = _ema(c, 21); ema50 = _ema(c, 50); ema200 = _ema(c, 200)
|
| 102 |
-
|
| 103 |
-
slope21 = np.gradient(ema21)
|
| 104 |
-
slope50 = np.gradient(ema50)
|
| 105 |
-
|
| 106 |
delta = np.diff(c, prepend=c[0])
|
| 107 |
up, down = delta.copy(), delta.copy()
|
| 108 |
up[up < 0] = 0; down[down > 0] = 0
|
|
@@ -110,16 +96,9 @@ def _transform_window_for_pattern(df_window):
|
|
| 110 |
roll_down = pd.Series(down).abs().ewm(alpha=1/14, adjust=False).mean().values
|
| 111 |
rs = roll_up / (roll_down + 1e-9)
|
| 112 |
rsi = 100.0 - (100.0 / (1.0 + rs))
|
| 113 |
-
|
| 114 |
indicators = np.stack([ema9, ema21, ema50, ema200, slope21, slope50, rsi], axis=1)
|
| 115 |
-
# Pad to match shape if needed or specific model reqs
|
| 116 |
-
# Assuming model expects specific width, here we stick to basic concat
|
| 117 |
-
|
| 118 |
-
# Flatten for XGBoost Pattern Model
|
| 119 |
X_seq = np.concatenate([base_z, extra_z, _zv(indicators)], axis=1)
|
| 120 |
X_flat = X_seq.flatten()
|
| 121 |
-
|
| 122 |
-
# Add Stat Placeholders (Matches training logic)
|
| 123 |
X_stat = np.array([0.5, 0.0, 0.5], dtype="float32")
|
| 124 |
return np.concatenate([X_flat, X_stat])
|
| 125 |
except: return None
|
|
@@ -135,7 +114,6 @@ class HeavyDutyBacktester:
|
|
| 135 |
self.INITIAL_CAPITAL = 10.0
|
| 136 |
self.TRADING_FEES = 0.001
|
| 137 |
self.MAX_SLOTS = 4
|
| 138 |
-
|
| 139 |
self.TARGET_COINS = [
|
| 140 |
'SOL/USDT', 'XRP/USDT', 'DOGE/USDT', 'ADA/USDT', 'AVAX/USDT', 'LINK/USDT',
|
| 141 |
'TON/USDT', 'INJ/USDT', 'APT/USDT', 'OP/USDT', 'ARB/USDT', 'SUI/USDT',
|
|
@@ -147,20 +125,15 @@ class HeavyDutyBacktester:
|
|
| 147 |
'STRK/USDT', 'BLUR/USDT', 'ALT/USDT', 'JUP/USDT', 'PENDLE/USDT', 'ETHFI/USDT',
|
| 148 |
'MEME/USDT', 'ATOM/USDT'
|
| 149 |
]
|
| 150 |
-
|
| 151 |
self.force_start_date = None
|
| 152 |
self.force_end_date = None
|
| 153 |
-
|
| 154 |
if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
|
| 155 |
-
print(f"🧪 [Backtest
|
| 156 |
|
| 157 |
def set_date_range(self, start_str, end_str):
|
| 158 |
self.force_start_date = start_str
|
| 159 |
self.force_end_date = end_str
|
| 160 |
|
| 161 |
-
# ==============================================================
|
| 162 |
-
# ⚡ FAST DATA DOWNLOADER
|
| 163 |
-
# ==============================================================
|
| 164 |
async def _fetch_all_data_fast(self, sym, start_ms, end_ms):
|
| 165 |
print(f" ⚡ [Network] Downloading {sym}...", flush=True)
|
| 166 |
limit = 1000
|
|
@@ -172,7 +145,6 @@ class HeavyDutyBacktester:
|
|
| 172 |
current += duration_per_batch
|
| 173 |
all_candles = []
|
| 174 |
sem = asyncio.Semaphore(20)
|
| 175 |
-
|
| 176 |
async def _fetch_batch(timestamp):
|
| 177 |
async with sem:
|
| 178 |
for _ in range(3):
|
|
@@ -180,7 +152,6 @@ class HeavyDutyBacktester:
|
|
| 180 |
return await self.dm.exchange.fetch_ohlcv(sym, '1m', since=timestamp, limit=limit)
|
| 181 |
except: await asyncio.sleep(0.5)
|
| 182 |
return []
|
| 183 |
-
|
| 184 |
chunk_size = 50
|
| 185 |
for i in range(0, len(tasks), chunk_size):
|
| 186 |
chunk_tasks = tasks[i:i + chunk_size]
|
|
@@ -188,7 +159,6 @@ class HeavyDutyBacktester:
|
|
| 188 |
results = await asyncio.gather(*futures)
|
| 189 |
for res in results:
|
| 190 |
if res: all_candles.extend(res)
|
| 191 |
-
|
| 192 |
if not all_candles: return None
|
| 193 |
df = pd.DataFrame(all_candles, columns=['timestamp', 'o', 'h', 'l', 'c', 'v'])
|
| 194 |
df.drop_duplicates('timestamp', inplace=True)
|
|
@@ -197,162 +167,112 @@ class HeavyDutyBacktester:
|
|
| 197 |
print(f" ✅ Downloaded {len(df)} candles.", flush=True)
|
| 198 |
return df.values.tolist()
|
| 199 |
|
| 200 |
-
# ==============================================================
|
| 201 |
-
# 🏎️ VECTORIZED INDICATORS (EXACT MATCH TO LIVE SYSTEM)
|
| 202 |
-
# ==============================================================
|
| 203 |
def _calculate_indicators_vectorized(self, df, timeframe='1m'):
|
| 204 |
-
# 1. Clean Types
|
| 205 |
cols = ['close', 'high', 'low', 'volume', 'open']
|
| 206 |
for c in cols: df[c] = df[c].astype(np.float64)
|
| 207 |
idx = df.index
|
| 208 |
-
|
| 209 |
-
# ---------------------------------------------------------
|
| 210 |
-
# 🧠 PART 1: TITAN FEATURES
|
| 211 |
-
# ---------------------------------------------------------
|
| 212 |
df['RSI'] = safe_ta(ta.rsi(df['close'], length=14), idx, 50)
|
| 213 |
-
|
| 214 |
macd = ta.macd(df['close'])
|
| 215 |
if macd is not None:
|
| 216 |
df['MACD'] = safe_ta(macd.iloc[:, 0], idx, 0)
|
| 217 |
df['MACD_h'] = safe_ta(macd.iloc[:, 1], idx, 0)
|
| 218 |
-
else:
|
| 219 |
-
df['MACD'] = 0.0; df['MACD_h'] = 0.0
|
| 220 |
-
|
| 221 |
df['CCI'] = safe_ta(ta.cci(df['high'], df['low'], df['close'], length=20), idx, 0)
|
| 222 |
-
|
| 223 |
adx = ta.adx(df['high'], df['low'], df['close'], length=14)
|
| 224 |
if adx is not None: df['ADX'] = safe_ta(adx.iloc[:, 0], idx, 0)
|
| 225 |
else: df['ADX'] = 0.0
|
| 226 |
-
|
| 227 |
-
# ✅ NEW: Trend_Strong for 1D timeframe (Titan requirement)
|
| 228 |
-
if timeframe == '1d':
|
| 229 |
-
df['Trend_Strong'] = np.where(df['ADX'] > 25, 1.0, 0.0)
|
| 230 |
-
|
| 231 |
for p in [9, 21, 50, 200]:
|
| 232 |
ema = safe_ta(ta.ema(df['close'], length=p), idx, 0)
|
| 233 |
df[f'EMA_{p}_dist'] = ((df['close'] / ema.replace(0, np.nan)) - 1).fillna(0)
|
| 234 |
df[f'ema{p}'] = ema
|
| 235 |
-
|
| 236 |
df['ema20'] = safe_ta(ta.ema(df['close'], length=20), idx, df['close'])
|
| 237 |
-
|
| 238 |
bb = ta.bbands(df['close'], length=20, std=2.0)
|
| 239 |
if bb is not None:
|
| 240 |
w = ((bb.iloc[:, 2] - bb.iloc[:, 0]) / bb.iloc[:, 1].replace(0, np.nan)).fillna(0)
|
| 241 |
p = ((df['close'] - bb.iloc[:, 0]) / (bb.iloc[:, 2] - bb.iloc[:, 0]).replace(0, np.nan)).fillna(0)
|
| 242 |
-
df['BB_w'] = w; df['BB_p'] = p
|
| 243 |
-
|
| 244 |
-
else:
|
| 245 |
-
df['BB_w'] = 0; df['BB_p'] = 0; df['bb_width'] = 0
|
| 246 |
-
|
| 247 |
df['MFI'] = safe_ta(ta.mfi(df['high'], df['low'], df['close'], df['volume'], length=14), idx, 50)
|
| 248 |
-
|
| 249 |
vwap = ta.vwap(df['high'], df['low'], df['close'], df['volume'])
|
| 250 |
if vwap is not None:
|
| 251 |
df['VWAP_dist'] = ((df['close'] / vwap.replace(0, np.nan)) - 1).fillna(0)
|
| 252 |
df['vwap'] = vwap
|
| 253 |
-
else:
|
| 254 |
-
df['VWAP_dist'] = 0.0
|
| 255 |
-
df['vwap'] = df['close']
|
| 256 |
-
|
| 257 |
df['atr'] = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=14), idx, 0)
|
| 258 |
df['atr_pct'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
|
| 259 |
-
df['ATR_pct'] = df['atr_pct']
|
| 260 |
|
| 261 |
-
# ---------------------------------------------------------
|
| 262 |
-
# 🎯 PART 2: SNIPER FEATURES (1m Only)
|
| 263 |
-
# ---------------------------------------------------------
|
| 264 |
if timeframe == '1m':
|
| 265 |
df['return_1m'] = df['close'].pct_change().fillna(0)
|
| 266 |
df['return_3m'] = df['close'].pct_change(3).fillna(0)
|
| 267 |
df['return_5m'] = df['close'].pct_change(5).fillna(0)
|
| 268 |
df['return_15m'] = df['close'].pct_change(15).fillna(0)
|
| 269 |
-
|
| 270 |
df['rsi_14'] = df['RSI']
|
| 271 |
e9 = df['ema9'].replace(0, np.nan)
|
| 272 |
df['ema_9_slope'] = ((df['ema9'] - df['ema9'].shift(1)) / e9.shift(1)).fillna(0)
|
| 273 |
df['ema_21_dist'] = df['EMA_21_dist']
|
| 274 |
-
|
| 275 |
atr_100 = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=100), idx, 0)
|
| 276 |
df['atr_z'] = _z_roll(atr_100)
|
| 277 |
-
|
| 278 |
df['vol_zscore_50'] = _z_roll(df['volume'], 50)
|
| 279 |
-
|
| 280 |
rng = (df['high'] - df['low']).replace(0, 1e-9)
|
| 281 |
df['candle_range'] = _z_roll(rng, 500)
|
| 282 |
df['close_pos_in_range'] = ((df['close'] - df['low']) / rng).fillna(0.5)
|
| 283 |
-
|
| 284 |
df['dollar_vol'] = df['close'] * df['volume']
|
| 285 |
amihud_raw = (df['return_1m'].abs() / df['dollar_vol'].replace(0, np.nan)).fillna(0)
|
| 286 |
df['amihud'] = _z_roll(amihud_raw)
|
| 287 |
-
|
| 288 |
dp = df['close'].diff()
|
| 289 |
roll_cov = dp.rolling(64).cov(dp.shift(1))
|
| 290 |
roll_spread_raw = (2 * np.sqrt(np.maximum(0, -roll_cov))).fillna(0)
|
| 291 |
df['roll_spread'] = _z_roll(roll_spread_raw)
|
| 292 |
-
|
| 293 |
sign = np.sign(df['close'].diff()).fillna(0)
|
| 294 |
signed_vol = sign * df['volume']
|
| 295 |
-
ofi_raw = signed_vol.rolling(30).sum()
|
| 296 |
df['ofi'] = _z_roll(ofi_raw)
|
| 297 |
-
|
| 298 |
buy_vol = (sign > 0) * df['volume']
|
| 299 |
sell_vol = (sign < 0) * df['volume']
|
| 300 |
imb = (buy_vol.rolling(60).sum() - sell_vol.rolling(60).sum()).abs()
|
| 301 |
tot = df['volume'].rolling(60).sum().replace(0, np.nan)
|
| 302 |
df['vpin'] = (imb / tot).fillna(0)
|
| 303 |
-
|
| 304 |
vwap_win = 20
|
| 305 |
v_short = (df['dollar_vol'].rolling(vwap_win).sum() / df['volume'].rolling(vwap_win).sum().replace(0, np.nan)).fillna(df['close'])
|
| 306 |
df['vwap_dev'] = _z_roll(df['close'] - v_short)
|
| 307 |
-
|
| 308 |
rv_gk = ((np.log(df['high'] / df['low'])**2) / 2) - ((2 * np.log(2) - 1) * (np.log(df['close'] / df['open'])**2))
|
| 309 |
df['rv_gk'] = _z_roll(rv_gk)
|
| 310 |
-
|
| 311 |
df['L_score'] = (df['vol_zscore_50'] - df['amihud'] - df['roll_spread'] - df['rv_gk'].abs() - df['vwap_dev'].abs() + df['ofi']).fillna(0)
|
| 312 |
|
| 313 |
-
# ---------------------------------------------------------
|
| 314 |
-
# 🧠 PART 3: ORACLE / HYDRA / LEGACY EXTRAS
|
| 315 |
-
# ---------------------------------------------------------
|
| 316 |
df['slope'] = safe_ta(ta.slope(df['close'], length=7), idx, 0)
|
| 317 |
vol_mean = df['volume'].rolling(20).mean()
|
| 318 |
vol_std = df['volume'].rolling(20).std().replace(0, np.nan)
|
| 319 |
df['vol_z'] = ((df['volume'] - vol_mean) / vol_std).fillna(0)
|
| 320 |
-
|
| 321 |
df['rel_vol'] = df['volume'] / (df['volume'].rolling(50).mean() + 1e-9)
|
| 322 |
-
|
| 323 |
df['log_ret'] = np.log(df['close'] / df['close'].shift(1).replace(0, np.nan)).fillna(0)
|
| 324 |
roll_max = df['high'].rolling(50).max()
|
| 325 |
roll_min = df['low'].rolling(50).min()
|
| 326 |
diff = (roll_max - roll_min).replace(0, 1e-9)
|
| 327 |
df['fib_pos'] = ((df['close'] - roll_min) / diff).fillna(0.5)
|
| 328 |
-
|
|
|
|
| 329 |
df['volatility'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
|
| 330 |
-
|
| 331 |
fib618 = roll_max - (diff * 0.382)
|
| 332 |
df['dist_fib618'] = ((df['close'] - fib618) / df['close'].replace(0, np.nan)).fillna(0)
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
df['ema200'] =
|
| 336 |
-
df['dist_ema200'] = ((df['close'] -
|
| 337 |
-
|
| 338 |
if timeframe == '1m':
|
| 339 |
for lag in [1, 2, 3, 5, 10, 20]:
|
| 340 |
df[f'log_ret_lag_{lag}'] = df['log_ret'].shift(lag).fillna(0)
|
| 341 |
df[f'rsi_lag_{lag}'] = (df['RSI'].shift(lag) / 100.0).fillna(0.5)
|
| 342 |
df[f'fib_pos_lag_{lag}'] = df['fib_pos'].shift(lag).fillna(0.5)
|
| 343 |
df[f'volatility_lag_{lag}'] = df['volatility'].shift(lag).fillna(0)
|
| 344 |
-
|
| 345 |
df.fillna(0, inplace=True)
|
| 346 |
return df
|
| 347 |
|
| 348 |
-
# ==============================================================
|
| 349 |
-
# 🧠 CPU PROCESSING
|
| 350 |
-
# ==============================================================
|
| 351 |
async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
|
| 352 |
safe_sym = sym.replace('/', '_')
|
| 353 |
period_suffix = f"{start_ms}_{end_ms}"
|
| 354 |
scores_file = f"{CACHE_DIR}/{safe_sym}_{period_suffix}_scores.pkl"
|
| 355 |
-
|
| 356 |
if os.path.exists(scores_file):
|
| 357 |
print(f" 📂 [{sym}] Data Exists -> Skipping.")
|
| 358 |
return
|
|
@@ -366,14 +286,11 @@ class HeavyDutyBacktester:
|
|
| 366 |
df_1m = df_1m.sort_index()
|
| 367 |
|
| 368 |
frames = {}
|
| 369 |
-
agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
|
| 370 |
-
|
| 371 |
-
# 1. Calc 1m
|
| 372 |
frames['1m'] = self._calculate_indicators_vectorized(df_1m.copy(), timeframe='1m')
|
| 373 |
frames['1m']['timestamp'] = frames['1m'].index.floor('1min').astype(np.int64) // 10**6
|
| 374 |
fast_1m = {col: frames['1m'][col].values for col in frames['1m'].columns}
|
| 375 |
|
| 376 |
-
|
| 377 |
numpy_htf = {}
|
| 378 |
for tf_str, tf_code in [('5m', '5T'), ('15m', '15T'), ('1h', '1h'), ('4h', '4h'), ('1d', '1D')]:
|
| 379 |
resampled = df_1m.resample(tf_code).agg(agg_dict).dropna()
|
|
@@ -382,105 +299,51 @@ class HeavyDutyBacktester:
|
|
| 382 |
frames[tf_str] = resampled
|
| 383 |
numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}
|
| 384 |
|
| 385 |
-
# 3. Global Maps
|
| 386 |
arr_ts_1m = fast_1m['timestamp']
|
| 387 |
map_5m = np.clip(np.searchsorted(numpy_htf['5m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['5m']['timestamp']) - 1)
|
| 388 |
map_15m = np.clip(np.searchsorted(numpy_htf['15m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['15m']['timestamp']) - 1)
|
| 389 |
map_1h = np.clip(np.searchsorted(numpy_htf['1h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['1h']['timestamp']) - 1)
|
| 390 |
map_4h = np.clip(np.searchsorted(numpy_htf['4h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['4h']['timestamp']) - 1)
|
| 391 |
-
|
| 392 |
-
if '1d' in numpy_htf:
|
| 393 |
-
map_1d = np.clip(np.searchsorted(numpy_htf['1d']['timestamp'], arr_ts_1m), 0, len(numpy_htf['1d']['timestamp']) - 1)
|
| 394 |
-
else:
|
| 395 |
-
map_1d = np.zeros(len(arr_ts_1m), dtype=int)
|
| 396 |
|
| 397 |
-
# 4. Load Models
|
| 398 |
hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
|
| 399 |
hydra_cols = getattr(self.proc.guardian_hydra, 'feature_cols', []) if self.proc.guardian_hydra else []
|
| 400 |
legacy_v2 = getattr(self.proc.guardian_legacy, 'model_v2', None)
|
| 401 |
-
|
| 402 |
oracle_dir = getattr(self.proc.oracle, 'model_direction', None)
|
| 403 |
oracle_cols = getattr(self.proc.oracle, 'feature_cols', [])
|
| 404 |
-
|
| 405 |
sniper_models = getattr(self.proc.sniper, 'models', [])
|
| 406 |
sniper_cols = getattr(self.proc.sniper, 'feature_names', [])
|
| 407 |
-
|
| 408 |
titan_model = getattr(self.proc.titan, 'model', None)
|
| 409 |
-
# ✅ TITAN EXACT FEATURES LIST (From your pickle)
|
| 410 |
-
titan_cols = [
|
| 411 |
-
'5m_open', '5m_high', '5m_low', '5m_close', '5m_volume', '5m_RSI', '5m_MACD', '5m_MACD_h',
|
| 412 |
-
'5m_CCI', '5m_ADX', '5m_EMA_9_dist', '5m_EMA_21_dist', '5m_EMA_50_dist', '5m_EMA_200_dist',
|
| 413 |
-
'5m_BB_w', '5m_BB_p', '5m_MFI', '5m_VWAP_dist',
|
| 414 |
-
'15m_timestamp', '15m_RSI', '15m_MACD', '15m_MACD_h', '15m_CCI', '15m_ADX',
|
| 415 |
-
'15m_EMA_9_dist', '15m_EMA_21_dist', '15m_EMA_50_dist', '15m_EMA_200_dist',
|
| 416 |
-
'15m_BB_w', '15m_BB_p', '15m_MFI', '15m_VWAP_dist',
|
| 417 |
-
'1h_timestamp', '1h_RSI', '1h_MACD_h', '1h_EMA_50_dist', '1h_EMA_200_dist', '1h_ATR_pct',
|
| 418 |
-
'4h_timestamp', '4h_RSI', '4h_MACD_h', '4h_EMA_50_dist', '4h_EMA_200_dist', '4h_ATR_pct',
|
| 419 |
-
'1d_timestamp', '1d_RSI', '1d_EMA_200_dist', '1d_Trend_Strong'
|
| 420 |
-
]
|
| 421 |
-
|
| 422 |
-
# ======================================================================
|
| 423 |
-
# 🔥 GLOBAL INFERENCE (Batch)
|
| 424 |
-
# ======================================================================
|
| 425 |
|
| 426 |
-
# A. TITAN (
|
| 427 |
global_titan_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 428 |
if titan_model:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
print(" 🚀 Running Global Titan...", flush=True)
|
| 430 |
try:
|
| 431 |
t_vecs = []
|
| 432 |
for col in titan_cols:
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
feat
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
target_map = None
|
| 441 |
-
|
| 442 |
-
if tf == '5m': target_arr = numpy_htf['5m']; target_map = map_5m
|
| 443 |
-
elif tf == '15m': target_arr = numpy_htf['15m']; target_map = map_15m
|
| 444 |
-
elif tf == '1h': target_arr = numpy_htf['1h']; target_map = map_1h
|
| 445 |
-
elif tf == '4h': target_arr = numpy_htf['4h']; target_map = map_4h
|
| 446 |
-
elif tf == '1d': target_arr = numpy_htf['1d']; target_map = map_1d
|
| 447 |
-
|
| 448 |
-
# Special cases for raw columns in numpy_htf
|
| 449 |
-
# timestamp, open, high, low, close, volume are preserved
|
| 450 |
-
|
| 451 |
-
if target_arr and feat in target_arr:
|
| 452 |
-
t_vecs.append(target_arr[feat][target_map])
|
| 453 |
-
elif target_arr and feat == 'timestamp': # Handle 15m_timestamp explicitly
|
| 454 |
-
t_vecs.append(target_arr['timestamp'][target_map])
|
| 455 |
-
else:
|
| 456 |
-
# Fallback for raw OHLCV if not found directly
|
| 457 |
-
if target_arr and feat in ['open', 'high', 'low', 'close', 'volume']:
|
| 458 |
-
t_vecs.append(target_arr[feat][target_map])
|
| 459 |
-
else:
|
| 460 |
-
t_vecs.append(np.zeros(len(arr_ts_1m)))
|
| 461 |
-
|
| 462 |
X_TITAN = np.column_stack(t_vecs)
|
| 463 |
-
|
| 464 |
-
preds_t = titan_model.predict(dmat)
|
| 465 |
-
global_titan_scores = _revive_score_distribution(preds_t)
|
| 466 |
-
except Exception as e: print(f"Titan Error: {e}")
|
| 467 |
-
|
| 468 |
-
# B. PATTERNS (The Missing Link)
|
| 469 |
-
global_pattern_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 470 |
-
pattern_models = getattr(self.proc.pattern_engine, 'models', {})
|
| 471 |
-
if pattern_models and '15m' in pattern_models:
|
| 472 |
-
print(" 🚀 Running Global Patterns...", flush=True)
|
| 473 |
-
try:
|
| 474 |
-
# Patterns use 15m window of 200 candles
|
| 475 |
-
# We need to construct this efficiently.
|
| 476 |
-
# Since we are in Batch Mode, we can't easily slide window for 100k candles quickly in Python.
|
| 477 |
-
# Strategy: Calculate only for candidates LATER?
|
| 478 |
-
# OR: Use a simplified logic or skip if too slow.
|
| 479 |
-
# For now, let's keep it placeholder 0.5 or try calculating for Candidate Indices ONLY in the loop.
|
| 480 |
-
pass
|
| 481 |
except: pass
|
| 482 |
|
| 483 |
-
#
|
| 484 |
global_sniper_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 485 |
if sniper_models:
|
| 486 |
print(" 🚀 Running Global Sniper...", flush=True)
|
|
@@ -490,13 +353,12 @@ class HeavyDutyBacktester:
|
|
| 490 |
if col in fast_1m: s_vecs.append(fast_1m[col])
|
| 491 |
elif col == 'atr' and 'atr_z' in fast_1m: s_vecs.append(fast_1m['atr_z'])
|
| 492 |
else: s_vecs.append(np.zeros(len(arr_ts_1m)))
|
| 493 |
-
|
| 494 |
X_SNIPER = np.column_stack(s_vecs)
|
| 495 |
-
|
| 496 |
-
global_sniper_scores = _revive_score_distribution(np.mean(
|
| 497 |
-
except
|
| 498 |
|
| 499 |
-
#
|
| 500 |
global_oracle_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 501 |
if oracle_dir:
|
| 502 |
print(" 🚀 Running Global Oracle...", flush=True)
|
|
@@ -508,179 +370,137 @@ class HeavyDutyBacktester:
|
|
| 508 |
elif col.startswith('4h_'): o_vecs.append(numpy_htf['4h'].get(col[3:], np.zeros(len(arr_ts_1m)))[map_4h])
|
| 509 |
elif col == 'sim_titan_score': o_vecs.append(global_titan_scores)
|
| 510 |
elif col == 'sim_mc_score': o_vecs.append(np.full(len(arr_ts_1m), 0.5))
|
| 511 |
-
elif col == 'sim_pattern_score': o_vecs.append(
|
| 512 |
else: o_vecs.append(np.zeros(len(arr_ts_1m)))
|
| 513 |
-
|
| 514 |
X_ORACLE = np.column_stack(o_vecs)
|
| 515 |
preds_o = oracle_dir.predict(X_ORACLE)
|
| 516 |
preds_o = preds_o if isinstance(preds_o, np.ndarray) and len(preds_o.shape)==1 else preds_o[:, 0]
|
| 517 |
global_oracle_scores = _revive_score_distribution(preds_o)
|
| 518 |
-
except
|
| 519 |
|
| 520 |
-
#
|
| 521 |
global_v2_scores = np.zeros(len(arr_ts_1m), dtype=np.float32)
|
| 522 |
if legacy_v2:
|
| 523 |
try:
|
| 524 |
-
l_log = fast_1m['log_ret']
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
l_vol = fast_1m['volatility']
|
| 528 |
-
|
| 529 |
-
l5_log = numpy_htf['5m']['log_ret'][map_5m]
|
| 530 |
-
l5_rsi = numpy_htf['5m']['RSI'][map_5m] / 100.0
|
| 531 |
-
l5_fib = numpy_htf['5m']['fib_pos'][map_5m]
|
| 532 |
-
l5_trd = numpy_htf['5m']['trend_slope'][map_5m]
|
| 533 |
-
|
| 534 |
-
l15_log = numpy_htf['15m']['log_ret'][map_15m]
|
| 535 |
-
l15_rsi = numpy_htf['15m']['RSI'][map_15m] / 100.0
|
| 536 |
-
l15_fib618 = numpy_htf['15m']['dist_fib618'][map_15m]
|
| 537 |
-
l15_trd = numpy_htf['15m']['trend_slope'][map_15m]
|
| 538 |
-
|
| 539 |
lags = []
|
| 540 |
for lag in [1, 2, 3, 5, 10, 20]:
|
| 541 |
-
lags.extend([fast_1m[f'log_ret_lag_{lag}'], fast_1m[f'rsi_lag_{lag}'],
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
X_V2 = np.column_stack([l_log, l_rsi, l_fib, l_vol, l5_log, l5_rsi, l5_fib, l5_trd,
|
| 545 |
-
l15_log, l15_rsi, l15_fib618, l15_trd, *lags])
|
| 546 |
preds = legacy_v2.predict(xgb.DMatrix(X_V2))
|
| 547 |
global_v2_scores = preds[:, 2] if len(preds.shape) > 1 else preds
|
| 548 |
except: pass
|
| 549 |
|
| 550 |
-
#
|
| 551 |
-
|
| 552 |
-
if hydra_models:
|
| 553 |
-
try:
|
| 554 |
-
global_hydra_static = np.column_stack([
|
| 555 |
-
fast_1m['RSI'], numpy_htf['5m']['RSI'][map_5m], numpy_htf['15m']['RSI'][map_15m],
|
| 556 |
-
fast_1m['bb_width'], fast_1m['rel_vol'], fast_1m['atr'], fast_1m['close']
|
| 557 |
-
])
|
| 558 |
-
except: pass
|
| 559 |
-
|
| 560 |
-
# --- 5. Filtering Candidates ---
|
| 561 |
-
is_candidate = (
|
| 562 |
-
(numpy_htf['1h']['RSI'][map_1h] <= 70) &
|
| 563 |
-
(global_titan_scores > 0.4) &
|
| 564 |
-
(global_oracle_scores > 0.4)
|
| 565 |
-
)
|
| 566 |
-
|
| 567 |
candidate_indices = np.where(is_candidate)[0]
|
| 568 |
-
|
| 569 |
start_ts_val = frames['1m'].index[0] + pd.Timedelta(minutes=500)
|
| 570 |
start_idx_offset = np.searchsorted(arr_ts_1m, int(start_ts_val.timestamp()*1000))
|
| 571 |
candidate_indices = candidate_indices[candidate_indices >= start_idx_offset]
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
print(f" 🎯 Candidates: {len(candidate_indices)}. Simulating Trades...", flush=True)
|
| 576 |
|
|
|
|
| 577 |
ai_results = []
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
# --- PATTERN CHECK (On Demand) ---
|
| 591 |
-
s_pattern = 0.5
|
| 592 |
-
if pattern_models:
|
| 593 |
-
# Only check patterns for candidates (expensive)
|
| 594 |
-
try:
|
| 595 |
-
# Map to 15m index
|
| 596 |
-
idx_15m_entry = map_15m[idx_entry]
|
| 597 |
-
if idx_15m_entry > 200:
|
| 598 |
-
# Reconstruct window from numpy_htf['15m']
|
| 599 |
-
# Need o, h, l, c, v arrays
|
| 600 |
-
# Construct simple df for _transform
|
| 601 |
-
p_win = pd.DataFrame({
|
| 602 |
-
'open': frames['15m']['open'].values[idx_15m_entry-200:idx_15m_entry],
|
| 603 |
-
'high': frames['15m']['high'].values[idx_15m_entry-200:idx_15m_entry],
|
| 604 |
-
'low': frames['15m']['low'].values[idx_15m_entry-200:idx_15m_entry],
|
| 605 |
-
'close': frames['15m']['close'].values[idx_15m_entry-200:idx_15m_entry],
|
| 606 |
-
'volume': frames['15m']['volume'].values[idx_15m_entry-200:idx_15m_entry]
|
| 607 |
-
})
|
| 608 |
-
vec = _transform_window_for_pattern(p_win)
|
| 609 |
-
if vec is not None:
|
| 610 |
-
s_pattern = pattern_models['15m'].predict(xgb.DMatrix(vec.reshape(1,-1)))[0]
|
| 611 |
-
# Update Oracle with real pattern score? Too late for global, but good for logs
|
| 612 |
-
except: pass
|
| 613 |
-
|
| 614 |
-
idx_exit = idx_entry + 240
|
| 615 |
-
|
| 616 |
-
# Legacy Max Risk
|
| 617 |
-
max_v2 = np.max(global_v2_scores[idx_entry:idx_exit])
|
| 618 |
-
v2_time = 0
|
| 619 |
-
if max_v2 > 0.8:
|
| 620 |
-
rel = np.argmax(global_v2_scores[idx_entry:idx_exit])
|
| 621 |
-
v2_time = int(arr_ts_1m[idx_entry + rel])
|
| 622 |
-
|
| 623 |
-
# Hydra Dynamic Risk
|
| 624 |
-
max_hydra = 0.0; hydra_time = 0
|
| 625 |
-
if hydra_models and global_hydra_static is not None:
|
| 626 |
-
sl_st = global_hydra_static[idx_entry:idx_exit]
|
| 627 |
-
sl_close = sl_st[:, 6]
|
| 628 |
-
sl_atr = sl_st[:, 5]
|
| 629 |
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
cum_max = np.maximum.accumulate(sl_close)
|
| 634 |
-
max_pnl_r = (np.maximum(cum_max, entry_price) - entry_price) / dist
|
| 635 |
-
atr_pct = sl_atr / sl_close
|
| 636 |
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
zeros, zeros, time_vec, zeros,
|
| 646 |
-
oracle_arr, l2_arr, tgt_arr
|
| 647 |
-
])
|
| 648 |
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
'
|
| 661 |
-
|
| 662 |
-
'
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 670 |
|
| 671 |
dt = time.time() - t0
|
| 672 |
if ai_results:
|
| 673 |
pd.DataFrame(ai_results).to_pickle(scores_file)
|
| 674 |
print(f" ✅ [{sym}] Completed in {dt:.2f} seconds. ({len(ai_results)} signals)", flush=True)
|
| 675 |
-
else:
|
| 676 |
-
print(f" ⚠️ [{sym}] No signals.", flush=True)
|
| 677 |
-
|
| 678 |
-
del frames, fast_1m, numpy_htf, global_v2_scores, global_oracle_scores, global_sniper_scores, global_titan_scores
|
| 679 |
gc.collect()
|
| 680 |
|
| 681 |
-
# ==============================================================
|
| 682 |
-
# PHASE 1 & 2 (Standard Optimization)
|
| 683 |
-
# ==============================================================
|
| 684 |
async def generate_truth_data(self):
|
| 685 |
if self.force_start_date:
|
| 686 |
dt_s = datetime.strptime(self.force_start_date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
|
|
@@ -835,7 +655,7 @@ class HeavyDutyBacktester:
|
|
| 835 |
return best['config'], best
|
| 836 |
|
| 837 |
async def run_strategic_optimization_task():
|
| 838 |
-
print("\n🧪 [STRATEGIC BACKTEST]
|
| 839 |
r2 = R2Service(); dm = DataManager(None, None, r2); proc = MLProcessor(dm)
|
| 840 |
try:
|
| 841 |
await dm.initialize(); await proc.initialize()
|
|
|
|
| 1 |
# ============================================================
|
| 2 |
+
# 🧪 backtest_engine.py (V139.0 - GEM-Architect: Vectorized Hydra Speed)
|
| 3 |
# ============================================================
|
| 4 |
|
| 5 |
import asyncio
|
|
|
|
| 67 |
return s.fillna(0.0).astype('float64')
|
| 68 |
|
| 69 |
def _zv(x):
|
|
|
|
| 70 |
with np.errstate(divide='ignore', invalid='ignore'):
|
| 71 |
x = np.asarray(x, dtype="float32")
|
| 72 |
m = np.nanmean(x, axis=0)
|
| 73 |
s = np.nanstd(x, axis=0) + 1e-9
|
| 74 |
return np.nan_to_num((x - m) / s, nan=0.0)
|
| 75 |
|
|
|
|
|
|
|
|
|
|
| 76 |
def _transform_window_for_pattern(df_window):
|
|
|
|
| 77 |
try:
|
| 78 |
c = df_window['close'].values.astype('float32')
|
| 79 |
o = df_window['open'].values.astype('float32')
|
| 80 |
h = df_window['high'].values.astype('float32')
|
| 81 |
l = df_window['low'].values.astype('float32')
|
| 82 |
v = df_window['volume'].values.astype('float32')
|
|
|
|
|
|
|
| 83 |
base = np.stack([o, h, l, c, v], axis=1)
|
| 84 |
base_z = _zv(base)
|
|
|
|
|
|
|
| 85 |
lr = np.zeros_like(c); lr[1:] = np.diff(np.log1p(c))
|
| 86 |
rng = (h - l) / (c + 1e-9)
|
| 87 |
extra = np.stack([lr, rng], axis=1)
|
| 88 |
extra_z = _zv(extra)
|
|
|
|
|
|
|
| 89 |
def _ema(arr, n): return pd.Series(arr).ewm(span=n, adjust=False).mean().values
|
| 90 |
ema9 = _ema(c, 9); ema21 = _ema(c, 21); ema50 = _ema(c, 50); ema200 = _ema(c, 200)
|
| 91 |
+
slope21 = np.gradient(ema21); slope50 = np.gradient(ema50)
|
|
|
|
|
|
|
|
|
|
| 92 |
delta = np.diff(c, prepend=c[0])
|
| 93 |
up, down = delta.copy(), delta.copy()
|
| 94 |
up[up < 0] = 0; down[down > 0] = 0
|
|
|
|
| 96 |
roll_down = pd.Series(down).abs().ewm(alpha=1/14, adjust=False).mean().values
|
| 97 |
rs = roll_up / (roll_down + 1e-9)
|
| 98 |
rsi = 100.0 - (100.0 / (1.0 + rs))
|
|
|
|
| 99 |
indicators = np.stack([ema9, ema21, ema50, ema200, slope21, slope50, rsi], axis=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
X_seq = np.concatenate([base_z, extra_z, _zv(indicators)], axis=1)
|
| 101 |
X_flat = X_seq.flatten()
|
|
|
|
|
|
|
| 102 |
X_stat = np.array([0.5, 0.0, 0.5], dtype="float32")
|
| 103 |
return np.concatenate([X_flat, X_stat])
|
| 104 |
except: return None
|
|
|
|
| 114 |
self.INITIAL_CAPITAL = 10.0
|
| 115 |
self.TRADING_FEES = 0.001
|
| 116 |
self.MAX_SLOTS = 4
|
|
|
|
| 117 |
self.TARGET_COINS = [
|
| 118 |
'SOL/USDT', 'XRP/USDT', 'DOGE/USDT', 'ADA/USDT', 'AVAX/USDT', 'LINK/USDT',
|
| 119 |
'TON/USDT', 'INJ/USDT', 'APT/USDT', 'OP/USDT', 'ARB/USDT', 'SUI/USDT',
|
|
|
|
| 125 |
'STRK/USDT', 'BLUR/USDT', 'ALT/USDT', 'JUP/USDT', 'PENDLE/USDT', 'ETHFI/USDT',
|
| 126 |
'MEME/USDT', 'ATOM/USDT'
|
| 127 |
]
|
|
|
|
| 128 |
self.force_start_date = None
|
| 129 |
self.force_end_date = None
|
|
|
|
| 130 |
if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
|
| 131 |
+
print(f"🧪 [Backtest V139.0] Vectorized Hydra Speed Optimization.")
|
| 132 |
|
| 133 |
def set_date_range(self, start_str, end_str):
|
| 134 |
self.force_start_date = start_str
|
| 135 |
self.force_end_date = end_str
|
| 136 |
|
|
|
|
|
|
|
|
|
|
| 137 |
async def _fetch_all_data_fast(self, sym, start_ms, end_ms):
|
| 138 |
print(f" ⚡ [Network] Downloading {sym}...", flush=True)
|
| 139 |
limit = 1000
|
|
|
|
| 145 |
current += duration_per_batch
|
| 146 |
all_candles = []
|
| 147 |
sem = asyncio.Semaphore(20)
|
|
|
|
| 148 |
async def _fetch_batch(timestamp):
|
| 149 |
async with sem:
|
| 150 |
for _ in range(3):
|
|
|
|
| 152 |
return await self.dm.exchange.fetch_ohlcv(sym, '1m', since=timestamp, limit=limit)
|
| 153 |
except: await asyncio.sleep(0.5)
|
| 154 |
return []
|
|
|
|
| 155 |
chunk_size = 50
|
| 156 |
for i in range(0, len(tasks), chunk_size):
|
| 157 |
chunk_tasks = tasks[i:i + chunk_size]
|
|
|
|
| 159 |
results = await asyncio.gather(*futures)
|
| 160 |
for res in results:
|
| 161 |
if res: all_candles.extend(res)
|
|
|
|
| 162 |
if not all_candles: return None
|
| 163 |
df = pd.DataFrame(all_candles, columns=['timestamp', 'o', 'h', 'l', 'c', 'v'])
|
| 164 |
df.drop_duplicates('timestamp', inplace=True)
|
|
|
|
| 167 |
print(f" ✅ Downloaded {len(df)} candles.", flush=True)
|
| 168 |
return df.values.tolist()
|
| 169 |
|
|
|
|
|
|
|
|
|
|
| 170 |
def _calculate_indicators_vectorized(self, df, timeframe='1m'):
|
|
|
|
| 171 |
cols = ['close', 'high', 'low', 'volume', 'open']
|
| 172 |
for c in cols: df[c] = df[c].astype(np.float64)
|
| 173 |
idx = df.index
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
df['RSI'] = safe_ta(ta.rsi(df['close'], length=14), idx, 50)
|
|
|
|
| 175 |
macd = ta.macd(df['close'])
|
| 176 |
if macd is not None:
|
| 177 |
df['MACD'] = safe_ta(macd.iloc[:, 0], idx, 0)
|
| 178 |
df['MACD_h'] = safe_ta(macd.iloc[:, 1], idx, 0)
|
| 179 |
+
else: df['MACD'] = 0.0; df['MACD_h'] = 0.0
|
|
|
|
|
|
|
| 180 |
df['CCI'] = safe_ta(ta.cci(df['high'], df['low'], df['close'], length=20), idx, 0)
|
|
|
|
| 181 |
adx = ta.adx(df['high'], df['low'], df['close'], length=14)
|
| 182 |
if adx is not None: df['ADX'] = safe_ta(adx.iloc[:, 0], idx, 0)
|
| 183 |
else: df['ADX'] = 0.0
|
| 184 |
+
if timeframe == '1d': df['Trend_Strong'] = np.where(df['ADX'] > 25, 1.0, 0.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
for p in [9, 21, 50, 200]:
|
| 186 |
ema = safe_ta(ta.ema(df['close'], length=p), idx, 0)
|
| 187 |
df[f'EMA_{p}_dist'] = ((df['close'] / ema.replace(0, np.nan)) - 1).fillna(0)
|
| 188 |
df[f'ema{p}'] = ema
|
|
|
|
| 189 |
df['ema20'] = safe_ta(ta.ema(df['close'], length=20), idx, df['close'])
|
|
|
|
| 190 |
bb = ta.bbands(df['close'], length=20, std=2.0)
|
| 191 |
if bb is not None:
|
| 192 |
w = ((bb.iloc[:, 2] - bb.iloc[:, 0]) / bb.iloc[:, 1].replace(0, np.nan)).fillna(0)
|
| 193 |
p = ((df['close'] - bb.iloc[:, 0]) / (bb.iloc[:, 2] - bb.iloc[:, 0]).replace(0, np.nan)).fillna(0)
|
| 194 |
+
df['BB_w'] = w; df['BB_p'] = p; df['bb_width'] = w
|
| 195 |
+
else: df['BB_w'] = 0; df['BB_p'] = 0; df['bb_width'] = 0
|
|
|
|
|
|
|
|
|
|
| 196 |
df['MFI'] = safe_ta(ta.mfi(df['high'], df['low'], df['close'], df['volume'], length=14), idx, 50)
|
|
|
|
| 197 |
vwap = ta.vwap(df['high'], df['low'], df['close'], df['volume'])
|
| 198 |
if vwap is not None:
|
| 199 |
df['VWAP_dist'] = ((df['close'] / vwap.replace(0, np.nan)) - 1).fillna(0)
|
| 200 |
df['vwap'] = vwap
|
| 201 |
+
else: df['VWAP_dist'] = 0.0; df['vwap'] = df['close']
|
|
|
|
|
|
|
|
|
|
| 202 |
df['atr'] = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=14), idx, 0)
|
| 203 |
df['atr_pct'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
|
| 204 |
+
df['ATR_pct'] = df['atr_pct']
|
| 205 |
|
|
|
|
|
|
|
|
|
|
| 206 |
if timeframe == '1m':
|
| 207 |
df['return_1m'] = df['close'].pct_change().fillna(0)
|
| 208 |
df['return_3m'] = df['close'].pct_change(3).fillna(0)
|
| 209 |
df['return_5m'] = df['close'].pct_change(5).fillna(0)
|
| 210 |
df['return_15m'] = df['close'].pct_change(15).fillna(0)
|
|
|
|
| 211 |
df['rsi_14'] = df['RSI']
|
| 212 |
e9 = df['ema9'].replace(0, np.nan)
|
| 213 |
df['ema_9_slope'] = ((df['ema9'] - df['ema9'].shift(1)) / e9.shift(1)).fillna(0)
|
| 214 |
df['ema_21_dist'] = df['EMA_21_dist']
|
|
|
|
| 215 |
atr_100 = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=100), idx, 0)
|
| 216 |
df['atr_z'] = _z_roll(atr_100)
|
|
|
|
| 217 |
df['vol_zscore_50'] = _z_roll(df['volume'], 50)
|
|
|
|
| 218 |
rng = (df['high'] - df['low']).replace(0, 1e-9)
|
| 219 |
df['candle_range'] = _z_roll(rng, 500)
|
| 220 |
df['close_pos_in_range'] = ((df['close'] - df['low']) / rng).fillna(0.5)
|
|
|
|
| 221 |
df['dollar_vol'] = df['close'] * df['volume']
|
| 222 |
amihud_raw = (df['return_1m'].abs() / df['dollar_vol'].replace(0, np.nan)).fillna(0)
|
| 223 |
df['amihud'] = _z_roll(amihud_raw)
|
|
|
|
| 224 |
dp = df['close'].diff()
|
| 225 |
roll_cov = dp.rolling(64).cov(dp.shift(1))
|
| 226 |
roll_spread_raw = (2 * np.sqrt(np.maximum(0, -roll_cov))).fillna(0)
|
| 227 |
df['roll_spread'] = _z_roll(roll_spread_raw)
|
|
|
|
| 228 |
sign = np.sign(df['close'].diff()).fillna(0)
|
| 229 |
signed_vol = sign * df['volume']
|
| 230 |
+
ofi_raw = signed_vol.rolling(30).sum().fillna(0)
|
| 231 |
df['ofi'] = _z_roll(ofi_raw)
|
|
|
|
| 232 |
buy_vol = (sign > 0) * df['volume']
|
| 233 |
sell_vol = (sign < 0) * df['volume']
|
| 234 |
imb = (buy_vol.rolling(60).sum() - sell_vol.rolling(60).sum()).abs()
|
| 235 |
tot = df['volume'].rolling(60).sum().replace(0, np.nan)
|
| 236 |
df['vpin'] = (imb / tot).fillna(0)
|
|
|
|
| 237 |
vwap_win = 20
|
| 238 |
v_short = (df['dollar_vol'].rolling(vwap_win).sum() / df['volume'].rolling(vwap_win).sum().replace(0, np.nan)).fillna(df['close'])
|
| 239 |
df['vwap_dev'] = _z_roll(df['close'] - v_short)
|
|
|
|
| 240 |
rv_gk = ((np.log(df['high'] / df['low'])**2) / 2) - ((2 * np.log(2) - 1) * (np.log(df['close'] / df['open'])**2))
|
| 241 |
df['rv_gk'] = _z_roll(rv_gk)
|
|
|
|
| 242 |
df['L_score'] = (df['vol_zscore_50'] - df['amihud'] - df['roll_spread'] - df['rv_gk'].abs() - df['vwap_dev'].abs() + df['ofi']).fillna(0)
|
| 243 |
|
|
|
|
|
|
|
|
|
|
| 244 |
df['slope'] = safe_ta(ta.slope(df['close'], length=7), idx, 0)
|
| 245 |
vol_mean = df['volume'].rolling(20).mean()
|
| 246 |
vol_std = df['volume'].rolling(20).std().replace(0, np.nan)
|
| 247 |
df['vol_z'] = ((df['volume'] - vol_mean) / vol_std).fillna(0)
|
|
|
|
| 248 |
df['rel_vol'] = df['volume'] / (df['volume'].rolling(50).mean() + 1e-9)
|
|
|
|
| 249 |
df['log_ret'] = np.log(df['close'] / df['close'].shift(1).replace(0, np.nan)).fillna(0)
|
| 250 |
roll_max = df['high'].rolling(50).max()
|
| 251 |
roll_min = df['low'].rolling(50).min()
|
| 252 |
diff = (roll_max - roll_min).replace(0, 1e-9)
|
| 253 |
df['fib_pos'] = ((df['close'] - roll_min) / diff).fillna(0.5)
|
| 254 |
+
e20_s = df['ema20'].shift(5).replace(0, np.nan)
|
| 255 |
+
df['trend_slope'] = ((df['ema20'] - df['ema20'].shift(5)) / e20_s).fillna(0)
|
| 256 |
df['volatility'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
|
|
|
|
| 257 |
fib618 = roll_max - (diff * 0.382)
|
| 258 |
df['dist_fib618'] = ((df['close'] - fib618) / df['close'].replace(0, np.nan)).fillna(0)
|
| 259 |
+
df['dist_ema50'] = ((df['close'] - df['ema50']) / df['ema50'].replace(0, np.nan)).fillna(0)
|
| 260 |
+
e200 = safe_ta(ta.ema(df['close'], length=200), idx, df['close'])
|
| 261 |
+
df['ema200'] = e200
|
| 262 |
+
df['dist_ema200'] = ((df['close'] - e200) / e200.replace(0, np.nan)).fillna(0)
|
|
|
|
| 263 |
if timeframe == '1m':
|
| 264 |
for lag in [1, 2, 3, 5, 10, 20]:
|
| 265 |
df[f'log_ret_lag_{lag}'] = df['log_ret'].shift(lag).fillna(0)
|
| 266 |
df[f'rsi_lag_{lag}'] = (df['RSI'].shift(lag) / 100.0).fillna(0.5)
|
| 267 |
df[f'fib_pos_lag_{lag}'] = df['fib_pos'].shift(lag).fillna(0.5)
|
| 268 |
df[f'volatility_lag_{lag}'] = df['volatility'].shift(lag).fillna(0)
|
|
|
|
| 269 |
df.fillna(0, inplace=True)
|
| 270 |
return df
|
| 271 |
|
|
|
|
|
|
|
|
|
|
| 272 |
async def _process_data_in_memory(self, sym, candles, start_ms, end_ms):
|
| 273 |
safe_sym = sym.replace('/', '_')
|
| 274 |
period_suffix = f"{start_ms}_{end_ms}"
|
| 275 |
scores_file = f"{CACHE_DIR}/{safe_sym}_{period_suffix}_scores.pkl"
|
|
|
|
| 276 |
if os.path.exists(scores_file):
|
| 277 |
print(f" 📂 [{sym}] Data Exists -> Skipping.")
|
| 278 |
return
|
|
|
|
| 286 |
df_1m = df_1m.sort_index()
|
| 287 |
|
| 288 |
frames = {}
|
|
|
|
|
|
|
|
|
|
| 289 |
frames['1m'] = self._calculate_indicators_vectorized(df_1m.copy(), timeframe='1m')
|
| 290 |
frames['1m']['timestamp'] = frames['1m'].index.floor('1min').astype(np.int64) // 10**6
|
| 291 |
fast_1m = {col: frames['1m'][col].values for col in frames['1m'].columns}
|
| 292 |
|
| 293 |
+
agg_dict = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
|
| 294 |
numpy_htf = {}
|
| 295 |
for tf_str, tf_code in [('5m', '5T'), ('15m', '15T'), ('1h', '1h'), ('4h', '4h'), ('1d', '1D')]:
|
| 296 |
resampled = df_1m.resample(tf_code).agg(agg_dict).dropna()
|
|
|
|
| 299 |
frames[tf_str] = resampled
|
| 300 |
numpy_htf[tf_str] = {col: resampled[col].values for col in resampled.columns}
|
| 301 |
|
|
|
|
| 302 |
arr_ts_1m = fast_1m['timestamp']
|
| 303 |
map_5m = np.clip(np.searchsorted(numpy_htf['5m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['5m']['timestamp']) - 1)
|
| 304 |
map_15m = np.clip(np.searchsorted(numpy_htf['15m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['15m']['timestamp']) - 1)
|
| 305 |
map_1h = np.clip(np.searchsorted(numpy_htf['1h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['1h']['timestamp']) - 1)
|
| 306 |
map_4h = np.clip(np.searchsorted(numpy_htf['4h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['4h']['timestamp']) - 1)
|
| 307 |
+
map_1d = np.clip(np.searchsorted(numpy_htf['1d']['timestamp'], arr_ts_1m), 0, len(numpy_htf['1d']['timestamp']) - 1) if '1d' in numpy_htf else np.zeros(len(arr_ts_1m), dtype=int)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
|
|
|
|
| 309 |
hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
|
| 310 |
hydra_cols = getattr(self.proc.guardian_hydra, 'feature_cols', []) if self.proc.guardian_hydra else []
|
| 311 |
legacy_v2 = getattr(self.proc.guardian_legacy, 'model_v2', None)
|
|
|
|
| 312 |
oracle_dir = getattr(self.proc.oracle, 'model_direction', None)
|
| 313 |
oracle_cols = getattr(self.proc.oracle, 'feature_cols', [])
|
|
|
|
| 314 |
sniper_models = getattr(self.proc.sniper, 'models', [])
|
| 315 |
sniper_cols = getattr(self.proc.sniper, 'feature_names', [])
|
|
|
|
| 316 |
titan_model = getattr(self.proc.titan, 'model', None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
|
| 318 |
+
# A. TITAN (Global)
|
| 319 |
global_titan_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 320 |
if titan_model:
|
| 321 |
+
titan_cols = [
|
| 322 |
+
'5m_open', '5m_high', '5m_low', '5m_close', '5m_volume', '5m_RSI', '5m_MACD', '5m_MACD_h',
|
| 323 |
+
'5m_CCI', '5m_ADX', '5m_EMA_9_dist', '5m_EMA_21_dist', '5m_EMA_50_dist', '5m_EMA_200_dist',
|
| 324 |
+
'5m_BB_w', '5m_BB_p', '5m_MFI', '5m_VWAP_dist', '15m_timestamp', '15m_RSI', '15m_MACD',
|
| 325 |
+
'15m_MACD_h', '15m_CCI', '15m_ADX', '15m_EMA_9_dist', '15m_EMA_21_dist', '15m_EMA_50_dist',
|
| 326 |
+
'15m_EMA_200_dist', '15m_BB_w', '15m_BB_p', '15m_MFI', '15m_VWAP_dist', '1h_timestamp',
|
| 327 |
+
'1h_RSI', '1h_MACD_h', '1h_EMA_50_dist', '1h_EMA_200_dist', '1h_ATR_pct', '4h_timestamp',
|
| 328 |
+
'4h_RSI', '4h_MACD_h', '4h_EMA_50_dist', '4h_EMA_200_dist', '4h_ATR_pct', '1d_timestamp',
|
| 329 |
+
'1d_RSI', '1d_EMA_200_dist', '1d_Trend_Strong'
|
| 330 |
+
]
|
| 331 |
print(" 🚀 Running Global Titan...", flush=True)
|
| 332 |
try:
|
| 333 |
t_vecs = []
|
| 334 |
for col in titan_cols:
|
| 335 |
+
parts = col.split('_', 1); tf = parts[0]; feat = parts[1]
|
| 336 |
+
target_arr = numpy_htf.get(tf, None)
|
| 337 |
+
target_map = locals().get(f"map_{tf}", None)
|
| 338 |
+
if target_arr and feat in target_arr: t_vecs.append(target_arr[feat][target_map])
|
| 339 |
+
elif target_arr and feat == 'timestamp': t_vecs.append(target_arr['timestamp'][target_map])
|
| 340 |
+
elif target_arr and feat in ['open','high','low','close','volume']: t_vecs.append(target_arr[feat][target_map])
|
| 341 |
+
else: t_vecs.append(np.zeros(len(arr_ts_1m)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
X_TITAN = np.column_stack(t_vecs)
|
| 343 |
+
global_titan_scores = _revive_score_distribution(titan_model.predict(xgb.DMatrix(X_TITAN, feature_names=titan_cols)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
except: pass
|
| 345 |
|
| 346 |
+
# B. SNIPER (Global)
|
| 347 |
global_sniper_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 348 |
if sniper_models:
|
| 349 |
print(" 🚀 Running Global Sniper...", flush=True)
|
|
|
|
| 353 |
if col in fast_1m: s_vecs.append(fast_1m[col])
|
| 354 |
elif col == 'atr' and 'atr_z' in fast_1m: s_vecs.append(fast_1m['atr_z'])
|
| 355 |
else: s_vecs.append(np.zeros(len(arr_ts_1m)))
|
|
|
|
| 356 |
X_SNIPER = np.column_stack(s_vecs)
|
| 357 |
+
preds = [m.predict(X_SNIPER) for m in sniper_models]
|
| 358 |
+
global_sniper_scores = _revive_score_distribution(np.mean(preds, axis=0))
|
| 359 |
+
except: pass
|
| 360 |
|
| 361 |
+
# C. ORACLE (Global)
|
| 362 |
global_oracle_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 363 |
if oracle_dir:
|
| 364 |
print(" 🚀 Running Global Oracle...", flush=True)
|
|
|
|
| 370 |
elif col.startswith('4h_'): o_vecs.append(numpy_htf['4h'].get(col[3:], np.zeros(len(arr_ts_1m)))[map_4h])
|
| 371 |
elif col == 'sim_titan_score': o_vecs.append(global_titan_scores)
|
| 372 |
elif col == 'sim_mc_score': o_vecs.append(np.full(len(arr_ts_1m), 0.5))
|
| 373 |
+
elif col == 'sim_pattern_score': o_vecs.append(np.full(len(arr_ts_1m), 0.5))
|
| 374 |
else: o_vecs.append(np.zeros(len(arr_ts_1m)))
|
|
|
|
| 375 |
X_ORACLE = np.column_stack(o_vecs)
|
| 376 |
preds_o = oracle_dir.predict(X_ORACLE)
|
| 377 |
preds_o = preds_o if isinstance(preds_o, np.ndarray) and len(preds_o.shape)==1 else preds_o[:, 0]
|
| 378 |
global_oracle_scores = _revive_score_distribution(preds_o)
|
| 379 |
+
except: pass
|
| 380 |
|
| 381 |
+
# D. LEGACY (Global)
|
| 382 |
global_v2_scores = np.zeros(len(arr_ts_1m), dtype=np.float32)
|
| 383 |
if legacy_v2:
|
| 384 |
try:
|
| 385 |
+
l_log = fast_1m['log_ret']; l_rsi = fast_1m['RSI'] / 100.0; l_fib = fast_1m['fib_pos']; l_vol = fast_1m['volatility']
|
| 386 |
+
l5_log = numpy_htf['5m']['log_ret'][map_5m]; l5_rsi = numpy_htf['5m']['RSI'][map_5m] / 100.0; l5_fib = numpy_htf['5m']['fib_pos'][map_5m]; l5_trd = numpy_htf['5m']['trend_slope'][map_5m]
|
| 387 |
+
l15_log = numpy_htf['15m']['log_ret'][map_15m]; l15_rsi = numpy_htf['15m']['RSI'][map_15m] / 100.0; l15_fib618 = numpy_htf['15m']['dist_fib618'][map_15m]; l15_trd = numpy_htf['15m']['trend_slope'][map_15m]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
lags = []
|
| 389 |
for lag in [1, 2, 3, 5, 10, 20]:
|
| 390 |
+
lags.extend([fast_1m[f'log_ret_lag_{lag}'], fast_1m[f'rsi_lag_{lag}'], fast_1m[f'fib_pos_lag_{lag}'], fast_1m[f'volatility_lag_{lag}']])
|
| 391 |
+
X_V2 = np.column_stack([l_log, l_rsi, l_fib, l_vol, l5_log, l5_rsi, l5_fib, l5_trd, l15_log, l15_rsi, l15_fib618, l15_trd, *lags])
|
|
|
|
|
|
|
|
|
|
| 392 |
preds = legacy_v2.predict(xgb.DMatrix(X_V2))
|
| 393 |
global_v2_scores = preds[:, 2] if len(preds.shape) > 1 else preds
|
| 394 |
except: pass
|
| 395 |
|
| 396 |
+
# Filter
|
| 397 |
+
is_candidate = (numpy_htf['1h']['RSI'][map_1h] <= 70) & (global_titan_scores > 0.4) & (global_oracle_scores > 0.4)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
candidate_indices = np.where(is_candidate)[0]
|
|
|
|
| 399 |
start_ts_val = frames['1m'].index[0] + pd.Timedelta(minutes=500)
|
| 400 |
start_idx_offset = np.searchsorted(arr_ts_1m, int(start_ts_val.timestamp()*1000))
|
| 401 |
candidate_indices = candidate_indices[candidate_indices >= start_idx_offset]
|
| 402 |
+
candidate_indices = candidate_indices[candidate_indices < (len(arr_ts_1m) - 245)]
|
| 403 |
+
print(f" 🎯 Candidates: {len(candidate_indices)}. Running Vectorized Hydra...", flush=True)
|
|
|
|
|
|
|
| 404 |
|
| 405 |
+
# 🚀 VECTORIZED HYDRA SIMULATION 🚀
|
| 406 |
ai_results = []
|
| 407 |
+
if hydra_models and len(candidate_indices) > 0:
|
| 408 |
+
# Prepare Static Features Matrix (Global)
|
| 409 |
+
h_static = np.column_stack([
|
| 410 |
+
fast_1m['RSI'], numpy_htf['5m']['RSI'][map_5m], numpy_htf['15m']['RSI'][map_15m],
|
| 411 |
+
fast_1m['bb_width'], fast_1m['rel_vol'], fast_1m['atr'], fast_1m['close']
|
| 412 |
+
]) # Shape: (N, 7)
|
| 413 |
+
|
| 414 |
+
# Process candidates in chunks to avoid RAM explosion
|
| 415 |
+
chunk_size = 5000
|
| 416 |
+
for i in range(0, len(candidate_indices), chunk_size):
|
| 417 |
+
chunk_idxs = candidate_indices[i:i+chunk_size]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
|
| 419 |
+
# We need sliding windows of 240 steps for each candidate
|
| 420 |
+
# Trick: Use broadcasting or sliding_window_view on static features
|
| 421 |
+
# But sliding_window_view on huge array is slow. Better to just slice.
|
|
|
|
|
|
|
|
|
|
| 422 |
|
| 423 |
+
# Vectorized construction for chunk
|
| 424 |
+
# 1. Extract entry prices
|
| 425 |
+
entries = fast_1m['close'][chunk_idxs]
|
| 426 |
+
entries_ts = fast_1m['timestamp'][chunk_idxs]
|
| 427 |
|
| 428 |
+
# 2. Prepare sequences (Vectorized slice is hard in numpy without creating copies)
|
| 429 |
+
# We stick to a tight loop or specialized indexing.
|
| 430 |
+
# Given we need to construct a [Batch, 240, Features] array for Hydra...
|
|
|
|
|
|
|
|
|
|
| 431 |
|
| 432 |
+
# Fastest way: List comprehension for slicing, then stack.
|
| 433 |
+
# Since Hydra is XGBoost, we can flatten the time dimension? No, Hydra is 1D input (snapshot).
|
| 434 |
+
# Wait, Hydra predicts Crash Probability for a SNAPSHOT state.
|
| 435 |
+
# In simulation, we need to check crash prob at t+1, t+2... t+240.
|
| 436 |
+
# That is 240 checks per candidate. 42,000 * 240 = 10 Million checks.
|
| 437 |
+
# This IS the bottleneck.
|
| 438 |
+
|
| 439 |
+
# OPTIMIZATION: Only check Hydra if PnL drops below -0.5% or something? No, that misses the point.
|
| 440 |
+
# OPTIMIZATION 2 (Implemented): Vectorize the "Check" logic.
|
| 441 |
+
|
| 442 |
+
# Construct big matrix for ALL checks: (N_Candidates * 240, Features)
|
| 443 |
+
# But that's 10M rows. XGBoost inference on 10M rows takes ~3-5 seconds on CPU. This is feasible!
|
| 444 |
+
|
| 445 |
+
# Let's do it per candidate to be safe on RAM, but fast.
|
| 446 |
+
for idx in chunk_idxs:
|
| 447 |
+
# Slicing is fast
|
| 448 |
+
sl_st = h_static[idx:idx+240]
|
| 449 |
+
sl_close = sl_st[:, 6]; sl_atr = sl_st[:, 5]
|
| 450 |
+
entry = fast_1m['close'][idx]
|
| 451 |
+
|
| 452 |
+
dist = np.maximum(1.5 * sl_atr, entry * 0.015)
|
| 453 |
+
pnl = sl_close - entry
|
| 454 |
+
norm_pnl = pnl / dist
|
| 455 |
+
max_pnl_r = (np.maximum.accumulate(sl_close) - entry) / dist
|
| 456 |
+
atr_pct = sl_atr / sl_close
|
| 457 |
+
|
| 458 |
+
# Stack Hydra Input (240 rows)
|
| 459 |
+
# Cols: rsi1, rsi5, rsi15, bb, vol, dist_ema(0), atr_pct, norm, max, dists(0), time, entry(0), oracle, l2, target
|
| 460 |
+
zeros = np.zeros(240)
|
| 461 |
+
time_vec = np.arange(1, 241)
|
| 462 |
+
s_oracle = global_oracle_scores[idx]
|
| 463 |
+
|
| 464 |
+
X_H = np.column_stack([
|
| 465 |
+
sl_st[:,0], sl_st[:,1], sl_st[:,2], sl_st[:,3], sl_st[:,4],
|
| 466 |
+
zeros, atr_pct, norm_pnl, max_pnl_r, zeros, zeros, time_vec, zeros,
|
| 467 |
+
np.full(240, s_oracle), np.full(240, 0.7), np.full(240, 3.0)
|
| 468 |
+
])
|
| 469 |
+
|
| 470 |
+
# Predict 240 steps at once
|
| 471 |
+
max_hydra = 0.0; hydra_time = 0
|
| 472 |
+
try:
|
| 473 |
+
probs = hydra_models['crash'].predict_proba(X_H)[:, 1]
|
| 474 |
+
max_hydra = np.max(probs)
|
| 475 |
+
if max_hydra > 0.6:
|
| 476 |
+
t = np.argmax(probs)
|
| 477 |
+
hydra_time = int(fast_1m['timestamp'][idx + t])
|
| 478 |
+
except: pass
|
| 479 |
+
|
| 480 |
+
# Legacy Max
|
| 481 |
+
max_v2 = np.max(global_v2_scores[idx:idx+240])
|
| 482 |
+
v2_time = 0
|
| 483 |
+
if max_v2 > 0.8:
|
| 484 |
+
t2 = np.argmax(global_v2_scores[idx:idx+240])
|
| 485 |
+
v2_time = int(fast_1m['timestamp'][idx + t2])
|
| 486 |
+
|
| 487 |
+
ai_results.append({
|
| 488 |
+
'timestamp': int(fast_1m['timestamp'][idx]),
|
| 489 |
+
'symbol': sym, 'close': entry,
|
| 490 |
+
'real_titan': global_titan_scores[idx],
|
| 491 |
+
'oracle_conf': s_oracle,
|
| 492 |
+
'sniper_score': global_sniper_scores[idx],
|
| 493 |
+
'risk_hydra_crash': max_hydra, 'time_hydra_crash': hydra_time,
|
| 494 |
+
'risk_legacy_v2': max_v2, 'time_legacy_panic': v2_time,
|
| 495 |
+
'signal_type': 'BREAKOUT', 'l1_score': 50.0
|
| 496 |
+
})
|
| 497 |
|
| 498 |
dt = time.time() - t0
|
| 499 |
if ai_results:
|
| 500 |
pd.DataFrame(ai_results).to_pickle(scores_file)
|
| 501 |
print(f" ✅ [{sym}] Completed in {dt:.2f} seconds. ({len(ai_results)} signals)", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
gc.collect()
|
| 503 |
|
|
|
|
|
|
|
|
|
|
| 504 |
async def generate_truth_data(self):
|
| 505 |
if self.force_start_date:
|
| 506 |
dt_s = datetime.strptime(self.force_start_date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
|
|
|
|
| 655 |
return best['config'], best
|
| 656 |
|
| 657 |
async def run_strategic_optimization_task():
|
| 658 |
+
print("\n🧪 [STRATEGIC BACKTEST] Vectorized Hydra Speed...")
|
| 659 |
r2 = R2Service(); dm = DataManager(None, None, r2); proc = MLProcessor(dm)
|
| 660 |
try:
|
| 661 |
await dm.initialize(); await proc.initialize()
|