Spaces:
Paused
Paused
Update backtest_engine.py
Browse files- backtest_engine.py +176 -60
backtest_engine.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# ============================================================
|
| 2 |
-
# 🧪 backtest_engine.py (
|
| 3 |
# ============================================================
|
| 4 |
|
| 5 |
import asyncio
|
|
@@ -38,7 +38,6 @@ CACHE_DIR = "backtest_real_scores"
|
|
| 38 |
# ============================================================
|
| 39 |
def sanitize_features(df):
    """Return ``df`` with infinities removed and every gap filled.

    ``None`` and empty frames are handed back unchanged. Otherwise
    +/-inf become NaN, NaNs are forward-filled and then back-filled so
    existing values are carried across gaps, and 0.0 is used only for
    cells that are still empty afterwards (e.g. an all-NaN column).
    """
    if df is not None and not df.empty:
        # ffill/bfill first to preserve trends; 0.0 is the last resort.
        df = (
            df.replace([np.inf, -np.inf], np.nan)
            .ffill()
            .bfill()
            .fillna(0.0)
        )
    return df
|
| 43 |
|
| 44 |
def _z_roll(x, w=500):
|
|
@@ -51,27 +50,80 @@ def _revive_score_distribution(scores):
|
|
| 51 |
scores = np.array(scores, dtype=np.float32)
|
| 52 |
if len(scores) < 10: return scores
|
| 53 |
std = np.std(scores)
|
| 54 |
-
# If standard deviation is extremely low, it means model is outputting constant 'dead' values
|
| 55 |
if std < 0.05:
|
| 56 |
mean = np.mean(scores)
|
| 57 |
z = (scores - mean) / (std + 1e-9)
|
| 58 |
return expit(z)
|
| 59 |
return scores
|
| 60 |
|
| 61 |
-
# ✅ [GEM-FIX] Smart Indicator Wrapper (No more Zeros)
|
| 62 |
def safe_ta(ind_output, index, fill_method='smart'):
    """Turn an indicator result into a gap-free float64 Series.

    Args:
        ind_output: Indicator output. ``None`` yields an all-zero Series,
            a Series is used as-is, and anything else is wrapped in a
            Series on ``index``.
        index: Index used whenever a new Series has to be created.
        fill_method: Accepted for interface compatibility; this
            implementation does not read it.

    Returns:
        A float64 Series in which NaNs were back-filled (covers the
        warm-up rows), then forward-filled, and finally set to 0.0 if
        nothing was available to fill from.
    """
    if ind_output is None:
        return pd.Series(0.0, index=index, dtype='float64')

    series = (
        ind_output
        if isinstance(ind_output, pd.Series)
        else pd.Series(ind_output, index=index)
    )
    # Smart fill: back-fill first (warm-up rows), then forward-fill.
    return series.bfill().ffill().fillna(0.0).astype('float64')
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
# ============================================================
|
| 76 |
# 🧪 THE BACKTESTER CLASS
|
| 77 |
# ============================================================
|
|
@@ -100,7 +152,7 @@ class HeavyDutyBacktester:
|
|
| 100 |
self.force_end_date = None
|
| 101 |
|
| 102 |
if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
|
| 103 |
-
print(f"🧪 [Backtest
|
| 104 |
|
| 105 |
def set_date_range(self, start_str, end_str):
|
| 106 |
self.force_start_date = start_str
|
|
@@ -146,56 +198,53 @@ class HeavyDutyBacktester:
|
|
| 146 |
return df.values.tolist()
|
| 147 |
|
| 148 |
# ==============================================================
|
| 149 |
-
# 🏎️ VECTORIZED INDICATORS (
|
| 150 |
# ==============================================================
|
| 151 |
def _calculate_indicators_vectorized(self, df, timeframe='1m'):
|
| 152 |
# 1. Clean Types
|
| 153 |
cols = ['close', 'high', 'low', 'volume', 'open']
|
| 154 |
for c in cols: df[c] = df[c].astype(np.float64)
|
| 155 |
-
|
| 156 |
-
# Ensure no gaps in price before calc
|
| 157 |
-
df[cols] = df[cols].ffill().bfill()
|
| 158 |
idx = df.index
|
| 159 |
|
| 160 |
# ---------------------------------------------------------
|
| 161 |
# 🧠 PART 1: TITAN FEATURES
|
| 162 |
# ---------------------------------------------------------
|
| 163 |
-
df['RSI'] = safe_ta(ta.rsi(df['close'], length=14), idx)
|
| 164 |
|
| 165 |
-
# MACD
|
| 166 |
macd = ta.macd(df['close'])
|
| 167 |
if macd is not None:
|
| 168 |
-
df['MACD'] = safe_ta(macd.iloc[:, 0], idx)
|
| 169 |
-
df['MACD_h'] = safe_ta(macd.iloc[:, 1], idx)
|
| 170 |
else:
|
| 171 |
df['MACD'] = 0.0; df['MACD_h'] = 0.0
|
| 172 |
|
| 173 |
-
df['CCI'] = safe_ta(ta.cci(df['high'], df['low'], df['close'], length=20), idx)
|
| 174 |
|
| 175 |
adx = ta.adx(df['high'], df['low'], df['close'], length=14)
|
| 176 |
-
if adx is not None: df['ADX'] = safe_ta(adx.iloc[:, 0], idx)
|
| 177 |
else: df['ADX'] = 0.0
|
| 178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
for p in [9, 21, 50, 200]:
|
| 180 |
-
ema = safe_ta(ta.ema(df['close'], length=p), idx)
|
| 181 |
-
# Use replace(0, np.nan) to avoid Infinity
|
| 182 |
df[f'EMA_{p}_dist'] = ((df['close'] / ema.replace(0, np.nan)) - 1).fillna(0)
|
| 183 |
df[f'ema{p}'] = ema
|
| 184 |
|
| 185 |
-
df['ema20'] = safe_ta(ta.ema(df['close'], length=20), idx)
|
| 186 |
|
| 187 |
bb = ta.bbands(df['close'], length=20, std=2.0)
|
| 188 |
if bb is not None:
|
| 189 |
-
# Width
|
| 190 |
w = ((bb.iloc[:, 2] - bb.iloc[:, 0]) / bb.iloc[:, 1].replace(0, np.nan)).fillna(0)
|
| 191 |
-
# %B
|
| 192 |
p = ((df['close'] - bb.iloc[:, 0]) / (bb.iloc[:, 2] - bb.iloc[:, 0]).replace(0, np.nan)).fillna(0)
|
| 193 |
df['BB_w'] = w; df['BB_p'] = p
|
| 194 |
df['bb_width'] = w
|
| 195 |
else:
|
| 196 |
df['BB_w'] = 0; df['BB_p'] = 0; df['bb_width'] = 0
|
| 197 |
|
| 198 |
-
df['MFI'] = safe_ta(ta.mfi(df['high'], df['low'], df['close'], df['volume'], length=14), idx)
|
| 199 |
|
| 200 |
vwap = ta.vwap(df['high'], df['low'], df['close'], df['volume'])
|
| 201 |
if vwap is not None:
|
|
@@ -205,8 +254,9 @@ class HeavyDutyBacktester:
|
|
| 205 |
df['VWAP_dist'] = 0.0
|
| 206 |
df['vwap'] = df['close']
|
| 207 |
|
| 208 |
-
df['atr'] = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=14), idx)
|
| 209 |
df['atr_pct'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
|
|
|
|
| 210 |
|
| 211 |
# ---------------------------------------------------------
|
| 212 |
# 🎯 PART 2: SNIPER FEATURES (1m Only)
|
|
@@ -218,12 +268,11 @@ class HeavyDutyBacktester:
|
|
| 218 |
df['return_15m'] = df['close'].pct_change(15).fillna(0)
|
| 219 |
|
| 220 |
df['rsi_14'] = df['RSI']
|
| 221 |
-
|
| 222 |
e9 = df['ema9'].replace(0, np.nan)
|
| 223 |
df['ema_9_slope'] = ((df['ema9'] - df['ema9'].shift(1)) / e9.shift(1)).fillna(0)
|
| 224 |
df['ema_21_dist'] = df['EMA_21_dist']
|
| 225 |
|
| 226 |
-
atr_100 = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=100), idx)
|
| 227 |
df['atr_z'] = _z_roll(atr_100)
|
| 228 |
|
| 229 |
df['vol_zscore_50'] = _z_roll(df['volume'], 50)
|
|
@@ -243,7 +292,7 @@ class HeavyDutyBacktester:
|
|
| 243 |
|
| 244 |
sign = np.sign(df['close'].diff()).fillna(0)
|
| 245 |
signed_vol = sign * df['volume']
|
| 246 |
-
ofi_raw = signed_vol.rolling(30).sum()
|
| 247 |
df['ofi'] = _z_roll(ofi_raw)
|
| 248 |
|
| 249 |
buy_vol = (sign > 0) * df['volume']
|
|
@@ -264,7 +313,7 @@ class HeavyDutyBacktester:
|
|
| 264 |
# ---------------------------------------------------------
|
| 265 |
# 🧠 PART 3: ORACLE / HYDRA / LEGACY EXTRAS
|
| 266 |
# ---------------------------------------------------------
|
| 267 |
-
df['slope'] = safe_ta(ta.slope(df['close'], length=7), idx)
|
| 268 |
vol_mean = df['volume'].rolling(20).mean()
|
| 269 |
vol_std = df['volume'].rolling(20).std().replace(0, np.nan)
|
| 270 |
df['vol_z'] = ((df['volume'] - vol_mean) / vol_std).fillna(0)
|
|
@@ -276,20 +325,15 @@ class HeavyDutyBacktester:
|
|
| 276 |
roll_min = df['low'].rolling(50).min()
|
| 277 |
diff = (roll_max - roll_min).replace(0, 1e-9)
|
| 278 |
df['fib_pos'] = ((df['close'] - roll_min) / diff).fillna(0.5)
|
| 279 |
-
|
| 280 |
-
e20_s = df['ema20'].shift(5).replace(0, np.nan)
|
| 281 |
-
df['trend_slope'] = ((df['ema20'] - df['ema20'].shift(5)) / e20_s).fillna(0)
|
| 282 |
df['volatility'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
|
| 283 |
|
| 284 |
fib618 = roll_max - (diff * 0.382)
|
| 285 |
df['dist_fib618'] = ((df['close'] - fib618) / df['close'].replace(0, np.nan)).fillna(0)
|
| 286 |
|
| 287 |
-
|
| 288 |
-
df['
|
| 289 |
-
|
| 290 |
-
e200 = safe_ta(ta.ema(df['close'], length=200), idx) # Safe Fill
|
| 291 |
-
df['ema200'] = e200
|
| 292 |
-
df['dist_ema200'] = ((df['close'] - e200) / e200.replace(0, np.nan)).fillna(0)
|
| 293 |
|
| 294 |
if timeframe == '1m':
|
| 295 |
for lag in [1, 2, 3, 5, 10, 20]:
|
|
@@ -298,7 +342,6 @@ class HeavyDutyBacktester:
|
|
| 298 |
df[f'fib_pos_lag_{lag}'] = df['fib_pos'].shift(lag).fillna(0.5)
|
| 299 |
df[f'volatility_lag_{lag}'] = df['volatility'].shift(lag).fillna(0)
|
| 300 |
|
| 301 |
-
# FINAL SANITIZATION
|
| 302 |
df.fillna(0, inplace=True)
|
| 303 |
return df
|
| 304 |
|
|
@@ -345,6 +388,11 @@ class HeavyDutyBacktester:
|
|
| 345 |
map_15m = np.clip(np.searchsorted(numpy_htf['15m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['15m']['timestamp']) - 1)
|
| 346 |
map_1h = np.clip(np.searchsorted(numpy_htf['1h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['1h']['timestamp']) - 1)
|
| 347 |
map_4h = np.clip(np.searchsorted(numpy_htf['4h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['4h']['timestamp']) - 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
|
| 349 |
# 4. Load Models
|
| 350 |
hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
|
|
@@ -358,33 +406,81 @@ class HeavyDutyBacktester:
|
|
| 358 |
sniper_cols = getattr(self.proc.sniper, 'feature_names', [])
|
| 359 |
|
| 360 |
titan_model = getattr(self.proc.titan, 'model', None)
|
| 361 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
|
| 363 |
# ======================================================================
|
| 364 |
# 🔥 GLOBAL INFERENCE (Batch)
|
| 365 |
# ======================================================================
|
| 366 |
|
| 367 |
-
# A. TITAN (
|
| 368 |
global_titan_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 369 |
-
if titan_model
|
| 370 |
print(" 🚀 Running Global Titan...", flush=True)
|
| 371 |
try:
|
| 372 |
t_vecs = []
|
| 373 |
for col in titan_cols:
|
| 374 |
-
|
| 375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
else:
|
| 377 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 378 |
|
| 379 |
-
# Check mean to ensure data isn't all zeros
|
| 380 |
X_TITAN = np.column_stack(t_vecs)
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
preds_t = titan_model.predict(xgb.DMatrix(X_TITAN))
|
| 384 |
global_titan_scores = _revive_score_distribution(preds_t)
|
| 385 |
except Exception as e: print(f"Titan Error: {e}")
|
| 386 |
|
| 387 |
-
# B.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
global_sniper_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 389 |
if sniper_models:
|
| 390 |
print(" 🚀 Running Global Sniper...", flush=True)
|
|
@@ -400,7 +496,7 @@ class HeavyDutyBacktester:
|
|
| 400 |
global_sniper_scores = _revive_score_distribution(np.mean(preds_list, axis=0))
|
| 401 |
except Exception as e: print(f"Sniper Error: {e}")
|
| 402 |
|
| 403 |
-
#
|
| 404 |
global_oracle_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 405 |
if oracle_dir:
|
| 406 |
print(" 🚀 Running Global Oracle...", flush=True)
|
|
@@ -412,7 +508,7 @@ class HeavyDutyBacktester:
|
|
| 412 |
elif col.startswith('4h_'): o_vecs.append(numpy_htf['4h'].get(col[3:], np.zeros(len(arr_ts_1m)))[map_4h])
|
| 413 |
elif col == 'sim_titan_score': o_vecs.append(global_titan_scores)
|
| 414 |
elif col == 'sim_mc_score': o_vecs.append(np.full(len(arr_ts_1m), 0.5))
|
| 415 |
-
elif col == 'sim_pattern_score': o_vecs.append(
|
| 416 |
else: o_vecs.append(np.zeros(len(arr_ts_1m)))
|
| 417 |
|
| 418 |
X_ORACLE = np.column_stack(o_vecs)
|
|
@@ -421,7 +517,7 @@ class HeavyDutyBacktester:
|
|
| 421 |
global_oracle_scores = _revive_score_distribution(preds_o)
|
| 422 |
except Exception as e: print(f"Oracle Error: {e}")
|
| 423 |
|
| 424 |
-
#
|
| 425 |
global_v2_scores = np.zeros(len(arr_ts_1m), dtype=np.float32)
|
| 426 |
if legacy_v2:
|
| 427 |
try:
|
|
@@ -451,11 +547,10 @@ class HeavyDutyBacktester:
|
|
| 451 |
global_v2_scores = preds[:, 2] if len(preds.shape) > 1 else preds
|
| 452 |
except: pass
|
| 453 |
|
| 454 |
-
#
|
| 455 |
global_hydra_static = None
|
| 456 |
if hydra_models:
|
| 457 |
try:
|
| 458 |
-
# [rsi1, rsi5, rsi15, bb, vol, atr, close]
|
| 459 |
global_hydra_static = np.column_stack([
|
| 460 |
fast_1m['RSI'], numpy_htf['5m']['RSI'][map_5m], numpy_htf['15m']['RSI'][map_15m],
|
| 461 |
fast_1m['bb_width'], fast_1m['rel_vol'], fast_1m['atr'], fast_1m['close']
|
|
@@ -492,6 +587,30 @@ class HeavyDutyBacktester:
|
|
| 492 |
s_oracle = global_oracle_scores[idx_entry]
|
| 493 |
s_sniper = global_sniper_scores[idx_entry]
|
| 494 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
idx_exit = idx_entry + 240
|
| 496 |
|
| 497 |
# Legacy Max Risk
|
|
@@ -540,6 +659,7 @@ class HeavyDutyBacktester:
|
|
| 540 |
'real_titan': s_titan,
|
| 541 |
'oracle_conf': s_oracle,
|
| 542 |
'sniper_score': s_sniper,
|
|
|
|
| 543 |
'risk_hydra_crash': max_hydra,
|
| 544 |
'time_hydra_crash': hydra_time,
|
| 545 |
'risk_legacy_v2': max_v2,
|
|
@@ -559,7 +679,7 @@ class HeavyDutyBacktester:
|
|
| 559 |
gc.collect()
|
| 560 |
|
| 561 |
# ==============================================================
|
| 562 |
-
# PHASE 1
|
| 563 |
# ==============================================================
|
| 564 |
async def generate_truth_data(self):
|
| 565 |
if self.force_start_date:
|
|
@@ -571,9 +691,6 @@ class HeavyDutyBacktester:
|
|
| 571 |
c = await self._fetch_all_data_fast(sym, ms_s, ms_e)
|
| 572 |
if c: await self._process_data_in_memory(sym, c, ms_s, ms_e)
|
| 573 |
|
| 574 |
-
# ==============================================================
|
| 575 |
-
# PHASE 2: Optimization (Detailed Stats)
|
| 576 |
-
# ==============================================================
|
| 577 |
@staticmethod
|
| 578 |
def _worker_optimize(combinations_batch, scores_files, initial_capital, fees_pct, max_slots):
|
| 579 |
print(f" ⏳ [System] Loading {len(scores_files)} datasets...", flush=True)
|
|
@@ -627,7 +744,6 @@ class HeavyDutyBacktester:
|
|
| 627 |
final_bal = bal + alloc
|
| 628 |
profit = final_bal - initial_capital
|
| 629 |
|
| 630 |
-
# Detailed Stats
|
| 631 |
tot = len(log)
|
| 632 |
winning = [x for x in log if x['pnl'] > 0]
|
| 633 |
losing = [x for x in log if x['pnl'] <= 0]
|
|
|
|
| 1 |
# ============================================================
|
| 2 |
+
# 🧪 backtest_engine.py (V138.0 - GEM-Architect: Titan Exact Match + Patterns Confirmed)
|
| 3 |
# ============================================================
|
| 4 |
|
| 5 |
import asyncio
|
|
|
|
| 38 |
# ============================================================
|
| 39 |
def sanitize_features(df):
    """Strip infinities from a feature frame and fill every missing cell.

    Args:
        df: Feature DataFrame, or ``None``.

    Returns:
        ``df`` itself when it is ``None`` or empty; otherwise a new frame
        where +/-inf were turned into NaN, NaNs were forward- then
        back-filled, and any cell still missing was set to 0.0.
    """
    # Nothing to clean: hand None / an empty frame straight back.
    if df is None or df.empty:
        return df

    without_inf = df.replace([np.inf, -np.inf], np.nan)
    carried = without_inf.ffill().bfill()
    return carried.fillna(0.0)
|
| 42 |
|
| 43 |
def _z_roll(x, w=500):
|
|
|
|
| 50 |
scores = np.array(scores, dtype=np.float32)
|
| 51 |
if len(scores) < 10: return scores
|
| 52 |
std = np.std(scores)
|
|
|
|
| 53 |
if std < 0.05:
|
| 54 |
mean = np.mean(scores)
|
| 55 |
z = (scores - mean) / (std + 1e-9)
|
| 56 |
return expit(z)
|
| 57 |
return scores
|
| 58 |
|
|
|
|
| 59 |
def safe_ta(ind_output, index, fill_method='smart'):
    """Normalise an indicator result into a gap-free float64 Series.

    Callers in this file pass a fill value as the third argument
    (``50`` for RSI/MFI, ``0`` for most others, ``df['close']`` for
    ``ema20``). That argument used to be ignored, so every indicator was
    back-filled regardless. A non-string ``fill_method`` is now honoured
    as the value used to replace NaNs.

    Args:
        ind_output: Indicator output. A Series is used as-is, ``None`` is
            treated as "no data at all", and anything else is wrapped in
            a Series on ``index``.
        index: Index used whenever a Series has to be created.
        fill_method: How NaN gaps are filled.
            * any string (default ``'smart'``) or ``None``: back-fill,
              then forward-fill (the historical behaviour).
            * a scalar: NaNs are replaced by that constant.
            * a Series: NaNs are replaced by the label-aligned values.
            * another array-like: placed on the result's index first,
              then used like a Series.

    Returns:
        A float64 Series; anything still NaN after filling is set to 0.0.
    """
    # Strings (and None) select the legacy bfill/ffill path; everything
    # else is an explicit fill value.
    fill_value = None if isinstance(fill_method, str) else fill_method

    if ind_output is None:
        # Start from an all-NaN series so the chosen fill rule decides the
        # content (still all zeros on the default path).
        s = pd.Series(float('nan'), index=index, dtype='float64')
    elif isinstance(ind_output, pd.Series):
        s = ind_output
    else:
        s = pd.Series(ind_output, index=index)

    if fill_value is None:
        s = s.bfill().ffill()
    else:
        if not isinstance(fill_value, pd.Series) and not pd.api.types.is_scalar(fill_value):
            # fillna() rejects bare lists/arrays; align them positionally.
            fill_value = pd.Series(fill_value, index=s.index)
        s = s.fillna(fill_value)

    return s.fillna(0.0).astype('float64')
|
| 68 |
|
| 69 |
+
def _zv(x):
    """Column-wise z-score for pattern inputs that never returns NaN.

    The input is cast to float32, centred on its NaN-aware mean along
    axis 0 and divided by its NaN-aware standard deviation plus 1e-9
    (so constant columns do not divide by zero). NaNs in the result are
    replaced by 0.0.
    """
    # divide/invalid floating-point warnings are expected here and muted.
    with np.errstate(divide='ignore', invalid='ignore'):
        values = np.asarray(x, dtype="float32")
        centred = values - np.nanmean(values, axis=0)
        scale = np.nanstd(values, axis=0) + 1e-9
        return np.nan_to_num(centred / scale, nan=0.0)
|
| 76 |
+
|
| 77 |
+
# ============================================================
|
| 78 |
+
# 🧩 PATTERN RECOGNITION HELPER
|
| 79 |
+
# ============================================================
|
| 80 |
+
def _transform_window_for_pattern(df_window):
    """Build the flat feature vector scored by the pattern model.

    Per row of the window, 14 z-scored columns are produced: the raw
    OHLCV values (5), the log-return and relative bar range (2), and
    EMA 9/21/50/200, the gradients of EMA 21 and EMA 50, and an RSI built
    from exponentially smoothed gains/losses with alpha = 1/14 (7).
    The row-major flattening of that matrix is followed by three constant
    placeholders ``[0.5, 0.0, 0.5]``.

    Args:
        df_window: Frame with ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns.

    Returns:
        A 1-D array of length ``len(df_window) * 14 + 3``, or ``None``
        when the window cannot be transformed (missing column, empty or
        too-short window, non-numeric data, ...).
    """
    try:
        c = df_window['close'].values.astype('float32')
        o = df_window['open'].values.astype('float32')
        h = df_window['high'].values.astype('float32')
        lo = df_window['low'].values.astype('float32')
        v = df_window['volume'].values.astype('float32')

        # 1. Raw OHLCV, z-scored per column.
        base_z = _zv(np.stack([o, h, lo, c, v], axis=1))

        # 2. Extra features: log-return (first row stays 0) and bar range
        #    relative to the close.
        lr = np.zeros_like(c)
        lr[1:] = np.diff(np.log1p(c))
        rng = (h - lo) / (c + 1e-9)
        extra_z = _zv(np.stack([lr, rng], axis=1))

        # 3. Indicators computed on the close.
        def _ema(arr, n):
            return pd.Series(arr).ewm(span=n, adjust=False).mean().values

        ema9 = _ema(c, 9)
        ema21 = _ema(c, 21)
        ema50 = _ema(c, 50)
        ema200 = _ema(c, 200)

        slope21 = np.gradient(ema21)
        slope50 = np.gradient(ema50)

        # RSI: split the close-to-close change into gains and losses,
        # then smooth each with an EWM (alpha = 1/14).
        delta = np.diff(c, prepend=c[0])
        up, down = delta.copy(), delta.copy()
        up[up < 0] = 0
        down[down > 0] = 0
        roll_up = pd.Series(up).ewm(alpha=1/14, adjust=False).mean().values
        roll_down = pd.Series(down).abs().ewm(alpha=1/14, adjust=False).mean().values
        rs = roll_up / (roll_down + 1e-9)
        rsi = 100.0 - (100.0 / (1.0 + rs))

        indicators = np.stack(
            [ema9, ema21, ema50, ema200, slope21, slope50, rsi], axis=1
        )

        # Flatten the (rows, 14) matrix for the XGBoost pattern model.
        X_seq = np.concatenate([base_z, extra_z, _zv(indicators)], axis=1)
        X_flat = X_seq.flatten()

        # Stat placeholders (the original comment says these match the
        # training logic -- not verifiable from this file).
        X_stat = np.array([0.5, 0.0, 0.5], dtype="float32")
        return np.concatenate([X_flat, X_stat])
    except Exception:
        # Best-effort helper: callers treat None as "no pattern vector".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so a long backtest can still be stopped.
        return None
|
| 126 |
+
|
| 127 |
# ============================================================
|
| 128 |
# 🧪 THE BACKTESTER CLASS
|
| 129 |
# ============================================================
|
|
|
|
| 152 |
self.force_end_date = None
|
| 153 |
|
| 154 |
if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
|
| 155 |
+
print(f"🧪 [Backtest V138.0] Titan Exact Match + Patterns Enabled.")
|
| 156 |
|
| 157 |
def set_date_range(self, start_str, end_str):
|
| 158 |
self.force_start_date = start_str
|
|
|
|
| 198 |
return df.values.tolist()
|
| 199 |
|
| 200 |
# ==============================================================
|
| 201 |
+
# 🏎️ VECTORIZED INDICATORS (EXACT MATCH TO LIVE SYSTEM)
|
| 202 |
# ==============================================================
|
| 203 |
def _calculate_indicators_vectorized(self, df, timeframe='1m'):
|
| 204 |
# 1. Clean Types
|
| 205 |
cols = ['close', 'high', 'low', 'volume', 'open']
|
| 206 |
for c in cols: df[c] = df[c].astype(np.float64)
|
|
|
|
|
|
|
|
|
|
| 207 |
idx = df.index
|
| 208 |
|
| 209 |
# ---------------------------------------------------------
|
| 210 |
# 🧠 PART 1: TITAN FEATURES
|
| 211 |
# ---------------------------------------------------------
|
| 212 |
+
df['RSI'] = safe_ta(ta.rsi(df['close'], length=14), idx, 50)
|
| 213 |
|
|
|
|
| 214 |
macd = ta.macd(df['close'])
|
| 215 |
if macd is not None:
|
| 216 |
+
df['MACD'] = safe_ta(macd.iloc[:, 0], idx, 0)
|
| 217 |
+
df['MACD_h'] = safe_ta(macd.iloc[:, 1], idx, 0)
|
| 218 |
else:
|
| 219 |
df['MACD'] = 0.0; df['MACD_h'] = 0.0
|
| 220 |
|
| 221 |
+
df['CCI'] = safe_ta(ta.cci(df['high'], df['low'], df['close'], length=20), idx, 0)
|
| 222 |
|
| 223 |
adx = ta.adx(df['high'], df['low'], df['close'], length=14)
|
| 224 |
+
if adx is not None: df['ADX'] = safe_ta(adx.iloc[:, 0], idx, 0)
|
| 225 |
else: df['ADX'] = 0.0
|
| 226 |
|
| 227 |
+
# ✅ NEW: Trend_Strong for 1D timeframe (Titan requirement)
|
| 228 |
+
if timeframe == '1d':
|
| 229 |
+
df['Trend_Strong'] = np.where(df['ADX'] > 25, 1.0, 0.0)
|
| 230 |
+
|
| 231 |
for p in [9, 21, 50, 200]:
|
| 232 |
+
ema = safe_ta(ta.ema(df['close'], length=p), idx, 0)
|
|
|
|
| 233 |
df[f'EMA_{p}_dist'] = ((df['close'] / ema.replace(0, np.nan)) - 1).fillna(0)
|
| 234 |
df[f'ema{p}'] = ema
|
| 235 |
|
| 236 |
+
df['ema20'] = safe_ta(ta.ema(df['close'], length=20), idx, df['close'])
|
| 237 |
|
| 238 |
bb = ta.bbands(df['close'], length=20, std=2.0)
|
| 239 |
if bb is not None:
|
|
|
|
| 240 |
w = ((bb.iloc[:, 2] - bb.iloc[:, 0]) / bb.iloc[:, 1].replace(0, np.nan)).fillna(0)
|
|
|
|
| 241 |
p = ((df['close'] - bb.iloc[:, 0]) / (bb.iloc[:, 2] - bb.iloc[:, 0]).replace(0, np.nan)).fillna(0)
|
| 242 |
df['BB_w'] = w; df['BB_p'] = p
|
| 243 |
df['bb_width'] = w
|
| 244 |
else:
|
| 245 |
df['BB_w'] = 0; df['BB_p'] = 0; df['bb_width'] = 0
|
| 246 |
|
| 247 |
+
df['MFI'] = safe_ta(ta.mfi(df['high'], df['low'], df['close'], df['volume'], length=14), idx, 50)
|
| 248 |
|
| 249 |
vwap = ta.vwap(df['high'], df['low'], df['close'], df['volume'])
|
| 250 |
if vwap is not None:
|
|
|
|
| 254 |
df['VWAP_dist'] = 0.0
|
| 255 |
df['vwap'] = df['close']
|
| 256 |
|
| 257 |
+
df['atr'] = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=14), idx, 0)
|
| 258 |
df['atr_pct'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
|
| 259 |
+
df['ATR_pct'] = df['atr_pct'] # Alias for Titan
|
| 260 |
|
| 261 |
# ---------------------------------------------------------
|
| 262 |
# 🎯 PART 2: SNIPER FEATURES (1m Only)
|
|
|
|
| 268 |
df['return_15m'] = df['close'].pct_change(15).fillna(0)
|
| 269 |
|
| 270 |
df['rsi_14'] = df['RSI']
|
|
|
|
| 271 |
e9 = df['ema9'].replace(0, np.nan)
|
| 272 |
df['ema_9_slope'] = ((df['ema9'] - df['ema9'].shift(1)) / e9.shift(1)).fillna(0)
|
| 273 |
df['ema_21_dist'] = df['EMA_21_dist']
|
| 274 |
|
| 275 |
+
atr_100 = safe_ta(ta.atr(df['high'], df['low'], df['close'], length=100), idx, 0)
|
| 276 |
df['atr_z'] = _z_roll(atr_100)
|
| 277 |
|
| 278 |
df['vol_zscore_50'] = _z_roll(df['volume'], 50)
|
|
|
|
| 292 |
|
| 293 |
sign = np.sign(df['close'].diff()).fillna(0)
|
| 294 |
signed_vol = sign * df['volume']
|
| 295 |
+
ofi_raw = signed_vol.rolling(30).sum()
|
| 296 |
df['ofi'] = _z_roll(ofi_raw)
|
| 297 |
|
| 298 |
buy_vol = (sign > 0) * df['volume']
|
|
|
|
| 313 |
# ---------------------------------------------------------
|
| 314 |
# 🧠 PART 3: ORACLE / HYDRA / LEGACY EXTRAS
|
| 315 |
# ---------------------------------------------------------
|
| 316 |
+
df['slope'] = safe_ta(ta.slope(df['close'], length=7), idx, 0)
|
| 317 |
vol_mean = df['volume'].rolling(20).mean()
|
| 318 |
vol_std = df['volume'].rolling(20).std().replace(0, np.nan)
|
| 319 |
df['vol_z'] = ((df['volume'] - vol_mean) / vol_std).fillna(0)
|
|
|
|
| 325 |
roll_min = df['low'].rolling(50).min()
|
| 326 |
diff = (roll_max - roll_min).replace(0, 1e-9)
|
| 327 |
df['fib_pos'] = ((df['close'] - roll_min) / diff).fillna(0.5)
|
| 328 |
+
df['trend_slope'] = ((df['ema20'] - df['ema20'].shift(5)) / df['ema20'].shift(5)).fillna(0)
|
|
|
|
|
|
|
| 329 |
df['volatility'] = (df['atr'] / df['close'].replace(0, np.nan)).fillna(0)
|
| 330 |
|
| 331 |
fib618 = roll_max - (diff * 0.382)
|
| 332 |
df['dist_fib618'] = ((df['close'] - fib618) / df['close'].replace(0, np.nan)).fillna(0)
|
| 333 |
|
| 334 |
+
df['dist_ema50'] = (df['close'] - df['ema50']) / df['close']
|
| 335 |
+
df['ema200'] = ta.ema(df['close'], length=200)
|
| 336 |
+
df['dist_ema200'] = ((df['close'] - df['ema200']) / df['ema200'].replace(0, np.nan)).fillna(0)
|
|
|
|
|
|
|
|
|
|
| 337 |
|
| 338 |
if timeframe == '1m':
|
| 339 |
for lag in [1, 2, 3, 5, 10, 20]:
|
|
|
|
| 342 |
df[f'fib_pos_lag_{lag}'] = df['fib_pos'].shift(lag).fillna(0.5)
|
| 343 |
df[f'volatility_lag_{lag}'] = df['volatility'].shift(lag).fillna(0)
|
| 344 |
|
|
|
|
| 345 |
df.fillna(0, inplace=True)
|
| 346 |
return df
|
| 347 |
|
|
|
|
| 388 |
map_15m = np.clip(np.searchsorted(numpy_htf['15m']['timestamp'], arr_ts_1m), 0, len(numpy_htf['15m']['timestamp']) - 1)
|
| 389 |
map_1h = np.clip(np.searchsorted(numpy_htf['1h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['1h']['timestamp']) - 1)
|
| 390 |
map_4h = np.clip(np.searchsorted(numpy_htf['4h']['timestamp'], arr_ts_1m), 0, len(numpy_htf['4h']['timestamp']) - 1)
|
| 391 |
+
# 1D Mapping
|
| 392 |
+
if '1d' in numpy_htf:
|
| 393 |
+
map_1d = np.clip(np.searchsorted(numpy_htf['1d']['timestamp'], arr_ts_1m), 0, len(numpy_htf['1d']['timestamp']) - 1)
|
| 394 |
+
else:
|
| 395 |
+
map_1d = np.zeros(len(arr_ts_1m), dtype=int)
|
| 396 |
|
| 397 |
# 4. Load Models
|
| 398 |
hydra_models = getattr(self.proc.guardian_hydra, 'models', {}) if self.proc.guardian_hydra else {}
|
|
|
|
| 406 |
sniper_cols = getattr(self.proc.sniper, 'feature_names', [])
|
| 407 |
|
| 408 |
titan_model = getattr(self.proc.titan, 'model', None)
|
| 409 |
+
# ✅ TITAN EXACT FEATURES LIST (From your pickle)
|
| 410 |
+
titan_cols = [
|
| 411 |
+
'5m_open', '5m_high', '5m_low', '5m_close', '5m_volume', '5m_RSI', '5m_MACD', '5m_MACD_h',
|
| 412 |
+
'5m_CCI', '5m_ADX', '5m_EMA_9_dist', '5m_EMA_21_dist', '5m_EMA_50_dist', '5m_EMA_200_dist',
|
| 413 |
+
'5m_BB_w', '5m_BB_p', '5m_MFI', '5m_VWAP_dist',
|
| 414 |
+
'15m_timestamp', '15m_RSI', '15m_MACD', '15m_MACD_h', '15m_CCI', '15m_ADX',
|
| 415 |
+
'15m_EMA_9_dist', '15m_EMA_21_dist', '15m_EMA_50_dist', '15m_EMA_200_dist',
|
| 416 |
+
'15m_BB_w', '15m_BB_p', '15m_MFI', '15m_VWAP_dist',
|
| 417 |
+
'1h_timestamp', '1h_RSI', '1h_MACD_h', '1h_EMA_50_dist', '1h_EMA_200_dist', '1h_ATR_pct',
|
| 418 |
+
'4h_timestamp', '4h_RSI', '4h_MACD_h', '4h_EMA_50_dist', '4h_EMA_200_dist', '4h_ATR_pct',
|
| 419 |
+
'1d_timestamp', '1d_RSI', '1d_EMA_200_dist', '1d_Trend_Strong'
|
| 420 |
+
]
|
| 421 |
|
| 422 |
# ======================================================================
|
| 423 |
# 🔥 GLOBAL INFERENCE (Batch)
|
| 424 |
# ======================================================================
|
| 425 |
|
| 426 |
+
# A. TITAN (Fixed Mapping)
|
| 427 |
global_titan_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 428 |
+
if titan_model:
|
| 429 |
print(" 🚀 Running Global Titan...", flush=True)
|
| 430 |
try:
|
| 431 |
t_vecs = []
|
| 432 |
for col in titan_cols:
|
| 433 |
+
# Parse name: e.g. "5m_RSI" -> tf="5m", feat="RSI"
|
| 434 |
+
parts = col.split('_', 1)
|
| 435 |
+
tf = parts[0]
|
| 436 |
+
feat = parts[1]
|
| 437 |
+
|
| 438 |
+
# Target Array Mapping
|
| 439 |
+
target_arr = None
|
| 440 |
+
target_map = None
|
| 441 |
+
|
| 442 |
+
if tf == '5m': target_arr = numpy_htf['5m']; target_map = map_5m
|
| 443 |
+
elif tf == '15m': target_arr = numpy_htf['15m']; target_map = map_15m
|
| 444 |
+
elif tf == '1h': target_arr = numpy_htf['1h']; target_map = map_1h
|
| 445 |
+
elif tf == '4h': target_arr = numpy_htf['4h']; target_map = map_4h
|
| 446 |
+
elif tf == '1d': target_arr = numpy_htf['1d']; target_map = map_1d
|
| 447 |
+
|
| 448 |
+
# Special cases for raw columns in numpy_htf
|
| 449 |
+
# timestamp, open, high, low, close, volume are preserved
|
| 450 |
+
|
| 451 |
+
if target_arr and feat in target_arr:
|
| 452 |
+
t_vecs.append(target_arr[feat][target_map])
|
| 453 |
+
elif target_arr and feat == 'timestamp': # Handle 15m_timestamp explicitly
|
| 454 |
+
t_vecs.append(target_arr['timestamp'][target_map])
|
| 455 |
else:
|
| 456 |
+
# Fallback for raw OHLCV if not found directly
|
| 457 |
+
if target_arr and feat in ['open', 'high', 'low', 'close', 'volume']:
|
| 458 |
+
t_vecs.append(target_arr[feat][target_map])
|
| 459 |
+
else:
|
| 460 |
+
t_vecs.append(np.zeros(len(arr_ts_1m)))
|
| 461 |
|
|
|
|
| 462 |
X_TITAN = np.column_stack(t_vecs)
|
| 463 |
+
dmat = xgb.DMatrix(X_TITAN, feature_names=titan_cols)
|
| 464 |
+
preds_t = titan_model.predict(dmat)
|
|
|
|
| 465 |
global_titan_scores = _revive_score_distribution(preds_t)
|
| 466 |
except Exception as e: print(f"Titan Error: {e}")
|
| 467 |
|
| 468 |
+
# B. PATTERNS (The Missing Link)
|
| 469 |
+
global_pattern_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 470 |
+
pattern_models = getattr(self.proc.pattern_engine, 'models', {})
|
| 471 |
+
if pattern_models and '15m' in pattern_models:
|
| 472 |
+
print(" 🚀 Running Global Patterns...", flush=True)
|
| 473 |
+
try:
|
| 474 |
+
# Patterns use 15m window of 200 candles
|
| 475 |
+
# We need to construct this efficiently.
|
| 476 |
+
# Since we are in Batch Mode, we can't easily slide window for 100k candles quickly in Python.
|
| 477 |
+
# Strategy: Calculate only for candidates LATER?
|
| 478 |
+
# OR: Use a simplified logic or skip if too slow.
|
| 479 |
+
# For now, let's keep it placeholder 0.5 or try calculating for Candidate Indices ONLY in the loop.
|
| 480 |
+
pass
|
| 481 |
+
except: pass
|
| 482 |
+
|
| 483 |
+
# C. SNIPER
|
| 484 |
global_sniper_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 485 |
if sniper_models:
|
| 486 |
print(" 🚀 Running Global Sniper...", flush=True)
|
|
|
|
| 496 |
global_sniper_scores = _revive_score_distribution(np.mean(preds_list, axis=0))
|
| 497 |
except Exception as e: print(f"Sniper Error: {e}")
|
| 498 |
|
| 499 |
+
# D. ORACLE
|
| 500 |
global_oracle_scores = np.full(len(arr_ts_1m), 0.5, dtype=np.float32)
|
| 501 |
if oracle_dir:
|
| 502 |
print(" 🚀 Running Global Oracle...", flush=True)
|
|
|
|
| 508 |
elif col.startswith('4h_'): o_vecs.append(numpy_htf['4h'].get(col[3:], np.zeros(len(arr_ts_1m)))[map_4h])
|
| 509 |
elif col == 'sim_titan_score': o_vecs.append(global_titan_scores)
|
| 510 |
elif col == 'sim_mc_score': o_vecs.append(np.full(len(arr_ts_1m), 0.5))
|
| 511 |
+
elif col == 'sim_pattern_score': o_vecs.append(global_pattern_scores)
|
| 512 |
else: o_vecs.append(np.zeros(len(arr_ts_1m)))
|
| 513 |
|
| 514 |
X_ORACLE = np.column_stack(o_vecs)
|
|
|
|
| 517 |
global_oracle_scores = _revive_score_distribution(preds_o)
|
| 518 |
except Exception as e: print(f"Oracle Error: {e}")
|
| 519 |
|
| 520 |
+
# E. LEGACY V2
|
| 521 |
global_v2_scores = np.zeros(len(arr_ts_1m), dtype=np.float32)
|
| 522 |
if legacy_v2:
|
| 523 |
try:
|
|
|
|
| 547 |
global_v2_scores = preds[:, 2] if len(preds.shape) > 1 else preds
|
| 548 |
except: pass
|
| 549 |
|
| 550 |
+
# F. HYDRA STATIC
|
| 551 |
global_hydra_static = None
|
| 552 |
if hydra_models:
|
| 553 |
try:
|
|
|
|
| 554 |
global_hydra_static = np.column_stack([
|
| 555 |
fast_1m['RSI'], numpy_htf['5m']['RSI'][map_5m], numpy_htf['15m']['RSI'][map_15m],
|
| 556 |
fast_1m['bb_width'], fast_1m['rel_vol'], fast_1m['atr'], fast_1m['close']
|
|
|
|
| 587 |
s_oracle = global_oracle_scores[idx_entry]
|
| 588 |
s_sniper = global_sniper_scores[idx_entry]
|
| 589 |
|
| 590 |
+
# --- PATTERN CHECK (On Demand) ---
|
| 591 |
+
s_pattern = 0.5
|
| 592 |
+
if pattern_models:
|
| 593 |
+
# Only check patterns for candidates (expensive)
|
| 594 |
+
try:
|
| 595 |
+
# Map to 15m index
|
| 596 |
+
idx_15m_entry = map_15m[idx_entry]
|
| 597 |
+
if idx_15m_entry > 200:
|
| 598 |
+
# Reconstruct window from numpy_htf['15m']
|
| 599 |
+
# Need o, h, l, c, v arrays
|
| 600 |
+
# Construct simple df for _transform
|
| 601 |
+
p_win = pd.DataFrame({
|
| 602 |
+
'open': frames['15m']['open'].values[idx_15m_entry-200:idx_15m_entry],
|
| 603 |
+
'high': frames['15m']['high'].values[idx_15m_entry-200:idx_15m_entry],
|
| 604 |
+
'low': frames['15m']['low'].values[idx_15m_entry-200:idx_15m_entry],
|
| 605 |
+
'close': frames['15m']['close'].values[idx_15m_entry-200:idx_15m_entry],
|
| 606 |
+
'volume': frames['15m']['volume'].values[idx_15m_entry-200:idx_15m_entry]
|
| 607 |
+
})
|
| 608 |
+
vec = _transform_window_for_pattern(p_win)
|
| 609 |
+
if vec is not None:
|
| 610 |
+
s_pattern = pattern_models['15m'].predict(xgb.DMatrix(vec.reshape(1,-1)))[0]
|
| 611 |
+
# Update Oracle with real pattern score? Too late for global, but good for logs
|
| 612 |
+
except: pass
|
| 613 |
+
|
| 614 |
idx_exit = idx_entry + 240
|
| 615 |
|
| 616 |
# Legacy Max Risk
|
|
|
|
| 659 |
'real_titan': s_titan,
|
| 660 |
'oracle_conf': s_oracle,
|
| 661 |
'sniper_score': s_sniper,
|
| 662 |
+
'pattern_score': s_pattern,
|
| 663 |
'risk_hydra_crash': max_hydra,
|
| 664 |
'time_hydra_crash': hydra_time,
|
| 665 |
'risk_legacy_v2': max_v2,
|
|
|
|
| 679 |
gc.collect()
|
| 680 |
|
| 681 |
# ==============================================================
|
| 682 |
+
# PHASE 1 & 2 (Standard Optimization)
|
| 683 |
# ==============================================================
|
| 684 |
async def generate_truth_data(self):
|
| 685 |
if self.force_start_date:
|
|
|
|
| 691 |
c = await self._fetch_all_data_fast(sym, ms_s, ms_e)
|
| 692 |
if c: await self._process_data_in_memory(sym, c, ms_s, ms_e)
|
| 693 |
|
|
|
|
|
|
|
|
|
|
| 694 |
@staticmethod
|
| 695 |
def _worker_optimize(combinations_batch, scores_files, initial_capital, fees_pct, max_slots):
|
| 696 |
print(f" ⏳ [System] Loading {len(scores_files)} datasets...", flush=True)
|
|
|
|
| 744 |
final_bal = bal + alloc
|
| 745 |
profit = final_bal - initial_capital
|
| 746 |
|
|
|
|
| 747 |
tot = len(log)
|
| 748 |
winning = [x for x in log if x['pnl'] > 0]
|
| 749 |
losing = [x for x in log if x['pnl'] <= 0]
|