GitHub Actions
Sync from GitHub: 2b6b738e1021eb6f4e59686269f8628697980366
0582694
"""
strategy/backtest.py
Strategy execution, performance metrics, and benchmark calculations.
Supports CASH as a class (earns T-bill rate when selected).
"""
import numpy as np
import pandas as pd
from datetime import datetime
# ── Strategy execution ────────────────────────────────────────────────────────
def execute_strategy(
preds: np.ndarray,
proba: np.ndarray,
y_raw_test: np.ndarray,
test_dates: pd.DatetimeIndex,
target_etfs: list,
fee_bps: int,
tbill_rate: float,
include_cash: bool = True,
) -> dict:
"""
Execute strategy from model predictions.
Args:
preds : [n] integer class predictions
proba : [n, n_classes] softmax probabilities
y_raw_test : [n, n_etfs] actual next-day ETF returns
test_dates : DatetimeIndex aligned with y_raw_test
target_etfs : list of ETF return column names e.g. ["TLT_Ret", ...]
fee_bps : transaction fee in basis points
tbill_rate : annualised 3m T-bill rate (e.g. 0.045)
include_cash: whether CASH is a valid class (index = n_etfs)
Returns:
dict with keys:
strat_rets, cum_returns, ann_return, sharpe,
hit_ratio, max_dd, max_daily_dd, cum_max,
audit_trail, next_signal, next_proba
"""
n_etfs = len(target_etfs)
daily_tbill = tbill_rate / 252
today = datetime.now().date()
strat_rets = []
audit_trail = []
for i, cls in enumerate(preds):
if include_cash and cls == n_etfs:
signal_etf = "CASH"
realized_ret = daily_tbill
else:
cls = min(cls, n_etfs - 1)
signal_etf = target_etfs[cls].replace("_Ret", "")
realized_ret = float(y_raw_test[i][cls])
# Sanity clip: daily returns should never exceed Β±50%
realized_ret = max(-0.50, min(0.50, realized_ret))
net_ret = realized_ret - (fee_bps / 10000)
strat_rets.append(net_ret)
trade_date = test_dates[i]
if trade_date.date() < today:
audit_trail.append({
"Date": trade_date.strftime("%Y-%m-%d"),
"Signal": signal_etf,
"Realized": realized_ret,
"Net_Return": net_ret,
})
strat_rets = np.array(strat_rets, dtype=np.float64)
# Next signal (last prediction)
last_cls = int(preds[-1])
next_proba = proba[-1]
if include_cash and last_cls == n_etfs:
next_signal = "CASH"
else:
last_cls = min(last_cls, n_etfs - 1)
next_signal = target_etfs[last_cls].replace("_Ret", "")
metrics = _compute_metrics(strat_rets, tbill_rate)
return {
**metrics,
"strat_rets": strat_rets,
"audit_trail": audit_trail,
"next_signal": next_signal,
"next_proba": next_proba,
}
# ── Performance metrics ───────────────────────────────────────────────────────
def _compute_metrics(strat_rets: np.ndarray, tbill_rate: float) -> dict:
if len(strat_rets) == 0:
return {}
cum_returns = np.cumprod(1 + strat_rets)
n = len(strat_rets)
ann_return = float(cum_returns[-1] ** (252 / n) - 1)
excess = strat_rets - tbill_rate / 252
sharpe = float(np.mean(excess) / (np.std(strat_rets) + 1e-9) * np.sqrt(252))
recent = strat_rets[-15:]
hit_ratio = float(np.mean(recent > 0))
cum_max = np.maximum.accumulate(cum_returns)
drawdown = (cum_returns - cum_max) / cum_max
max_dd = float(np.min(drawdown))
max_daily = float(np.min(strat_rets))
return {
"cum_returns": cum_returns,
"ann_return": ann_return,
"sharpe": sharpe,
"hit_ratio": hit_ratio,
"max_dd": max_dd,
"max_daily_dd":max_daily,
"cum_max": cum_max,
}
def compute_benchmark_metrics(returns: np.ndarray, tbill_rate: float) -> dict:
"""Compute metrics for a benchmark return series."""
return _compute_metrics(returns, tbill_rate)
# ── Winner selection ──────────────────────────────────────────────────────────
def select_winner(results: dict) -> str:
"""
Given a dict of {approach_name: result_dict}, return the approach name
with the highest annualised return (raw, not risk-adjusted).
Args:
results : {"Approach 1": {...}, "Approach 2": {...}, "Approach 3": {...}}
Returns:
winner_name : str
"""
best_name = None
best_return = -np.inf
for name, res in results.items():
if res is None:
continue
ret = res.get("ann_return", -np.inf)
if ret > best_return:
best_return = ret
best_name = name
return best_name
# ── Comparison table ──────────────────────────────────────────────────────────
def build_comparison_table(results: dict, winner_name: str) -> pd.DataFrame:
"""
Build a summary DataFrame comparing all three approaches.
Args:
results : {name: result_dict}
winner_name : name of the winner
Returns:
pd.DataFrame with one row per approach
"""
rows = []
for name, res in results.items():
if res is None:
rows.append({
"Approach": name,
"Ann. Return": "N/A",
"Sharpe": "N/A",
"Hit Ratio (15d)":"N/A",
"Max Drawdown": "N/A",
"Winner": "",
})
continue
rows.append({
"Approach": name,
"Ann. Return": f"{res['ann_return']*100:.2f}%",
"Sharpe": f"{res['sharpe']:.2f}",
"Hit Ratio (15d)": f"{res['hit_ratio']*100:.0f}%",
"Max Drawdown": f"{res['max_dd']*100:.2f}%",
"Winner": "⭐ WINNER" if name == winner_name else "",
})
return pd.DataFrame(rows)