# NOTE: the three lines below are a GitHub web-UI deploy banner that was
# accidentally captured as file content; kept as comments so the module parses.
# P2SAMAPA's picture
# [auto] Deploy to HF Space from GitHub
# 29b9731 verified
# app.py β€” P2-ETF-DQN-ENGINE Streamlit UI
import json
import os
import shutil
from datetime import datetime, date, timedelta, timezone
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import requests as req
import streamlit as st
import config
# Start years swept by the multi-year consensus run (one DQN training per year).
SWEEP_YEARS = [2008, 2013, 2015, 2017, 2019, 2021]
# GitHub Actions workflow file dispatched for (re)training.
WORKFLOW_FILE = "train_models.yml"
# Fixed colour per ticker, reused across all charts/banners for consistency.
ETF_COLORS = {
    "TLT": "#4e79a7", "VCIT": "#f28e2b", "LQD": "#59a14f",
    "HYG": "#e15759", "VNQ": "#76b7b2", "SLV": "#edc948",
    "GLD": "#b07aa1", "CASH": "#aaaaaa",
}
# ── Page config ───────────────────────────────────────────────────────────────
st.set_page_config(
    page_title="P2 ETF DQN Engine",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Global CSS: light theme, card-style metrics, hero/cash banners, provenance strip.
st.markdown("""
<style>
.main { background-color: #ffffff; color: #1a1a1a; }
div[data-testid="stMetric"] {
background: #f8f9fa; border: 1px solid #e9ecef;
border-radius: 10px; padding: 15px;
}
[data-testid="stMetricValue"] { color: #0066cc !important; font-size: 26px !important; font-weight: 700 !important; }
[data-testid="stMetricLabel"] { color: #6c757d !important; font-size: 11px !important; text-transform: uppercase; }
.hero-card {
background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
border: 2px solid #0066cc; border-radius: 16px;
padding: 32px; text-align: center; margin-bottom: 24px;
}
.hero-label { color: #6c757d; font-size: 13px; text-transform: uppercase; letter-spacing: 2px; }
.hero-value { color: #0066cc; font-size: 72px; font-weight: 900; margin: 8px 0; line-height: 1; }
.hero-sub { color: #495057; font-size: 14px; margin-top: 8px; }
.cash-card {
background: linear-gradient(135deg, #fff8f0 0%, #ffe4cc 100%);
border: 2px solid #cc6600; border-radius: 16px;
padding: 32px; text-align: center; margin-bottom: 24px;
}
.provenance { background: #f8f9fa; border-left: 4px solid #0066cc; padding: 10px 16px;
border-radius: 4px; font-size: 13px; color: #6c757d; margin-top: 8px; }
.method-box { background: #ffffff; border: 1px solid #dee2e6; border-radius: 12px; padding: 20px; }
</style>
""", unsafe_allow_html=True)
# ── Helpers ───────────────────────────────────────────────────────────────────
def _load_json(path: str) -> dict:
if os.path.exists(path):
with open(path) as f:
return json.load(f)
return {}
def _next_trading_day() -> date:
US_HOLIDAYS = {
date(2025,1,1), date(2025,1,20), date(2025,2,17), date(2025,4,18),
date(2025,5,26), date(2025,6,19), date(2025,7,4), date(2025,9,1),
date(2025,11,27), date(2025,12,25),
date(2026,1,1), date(2026,1,19), date(2026,2,16), date(2026,4,3),
date(2026,5,25), date(2026,6,19), date(2026,7,3), date(2026,9,7),
date(2026,11,26), date(2026,12,25),
}
now_est = datetime.utcnow() - timedelta(hours=5)
today = now_est.date()
if today.weekday() < 5 and today not in US_HOLIDAYS and now_est.hour < 16:
return today
d = today + timedelta(days=1)
while d.weekday() >= 5 or d in US_HOLIDAYS:
d += timedelta(days=1)
return d
def _trigger_github(start_year: int, fee_bps: int,
                    tsl_pct: float, z_reentry: float,
                    sweep_mode: str = "") -> bool:
    """Dispatch the training workflow on GitHub Actions.

    Returns True iff GitHub answers HTTP 204 (dispatch accepted); any other
    status or exception is surfaced to the UI via st.error and yields False.
    """
    try:
        token = os.getenv("GITHUB_TOKEN", "")
        if not token:
            st.error("❌ GITHUB_TOKEN not found in Space secrets.")
            return False
        endpoint = (f"https://api.github.com/repos/{config.GITHUB_REPO}"
                    f"/actions/workflows/{WORKFLOW_FILE}/dispatches")
        headers = {"Authorization": f"token {token}",
                   "Accept": "application/vnd.github+json"}
        # workflow_dispatch inputs must all be strings.
        payload = {"ref": "main",
                   "inputs": {"start_year": str(start_year),
                              "fee_bps": str(fee_bps),
                              "tsl_pct": str(tsl_pct),
                              "z_reentry": str(z_reentry),
                              "sweep_mode": sweep_mode}}
        resp = req.post(endpoint, headers=headers, json=payload, timeout=10)
        if resp.status_code != 204:
            st.error(f"❌ GitHub API returned HTTP {resp.status_code} — {resp.text[:300]}")
        return resp.status_code == 204
    except Exception as e:
        st.error(f"❌ Exception: {str(e)}")
        return False
def _get_latest_workflow_run() -> dict:
    """Return the most recent run of the training workflow, or {} on any failure."""
    token = os.getenv("GITHUB_TOKEN", "")
    if not token:
        return {}
    endpoint = (f"https://api.github.com/repos/{config.GITHUB_REPO}"
                f"/actions/workflows/{WORKFLOW_FILE}/runs?per_page=1")
    try:
        r = req.get(endpoint, headers={"Authorization": f"token {token}",
                                       "Accept": "application/vnd.github+json"}, timeout=10)
        if r.status_code == 200:
            runs = r.json().get("workflow_runs", [])
            return runs[0] if runs else {}
    except Exception:
        # Best-effort status probe — network/parse failures degrade to "unknown".
        pass
    return {}
def _today_est() -> date:
return (datetime.now(timezone.utc) - timedelta(hours=5)).date()
def _sweep_filename(year: int, for_date: date) -> str:
return f"sweep_{year}_{for_date.strftime('%Y%m%d')}.json"
def _load_sweep_cache(for_date: date) -> dict:
    """Load date-stamped sweep files from HF Dataset."""
    results = {}
    try:
        from huggingface_hub import hf_hub_download
        token = os.getenv("HF_TOKEN")
        repo_id = os.getenv("HF_DATASET_REPO", "P2SAMAPA/P2-ETF-DQN-ENGINE-DATASET")
        stamp = for_date.strftime("%Y%m%d")
        for year in SWEEP_YEARS:
            # Missing per-year files are skipped; partial caches are valid.
            try:
                local = hf_hub_download(repo_id=repo_id,
                                        filename=f"sweep/sweep_{year}_{stamp}.json",
                                        repo_type="dataset", token=token,
                                        force_download=True)
                with open(local) as fh:
                    results[year] = json.load(fh)
            except Exception:
                pass
    except Exception:
        # Hub unavailable / not installed — behave as an empty cache.
        pass
    return results
def _load_sweep_cache_any() -> tuple:
    """Load most recent sweep files from HF Dataset regardless of date. Returns (cache, date)."""
    cache, newest = {}, None
    try:
        from huggingface_hub import HfApi, hf_hub_download
        token = os.getenv("HF_TOKEN")
        repo_id = os.getenv("HF_DATASET_REPO", "P2SAMAPA/P2-ETF-DQN-ENGINE-DATASET")
        # Pass 1: scan repo listing for sweep_<year>_<YYYYMMDD>.json and keep
        # the most recent date stamp seen.
        for repo_path in HfApi().list_repo_files(repo_id=repo_id,
                                                 repo_type="dataset", token=token):
            base = os.path.basename(repo_path)
            if not (base.startswith("sweep_") and base.endswith(".json")):
                continue
            pieces = base.replace(".json", "").split("_")
            if len(pieces) != 3:
                continue
            try:
                stamp = datetime.strptime(pieces[2], "%Y%m%d").date()
            except Exception:
                continue
            if newest is None or stamp > newest:
                newest = stamp
        # Pass 2: pull every year's file for that newest stamp.
        if newest:
            tag = newest.strftime("%Y%m%d")
            for year in SWEEP_YEARS:
                try:
                    local = hf_hub_download(repo_id=repo_id,
                                            filename=f"sweep/sweep_{year}_{tag}.json",
                                            repo_type="dataset", token=token,
                                            force_download=True)
                    with open(local) as fh:
                        cache[year] = json.load(fh)
                except Exception:
                    pass
    except Exception:
        # Hub unavailable / not installed — behave as "nothing cached".
        pass
    return cache, newest
def _compute_consensus(sweep_data: dict) -> dict:
"""40% Return Β· 20% Z Β· 20% Sharpe Β· 20% (-MaxDD), min-max normalised."""
rows = []
for yr, sig in sweep_data.items():
rows.append({
"year": yr,
"signal": sig.get("signal", "?"),
"ann_return": sig.get("ann_return", 0.0),
"z_score": sig.get("z_score", 0.0),
"sharpe": sig.get("sharpe", 0.0),
"max_dd": sig.get("max_dd", 0.0),
"conviction": sig.get("conviction", "?"),
"lookback": sig.get("lookback", "?"),
})
if not rows:
return {}
df = pd.DataFrame(rows)
def _mm(s):
mn, mx = s.min(), s.max()
return (s - mn) / (mx - mn + 1e-9)
df["n_ret"] = _mm(df["ann_return"])
df["n_z"] = _mm(df["z_score"])
df["n_sharpe"] = _mm(df["sharpe"])
df["n_negdd"] = _mm(-df["max_dd"])
df["wtd"] = 0.40*df["n_ret"] + 0.20*df["n_z"] + 0.20*df["n_sharpe"] + 0.20*df["n_negdd"]
etf_agg = {}
for _, row in df.iterrows():
e = row["signal"]
etf_agg.setdefault(e, {"years": [], "scores": [], "returns": [],
"zs": [], "sharpes": [], "dds": []})
etf_agg[e]["years"].append(row["year"])
etf_agg[e]["scores"].append(row["wtd"])
etf_agg[e]["returns"].append(row["ann_return"])
etf_agg[e]["zs"].append(row["z_score"])
etf_agg[e]["sharpes"].append(row["sharpe"])
etf_agg[e]["dds"].append(row["max_dd"])
total = sum(sum(v["scores"]) for v in etf_agg.values()) + 1e-9
summary = {}
for e, v in etf_agg.items():
cs = sum(v["scores"])
summary[e] = {
"cum_score": round(cs, 4),
"score_share": round(cs / total, 3),
"n_years": len(v["years"]),
"years": v["years"],
"avg_return": round(float(np.mean(v["returns"])), 4),
"avg_z": round(float(np.mean(v["zs"])), 3),
"avg_sharpe": round(float(np.mean(v["sharpes"])), 3),
"avg_max_dd": round(float(np.mean(v["dds"])), 4),
}
winner = max(summary, key=lambda e: summary[e]["cum_score"])
return {"winner": winner, "etf_summary": summary,
"per_year": df.to_dict("records"), "n_years": len(rows)}
# ── Sidebar ───────────────────────────────────────────────────────────────────
# Configuration panel: cache refresh, training parameters (require retrain),
# instant risk controls, and the manual retrain trigger.
with st.sidebar:
    st.markdown("## ⚙️ Configuration")
    # Manual cache bust: clears st.cache_data and reruns the script.
    if st.button("🔄 Refresh Data & Clear Cache"):
        st.cache_data.clear()
        st.toast("Cache cleared — reloading...")
        st.rerun()
    st.divider()
    st.markdown("### 📅 Training Parameters")
    st.caption("Changes here require retraining via GitHub Actions.")
    # Lower slider bound comes from config when declared there; 2008 otherwise.
    start_year = st.slider("Start Year", config.START_YEAR_MIN if hasattr(config, "START_YEAR_MIN") else 2008,
                           2024, 2015)
    fee_bps = st.number_input("T-Costs (bps)", 0, 50, 10)
    st.divider()
    st.markdown("### 🛡️ Risk Controls")
    st.caption("Instant — no retraining needed.")
    tsl_pct = st.slider("Trailing Stop Loss (%)", 5.0, 25.0, 10.0, 0.5)
    z_reentry = st.slider("Re-entry Z-Score Threshold", 0.5, 3.0, 1.1, 0.1)
    st.divider()
    run_btn = st.button("🚀 Retrain DQN Agent",
                        help="Triggers GitHub Actions training job",
                        use_container_width=True)
    if run_btn:
        # Empty sweep_mode → single-year training run (not a consensus sweep).
        triggered = _trigger_github(start_year, fee_bps, tsl_pct, z_reentry, sweep_mode="")
        if triggered:
            st.success(
                f"✅ Training triggered!\n\n"
                f"Training from **{start_year}** · 200 episodes · "
                f"**{fee_bps}bps** fees\n\n"
                f"Results update here in ~50–65 min."
            )
        else:
            st.warning(
                "⚠️ Could not trigger GitHub Actions automatically.\n\n"
                f"**Manual steps:**\n"
                f"- Go to GitHub → Actions → Train DQN Agent\n"
                f"- Set `start_year = {start_year}`\n"
                f"- Or add `GITHUB_TOKEN` to HF Space secrets."
            )
    st.caption(f"↑ Trains from {start_year} onwards · 200 episodes (hardcoded in train_models.yml)")
# ── Load outputs ──────────────────────────────────────────────────────────────
pred = _load_json("latest_prediction.json")
evalu = _load_json("evaluation_results.json")
next_td = _next_trading_day()
final_signal = pred.get("final_signal", "β€”")
z_score = pred.get("z_score", 0.0)
confidence = pred.get("confidence", pred.get("final_confidence", 0.0))
tsl_stat = pred.get("tsl_status", {})
tbill_rt = pred.get("tbill_rate", 3.6)
probs = pred.get("probabilities", {})
q_vals = pred.get("q_values", {})
trained_from_year= pred.get("trained_from_year")
trained_at = pred.get("trained_at")
in_cash = tsl_stat.get("in_cash", False)
tsl_triggered = tsl_stat.get("tsl_triggered", False)
two_day_ret = tsl_stat.get("two_day_cumul_pct", 0.0)
# ── Header ────────────────────────────────────────────────────────────────────
st.title("πŸ€– P2 ETF DQN Engine")
st.caption("Dueling Deep Q-Network Β· Multi-Asset ETF Selection Β· arXiv:2411.07585")
# ── Check latest workflow run status ─────────────────────────────────────────
latest_run = _get_latest_workflow_run()
is_training = latest_run.get("status") in ("queued", "in_progress")
run_started = latest_run.get("created_at", "")[:16].replace("T", " ") if latest_run else ""
# ── Tabs ──────────────────────────────────────────────────────────────────────
tab1, tab2 = st.tabs(["πŸ“Š Single-Year Results", "πŸ”„ Multi-Year Consensus Sweep"])
# ═══════════════════════════════════════════════════════════════════════════════
# TAB 1 β€” Single-Year Results (existing content)
# ═══════════════════════════════════════════════════════════════════════════════
with tab1:
    # ── Provenance banner ─────────────────────────────────────────────────────
    if trained_from_year and trained_at:
        trained_date = trained_at[:10]  # ISO timestamp → YYYY-MM-DD
        st.markdown(
            f'<div class="provenance">📋 Active model trained from '
            f'<b>{trained_from_year}</b> · Generated <b>{trained_date}</b> · '
            f'Val Sharpe <b>{evalu.get("sharpe", "—")}</b></div>',
            unsafe_allow_html=True
        )
    else:
        st.info("⚠️ No trained model found. Click **🚀 Retrain DQN Agent** to train.")
    st.markdown("---")
    # ── TSL override banner ───────────────────────────────────────────────────
    if tsl_triggered:
        st.markdown(f"""
<div style="background:#fff8f0;border:2px solid #cc6600;border-radius:10px;
padding:16px;margin-bottom:16px;">
🔴 <b>TRAILING STOP LOSS TRIGGERED</b> — 2-day return
({float(two_day_ret):+.1f}%) breached −{tsl_pct:.0f}% threshold.
Holding CASH @ {tbill_rt:.2f}% T-bill until Z ≥ {z_reentry:.1f}σ.
</div>""", unsafe_allow_html=True)
    # ── Signal Hero Card ──────────────────────────────────────────────────────
    # FIX: datetime.utcnow() is deprecated (Python 3.12+); derive the naive EST
    # clock from an aware UTC timestamp — same fixed -5h convention as _today_est().
    now_est = datetime.now(timezone.utc) - timedelta(hours=5)
    is_today = (next_td == now_est.date())
    td_label = "TODAY'S SIGNAL" if is_today else "NEXT TRADING DAY"
    if in_cash or not pred:
        st.markdown(f"""
<div class="cash-card">
<div class="hero-label">⚠️ Risk Override Active · {td_label}</div>
<div class="hero-value" style="color:#cc6600;">💵 CASH</div>
<div class="hero-sub">
Earning 3m T-bill: <b>{tbill_rt:.2f}% p.a.</b> &nbsp;|&nbsp;
Re-entry when Z ≥ {z_reentry:.1f}σ
</div>
</div>""", unsafe_allow_html=True)
    else:
        prov_str = ""
        if trained_from_year and trained_at:
            prov_str = (f"📋 Trained from {trained_from_year} · "
                        f"Generated {trained_at[:10]} · Z-Score {z_score:.2f}σ")
        st.markdown(f"""
<div class="hero-card">
<div class="hero-label">Dueling DQN · {td_label}</div>
<div class="hero-value">{final_signal}</div>
<div class="hero-sub">
🎯 {next_td} &nbsp;|&nbsp; Confidence {float(confidence):.1%}
&nbsp;|&nbsp; Z-Score {float(z_score):.2f}σ
</div>
{"<div class='hero-sub' style='margin-top:6px;font-size:12px;opacity:0.7;'>" + prov_str + "</div>" if prov_str else ""}
</div>""", unsafe_allow_html=True)
    # ── Key Metrics ───────────────────────────────────────────────────────────
    if evalu:
        c1, c2, c3, c4, c5 = st.columns(5)
        c1.metric("Ann. Return", f"{evalu.get('ann_return', 0):.1%}")
        c2.metric("Sharpe Ratio", f"{evalu.get('sharpe', 0):.2f}")
        c3.metric("Max Drawdown", f"{evalu.get('max_drawdown', 0):.1%}")
        c4.metric("Calmar Ratio", f"{evalu.get('calmar', 0):.2f}")
        c5.metric("Hit Ratio", f"{evalu.get('hit_ratio', 0):.1%}")
        # Benchmark comparison — annualised-return delta vs the strategy.
        bench_ann = evalu.get("benchmark_ann", {})
        if bench_ann:
            bc1, bc2 = st.columns(2)
            strat_ann = evalu.get("ann_return", 0)
            for col, (k, v) in zip([bc1, bc2], bench_ann.items()):
                delta = strat_ann - v
                col.metric(f"{k} Ann. Return", f"{v:.1%}",
                           delta=f"{delta:+.1%} vs strategy")
    st.markdown("---")
    # ── Q-Value / Probability Bar Chart ──────────────────────────────────────
    if probs:
        st.subheader("📊 Action Probabilities (Softmax Q-Values)")
        actions = list(probs.keys())
        values = [probs[a] for a in actions]
        # Orange = CASH, blue = chosen action, grey = rejected actions.
        colours = ["#cc6600" if a == "CASH" else
                   "#0066cc" if a == final_signal else "#6c757d"
                   for a in actions]
        fig = go.Figure(go.Bar(
            x=actions, y=values,
            marker_color=colours,
            text=[f"{v:.1%}" for v in values],
            textposition="outside",
        ))
        fig.update_layout(
            paper_bgcolor="#ffffff", plot_bgcolor="#ffffff",
            font_color="#1a1a1a",
            yaxis_title="Probability", xaxis_title="Action",
            height=300, margin=dict(t=20, b=20),
            yaxis=dict(gridcolor="#e9ecef"),
        )
        st.plotly_chart(fig, use_container_width=True)
        st.caption(
            "**How to read:** Each bar is the agent's probability of choosing that action today, "
            "derived from softmax of the DQN Q-values. 🔵 Blue = chosen action. 🟠 Orange = CASH. "
            "Grey = rejected. A dominant bar = high conviction. Similar-height bars = low conviction / uncertain signal."
        )
    # ── Equity Curve ──────────────────────────────────────────────────────────
    if evalu and "equity_curve" in evalu:
        st.subheader("📈 Test-Set Equity Curve vs Benchmarks")
        st.caption("Normalised to 1.0 at start of test period. SPY and AGG shown for comparison.")
        equity = evalu["equity_curve"]
        test_dates = evalu.get("test_dates", [])
        # Fall back to an integer x-axis when dates don't align with the curve.
        x_axis = test_dates if len(test_dates) == len(equity) else list(range(len(equity)))
        fig2 = go.Figure()
        fig2.add_trace(go.Scatter(
            x=x_axis, y=equity, mode="lines", name="DQN Strategy",
            line=dict(color="#0066cc", width=2.5),
        ))
        # SPY and AGG from json — no load_local needed
        bench_equity = evalu.get("benchmark_equity", {})
        bench_colours = {"SPY": "#e63946", "AGG": "#2a9d8f"}
        for b, beq in bench_equity.items():
            bx = test_dates if len(test_dates) == len(beq) else list(range(len(beq)))
            fig2.add_trace(go.Scatter(
                x=bx, y=beq, mode="lines", name=b,
                line=dict(width=1.5, dash="dot", color=bench_colours.get(b, "#888888")),
            ))
        fig2.update_layout(
            paper_bgcolor="#ffffff", plot_bgcolor="#ffffff",
            font_color="#1a1a1a", height=420,
            yaxis_title="Normalised Equity (start = 1.0)",
            xaxis_title="Date",
            legend=dict(bgcolor="#f8f9fa", orientation="h",
                        yanchor="bottom", y=1.02, xanchor="left", x=0),
            yaxis=dict(gridcolor="#e9ecef", tickformat=".2f"),
            xaxis=dict(tickangle=-45, nticks=12, gridcolor="#e9ecef"),
            margin=dict(t=40, b=60),
        )
        st.plotly_chart(fig2, use_container_width=True)
    # ── Allocation Breakdown ──────────────────────────────────────────────────
    if evalu and "allocation_pct" in evalu:
        st.subheader("📊 Allocation Breakdown (Test Set)")
        alloc = evalu["allocation_pct"]
        fig3 = go.Figure(go.Pie(
            labels=list(alloc.keys()),
            values=list(alloc.values()),
            hole=0.45,
            marker_colors=["#0066cc","#28a745","#ffc107","#fd7e14",
                           "#6f42c1","#e83e8c","#17a2b8","#adb5bd"],
        ))
        fig3.update_layout(
            paper_bgcolor="#ffffff", font_color="#1a1a1a",
            height=320, margin=dict(t=20),
        )
        st.plotly_chart(fig3, use_container_width=True)
        st.caption(
            "**How to read:** Percentage of out-of-sample (OOS) test days the agent held each position. "
            "A well-trained agent rotates across ETFs. Heavy concentration in one slice (80%+) = "
            "agent defaulted to a single-asset strategy — sign of poor learning."
        )
    st.markdown("---")
    # ── 15-Day Audit Trail ────────────────────────────────────────────────────
    if evalu and "allocations" in evalu and len(evalu["allocations"]) > 0:
        st.subheader("🗓️ 15-Day Audit Trail — Most Recent OOS Days")
        st.caption("Last 15 trading days from the out-of-sample test period.")
        allocs = evalu["allocations"]
        eq_curve = evalu.get("equity_curve", [])
        test_dates = evalu.get("test_dates", [])
        daily_rets = []
        if len(eq_curve) > 1:
            for i in range(1, len(eq_curve)):
                daily_rets.append(eq_curve[i] / eq_curve[i-1] - 1)
        n_show = min(15, len(allocs))
        last_allocs = allocs[-n_show:]
        # FIX: daily_rets always has one fewer entry than equity_curve, so the
        # trailing slices can come back shorter than n_show; left-pad every
        # series to exactly n_show — unequal list lengths would make the
        # DataFrame constructor raise ValueError.
        last_rets = daily_rets[-n_show:] if daily_rets else []
        last_rets = [0.0] * (n_show - len(last_rets)) + last_rets
        last_equity = eq_curve[-n_show:] if eq_curve else []
        last_equity = [1.0] * (n_show - len(last_equity)) + last_equity
        last_dates = test_dates[-n_show:] if len(test_dates) >= n_show else [f"Day {i+1}" for i in range(n_show)]
        audit_df = pd.DataFrame({
            "Date"      : last_dates,
            "Allocation": last_allocs,
            "Daily Ret" : last_rets,
            "Equity"    : last_equity,
        })
        def _colour_ret(val):
            # Red for losses, green for gains.
            color = "#d93025" if val < 0 else "#188038"
            return f"color: {color}; font-weight: bold"
        # NOTE(review): Styler.applymap is deprecated in pandas >= 2.1 (renamed
        # to Styler.map) — confirm the pinned pandas version before migrating.
        styled = (
            audit_df.style
            .format({"Daily Ret": "{:+.2%}", "Equity": "{:.4f}"})
            .applymap(_colour_ret, subset=["Daily Ret"])
            .set_properties(**{"text-align": "center"})
            .hide(axis="index")
        )
        st.dataframe(styled, use_container_width=True)
    st.markdown("---")
    # ── Methodology Section ───────────────────────────────────────────────────
    st.subheader("🧠 Methodology")
    st.markdown("""
<div class="method-box">
<h4 style="color:#0066cc;">Reinforcement Learning Framework — Dueling DQN</h4>
<p>This engine implements a <b>Dueling Deep Q-Network (Dueling DQN)</b> for daily ETF
selection, directly extending the RL framework proposed by
<b>Yasin & Gill (2024)</b> — <i>"Reinforcement Learning Framework for Quantitative Trading"</i>,
presented at the <b>ICAIF 2024 FM4TS Workshop</b>
(<a href="https://arxiv.org/abs/2411.07585" style="color:#0066cc;">arXiv:2411.07585</a>).</p>
<h5 style="color:#0066cc;">From the Paper → Our Implementation</h5>
<p>The paper benchmarks DQN, PPO, and A2C agents on single-stock buy/sell decisions using
20 technical indicators, finding that <b>DQN with MLP policy significantly outperforms
policy-gradient methods</b> (PPO, A2C) on daily financial time-series, and that
<b>higher learning rates</b> (lr = 0.001) produce the most profitable signals.</p>
<p>We extend this methodology in three key ways:</p>
<ol>
<li><b>Multi-Asset Action Space:</b> Rather than binary buy/sell on a single asset,
the agent selects from 8 discrete actions — CASH or one of 7 ETFs
(TLT, VCIT, LQD, HYG, VNQ, GLD, SLV). This is fundamentally a harder problem than
the paper's setup, requiring the agent to learn relative value across assets.</li>
<li><b>Dueling Architecture</b> (Wang et al., 2016): We replace the paper's standard DQN
with a <b>Dueling DQN</b>, which separates the Q-function into a state-value stream V(s)
and an advantage stream A(s,a):
<br><code>Q(s,a) = V(s) + A(s,a) − mean_a(A(s,a))</code><br>
This is specifically more effective for multi-action spaces because it explicitly learns
which state is valuable independent of which action to take — critical when TLT and VCIT
have similar Q-values in a rate-falling regime.</li>
<li><b>Macro State Augmentation:</b> The paper's state space uses only price-derived
technical indicators. We add six FRED macro signals to the state:
VIX, T10Y2Y (yield curve slope), TBILL_3M, DXY, Corp Spread, and HY Spread.
These directly encode the macro regime that drives fixed-income and credit ETF selection.</li>
</ol>
<h5 style="color:#0066cc;">State Space (per trading day)</h5>
<p>20 technical indicators per ETF × 7 ETFs + 6 macro signals (+ z-scored variants),
all computed over a rolling <b>20-day lookback window</b>. The flattened window is fed
to the DQN as a single state vector. Indicators follow the paper exactly:
RSI(14), MACD(12/26/9), Stochastic(14), CCI(20), ROC(10), CMO(14), Williams%R,
ATR, Bollinger %B + Width, StochRSI, Ultimate Oscillator, Momentum(10),
rolling returns at 1/5/10/21d, and 21d realised volatility.</p>
<h5 style="color:#0066cc;">Reward Function</h5>
<p>Reward = excess daily return over 3m T-bill, minus transaction cost on switches,
scaled by inverse 21d realised volatility to penalise drawdown-prone positions.
This replaces the paper's raw P&L reward with a risk-adjusted signal aligned with
Sharpe Ratio maximisation.</p>
<h5 style="color:#0066cc;">Training</h5>
<p>Data split is 80/10/10 (train/val/test) from the user-selected start year to present.
Best weights are saved by <b>validation-set Sharpe Ratio</b>. The agent uses
<b>Double DQN</b> (online network selects action, frozen target network evaluates)
to reduce Q-value overestimation — a known instability in financial RL applications.
Experience replay buffer of 100k transitions; hard target network update every 500 steps;
ε-greedy exploration decaying from 1.0 → 0.05 over the first 50% of training.</p>
<h5 style="color:#0066cc;">Risk Controls</h5>
<p>A post-signal <b>Trailing Stop Loss</b> overrides the DQN signal to CASH if the
2-day cumulative return of the held ETF breaches the configured threshold.
Re-entry from CASH requires the DQN's best-action Z-score to clear the re-entry
threshold, ensuring the model has recovered conviction before re-entering risk.</p>
</div>
""", unsafe_allow_html=True)
    # ── Reference ─────────────────────────────────────────────────────────────
    st.markdown("""
<div style="background:#f8f9fa;border:1px solid #dee2e6;border-radius:8px;
padding:14px;font-size:12px;color:#6c757d;margin-top:8px;">
<b>Reference:</b> Yasin, A.S. & Gill, P.S. (2024).
<i>Reinforcement Learning Framework for Quantitative Trading.</i>
arXiv:2411.07585 [q-fin.TR]. Accepted at ICAIF 2024 FM4TS Workshop.
&nbsp;·&nbsp;
<b>Dueling DQN:</b> Wang, Z. et al. (2016).
<i>Dueling Network Architectures for Deep Reinforcement Learning.</i>
ICML 2016.
</div>
""", unsafe_allow_html=True)
# ═══════════════════════════════════════════════════════════════════════════════
# TAB 2 β€” Multi-Year Consensus Sweep
# ═══════════════════════════════════════════════════════════════════════════════
with tab2:
    st.subheader("🔄 Multi-Year Consensus Sweep")
    st.markdown(
        f"Runs the DQN agent across **{len(SWEEP_YEARS)} start years** and aggregates signals "
        f"into a weighted consensus vote. \n"
        f"**Sweep years:** {', '.join(str(y) for y in SWEEP_YEARS)} &nbsp;·&nbsp; "
        f"**Score:** 40% Return · 20% Z · 20% Sharpe · 20% (–MaxDD) \n"
        f"Auto-runs daily at **8pm EST**. Results are date-stamped — stale cache never shown."
    )
    today_est = _today_est()
    # ── Load today's cache ────────────────────────────────────────────────
    # today_cache = files stamped with today's date; prev_cache = newest older set.
    today_cache = _load_sweep_cache(today_est)
    prev_cache, prev_date = _load_sweep_cache_any()
    # Separate prev from today
    if prev_date == today_est:
        prev_cache, prev_date = {}, None
    n_today = len(today_cache)
    n_total = len(SWEEP_YEARS)
    sweep_done = n_today == n_total
    sweep_partial = 0 < n_today < n_total
    # ── Training-in-progress banner ───────────────────────────────────────
    if is_training:
        st.warning(
            f"⏳ **Training in progress** (started {run_started} UTC) — "
            f"{n_today}/{n_total} years complete today. "
            f"Showing previous day's results below where available.",
            icon="🔄"
        )
    # ── Date stamp warning if showing previous day ────────────────────────
    # Prefer today's results; otherwise fall back to the newest older set.
    display_cache = today_cache if today_cache else prev_cache
    display_date = today_est if today_cache else prev_date
    if display_cache and display_date and display_date < today_est:
        st.warning(
            f"⚠️ Showing results from **{display_date}** (previous day). "
            f"Today's sweep has not run yet — it will auto-trigger at 8pm EST.",
            icon="📅"
        )
    # ── Year status grid ──────────────────────────────────────────────────
    # One column per sweep year: green = done today, blue = stale shown
    # during training, red = not run.
    cols = st.columns(n_total)
    for i, yr in enumerate(SWEEP_YEARS):
        with cols[i]:
            today_has = yr in today_cache
            prev_has = yr in prev_cache
            if today_has:
                sig = today_cache[yr].get("signal", "?")
                st.success(f"**{yr}**\n✅ {sig}")
            elif is_training and prev_has:
                sig = prev_cache[yr].get("signal", "?")
                st.info(f"**{yr}**\n⏳ {sig}*")
            else:
                st.error(f"**{yr}**\n⏳ Not run")
    if is_training:
        st.caption("\\* = previous day's result shown while today's training is in progress")
    st.divider()
    # ── Manual sweep button ───────────────────────────────────────────────
    missing_today = [yr for yr in SWEEP_YEARS if yr not in today_cache]
    force_rerun = st.checkbox("🔄 Force re-run all years", value=False,
                              help="Re-trains even if today's results already exist")
    trigger_years = SWEEP_YEARS if force_rerun else missing_today
    col_btn, col_info = st.columns([1, 3])
    with col_btn:
        # Disabled while training runs or when today's sweep is already complete.
        sweep_btn = st.button(
            "🚀 Run Consensus Sweep",
            type="primary",
            use_container_width=True,
            disabled=(is_training or (sweep_done and not force_rerun)),
            help="Triggers parallel GitHub Actions jobs for missing years"
        )
    with col_info:
        if sweep_done and not force_rerun:
            st.success(f"✅ Today's sweep complete ({today_est}) — {n_total}/{n_total} years ready")
        elif is_training:
            st.warning(f"⏳ Training in progress... ({n_today}/{n_total} done today)")
        else:
            st.info(
                f"**{n_today}/{n_total}** years done for today ({today_est}). \n"
                f"Will trigger **{len(trigger_years)}** parallel jobs: "
                f"{', '.join(str(y) for y in trigger_years)}"
            )
    if sweep_btn and trigger_years:
        # sweep_mode carries the comma-separated year list; the workflow fans
        # out one job per year.
        sweep_str = ",".join(str(y) for y in trigger_years)
        with st.spinner(f"🚀 Triggering sweep for {sweep_str}..."):
            ok = _trigger_github(
                start_year=trigger_years[0],
                fee_bps=fee_bps, tsl_pct=tsl_pct, z_reentry=z_reentry,
                sweep_mode=sweep_str
            )
        if ok:
            st.success(
                f"✅ Triggered **{len(trigger_years)}** parallel jobs for: {sweep_str}. \n"
                f"Each takes ~90 mins. Refresh when complete."
            )
        else:
            st.error("❌ Failed to trigger GitHub Actions sweep.")
    # ── Show consensus ────────────────────────────────────────────────────
    if not display_cache:
        st.info("👆 No sweep results yet. Click **🚀 Run Consensus Sweep** or wait for 8pm EST auto-run.")
        st.stop()
    consensus = _compute_consensus(display_cache)
    if not consensus:
        st.warning("⚠️ Could not compute consensus.")
        st.stop()
    winner = consensus["winner"]
    w_info = consensus["etf_summary"][winner]
    win_color = ETF_COLORS.get(winner, "#0066cc")
    score_pct = w_info["score_share"] * 100
    score_pct = score_pct if (score_pct == score_pct) else 0.0  # guard nan
    # Below 40% score share no single ETF dominates the regimes.
    split_sig = w_info["score_share"] < 0.40
    sig_label = "⚠️ Split Signal" if split_sig else "✅ Clear Signal"
    note = f"Score share {score_pct:.0f}% · {w_info['n_years']}/{n_total} years · avg score {w_info['cum_score']:.4f}"
    date_note = f"Results from: {display_date}"
    # ── Winner banner ─────────────────────────────────────────────────────
    st.markdown(f"""
<div style="background:linear-gradient(135deg,#1a1a2e,#16213e);
border:2px solid {win_color};border-radius:16px;
padding:32px;text-align:center;margin:16px 0;
box-shadow:0 8px 24px rgba(0,0,0,0.4);">
<div style="font-size:11px;letter-spacing:3px;color:#aaa;margin-bottom:8px;">
WEIGHTED CONSENSUS · DQN · {len(display_cache)} START YEARS · {date_note}
</div>
<div style="font-size:72px;font-weight:900;color:{win_color};
text-shadow:0 0 30px {win_color}88;letter-spacing:2px;">
{winner}
</div>
<div style="font-size:14px;color:#ccc;margin-top:8px;">{sig_label} · {note}</div>
<div style="display:flex;justify-content:center;gap:32px;margin-top:20px;flex-wrap:wrap;">
<div style="text-align:center;">
<div style="font-size:11px;color:#aaa;">Avg Return</div>
<div style="font-size:22px;font-weight:700;color:{'#00b894' if w_info['avg_return']>0 else '#d63031'};">
{w_info['avg_return']*100:.1f}%</div>
</div>
<div style="text-align:center;">
<div style="font-size:11px;color:#aaa;">Avg Z</div>
<div style="font-size:22px;font-weight:700;color:#74b9ff;">{w_info['avg_z']:.2f}σ</div>
</div>
<div style="text-align:center;">
<div style="font-size:11px;color:#aaa;">Avg Sharpe</div>
<div style="font-size:22px;font-weight:700;color:#a29bfe;">{w_info['avg_sharpe']:.2f}</div>
</div>
<div style="text-align:center;">
<div style="font-size:11px;color:#aaa;">Avg MaxDD</div>
<div style="font-size:22px;font-weight:700;color:#fd79a8;">{w_info['avg_max_dd']*100:.1f}%</div>
</div>
</div>
</div>
""", unsafe_allow_html=True)
    # Also-ranked: remaining ETFs ordered by cumulative weighted score.
    others = sorted([(e, v) for e, v in consensus["etf_summary"].items() if e != winner],
                    key=lambda x: -x[1]["cum_score"])
    parts = []
    for etf, v in others:
        c = ETF_COLORS.get(etf, "#888")
        parts.append(f'<span style="color:{c};font-weight:600;">{etf}</span> '
                     f'<span style="color:#aaa;">({v["cum_score"]:.2f} · {v["n_years"]}yr)</span>')
    st.markdown(
        '<div style="text-align:center;margin-bottom:12px;font-size:13px;">'
        'Also ranked: ' + " &nbsp;|&nbsp; ".join(parts) + '</div>',
        unsafe_allow_html=True
    )
    st.divider()
    # ── Charts ────────────────────────────────────────────────────────────
    c1, c2 = st.columns(2)
    with c1:
        st.markdown("**Weighted Score per ETF**")
        es = consensus["etf_summary"]
        sorted_etfs = sorted(es.keys(), key=lambda e: -es[e]["cum_score"])
        # The `x == x` check inside the label is a NaN guard on score_share.
        fig_bar = go.Figure(go.Bar(
            x=sorted_etfs,
            y=[es[e]["cum_score"] for e in sorted_etfs],
            marker_color=[ETF_COLORS.get(e, "#888") for e in sorted_etfs],
            text=[f"{es[e]['n_years']}yr · {es[e]['score_share']*100 if es[e]['score_share']==es[e]['score_share'] else 0:.0f}%<br>{es[e]['cum_score']:.2f}"
                  for e in sorted_etfs],
            textposition="outside",
        ))
        fig_bar.update_layout(
            template="plotly_dark", height=360,
            yaxis_title="Cumulative Weighted Score",
            showlegend=False, margin=dict(t=20, b=20)
        )
        st.plotly_chart(fig_bar, use_container_width=True)
    with c2:
        st.markdown("**Conviction Z-Score by Start Year**")
        per_year = consensus["per_year"]
        fig_sc = go.Figure()
        # One marker per start year, coloured/labelled by its winning ETF.
        for row in per_year:
            etf = row["signal"]
            col = ETF_COLORS.get(etf, "#888")
            fig_sc.add_trace(go.Scatter(
                x=[row["year"]], y=[row["z_score"]],
                mode="markers+text",
                marker=dict(size=18, color=col, line=dict(color="white", width=1)),
                text=[etf], textposition="top center",
                name=etf, showlegend=False,
                hovertemplate=f"<b>{etf}</b><br>Year: {row['year']}<br>"
                              f"Z: {row['z_score']:.2f}σ<br>"
                              f"Return: {row['ann_return']*100:.1f}%<extra></extra>"
            ))
        fig_sc.add_hline(y=0, line_dash="dot",
                         line_color="rgba(255,255,255,0.3)",
                         annotation_text="Neutral")
        fig_sc.update_layout(
            template="plotly_dark", height=360,
            xaxis_title="Start Year", yaxis_title="Z-Score (σ)",
            margin=dict(t=20, b=20)
        )
        st.plotly_chart(fig_sc, use_container_width=True)
    # ── Per-year breakdown table ──────────────────────────────────────────
    st.subheader("📋 Full Per-Year Breakdown")
    st.caption(
        "**Wtd Score** = 40% Ann. Return + 20% Z-Score + 20% Sharpe + 20% (–Max DD), "
        "min-max normalised across years. "
        f"Results dated: **{display_date}**"
    )
    tbl_rows = []
    for row in sorted(per_year, key=lambda r: r["year"]):
        cached_today = row["year"] in today_cache
        tbl_rows.append({
            "Start Year": row["year"],
            "Signal": row["signal"],
            "Wtd Score": round(row["wtd"], 3),
            "Conviction": row["conviction"],
            "Z-Score": f"{row['z_score']:.2f}σ",
            "Ann. Return": f"{row['ann_return']*100:.2f}%",
            "Sharpe": f"{row['sharpe']:.2f}",
            "Max Drawdown": f"{row['max_dd']*100:.2f}%",
            "Lookback": f"{row['lookback']}d",
            "Date": "✅ Today" if cached_today else f"📅 {display_date}",
        })
    tbl_df = pd.DataFrame(tbl_rows)
    def _style_sig(val):
        # Tint the cell with the ETF's colour ("22" hex suffix ≈ 13% alpha).
        c = ETF_COLORS.get(val, "#888")
        return f"background-color:{c}22;color:{c};font-weight:700;"
    def _style_ret(val):
        # Green for positive formatted returns, red otherwise.
        try:
            v = float(val.replace("%", ""))
            return "color:#00b894;font-weight:600" if v > 0 else "color:#d63031;font-weight:600"
        except Exception:
            return ""
    # NOTE(review): Styler.applymap is deprecated in pandas >= 2.1 (renamed
    # to Styler.map) — confirm the pinned pandas version before migrating.
    styled_tbl = (tbl_df.style
                  .applymap(_style_sig, subset=["Signal"])
                  .applymap(_style_ret, subset=["Ann. Return"])
                  .set_properties(**{"text-align": "center", "font-size": "14px"})
                  .set_table_styles([
                      {"selector": "th", "props": [("font-size", "14px"),
                                                   ("font-weight", "bold"),
                                                   ("text-align", "center"),
                                                   ("background-color", "#1a1a2e"),
                                                   ("color", "#0066cc")]},
                      {"selector": "td", "props": [("padding", "10px")]}
                  ]))
    st.dataframe(styled_tbl, use_container_width=True, height=280)
    # ── How to read ───────────────────────────────────────────────────────
    st.divider()
    st.subheader("📖 How to Read These Results")
    st.markdown(f"""
**Why does the signal change by start year?**
Each start year defines the market regime the DQN agent trains on.
A model trained from 2008 has seen the GFC, 2013 taper tantrum, COVID, and rate hike cycles.
A model from 2019 focuses on post-COVID dynamics. The consensus aggregates all regime views.
**How is the winner chosen?**
Each year's signal is scored: 40% Ann. Return + 20% Z-Score + 20% Sharpe + 20% (–MaxDD),
all min-max normalised so no metric dominates. The ETF with the highest total weighted score wins.
**Why is the button disabled when today's results exist?**
To protect your GitHub Actions minutes (~{n_total * 90} mins per full sweep).
The sweep auto-runs at 8pm EST daily so you rarely need to trigger it manually.
**Split Signal warning (score share < 40%)**
Signals are fragmented — no single ETF dominates across regimes. Treat with caution.
**Date stamp**
Results filename includes the run date (e.g. `sweep_2019_20260304.json`).
The app only shows today's results if available, otherwise yesterday's with a warning banner.
Previous day's files are automatically deleted at 8pm EST before the new sweep starts.
""")